From 9120bb21fc4ac791bd7cea1f1b9100833d3ce1a4 Mon Sep 17 00:00:00 2001 From: Sharon Fitzpatrick Date: Thu, 26 Oct 2023 10:53:51 -0700 Subject: [PATCH 01/87] bug fix extracted shoreline widget --- src/coastseg/extract_shorelines_widget.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/coastseg/extract_shorelines_widget.py b/src/coastseg/extract_shorelines_widget.py index 2ae3ee1f..50c9436c 100644 --- a/src/coastseg/extract_shorelines_widget.py +++ b/src/coastseg/extract_shorelines_widget.py @@ -83,7 +83,7 @@ def __init__(self, extracted_shoreline_traitlet): options=[], layout=ipywidgets.Layout(padding="0px", margin="0px"), ) - self.ROI_list_widget = ipywidgets.dropdown( + self.ROI_list_widget = ipywidgets.Dropdown( description="Available ROIs", options=[], layout=ipywidgets.Layout(padding="0px", margin="0px"), From c578d927ee0d75b1610b87e252bc6b191efc1422 Mon Sep 17 00:00:00 2001 From: Sharon Fitzpatrick Date: Thu, 26 Oct 2023 10:55:31 -0700 Subject: [PATCH 02/87] v1.1.12 --- pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index 9151f119..00a66a8f 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -5,7 +5,7 @@ build-backend = "setuptools.build_meta" [project] name = "coastseg" dynamic = ["readme"] -version = "1.1.11" +version = "1.1.12" authors = [ { name=" Sharon Fitzpatrick", email="sharon.fitzpatrick23@gmail.com" }, ] From 9e0cf8dee362177820f8a3b1a96680a237b227b6 Mon Sep 17 00:00:00 2001 From: Sharon Fitzpatrick Date: Thu, 26 Oct 2023 15:18:33 -0700 Subject: [PATCH 03/87] add back watchable slider --- src/coastseg/watchable_slider.py | 131 +++++++++++++++++++++++++++++++ 1 file changed, 131 insertions(+) create mode 100644 src/coastseg/watchable_slider.py diff --git a/src/coastseg/watchable_slider.py b/src/coastseg/watchable_slider.py new file mode 100644 index 00000000..64fac44b --- /dev/null +++ b/src/coastseg/watchable_slider.py @@ -0,0 +1,131 @@ +from typing import Callable +import logging + +import ipywidgets +from ipywidgets import Layout + + +logger = logging.getLogger(__name__) + +""" +This class is a widget that allows the user to load extracted shorelines on the map. +""" + + +# write docstring for this class +class Extracted_Shoreline_widget(ipywidgets.VBox): + def __init__(self, map_interface=None): + # map interface that has extracted shorelines + self.map_interface = map_interface + self.map_interface.extract_shorelines_container.observe( + self.update_satname_widget, names="satname" + ) + self.map_interface.extract_shorelines_container.observe( + self.update_date_widget, names="date" + ) + + self.satellite_html = ipywidgets.HTML( + value=f"Satellite: {self.map_interface.extract_shorelines_container.satname}" + ) + self.date_html = ipywidgets.HTML( + value=f"Date: {self.map_interface.extract_shorelines_container.date}" + ) + title_html = ipywidgets.HTML( + value="

Load Extracted Shorelines

", layout=Layout(padding="0px") + ) + + self.create_dropdown() + self.create_slider() + + self.load_extracted_shorelines_button = ipywidgets.Button( + description="Load Shorelines" + ) + + # list of objects to watch + self._observables = [] + # Roi information bar + roi_info_row = ipywidgets.HBox([self.satellite_html, self.date_html]) + super().__init__( + [ + title_html, + self.dropdown, + self.slider, + ipywidgets.HTML(value="Extracted Shoreline Information: "), + roi_info_row, + ] + ) + + def update_satname_widget(self, change): + self.satellite_html.value = f"Satellite: {change['new']}" + + def update_date_widget(self, change): + self.date_html.value = f"Date: {change['new']}" + + def create_slider(self): + self.slider = ipywidgets.IntSlider( + value=self.map_interface.extract_shorelines_container.max_shorelines, + min=0, + max=1, + step=1, + description="Shoreline:", + disabled=True, + continuous_update=False, # only load in new value when slider is released + orientation="horizontal", + ) + + # Function to update widget options when the traitlet changes + def update_extracted_shoreline_slider(change): + self.slider.max = change["new"] + if change["new"] > 0: + self.slider.disabled = False + else: + self.slider.disabled = True + + # When the traitlet,id_container, trait 'max_shorelines' changes the update_extracted_shoreline_slider will be updated + self.map_interface.extract_shorelines_container.observe( + update_extracted_shoreline_slider, names="max_shorelines" + ) + self.slider.observe(self.on_slider_change, names="value") + + def create_dropdown(self): + self.dropdown = ipywidgets.Dropdown( + options=self.map_interface.id_container.ids, + description="Select ROI:", + style={"description_width": "initial"}, + ) + + # Function to update widget options when the traitlet changes + def update_select_roi_dropdown(change): + self.dropdown.options = change["new"] + + # When the traitlet,id_container, trait 'ids' changes the update_select_roi_dropdown will be updated + self.map_interface.id_container.observe(update_select_roi_dropdown, names="ids") + self.dropdown.observe(self.on_dropdown_change, names="value") + + def set_load_extracted_shorelines_button_on_click(self, on_click: Callable): + self.load_extracted_shorelines_button.on_click(lambda button: on_click()) + + def on_slider_change(self, change): + # get the row number from the extracted_shoreline_slider + row_number = change["new"] + # get the extracted shoreline by the row number from the map_interface + roi_id = self.dropdown.value + self.map_interface.load_extracted_shoreline_by_id(roi_id, row_number=row_number) + + def on_dropdown_change(self, change: dict): + """When the ROI ID in the dropdown changes load the + first extracted shoreline available. + + Args: + change (dict): a change dictionary containing the new change under the key ['new']. 
+ change["new"] will be an string ROI_ID + """ + roi_id = change["new"] + # get the extracted shoreline by the row number from the map_interface + self.map_interface.load_extracted_shoreline_by_id(roi_id, row_number=0) + + def set_satellite_html(self, satellite: str): + self.satellite_html.value = f"Satellite: {satellite} " + + def set_date_html(self, date: str): + self.date_html.value = f"Date: {date} " From c60349a8f69344790b1630ced92bb79797b0cac0 Mon Sep 17 00:00:00 2001 From: Sharon Fitzpatrick Date: Thu, 26 Oct 2023 17:05:50 -0700 Subject: [PATCH 04/87] #197 save config_gdf as crs 4326 + zenodo issue --- src/coastseg/extract_shorelines_widget.py | 2 +- src/coastseg/zoo_model.py | 27 ++++++++++++++--------- 2 files changed, 17 insertions(+), 12 deletions(-) diff --git a/src/coastseg/extract_shorelines_widget.py b/src/coastseg/extract_shorelines_widget.py index 2ae3ee1f..50c9436c 100644 --- a/src/coastseg/extract_shorelines_widget.py +++ b/src/coastseg/extract_shorelines_widget.py @@ -83,7 +83,7 @@ def __init__(self, extracted_shoreline_traitlet): options=[], layout=ipywidgets.Layout(padding="0px", margin="0px"), ) - self.ROI_list_widget = ipywidgets.dropdown( + self.ROI_list_widget = ipywidgets.Dropdown( description="Available ROIs", options=[], layout=ipywidgets.Layout(padding="0px", margin="0px"), diff --git a/src/coastseg/zoo_model.py b/src/coastseg/zoo_model.py index f5a12e92..e7be1de8 100644 --- a/src/coastseg/zoo_model.py +++ b/src/coastseg/zoo_model.py @@ -104,10 +104,10 @@ def get_files_to_download( filenames = [filenames] url_dict = {} for filename in filenames: - response = next((f for f in available_files if f["filename"] == filename), None) + response = next((f for f in available_files if f["key"] == filename), None) if response is None: raise ValueError(f"Cannot find {filename} at {model_id}") - link = response["links"]["download"] + link = response["links"]["self"] file_path = os.path.join(model_path, filename) url_dict[file_path] = link return url_dict @@ -867,8 +867,15 @@ def postprocess_data( # Copy over the config_gdf.geojson file config_gdf_path = os.path.join(roi_directory, "config_gdf.geojson") if os.path.exists(config_gdf_path): - shutil.copy( - config_gdf_path, os.path.join(session_path, "config_gdf.geojson") + # Read in the GeoJSON file using geopandas + gdf = gpd.read_file(config_gdf_path) + + # Project the GeoDataFrame to EPSG:4326 + gdf_4326 = gdf.to_crs("EPSG:4326") + + # Save the projected GeoDataFrame to a new GeoJSON file + gdf_4326.to_file( + os.path.join(session_path, "config_gdf.geojson"), driver="GeoJSON" ) model_settings_path = os.path.join(session_path, "model_settings.json") file_utilities.write_to_json(model_settings_path, preprocessed_data) @@ -1311,7 +1318,7 @@ def download_best( download_dict = {} # download best_model.txt and read the name of the best model best_model_json = next( - (f for f in available_files if f["filename"] == "BEST_MODEL.txt"), None + (f for f in available_files if f["key"] == "BEST_MODEL.txt"), None ) if best_model_json is None: raise ValueError(f"Cannot find BEST_MODEL.txt in {model_id}") @@ -1321,7 +1328,7 @@ def download_best( # if best BEST_MODEL.txt file not exist then download it if not os.path.isfile(BEST_MODEL_txt_path): common.download_url( - best_model_json["links"]["download"], + best_model_json["links"]["self"], BEST_MODEL_txt_path, "Downloading best_model.txt", ) @@ -1375,10 +1382,8 @@ def download_ensemble( """ download_dict = {} # get json and models - all_models_reponses = [ - f for f in available_files if 
f["filename"].endswith(".h5") - ] - all_model_names = [f["filename"] for f in all_models_reponses] + all_models_reponses = [f for f in available_files if f["key"].endswith(".h5")] + all_model_names = [f["key"] for f in all_models_reponses] json_file_names = [ model_name.replace("_fullmodel.h5", ".json") for model_name in all_model_names @@ -1406,7 +1411,7 @@ def download_ensemble( logger.info(f"all_json_reponses : {all_json_reponses }") for response in all_models_reponses + all_json_reponses: # get the link of the best model - link = response["links"]["download"] + link = response["links"]["self"] filename = response["key"] filepath = os.path.join(model_path, filename) download_dict[filepath] = link From d60f3e7de6e8f5347d690efac03c223f9ec4e6c3 Mon Sep 17 00:00:00 2001 From: Sharon Fitzpatrick Date: Thu, 26 Oct 2023 17:08:21 -0700 Subject: [PATCH 05/87] #197 v1.1.13 --- pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index 00a66a8f..14e19ccf 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -5,7 +5,7 @@ build-backend = "setuptools.build_meta" [project] name = "coastseg" dynamic = ["readme"] -version = "1.1.12" +version = "1.1.13" authors = [ { name=" Sharon Fitzpatrick", email="sharon.fitzpatrick23@gmail.com" }, ] From d34e7ca1a427480b9fd2eb9116019676d07b38df Mon Sep 17 00:00:00 2001 From: Sharon Fitzpatrick Date: Fri, 27 Oct 2023 10:00:56 -0700 Subject: [PATCH 06/87] #180 script to request preview for specific dates --- scripts/preview_images_for_dates.py | 283 ++++++++++++++++++++++++++++ 1 file changed, 283 insertions(+) create mode 100644 scripts/preview_images_for_dates.py diff --git a/scripts/preview_images_for_dates.py b/scripts/preview_images_for_dates.py new file mode 100644 index 00000000..286f552c --- /dev/null +++ b/scripts/preview_images_for_dates.py @@ -0,0 +1,283 @@ +import geopandas as gpd +import json +from typing import List, Optional, Set, Dict, Union +from datetime import datetime +import ee +import os +from typing import Collection +from tqdm.auto import tqdm +import argparse + + +def get_roi_polygon( + roi_gdf: gpd.GeoDataFrame, roi_id: Optional[int] = None, index: Optional[int] = None +) -> Optional[List[List[float]]]: + if roi_id is not None: + geoseries = roi_gdf[roi_gdf["id"] == roi_id]["geometry"] + elif index is not None: + geoseries = roi_gdf.iloc[[index]]["geometry"] + else: + return None + + if not geoseries.empty: + return [[[x, y] for x, y in list(geoseries.iloc[0].exterior.coords)]] + return None + + +def get_collection_by_tier( + polygon: List[List[float]], + start_date: Union[str, datetime], + end_date: Union[str, datetime], + satellite: str, + tier: int, + max_cloud_cover: float = 95, +) -> Union[ee.ImageCollection, None]: + """ + This function takes the required parameters and returns an ImageCollection from + the specified satellite and tier filtered by the given polygon, date range, and cloud cover. + + Args: + polygon (List[List[float]]): The polygon to filter the ImageCollection by. + start_date (Union[str, datetime]): The start date to filter the ImageCollection by. + end_date (Union[str, datetime]): The end date to filter the ImageCollection by. + satellite (str): The satellite to select the ImageCollection from. + tier (int): The tier of the satellite data. + max_cloud_cover (float): The maximum cloud cover percentage to filter the ImageCollection by. + + Returns: + ee.ImageCollection or None: The filtered ImageCollection or None if the inputs are invalid. 
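    Example (illustrative only, not part of the original patch; assumes the Earth Engine API has already been initialized via ee.Initialize()):
        >>> polygon = [[[151.2957545, -33.7390216], [151.312234, -33.7390216],
        ...            [151.312234, -33.7012561], [151.2957545, -33.7012561],
        ...            [151.2957545, -33.7390216]]]
        >>> collection = get_collection_by_tier(polygon, '2021-01-01', '2021-02-01', 'S2', tier=1)
        >>> if collection is not None:
        ...     print(collection.size().getInfo())  # number of matching images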
+ """ + + # Converting datetime objects to string if passed as datetime + if isinstance(start_date, datetime): + start_date = start_date.isoformat() + if isinstance(end_date, datetime): + end_date = end_date.isoformat() + + # Define collection names for tier 1 and tier 2 + col_names = { + 1: { + "L5": "LANDSAT/LT05/C02/T1_TOA", + "L7": "LANDSAT/LE07/C02/T1_TOA", + "L8": "LANDSAT/LC08/C02/T1_TOA", + "L9": "LANDSAT/LC09/C02/T1_TOA", + "S2": "COPERNICUS/S2", + }, + 2: { + "L5": "LANDSAT/LT05/C02/T2_TOA", + "L7": "LANDSAT/LE07/C02/T2_TOA", + "L8": "LANDSAT/LC08/C02/T2_TOA", + }, + } + + # Validate inputs and get collection name + if tier not in col_names: + print(f"Invalid tier ({tier})") + return None + # not all satellites are in tier 2 return None for any that are not + if satellite not in col_names[tier]: + return None + + collection_name = col_names[tier][satellite] + # Mapping satellite names to their respective cloud properties + cloud_properties = { + "L5": "CLOUD_COVER", + "L7": "CLOUD_COVER", + "L8": "CLOUD_COVER", + "L9": "CLOUD_COVER", + "S2": "CLOUDY_PIXEL_PERCENTAGE", + } + cloud_property = cloud_properties.get(satellite) + + collection = ( + ee.ImageCollection(collection_name) + .filterBounds(ee.Geometry.Polygon(polygon)) + .filterDate(ee.Date(start_date), ee.Date(end_date)) + .filterMetadata(cloud_property, "less_than", max_cloud_cover) + ) + return collection + + +def count_images_in_ee_collection( + polygon: list[list[float]], + start_date: Union[str, datetime], + end_date: Union[str, datetime], + max_cloud_cover: float = 95, + satellites: Collection[str] = ("L5", "L7", "L8", "L9", "S2"), +) -> dict: + """ + Count the number of images in specified satellite collections over a certain area and time period. + + Parameters: + polygon (list[list[float]]): A list of lists representing the vertices of a polygon in lon/lat coordinates. + start_date (str or datetime): The start date of the time period. If a string, it should be in 'YYYY-MM-DD' format. + end_date (str or datetime): The end date of the time period. If a string, it should be in 'YYYY-MM-DD' format. + max_cloud_cover (float, optional): The maximum cloud cover percentage. Images with a cloud cover percentage higher than this will be excluded. Defaults to 99. + satellites (Collection[str], optional): A collection of satellite names. The function will return image counts for these satellites. Defaults to ("L5","L7","L8","L9","S2"). + + Returns: + dict: A dictionary where the keys are the satellite names and the values are the image counts. + + Raises: + ValueError: If start_date or end_date are not strings or datetime objects. + + Example: + >>> polygon = [[[151.2957545, -33.7390216], + ... [151.312234, -33.7390216], + ... [151.312234, -33.7012561], + ... [151.2957545, -33.7012561], + ... 
[151.2957545, -33.7390216]]] + >>> start_date = '2017-12-01' + >>> end_date = '2018-01-01' + >>> count_images(polygon, start_date, end_date) + """ + # Check types of start_date and end_date + if isinstance(start_date, str): + start_date = datetime.strptime(start_date, "%Y-%m-%d") + elif not isinstance(start_date, datetime): + raise ValueError("start_date must be a string or datetime object") + + if isinstance(end_date, str): + end_date = datetime.strptime(end_date, "%Y-%m-%d") + elif not isinstance(end_date, datetime): + raise ValueError("end_date must be a string or datetime object") + + image_counts = {} + images_in_tier_count = 0 + for satellite in satellites: + images_in_tier_count = 0 + for tier in [1, 2]: + collection = get_collection_by_tier( + polygon, start_date, end_date, satellite, tier, max_cloud_cover + ) + if collection: + images_in_tier_count += collection.size().getInfo() + image_counts[satellite] = images_in_tier_count + + return image_counts + + +def process_roi(polygon, identifier, settings): + results = [] + for start_date, end_date in tqdm( + settings["dates"], + desc=f"Processing Date Ranges for ROI {identifier}", + leave=False, + ): + images_count = count_images_in_ee_collection( + polygon, + start_date, + end_date, + satellites=settings["sat_list"], + ) + result = { + "roi_id": identifier, + "date_range": [start_date, end_date], + "images_count": images_count, + "polygon": polygon, + } + results.append(result) + print(json.dumps(result, indent=2)) + return results + + +def preview_images_for_rois( + rois_gdf: gpd.GeoDataFrame, selected_ids: Set[int], settings: Dict +): + if "id" in rois_gdf.columns: + if selected_ids: + roi_ids = selected_ids + else: + roi_ids = set(rois_gdf["id"].tolist()) + + pbar = tqdm(roi_ids, desc="Querying API", leave=False, unit="roi") + for roi_id in pbar: + pbar.set_description(f"Querying API for ROI: {roi_id}") + polygon = get_roi_polygon(rois_gdf, roi_id=roi_id) + if polygon: + results = process_roi(polygon, roi_id, settings) + else: + pbar = tqdm(range(len(rois_gdf)), desc="Querying API", leave=False, unit="roi") + for index in pbar: + pbar.set_description(f"Querying API for ROI: {index}") + # for index in tqdm(range(len(rois_gdf)), desc="Querying API", leave=False): + polygon = get_roi_polygon(rois_gdf, index=index) + if polygon: + results = process_roi(polygon, index, settings) + + return results + + +def parse_args(): + parser = argparse.ArgumentParser( + description="Query satellite image counts for regions of interest." + ) + parser.add_argument( + "geojson", + type=str, + help="Filepath to the GeoJSON file containing the regions of interest.", + ) + parser.add_argument( + "dates", + type=str, + nargs="+", + help="Date ranges for the queries. Format: 'start_date,end_date'", + ) + + args = parser.parse_args() + return args + + +def main(): + # ======================== + # === User Input Section == + # ======================== + args = parse_args() + # Step 1 - Specify the date ranges for the queries. + # -------------------------------------------------- + # Specify the date ranges for the queries. + # Each date range is a list containing two strings: the start date and the end date. + # Dates should be formatted as 'YYYY-MM-DD'. + date_ranges = [date_range.split(",") for date_range in args.dates] + + # Step 2 - Specify the geographic regions of interest (ROIs). + # -------------------------------------------------- + # Provide the filepath to your GeoJSON file containing the regions of interest (ROIs). 
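    # Example invocation (illustrative only; the GeoJSON path below is a placeholder, the date ranges follow the 'start_date,end_date' format expected by the argument parser above):
    #   python preview_images_for_dates.py rois.geojson "2018-12-01,2019-03-01" "2020-06-01,2020-09-01"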
+ # Make sure to put an 'r' in front of the filepath to make it a raw string, especially on Windows. + rois_gdf = gpd.read_file(args.geojson) + + # Step 3 Optional - Specify the ROI ids you want to process. + # -------------------------------------------------- + # Optional: Select specific ROIs by their IDs. If you want to process all ROIs, leave this set empty. + # Example: selected_ids = set([1, 2, 3]) if you want to select ROIs with IDs 1, 2, and 3. + selected_ids = set() + + # List of satellite names you want to query for. You can add or remove items as needed. + sat_list = ["L5", "L7", "L8", "L9", "S2"] + + # Whether to save the query results to a JSON file. Set to True to save, False to not save. + save_to_file = True + + # ========================= + # === Script Execution ==== + # ========================= + + settings = {"dates": date_ranges, "sat_list": sat_list} + + # Check if EE was initialized or not + try: + ee.ImageCollection("LANDSAT/LT05/C01/T1_TOA") + except: + ee.Initialize() + + results = preview_images_for_rois(rois_gdf, selected_ids, settings) + + if save_to_file: + # Save results to JSON file + print(f"Saving results to results.json {os.path.abspath('results.json')}") + with open(os.path.abspath("results.json"), "w") as f: + json.dump(results, f, indent=2) + + +if __name__ == "__main__": + main() From c24f7703302c2176dac3f1f081ab96efb860963d Mon Sep 17 00:00:00 2001 From: Sharon Fitzpatrick Date: Mon, 30 Oct 2023 09:52:54 -0700 Subject: [PATCH 07/87] allow custom widgets to be added to settings --- src/coastseg/settings_UI.py | 816 ++++++++++++++++++++---------------- 1 file changed, 464 insertions(+), 352 deletions(-) diff --git a/src/coastseg/settings_UI.py b/src/coastseg/settings_UI.py index 988ddffb..2a686873 100644 --- a/src/coastseg/settings_UI.py +++ b/src/coastseg/settings_UI.py @@ -1,370 +1,482 @@ # standard python imports # external python imports import ipywidgets -from ipywidgets import VBox -from ipywidgets import HTML -from ipywidgets import BoundedFloatText -from ipywidgets import Select -from ipywidgets import BoundedIntText -from ipywidgets import Accordion +import datetime +from typing import List, Union, Optional, Tuple -def str_to_bool(var: str) -> bool: - return var == "True" - - -class Settings_UI: - def __init__(self) -> None: - self.settings = {} - # button styles - self.remove_style = dict(button_color="red") - self.load_style = dict(button_color="#69add1", description_width="initial") - self.action_style = dict(button_color="#ae3cf0") - self.save_style = dict(button_color="#50bf8f") - self.clear_stlye = dict(button_color="#a3adac") - - def set_settings(self, **kwargs): - self.settings.update({key: value for key, value in kwargs.items()}) - - def get_settings(self) -> dict: - settings = { - "apply_cloud_mask": str_to_bool(self.apply_cloud_mask_toggle.value), - "max_dist_ref": self.shoreline_buffer_slider.value, - "along_dist": self.alongshore_distance_slider.value, - "dist_clouds": self.cloud_slider.value, - "min_beach_area": self.beach_area_slider.value, - "min_length_sl": self.min_length_sl_slider.value, - "cloud_thresh": self.cloud_threshold_slider.value, - "min_points": self.min_points_text.value, - "max_std": self.max_std_text.value, - "max_range": self.max_range_text.value, - "min_chainage": self.min_chainage_text.value, - "multiple_inter": self.outliers_mode.value, - "prc_multiple": self.prc_multiple_text.value, - "percent_no_data": self.no_data_slider.value, - } - self.set_settings(**settings) - - return self.settings - - def 
get_no_data_slider(self): - # returns slider to control no data slider - instructions = HTML( - value="Percentage Bad Pixels \ -
Percentage of Bad Pixels Allowed" - ) - - self.no_data_slider = ipywidgets.FloatSlider( - value=50.0, - min=0.0, - max=100.0, - step=1.0, - description="percent_no_data :", - disabled=False, - continuous_update=False, - orientation="horizontal", - readout=True, - readout_format="d", - style={"description_width": "initial"}, - ) - return VBox([instructions, self.no_data_slider]) - - def get_min_points_text(self) -> VBox: - # returns slider to control beach area slider - label = HTML( - value="Minimum Number Shoreline of Points \ -
- Minimum number of shoreline points to calculate an intersection" - ) - - # min_points: minimum number of shoreline points to calculate an intersection. - self.min_points_text = BoundedIntText( - value=3, - min=1, - max=100, - step=1, - description="min_points :", - style={"description_width": "initial"}, - disabled=False, - ) - return VBox([label, self.min_points_text]) - - def get_min_length_sl_slider(self): - # returns slider to control beach area slider - min_length_sl_instr = HTML(value="Minimum shoreline length") - - self.min_length_sl_slider = ipywidgets.IntSlider( - value=500, - min=10, - max=1000, - step=1, - description="min_length_sl (m):", - disabled=False, - continuous_update=False, - orientation="horizontal", - readout=True, - readout_format="d", - style={"description_width": "initial"}, - ) - return VBox([min_length_sl_instr, self.min_length_sl_slider]) - - def get_max_range_text(self) -> VBox: - # returns slider to control beach area slider - label = HTML( - value="Max Range \ -
- Max range for shoreline points within the alongshore range, if range is above this value a NaN is returned for this intersection" - ) - # max_range: (in metres) maximum RANGE for the shoreline points within the alongshore range, if RANGE is above this value a NaN is returned for this intersection. - self.max_range_text = BoundedFloatText( - value=30.0, - min=1.0, - max=100.0, - step=1.0, - description="max_range (m)", - style={"description_width": "initial"}, - disabled=False, - ) - return VBox([label, self.max_range_text]) - - def get_outliers_mode(self) -> VBox: - # returns slider to control beach area slider - label = HTML( - value="Outliers Mode\ -
-How to deal with multiple shoreline intersections." - ) - # controls multiple_inter: ('auto','nan','max') defines how to deal with multiple shoreline intersections - self.outliers_mode = Select( - options=["auto", "nan", "max"], - value="auto", - description="multiple_inter :", - style={"description_width": "initial"}, - ) - return VBox([label, self.outliers_mode]) +class ButtonColors: + REMOVE = "red" + LOAD = "#69add1" + ACTION = "#ae3cf0" + SAVE = "#50bf8f" + CLEAR = "#a3adac" - def get_max_std_text(self) -> VBox: - # returns slider to control beach area slider - label = HTML( - value="Maximum STD \ -
- Maximum STD for the shoreline points within the alongshore range" - ) - - # max_std: (in metres) maximum STD for the shoreline points within the alongshore range, if STD is above this value a NaN is returned for this intersection. - self.max_std_text = BoundedFloatText( - value=15.0, - min=1.0, - max=100.0, - step=1.0, - description="max_std (m):", - style={"description_width": "initial"}, - disabled=False, - ) - return VBox([label, self.max_std_text]) - def get_beach_area_slider(self): - # returns slider to control beach area slider - beach_area_instr = HTML( - value="Minimum Beach Area \ -
- Minimum area (sqm) for object to be labelled as beach" - ) - - self.beach_area_slider = ipywidgets.IntSlider( - value=4500, - min=100, - max=10000, - step=10, - description="min_beach_area (sqm):", - disabled=False, - continuous_update=False, - orientation="horizontal", - readout=True, - readout_format="d", - style={"description_width": "initial"}, - ) - return VBox([beach_area_instr, self.beach_area_slider]) - - def get_shoreline_buffer_slider(self): - # returns slider to control beach area slider - shoreline_buffer_instr = HTML( - value="Reference Shoreline Buffer (m):\ -
- Buffer around reference shorelines in which shorelines can be extracted" - ) - - self.shoreline_buffer_slider = ipywidgets.IntSlider( - value=50, - min=5, - max=1000, - step=1, - description="max_dist_ref (m):", - disabled=False, - continuous_update=False, - orientation="horizontal", - readout=True, - readout_format="d", - style={"description_width": "initial"}, - ) - return VBox([shoreline_buffer_instr, self.shoreline_buffer_slider]) +def str_to_bool(var: str) -> bool: + return var == "True" - def get_prc_multiple_text(self) -> VBox: - # returns slider to control beach area slider - label = HTML( - value="Percentage of points std > max_std\ -
- Percentage of points whose std > max_std that will be set to 'max'.Only in 'auto' mode." - ) - # percentage of points whose std > max_std that will be set to 'max' - # percentage of data points where the std is larger than the user-defined max - # 'prc_multiple': percentage to use in 'auto' mode to switch from 'nan' to 'max' - self.prc_multiple_text = BoundedFloatText( - value=0.1, - min=0.0, - max=1.0, - step=0.01, - description="prc_multiple :", - style={"description_width": "initial"}, - disabled=False, - ) - return VBox([label, self.prc_multiple_text]) - def get_cloud_slider(self): - # returns slider to control beach area slider - cloud_instr = HTML( - value=" Cloud Distance\ -
- Allowed distance from extracted shoreline to detected clouds\ -
- Any extracted shorelines within this distance to any clouds will be dropped" - ) +def convert_date(date_str): + try: + return datetime.datetime.strptime(date_str, "%Y-%m-%d").date() + except ValueError as e: + raise ValueError(f"Invalid date: {date_str}. Expected format: 'YYYY-MM-DD'.{e}") - self.cloud_slider = ipywidgets.IntSlider( - value=300, - min=100, - max=1000, - step=1, - description="dist_clouds (m):", - disabled=False, - continuous_update=False, - orientation="horizontal", - readout=True, - readout_format="d", - style={"description_width": "initial"}, - ) - return VBox([cloud_instr, self.cloud_slider]) - def get_cloud_threshold_slider(self): - instr = HTML( - value="Cloud Threshold \ -
- Maximum percentage of cloud pixels allowed" - ) - self.cloud_threshold_slider = ipywidgets.FloatSlider( - value=0.5, - min=0, - max=1, - step=0.01, - description="cloud_thres :", - disabled=False, - continuous_update=False, - orientation="horizontal", - readout=True, - readout_format=".2f", - style={"description_width": "initial"}, - ) - return VBox([instr, self.cloud_threshold_slider]) +class Settings_UI: + def __init__( + self, + basic_settings: Optional[List[str]] = None, + advanced_settings: Optional[List[str]] = None, + ) -> None: + # if no basic settings are provided, use the default settings + if basic_settings is None: + basic_settings = [ + "max_dist_ref", + "min_length_sl", + "min_beach_area", + "dist_clouds", + "apply_cloud_mask", + "cloud_thresh", + "percent_no_data", + ] + if advanced_settings is None: + advanced_settings = [ + "min_points", + "max_std", + "along_dist", + "max_range", + "min_chainage", + "multiple_inter", + "prc_multiple", + ] - def get_alongshore_distance_slider(self): - # returns slider to control beach area slider - instr = HTML( - value="Alongshore Distance:\ -
- Along-shore distance over which to consider shoreline points to compute median intersection with transects" - ) - self.alongshore_distance_slider = ipywidgets.IntSlider( - value=25, - min=10, - max=100, - step=1, - description="along_dist (m):", - disabled=False, - continuous_update=False, - orientation="horizontal", - readout=True, - readout_format="d", - style={"description_width": "initial"}, - ) - return VBox([instr, self.alongshore_distance_slider]) + self.settings = {} + self.basic_settings = basic_settings + self.advanced_settings = advanced_settings + self.settings_widgets = {} - def get_min_chainage_text(self) -> VBox: - # returns slider to control beach area slider - label = HTML( - value=" Max Landward Distance \ -
- Max distance landward of the transect origin that an intersection is accepted, beyond this point a NaN is returned." + # button styles + self.remove_style = dict(button_color=ButtonColors.REMOVE) + self.load_style = dict( + button_color=ButtonColors.LOAD, description_width="initial" ) + self.action_style = dict(button_color=ButtonColors.ACTION) + self.save_style = dict(button_color=ButtonColors.SAVE) + self.clear_stlye = dict(button_color=ButtonColors.CLEAR) + + # Create the basic settings tab + self.basic_settings_tab = self.create_settings_tab(self.basic_settings) + + # Create the advanced settings tab + self.advanced_settings_tab = self.create_settings_tab(self.advanced_settings) + + def create_settings_tab(self, settings: List[str]) -> ipywidgets.VBox: + # Create the settings tab + tab_contents = [] + for setting_name in settings: + # Create the widget for the setting + widget, instructions = self.create_setting_widget(setting_name) + + # Add the widget and instructions to the tab contents + tab_contents.append(ipywidgets.VBox([instructions, widget])) + + # Add the widget to the settings_widgets dictionary + self.settings_widgets[setting_name] = widget + + # Create the settings tab + tab = ipywidgets.VBox(children=tab_contents) + + return tab + + def add_custom_widget( + self, + widget: Union[ + ipywidgets.ToggleButton, ipywidgets.FloatSlider, ipywidgets.IntText + ], + setting_name: str, + title: str, + instructions: str, + advanced: bool = False, + index: Optional[int] = None, + ): + """ + Adds a custom widget to the basic or advanced settings tab at the specified index. + + Args: + widget: The widget to add. + setting_name: The name of the setting. + title: The title of the setting. + instructions: Optional instructions for the widget. + advanced: Whether to add the widget to the advanced settings tab. If False, adds to the basic settings tab. + index: The index at which to insert the widget. If None, the widget is added to the end of the settings list. + """ + # Check for missing title, setting_name, or instructions + if not title: + raise ValueError("Title cannot be empty.") + if not setting_name: + raise ValueError("Setting name cannot be empty.") + if not instructions: + instructions = "" + # Add the widget to the settings tab + if advanced: + if index is None: + index = len(self.advanced_settings) + self.advanced_settings.insert(index, setting_name) + self.settings_widgets[setting_name] = widget + self.advanced_settings_tab.children = ( + self.advanced_settings_tab.children[:index] + + (ipywidgets.HTML(value=f"{title}
{instructions}."),) + + (self.settings_widgets[setting_name],) + + self.advanced_settings_tab.children[index:] + ) + else: + if index is None: + index = len(self.basic_settings) + self.basic_settings.insert(index, setting_name) + self.settings_widgets[setting_name] = widget + self.basic_settings_tab.children = ( + self.basic_settings_tab.children[:index] + + (ipywidgets.HTML(value=f"{title}
{instructions}."),) + + (self.settings_widgets[setting_name],) + + self.basic_settings_tab.children[index:] + ) + + def create_setting_widget( + self, setting_name: str + ) -> Tuple[ + Union[ipywidgets.ToggleButton, ipywidgets.FloatSlider, ipywidgets.IntText], + ipywidgets.HTML, + ]: + # Create the widget for the setting + if setting_name == "apply_cloud_mask": + widget = ipywidgets.ToggleButtons( + options=["True", "False"], + description="Apply Cloud Mask", + tooltips=[ + "Cloud Masking On", + "Cloud Masking Off", + ], + style={"description_width": "initial"}, + ) + instructions = ipywidgets.HTML( + value="Apply Cloud Mask
Enable/disable cloud masking." + ) + elif setting_name == "max_dist_ref": + widget = ipywidgets.IntSlider( + description="Max Distance Reference", + min=0, + max=100, + value=10, + style={"description_width": "initial"}, + ) + instructions = ipywidgets.HTML( + value="Max Distance Reference
Maximum distance from the shoreline to search for reference points." + ) + elif setting_name == "along_dist": + widget = ipywidgets.IntSlider( + description="Alongshore Distance", + min=0, + max=100, + value=10, + style={"description_width": "initial"}, + ) + instructions = ipywidgets.HTML( + value="Alongshore Distance
Distance along the shoreline to search for reference points." + ) + elif setting_name == "dist_clouds": + widget = ipywidgets.IntSlider( + description="Distance to Clouds", + min=0, + step=1, + max=1000, + value=300, + style={"description_width": "initial"}, + ) + instructions = ipywidgets.HTML( + value="Distance to Clouds
Maximum distance from the shoreline to search for clouds." + ) + elif setting_name == "min_beach_area": + widget = ipywidgets.IntSlider( + description="Minimum Beach Area", + min=0, + max=100, + value=10, + style={"description_width": "initial"}, + ) + instructions = ipywidgets.HTML( + value="Minimum Beach Area
Minimum area of beach required to be considered a valid reference point." + ) + elif setting_name == "min_length_sl": + widget = ipywidgets.IntSlider( + description="Minimum Shoreline Length", + min=0, + max=100, + value=10, + style={"description_width": "initial"}, + ) + instructions = ipywidgets.HTML( + value="Minimum Shoreline Length
Minimum length of shoreline required to be considered a valid reference point." + ) + elif setting_name == "cloud_thresh": + widget = ipywidgets.IntSlider( + description="Cloud Threshold", + min=0, + max=100, + value=10, + style={"description_width": "initial"}, + ) + instructions = ipywidgets.HTML( + value="Cloud Threshold
Threshold for cloud detection." + ) + elif setting_name == "min_points": + widget = ipywidgets.IntText( + description="Minimum Points", + value=10, + style={"description_width": "initial"}, + ) + instructions = ipywidgets.HTML( + value="Minimum Points
Minimum number of reference points required to calculate shoreline." + ) + elif setting_name == "max_std": + widget = ipywidgets.IntSlider( + description="Maximum Standard Deviation", + min=0, + max=100, + value=10, + style={"description_width": "initial"}, + ) + instructions = ipywidgets.HTML( + value="Maximum Standard Deviation
Maximum standard deviation allowed for reference points." + ) + elif setting_name == "max_range": + widget = ipywidgets.IntSlider( + description="Maximum Range", + min=0, + max=100, + value=10, + style={"description_width": "initial"}, + ) + instructions = ipywidgets.HTML( + value="Maximum Range
Maximum range allowed for reference points." + ) + elif setting_name == "min_chainage": + widget = ipywidgets.IntSlider( + description="Minimum Chainage", + min=0, + max=100, + value=10, + style={"description_width": "initial"}, + ) + instructions = ipywidgets.HTML( + value="Minimum Chainage
Minimum chainage required to be considered a valid reference point." + ) + elif setting_name == "multiple_inter": + widget = ipywidgets.ToggleButton( + description="Multiple Intersections", + value=True, + style={"description_width": "initial"}, + ) + instructions = ipywidgets.HTML( + value="Multiple Intersections
Enable/disable multiple intersection detection." + ) + elif setting_name == "prc_multiple": + widget = ipywidgets.FloatSlider( + description="Percentage Multiple Intersections", + min=0, + max=100, + value=10, + style={"description_width": "initial"}, + ) + instructions = ipywidgets.HTML( + value="Percentage Multiple Intersections
Percentage of multiple intersections allowed." + ) + elif setting_name == "percent_no_data": + widget = ipywidgets.FloatSlider( + description="Percentage of Bad Pixels Allowed", + min=0, + max=100, + value=10, + style={"description_width": "initial"}, + ) + instructions = ipywidgets.HTML( + value="Percentage Bad Pixels
Percentage of bad pixels allowed." + ) + else: + raise ValueError(f"Invalid setting name: {setting_name}") + + return widget, instructions - # min_chainage: (in metres) furthest distance landward of the transect origin that an intersection is accepted, beyond this point a NaN is returned. - self.min_chainage_text = BoundedFloatText( - value=-100.0, - min=-500.0, - max=-1.0, - step=-1.0, - description="min_chainage (m)", - style={"description_width": "initial"}, - disabled=False, - ) - return VBox([label, self.min_chainage_text]) + def get_settings(self) -> dict: + for setting_name, widget in self.settings_widgets.items(): + self.settings[setting_name] = widget.value - def get_apply_could_mask_toggle(self): - instr = HTML( - value="Cloud Mask Toggle \ -
- Defaults to True. Switch to False to turn off cloud masking." - ) - self.apply_cloud_mask_toggle = ipywidgets.ToggleButtons( - options=["True", "False"], - description="Apply Cloud Masking", - disabled=False, - tooltips=[ - "Cloud Masking On", - "Cloud Masking Off", - ], - ) - return VBox([instr, self.apply_cloud_mask_toggle]) + return self.settings.copy() - def render(self): - # create settings accordion widget - settings_accordion = Accordion( - children=[ - self.get_basic_settings_section(), - self.get_advanced_settings_section(), - ] + def render(self) -> None: + # Display the settings UI + # Create the settings UI + self.settings_ui = ipywidgets.Tab( + children=[self.basic_settings_tab, self.advanced_settings_tab] ) - # settings_accordion.set_title(0, "Settings") - settings_accordion.set_title(0, "Basic Settings") - settings_accordion.set_title(1, "Advanced Settings") - settings_accordion.selected_index = 0 - - return settings_accordion - - def get_advanced_settings_section(self): - # declare settings widgets - settings = { - "along_dist": self.get_alongshore_distance_slider(), - "min_points": self.get_min_points_text(), - "max_std": self.get_max_std_text(), - "max_range": self.get_max_range_text(), - "min_chainage": self.get_min_chainage_text(), - "multiple_inter": self.get_outliers_mode(), - "prc_multiple": self.get_prc_multiple_text(), - } - - # create settings vbox - settings_vbox = VBox([widget for widget_name, widget in settings.items()]) - return settings_vbox - - def get_basic_settings_section(self): - # declare settings widgets - settings = { - "shoreline_buffer_slider": self.get_shoreline_buffer_slider(), - "min_length_sl_slider": self.get_min_length_sl_slider(), - "beach_area_slider": self.get_beach_area_slider(), - "cloud_slider": self.get_cloud_slider(), - "apply_cloud_mask": self.get_apply_could_mask_toggle(), - "cloud_threshold_slider": self.get_cloud_threshold_slider(), - "no_data_slider": self.get_no_data_slider(), - } - - # create settings vbox - settings_vbox = VBox([widget for widget_name, widget in settings.items()]) - return settings_vbox + # self.settings_ui = ipywidgets.Accordion( + # children=[self.basic_settings_tab, self.advanced_settings_tab] + # ) + self.settings_ui.set_title(0, "Basic Settings") + self.settings_ui.set_title(1, "Advanced Settings") + return self.settings_ui + + # def create_setting_widget( + # self, setting_name: str + # ) -> Tuple[ + # Union[ipywidgets.ToggleButton, ipywidgets.FloatSlider, ipywidgets.IntText], + # ipywidgets.HTML, + # ]: + # # Create the widget for the setting + # if setting_name == "apply_cloud_mask": + # widget = ipywidgets.ToggleButton( + # description="Apply Cloud Mask", value=True, **self.action_style + # ) + # instructions = ipywidgets.HTML( + # value="Apply Cloud Mask
Enable/disable cloud masking." + # ) + # elif setting_name == "max_dist_ref": + # widget = ipywidgets.FloatSlider( + # description="Max Distance Reference", + # min=0, + # max=100, + # value=10, + # **self.action_style, + # ) + # instructions = ipywidgets.HTML( + # value="Max Distance Reference
Maximum distance from the shoreline to search for reference points." + # ) + # elif setting_name == "along_dist": + # widget = ipywidgets.FloatSlider( + # description="Alongshore Distance", + # min=0, + # max=100, + # value=10, + # **self.action_style, + # ) + # instructions = ipywidgets.HTML( + # value="Alongshore Distance
Distance along the shoreline to search for reference points." + # ) + # elif setting_name == "dist_clouds": + # widget = ipywidgets.FloatSlider( + # description="Distance to Clouds", + # min=0, + # max=100, + # value=10, + # **self.action_style, + # ) + # instructions = ipywidgets.HTML( + # value="Distance to Clouds
Maximum distance from the shoreline to search for clouds." + # ) + # elif setting_name == "min_beach_area": + # widget = ipywidgets.FloatSlider( + # description="Minimum Beach Area", + # min=0, + # max=100, + # value=10, + # **self.action_style, + # ) + # instructions = ipywidgets.HTML( + # value="Minimum Beach Area
Minimum area of beach required to be considered a valid reference point." + # ) + # elif setting_name == "min_length_sl": + # widget = ipywidgets.FloatSlider( + # description="Minimum Shoreline Length", + # min=0, + # max=100, + # value=10, + # **self.action_style, + # ) + # instructions = ipywidgets.HTML( + # value="Minimum Shoreline Length
Minimum length of shoreline required to be considered a valid reference point." + # ) + # elif setting_name == "cloud_thresh": + # widget = ipywidgets.FloatSlider( + # description="Cloud Threshold", + # min=0, + # max=100, + # value=10, + # **self.action_style, + # ) + # instructions = ipywidgets.HTML( + # value="Cloud Threshold
Threshold for cloud detection." + # ) + # elif setting_name == "min_points": + # widget = ipywidgets.IntText( + # description="Minimum Points", value=10, **self.action_style + # ) + # instructions = ipywidgets.HTML( + # value="Minimum Points
Minimum number of reference points required to calculate shoreline." + # ) + # elif setting_name == "max_std": + # widget = ipywidgets.FloatSlider( + # description="Maximum Standard Deviation", + # min=0, + # max=100, + # value=10, + # **self.action_style, + # ) + # instructions = ipywidgets.HTML( + # value="Maximum Standard Deviation
Maximum standard deviation allowed for reference points." + # ) + # elif setting_name == "max_range": + # widget = ipywidgets.FloatSlider( + # description="Maximum Range", + # min=0, + # max=100, + # value=10, + # **self.action_style, + # ) + # instructions = ipywidgets.HTML( + # value="Maximum Range
Maximum range allowed for reference points." + # ) + # elif setting_name == "min_chainage": + # widget = ipywidgets.FloatSlider( + # description="Minimum Chainage", + # min=0, + # max=100, + # value=10, + # **self.action_style, + # ) + # instructions = ipywidgets.HTML( + # value="Minimum Chainage
Minimum chainage required to be considered a valid reference point." + # ) + # elif setting_name == "multiple_inter": + # widget = ipywidgets.ToggleButton( + # description="Multiple Intersections", value=True, **self.action_style + # ) + # instructions = ipywidgets.HTML( + # value="Multiple Intersections
Enable/disable multiple intersection detection." + # ) + # elif setting_name == "prc_multiple": + # widget = ipywidgets.FloatSlider( + # description="Percentage Multiple Intersections", + # min=0, + # max=100, + # value=10, + # **self.action_style, + # ) + # instructions = ipywidgets.HTML( + # value="Percentage Multiple Intersections
Percentage of multiple intersections allowed." + # ) + # elif setting_name == "percent_no_data": + # widget = ipywidgets.FloatSlider( + # description="Percentage Bad Pixels", + # min=0, + # max=100, + # value=10, + # **self.action_style, + # ) + # instructions = ipywidgets.HTML( + # value="Percentage Bad Pixels
Percentage of bad pixels allowed." + # ) + # else: + # raise ValueError(f"Invalid setting name: {setting_name}") + + # return widget, instructions From 8ed282cd4c44e15c0ceb5d8acdca4017b8a238dd Mon Sep 17 00:00:00 2001 From: Sharon Fitzpatrick Date: Mon, 30 Oct 2023 10:37:32 -0700 Subject: [PATCH 08/87] new mapbox widget --- settings_ui.ipynb | 313 ++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 313 insertions(+) create mode 100644 settings_ui.ipynb diff --git a/settings_ui.ipynb b/settings_ui.ipynb new file mode 100644 index 00000000..d3efbd4c --- /dev/null +++ b/settings_ui.ipynb @@ -0,0 +1,313 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [ + { + "data": { + "application/vnd.jupyter.widget-view+json": { + "model_id": "1d4aae553bee47f48c104633dc5f18ea", + "version_major": 2, + "version_minor": 0 + }, + "text/plain": [ + "Accordion(children=(Tab(children=(VBox(children=(VBox(children=(HTML(value='Pick a date:'), DateBox(chiā€¦" + ] + }, + "execution_count": 1, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "from coastseg.settings_UI import Settings_UI\n", + "import ipywidgets\n", + "\n", + "instructions = \"Sand color on beach for model to detect 'dark' (grey/black) 'bright' (white)\"\n", + "widget = ipywidgets.Dropdown(\n", + " options=[\"default\", \"latest\", \"dark\", \"bright\"],\n", + " value=\"default\",\n", + " description=\"sand_color :\",\n", + " disabled=False,\n", + " )\n", + "\n", + "basic_settings = [\n", + " \"dates\",\n", + " \"max_dist_ref\",\n", + " \"min_length_sl\",\n", + " \"min_beach_area\",\n", + " \"dist_clouds\",\n", + " \"apply_cloud_mask\",\n", + " \"cloud_thresh\",\n", + " \"percent_no_data\",\n", + " ]\n", + "\n", + "settings_dashboard=Settings_UI(basic_settings)\n", + "settings_dashboard.add_custom_widget(widget,'sand_dropbox','Select Sand Color',instructions,advanced=True,index=0)\n", + "\n", + "settings_dashboard.render()" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [], + "source": [ + "settings_dashboard.add_custom_widget(widget,'sand_dropbox',None,instructions,advanced=True,index=0)\n" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "{'dates': ['2018-12-29', '2019-03-01'],\n", + " 'max_dist_ref': 10,\n", + " 'min_length_sl': 10,\n", + " 'min_beach_area': 10,\n", + " 'dist_clouds': 300,\n", + " 'apply_cloud_mask': 'True',\n", + " 'cloud_thresh': 10,\n", + " 'percent_no_data': 10.0,\n", + " 'min_points': 10,\n", + " 'max_std': 10,\n", + " 'along_dist': 10,\n", + " 'max_range': 10,\n", + " 'min_chainage': 10,\n", + " 'multiple_inter': True,\n", + " 'prc_multiple': 10.0,\n", + " 'sand_dropbox': 'default'}" + ] + }, + "execution_count": 3, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "settings_dashboard.get_settings()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import ipywidgets\n", + "\n", + "instructions = \"Sand color on beach for model to detect 'dark' (grey/black) 'bright' (white)\"\n", + "widget = ipywidgets.Dropdown(\n", + " options=[\"default\", \"latest\", \"dark\", \"bright\"],\n", + " value=\"default\",\n", + " description=\"sand_color :\",\n", + " disabled=False,\n", + " )" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + 
"settings_dashboard.add_custom_widget(widget,instructions,advanced=True,index=0)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "settings_dashboard.render()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "settings_dashboard.settings_widgets" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "settings_dashboard.advanced_settings.insert(0, 'sand_dropbox')" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "settings_dashboard.advanced_settings" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "settings_dashboard.settings_widgets['sand_dropbox'] = widget\n", + "settings_dashboard.settings_widgets" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "index=0\n", + "settings_dashboard.advanced_settings_tab.children = (\n", + " settings_dashboard.advanced_settings_tab.children[:index]\n", + " + (ipywidgets.HTML(value=f\"{instructions}\"),)\n", + " + settings_dashboard.advanced_settings_tab.children[index:]\n", + " )" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "settings_dashboard.advanced_settings_tab" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "settings_dashboard.render()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "\n", + " if instructions is not None:\n", + " self.advanced_settings_tab.children = (\n", + " self.advanced_settings_tab.children[:index]\n", + " + (ipywidgets.HTML(value=f\"{instructions}\"),)\n", + " + self.advanced_settings_tab.children[index:]\n", + " )" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "settings_dashboard.add_custom_widget(widget,instructions,advanced=True,index=0)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + " def get_sand_dropbox(self):\n", + " sand_color_instr = HTML(\n", + " value=\"Sand Color\\\n", + "
- Sand color on the beach for the model to detect: 'dark' (grey/black) or 'bright' (white)
\"\n", + " )\n", + " self.sand_dropdown = ipywidgets.Dropdown(\n", + " options=[\"default\", \"latest\", \"dark\", \"bright\"],\n", + " value=\"default\",\n", + " description=\"sand_color :\",\n", + " disabled=False,\n", + " )\n", + " return VBox([sand_color_instr, self.sand_dropdown])" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import ipywidgets as widgets\n", + "\n", + "widgets.ToggleButtons(" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "settings_dashboard.get_settings()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from coastseg.settings_UI_old import Settings_UI\n", + "\n", + "\n", + "settings_dashboard=Settings_UI()\n", + "settings_dashboard.render()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from coastseg.settings_UI_old import Settings_UI\n", + "\n", + "\n", + "settings_dashboard=Settings_UI()\n", + "settings_dashboard.render()" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "coastseg_transformers10", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.10.12" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} From 0df4a10f3c4e3ef0d85f05c932f02b045b841399 Mon Sep 17 00:00:00 2001 From: Sharon Fitzpatrick Date: Mon, 30 Oct 2023 10:56:41 -0700 Subject: [PATCH 09/87] add mapbox ui, update widget defaults --- src/coastseg/settings_UI.py | 310 +++++++++++++----------------------- 1 file changed, 113 insertions(+), 197 deletions(-) diff --git a/src/coastseg/settings_UI.py b/src/coastseg/settings_UI.py index 2a686873..47f14c1d 100644 --- a/src/coastseg/settings_UI.py +++ b/src/coastseg/settings_UI.py @@ -24,6 +24,49 @@ def convert_date(date_str): raise ValueError(f"Invalid date: {date_str}. Expected format: 'YYYY-MM-DD'.{e}") +class DateBox(ipywidgets.HBox): + def __init__(self, start_date=None, end_date=None, **kwargs): + if start_date is None: + start_date = datetime.date(2018, 12, 1) + if end_date is None: + end_date = datetime.date(2019, 3, 1) + + self.start_date = ipywidgets.DatePicker( + description="Start Date", + value=start_date, + disabled=False, + ) + self.end_date = ipywidgets.DatePicker( + description="End Date", + value=end_date, + disabled=False, + ) + super().__init__([self.start_date, self.end_date], **kwargs) + + @property + def value(self): + return [str(self.start_date.value), str(self.end_date.value)] + + @property + def options(self): + return [self.start_date.value, self.end_date.value] + + @options.setter + def options(self, values): + if len(values) != 2: + raise ValueError("You must provide a list of two dates.") + + start_date, end_date = values + + if isinstance(start_date, str): + start_date = datetime.date.fromisoformat(start_date) + if isinstance(end_date, str): + end_date = datetime.date.fromisoformat(end_date) + + self.start_date.value = start_date + self.end_date.value = end_date + + class Settings_UI: def __init__( self, @@ -113,8 +156,8 @@ def add_custom_widget( index: The index at which to insert the widget. If None, the widget is added to the end of the settings list. 
""" # Check for missing title, setting_name, or instructions - if not title: - raise ValueError("Title cannot be empty.") + if title is None: + raise ValueError("Title cannot be None.") if not setting_name: raise ValueError("Setting name cannot be empty.") if not instructions: @@ -177,13 +220,14 @@ def create_setting_widget( elif setting_name == "along_dist": widget = ipywidgets.IntSlider( description="Alongshore Distance", - min=0, + value=25, + min=10, max=100, - value=10, + step=1, style={"description_width": "initial"}, ) instructions = ipywidgets.HTML( - value="Alongshore Distance
Distance along the shoreline to search for reference points." + value="Alongshore Distance
Along-shore distance over which to consider shoreline points to compute median intersection with transects." ) elif setting_name == "dist_clouds": widget = ipywidgets.IntSlider( @@ -211,98 +255,114 @@ def create_setting_widget( elif setting_name == "min_length_sl": widget = ipywidgets.IntSlider( description="Minimum Shoreline Length", - min=0, - max=100, - value=10, + value=500, + min=50, + max=1000, + step=1, style={"description_width": "initial"}, ) instructions = ipywidgets.HTML( value="Minimum Shoreline Length
Minimum length of shoreline required to be considered a valid reference point." ) elif setting_name == "cloud_thresh": - widget = ipywidgets.IntSlider( + widget = ipywidgets.FloatSlider( description="Cloud Threshold", + value=0.5, min=0, - max=100, - value=10, + max=1, + step=0.01, + readout_format=".2f", style={"description_width": "initial"}, ) instructions = ipywidgets.HTML( - value="Cloud Threshold
Threshold for cloud detection." + value="Cloud Threshold
Maximum percentage of cloud pixels allowed." ) elif setting_name == "min_points": - widget = ipywidgets.IntText( + widget = ipywidgets.BoundedIntText( description="Minimum Points", - value=10, + value=3, + min=1, + max=100, + step=1, style={"description_width": "initial"}, ) instructions = ipywidgets.HTML( - value="Minimum Points
Minimum number of reference points required to calculate shoreline." + value="Minimum Points
Minimum number of shoreline points required to calculate shoreline." ) elif setting_name == "max_std": - widget = ipywidgets.IntSlider( + widget = ipywidgets.BoundedFloatText( description="Maximum Standard Deviation", - min=0, - max=100, - value=10, + value=15.0, + min=1.0, + max=100.0, + step=1.0, style={"description_width": "initial"}, ) instructions = ipywidgets.HTML( value="Maximum Standard Deviation
Maximum standard deviation allowed for reference points." ) elif setting_name == "max_range": - widget = ipywidgets.IntSlider( - description="Maximum Range", - min=0, - max=100, - value=10, + widget = ipywidgets.BoundedFloatText( + description="Maximum Range(m)", + min=1.0, + max=100.0, + value=30.0, style={"description_width": "initial"}, ) instructions = ipywidgets.HTML( - value="Maximum Range
Maximum range allowed for reference points." + value="Maximum Range
Max range for shoreline points within the alongshore range, if range is above this value a NaN is returned for this intersection." ) elif setting_name == "min_chainage": - widget = ipywidgets.IntSlider( + widget = ipywidgets.BoundedFloatText( description="Minimum Chainage", - min=0, - max=100, - value=10, + value=-100.0, + min=-500.0, + max=-1.0, + step=-1.0, style={"description_width": "initial"}, ) instructions = ipywidgets.HTML( - value="Minimum Chainage
Minimum chainage required to be considered a valid reference point." + value="Minimum Chainage
Max distance landward of the transect origin that an intersection is accepted, beyond this point a NaN is returned." ) elif setting_name == "multiple_inter": - widget = ipywidgets.ToggleButton( + widget = ipywidgets.Select( description="Multiple Intersections", - value=True, + options=["auto", "nan", "max"], + value="auto", style={"description_width": "initial"}, ) instructions = ipywidgets.HTML( value="Multiple Intersections
Enable/disable multiple intersection detection." ) elif setting_name == "prc_multiple": - widget = ipywidgets.FloatSlider( + widget = ipywidgets.BoundedFloatText( description="Percentage Multiple Intersections", - min=0, - max=100, - value=10, + value=0.1, + min=0.0, + max=1.0, + step=0.01, style={"description_width": "initial"}, ) instructions = ipywidgets.HTML( - value="Percentage Multiple Intersections
Percentage of multiple intersections allowed." + value="Percentage Multiple Intersections
Percentage of points whose std > max_std that will be set to 'max'.Only in 'auto' mode." ) elif setting_name == "percent_no_data": widget = ipywidgets.FloatSlider( description="Percentage of Bad Pixels Allowed", - min=0, - max=100, - value=10, + value=50.0, + min=0.0, + max=100.0, + step=1.0, style={"description_width": "initial"}, ) instructions = ipywidgets.HTML( value="Percentage Bad Pixels
Percentage of bad pixels allowed." ) + elif setting_name == "dates": + widget = DateBox() + instructions = ipywidgets.HTML( + value="Pick a date:", + ) else: raise ValueError(f"Invalid setting name: {setting_name}") @@ -317,166 +377,22 @@ def get_settings(self) -> dict: def render(self) -> None: # Display the settings UI # Create the settings UI - self.settings_ui = ipywidgets.Tab( + settings_tabs = ipywidgets.Tab( children=[self.basic_settings_tab, self.advanced_settings_tab] ) + # self.settings_ui = ipywidgets.Tab( + # children=[self.basic_settings_tab, self.advanced_settings_tab] + # ) # self.settings_ui = ipywidgets.Accordion( # children=[self.basic_settings_tab, self.advanced_settings_tab] # ) - self.settings_ui.set_title(0, "Basic Settings") - self.settings_ui.set_title(1, "Advanced Settings") - return self.settings_ui + # self.settings_ui.set_title(0, "Basic Settings") + # self.settings_ui.set_title(1, "Advanced Settings") + settings_tabs.set_title(0, "Basic Settings") + settings_tabs.set_title(1, "Advanced Settings") - # def create_setting_widget( - # self, setting_name: str - # ) -> Tuple[ - # Union[ipywidgets.ToggleButton, ipywidgets.FloatSlider, ipywidgets.IntText], - # ipywidgets.HTML, - # ]: - # # Create the widget for the setting - # if setting_name == "apply_cloud_mask": - # widget = ipywidgets.ToggleButton( - # description="Apply Cloud Mask", value=True, **self.action_style - # ) - # instructions = ipywidgets.HTML( - # value="Apply Cloud Mask
Enable/disable cloud masking." - # ) - # elif setting_name == "max_dist_ref": - # widget = ipywidgets.FloatSlider( - # description="Max Distance Reference", - # min=0, - # max=100, - # value=10, - # **self.action_style, - # ) - # instructions = ipywidgets.HTML( - # value="Max Distance Reference
Maximum distance from the shoreline to search for reference points." - # ) - # elif setting_name == "along_dist": - # widget = ipywidgets.FloatSlider( - # description="Alongshore Distance", - # min=0, - # max=100, - # value=10, - # **self.action_style, - # ) - # instructions = ipywidgets.HTML( - # value="Alongshore Distance
Distance along the shoreline to search for reference points." - # ) - # elif setting_name == "dist_clouds": - # widget = ipywidgets.FloatSlider( - # description="Distance to Clouds", - # min=0, - # max=100, - # value=10, - # **self.action_style, - # ) - # instructions = ipywidgets.HTML( - # value="Distance to Clouds
Maximum distance from the shoreline to search for clouds." - # ) - # elif setting_name == "min_beach_area": - # widget = ipywidgets.FloatSlider( - # description="Minimum Beach Area", - # min=0, - # max=100, - # value=10, - # **self.action_style, - # ) - # instructions = ipywidgets.HTML( - # value="Minimum Beach Area
Minimum area of beach required to be considered a valid reference point." - # ) - # elif setting_name == "min_length_sl": - # widget = ipywidgets.FloatSlider( - # description="Minimum Shoreline Length", - # min=0, - # max=100, - # value=10, - # **self.action_style, - # ) - # instructions = ipywidgets.HTML( - # value="Minimum Shoreline Length
Minimum length of shoreline required to be considered a valid reference point." - # ) - # elif setting_name == "cloud_thresh": - # widget = ipywidgets.FloatSlider( - # description="Cloud Threshold", - # min=0, - # max=100, - # value=10, - # **self.action_style, - # ) - # instructions = ipywidgets.HTML( - # value="Cloud Threshold
Threshold for cloud detection." - # ) - # elif setting_name == "min_points": - # widget = ipywidgets.IntText( - # description="Minimum Points", value=10, **self.action_style - # ) - # instructions = ipywidgets.HTML( - # value="Minimum Points
Minimum number of reference points required to calculate shoreline." - # ) - # elif setting_name == "max_std": - # widget = ipywidgets.FloatSlider( - # description="Maximum Standard Deviation", - # min=0, - # max=100, - # value=10, - # **self.action_style, - # ) - # instructions = ipywidgets.HTML( - # value="Maximum Standard Deviation
Maximum standard deviation allowed for reference points." - # ) - # elif setting_name == "max_range": - # widget = ipywidgets.FloatSlider( - # description="Maximum Range", - # min=0, - # max=100, - # value=10, - # **self.action_style, - # ) - # instructions = ipywidgets.HTML( - # value="Maximum Range
Maximum range allowed for reference points." - # ) - # elif setting_name == "min_chainage": - # widget = ipywidgets.FloatSlider( - # description="Minimum Chainage", - # min=0, - # max=100, - # value=10, - # **self.action_style, - # ) - # instructions = ipywidgets.HTML( - # value="Minimum Chainage
Minimum chainage required to be considered a valid reference point." - # ) - # elif setting_name == "multiple_inter": - # widget = ipywidgets.ToggleButton( - # description="Multiple Intersections", value=True, **self.action_style - # ) - # instructions = ipywidgets.HTML( - # value="Multiple Intersections
Enable/disable multiple intersection detection." - # ) - # elif setting_name == "prc_multiple": - # widget = ipywidgets.FloatSlider( - # description="Percentage Multiple Intersections", - # min=0, - # max=100, - # value=10, - # **self.action_style, - # ) - # instructions = ipywidgets.HTML( - # value="Percentage Multiple Intersections
Percentage of multiple intersections allowed." - # ) - # elif setting_name == "percent_no_data": - # widget = ipywidgets.FloatSlider( - # description="Percentage Bad Pixels", - # min=0, - # max=100, - # value=10, - # **self.action_style, - # ) - # instructions = ipywidgets.HTML( - # value="Percentage Bad Pixels
Percentage of bad pixels allowed." - # ) - # else: - # raise ValueError(f"Invalid setting name: {setting_name}") - - # return widget, instructions + self.settings_ui = ipywidgets.Accordion( + children=[settings_tabs], selected_index=0 + ) + self.settings_ui.set_title(0, "Settings") + return self.settings_ui From 0c50d75f2184f1484643c69b2a8a13da82b27570 Mon Sep 17 00:00:00 2001 From: Sharon Fitzpatrick Date: Mon, 30 Oct 2023 11:22:19 -0700 Subject: [PATCH 10/87] update descriptions for widgets --- src/coastseg/settings_UI.py | 27 ++++++++++++++++----------- 1 file changed, 16 insertions(+), 11 deletions(-) diff --git a/src/coastseg/settings_UI.py b/src/coastseg/settings_UI.py index 47f14c1d..474c032e 100644 --- a/src/coastseg/settings_UI.py +++ b/src/coastseg/settings_UI.py @@ -208,14 +208,15 @@ def create_setting_widget( ) elif setting_name == "max_dist_ref": widget = ipywidgets.IntSlider( - description="Max Distance Reference", - min=0, - max=100, - value=10, + description="Reference Shoreline Buffer", + value=100, + min=5, + max=1000, + step=1, style={"description_width": "initial"}, ) instructions = ipywidgets.HTML( - value="Max Distance Reference
Maximum distance from the shoreline to search for reference points." + value="Reference Shoreline Buffer
Max size of the buffer around reference shorelines in which shorelines can be extracted" ) elif setting_name == "along_dist": widget = ipywidgets.IntSlider( @@ -232,14 +233,14 @@ def create_setting_widget( elif setting_name == "dist_clouds": widget = ipywidgets.IntSlider( description="Distance to Clouds", + value=300, min=0, - step=1, max=1000, - value=300, + step=1, style={"description_width": "initial"}, ) instructions = ipywidgets.HTML( - value="Distance to Clouds
Maximum distance from the shoreline to search for clouds." + value="Allowed Distance from Clouds
Any shorelines within this distance of a cloud will be removed." ) elif setting_name == "min_beach_area": widget = ipywidgets.IntSlider( @@ -250,7 +251,7 @@ def create_setting_widget( style={"description_width": "initial"}, ) instructions = ipywidgets.HTML( - value="Minimum Beach Area
Minimum area of beach required to be considered a valid reference point." + value="Minimum Beach Area
Minimum area (sqm) for object to be labeled as beach" ) elif setting_name == "min_length_sl": widget = ipywidgets.IntSlider( @@ -262,7 +263,7 @@ def create_setting_widget( style={"description_width": "initial"}, ) instructions = ipywidgets.HTML( - value="Minimum Shoreline Length
Minimum length of shoreline required to be considered a valid reference point." + value="Minimum Shoreline Length
Minimum length of shoreline in meters." ) elif setting_name == "cloud_thresh": widget = ipywidgets.FloatSlider( @@ -348,7 +349,7 @@ def create_setting_widget( ) elif setting_name == "percent_no_data": widget = ipywidgets.FloatSlider( - description="Percentage of Bad Pixels Allowed", + description="% Bad Pixels", value=50.0, min=0.0, max=100.0, @@ -372,6 +373,10 @@ def get_settings(self) -> dict: for setting_name, widget in self.settings_widgets.items(): self.settings[setting_name] = widget.value + if "sat_list" in self.settings: + sat_tuple = self.settings["sat_list"] + self.settings["sat_list"] = list(sat_tuple) + return self.settings.copy() def render(self) -> None: From ef328d9ffd952f04d0c2c81cf36eba7df109d4b9 Mon Sep 17 00:00:00 2001 From: Sharon Fitzpatrick Date: Mon, 30 Oct 2023 11:44:54 -0700 Subject: [PATCH 11/87] update settings ui to hold better descriptions --- src/coastseg/settings_UI.py | 16 ++++++++++++---- 1 file changed, 12 insertions(+), 4 deletions(-) diff --git a/src/coastseg/settings_UI.py b/src/coastseg/settings_UI.py index 474c032e..cba4afa6 100644 --- a/src/coastseg/settings_UI.py +++ b/src/coastseg/settings_UI.py @@ -86,9 +86,9 @@ def __init__( ] if advanced_settings is None: advanced_settings = [ + "along_dist", "min_points", "max_std", - "along_dist", "max_range", "min_chainage", "multiple_inter", @@ -234,7 +234,7 @@ def create_setting_widget( widget = ipywidgets.IntSlider( description="Distance to Clouds", value=300, - min=0, + min=1, max=1000, step=1, style={"description_width": "initial"}, @@ -245,7 +245,7 @@ def create_setting_widget( elif setting_name == "min_beach_area": widget = ipywidgets.IntSlider( description="Minimum Beach Area", - min=0, + min=10, max=100, value=10, style={"description_width": "initial"}, @@ -333,7 +333,7 @@ def create_setting_widget( style={"description_width": "initial"}, ) instructions = ipywidgets.HTML( - value="Multiple Intersections
Enable/disable multiple intersection detection." + value="Outliers Mode
Enable/disable multiple intersection detection." ) elif setting_name == "prc_multiple": widget = ipywidgets.BoundedFloatText( @@ -377,6 +377,14 @@ def get_settings(self) -> dict: sat_tuple = self.settings["sat_list"] self.settings["sat_list"] = list(sat_tuple) + if "apply_cloud_mask" in self.settings: + apply_cloud_mask = self.settings["apply_cloud_mask"] + self.settings["apply_cloud_mask"] = str_to_bool(apply_cloud_mask) + + if "cloud_mask_issue" in self.settings: + cloud_mask_issue = self.settings["cloud_mask_issue"] + self.settings["cloud_mask_issue"] = str_to_bool(cloud_mask_issue) + return self.settings.copy() def render(self) -> None: From 659ae54a31a7d3fd2ddf835904808362a48e6c10 Mon Sep 17 00:00:00 2001 From: Sharon Fitzpatrick Date: Mon, 30 Oct 2023 11:48:35 -0700 Subject: [PATCH 12/87] update setting ui testing notebook --- settings_ui.ipynb | 110 +++++++++++++--------------------------------- 1 file changed, 30 insertions(+), 80 deletions(-) diff --git a/settings_ui.ipynb b/settings_ui.ipynb index d3efbd4c..b66e16ea 100644 --- a/settings_ui.ipynb +++ b/settings_ui.ipynb @@ -2,25 +2,9 @@ "cells": [ { "cell_type": "code", - "execution_count": 1, + "execution_count": null, "metadata": {}, - "outputs": [ - { - "data": { - "application/vnd.jupyter.widget-view+json": { - "model_id": "1d4aae553bee47f48c104633dc5f18ea", - "version_major": 2, - "version_minor": 0 - }, - "text/plain": [ - "Accordion(children=(Tab(children=(VBox(children=(VBox(children=(HTML(value='Pick a date:'), DateBox(chiā€¦" - ] - }, - "execution_count": 1, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "from coastseg.settings_UI import Settings_UI\n", "import ipywidgets\n", @@ -33,6 +17,30 @@ " disabled=False,\n", " )\n", "\n", + "satellite_selection = ipywidgets.SelectMultiple(\n", + " options=[\"L5\", \"L7\", \"L8\", \"L9\", \"S2\"],\n", + " value=[\"L8\"],\n", + " description=\"Satellites\",\n", + " disabled=False,\n", + " )\n", + "\n", + "image_size_filter_checkbox = ipywidgets.Checkbox(\n", + " value=True, \n", + " description=\"Enable Image Size Filter\",\n", + " indent=False, # To align the description with the label\n", + " )\n", + "\n", + "cloud_mask_issue=ipywidgets.ToggleButtons(\n", + " options=[\"False\", \"True\"],\n", + " description=\" Switch to True if sand pixels are masked (in black) on many images\",\n", + " disabled=False,\n", + " button_style=\"\",\n", + " tooltips=[\n", + " \"No cloud mask issue\",\n", + " \"Fix cloud masking\",\n", + " ],\n", + " )\n", + "\n", "basic_settings = [\n", " \"dates\",\n", " \"max_dist_ref\",\n", @@ -46,78 +54,20 @@ "\n", "settings_dashboard=Settings_UI(basic_settings)\n", "settings_dashboard.add_custom_widget(widget,'sand_dropbox','Select Sand Color',instructions,advanced=True,index=0)\n", + "settings_dashboard.add_custom_widget(cloud_mask_issue,\"cloud_mask_issue\",'Cloud Mask Issue',\"Switch to True if sand pixels are masked (in black) on many images\",advanced=True,index=-1)\n", "\n", + "settings_dashboard.add_custom_widget(satellite_selection,'sat_list','Select Satellites',\"Pick multiple satellites by holding the control key\",advanced=False,index=1)\n", + "settings_dashboard.add_custom_widget(image_size_filter_checkbox,\"image_size_filter\",'Image Size Filter',\"Activate to filter out images that are smaller than 60% of the Region of Interest (ROI).\",advanced=False,index=-1)\n", "settings_dashboard.render()" ] }, - { - "cell_type": "code", - "execution_count": 3, - "metadata": {}, - "outputs": [], - "source": [ - 
"settings_dashboard.add_custom_widget(widget,'sand_dropbox',None,instructions,advanced=True,index=0)\n" - ] - }, - { - "cell_type": "code", - "execution_count": 3, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "{'dates': ['2018-12-29', '2019-03-01'],\n", - " 'max_dist_ref': 10,\n", - " 'min_length_sl': 10,\n", - " 'min_beach_area': 10,\n", - " 'dist_clouds': 300,\n", - " 'apply_cloud_mask': 'True',\n", - " 'cloud_thresh': 10,\n", - " 'percent_no_data': 10.0,\n", - " 'min_points': 10,\n", - " 'max_std': 10,\n", - " 'along_dist': 10,\n", - " 'max_range': 10,\n", - " 'min_chainage': 10,\n", - " 'multiple_inter': True,\n", - " 'prc_multiple': 10.0,\n", - " 'sand_dropbox': 'default'}" - ] - }, - "execution_count": 3, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "settings_dashboard.get_settings()" - ] - }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ - "import ipywidgets\n", - "\n", - "instructions = \"Sand color on beach for model to detect 'dark' (grey/black) 'bright' (white)\"\n", - "widget = ipywidgets.Dropdown(\n", - " options=[\"default\", \"latest\", \"dark\", \"bright\"],\n", - " value=\"default\",\n", - " description=\"sand_color :\",\n", - " disabled=False,\n", - " )" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "settings_dashboard.add_custom_widget(widget,instructions,advanced=True,index=0)" + "settings_dashboard.get_settings()" ] }, { From 57208c0749f3ded8663d147d19ec23b98273c264 Mon Sep 17 00:00:00 2001 From: Sharon Fitzpatrick Date: Thu, 2 Nov 2023 15:59:42 -0700 Subject: [PATCH 13/87] fix extracted shoreline render widget --- src/coastseg/coastseg_map.py | 24 ++++++++++++++++------- src/coastseg/downloads.py | 37 +++++++++++++++--------------------- 2 files changed, 32 insertions(+), 29 deletions(-) diff --git a/src/coastseg/coastseg_map.py b/src/coastseg/coastseg_map.py index b0ffc980..f93ad0e0 100644 --- a/src/coastseg/coastseg_map.py +++ b/src/coastseg/coastseg_map.py @@ -18,6 +18,7 @@ from tqdm.auto import tqdm import traitlets from shapely.geometry import Polygon +from pandas import to_datetime # Internal/Local imports: specific classes/functions from coastseg.bbox import Bounding_Box @@ -2127,13 +2128,22 @@ def load_extracted_shorelines_to_map(self, row_number: int = 0) -> None: # if extracted shorelines exist, load them onto map, if none exist nothing loads if hasattr(extracted_shorelines, "gdf"): if not extracted_shorelines.gdf.empty: - self.extract_shorelines_container.load_list = ( - extracted_shorelines.gdf["satname"] - + "_" - + extracted_shorelines.gdf["date"].apply( - lambda x: x.strftime("%Y-%m-%d-%H-%M-%S") - ) - ).tolist() + if extracted_shorelines.gdf["date"].dtype == "datetime64[ns]": + self.extract_shorelines_container.load_list = ( + extracted_shorelines.gdf["satname"] + + "_" + + extracted_shorelines.gdf["date"].apply( + lambda x: x.strftime("%Y-%m-%d-%H-%M-%S") + ) + ).tolist() + else: + self.extract_shorelines_container.load_list = ( + extracted_shorelines.gdf["satname"] + + "_" + + to_datetime(extracted_shorelines.gdf["date"]).apply( + lambda x: x.strftime("%Y-%m-%d-%H-%M-%S") + ) + ).tolist() self.extract_shorelines_container.trash_list = [] self.load_extracted_shorelines_on_map(extracted_shorelines, row_number) diff --git a/src/coastseg/downloads.py b/src/coastseg/downloads.py index 06908343..111a0015 100644 --- a/src/coastseg/downloads.py +++ b/src/coastseg/downloads.py @@ -1,39 +1,32 
@@ -import os +import asyncio +import concurrent.futures +from datetime import datetime +import glob import json +import logging import math -from typing import List, Optional, Tuple +import os import platform -import logging import shutil -from glob import glob -import concurrent.futures import zipfile -import requests -from datetime import datetime -from coastseg import common -from coastseg import file_utilities - -import asyncio -import nest_asyncio import aiohttp -import tqdm -import tqdm.auto -import tqdm.asyncio -from typing import Union -from typing import Collection +import area import ee -from area import area import geopandas as gpd +import nest_asyncio +import tqdm +import tqdm.asyncio +import tqdm.auto from shapely.geometry import LineString, MultiPolygon, Polygon from shapely.ops import split +from typing import Collection, List, Optional, Tuple, Union -logger = logging.getLogger(__name__) +from coastseg import common +from coastseg import file_utilities -from typing import List, Union -from datetime import datetime +logger = logging.getLogger(__name__) -import ee def get_collection_by_tier( From 9abf3f6dc1da088e7e431b614dde1afe4c2928c3 Mon Sep 17 00:00:00 2001 From: Sharon Fitzpatrick Date: Thu, 2 Nov 2023 17:15:57 -0700 Subject: [PATCH 14/87] Merge branch 'update_sliders_1.1.2' --- src/coastseg/common.py | 58 ++++++++++++++++++++--------- src/coastseg/extracted_shoreline.py | 6 +++ src/coastseg/map_UI.py | 4 +- src/coastseg/settings_UI.py | 4 +- 4 files changed, 50 insertions(+), 22 deletions(-) diff --git a/src/coastseg/common.py b/src/coastseg/common.py index 387c2d8f..0639f9ec 100644 --- a/src/coastseg/common.py +++ b/src/coastseg/common.py @@ -1208,24 +1208,21 @@ def download_url(url: str, save_path: str, filename: str = None, chunk_size: int content_length = r.headers.get("Content-Length") if content_length: total_length = int(content_length) + with open(save_path, "wb") as fd: + with tqdm( + total=total_length, + unit="B", + unit_scale=True, + unit_divisor=1024, + desc=f"Downloading {filename}", + initial=0, + ascii=True, + ) as pbar: + for chunk in r.iter_content(chunk_size=chunk_size): + fd.write(chunk) + pbar.update(len(chunk)) else: - print( - "Content length not found in response headers. Downloading without progress bar." - ) - total_length = None - with open(save_path, "wb") as fd: - with tqdm( - total=total_length, - unit="B", - unit_scale=True, - unit_divisor=1024, - desc=f"Downloading {filename}", - initial=0, - ascii=True, - ) as pbar: - for chunk in r.iter_content(chunk_size=chunk_size): - fd.write(chunk) - pbar.update(len(chunk)) + logger.warning("Content length not found in response headers") def get_center_point(coords: list) -> tuple: @@ -1241,6 +1238,31 @@ def get_center_point(coords: list) -> tuple: return center_x, center_y +def convert_linestrings_to_multipoints(gdf: gpd.GeoDataFrame) -> gpd.GeoDataFrame: + """ + Convert LineString geometries in a GeoDataFrame to MultiPoint geometries. + Args: + - gdf (gpd.GeoDataFrame): The input GeoDataFrame. + Returns: + - gpd.GeoDataFrame: A new GeoDataFrame with MultiPoint geometries. If the input GeoDataFrame + already contains MultiPoints, the original GeoDataFrame is returned. 
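+    Example (illustrative):
+        >>> gdf = gpd.GeoDataFrame(geometry=[LineString([(0, 0), (1, 1)])], crs="epsg:4326")
+        >>> convert_linestrings_to_multipoints(gdf).geometry.iloc[0].geom_type
+        'MultiPoint'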
+ """ + + # Check if the gdf already contains MultiPoints + if any(gdf.geometry.type == "MultiPoint"): + return gdf + + def linestring_to_multipoint(linestring): + if isinstance(linestring, LineString): + return MultiPoint(linestring.coords) + return linestring + + # Convert each LineString to a MultiPoint + gdf["geometry"] = gdf["geometry"].apply(linestring_to_multipoint) + + return gdf + + def get_epsg_from_geometry(geometry: "shapely.geometry.polygon.Polygon") -> int: """Uses geometry of shapely rectangle in crs 4326 to return the most accurate utm code as a string of format 'epsg:utm_code' @@ -1645,7 +1667,7 @@ def save_extracted_shorelines( geomtype="lines", ) - # Save extracted shorelines as a GeoJSON file + # Save extracted shorelines to GeoJSON files extracted_shorelines.to_file( save_path, "extracted_shorelines_lines.geojson", extracted_shorelines_gdf_lines ) diff --git a/src/coastseg/extracted_shoreline.py b/src/coastseg/extracted_shoreline.py index f733f365..2b478ddc 100644 --- a/src/coastseg/extracted_shoreline.py +++ b/src/coastseg/extracted_shoreline.py @@ -285,6 +285,12 @@ def combine_satellite_data(satellite_data: dict) -> dict: # Fill the satellite_data dict for satname, sat_data in satellite_data.items(): + satellite_data[satname].setdefault("dates", []) + satellite_data[satname].setdefault("geoaccuracy", []) + satellite_data[satname].setdefault("shorelines", []) + satellite_data[satname].setdefault("cloud_cover", []) + satellite_data[satname].setdefault("filename", []) + satellite_data[satname].setdefault("idx", []) satellite_data[satname].setdefault("dates", []) satellite_data[satname].setdefault("geoaccuracy", []) satellite_data[satname].setdefault("shorelines", []) diff --git a/src/coastseg/map_UI.py b/src/coastseg/map_UI.py index 00ed9978..e8776fd7 100644 --- a/src/coastseg/map_UI.py +++ b/src/coastseg/map_UI.py @@ -612,7 +612,7 @@ def get_beach_area_slider(self): self.beach_area_slider = ipywidgets.IntSlider( value=1000, - min=10, + min=5, max=10000, step=10, description="min_beach_area (sqm):", @@ -741,7 +741,7 @@ def get_min_length_sl_slider(self): self.min_length_sl_slider = ipywidgets.IntSlider( value=500, - min=50, + min=5, max=1000, step=1, description="min_length_sl (m):", diff --git a/src/coastseg/settings_UI.py b/src/coastseg/settings_UI.py index 988ddffb..b026f719 100644 --- a/src/coastseg/settings_UI.py +++ b/src/coastseg/settings_UI.py @@ -168,7 +168,7 @@ def get_beach_area_slider(self): self.beach_area_slider = ipywidgets.IntSlider( value=4500, - min=100, + min=5, max=10000, step=10, description="min_beach_area (sqm):", @@ -233,7 +233,7 @@ def get_cloud_slider(self): self.cloud_slider = ipywidgets.IntSlider( value=300, - min=100, + min=0, max=1000, step=1, description="dist_clouds (m):", From 9b42a8a9fe663cfd6049f0dea8115feb86425ca3 Mon Sep 17 00:00:00 2001 From: Sharon Fitzpatrick Date: Thu, 2 Nov 2023 20:14:46 -0700 Subject: [PATCH 15/87] hotfix: get coastseg back to original --- src/coastseg/coastseg_map.py | 261 ++++++++++++++--------------------- src/coastseg/map_UI.py | 34 ++--- 2 files changed, 116 insertions(+), 179 deletions(-) diff --git a/src/coastseg/coastseg_map.py b/src/coastseg/coastseg_map.py index b0ffc980..3718c050 100644 --- a/src/coastseg/coastseg_map.py +++ b/src/coastseg/coastseg_map.py @@ -52,32 +52,50 @@ class IDContainer(traitlets.HasTraits): class ExtractShorelinesContainer(traitlets.HasTraits): - # ROI_ids = traitlets.List(trait=traitlets.Unicode()) - # list of shorelines that can be loaded - load_list = 
traitlets.List(trait=traitlets.Unicode()) - # list of shorelines that will be thrown away - trash_list = traitlets.List(trait=traitlets.Unicode()) - - def __init__(self, load_list_widget=None, trash_list_widget=None): + max_shorelines = traitlets.Int(0) + layer_name = traitlets.Unicode("") + # geo_data = traitlets.Instance(klass=dict) + geo_data = traitlets.Instance(GeoJSON) + satname = traitlets.Unicode("") + date = traitlets.Unicode("") + + def __init__( + self, + geo_data: GeoJSON = GeoJSON(data={}), + ): super().__init__() - if load_list_widget: - self.link_load_list(load_list_widget) - if trash_list_widget: - self.link_trash_list(trash_list_widget) - # Link the widgets and the traits - # traitlets.dlink((self, 'ROI_ids'), (ROI_ids_widget, 'options')) - - def link_load_list(self, widget): - if hasattr(widget, "options"): - traitlets.dlink((self, "load_list"), (widget, "options")) - - def link_trash_list(self, widget): - if hasattr(widget, "options"): - traitlets.dlink((self, "trash_list"), (widget, "options")) + if geo_data: + self.geo_data = geo_data + self.observe(self._on_geo_data_changed, names="geo_data") + + @traitlets.validate("satname") + def _validate_satname(self, proposal): + if isinstance(proposal["value"], str): + if proposal["value"] in set(["", "L5", "L7", "L8", "L9", "S2"]): + return proposal["value"] + else: + raise traitlets.TraitError( + f"{proposal['value']}, satname must be one of the following L5,L7,L8,L9 or S2" + ) + else: + raise traitlets.TraitError("satname must a be str") + + def _on_geo_data_changed(self, change): + # change['new'] is a GeoJSON object with the methods .data and .name + if change["new"].data == {}: + self.layer_name = "" + self.satname = "" + self.date = "" + else: + self.layer_name = change["new"].name + properties = change["new"].data.get("properties", {}) + if properties: + self.satname = properties.get("satname", "") + self.date = properties.get("date", "") class CoastSeg_Map: - def __init__(self, **kwargs): + def __init__(self): # Basic settings and configurations self.settings = {} self.set_settings() @@ -140,73 +158,6 @@ def get_session_name(self): def set_session_name(self, name: str): self.session_name = name - def load_extracted_shoreline_layer(self, gdf, layer_name, style): - map_crs = "epsg:4326" - # create a layer with the extracted shorelines selected - points_gdf = extracted_shoreline.convert_linestrings_to_multipoints(gdf) - projected_gdf = points_gdf.to_crs(map_crs) - projected_gdf = common.stringify_datetime_columns(projected_gdf) - features_json = json.loads(projected_gdf.to_json()) - # create an ipyleaflet GeoJSON layer with the extracted shorelines selected - new_layer = GeoJSON( - data=features_json, name=layer_name, style=style, point_style=style - ) - self.replace_layer_by_name(layer_name, new_layer, on_hover=None, on_click=None) - - def delete_selected_shorelines( - self, layer_name: str, selected_shorelines: List = None - ) -> None: - if selected_shorelines: - pass - # this will remove the selected shorelines from the files - # do some fancy logic to remove the selected shorelines from the files - print(f"Deleting {selected_shorelines} ") - self.remove_layer_by_name(layer_name) - - def load_selected_shorelines_on_map( - self, selected_shorelines: List, layer_name: str, style: dict - ) -> None: - def get_selected_shorelines(gdf, selected_items) -> gpd.GeoDataFrame: - # Filtering criteria - frames = [] # List to collect filtered frames - - # Loop through each dictionary in dates_tuple - for criteria in 
list(selected_items): - satname, dates = criteria.split("_") - # print(f"satname: {satname} dates: {dates}") - filtered = gdf[ - (gdf["date"] == datetime.strptime(dates, "%Y-%m-%d-%H-%M-%S")) - & (gdf["satname"] == satname) - ] - frames.append(filtered) - - # Concatenate the frames to get the final result - filtered_gdf = gpd.GeoDataFrame(columns=["geometry"]) - filtered_gdf.crs = "epsg:4326" - if frames: - filtered_gdf = pd.concat(frames) - return filtered_gdf - - # @todo pass in the ROI ID that the extracted shorelines are from - # @todo this code is temporary - # Get the list of the ROI IDs that have extracted shorelines - ids_with_extracted_shorelines = self.get_roi_ids(has_extracted_shorelines=True) - if ids_with_extracted_shorelines == []: - logger.warning("No ROIs with extracted shorelines found") - return - # select the first ROI ID with extracted shorelines - selected_id = ids_with_extracted_shorelines[0] - # ------------------------------------------- - # load the extracted shorelines for the selected ROI ID - extracted_shorelines = self.rois.get_extracted_shoreline(selected_id) - - # get the geodataframe for the extracted shorelines - selected_gdf = get_selected_shorelines( - extracted_shorelines.gdf, selected_shorelines - ) - if not selected_gdf.empty: - self.load_extracted_shoreline_layer(selected_gdf, layer_name, style) - def create_map(self): """create an interactive map object using the map_settings Returns: @@ -411,8 +362,6 @@ def load_session_from_directory(self, dir_path: str) -> None: # add extracted shoreline and transect intersections to ROI they were extracted from self.rois.add_cross_shore_distances(cross_distances, roi_id) - self.load_extracted_shorelines_to_map(1) - def load_fresh_session(self, session_path: str) -> None: # remove all the old features from the map self.remove_all() @@ -1881,12 +1830,12 @@ def remove_extracted_shorelines(self): # remove extracted shoreline vectors from the map self.remove_extracted_shoreline_layers() self.id_container.ids = [] - self.extract_shorelines_container.load_list = [] - self.extract_shorelines_container.trash_list = [] + self.extract_shorelines_container.max_shorelines = 0 def remove_extracted_shoreline_layers(self): - self.remove_layer_by_name("delete") - self.remove_layer_by_name("extracted shoreline") + if self.extract_shorelines_container.geo_data.data != {}: + self.remove_layer_by_name(self.extract_shorelines_container.geo_data.name) + self.extract_shorelines_container.geo_data = GeoJSON(data={}) def remove_bbox(self): """Remove all the bounding boxes from the map""" @@ -2070,29 +2019,6 @@ def load_extracted_shoreline_by_id(self, selected_id: str, row_number: int = 0): # if extracted shorelines exist, load them onto map, if none exist nothing loads self.load_extracted_shorelines_on_map(extracted_shorelines, row_number) - def get_roi_ids( - self, has_extracted_shorelines: bool = False, raise_exception: bool = True - ) -> list: - matching_ids = [] - # Check if any ROIs are loaded - if raise_exception: - if self.rois is None: - logger.warning("No ROIs found. Please load ROIs.") - raise Exception("No ROIs found. 
Please load ROIs.") - if has_extracted_shorelines: - # Get the list of the ROI IDs that have extracted shorelines - ids_with_extracted_shorelines = ( - self.rois.get_ids_with_extracted_shorelines() - ) - matching_ids = [ - id - for id in ids_with_extracted_shorelines - if self.rois.get_extracted_shoreline(id) is not None - ] - else: - matching_ids = self.get_all_roi_ids() - return matching_ids - def load_extracted_shorelines_to_map(self, row_number: int = 0) -> None: """Loads stylized extracted shorelines onto the map for a single selected region of interest (ROI). @@ -2105,38 +2031,53 @@ def load_extracted_shorelines_to_map(self, row_number: int = 0) -> None: Returns: None: This function does not return anything, but rather loads the extracted shorelines onto the map. """ - # Get the list of the ROI IDs that have extracted shorelines - ids_with_extracted_shorelines = self.get_roi_ids(has_extracted_shorelines=True) + + logger.info(f"row_number: {row_number}") + # Remove any existing extracted shorelines self.remove_extracted_shoreline_layers() - # if no ROIs have extracted shorelines, return otherwise load extracted shorelines for the first ROI ID with extracted shorelines - if not ids_with_extracted_shorelines: - self.id_container.ids = [] + # Check if any ROIs are loaded + if self.rois is None: + logger.warning("No ROIs found. Please load ROIs.") + raise Exception("No ROIs found. Please load ROIs.") + + # Get the extracted shorelines for all ROIs + ids_with_extracted_shorelines = self.rois.get_ids_with_extracted_shorelines() + + # Get the available ROI IDs + available_ids = self.get_all_roi_ids() + + if not available_ids: + logger.warning("No ROIs found. Please load ROIs.") + raise Exception("No ROIs found. Please load ROIs.") + + # Find ROI IDs with extracted shorelines + roi_ids_with_extracted_shorelines = set(available_ids).intersection( + ids_with_extracted_shorelines + ) + + if not roi_ids_with_extracted_shorelines: logger.warning("No ROIs found with extracted shorelines.") return - else: + + if ids_with_extracted_shorelines is None: + self.id_container.ids = [] + elif not isinstance(ids_with_extracted_shorelines, list): self.id_container.ids = list(ids_with_extracted_shorelines) + else: + self.id_container.ids = ids_with_extracted_shorelines # Load extracted shorelines for the first ROI ID with extracted shorelines + for selected_id in roi_ids_with_extracted_shorelines: + extracted_shorelines = self.rois.get_extracted_shoreline(selected_id) + logger.info( + f"ROI ID {selected_id} extracted shorelines {extracted_shorelines}" + ) - # select the first ROI ID with extracted shorelines - selected_id = ids_with_extracted_shorelines[0] - # load the extracted shorelines for the selected ROI ID - extracted_shorelines = self.rois.get_extracted_shoreline(selected_id) - logger.info(f"ROI ID {selected_id} extracted shorelines {extracted_shorelines}") - # if extracted shorelines exist, load them onto map, if none exist nothing loads - if hasattr(extracted_shorelines, "gdf"): - if not extracted_shorelines.gdf.empty: - self.extract_shorelines_container.load_list = ( - extracted_shorelines.gdf["satname"] - + "_" - + extracted_shorelines.gdf["date"].apply( - lambda x: x.strftime("%Y-%m-%d-%H-%M-%S") - ) - ).tolist() - - self.extract_shorelines_container.trash_list = [] - self.load_extracted_shorelines_on_map(extracted_shorelines, row_number) + if extracted_shorelines is not None: + logger.info(f"Extracted shorelines found for ROI {selected_id}") + 
self.load_extracted_shorelines_on_map(extracted_shorelines, row_number) + break def load_extracted_shorelines_on_map( self, @@ -2152,19 +2093,31 @@ def load_extracted_shorelines_on_map( """ if extracted_shorelines is None: return - style = { - "color": "#001aff", # Outline color - "opacity": 1, # opacity 1 means no transparency - "weight": 3, # Width - "fillColor": "#001aff", # Fill color - "fillOpacity": 0.8, # Fill opacity. - "radius": 1, - } - # create the extracted shoreline layer and add it to the map - layer_name = "extracted shoreline" - self.load_extracted_shoreline_layer( - extracted_shorelines.gdf.iloc[[row_number]], layer_name, style + # Loads stylized extracted shorelines onto map for single roi + logger.info(f"row_number: {row_number}") + # Convert the extracted shoreline's geometry to points + points_gdf = extracted_shoreline.convert_linestrings_to_multipoints( + extracted_shorelines.gdf + ) + self.extract_shorelines_container.max_shorelines = len(points_gdf) - 1 + + new_layer = extracted_shorelines.get_styled_layer( + points_gdf, + row_number, + style={ + "radius": 1, + }, ) + layer_name = extracted_shorelines.get_layer_name() + logger.info( + f"Extracted shoreline layer: {new_layer}\n" + f"Layer name: {layer_name}\n" + f"Extracted shoreline layers: {new_layer}\n" + ) + # new_layer.on_hover(self.update_extracted_shoreline_html) + self.map.add_layer(new_layer) + # update the extracted shoreline layer and number of shorelines available + self.extract_shorelines_container.geo_data = new_layer def load_feature_on_map( self, feature_name: str, file: str = "", gdf: gpd.GeoDataFrame = None, **kwargs diff --git a/src/coastseg/map_UI.py b/src/coastseg/map_UI.py index e8776fd7..5ace7405 100644 --- a/src/coastseg/map_UI.py +++ b/src/coastseg/map_UI.py @@ -7,7 +7,8 @@ from coastseg import exception_handler from coastseg import common from coastseg import file_utilities -from coastseg.extract_shorelines_widget import Extracted_Shoreline_widget +from coastseg.watchable_slider import Extracted_Shoreline_widget + # External Python imports import ipywidgets @@ -67,36 +68,19 @@ class UI: download_view = Output(layout={"border": "1px solid black"}) preview_view = Output() - def __init__(self, coastseg_map, **kwargs): + def __init__(self, coastseg_map): # save an instance of coastseg_map self.coastseg_map = coastseg_map self.session_name = "" self.session_directory = "" - # extracted_shoreline_traitlet = kwargs.get("extracted_shoreline_traitlet",None) - # if extracted_shoreline_traitlet: - # self.extract_shorelines_widget = Extracted_Shoreline_widget() - # create the extract shorelines widget that controls shorelines on the map - self.extract_shorelines_widget = Extracted_Shoreline_widget( - coastseg_map.extract_shorelines_container - ) - # add callbacks to the extract shorelines widget - self.extract_shorelines_widget.add_load_callback( - coastseg_map.load_selected_shorelines_on_map - ) - self.extract_shorelines_widget.add_remove_all_callback( - coastseg_map.delete_selected_shorelines - ) - self.extract_shorelines_widget.add_remove_callback( - coastseg_map.remove_layer_by_name - ) - # link the widgets to the traitlets - coastseg_map.extract_shorelines_container.link_load_list( - self.extract_shorelines_widget.load_list_widget - ) - coastseg_map.extract_shorelines_container.link_trash_list( - self.extract_shorelines_widget.trash_list_widget + # the widget will update whenever the value of the extracted_shoreline_layer or number_extracted_shorelines changes + self.extract_shorelines_widget = 
Extracted_Shoreline_widget(self.coastseg_map) + # have the slider watch the extracted_shoreline_layer, number_extracted_shorelines,roi_selected_to_extract_shoreline + + self.extract_shorelines_widget.set_load_extracted_shorelines_button_on_click( + self.coastseg_map.load_extracted_shorelines_to_map ) # create button styles From 2d22ae4c20b970760f8e433c13e1d6ff12174e86 Mon Sep 17 00:00:00 2001 From: Sharon Fitzpatrick Date: Thu, 2 Nov 2023 20:18:03 -0700 Subject: [PATCH 16/87] v1.1.14 --- pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index 14e19ccf..de3846d2 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -5,7 +5,7 @@ build-backend = "setuptools.build_meta" [project] name = "coastseg" dynamic = ["readme"] -version = "1.1.13" +version = "1.1.14" authors = [ { name=" Sharon Fitzpatrick", email="sharon.fitzpatrick23@gmail.com" }, ] From 0a36e8b0093e58234da1a43171106ac1363682e7 Mon Sep 17 00:00:00 2001 From: Sharon Fitzpatrick Date: Tue, 7 Nov 2023 12:24:25 -0800 Subject: [PATCH 17/87] convert_linestrings_to_multipoints use all --- src/coastseg/coastseg_map.py | 4 +--- src/coastseg/common.py | 22 ++++++++++++++++++++-- 2 files changed, 21 insertions(+), 5 deletions(-) diff --git a/src/coastseg/coastseg_map.py b/src/coastseg/coastseg_map.py index f93ad0e0..bad4db69 100644 --- a/src/coastseg/coastseg_map.py +++ b/src/coastseg/coastseg_map.py @@ -239,9 +239,7 @@ def compute_tidal_corrections( logger.info( f"Computing tides for ROIs {roi_ids} beach_slope: {beach_slope} reference_elevation: {reference_elevation}" ) - # session_path = file_utilities.get_session_location( - # session_name=session_name, raise_error=True - # ) + session_name = self.get_session_name() try: tide_correction.correct_all_tides( diff --git a/src/coastseg/common.py b/src/coastseg/common.py index 387c2d8f..c262c570 100644 --- a/src/coastseg/common.py +++ b/src/coastseg/common.py @@ -1466,6 +1466,24 @@ def get_transect_points_dict(feature: gpd.geodataframe) -> dict: def get_cross_distance_df( extracted_shorelines: dict, cross_distance_transects: dict ) -> pd.DataFrame: + """ + Creates a DataFrame from extracted shorelines and cross distance transects by + getting the dates from extracted shorelines and saving it to the as the intersection time for each extracted shoreline + for each transect + + Parameters: + extracted_shorelines : dict + A dictionary containing the extracted shorelines. It must have a "dates" key with a list of dates. + cross_distance_transects : dict + A dictionary containing the transects and the cross distance where the extracted shorelines intersected it. The keys are transect names and the values are lists of cross distances. + eg. + { 'tranect 1': [1,2,3], + 'tranect 2': [4,5,6], + } + Returns: + DataFrame + A DataFrame where each column is a transect from cross_distance_transects and the "dates" column from extracted_shorelines. Each row corresponds to a date and contains the cross distances for each transect on that date. + """ transects_csv = {} # copy dates from extracted shoreline transects_csv["dates"] = extracted_shorelines["dates"] @@ -1609,8 +1627,8 @@ def convert_linestrings_to_multipoints(gdf: gpd.GeoDataFrame) -> gpd.GeoDataFram already contains MultiPoints, the original GeoDataFrame is returned. 
""" - # Check if the gdf already contains MultiPoints - if any(gdf.geometry.type == "MultiPoint"): + # Check if all geometries in the gdf are MultiPoints + if all(gdf.geometry.type == "MultiPoint"): return gdf def linestring_to_multipoint(linestring): From 75a271f5db90405cfbc028c3c49fa978376c8089 Mon Sep 17 00:00:00 2001 From: Sharon Fitzpatrick Date: Tue, 7 Nov 2023 16:05:24 -0800 Subject: [PATCH 18/87] #200 pixel_size_per_satellite L5 30m to 15m --- src/coastseg/common.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/coastseg/common.py b/src/coastseg/common.py index 0639f9ec..f42149d9 100644 --- a/src/coastseg/common.py +++ b/src/coastseg/common.py @@ -282,7 +282,7 @@ def filter_images( "L7": 15, "L8": 15, "L9": 15, - "L5": 30, + "L5": 15, # coastsat modifies the per pixel resolution from 30m to 15m for L5 } bad_files = [] jpg_files = [ From db513af570c6a49e1bdaef5da979154319455396 Mon Sep 17 00:00:00 2001 From: Sharon Fitzpatrick Date: Tue, 7 Nov 2023 18:44:47 -0800 Subject: [PATCH 19/87] #200 write new tests for filter images --- src/coastseg/common.py | 33 ++++++++++++++++++++++----------- tests/conftest.py | 32 +++++++++++++++++++++++++++----- tests/test_common.py | 32 ++++++++++++++++++++++++++++++-- 3 files changed, 79 insertions(+), 18 deletions(-) diff --git a/src/coastseg/common.py b/src/coastseg/common.py index f42149d9..408c04e1 100644 --- a/src/coastseg/common.py +++ b/src/coastseg/common.py @@ -301,23 +301,34 @@ def filter_images( continue filepath = os.path.join(directory, file) - with Image.open(filepath) as img: - width, height = img.size - img_area = ( - width - * pixel_size_per_satellite[satname] - * height - * pixel_size_per_satellite[satname] - ) - img_area /= 1e6 # convert to square kilometers - if img_area > max_area or img_area < min_area: - bad_files.append(file) + img_area = calculate_image_area(filepath, pixel_size_per_satellite[satname]) + if img_area < min_area or (max_area is not None and img_area > max_area): + bad_files.append(file) + bad_files = list(map(lambda s: os.path.join(directory, s), bad_files)) # move the bad files to the bad folder file_utilities.move_files(bad_files, output_directory) return bad_files # Optionally return the list of bad files +def calculate_image_area(filepath: str, pixel_size: int) -> float: + """ + Calculate the area of an image in square kilometers. + + Args: + filepath (str): The path to the image file. + pixel_size (int): The size of a pixel in the image in meters. + + Returns: + float: The area of the image in square kilometers. 
+ """ + with Image.open(filepath) as img: + width, height = img.size + img_area = width * pixel_size * height * pixel_size + img_area /= 1e6 # convert to square kilometers + return img_area + + def validate_geometry_types( gdf: gpd.GeoDataFrame, valid_types: set, diff --git a/tests/conftest.py b/tests/conftest.py index 782687ec..b6eb75c9 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -56,6 +56,7 @@ def valid_geojson_path(geojson_directory): gdf.to_file(file_path, driver="GeoJSON") return file_path + @pytest.fixture(scope="session") def config_gdf_missing_rois_path(geojson_directory): """Create a valid geojson file and return its path.""" @@ -82,6 +83,7 @@ def config_gdf_missing_rois_path(geojson_directory): gdf.to_file(file_path, driver="GeoJSON") return file_path + @pytest.fixture(scope="session") def empty_geojson_path(geojson_directory): """Create an empty geojson file and return its path.""" @@ -107,12 +109,32 @@ def setup_image_directory(tmpdir): # Create dummy images for different satellites based on the new naming scheme sizes = { - "S2": (10, 50), - "L7": (66, 66), - "L8": (66, 66), - "L9": (66, 66), - "L5": (10, 33), + "S2": (200, 200), # make this image too small 4.0`km^2 + "L7": (320, 348), + "L8": (320, 348), + "L9": (320, 348), + "L5": (100, 100), # make this image too small 2.5`km^2 + } + for sat, size in sizes.items(): + img = Image.new("RGB", size, "white") + img.save(os.path.join(tmpdir, f"dummy_prefix_{sat}_image.jpg")) + + return tmpdir + + +@pytest.fixture +def setup_good_image_directory(tmpdir): + os.makedirs(tmpdir, exist_ok=True) + + # Create dummy images for different satellites that are all equivalent to 25 km^2 + sizes = { + "S2": (500, 500), + "L7": (320, 348), + "L8": (320, 348), + "L9": (320, 348), + "L5": (320, 348), } + # the area for all these images is 25 km^2 for sat, size in sizes.items(): img = Image.new("RGB", size, "white") img.save(os.path.join(tmpdir, f"dummy_prefix_{sat}_image.jpg")) diff --git a/tests/test_common.py b/tests/test_common.py index 6d71c35b..e5cd1666 100644 --- a/tests/test_common.py +++ b/tests/test_common.py @@ -81,8 +81,9 @@ def test_invalid_projection(): def test_filter_images_existing_directory(setup_image_directory): + # min area is 60% of area 25km^2 and max area is 150% of area 25km^2 bad_images = common.filter_images( - 0.8, 1.5, setup_image_directory, setup_image_directory.join("bad") + 15, 30, setup_image_directory, setup_image_directory.join("bad") ) assert len(bad_images) == 2 @@ -94,6 +95,14 @@ def test_filter_images_existing_directory(setup_image_directory): ) +def test_filter_images_all_good_images(setup_good_image_directory): + # min area is 60% of area 25km^2 and max area is 150% of area 25km^2 + bad_images = common.filter_images( + 15, 30, setup_good_image_directory, setup_good_image_directory.join("bad") + ) + assert len(bad_images) == 0 + + def test_filter_images_non_existing_directory(): with pytest.raises(FileNotFoundError): common.filter_images(0.8, 1.5, "non_existing_path", "some_output_path") @@ -104,8 +113,27 @@ def test_filter_images_no_jpg_files_found(tmpdir): assert len(bad_files) == 0 +def test_filter_images_no_output_directory_provided_no_max_area(setup_image_directory): + # min area is 60% of area 25km^2 and max area is None + bad_images = common.filter_images(15, None, setup_image_directory) + assert len(bad_images) == 2 + assert ( + os.path.join(setup_image_directory, "dummy_prefix_S2_image.jpg") in bad_images + ) + assert ( + os.path.join(setup_image_directory, "dummy_prefix_L5_image.jpg") 
in bad_images + ) + assert os.path.exists( + os.path.join(setup_image_directory, "bad", "dummy_prefix_S2_image.jpg") + ) + assert os.path.exists( + os.path.join(setup_image_directory, "bad", "dummy_prefix_L5_image.jpg") + ) + + def test_filter_images_no_output_directory_provided(setup_image_directory): - bad_images = common.filter_images(0.8, 1.5, setup_image_directory) + # min area is 60% of area 25km^2 and max area is 150% of area 25km^2 + bad_images = common.filter_images(15, 30, setup_image_directory) assert len(bad_images) == 2 assert ( os.path.join(setup_image_directory, "dummy_prefix_S2_image.jpg") in bad_images From 4378131b179bcc6e8431bbb5a997a515e0cc0408 Mon Sep 17 00:00:00 2001 From: Sharon Fitzpatrick Date: Wed, 8 Nov 2023 20:09:55 -0800 Subject: [PATCH 20/87] #202 coastsat_package 0.1.27 handles request size --- pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index de3846d2..b0c9bf14 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -12,7 +12,7 @@ authors = [ # find` directive with `include` or `exclude` description = "An interactive jupyter notebook for downloading satellite imagery" dependencies = [ - "coastsat-package>=0.1.26", + "coastsat-package>=0.1.27", "area", "doodleverse-utils>=0.0.35", "ipyfilechooser>=0.6.0", From bb7db5001fe6682e6d3437a11bf72ea7ed39b1dd Mon Sep 17 00:00:00 2001 From: Sharon Fitzpatrick Date: Wed, 8 Nov 2023 20:17:19 -0800 Subject: [PATCH 21/87] #200 test filter_images bad images --- tests/conftest.py | 19 +++++++++++++++++++ tests/test_common.py | 32 ++++++++++++++++++++++++++++++++ 2 files changed, 51 insertions(+) diff --git a/tests/conftest.py b/tests/conftest.py index b6eb75c9..6c01d9a8 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -122,6 +122,25 @@ def setup_image_directory(tmpdir): return tmpdir +@pytest.fixture +def setup_image_directory_bad_images(tmpdir): + os.makedirs(tmpdir, exist_ok=True) + + # Create dummy images for different satellites based on the new naming scheme + sizes = { + "S2": (380, 390), # make this image too small 14.82`km^2 + "L7": (200, 320), # make this image too small 14.4 + "L8": (320, 100), # make this image too small 7.2 + "L9": (320, 150), # make this image too small 10.8 + "L5": (200, 320), # make this image too small 14.4`km^2 + } + for sat, size in sizes.items(): + img = Image.new("RGB", size, "white") + img.save(os.path.join(tmpdir, f"dummy_prefix_{sat}_image.jpg")) + + return tmpdir + + @pytest.fixture def setup_good_image_directory(tmpdir): os.makedirs(tmpdir, exist_ok=True) diff --git a/tests/test_common.py b/tests/test_common.py index e5cd1666..c5add729 100644 --- a/tests/test_common.py +++ b/tests/test_common.py @@ -95,6 +95,38 @@ def test_filter_images_existing_directory(setup_image_directory): ) +def test_filter_images_existing_directory_bad_images(setup_image_directory_bad_images): + # min area is 60% of area 25km^2 and max area is 150% of area 25km^2 + bad_images = common.filter_images( + 15, + 30, + setup_image_directory_bad_images, + setup_image_directory_bad_images.join("bad"), + ) + assert len(bad_images) == 5 + + assert ( + os.path.join(setup_image_directory_bad_images, "dummy_prefix_S2_image.jpg") + in bad_images + ) + assert ( + os.path.join(setup_image_directory_bad_images, "dummy_prefix_L5_image.jpg") + in bad_images + ) + assert ( + os.path.join(setup_image_directory_bad_images, "dummy_prefix_L7_image.jpg") + in bad_images + ) + assert ( + os.path.join(setup_image_directory_bad_images, "dummy_prefix_L8_image.jpg") 
+ in bad_images + ) + assert ( + os.path.join(setup_image_directory_bad_images, "dummy_prefix_L9_image.jpg") + in bad_images + ) + + def test_filter_images_all_good_images(setup_good_image_directory): # min area is 60% of area 25km^2 and max area is 150% of area 25km^2 bad_images = common.filter_images( From 0786da9de98c4eec4026bf6a4595d15ab02ac1c0 Mon Sep 17 00:00:00 2001 From: Sharon Fitzpatrick Date: Wed, 8 Nov 2023 20:27:10 -0800 Subject: [PATCH 22/87] v1.1.15 #200 #202 --- pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index b0c9bf14..815498f3 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -5,7 +5,7 @@ build-backend = "setuptools.build_meta" [project] name = "coastseg" dynamic = ["readme"] -version = "1.1.14" +version = "1.1.15" authors = [ { name=" Sharon Fitzpatrick", email="sharon.fitzpatrick23@gmail.com" }, ] From 953891ec684b89a29425fe9d7440fa410eadbc08 Mon Sep 17 00:00:00 2001 From: Sharon Fitzpatrick Date: Mon, 13 Nov 2023 10:09:37 -0800 Subject: [PATCH 23/87] coastsat 0.1.28 plot ref shoreline buffer v1.1.16 --- pyproject.toml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index 815498f3..c88dc3e7 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -5,14 +5,14 @@ build-backend = "setuptools.build_meta" [project] name = "coastseg" dynamic = ["readme"] -version = "1.1.15" +version = "1.1.16" authors = [ { name=" Sharon Fitzpatrick", email="sharon.fitzpatrick23@gmail.com" }, ] # find` directive with `include` or `exclude` description = "An interactive jupyter notebook for downloading satellite imagery" dependencies = [ - "coastsat-package>=0.1.27", + "coastsat-package>=0.1.28", "area", "doodleverse-utils>=0.0.35", "ipyfilechooser>=0.6.0", From 3a6b3cd21602088e9a181a42feecc92e7750566d Mon Sep 17 00:00:00 2001 From: Sharon Fitzpatrick Date: Tue, 14 Nov 2023 11:02:30 -0800 Subject: [PATCH 24/87] v1.1.17 --- pyproject.toml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index c88dc3e7..61d4d0d3 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -5,14 +5,14 @@ build-backend = "setuptools.build_meta" [project] name = "coastseg" dynamic = ["readme"] -version = "1.1.16" +version = "1.1.17" authors = [ { name=" Sharon Fitzpatrick", email="sharon.fitzpatrick23@gmail.com" }, ] # find` directive with `include` or `exclude` description = "An interactive jupyter notebook for downloading satellite imagery" dependencies = [ - "coastsat-package>=0.1.28", + "coastsat-package>=0.1.31", "area", "doodleverse-utils>=0.0.35", "ipyfilechooser>=0.6.0", From af3d90968dcc5bd8bf9c37e7182801f46fc373b8 Mon Sep 17 00:00:00 2001 From: Sharon Fitzpatrick Date: Wed, 15 Nov 2023 11:09:31 -0800 Subject: [PATCH 25/87] v1.1.18 --- pyproject.toml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index 61d4d0d3..1e494660 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -5,14 +5,14 @@ build-backend = "setuptools.build_meta" [project] name = "coastseg" dynamic = ["readme"] -version = "1.1.17" +version = "1.1.18" authors = [ { name=" Sharon Fitzpatrick", email="sharon.fitzpatrick23@gmail.com" }, ] # find` directive with `include` or `exclude` description = "An interactive jupyter notebook for downloading satellite imagery" dependencies = [ - "coastsat-package>=0.1.31", + "coastsat-package>=0.1.33", "area", "doodleverse-utils>=0.0.35", "ipyfilechooser>=0.6.0", From 
f02adcdefd29eeee28d3663682ff728d7c6f81b0 Mon Sep 17 00:00:00 2001 From: Sharon Fitzpatrick Date: Wed, 15 Nov 2023 12:16:58 -0800 Subject: [PATCH 26/87] 1.1.19 plot ref shoreline buffer as red --- pyproject.toml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index 1e494660..34ecae3e 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -5,14 +5,14 @@ build-backend = "setuptools.build_meta" [project] name = "coastseg" dynamic = ["readme"] -version = "1.1.18" +version = "1.1.19" authors = [ { name=" Sharon Fitzpatrick", email="sharon.fitzpatrick23@gmail.com" }, ] # find` directive with `include` or `exclude` description = "An interactive jupyter notebook for downloading satellite imagery" dependencies = [ - "coastsat-package>=0.1.33", + "coastsat-package>=0.1.34", "area", "doodleverse-utils>=0.0.35", "ipyfilechooser>=0.6.0", From d8c8f0172c3bbb5101331fc3ac94e2dcdf9451aa Mon Sep 17 00:00:00 2001 From: Sharon Fitzpatrick Date: Fri, 17 Nov 2023 12:12:46 -0800 Subject: [PATCH 27/87] work in progress testing merge_utils --- ..._session_final_nonoverlapping_script.ipynb | 5631 +++++++++++++++++ src/coastseg/merge_utils.py | 644 ++ tests/test_merge_utils.py | 0 3 files changed, 6275 insertions(+) create mode 100644 merge_session_final_nonoverlapping_script.ipynb create mode 100644 src/coastseg/merge_utils.py create mode 100644 tests/test_merge_utils.py diff --git a/merge_session_final_nonoverlapping_script.ipynb b/merge_session_final_nonoverlapping_script.ipynb new file mode 100644 index 00000000..486d81d6 --- /dev/null +++ b/merge_session_final_nonoverlapping_script.ipynb @@ -0,0 +1,5631 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Merging Sessions Script\n", + "\n", + "This script is used to merge two or more sessions, provided they do not contain overlapping regions of interest (ROIs).\n", + "\n", + "### Prerequisites:\n", + "- Paths to two session directories with extracted shorelines.\n", + "- The desired name for the merged session directory that will be saved in the `sessions` directory.\n", + "\n", + "### Optional:\n", + "- A `config.json` file with transect settings for calculating shoreline-transect intersections.\n", + "\n", + "### Instructions:\n", + "1. Enter the paths to the session directories below:\n", + " ``` python\n", + " session_locations=[\n", + " '',\n", + " ''\n", + " ]\n", + " ```\n", + " Example:\n", + " - Notice that because these are Windows locations we put `r` at the beginning of each location\n", + " ``` python\n", + " session_locations=[\n", + " r'C:\\development\\doodleverse\\coastseg\\CoastSeg\\sessions\\es1\\ID_13_datetime06-05-23__04_16_45',\n", + " r'C:\\development\\doodleverse\\coastseg\\CoastSeg\\sessions\\es1\\ID_12_datetime06-05-23__04_16_45'\n", + " ]\n", + " ```\n", + "2. Specify the name for the merged session directory:\n", + " - `merged_session_directory`: `\"\"`\n", + "\n", + "3. 
(Optional) If you want to use your own advanced settings in a `config.json` file, include its path:\n", + " - `config_file`: `\"\"`\n", + "\n", + "With the above information, the script can be executed to merge the specified sessions into a single session directory.\n" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [], + "source": [ + "session_locations=[r'C:\\development\\doodleverse\\coastseg\\CoastSeg\\sessions\\es1\\ID_13_datetime06-05-23__04_16_45',\n", + " r'C:\\development\\doodleverse\\coastseg\\CoastSeg\\sessions\\es1\\ID_12_datetime06-05-23__04_16_45']\n" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": {}, + "outputs": [], + "source": [ + "merged_session_directory='merged_session_test1'" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "c:\\development\\doodleverse\\coastseg\\CoastSeg\\sessions\n", + "Merged session will be saved to c:\\development\\doodleverse\\coastseg\\CoastSeg\\sessions\\merged_session_test1\n" + ] + } + ], + "source": [ + "import os\n", + "# enter the location of your sessions directory if this is not correct\n", + "sessions_directory = os.path.join(os.getcwd(), 'sessions')\n", + "print(sessions_directory)\n", + "merged_session_location = os.path.join(sessions_directory, merged_session_directory)\n", + "os.makedirs(merged_session_location, exist_ok=True)\n", + "\n", + "print(f\"Merged session will be saved to {merged_session_location}\")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Shoreline-Transect Intersection Analysis Settings\n", + "\n", + "The default settings listed below should suffice for most use cases to find where extracted shorelines intersect transects. However, if you modified the advanced settings then you will need to adjust the settings.\n", + "\n", + "\n", + "Most users will want to just use the default settings listed below." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": {}, + "outputs": [], + "source": [ + "settings_transects ={\n", + " \"along_dist\": 25, # along-shore distance to use for computing the intersection\n", + " \"min_points\": 3, # minimum number of shoreline points to calculate an intersection\n", + " \"max_std\": 15, # max std for points around transect\n", + " \"max_range\": 30, # max range for points around transect\n", + " \"min_chainage\": -100, # largest negative value along transect (landwards of transect origin)\n", + " \"multiple_inter\": \"auto\", # mode for removing outliers ('auto', 'nan', 'max')\n", + " \"prc_multiple\": 0.1, # percentage of the time that multiple intersects are present to use the max\n", + "}" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": {}, + "outputs": [], + "source": [ + "from collections import defaultdict\n", + "import os\n", + "from typing import List, Union\n", + "\n", + "import geopandas as gpd\n", + "import numpy as np\n", + "import pandas as pd\n", + "from shapely.geometry import LineString, MultiLineString, MultiPoint\n", + "from shapely.ops import unary_union\n", + "\n", + "from coastseg import geodata_processing\n", + "from coastseg.file_utilities import to_file\n", + "from coastseg.common import get_cross_distance_df\n", + "from coastseg.common import convert_linestrings_to_multipoints, stringify_datetime_columns\n", + "from coastsat import SDS_transects\n", + "\n", + "def convert_multipoints_to_linestrings(gdf: gpd.GeoDataFrame) -> gpd.GeoDataFrame:\n", + " \"\"\"\n", + " Convert MultiPoint geometries in a GeoDataFrame to LineString geometries.\n", + "\n", + " Args:\n", + " - gdf (gpd.GeoDataFrame): The input GeoDataFrame.\n", + "\n", + " Returns:\n", + " - gpd.GeoDataFrame: A new GeoDataFrame with LineString geometries. 
If the input GeoDataFrame\n", + " already contains LineStrings, the original GeoDataFrame is returned.\n", + " \"\"\"\n", + "\n", + " # Create a copy of the GeoDataFrame\n", + " gdf_copy = gdf.copy()\n", + "\n", + " # Check if all geometries in the gdf are LineStrings\n", + " if all(gdf_copy.geometry.type == \"LineString\"):\n", + " return gdf_copy\n", + "\n", + " def multipoint_to_linestring(multipoint):\n", + " if isinstance(multipoint, MultiPoint):\n", + " return LineString(multipoint.geoms)\n", + " return multipoint\n", + "\n", + " # Convert each MultiPoint to a LineString\n", + " gdf_copy[\"geometry\"] = gdf_copy[\"geometry\"].apply(multipoint_to_linestring)\n", + "\n", + " return gdf_copy\n", + "\n", + "def dataframe_to_dict(df: pd.DataFrame, key_map: dict) -> dict:\n", + " \"\"\"\n", + " Converts a DataFrame to a dictionary, with specific mapping between dictionary keys and DataFrame columns.\n", + "\n", + " Parameters:\n", + " df : DataFrame\n", + " The DataFrame to convert.\n", + " key_map : dict\n", + " A dictionary where keys are the desired dictionary keys and values are the corresponding DataFrame column names.\n", + "\n", + " Returns:\n", + " dict\n", + " The resulting dictionary.\n", + " \"\"\"\n", + " result_dict = defaultdict(list)\n", + "\n", + " for dict_key, df_key in key_map.items():\n", + " if df_key in df.columns:\n", + " if df_key == 'date':\n", + " # Assumes the column to be converted to date is the one specified in the mapping with key 'date'\n", + " result_dict[dict_key] = list(df[df_key].apply(lambda x: x.strftime(\"%Y-%m-%d %H:%M:%S\") if pd.notnull(x) else None))\n", + " elif df_key == 'geometry':\n", + " # Assumes the column to be converted to geometry is the one specified in the mapping with key 'geometry'\n", + " result_dict[dict_key] = list(df[df_key].apply(lambda x: np.array([list(point.coords[0]) for point in x.geoms]) if pd.notnull(x) else None))\n", + " else:\n", + " result_dict[dict_key] = list(df[df_key])\n", + " \n", + " return dict(result_dict)\n", + "\n", + "def convert_lines_to_multipoints(gdf):\n", + " # Create a copy of the input GeoDataFrame to avoid modifying it in place\n", + " gdf = gdf.copy()\n", + "\n", + " # Define a function to convert LineString or MultiLineString to MultiPoint\n", + " def line_to_multipoint(geometry):\n", + " if isinstance(geometry, LineString):\n", + " return MultiPoint(geometry.coords)\n", + " elif isinstance(geometry, MultiLineString):\n", + " points = [MultiPoint(line.coords) for line in geometry]\n", + " return MultiPoint([point for multi in points for point in multi])\n", + " else:\n", + " return geometry # Return the original geometry if it's not a LineString or MultiLineString\n", + "\n", + " # Apply the conversion function to each row in the GeoDataFrame\n", + " gdf['geometry'] = gdf['geometry'].apply(line_to_multipoint)\n", + "\n", + " return gdf\n", + "\n", + "def merge_geodataframes(on, how='inner', aggregation_funcs=None,crs='epsg:4326', *gdfs):\n", + " \"\"\"\n", + " Merges multiple GeoDataFrames based on a common column.\n", + " \n", + " Parameters:\n", + " on : str or list of str\n", + " Column name or list of column names to merge on.\n", + " how : str, optional\n", + " Type of merge to be performed (default is 'inner').\n", + " aggregation_funcs : dict, optional\n", + " Dictionary of column names to aggregation functions.\n", + " Example: for the columns 'cloud_cover' and 'geoaccuracy', the mean aggregation function can be specified as:\n", + " aggregation_funcs = {\n", + " 'cloud_cover': 'mean',\n", 
+ " 'geoaccuracy': 'mean'\n", + " }\n", + " *gdfs : GeoDataFrames\n", + " Variable number of GeoDataFrames to be merged.\n", + " \n", + " Returns:\n", + " GeoDataFrame\n", + " The merged GeoDataFrame with aggregated columns as specified.\n", + " \"\"\"\n", + " if len(gdfs) < 2:\n", + " raise ValueError(\"At least two GeoDataFrames must be provided for merging\")\n", + "\n", + " # Set default aggregation functions if none are provided\n", + " if aggregation_funcs is None:\n", + " aggregation_funcs = {}\n", + " \n", + " # Perform the merge while applying the custom aggregation functions\n", + " merged_gdf = gdfs[0]\n", + " merged_gdf.set_crs(crs)\n", + " for gdf in gdfs[1:]:\n", + " merged_gdf = pd.merge(merged_gdf, gdf, on=on, how=how, suffixes=('_left', '_right'))\n", + "\n", + " # Apply aggregation functions\n", + " for col, func in aggregation_funcs.items():\n", + " col_left = f'{col}_left'\n", + " col_right = f'{col}_right'\n", + "\n", + " # Check if the columns exist in both GeoDataFrames\n", + " if col_left in merged_gdf.columns and col_right in merged_gdf.columns:\n", + " # Apply the aggregation function and drop the original columns\n", + " merged_gdf[col] = merged_gdf[[col_left, col_right]].agg(func, axis=1)\n", + " merged_gdf = merged_gdf.drop(columns=[col_left, col_right])\n", + " \n", + " return merged_gdf\n", + "\n", + "def read_first_geojson_file(directory:str,filenames=['extracted_shorelines_lines.geojson', 'extracted_shorelines.geojson']):\n", + " # Loop over the filenames\n", + " for filename in filenames:\n", + " filepath = os.path.join(directory, filename)\n", + "\n", + " # If the file exists, read it and return the GeoDataFrame\n", + " if os.path.exists(filepath):\n", + " return geodata_processing.read_gpd_file(filepath)\n", + "\n", + " # If none of the files exist, raise an exception\n", + " raise FileNotFoundError(f\"None of the files {filenames} exist in the directory {directory}\")\n", + "\n", + "def clip_gdfs(gdfs, overlap_gdf):\n", + " \"\"\"\n", + " Clips GeoDataFrames to an overlapping region.\n", + "\n", + " Parameters:\n", + " gdfs : list of GeoDataFrames\n", + " The GeoDataFrames to be clipped.\n", + " overlap_gdf : GeoDataFrame\n", + " The overlapping region to which the GeoDataFrames will be clipped.\n", + "\n", + " Returns:\n", + " list of GeoDataFrames\n", + " The clipped GeoDataFrames.\n", + " \"\"\"\n", + " clipped_gdfs = []\n", + " for gdf in gdfs:\n", + " clipped_gdf = gpd.clip(gdf, overlap_gdf)\n", + " if not clipped_gdf.empty:\n", + " clipped_gdfs.append(clipped_gdf)\n", + " clipped_gdf.plot()\n", + " return clipped_gdfs\n", + "\n", + "def calculate_overlap(gdf):\n", + " # Check if the input GeoDataFrame is empty\n", + " if not hasattr(gdf,'empty'):\n", + " return gpd.GeoDataFrame()\n", + " if gdf.empty:\n", + " if hasattr(gdf,'crs'):\n", + " return gpd.GeoDataFrame(crs=gdf.crs)\n", + " else:\n", + " return gpd.GeoDataFrame()\n", + " \n", + " # Initialize an empty list to store the results\n", + " overlap_list = []\n", + " \n", + " # Loop over each pair of rows in gdf\n", + " for i in range(len(gdf)):\n", + " for j in range(i+1, len(gdf)):\n", + " # Check for intersection\n", + " if gdf.iloc[i].geometry.intersects(gdf.iloc[j].geometry):\n", + " # Calculate the intersection\n", + " intersection = gdf.iloc[i].geometry.intersection(gdf.iloc[j].geometry)\n", + " \n", + " # Create a new row with the intersection and append to the result list\n", + " overlap_list.append({'geometry': intersection})\n", + " \n", + " # Create a DataFrame from the results 
list\n", + " overlap_df = pd.DataFrame(overlap_list)\n", + " \n", + " # Convert the result DataFrame to a GeoDataFrame and set the CRS\n", + " overlap_gdf = gpd.GeoDataFrame(overlap_df, geometry='geometry', crs=gdf.crs)\n", + " \n", + " return overlap_gdf\n", + "\n", + "def average_multipoints(multipoints)->MultiPoint:\n", + " \"\"\"\n", + " Calculate the average MultiPoint geometry from a list of MultiPoint geometries.\n", + " \n", + " This function takes a list of shapely MultiPoint geometries, ensures they all have the same number of points\n", + " by padding shorter MultiPoints with their last point, and then calculates the average coordinates\n", + " for each point position across all the input MultiPoint geometries. \n", + " \n", + " The result is a new MultiPoint geometry that represents the average shape of the input MultiPoints.\n", + " \n", + " Parameters:\n", + " multipoints (list of shapely.geometry.MultiPoint): A list of shapely MultiPoint geometries to be averaged.\n", + " \n", + " Returns:\n", + " shapely.geometry.MultiPoint: A MultiPoint geometry representing the average shape of the input MultiPoints.\n", + " \n", + " Raises:\n", + " ValueError: If the input list of MultiPoint geometries is empty.\n", + " \n", + " Example:\n", + " >>> from shapely.geometry import MultiPoint\n", + " >>> multipoint1 = MultiPoint([(0, 0), (1, 1), (2, 2)])\n", + " >>> multipoint2 = MultiPoint([(1, 1), (2, 2)])\n", + " >>> multipoint3 = MultiPoint([(0, 0), (1, 1), (2, 2), (3, 3)])\n", + " >>> average_mp = average_multipoints([multipoint1, multipoint2, multipoint3])\n", + " >>> print(average_mp)\n", + " MULTIPOINT (0.3333333333333333 0.3333333333333333, 1.3333333333333333 1.3333333333333333, 2 2, 3 3)\n", + " \"\"\"\n", + " if not multipoints:\n", + " raise ValueError(\"The list of MultiPoint geometries is empty\")\n", + " \n", + " # Find the maximum number of points in any MultiPoint\n", + " max_len = max(len(mp.geoms) for mp in multipoints)\n", + " \n", + " # Pad shorter MultiPoints with their last point\n", + " padded_multipoints = []\n", + " for mp in multipoints:\n", + " if len(mp.geoms) < max_len:\n", + " padded_multipoints.append(MultiPoint(list(mp.geoms) + [mp.geoms[-1]] * (max_len - len(mp.geoms))))\n", + " else:\n", + " padded_multipoints.append(mp)\n", + " \n", + " # Calculate the average coordinates for each point\n", + " num_multipoints = len(padded_multipoints)\n", + " average_coords = []\n", + " for i in range(max_len):\n", + " avg_left = sum(mp.geoms[i].x for mp in padded_multipoints) / num_multipoints\n", + " avg_right = sum(mp.geoms[i].y for mp in padded_multipoints) / num_multipoints\n", + " average_coords.append((avg_left, avg_right))\n", + " \n", + " return MultiPoint(average_coords)\n", + "\n", + "def average_columns(df, col1, col2, new_col):\n", + " df[new_col] = df[[col1, col2]].mean(axis=1,skipna=True)\n", + " return df\n", + "\n", + "def combine_dataframes(df1, df2, join_columns):\n", + " # Perform an outer join and mark the origin of each row\n", + " all_rows = pd.merge(df1, df2, on=join_columns, how='outer', indicator=True)\n", + "\n", + " # Keep only the rows that are in 'df1' but not in 'df2'\n", + " df1_unique = all_rows[all_rows['_merge'] == 'left_only']\n", + " if 'cloud_cover_x' in df1_unique.columns and 'cloud_cover_y' in df1_unique.columns:\n", + " df1_unique = average_columns(df1_unique, 'cloud_cover_x', 'cloud_cover_y', 'cloud_cover')\n", + " df1_unique.drop(columns=['cloud_cover_x', 'cloud_cover_y'], inplace=True)\n", + " if 'geoaccuracy_x' in 
df1_unique.columns and 'geoaccuracy_y' in df1_unique.columns:\n", + " df1_unique = average_columns(df1_unique, 'geoaccuracy_x', 'geoaccuracy_y', 'geoaccuracy')\n", + " df1_unique.drop(columns=['geoaccuracy_x', 'geoaccuracy_y'], inplace=True)\n", + " df1_unique.drop(columns=['_merge'], inplace=True)\n", + " \n", + " # Concatenate 'df2' and the unique rows from 'df1'\n", + " result = pd.concat([df2, df1_unique], ignore_index=True)\n", + "\n", + " def assign_geometry(row):\n", + " if pd.isnull(row['geometry']):\n", + " if pd.notnull(row['geometry_x']):\n", + " return row['geometry_x']\n", + " elif pd.notnull(row['geometry_y']):\n", + " return row['geometry_y']\n", + " else:\n", + " return row['geometry']\n", + "\n", + " if 'geometry_x' in result.columns and 'geometry_y' in result.columns:\n", + " result['geometry'] = result.apply(assign_geometry, axis=1)\n", + " result.drop(columns=['geometry_x', 'geometry_y'], inplace=True)\n", + " return result\n", + "\n", + "def combine_geodataframes(gdf1, gdf2, join_columns, average_columns=None):\n", + " \"\"\"\n", + " Combines two GeoDataFrames, performing an outer join and averaging specified numerical columns.\n", + "\n", + " Parameters:\n", + " gdf1, gdf2 : GeoDataFrame\n", + " The GeoDataFrames to combine.\n", + " join_columns : list of str\n", + " The columns to join on.\n", + " average_columns : list of str, optional\n", + " The columns to average. If None, all numerical columns with the same name in both GeoDataFrames will be averaged.\n", + "\n", + " Returns:\n", + " GeoDataFrame\n", + " The combined GeoDataFrame.\n", + " \"\"\"\n", + " # Ensure that the 'geometry' column is present in both GeoDataFrames\n", + " if 'geometry' not in gdf1.columns or 'geometry' not in gdf2.columns:\n", + " raise ValueError(\"Both GeoDataFrames must have a 'geometry' column.\")\n", + "\n", + " # Combine GeoDataFrames using an outer join\n", + " combined_gdf = pd.merge(gdf1, gdf2, on=join_columns, how='outer', suffixes=('_gdf1', '_gdf2'))\n", + "\n", + " if average_columns is None:\n", + " # List of numerical columns to be averaged\n", + " average_columns = [\n", + " col for col in gdf1.columns\n", + " if col in gdf2.columns\n", + " and col not in join_columns + ['geometry']\n", + " and np.issubdtype(gdf1[col].dtype, np.number)\n", + " and np.issubdtype(gdf2[col].dtype, np.number)\n", + " ]\n", + "\n", + " # Average specified numerical columns\n", + " for col in average_columns:\n", + " if f'{col}_gdf1' in combined_gdf.columns and f'{col}_gdf2' in combined_gdf.columns:\n", + " combined_gdf[col] = combined_gdf[[f'{col}_gdf1', f'{col}_gdf2']].mean(axis=1)\n", + " combined_gdf.drop(columns=[f'{col}_gdf1', f'{col}_gdf2'], inplace=True)\n", + "\n", + " # Resolve geometry conflicts by prioritizing non-null values\n", + " combined_gdf['geometry'] = combined_gdf['geometry_gdf1'].combine_first(combined_gdf['geometry_gdf2'])\n", + " combined_gdf.drop(columns=['geometry_gdf1', 'geometry_gdf2'], inplace=True)\n", + "\n", + " return gpd.GeoDataFrame(combined_gdf, geometry='geometry')\n", + "\n", + "def mergeRightUnique(left_df: gpd.GeoDataFrame, right_df:gpd.GeoDataFrame, join_columns: Union[str, List[str]] = ['date', 'satname'], CRS:str='EPSG:4326') -> pd.DataFrame:\n", + " \"\"\"\n", + " Merges two GeoDataFrames, keeping only the unique rows from the right GeoDataFrame based on the specified join columns.\n", + "\n", + " Parameters:\n", + " left_df : GeoDataFrame\n", + " The left GeoDataFrame to merge. 
Its CRS is set to the specified CRS if not already set.\n", + " right_df : GeoDataFrame\n", + " The right GeoDataFrame to merge. Its CRS is set to the specified CRS if not already set.\n", + " join_columns : str or list of str, default ['date', 'satname']\n", + " The columns to join on. These columns are set as the index for both GeoDataFrames. If a string is passed, it is converted to a list.\n", + " CRS : str, default 'EPSG:4326'\n", + " The Coordinate Reference System to set for the GeoDataFrames if not already set.\n", + "\n", + " Returns:\n", + " GeoDataFrame\n", + " The merged GeoDataFrame, containing all rows from the left GeoDataFrame and only the unique rows from the right GeoDataFrame based on the join columns.\n", + " \"\"\"\n", + " if not left_df.crs:\n", + " left_df.set_crs(CRS, inplace=True)\n", + " if not right_df.crs:\n", + " right_df.set_crs(CRS, inplace=True)\n", + " \n", + " if isinstance(join_columns, str):\n", + " join_columns = [join_columns]\n", + " # Ensure that join are set as the index for both DataFrames\n", + " left_df.set_index(join_columns, inplace=True)\n", + " right_df.set_index(join_columns, inplace=True)\n", + "\n", + " # Find the difference in the MultiIndex between right_df and merged_gdf\n", + " unique_indices = right_df.index.difference(merged_gdf.index)\n", + "\n", + " # Select only those rows from right_df that have unique indices\n", + " unique_to_right_df = right_df.loc[unique_indices]\n", + " if unique_to_right_df.crs:\n", + " unique_to_right_df.crs = right_df.crs\n", + "\n", + " # Now concatenate the merged_gdf with the unique_to_right_df\n", + " combined_gdf = pd.concat([merged_gdf.reset_index(), unique_to_right_df.reset_index()], ignore_index=True)\n", + " return combined_gdf\n", + "\n", + "def merge_geometries(merged_gdf, columns=None, operation=unary_union):\n", + " \"\"\"\n", + " Performs a specified operation for the geometries with the same date and satname.\n", + "\n", + " Parameters:\n", + " merged_gdf : GeoDataFrame\n", + " The GeoDataFrame to perform the operation on.\n", + " columns : list of str, optional\n", + " The columns to perform the operation on. If None, all columns with 'geometry' in the name are used.\n", + " operation : function, optional\n", + " The operation to perform. 
If None, unary_union is used.\n", + "\n", + " Returns:\n", + " GeoDataFrame\n", + " The GeoDataFrame with the operation performed.\n", + " \"\"\"\n", + " if columns is None:\n", + " columns = [col for col in merged_gdf.columns if 'geometry' in col]\n", + " else:\n", + " columns = [col for col in columns if col in merged_gdf.columns]\n", + "\n", + " merged_gdf['geometry'] = merged_gdf[columns].apply(lambda row: operation(row.tolist()), axis=1)\n", + " for col in columns:\n", + " if col in merged_gdf.columns:\n", + " merged_gdf = merged_gdf.drop(columns=col)\n", + " return merged_gdf\n", + "\n", + "def merge_geojson_files(*file_paths:str, )->gpd.GeoDataFrame:\n", + " \"\"\"\n", + " Merges any number of GeoJSON files into a single GeoDataFrame, removing any duplicate rows.\n", + "\n", + " Parameters:\n", + " - *file_paths (str): Paths to the GeoJSON files.\n", + "\n", + " Returns:\n", + " - GeoDataFrame: A GeoDataFrame containing the merged data from all input files, with duplicates removed.\n", + " \"\"\"\n", + " merged_gdf = gpd.GeoDataFrame()\n", + " for filepath in file_paths:\n", + " gdf = geodata_processing.read_gpd_file(filepath)\n", + " # Merging the two dataframes\n", + " merged_gdf = gpd.GeoDataFrame(pd.concat([merged_gdf, gdf], ignore_index=True))\n", + "\n", + " # Dropping any duplicated rows based on all columns\n", + " merged_gdf_cleaned = merged_gdf.drop_duplicates()\n", + " return merged_gdf_cleaned\n", + "\n", + "def create_csv_per_transect(\n", + " save_path: str,\n", + " cross_distance_transects: dict,\n", + " extracted_shorelines_dict: dict,\n", + " roi_id: str = None, # ROI ID is now optional and defaults to None\n", + " filename_suffix: str = \"_timeseries_raw.csv\",\n", + "):\n", + " for key, distances in cross_distance_transects.items():\n", + " # Initialize the dictionary for DataFrame with mandatory keys\n", + " data_dict = {\n", + " 'dates': extracted_shorelines_dict['dates'],\n", + " 'satname': extracted_shorelines_dict['satname'],\n", + " key: distances\n", + " }\n", + " \n", + " # Add roi_id to the dictionary if provided\n", + " if roi_id is not None:\n", + " data_dict['roi_id'] = [roi_id] * len(extracted_shorelines_dict['dates'])\n", + "\n", + " # Create a DataFrame directly with the data dictionary\n", + " df = pd.DataFrame(data_dict).set_index('dates')\n", + "\n", + " # Construct the full file path\n", + " csv_filename = f\"{key}{filename_suffix}\"\n", + " fn = os.path.join(save_path, csv_filename)\n", + "\n", + " # Save to CSV file, 'mode' set to 'w' for overwriting\n", + " try:\n", + " df.to_csv(fn, sep=\",\", mode='w')\n", + " print(f\"Time-series for transect {key} saved to {fn}\")\n", + " except Exception as e:\n", + " print(f\"Failed to save time-series for transect {key}: {e}\")\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Merge all the config_gdf.geojson files together" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "['C:\\\\development\\\\doodleverse\\\\coastseg\\\\CoastSeg\\\\sessions\\\\es1\\\\ID_13_datetime06-05-23__04_16_45\\\\config_gdf.geojson', 'C:\\\\development\\\\doodleverse\\\\coastseg\\\\CoastSeg\\\\sessions\\\\es1\\\\ID_12_datetime06-05-23__04_16_45\\\\config_gdf.geojson']\n" + ] + }, + { + "data": { + "text/html": [ + "
" + ], + "text/plain": [ + " id type slope \\\n", + "0 12 roi NaN \n", + "1 13 roi NaN \n", + "2 hxj1 shoreline NaN \n", + "3 hxj2 shoreline NaN \n", + "4 hxj3 shoreline NaN \n", + ".. ... ... ... \n", + "712 gpv628 transect NaN \n", + "713 gpv629 transect NaN \n", + "714 gpv630 transect NaN \n", + "715 gpv631 transect NaN \n", + "716 NaN bbox NaN \n", + "\n", + " geometry \n", + "0 POLYGON ((-117.46826 33.22493, -117.46847 33.2... \n", + "1 POLYGON ((-117.46847 33.26526, -117.46869 33.3... \n", + "2 LINESTRING (-117.25543 32.90299, -117.25543 32... \n", + "3 LINESTRING (-117.25575 32.90568, -117.25575 32... \n", + "4 LINESTRING (-117.25804 32.91288, -117.25820 32... \n", + ".. ... \n", + "712 LINESTRING (-117.58446 33.38388, -117.58544 33... \n", + "713 LINESTRING (-117.58550 33.38406, -117.58649 33... \n", + "714 LINESTRING (-117.58670 33.38427, -117.58739 33... \n", + "715 LINESTRING (-117.58746 33.38427, -117.58873 33... \n", + "716 POLYGON ((-117.61002 32.90299, -117.61002 33.3... \n", + "\n", + "[717 rows x 4 columns]" + ] + }, + "execution_count": 10, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "from coastseg import geodata_processing\n", + "import os\n", + "\n", + "# read all the config_gdfs from the session locations\n", + "filepaths = [os.path.join(session_location, 'config_gdf.geojson') for session_location in session_locations]\n", + "print(filepaths)\n", + "# merge all the config_gdfs into one\n", + "merged_config = merge_geojson_files(*filepaths)\n", + "# optionally save the merged config_gdf \n", + "merged_config.to_file(os.path.join(merged_session_location, 'config_gdf.geojson'), driver='GeoJSON')\n", + "merged_config" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### The ROI Listed Below Will be Merged Together" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
" + ], + "text/plain": [ + " id type slope geometry\n", + "0 12 roi NaN POLYGON ((-117.46826 33.22493, -117.46847 33.2...\n", + "1 13 roi NaN POLYGON ((-117.46847 33.26526, -117.46869 33.3..." + ] + }, + "execution_count": 11, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "roi_rows = merged_config[merged_config['type'] == 'roi']\n", + "roi_rows" + ] + }, + { + "cell_type": "code", + "execution_count": 14, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Read 2 extracted shorelines GeoDataFrames\n", + "No overlapping ROIs found. Sessions can be merged.\n", + "Combined 53 rows from 2 GeoDataFrames\n", + "The following dataframe contains the combined extracted shorelines from all sessions.\n", + " Shorelines that were extracted on the same dates have been combined.\n" + ] + }, + { + "data": { + "text/html": [ + "
" + ], + "text/plain": [ + " satname date cloud_cover geoaccuracy \\\n", + "0 L8 2018-12-30 18:22:25 0.000000 5.088 \n", + "43 L8 2019-02-16 18:22:17 0.000000 5.802 \n", + "44 L8 2019-03-20 18:22:08 0.000000 6.596 \n", + "45 L8 2019-06-08 18:22:20 0.263967 4.826 \n", + "1 L8 2019-07-10 18:22:29 0.003838 4.275 \n", + "2 L8 2019-07-26 18:22:33 0.007632 4.286 \n", + "3 L8 2019-08-11 18:22:40 0.000000 4.080 \n", + "4 L8 2019-08-27 18:22:44 0.000000 4.208 \n", + "5 L8 2019-09-12 18:22:48 0.000000 4.128 \n", + "6 L8 2019-10-14 18:22:56 0.007924 4.002 \n", + "7 L8 2019-10-30 18:22:56 0.014848 4.851 \n", + "8 L8 2019-11-15 18:22:53 0.000000 4.661 \n", + "9 L8 2019-12-17 18:22:50 0.003546 5.080 \n", + "10 L8 2020-01-02 18:22:45 0.000000 5.318 \n", + "11 L8 2020-01-18 18:22:42 0.000000 4.996 \n", + "46 L8 2020-03-22 18:22:20 0.000000 5.147 \n", + "47 L8 2020-04-23 18:22:05 0.014904 4.790 \n", + "12 L8 2020-06-10 18:22:09 0.000000 4.624 \n", + "13 L8 2020-07-12 18:22:24 0.000000 4.785 \n", + "14 L8 2020-08-29 18:22:41 0.003502 4.401 \n", + "15 L8 2020-09-30 18:22:51 0.000000 4.235 \n", + "16 L8 2020-12-03 18:22:54 0.000000 4.724 \n", + "17 L8 2020-12-19 18:22:53 0.000000 4.938 \n", + "18 L8 2021-01-04 18:22:48 0.000000 4.964 \n", + "19 L8 2021-01-20 18:22:41 0.000000 5.149 \n", + "20 L8 2021-02-05 18:22:40 0.000000 4.352 \n", + "21 L8 2021-02-21 18:22:35 0.000000 4.232 \n", + "48 L8 2021-03-09 18:22:27 0.060021 5.644 \n", + "22 L8 2021-06-13 18:22:22 0.010056 4.434 \n", + "23 L8 2021-10-19 18:22:59 0.000000 4.356 \n", + "24 L9 2021-11-23 18:25:01 0.001055 4.860 \n", + "25 L8 2021-12-22 18:22:51 0.106271 5.356 \n", + "26 L8 2022-01-23 18:22:43 0.000000 5.018 \n", + "27 L9 2022-02-16 18:22:44 0.000000 4.984 \n", + "28 L8 2022-02-24 18:22:34 0.000000 4.809 \n", + "50 L9 2022-03-04 18:22:34 0.348253 6.277 \n", + "29 L8 2022-03-12 18:22:30 0.000000 4.449 \n", + "30 L8 2022-04-13 18:22:23 0.000000 4.154 \n", + "31 L9 2022-04-21 18:22:18 0.035151 3.886 \n", + "32 L9 2022-06-24 18:22:26 0.000000 4.793 \n", + "33 L8 2022-07-02 18:22:47 0.000000 4.817 \n", + "34 L9 2022-08-11 18:22:43 0.000000 4.334 \n", + "35 L8 2022-08-19 18:23:05 0.019461 4.262 \n", + "36 L9 2022-08-27 18:22:47 0.003008 4.178 \n", + "37 L8 2022-09-20 18:23:12 0.004018 3.943 \n", + "49 L8 2022-10-22 18:23:08 0.248255 5.138 \n", + "51 L9 2022-10-30 18:22:59 0.020157 4.587 \n", + "38 L9 2022-11-15 18:23:01 0.000000 4.828 \n", + "39 L8 2022-11-23 18:23:05 0.000000 4.894 \n", + "40 L8 2022-12-09 18:23:05 0.000000 5.371 \n", + "52 L9 2023-01-02 18:23:00 0.059842 5.832 \n", + "41 L9 2023-01-18 18:22:52 0.149705 5.798 \n", + "42 L9 2023-02-19 18:22:55 0.000000 5.287 \n", + "\n", + " geometry \n", + "0 MULTIPOINT (-117.46831 33.29341, -117.46822 33... \n", + "43 MULTIPOINT (-117.45881 33.28239, -117.45891 33... \n", + "44 MULTIPOINT (-117.45875 33.28242, -117.45889 33... \n", + "45 MULTIPOINT (-117.44480 33.26540, -117.44481 33... \n", + "1 MULTIPOINT (-117.46834 33.29332, -117.46831 33... \n", + "2 MULTIPOINT (-117.46209 33.28590, -117.46199 33... \n", + "3 MULTIPOINT (-117.46831 33.29335, -117.46828 33... \n", + "4 MULTIPOINT (-117.46832 33.29332, -117.46831 33... \n", + "5 MULTIPOINT (-117.46829 33.29345, -117.46817 33... \n", + "6 MULTIPOINT (-117.46154 33.28536, -117.46150 33... \n", + "7 MULTIPOINT (-117.46817 33.29345, -117.46815 33... \n", + "8 MULTIPOINT (-117.46824 33.29345, -117.46815 33... \n", + "9 MULTIPOINT (-117.46827 33.29345, -117.46815 33... \n", + "10 MULTIPOINT (-117.46829 33.29345, -117.46818 33... 
\n", + "11 MULTIPOINT (-117.46842 33.29331, -117.46833 33... \n", + "46 MULTIPOINT (-117.45891 33.28236, -117.45892 33... \n", + "47 MULTIPOINT (-117.43233 33.24960, -117.43234 33... \n", + "12 MULTIPOINT (-117.46831 33.29337, -117.46826 33... \n", + "13 MULTIPOINT (-117.46833 33.29332, -117.46831 33... \n", + "14 MULTIPOINT (-117.46829 33.29345, -117.46818 33... \n", + "15 MULTIPOINT (-117.46822 33.29345, -117.46815 33... \n", + "16 MULTIPOINT (-117.46824 33.29345, -117.46815 33... \n", + "17 MULTIPOINT (-117.46822 33.29345, -117.46815 33... \n", + "18 MULTIPOINT (-117.46824 33.29345, -117.46815 33... \n", + "19 MULTIPOINT (-117.46845 33.29331, -117.46836 33... \n", + "20 MULTIPOINT (-117.46831 33.29341, -117.46823 33... \n", + "21 MULTIPOINT (-117.46831 33.29343, -117.46820 33... \n", + "48 MULTIPOINT (-117.45881 33.28239, -117.45890 33... \n", + "22 MULTIPOINT (-117.45511 33.27808, -117.45503 33... \n", + "23 MULTIPOINT (-117.46817 33.29345, -117.46815 33... \n", + "24 MULTIPOINT (-117.46823 33.29345, -117.46815 33... \n", + "25 MULTIPOINT (-117.45486 33.27788, -117.45479 33... \n", + "26 MULTIPOINT (-117.46831 33.29334, -117.46829 33... \n", + "27 MULTIPOINT (-117.46811 33.29345, -117.46803 33... \n", + "28 MULTIPOINT (-117.46831 33.29332, -117.46831 33... \n", + "50 MULTIPOINT (-117.46464 33.28832, -117.46458 33... \n", + "29 MULTIPOINT (-117.46841 33.29332, -117.46831 33... \n", + "30 MULTIPOINT (-117.46829 33.29345, -117.46817 33... \n", + "31 MULTIPOINT (-117.44798 33.26931, -117.44789 33... \n", + "32 MULTIPOINT (-117.46831 33.29341, -117.46824 33... \n", + "33 MULTIPOINT (-117.46823 33.29345, -117.46815 33... \n", + "34 MULTIPOINT (-117.46812 33.29345, -117.46804 33... \n", + "35 MULTIPOINT (-117.45778 33.28118, -117.45777 33... \n", + "36 MULTIPOINT (-117.46284 33.28711, -117.46280 33... \n", + "37 MULTIPOINT (-117.46106 33.28482, -117.46102 33... \n", + "49 MULTIPOINT (-117.45891 33.28233, -117.45894 33... \n", + "51 MULTIPOINT (-117.44076 33.26042, -117.44082 33... \n", + "38 MULTIPOINT (-117.46825 33.29345, -117.46815 33... \n", + "39 MULTIPOINT (-117.46810 33.29345, -117.46802 33... \n", + "40 MULTIPOINT (-117.46810 33.29345, -117.46805 33... \n", + "52 MULTIPOINT (-117.45891 33.28230, -117.45897 33... \n", + "41 MULTIPOINT (-117.46223 33.28630, -117.46215 33... \n", + "42 MULTIPOINT (-117.46813 33.29345, -117.46805 33... 
" + ] + }, + "execution_count": 14, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "from coastseg.merge_utils import calculate_overlap\n", + "\n", + "result_gdf = gpd.GeoDataFrame( geometry=[], crs='epsg:4326')\n", + "combined_gdf = gpd.GeoDataFrame( geometry=[], crs='epsg:4326')\n", + "# calculate the overlapping regions between the ROIs\n", + "overlap_gdf=calculate_overlap(roi_rows)\n", + "\n", + "# read all the extracted shorelines from the session locations\n", + "gdfs = []\n", + "for session_dir in session_locations:\n", + " # attempt to read the extracted shoreline files\n", + " es_gdf = read_first_geojson_file(session_dir,['extracted_shorelines_points.geojson', 'extracted_shorelines.geojson'])\n", + " es_gdf = convert_lines_to_multipoints(es_gdf)\n", + " es_gdf = es_gdf.to_crs('epsg:4326')\n", + " gdfs.append(es_gdf)\n", + "print(f\"Read {len(gdfs)} extracted shorelines GeoDataFrames\")\n", + "\n", + "# clip the extracted shorelines to the overlapping regions\n", + "clipped_shorelines_gdfs=clip_gdfs(gdfs, overlap_gdf)\n", + "\n", + "# sometimes there are not shorelines in the overlapping regions\n", + "if overlap_gdf.empty or len(clipped_shorelines_gdfs) == 0:\n", + " print(\"No overlapping ROIs found. Sessions can be merged.\")\n", + " # merge the geodataframes on date and satname and average the cloud_cover and geoaccuracy for the merged rows\n", + " aggregation_funcs = {\n", + " 'cloud_cover': 'mean',\n", + " 'geoaccuracy': 'mean'\n", + " }\n", + " merged_gdf = merge_geodataframes(['date', 'satname'],'inner', aggregation_funcs,'epsg:4326', *gdfs)\n", + "\n", + " # merge the geometries with the same date and satname\n", + " geometry_columns = [col for col in merged_gdf.columns if 'geometry' in col]\n", + " merged_gdf = merge_geometries(merged_gdf, columns=geometry_columns, operation=unary_union)\n", + " # combine all the extracted shorelines gdfs into a single gdf\n", + " result_gdf = gpd.GeoDataFrame( geometry=[], crs='epsg:4326')\n", + " result_gdf.set_crs(\"EPSG:4326\", inplace=True)\n", + "\n", + " for gdf in gdfs:\n", + " if not gdf.crs:\n", + " gdf.set_crs(\"EPSG:4326\", inplace=True)\n", + " result_gdf = pd.concat([gdf, result_gdf], ignore_index=True)\n", + "\n", + " # combine the merged shorelines with all the extracted shorelines whose date and satname weren't in the merged shorelines\n", + " combined_gdf = mergeRightUnique(merged_gdf, result_gdf, ['satname', 'date'],CRS=\"EPSG:4326\").sort_values(by='date')\n", + "\n", + "else:\n", + " print(\"Overlapping ROIs found. This script does not support overlapping ROIs. 
Any shorelines in overlapping regions will be combined with a union resulting in multiple shorelines on the same date.\")\n", + " # merge the geodataframes on date and satname and average the cloud_cover and geoaccuracy for the merged rows\n", + " aggregation_funcs = {\n", + " 'cloud_cover': 'mean',\n", + " 'geoaccuracy': 'mean'\n", + " }\n", + " merged_gdf = merge_geodataframes(['date', 'satname'],'inner', aggregation_funcs,'epsg:4326', *gdfs)\n", + "\n", + " # merge the geometries with the same date and satname\n", + " geometry_columns = [col for col in merged_gdf.columns if 'geometry' in col]\n", + " merged_gdf = merge_geometries(merged_gdf, columns=geometry_columns, operation=unary_union)\n", + " # combine all the extracted shorelines gdfs into a single gdf\n", + " result_gdf = gpd.GeoDataFrame( geometry=[], crs='epsg:4326')\n", + " result_gdf.set_crs(\"EPSG:4326\", inplace=True)\n", + "\n", + " for gdf in gdfs:\n", + " if not gdf.crs:\n", + " gdf.set_crs(\"EPSG:4326\", inplace=True)\n", + " result_gdf = pd.concat([gdf, result_gdf], ignore_index=True)\n", + "\n", + " # combine the merged shorelines with all the extracted shorelines whose date and satname weren't in the merged shorelines\n", + " combined_gdf = mergeRightUnique(merged_gdf, result_gdf, ['satname', 'date'],CRS=\"EPSG:4326\").sort_values(by='date')\n", + "\n", + "print(f\"Combined {len(combined_gdf)} rows from {len(gdfs)} GeoDataFrames\")\n", + "print(f\"The following dataframe contains the combined extracted shorelines from all sessions.\\n Shorelines that were extracted on the same dates have been combined.\")\n", + "combined_gdf" + ] + }, + { + "cell_type": "code", + "execution_count": 98, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
" + ], + "text/plain": [ + " date satname geoaccuracy cloud_cover \\\n", + "0 2018-12-30 18:22:25 L8 5.088 0.000000 \n", + "1 2019-02-16 18:22:17 L8 5.802 0.000000 \n", + "2 2019-03-20 18:22:08 L8 6.596 0.000000 \n", + "3 2019-06-08 18:22:20 L8 4.826 0.263967 \n", + "4 2019-07-10 18:22:29 L8 4.275 0.000000 \n", + "5 2019-07-26 18:22:33 L8 4.286 0.015263 \n", + "6 2019-08-11 18:22:40 L8 4.080 0.000000 \n", + "7 2019-08-27 18:22:44 L8 4.208 0.000000 \n", + "8 2019-09-12 18:22:48 L8 4.128 0.000000 \n", + "9 2019-10-14 18:22:56 L8 4.002 0.015847 \n", + "\n", + " geometry \n", + "0 MULTIPOINT (-117.45892 33.28226, -117.45899 33... \n", + "1 MULTIPOINT (-117.45881 33.28239, -117.45891 33... \n", + "2 MULTIPOINT (-117.45875 33.28242, -117.45889 33... \n", + "3 MULTIPOINT (-117.44480 33.26540, -117.44481 33... \n", + "4 MULTIPOINT (-117.45899 33.28226, -117.45907 33... \n", + "5 MULTIPOINT (-117.45896 33.28226, -117.45904 33... \n", + "6 MULTIPOINT (-117.45896 33.28226, -117.45906 33... \n", + "7 MULTIPOINT (-117.45894 33.28226, -117.45902 33... \n", + "8 MULTIPOINT (-117.45891 33.28232, -117.45894 33... \n", + "9 MULTIPOINT (-117.45891 33.28235, -117.45892 33... " + ] + }, + "execution_count": 98, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "gdfs[0].head(10)" + ] + }, + { + "cell_type": "code", + "execution_count": 95, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "Timestamp('2018-12-30 18:22:25')" + ] + }, + "execution_count": 95, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "gdfs[0]['date'].iloc[0]" + ] + }, + { + "cell_type": "code", + "execution_count": 19, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "geopandas.geodataframe.GeoDataFrame" + ] + }, + "execution_count": 19, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "type(combined_gdf)" + ] + }, + { + "cell_type": "code", + "execution_count": 16, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "Timestamp('2018-12-30 18:22:25')" + ] + }, + "execution_count": 16, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "combined_gdf['date'].iloc[0]" + ] + }, + { + "cell_type": "code", + "execution_count": 94, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "Timestamp('2018-12-30 18:22:25')" + ] + }, + "execution_count": 94, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "pd.Timestamp('2018-12-30 18:22:25')" + ] + }, + { + "cell_type": "code", + "execution_count": 18, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "MULTIPOINT (-117.46831121455644 33.2934149376039, -117.46822498182809 33.293315724378125, -117.46814926544755 33.29325699482918, -117.46807955821971 33.29318097466683, -117.46801280310385 33.2930459285698, -117.46798680353123 33.29300289085092, -117.46790343007167 33.29291104288513, -117.46782472625975 33.292820680670765, -117.46778613576278 33.29277618691379, -117.46768262076851 33.29264127897712, -117.46766243744518 33.29259869230763, -117.46758720611838 33.292506340475185, -117.4675004106242 33.292425673843596, -117.46744942719033 33.29237156123675, -117.46736400566458 33.29223658496944, -117.46733810514881 33.29220026735731, -117.46723544457923 33.29210177082155, -117.467176112955 33.29203348666226, -117.46712530877306 33.29196688728733, -117.46703189065445 33.291831940832715, -117.4670137447137 33.29179600407632, -117.46692534663975 
33.291697043615464, -117.46685182614861 33.2916427912913, -117.46677443328466 33.291562312942304, -117.4666896024096 33.291432148797306, -117.46668470465809 33.29142735235697, -117.46652784179372 33.29130838157182, -117.46651307746635 33.29129269913169, -117.46641335877024 33.291157775837995, -117.46636560154232 33.29109437123302, -117.4662916847211 33.29102293483063, -117.46620374453326 33.29095227476743, -117.46613536476502 33.29088822363979, -117.46604172405897 33.29077933098838, -117.4660152761251 33.290753376406116, -117.46590928640816 33.29061847620187, -117.46587966197166 33.29059843607494, -117.46577474410209 33.29048368290575, -117.46572423610404 33.29034857460673, -117.46571716596593 33.29033573503742, -117.46559762098215 33.29021375140583, -117.46555517680602 33.29016829053979, -117.46548698108926 33.29007886824287, -117.465393883576 33.28994391929292, -117.46539287615757 33.28994205347099, -117.46527099249445 33.28980908180087, -117.46523093054836 33.28978256058315, -117.46513286551578 33.289674301200044, -117.46507000614841 33.28953923883162, -117.46506852993545 33.289537185490175, -117.46491710088182 33.28940451327704, -117.46490666638637 33.28939291340652, -117.46482306575831 33.289269567384125, -117.46474436311019 33.2891655905531, -117.46470981028915 33.28913469327859, -117.4645825468837 33.28903001754704, -117.46455426658065 33.28899997712723, -117.46444178219758 33.288865099885875, -117.46442031520282 33.28881592290719, -117.46434636365208 33.28873015878346, -117.46425842583936 33.28866632140892, -117.46419031802112 33.28859544403665, -117.46412347365437 33.28846039603745, -117.46409600256878 33.288415755572245, -117.46400936483936 33.28832552445204, -117.46393394942572 33.2882349583493, -117.46389927070148 33.288190637773525, -117.46379400455017 33.28805573297789, -117.46377169768692 33.28801649086976, -117.46367543691463 33.28792087770952, -117.46360987236203 33.28787850919393, -117.46352518196082 33.287786140496095, -117.46345582741644 33.287651101473095, -117.46344751110006 33.28763904313794, -117.46332168605775 33.28751630392105, -117.46328558275304 33.28748133567702, -117.46320257548352 33.28738145020655, -117.46312334731475 33.287265380902255, -117.46310545085117 33.287246514459774, -117.46297818360553 33.28711169091077, -117.46296139091666 33.28710211893793, -117.46283969829139 33.286976908999286, -117.46279912405362 33.28687992489835, -117.46276561194975 33.286841887184686, -117.46263731299813 33.28674395731903, -117.46260210869417 33.286707198139226, -117.46252690567738 33.2865721803295, -117.4624748551592 33.28648527414788, -117.4624270314923 33.28643725425684, -117.46231296026289 33.286333171545174, -117.46228627029186 33.28630248017165, -117.46218023617487 33.286167576801084, -117.46215057156903 33.28608727839941, -117.46210880701274 33.28603254468886, -117.46198863599882 33.28592720903182, -117.46196123464757 33.28589779554292, -117.46187784754584 33.28576280773069, -117.46182622685723 33.28567711617403, -117.46178327590025 33.28562786141026, -117.46167395103295 33.285492969810676, -117.46166412855979 33.28548588324747, -117.46155820171788 33.285358101966104, -117.46150170255015 33.28523225198407, -117.46149399573638 33.2852230426534, -117.461339884642 33.28509401784753, -117.46133433021349 33.28508833763221, -117.46123263071759 33.28495341732479, -117.46117770046561 33.28488604796699, -117.46110313569656 33.284818600041994, -117.46101580090819 33.284732064902435, -117.46097554415263 33.284683775560325, -117.46086944897027 33.28454887124338, 
-117.46085356466622 33.28451391358177, -117.46077170532998 33.28441393587353, -117.4606916454136 33.28435594173543, -117.46062310394586 33.28427918889476, -117.4605523780269 33.284144153221824, -117.46052927852692 33.284112627516016, -117.46040439549846 33.2840094036789, -117.46036748243732 33.2839778611397, -117.46028558541376 33.28387454593455, -117.46020517126063 33.28374485521569, -117.46020073047886 33.283739562366456, -117.46006829229559 33.28360475485887, -117.46004327856686 33.283591450207936, -117.45991894193682 33.28347000978074, -117.45988113015204 33.28338917676107, -117.45982813411369 33.28333504797006, -117.45971939822621 33.283266203166654, -117.45965115238528 33.28320040477151, -117.45957932724538 33.283065372574576, -117.4595570862332 33.28303245817667, -117.45944818820182 33.282930559578816, -117.45939517965205 33.28287593300715, -117.45932860333335 33.28279570374569, -117.45923370470418 33.282660756594396, -117.45923295728886 33.28265899811042, -117.45912778100515 33.28252585008831, -117.45907094437955 33.28248191887801, -117.4589884673966 33.28239106678126, -117.45892076566936 33.28225601895262, -117.45866692721654 33.281986360231116, -117.45858451376476 33.28187498773086, -117.45856186581786 33.281851450038225, -117.45843829852141 33.28171660800219, -117.45842255071346 33.281706915942515, -117.45829670393123 33.28158183230292, -117.45826048069688 33.281518255463574, -117.45818946961879 33.28144692979241, -117.45809869843228 33.281384546294824, -117.45801944652685 33.28131225854953, -117.45793656192932 33.28118290488124, -117.45793084752604 33.28117728714225, -117.45777482855564 33.28105834804756, -117.45775974516685 33.28104261950965, -117.45766668785994 33.280907664322335, -117.4576125734626 33.28083372041895, -117.45754923459039 33.28077279885874, -117.45745063366127 33.28066937706047, -117.45742345807327 33.280637963907004, -117.45731285539858 33.280503073002166, -117.45728843466166 33.280455207954454, -117.45721160914023 33.28036814758175, -117.45712651913594 33.28029527009966, -117.45707288330351 33.28023335988805, -117.45698992408873 33.2800983670374, -117.45696420801355 33.28005929625077, -117.45688090659233 33.27996346990251, -117.45680221909696 33.279885013234825, -117.45674098906514 33.27982868620389, -117.45664024439452 33.279713335954426, -117.45662092838424 33.27969382940586, -117.4564927961859 33.279559002127456, -117.45647831409595 33.279550067495755, -117.45635300957824 33.27942421749655, -117.4563162111941 33.27935350902325, -117.45626250103008 33.279289251847416, -117.45615419885665 33.27917422254163, -117.45613403732766 33.27915442540161, -117.45602218862162 33.27901953786946, -117.45599211561266 33.27898117021798, -117.45589371164407 33.278884711214424, -117.45583017931382 33.278816252917146, -117.45577047590061 33.278749865205796, -117.45566875003303 33.27861494022771, -117.45566804733005 33.278613553486835, -117.45553923450684 33.278480116998786, -117.45550617001066 33.27845974327469, -117.45539562345348 33.278345345266814, -117.45534398064511 33.27824571868922, -117.45531070708822 33.278210358402866, -117.45518218812872 33.27810800740683, -117.45514930005254 33.278075651554666, -117.45505568125395 33.2779406963693, -117.45501997823801 33.27788974726502, -117.45493393039477 33.27780584406275, -117.45485800030252 33.277716057172235, -117.45482126533824 33.27767095839095, -117.45471586147882 33.277536046052845, -117.45469576099683 33.277491825840556, -117.45461017774399 33.27740113464435, -117.45453394279728 33.277348692176886, -117.45445487562581 
33.277266404539255, -117.45438431955003 33.27713136450997, -117.45437163961692 33.277111838014086, -117.45425837659059 33.27699652683613, -117.4542096999223 33.27694500457471, -117.45414028140613 33.27686166036139, -117.45404796440023 33.2767266996214, -117.45404748872271 33.27672560344387, -117.45393144975053 33.276591827166435, -117.45388556121314 33.2765608665874, -117.45379036792646 33.276457044276796, -117.45372882628371 33.27632197094446, -117.45372320490297 33.27631311731183, -117.4535963273053 33.27618715649954, -117.45356129221993 33.27615098139777, -117.45347501482865 33.2760523011039, -117.45339924396933 33.275962502650145, -117.45334619883357 33.2759174729482, -117.45323742187068 33.27581765524738, -117.45320359501038 33.275782694918625, -117.45308139441102 33.275647842362126, -117.45307539252647 33.27563258166468, -117.45298733739205 33.2755128871356, -117.45291336267199 33.27544727619215, -117.45285882672695 33.275378057343175, -117.45279176356705 33.2752430036261, -117.4527508239725 33.275163253562646, -117.45270566670233 33.27510801918101, -117.45259764621862 33.27497311448898, -117.45258873451107 33.27496609286394, -117.4524821301133 33.274838236980266, -117.45242648134985 33.27473705154303, -117.4523956419228 33.27470325373176, -117.45226471441165 33.274602149554134, -117.45223352258571 33.27456854556084, -117.45214938406205 33.27443355358477, -117.45210236168957 33.274353494504375, -117.45205601281518 33.27429859511995, -117.45194033251272 33.2741674582838, -117.45193677714178 33.27416373060656, -117.45181612553367 33.27402887112, -117.45177815735548 33.2739529434, -117.45172553311312 33.273893902294674, -117.4516163074086 33.273801441726604, -117.45157528740779 33.273759150092104, -117.45149108568052 33.2736241578739, -117.45145402475423 33.27356575214782, -117.45138259281659 33.2734892537916, -117.45129205735316 33.27339117099812, -117.4512577819791 33.273354408840916, -117.45114747894795 33.27321951111464, -117.45112989495594 33.273178546621956, -117.45105186474127 33.273084559994935, -117.45096801222611 33.27302017015629, -117.45090129335159 33.272949808130214, -117.450829430553 33.27281477068578, -117.45080570440963 33.2727789601328, -117.45070523005556 33.27267992294378, -117.45064381629483 33.2726192767668, -117.45058305002804 33.27254506775028, -117.45048977967316 33.27241010768122, -117.4504815813983 33.27239194691438, -117.45038159784345 33.27227520155736, -117.45031969948035 33.272233215578865, -117.4502322382671 33.27214044446319, -117.45016663489318 33.272005383941895, -117.45015739724332 33.27199245774863, -117.450013094939 33.271870641711324, -117.44999562236262 33.27185433430863, -117.44990457057291 33.2717357363938, -117.44983343144214 33.271634973804666, -117.44980002835095 33.27160081657007, -117.44968477925836 33.271465935380846, -117.44967145233736 33.27145677834291, -117.44956275787831 33.27133107856882, -117.44951047721564 33.2711959695006, -117.44950903667922 33.271193252896005, -117.44939456546997 33.27106109042472, -117.44934703077857 33.27100953638161, -117.44927999644341 33.270926206384, -117.44919684469892 33.27079120872614, -117.44918470101308 33.27076243214613, -117.4490971482719 33.270656270772314, -117.44902279321965 33.270597594464554, -117.44895006575831 33.27052150385475, -117.44887057873379 33.27038649273226, -117.44886057960522 33.270372862991216, -117.44873203189813 33.27025169472995, -117.44869871135995 33.2702154972498, -117.44862002018358 33.27011680082358, -117.44853653643213 33.26999803054981, -117.44852145337532 
33.269981858309514, -117.44840434876176 33.269846982572425, -117.44837460379848 33.26982781574058, -117.44826735898414 33.26971217841742, -117.44821227833332 33.26958058296473, -117.44820949839955 33.26957708887091, -117.44807186310362 33.269442286818276, -117.44805039167734 33.269419095922935, -117.4479703140987 33.26930735459242, -117.44788984649676 33.26917234633716, -117.44788805007805 33.26916837338581, -117.44777363256016 33.26903746677006, -117.44772614993933 33.269003977014265, -117.44763515933292 33.268902667232794, -117.44756909011448 33.26876760691706, -117.44756384591977 33.268760286936065, -117.44741654292832 33.26863285778955, -117.44740207157238 33.26862029099199, -117.44730442785178 33.26849796301519, -117.44723993279908 33.26840870097797, -117.44719518708116 33.268363057796954, -117.44707796992166 33.26823147638844, -117.44707487569586 33.268228192286436, -117.44695283852772 33.26809333285869, -117.44691578352726 33.268010241044095, -117.44687372581339 33.26795831903608, -117.44675389775314 33.26784788488595, -117.44673150330759 33.267823531929686, -117.4466480158126 33.26768853367338, -117.4465915541152 33.267595454375055, -117.44655421564318 33.26755357239607, -117.44643836376814 33.267418690225334, -117.44642956388695 33.26741229897047, -117.44631758254611 33.2672838256368, -117.44626733845605 33.26718276550611, -117.44623901608787 33.26714880942376, -117.4461054317204 33.26701574767321, -117.44610382527698 33.26701399633387, -117.44600085708194 33.2668790675108, -117.44594323566415 33.266791681019896, -117.44589808353817 33.26674413789998, -117.44578135060179 33.26662866455503, -117.4457624763882 33.26660932591776, -117.4456431587783 33.266474455398985, -117.4456192416658 33.266421446903145, -117.4455496570127 33.266339492254374, -117.4454574052931 33.26626776714814, -117.44539843973924 33.26620473579754, -117.44532913310276 33.26606968580391, -117.44529504340235 33.266010420270845, -117.44522493357385 33.26593476071701, -117.44513304215249 33.26582397630765, -117.44511335065934 33.2657998619703, -117.4450056586546 33.26566494919331, -117.44497075240258 33.26558050361069, -117.44493072214634 33.26552991911025, -117.44485591778191 33.26539488850522, -117.4448088467123 33.26541261693488, -117.44480848687776 33.26534163092265, -117.44479312129943 33.2653951131627, -117.44475347483217 33.26525995671662, -117.44466397677263 33.2651249785458, -117.44464614645831 33.26508780230751, -117.44456506720915 33.264990033956956, -117.44448425935248 33.264923280852315, -117.44442684272254 33.26485522981544, -117.44435214486771 33.26472019850557, -117.44432196751438 33.26467870646365, -117.44422029726995 33.26458537133573, -117.44416012599659 33.26452292132679, -117.4441056162976 33.26445048270809, -117.4440072593679 33.264315535679344, -117.44399791517036 33.26429403135382, -117.44391249647906 33.264180575737235, -117.44383591794087 33.264107204476225, -117.44378634830436 33.264045727740594, -117.4437054518774 33.263910718140004, -117.44367365408404 33.263867504320025, -117.44356992520963 33.26377590338103, -117.44351182113355 33.26371288830683, -117.44346142095903 33.26364099209715, -117.44336627304662 33.263506033089854, -117.44334957047234 33.26347547852982, -117.44324429817534 33.263371169639214, -117.44318782362924 33.2633376554335, -117.44310043823604 33.26323638405503, -117.44304320571592 33.263101289667496, -117.44302544174968 33.263073940328525, -117.44291064525203 33.26296646359275, -117.44286362756345 33.26292252412362, -117.44279397152478 33.26283158079888, 
-117.44270144980844 33.262698925563946, -117.4426991573193 33.262696620066166, -117.44259788098242 33.262561682258884, -117.44253949841614 33.26252005146416, -117.44245869202939 33.26242687927731, -117.4423933819945 33.26229181330833, -117.44237718629479 33.262269483830245, -117.44223623641558 33.26215707395356, -117.44221547357958 33.262137686607645, -117.44213345895932 33.26202214108878, -117.44205323543501 33.26190147950532, -117.44203906827627 33.2618871783203, -117.44193112058437 33.26175226365934, -117.4418912820578 33.26172165380483, -117.44180307614077 33.26161742030647, -117.44174653932845 33.261482322800674, -117.44172886701556 33.26144997257925, -117.44164304135721 33.26134739206831, -117.44156685337262 33.2612578695277, -117.4415334549757 33.26121248286457, -117.44143800954528 33.26107752335904, -117.44140459025073 33.26101602125725, -117.44133658779464 33.260942584987575, -117.44124283010174 33.26087403664673, -117.4411833425762 33.2608078304104, -117.4411137766187 33.260672778819234, -117.44108051773526 33.260622068996035, -117.44099728629075 33.26053789361171, -117.44091862014834 33.2604524772666, -117.44088072172637 33.260403008553645, -117.44078273657098 33.26026805752339, -117.44075638339994 33.26021522434624, -117.44067952250904 33.26013312494258, -117.44059465898462 33.2600798530704, -117.44051953679467 33.25999839340595, -117.44046558287306 33.25986328611813, -117.4404322367806 33.25980532958586, -117.4403596689956 33.259728362821726, -117.4402703314701 33.25963364547732, -117.44023949890888 33.259593489903615, -117.44014101510055 33.259458540106635, -117.44010806448468 33.25938970703387, -117.44005533522193 33.259323544916555, -117.43994620930853 33.259227743969106, -117.43991242870382 33.259188752127734, -117.43983343540533 33.25905373312071, -117.43978392555195 33.25898012673304, -117.43972405066644 33.25891882153279, -117.43962194704555 33.2587932691362, -117.43961463996773 33.25878390993591, -117.43950240321242 33.25864900822665, -117.43945976514787 33.25856565040418, -117.43941767859494 33.25851400919505, -117.43929788380902 33.258397914851344, -117.43928222759207 33.258379189307945, -117.43918830638715 33.258244222598854, -117.43913567656251 33.258164913609654, -117.43908642701457 33.258109283915175, -117.43897368721068 33.25797530769067, -117.43897290970516 33.25797438622713, -117.43885783267072 33.25783949393544, -117.4388115334688 33.2577526784396, -117.43876639042757 33.25770451814155, -117.43864982807163 33.25761959894214, -117.43860519499466 33.25756978832304, -117.4385346080286 33.257434738789215, -117.43848747871935 33.257357515025205, -117.4384338274566 33.25729979566921, -117.43832551076765 33.25717161279383, -117.43832020205411 33.257164897739266, -117.43820874605282 33.25702999205622, -117.43816323906931 33.256924722772915, -117.43813963445605 33.25689493708254, -117.43800140768231 33.256765889667925, -117.43589377789193 33.25392061425424, -117.43575895950812 33.25379143558214, -117.43573195727336 33.253761998644585, -117.43565914841417 33.25365648669788, -117.43557434626221 33.25352148516087, -117.43556967478418 33.25351017501554, -117.43547365183036 33.25338653921302, -117.43540776074553 33.253332451666445, -117.43534079863251 33.25325170575944, -117.43527289752562 33.25311664483862, -117.43524545795628 33.25307618336088, -117.43514488443219 33.25298179422573, -117.43508364558713 33.25291867939108, -117.43503383643719 33.25284688413116, -117.43493937453333 33.252711915926206, -117.43492142465178 33.252678586158346, -117.43482390216649 
33.25257702110884, -117.43475968798106 33.25253610169747, -117.43467742369758 33.25244223455777, -117.43461605232748 33.252307150429864, -117.43459738844973 33.25227980758127, -117.43447764445217 33.252172335429094, -117.43443560829088 33.2521282790326, -117.43437574454907 33.25203739275942, -117.43428115942248 33.2519024244586, -117.43427338779964 33.25188761805309, -117.43416401968832 33.25176753482378, -117.43411158833071 33.25173192392377, -117.43402731622835 33.25163271335266, -117.43396537882961 33.251497630854246, -117.4339492876952 33.25147471843596, -117.43382124953153 33.251362835059425, -117.43378754049172 33.2513293310498, -117.43371728063981 33.251227899040856, -117.43363016378753 33.251092904173035, -117.4336252978232 33.251083517148196, -117.4335073502926 33.25095803367535, -117.43346350174573 33.25092797476085, -117.43336978940137 33.25082321444217, -117.43330415401387 33.25068814447566, -117.43330126981665 33.25068400074877, -117.43315378991782 33.25055336960146, -117.43313953103056 33.25053980481857, -117.43305227557677 33.2504184244512, -117.43297739133197 33.25031420345098, -117.43294606074834 33.25028349557722, -117.43281952510684 33.25014863733013, -117.43281553712333 33.25014634611651, -117.43269552065733 33.25001377014311, -117.43265340633411 33.24992223455563, -117.43261516318923 33.24987875100774, -117.43249170881738 33.24978591411864, -117.43245164267245 33.24974402102002, -117.43237428208886 33.24960899129947, -117.43232948228521 33.249542053353096, -117.43048411090119 33.247180171010406, -117.43039704933571 33.24704517355711, -117.43038479053928 33.24701477745257, -117.43029649417527 33.24691022272374, -117.43022302863162 33.246863495946975, -117.43014404317461 33.246775451297594, -117.43008423259246 33.246640359361834, -117.43006074864378 33.24660631607363, -117.42993472331723 33.24650557749703, -117.42989906981768 33.246471736268724, -117.42982911358179 33.246370643740526, -117.42973880286269 33.24623565702941, -117.42973687389915 33.246231391349774, -117.4296206430795 33.24610076644958, -117.42957507803936 33.24607264407594, -117.4294796490696 33.24596595460434, -117.42941972405882 33.24583086271936, -117.42941282942756 33.2458211892892, -117.42925120175806 33.24569657438956, -117.42925080943917 33.24569614700168, -117.42916923664393 33.245561129697926, -117.42908889449431 33.245432782073394, -117.42908212142949 33.245426131457485, -117.428955907942 33.24529126800305, -117.42892709531588 33.24527281198034, -117.42882404093181 33.24515642390822, -117.42876492504128 33.24503671483857, -117.42875028300669 33.2450213793683, -117.42860329975328 33.24491210215929, -117.42857936051102 33.24488666963083, -117.4284980056714 33.24475165111004, -117.4284410670586 33.2446628939018, -117.4283968471586 33.244616700744224, -117.42827924736636 33.244498182387446, -117.42826464003463 33.24448185720452, -117.42814644335978 33.244346965291896, -117.42811719428173 33.244285470001884, -117.42804720479326 33.24421200801923, -117.42795554334894 33.244155121033096, -117.42788070360102 33.24407728207588, -117.42781683575792 33.24394220292433, -117.42779331550624 33.24390624044027, -117.42768650789296 33.24380735228274, -117.42763156445932 33.243755088398295, -117.42756768025635 33.24367246196685, -117.42747755475425 33.24353747288988, -117.42746940522169 33.243519959994956, -117.42736264211 33.243402568913645, -117.42730765525555 33.243368768266734, -117.42721416250332 33.24326778012586, -117.42715306788732 33.24313269108312, -117.42714543993993 33.24312178300561, 
-117.42700063201012 33.24299791561781, -117.42698374340891 33.242981315496216, -117.4268986449701 33.242862966819985, -117.42682155884211 33.24274032057663, -117.42681019119568 33.24272797149564, -117.42668373671958 33.24259310647013, -117.42665976637551 33.24257985314327, -117.42654909278245 33.24245826939635, -117.4264976277485 33.2423479805987, -117.42647556152284 33.24232322264523, -117.42633594251497 33.24220933167315, -117.42631688213294 33.24218846772063, -117.42623429717158 33.24205345184948, -117.42617370709947 33.241957193614766, -117.42613594029856 33.241918489955395, -117.42601252502728 33.24178361380745, -117.42601185180801 33.24178323021189, -117.4258943534202 33.24164871957783, -117.425849746924 33.24155765701764, -117.42580939412079 33.24151371153743, -117.42568806359107 33.241418887180856, -117.42564984066861 33.24137895872517, -117.42556506185073 33.241243949891505, -117.42552597018069 33.24119537009456, -117.42541978767248 33.2411091479185, -117.42536427192398 33.241053274608504, -117.4253045060145 33.24097424322125, -117.4252059976284 33.24083928107443, -117.4252021817785 33.2408301207025, -117.42509916089097 33.24070434730876, -117.42504036482737 33.2406632496622, -117.4249592850714 33.24056952632921, -117.42489436570548 33.240434449161235, -117.4248781326275 33.24041040884691, -117.4247590104549 33.24029961250505, -117.42471638832112 33.24025827255816, -117.42464585747037 33.24016469990118, -117.42455427523703 33.24002972524283, -117.4245542639871 33.24002971360458, -117.42442495065723 33.2398948559484, -117.42439251494268 33.23987401780215, -117.42428604206779 33.23976003088326, -117.42423040171067 33.23964511673104, -117.42421284963046 33.239624981554066, -117.42406875980377 33.23951366383949, -117.42404625102174 33.23949025064393, -117.42396465881585 33.23935522979543, -117.4239065175474 33.23925771357859, -117.42387398231813 33.239220239846254, -117.42375267061044 33.23908535419608, -117.42374465513876 33.23908030415458, -117.42362390079087 33.23895049382316, -117.42358260806367 33.238864464370195, -117.42354042491368 33.2388154790946, -117.42342085859319 33.238710196622854, -117.42339467118228 33.23868067630414, -117.42331922061194 33.23854563410162, -117.42325856469955 33.23844282578244, -117.4232296118874 33.23841064002297, -117.42309680512345 33.23828617944604, -117.42308759766013 33.2382758241351, -117.4229708896795 33.23814092203175, -117.42293473894043 33.238065732661724, -117.42287859693018 33.238005936813835, -117.42277308892263 33.23793158442445, -117.42271556712791 33.23787119193039, -117.42264696638546 33.237736126016614, -117.42261089879491 33.23768506862724, -117.42250802660396 33.23760129899714, -117.4224492073834 33.23754206380223, -117.42239347244099 33.23746638901357, -117.42229917916595 33.23733141013704, -117.42228705177698 33.237302388358266, -117.42218672279522 33.237196492832275, -117.42212537819047 33.23716283569766, -117.42202996450683 33.23706172573525, -117.42197516262489 33.2369266126088, -117.42196314476536 33.236906628314856, -117.42184063599842 33.23679176984497, -117.4218014087826 33.23675382107664, -117.42172634268137 33.23665685833546, -117.42163936412663 33.23653658615085, -117.42162445828751 33.23652190465724, -117.42150586212512 33.236387007520484, -117.42147755692491 33.23636867473098, -117.42137396383464 33.2362521553271, -117.42131546067229 33.23614036471634, -117.42129422253848 33.23611712635459, -117.42115382586492 33.23600813018987, -117.42112989823697 33.2359823837093, -117.42103915456426 33.23584739181233, 
-117.42099176237339 33.235786340395684, -117.42090879826648 33.23571253388786, -117.4208300483751 33.23563733623005, -117.42077957820777 33.235577671978575, -117.4206681194826 33.235443340065146)\n" + ] + } + ], + "source": [ + "print(combined_gdf['geometry'].iloc[0])" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Save the Merged Extracted Shorelines to a JSON file\n", + "- This will contain all the metadata for each extracted shoreline such as \n", + "\n", + "\n", + "    1. cloud cover\n", + "    2. date\n", + "    3. satellite it was derived from \n", + "    4. geoaccuracy\n", + "- Filename: `extracted_shorelines_dict.json`\n", + "    " + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# mapping of dictionary keys to dataframe columns\n", + "keymap ={'shorelines':'geometry',\n", + "        'dates':'date',\n", + "        'satname':'satname',\n", + "        'cloud_cover':'cloud_cover',\n", + "        'geoaccuracy':'geoaccuracy'}\n", + "# shoreline dict should have keys: dates, satname, cloud_cover, geoaccuracy, shorelines\n", + "shoreline_dict = dataframe_to_dict(combined_gdf,keymap)\n", + "# save the extracted shoreline dictionary to json file\n", + "to_file(shoreline_dict, os.path.join(merged_session_location, \"extracted_shorelines_dict.json\"))" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Save the Merged Extracted Shorelines to GeoJSON Files\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "\n", + "# Save extracted shorelines as a GeoJSON file\n", + "es_line_path = os.path.join(merged_session_location, \"extracted_shorelines_lines.geojson\")\n", + "es_pts_path = os.path.join(merged_session_location, \"extracted_shorelines_points.geojson\")\n", + "\n", + "es_lines_gdf = convert_multipoints_to_linestrings(combined_gdf)\n", + "# save extracted shorelines as interpolated linestrings\n", + "es_lines_gdf.to_file(es_line_path, driver='GeoJSON')\n", + "\n", + "\n", + "points_gdf = convert_linestrings_to_multipoints(combined_gdf)\n", + "points_gdf = stringify_datetime_columns(points_gdf)\n", + "# Save extracted shorelines as multipoints GeoJSON file\n", + "points_gdf.to_file(es_pts_path, driver='GeoJSON')\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Find when the Transects and Shorelines intersect\n", + "1. Load the Transects for all the ROIs \n", + "2. Get the shoreline dictionary we created earlier and read the shorelines from it\n", + "3. Find where the shorelines and transects intersect\n", + "4. Save the shoreline and transect intersections as a timeseries to a csv file\n", + "5. Save the timeseries of intersections between the shoreline and a single transect to csv file" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# 1. load transects for all ROIs\n", + "transect_rows = merged_config[merged_config['type'] == 'transect']\n", + "transects_dict = {row['id']: np.array(row[\"geometry\"].coords) for i, row in transect_rows.iterrows()}\n", + "# 2. 
compute the intersection between the transects and the extracted shorelines\n", + "cross_distance = SDS_transects.compute_intersection_QC(shoreline_dict, transects_dict, settings_transects)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# use coastseg.common to get the cross_distance_df\n", + "transects_df = get_cross_distance_df(shoreline_dict,cross_distance)\n", + "# save the transect shoreline intersections to csv timeseries file\n", + "filepath = os.path.join(merged_session_location, \"transect_time_series.csv\")\n", + "transects_df.to_csv(filepath, sep=\",\")\n", + "transects_df.head(5)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Save the timeseries of intersections between the shoreline and a single transect to csv file\n", + "create_csv_per_transect(merged_session_location,cross_distance,shoreline_dict,)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Tidally Correct Shoreline Transect Intersections" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Step 1: Load the Tide Model\n" + ] + }, + { + "cell_type": "code", + "execution_count": 20, + "metadata": {}, + "outputs": [], + "source": [ + "import geopandas as gpd\n", + "\n", + "gdf1=gpd.GeoDataFrame(\n", + "            geometry=[], crs= None\n", + "        )\n" + ] + }, + { + "cell_type": "code", + "execution_count": 23, + "metadata": {}, + "outputs": [], + "source": [ + "data = {\n", + "    \"cloud_cover\": [0.1, 0.2, 0.3],\n", + "    \"satname\": ['L8', 'L8', 'L8'],\n", + "    \"date\": [\n", + "        pd.Timestamp(\"2018-12-30 18:22:25\"),\n", + "        pd.Timestamp(\"2018-1-30 19:22:25\"),\n", + "        pd.Timestamp(\"2022-01-03 19:22:25\"),\n", + "    ],\n", + "    \"geometry\": [\n", + "        MultiPoint([(0, 0), (1, 1)]),\n", + "        MultiPoint([(2, 2), (3, 3)]),\n", + "        MultiPoint([(4, 4), (5, 5)]),\n", + "    ],\n", + "}\n", + "df = gpd.GeoDataFrame(geometry = data['geometry'], crs='epsg:4326')" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "data = {\n", + "    \"cloud_cover\": [0.1, 0.2, 0.3],\n", + "    \"satname\": ['L8', 'L8', 'L8'],\n", + "    \"date\": [\n", + "        pd.Timestamp(\"2018-12-30 18:22:25\"),\n", + "        pd.Timestamp(\"2018-1-30 19:22:25\"),\n", + "        pd.Timestamp(\"2022-01-03 19:22:25\"),\n", + "    ],\n", + "}" + ] + }, + { + "cell_type": "code", + "execution_count": 26, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "<div>
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
cloud_coversatnamedategeometry
00.1L82018-12-30 18:22:25MULTIPOINT (0.00000 0.00000, 1.00000 1.00000)
10.2L82018-01-30 19:22:25MULTIPOINT (2.00000 2.00000, 3.00000 3.00000)
20.3L82022-01-03 19:22:25MULTIPOINT (4.00000 4.00000, 5.00000 5.00000)
\n", + "
" + ], + "text/plain": [ + " cloud_cover satname date \\\n", + "0 0.1 L8 2018-12-30 18:22:25 \n", + "1 0.2 L8 2018-01-30 19:22:25 \n", + "2 0.3 L8 2022-01-03 19:22:25 \n", + "\n", + " geometry \n", + "0 MULTIPOINT (0.00000 0.00000, 1.00000 1.00000) \n", + "1 MULTIPOINT (2.00000 2.00000, 3.00000 3.00000) \n", + "2 MULTIPOINT (4.00000 4.00000, 5.00000 5.00000) " + ] + }, + "execution_count": 26, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "\n", + "# create a list of geometries\n", + "geometries = [\n", + " MultiPoint([(0, 0), (1, 1)]),\n", + " MultiPoint([(2, 2), (3, 3)]),\n", + " MultiPoint([(4, 4), (5, 5)]),\n", + "]\n", + "\n", + "# create a dictionary with the other columns\n", + "data = {\n", + " \"cloud_cover\": [0.1, 0.2, 0.3],\n", + " \"satname\": ['L8', 'L8', 'L8'],\n", + " \"date\": [\n", + " pd.Timestamp(\"2018-12-30 18:22:25\"),\n", + " pd.Timestamp(\"2018-1-30 19:22:25\"),\n", + " pd.Timestamp(\"2022-01-03 19:22:25\"),\n", + " ],\n", + " \"geometry\": geometries,\n", + "}\n", + "\n", + "# create a GeoDataFrame from the dictionary\n", + "df = gpd.GeoDataFrame(data, geometry='geometry', crs='epsg:4326')\n", + "df.set_crs('epsg:4326', inplace=True)\n" + ] + }, + { + "cell_type": "code", + "execution_count": 29, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "{'geoaccuracy': [1, 2, 3],\n", + " 'cloud_cover': [0.1, 0.2, 0.3],\n", + " 'satname': ['L8', 'L8', 'L8'],\n", + " 'dates': ['2018-12-30 18:22:25', '2018-1-30 19:22:25', '2022-01-03 19:22:25'],\n", + " 'shorelines': [,\n", + " ,\n", + " ]}" + ] + }, + "execution_count": 29, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "data = {\n", + " 'geoaccuracy': [1, 2, 3],\n", + " \"cloud_cover\": [0.1, 0.2, 0.3],\n", + " \"satname\": ['L8', 'L8', 'L8'],\n", + " \"dates\": [\n", + " \"2018-12-30 18:22:25\",\n", + " \"2018-1-30 19:22:25\",\n", + " \"2022-01-03 19:22:25\",\n", + " ],\n", + " \"shorelines\": geometries,\n", + "}\n", + "data" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "gdf1.crs" + ] + }, + { + "cell_type": "code", + "execution_count": 44, + "metadata": {}, + "outputs": [], + "source": [ + "expected = {\n", + " \"geoaccuracy\": [1, 2, 3],\n", + " \"cloud_cover\": [0.1, 0.2, 0.3],\n", + " \"satname\": [\"L8\", \"L8\", \"L8\"],\n", + " \"dates\": [\n", + " \"2018-12-30 18:22:25\",\n", + " \"2018-01-30 19:22:25\",\n", + " \"2022-01-03 19:22:25\",\n", + " ],\n", + " \"shorelines\": [\n", + " np.array([[0.0, 0.0], [1.0, 1.0]]),\n", + " np.array([[2.0, 2.0], [3.0, 3.0]]),\n", + " np.array([[4.0, 4.0], [5.0, 5.0]]),\n", + " ],\n", + "}" + ] + }, + { + "cell_type": "code", + "execution_count": 40, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "{'shorelines': [array([[0., 0.],\n", + " [1., 1.]]),\n", + " array([[2., 2.],\n", + " [3., 3.]]),\n", + " array([[4., 4.],\n", + " [5., 5.]])],\n", + " 'dates': ['2018-12-30 18:22:25',\n", + " '2018-01-30 19:22:25',\n", + " '2022-01-03 19:22:25'],\n", + " 'satname': ['L8', 'L8', 'L8'],\n", + " 'cloud_cover': [0.1, 0.2, 0.3]}" + ] + }, + "execution_count": 40, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "from coastseg.merge_utils import dataframe_to_dict\n", + "\n", + "key_map = {\n", + " \"shorelines\": \"geometry\",\n", + " \"dates\": \"date\",\n", + " \"satname\": \"satname\",\n", + " \"cloud_cover\": \"cloud_cover\",\n", + " \"geoaccuracy\": \"geoaccuracy\",\n", + "}\n", + "\n", + 
"result = dataframe_to_dict(df, key_map)\n", + "result" + ] + }, + { + "cell_type": "code", + "execution_count": 47, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "[array([[0., 0.],\n", + " [1., 1.]]),\n", + " array([[2., 2.],\n", + " [3., 3.]]),\n", + " array([[4., 4.],\n", + " [5., 5.]])]" + ] + }, + "execution_count": 47, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "result['shorelines']" + ] + }, + { + "cell_type": "code", + "execution_count": 45, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "[array([[0., 0.],\n", + " [1., 1.]]),\n", + " array([[2., 2.],\n", + " [3., 3.]]),\n", + " array([[4., 4.],\n", + " [5., 5.]])]" + ] + }, + "execution_count": 45, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "expected['shorelines']" + ] + }, + { + "cell_type": "code", + "execution_count": 46, + "metadata": {}, + "outputs": [ + { + "ename": "ValueError", + "evalue": "The truth value of an array with more than one element is ambiguous. Use a.any() or a.all()", + "output_type": "error", + "traceback": [ + "\u001b[1;31m---------------------------------------------------------------------------\u001b[0m", + "\u001b[1;31mValueError\u001b[0m Traceback (most recent call last)", + "\u001b[1;32mc:\\development\\doodleverse\\coastseg\\CoastSeg\\merge_session_final_nonoverlapping_script.ipynb Cell 36\u001b[0m line \u001b[0;36m1\n\u001b[1;32m----> 1\u001b[0m expected[\u001b[39m'\u001b[39;49m\u001b[39mshorelines\u001b[39;49m\u001b[39m'\u001b[39;49m] \u001b[39m==\u001b[39;49m result[\u001b[39m'\u001b[39;49m\u001b[39mshorelines\u001b[39;49m\u001b[39m'\u001b[39;49m]\n", + "\u001b[1;31mValueError\u001b[0m: The truth value of an array with more than one element is ambiguous. Use a.any() or a.all()" + ] + } + ], + "source": [ + "expected['shorelines'] == result['shorelines']" + ] + }, + { + "cell_type": "code", + "execution_count": 55, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "True\n" + ] + } + ], + "source": [ + "import numpy as np\n", + "\n", + "# Define the two lists of arrays\n", + "list1 = [np.array([[0., 0.], [1., 1.]]), np.array([[2., 2.], [3., 3.]]), np.array([[4., 4.], [5., 5.]])]\n", + "list2 = [np.array([[0., 0.], [1., 1.]]), np.array([[2., 2.], [3., 3.]]), np.array([[4., 4.], [5., 5.]])]\n", + "\n", + "# Check if the two lists of arrays are equal\n", + "equal = all(np.array_equal(a, b) for a, b in zip(list1, list2))\n", + "\n", + "print(equal) # This will print True if the two lists of arrays are equal, and False otherwise" + ] + }, + { + "cell_type": "code", + "execution_count": 61, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[[0. 0.]\n", + " [1. 1.]]\n", + "True\n", + "[[2. 2.]\n", + " [3. 3.]]\n", + "True\n", + "[[4. 4.]\n", + " [5. 
5.]]\n", + "True\n" + ] + } + ], + "source": [ + "for a, b in zip(list1, list2):\n", + " print(b)\n", + " print((np.array_equal(a, b)))" + ] + }, + { + "cell_type": "code", + "execution_count": 49, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "True" + ] + }, + "execution_count": 49, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "a=list1[0]\n", + "b=list2[0]\n", + "np.array_equal(a, b)" + ] + }, + { + "cell_type": "code", + "execution_count": 51, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "False" + ] + }, + "execution_count": 51, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "all([True,False])" + ] + }, + { + "cell_type": "code", + "execution_count": 32, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "True" + ] + }, + "execution_count": 32, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "expected == expected2" + ] + }, + { + "cell_type": "code", + "execution_count": 66, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
geometry
0MULTILINESTRING ((0.00000 0.00000, 1.00000 1.0...
1MULTILINESTRING ((4.00000 4.00000, 5.00000 5.0...
\n", + "
" + ], + "text/plain": [ + " geometry\n", + "0 MULTILINESTRING ((0.00000 0.00000, 1.00000 1.0...\n", + "1 MULTILINESTRING ((4.00000 4.00000, 5.00000 5.0..." + ] + }, + "execution_count": 66, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "data = {\n", + " \"geometry\": [\n", + " MultiLineString([[(0, 0), (1, 1)], [(2, 2), (3, 3)]]),\n", + " MultiLineString([[(4, 4), (5, 5)], [(6, 6), (7, 7)]]),\n", + " ]\n", + " }\n", + "\n", + "\n", + "gdf = gpd.GeoDataFrame(data)\n", + "gdf" + ] + }, + { + "cell_type": "code", + "execution_count": 67, + "metadata": {}, + "outputs": [ + { + "ename": "TypeError", + "evalue": "'MultiLineString' object is not iterable", + "output_type": "error", + "traceback": [ + "\u001b[1;31m---------------------------------------------------------------------------\u001b[0m", + "\u001b[1;31mTypeError\u001b[0m Traceback (most recent call last)", + "\u001b[1;32mc:\\development\\doodleverse\\coastseg\\CoastSeg\\merge_session_final_nonoverlapping_script.ipynb Cell 43\u001b[0m line \u001b[0;36m1\n\u001b[1;32m----> 1\u001b[0m result \u001b[39m=\u001b[39m convert_lines_to_multipoints(gdf)\n", + "\u001b[1;32mc:\\development\\doodleverse\\coastseg\\CoastSeg\\merge_session_final_nonoverlapping_script.ipynb Cell 43\u001b[0m line \u001b[0;36m9\n\u001b[0;32m 87\u001b[0m \u001b[39mreturn\u001b[39;00m geometry \u001b[39m# Return the original geometry if it's not a LineString or MultiLineString\u001b[39;00m\n\u001b[0;32m 89\u001b[0m \u001b[39m# Apply the conversion function to each row in the GeoDataFrame\u001b[39;00m\n\u001b[1;32m---> 90\u001b[0m gdf[\u001b[39m'\u001b[39m\u001b[39mgeometry\u001b[39m\u001b[39m'\u001b[39m] \u001b[39m=\u001b[39m gdf[\u001b[39m'\u001b[39;49m\u001b[39mgeometry\u001b[39;49m\u001b[39m'\u001b[39;49m]\u001b[39m.\u001b[39;49mapply(line_to_multipoint)\n\u001b[0;32m 92\u001b[0m \u001b[39mreturn\u001b[39;00m gdf\n", + "File \u001b[1;32mc:\\Users\\sf230\\anaconda3\\envs\\coastseg_transformers10\\lib\\site-packages\\geopandas\\geoseries.py:645\u001b[0m, in \u001b[0;36mGeoSeries.apply\u001b[1;34m(self, func, convert_dtype, args, **kwargs)\u001b[0m\n\u001b[0;32m 643\u001b[0m \u001b[39m@doc\u001b[39m(pd\u001b[39m.\u001b[39mSeries)\n\u001b[0;32m 644\u001b[0m \u001b[39mdef\u001b[39;00m \u001b[39mapply\u001b[39m(\u001b[39mself\u001b[39m, func, convert_dtype\u001b[39m=\u001b[39m\u001b[39mTrue\u001b[39;00m, args\u001b[39m=\u001b[39m(), \u001b[39m*\u001b[39m\u001b[39m*\u001b[39mkwargs):\n\u001b[1;32m--> 645\u001b[0m result \u001b[39m=\u001b[39m \u001b[39msuper\u001b[39m()\u001b[39m.\u001b[39mapply(func, convert_dtype\u001b[39m=\u001b[39mconvert_dtype, args\u001b[39m=\u001b[39margs, \u001b[39m*\u001b[39m\u001b[39m*\u001b[39mkwargs)\n\u001b[0;32m 646\u001b[0m \u001b[39mif\u001b[39;00m \u001b[39misinstance\u001b[39m(result, GeoSeries):\n\u001b[0;32m 647\u001b[0m \u001b[39mif\u001b[39;00m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39mcrs \u001b[39mis\u001b[39;00m \u001b[39mnot\u001b[39;00m \u001b[39mNone\u001b[39;00m:\n", + "File \u001b[1;32mc:\\Users\\sf230\\anaconda3\\envs\\coastseg_transformers10\\lib\\site-packages\\pandas\\core\\series.py:4630\u001b[0m, in \u001b[0;36mSeries.apply\u001b[1;34m(self, func, convert_dtype, args, **kwargs)\u001b[0m\n\u001b[0;32m 4520\u001b[0m \u001b[39mdef\u001b[39;00m \u001b[39mapply\u001b[39m(\n\u001b[0;32m 4521\u001b[0m \u001b[39mself\u001b[39m,\n\u001b[0;32m 4522\u001b[0m func: AggFuncType,\n\u001b[1;32m (...)\u001b[0m\n\u001b[0;32m 4525\u001b[0m \u001b[39m*\u001b[39m\u001b[39m*\u001b[39mkwargs,\n\u001b[0;32m 
4526\u001b[0m ) \u001b[39m-\u001b[39m\u001b[39m>\u001b[39m DataFrame \u001b[39m|\u001b[39m Series:\n\u001b[0;32m 4527\u001b[0m \u001b[39m \u001b[39m\u001b[39m\"\"\"\u001b[39;00m\n\u001b[0;32m 4528\u001b[0m \u001b[39m Invoke function on values of Series.\u001b[39;00m\n\u001b[0;32m 4529\u001b[0m \n\u001b[1;32m (...)\u001b[0m\n\u001b[0;32m 4628\u001b[0m \u001b[39m dtype: float64\u001b[39;00m\n\u001b[0;32m 4629\u001b[0m \u001b[39m \"\"\"\u001b[39;00m\n\u001b[1;32m-> 4630\u001b[0m \u001b[39mreturn\u001b[39;00m SeriesApply(\u001b[39mself\u001b[39;49m, func, convert_dtype, args, kwargs)\u001b[39m.\u001b[39;49mapply()\n", + "File \u001b[1;32mc:\\Users\\sf230\\anaconda3\\envs\\coastseg_transformers10\\lib\\site-packages\\pandas\\core\\apply.py:1025\u001b[0m, in \u001b[0;36mSeriesApply.apply\u001b[1;34m(self)\u001b[0m\n\u001b[0;32m 1022\u001b[0m \u001b[39mreturn\u001b[39;00m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39mapply_str()\n\u001b[0;32m 1024\u001b[0m \u001b[39m# self.f is Callable\u001b[39;00m\n\u001b[1;32m-> 1025\u001b[0m \u001b[39mreturn\u001b[39;00m \u001b[39mself\u001b[39;49m\u001b[39m.\u001b[39;49mapply_standard()\n", + "File \u001b[1;32mc:\\Users\\sf230\\anaconda3\\envs\\coastseg_transformers10\\lib\\site-packages\\pandas\\core\\apply.py:1076\u001b[0m, in \u001b[0;36mSeriesApply.apply_standard\u001b[1;34m(self)\u001b[0m\n\u001b[0;32m 1074\u001b[0m \u001b[39melse\u001b[39;00m:\n\u001b[0;32m 1075\u001b[0m values \u001b[39m=\u001b[39m obj\u001b[39m.\u001b[39mastype(\u001b[39mobject\u001b[39m)\u001b[39m.\u001b[39m_values\n\u001b[1;32m-> 1076\u001b[0m mapped \u001b[39m=\u001b[39m lib\u001b[39m.\u001b[39;49mmap_infer(\n\u001b[0;32m 1077\u001b[0m values,\n\u001b[0;32m 1078\u001b[0m f,\n\u001b[0;32m 1079\u001b[0m convert\u001b[39m=\u001b[39;49m\u001b[39mself\u001b[39;49m\u001b[39m.\u001b[39;49mconvert_dtype,\n\u001b[0;32m 1080\u001b[0m )\n\u001b[0;32m 1082\u001b[0m \u001b[39mif\u001b[39;00m \u001b[39mlen\u001b[39m(mapped) \u001b[39mand\u001b[39;00m \u001b[39misinstance\u001b[39m(mapped[\u001b[39m0\u001b[39m], ABCSeries):\n\u001b[0;32m 1083\u001b[0m \u001b[39m# GH#43986 Need to do list(mapped) in order to get treated as nested\u001b[39;00m\n\u001b[0;32m 1084\u001b[0m \u001b[39m# See also GH#25959 regarding EA support\u001b[39;00m\n\u001b[0;32m 1085\u001b[0m \u001b[39mreturn\u001b[39;00m obj\u001b[39m.\u001b[39m_constructor_expanddim(\u001b[39mlist\u001b[39m(mapped), index\u001b[39m=\u001b[39mobj\u001b[39m.\u001b[39mindex)\n", + "File \u001b[1;32mc:\\Users\\sf230\\anaconda3\\envs\\coastseg_transformers10\\lib\\site-packages\\pandas\\_libs\\lib.pyx:2834\u001b[0m, in \u001b[0;36mpandas._libs.lib.map_infer\u001b[1;34m()\u001b[0m\n", + "\u001b[1;32mc:\\development\\doodleverse\\coastseg\\CoastSeg\\merge_session_final_nonoverlapping_script.ipynb Cell 43\u001b[0m line \u001b[0;36m8\n\u001b[0;32m 82\u001b[0m \u001b[39mreturn\u001b[39;00m MultiPoint(geometry\u001b[39m.\u001b[39mcoords)\n\u001b[0;32m 83\u001b[0m \u001b[39melif\u001b[39;00m \u001b[39misinstance\u001b[39m(geometry, MultiLineString):\n\u001b[1;32m---> 84\u001b[0m points \u001b[39m=\u001b[39m [MultiPoint(line\u001b[39m.\u001b[39mcoords) \u001b[39mfor\u001b[39;00m line \u001b[39min\u001b[39;00m geometry]\n\u001b[0;32m 85\u001b[0m \u001b[39mreturn\u001b[39;00m MultiPoint([point \u001b[39mfor\u001b[39;00m multi \u001b[39min\u001b[39;00m points \u001b[39mfor\u001b[39;00m point \u001b[39min\u001b[39;00m multi])\n\u001b[0;32m 86\u001b[0m \u001b[39melse\u001b[39;00m:\n", + "\u001b[1;31mTypeError\u001b[0m: 'MultiLineString' object is not iterable" + 
] + } + ], + "source": [ + "result = convert_lines_to_multipoints(gdf)" + ] + }, + { + "cell_type": "code", + "execution_count": 70, + "metadata": {}, + "outputs": [ + { + "data": { + "image/svg+xml": [ + "" + ], + "text/plain": [ + "" + ] + }, + "execution_count": 70, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "gdf.iloc[0]['geometry']" + ] + }, + { + "cell_type": "code", + "execution_count": 76, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "" + ] + }, + "execution_count": 76, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "gdf.iloc[0]['geometry'].geoms" + ] + }, + { + "cell_type": "code", + "execution_count": 78, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[, ]\n" + ] + } + ], + "source": [ + "if isinstance(gdf.iloc[0]['geometry'], MultiLineString):\n", + " points = [MultiPoint(line.coords) for line in gdf.iloc[0]['geometry'].geoms]\n", + " print(points)" + ] + }, + { + "cell_type": "code", + "execution_count": 83, + "metadata": {}, + "outputs": [ + { + "data": { + "image/svg+xml": [ + "" + ], + "text/plain": [ + "" + ] + }, + "execution_count": 83, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "from shapely.geometry import Point,MultiPoint\n", + "\n", + "Point(0,0)\n", + "MultiPoint([Point(0,0).coords])" + ] + }, + { + "cell_type": "code", + "execution_count": 68, + "metadata": {}, + "outputs": [ + { + "ename": "TypeError", + "evalue": "'MultiLineString' object is not iterable", + "output_type": "error", + "traceback": [ + "\u001b[1;31m---------------------------------------------------------------------------\u001b[0m", + "\u001b[1;31mTypeError\u001b[0m Traceback (most recent call last)", + "\u001b[1;32mc:\\development\\doodleverse\\coastseg\\CoastSeg\\merge_session_final_nonoverlapping_script.ipynb Cell 44\u001b[0m line \u001b[0;36m1\n\u001b[0;32m 8\u001b[0m \u001b[39mreturn\u001b[39;00m geometry \u001b[39m# Return the original geometry if it's not a LineString or MultiLineString\u001b[39;00m\n\u001b[0;32m 10\u001b[0m \u001b[39m# Apply the conversion function to each row in the GeoDataFrame\u001b[39;00m\n\u001b[1;32m---> 11\u001b[0m gdf[\u001b[39m\"\u001b[39m\u001b[39mgeometry\u001b[39m\u001b[39m\"\u001b[39m] \u001b[39m=\u001b[39m gdf[\u001b[39m\"\u001b[39;49m\u001b[39mgeometry\u001b[39;49m\u001b[39m\"\u001b[39;49m]\u001b[39m.\u001b[39;49mapply(line_to_multipoint)\n", + "File \u001b[1;32mc:\\Users\\sf230\\anaconda3\\envs\\coastseg_transformers10\\lib\\site-packages\\geopandas\\geoseries.py:645\u001b[0m, in \u001b[0;36mGeoSeries.apply\u001b[1;34m(self, func, convert_dtype, args, **kwargs)\u001b[0m\n\u001b[0;32m 643\u001b[0m \u001b[39m@doc\u001b[39m(pd\u001b[39m.\u001b[39mSeries)\n\u001b[0;32m 644\u001b[0m \u001b[39mdef\u001b[39;00m \u001b[39mapply\u001b[39m(\u001b[39mself\u001b[39m, func, convert_dtype\u001b[39m=\u001b[39m\u001b[39mTrue\u001b[39;00m, args\u001b[39m=\u001b[39m(), \u001b[39m*\u001b[39m\u001b[39m*\u001b[39mkwargs):\n\u001b[1;32m--> 645\u001b[0m result \u001b[39m=\u001b[39m \u001b[39msuper\u001b[39m()\u001b[39m.\u001b[39mapply(func, convert_dtype\u001b[39m=\u001b[39mconvert_dtype, args\u001b[39m=\u001b[39margs, \u001b[39m*\u001b[39m\u001b[39m*\u001b[39mkwargs)\n\u001b[0;32m 646\u001b[0m \u001b[39mif\u001b[39;00m \u001b[39misinstance\u001b[39m(result, GeoSeries):\n\u001b[0;32m 647\u001b[0m \u001b[39mif\u001b[39;00m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39mcrs \u001b[39mis\u001b[39;00m 
\u001b[39mnot\u001b[39;00m \u001b[39mNone\u001b[39;00m:\n", + "File \u001b[1;32mc:\\Users\\sf230\\anaconda3\\envs\\coastseg_transformers10\\lib\\site-packages\\pandas\\core\\series.py:4630\u001b[0m, in \u001b[0;36mSeries.apply\u001b[1;34m(self, func, convert_dtype, args, **kwargs)\u001b[0m\n\u001b[0;32m 4520\u001b[0m \u001b[39mdef\u001b[39;00m \u001b[39mapply\u001b[39m(\n\u001b[0;32m 4521\u001b[0m \u001b[39mself\u001b[39m,\n\u001b[0;32m 4522\u001b[0m func: AggFuncType,\n\u001b[1;32m (...)\u001b[0m\n\u001b[0;32m 4525\u001b[0m \u001b[39m*\u001b[39m\u001b[39m*\u001b[39mkwargs,\n\u001b[0;32m 4526\u001b[0m ) \u001b[39m-\u001b[39m\u001b[39m>\u001b[39m DataFrame \u001b[39m|\u001b[39m Series:\n\u001b[0;32m 4527\u001b[0m \u001b[39m \u001b[39m\u001b[39m\"\"\"\u001b[39;00m\n\u001b[0;32m 4528\u001b[0m \u001b[39m Invoke function on values of Series.\u001b[39;00m\n\u001b[0;32m 4529\u001b[0m \n\u001b[1;32m (...)\u001b[0m\n\u001b[0;32m 4628\u001b[0m \u001b[39m dtype: float64\u001b[39;00m\n\u001b[0;32m 4629\u001b[0m \u001b[39m \"\"\"\u001b[39;00m\n\u001b[1;32m-> 4630\u001b[0m \u001b[39mreturn\u001b[39;00m SeriesApply(\u001b[39mself\u001b[39;49m, func, convert_dtype, args, kwargs)\u001b[39m.\u001b[39;49mapply()\n", + "File \u001b[1;32mc:\\Users\\sf230\\anaconda3\\envs\\coastseg_transformers10\\lib\\site-packages\\pandas\\core\\apply.py:1025\u001b[0m, in \u001b[0;36mSeriesApply.apply\u001b[1;34m(self)\u001b[0m\n\u001b[0;32m 1022\u001b[0m \u001b[39mreturn\u001b[39;00m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39mapply_str()\n\u001b[0;32m 1024\u001b[0m \u001b[39m# self.f is Callable\u001b[39;00m\n\u001b[1;32m-> 1025\u001b[0m \u001b[39mreturn\u001b[39;00m \u001b[39mself\u001b[39;49m\u001b[39m.\u001b[39;49mapply_standard()\n", + "File \u001b[1;32mc:\\Users\\sf230\\anaconda3\\envs\\coastseg_transformers10\\lib\\site-packages\\pandas\\core\\apply.py:1076\u001b[0m, in \u001b[0;36mSeriesApply.apply_standard\u001b[1;34m(self)\u001b[0m\n\u001b[0;32m 1074\u001b[0m \u001b[39melse\u001b[39;00m:\n\u001b[0;32m 1075\u001b[0m values \u001b[39m=\u001b[39m obj\u001b[39m.\u001b[39mastype(\u001b[39mobject\u001b[39m)\u001b[39m.\u001b[39m_values\n\u001b[1;32m-> 1076\u001b[0m mapped \u001b[39m=\u001b[39m lib\u001b[39m.\u001b[39;49mmap_infer(\n\u001b[0;32m 1077\u001b[0m values,\n\u001b[0;32m 1078\u001b[0m f,\n\u001b[0;32m 1079\u001b[0m convert\u001b[39m=\u001b[39;49m\u001b[39mself\u001b[39;49m\u001b[39m.\u001b[39;49mconvert_dtype,\n\u001b[0;32m 1080\u001b[0m )\n\u001b[0;32m 1082\u001b[0m \u001b[39mif\u001b[39;00m \u001b[39mlen\u001b[39m(mapped) \u001b[39mand\u001b[39;00m \u001b[39misinstance\u001b[39m(mapped[\u001b[39m0\u001b[39m], ABCSeries):\n\u001b[0;32m 1083\u001b[0m \u001b[39m# GH#43986 Need to do list(mapped) in order to get treated as nested\u001b[39;00m\n\u001b[0;32m 1084\u001b[0m \u001b[39m# See also GH#25959 regarding EA support\u001b[39;00m\n\u001b[0;32m 1085\u001b[0m \u001b[39mreturn\u001b[39;00m obj\u001b[39m.\u001b[39m_constructor_expanddim(\u001b[39mlist\u001b[39m(mapped), index\u001b[39m=\u001b[39mobj\u001b[39m.\u001b[39mindex)\n", + "File \u001b[1;32mc:\\Users\\sf230\\anaconda3\\envs\\coastseg_transformers10\\lib\\site-packages\\pandas\\_libs\\lib.pyx:2834\u001b[0m, in \u001b[0;36mpandas._libs.lib.map_infer\u001b[1;34m()\u001b[0m\n", + "\u001b[1;32mc:\\development\\doodleverse\\coastseg\\CoastSeg\\merge_session_final_nonoverlapping_script.ipynb Cell 44\u001b[0m line \u001b[0;36m5\n\u001b[0;32m 3\u001b[0m \u001b[39mreturn\u001b[39;00m MultiPoint(geometry\u001b[39m.\u001b[39mcoords)\n\u001b[0;32m 4\u001b[0m 
\u001b[39melif\u001b[39;00m \u001b[39misinstance\u001b[39m(geometry, MultiLineString):\n\u001b[1;32m----> 5\u001b[0m points \u001b[39m=\u001b[39m [MultiPoint(line\u001b[39m.\u001b[39mcoords) \u001b[39mfor\u001b[39;00m line \u001b[39min\u001b[39;00m geometry]\n\u001b[0;32m 6\u001b[0m \u001b[39mreturn\u001b[39;00m MultiPoint([point \u001b[39mfor\u001b[39;00m multi \u001b[39min\u001b[39;00m points \u001b[39mfor\u001b[39;00m point \u001b[39min\u001b[39;00m multi])\n\u001b[0;32m 7\u001b[0m \u001b[39melse\u001b[39;00m:\n", + "\u001b[1;31mTypeError\u001b[0m: 'MultiLineString' object is not iterable" + ] + } + ], + "source": [ + "def line_to_multipoint(geometry):\n", + " if isinstance(geometry, LineString):\n", + " return MultiPoint(geometry.coords)\n", + " elif isinstance(geometry, MultiLineString):\n", + " points = [MultiPoint(line.coords) for line in geometry]\n", + " return MultiPoint([point for multi in points for point in multi])\n", + " else:\n", + " return geometry # Return the original geometry if it's not a LineString or MultiLineString\n", + "\n", + "# Apply the conversion function to each row in the GeoDataFrame\n", + "gdf[\"geometry\"] = gdf[\"geometry\"].apply(line_to_multipoint)\n" + ] + }, + { + "cell_type": "code", + "execution_count": 86, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
geoaccuracycloud_coversatnamedategeometry
010.1L82018-12-30 18:22:25MULTIPOINT (0.00000 0.00000, 1.00000 1.00000)
120.2L82018-01-30 19:22:25MULTIPOINT (2.00000 2.00000, 3.00000 3.00000)
230.3L82022-01-03 19:22:25MULTIPOINT (4.00000 4.00000, 5.00000 5.00000)
\n", + "
" + ], + "text/plain": [ + " geoaccuracy cloud_cover satname date \\\n", + "0 1 0.1 L8 2018-12-30 18:22:25 \n", + "1 2 0.2 L8 2018-01-30 19:22:25 \n", + "2 3 0.3 L8 2022-01-03 19:22:25 \n", + "\n", + " geometry \n", + "0 MULTIPOINT (0.00000 0.00000, 1.00000 1.00000) \n", + "1 MULTIPOINT (2.00000 2.00000, 3.00000 3.00000) \n", + "2 MULTIPOINT (4.00000 4.00000, 5.00000 5.00000) " + ] + }, + "execution_count": 86, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "from coastseg.merge_utils import merge_geometries\n", + "\n", + "# create a list of geometries\n", + "geometries = [\n", + " MultiPoint([(0, 0), (1, 1)]),\n", + " MultiPoint([(2, 2), (3, 3)]),\n", + " MultiPoint([(4, 4), (5, 5)]),\n", + "]\n", + "# create a dictionary with the other columns\n", + "data = {\n", + " \"geoaccuracy\": [1, 2, 3],\n", + " \"cloud_cover\": [0.1, 0.2, 0.3],\n", + " \"satname\": [\"L8\", \"L8\", \"L8\"],\n", + " \"date\": [\n", + " pd.Timestamp(\"2018-12-30 18:22:25\"),\n", + " pd.Timestamp(\"2018-1-30 19:22:25\"),\n", + " pd.Timestamp(\"2022-01-03 19:22:25\"),\n", + " ],\n", + " \"geometry\": geometries,\n", + "}\n", + "# create a GeoDataFrame from the dictionary\n", + "df = gpd.GeoDataFrame(data, geometry=\"geometry\", crs=\"epsg:4326\")\n", + "df" + ] + }, + { + "cell_type": "code", + "execution_count": 88, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
geoaccuracycloud_coversatnamedate
010.1L82018-12-30 18:22:25
120.2L82018-01-30 19:22:25
230.3L82022-01-03 19:22:25
\n", + "
" + ], + "text/plain": [ + " geoaccuracy cloud_cover satname date\n", + "0 1 0.1 L8 2018-12-30 18:22:25\n", + "1 2 0.2 L8 2018-01-30 19:22:25\n", + "2 3 0.3 L8 2022-01-03 19:22:25" + ] + }, + "execution_count": 88, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "result = merge_geometries(df)\n", + "result" + ] + }, + { + "cell_type": "code", + "execution_count": 118, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
dategeometrygeoaccuracysatnamecloud_cover
02018-12-30 18:22:25MULTIPOINT (-117.45892 33.28226, -118.45892 35...5.088L80.000000
12019-01-28 05:12:28MULTIPOINT (-117.45881 33.28239, -120.45892 40...5.802L80.230000
22020-05-23 19:24:27MULTIPOINT (-117.45875 33.28242)6.596L80.263967
\n", + "
" + ], + "text/plain": [ + " date geometry \\\n", + "0 2018-12-30 18:22:25 MULTIPOINT (-117.45892 33.28226, -118.45892 35... \n", + "1 2019-01-28 05:12:28 MULTIPOINT (-117.45881 33.28239, -120.45892 40... \n", + "2 2020-05-23 19:24:27 MULTIPOINT (-117.45875 33.28242) \n", + "\n", + " geoaccuracy satname cloud_cover \n", + "0 5.088 L8 0.000000 \n", + "1 5.802 L8 0.230000 \n", + "2 6.596 L8 0.263967 " + ] + }, + "execution_count": 118, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "import pandas as pd\n", + "import geopandas as gpd\n", + "from shapely.geometry import MultiPoint, MultiLineString, LineString, Point\n", + "\n", + "data = {\n", + " \"date\": [\n", + " pd.Timestamp(\"2018-12-30 18:22:25\"),\n", + " pd.Timestamp(\"2019-1-28 05:12:28\"),\n", + " pd.Timestamp(\"2020-5-23 19:24:27\"),\n", + " ],\n", + " \"geometry\": [\n", + " MultiPoint([(-117.45892, 33.28226), (-118.45892, 35.28226)]),\n", + " MultiPoint([(-117.45881, 33.28239), (-120.45892, 40.28226)]),\n", + " MultiPoint([(-117.45875, 33.28242)]),\n", + " ],\n", + " \"geoaccuracy\": [\n", + " 5.088,\n", + " 5.802,\n", + " 6.596,\n", + " ],\n", + " \"satname\": [\"L8\", \"L8\", \"L8\"],\n", + " \"cloud_cover\": [0.0, 0.23, 0.263967],\n", + " }\n", + "extracted_gdf1 = gpd.GeoDataFrame(data, crs=\"epsg:4326\")\n", + "extracted_gdf1" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
dategeometrygeoaccuracysatnamecloud_cover
02018-12-30 18:22:25MULTIPOINT (-117.44480 33.26540)5.088L80.000000
12020-01-28 05:12:28MULTIPOINT (-117.45899 33.28226)5.802L80.000000
22020-05-23 19:24:27MULTIPOINT (-117.45896 33.28226)6.596L80.263967
\n", + "
" + ], + "text/plain": [ + " date geometry geoaccuracy satname \\\n", + "0 2018-12-30 18:22:25 MULTIPOINT (-117.44480 33.26540) 5.088 L8 \n", + "1 2020-01-28 05:12:28 MULTIPOINT (-117.45899 33.28226) 5.802 L8 \n", + "2 2020-05-23 19:24:27 MULTIPOINT (-117.45896 33.28226) 6.596 L8 \n", + "\n", + " cloud_cover \n", + "0 0.000000 \n", + "1 0.000000 \n", + "2 0.263967 " + ] + }, + "execution_count": 10, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# this is the gdf shares pd.Timestamp('2018-12-30 18:22:25') and pd.Timestamp('2020-5-23 19:24:27') with extracted_gdf1\n", + "data = {\n", + " \"date\": [\n", + " pd.Timestamp(\"2018-12-30 18:22:25\"),\n", + " pd.Timestamp(\"2020-1-28 05:12:28\"),\n", + " pd.Timestamp(\"2020-5-23 19:24:27\"),\n", + " ],\n", + " \"geometry\": [\n", + " MultiPoint([(-117.44480, 33.26540)]),\n", + " MultiPoint([(-117.45899, 33.28226)]),\n", + " MultiPoint([(-117.45896, 33.28226)]),\n", + " ],\n", + " \"geoaccuracy\": [\n", + " 5.088,\n", + " 5.802,\n", + " 6.596,\n", + " ],\n", + " \"satname\": [\"L8\", \"L8\", \"L8\"],\n", + " \"cloud_cover\": [0.0, 0.0, 0.263967],\n", + "}\n", + "extracted_gdf2 = gpd.GeoDataFrame(data, crs=\"epsg:4326\")\n", + "extracted_gdf2 " + ] + }, + { + "cell_type": "code", + "execution_count": 102, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
" + ], + "text/plain": [ + " date geometry geoaccuracy satname \\\n", + "0 2015-12-30 18:22:25 MULTIPOINT (-117.45896 33.28226) 5.088 L9 \n", + "1 2019-01-28 05:12:28 MULTIPOINT (-117.45894 33.28226) 5.802 L9 \n", + "2 2020-05-23 19:24:27 MULTIPOINT (-117.45891 33.28232) 6.596 L8 \n", + "\n", + " cloud_cover \n", + "0 0.000000 \n", + "1 0.100000 \n", + "2 0.263967 " + ] + }, + "execution_count": 102, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "data = {\n", + " \"date\": [\n", + " pd.Timestamp(\"2015-12-30 18:22:25\"),\n", + " pd.Timestamp(\"2019-1-28 05:12:28\"),\n", + " pd.Timestamp(\"2020-5-23 19:24:27\"),\n", + " ],\n", + " \"geometry\": [\n", + " MultiPoint([(-117.45896, 33.28226)]),\n", + " MultiPoint([(-117.45894, 33.28226)]),\n", + " MultiPoint([(-117.45891, 33.28232)]),\n", + " ],\n", + " \"geoaccuracy\": [\n", + " 5.088,\n", + " 5.802,\n", + " 6.596,\n", + " ],\n", + " \"satname\": [\"L9\", \"L9\", \"L8\"],\n", + " \"cloud_cover\": [0.0, 0.1, 0.263967],\n", + "}\n", + "extracted_gdf3 = gpd.GeoDataFrame(data, crs=\"epsg:4326\")\n", + "extracted_gdf3" + ] + }, + { + "cell_type": "code", + "execution_count": 114, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
" + ], + "text/plain": [ + " date geometry_left satname \\\n", + "0 2018-12-30 18:22:25 MULTIPOINT (-117.45892 33.28226) L8 \n", + "1 2020-05-23 19:24:27 MULTIPOINT (-117.45875 33.28242) L8 \n", + "\n", + " geometry_right cloud_cover geoaccuracy \n", + "0 MULTIPOINT (-117.44480 33.26540) 0.000000 5.088 \n", + "1 MULTIPOINT (-117.45896 33.28226) 0.263967 6.596 " + ] + }, + "execution_count": 114, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "from coastseg.merge_utils import merge_geodataframes\n", + "\n", + "# merged_gdf = merge_geodataframes(['date', 'satname'],'inner', aggregation_funcs,'epsg:4326', *gdfs)\n", + "aggregation_funcs = {\n", + " 'cloud_cover': 'mean',\n", + " 'geoaccuracy': 'mean'\n", + " }\n", + "merge_geodataframes( ['date', 'satname'],'inner',aggregation_funcs,'epsg:4326', extracted_gdf1, extracted_gdf2)" + ] + }, + { + "cell_type": "code", + "execution_count": 115, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
" + ], + "text/plain": [ + " date geometry_left satname \\\n", + "0 2020-05-23 19:24:27 MULTIPOINT (-117.45875 33.28242) L8 \n", + "\n", + " geometry_right geometry \\\n", + "0 MULTIPOINT (-117.45896 33.28226) MULTIPOINT (-117.45891 33.28232) \n", + "\n", + " cloud_cover geoaccuracy \n", + "0 0.263967 6.596 " + ] + }, + "execution_count": 115, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "from coastseg.merge_utils import merge_geodataframes\n", + "\n", + "# merged_gdf = merge_geodataframes(['date', 'satname'],'inner', aggregation_funcs,'epsg:4326', *gdfs)\n", + "aggregation_funcs = {\n", + " 'cloud_cover': 'mean',\n", + " 'geoaccuracy': 'mean'\n", + " }\n", + "merge_geodataframes( ['date', 'satname'],'inner',aggregation_funcs,'epsg:4326', extracted_gdf1, extracted_gdf2, extracted_gdf3)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from coastseg.merge_utils import merge_geodataframes\n", + "\n", + "# merged_gdf = merge_geodataframes(['date', 'satname'],'inner', aggregation_funcs,'epsg:4326', *gdfs)\n", + "aggregation_funcs = {\n", + " 'cloud_cover': 'mean',\n", + " 'geoaccuracy': 'mean'\n", + " }\n", + "\n", + "merge_geodataframes( ['date', 'satname'],'inner',aggregation_funcs,'epsg:4326', extracted_gdf1, extracted_gdf2, extracted_gdf3)" + ] + }, + { + "cell_type": "code", + "execution_count": 14, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
" + ], + "text/plain": [ + " date geometry_x geoaccuracy_x \\\n", + "0 2020-05-23 19:24:27 MULTIPOINT (-117.45875 33.28242) 6.596 \n", + "\n", + " satname_x cloud_cover_x geometry_y geoaccuracy_y \\\n", + "0 L8 0.263967 MULTIPOINT (-117.45896 33.28226) 6.596 \n", + "\n", + " satname_y cloud_cover_y geometry geoaccuracy \\\n", + "0 L8 0.263967 MULTIPOINT (-117.45891 33.28232) 6.596 \n", + "\n", + " satname cloud_cover \n", + "0 L8 0.263967 " + ] + }, + "execution_count": 14, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# this code did not work it only found the dates in common across all the dataframes.\n", + "from functools import reduce\n", + "\n", + "# Step 1: Find the common dates using set intersection\n", + "common_dates = set(extracted_gdf1['date']).intersection(extracted_gdf2['date'], extracted_gdf3['date'])\n", + "\n", + "# Step 2: Filter the dataframes to only include rows with the common date\n", + "dfs_filtered = [df[df['date'].isin(common_dates)] for df in [extracted_gdf1, extracted_gdf2, extracted_gdf3]]\n", + "\n", + "# Step 3: Perform a single merge operation on the filtered dataframes\n", + "final_df = reduce(lambda left, right: pd.merge(left, right, on='date', how='inner'), dfs_filtered)\n", + "final_df" + ] + }, + { + "cell_type": "code", + "execution_count": 16, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
" + ], + "text/plain": [ + " date geometry_x geoaccuracy_x \\\n", + "0 2020-05-23 19:24:27 MULTIPOINT (-117.45875 33.28242) 6.596 \n", + "\n", + " satname_x cloud_cover_x geometry_y geoaccuracy_y \\\n", + "0 L8 0.263967 MULTIPOINT (-117.45896 33.28226) 6.596 \n", + "\n", + " satname_y cloud_cover_y geometry geoaccuracy \\\n", + "0 L8 0.263967 MULTIPOINT (-117.45891 33.28232) 6.596 \n", + "\n", + " satname cloud_cover \n", + "0 L8 0.263967 " + ] + }, + "execution_count": 16, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# only got dates in common across all the dataframes\n", + "dfs = [extracted_gdf1, extracted_gdf2, extracted_gdf3]\n", + "\n", + "# Assuming dfs is a list of all your dataframes\n", + "common_dates = set(dfs[0]['date'])\n", + "for df in dfs[1:]:\n", + " common_dates = common_dates.intersection(df['date'])\n", + "\n", + "dfs_filtered = [df[df['date'].isin(common_dates)] for df in dfs]\n", + "final_df = reduce(lambda left, right: pd.merge(left, right, on='date', how='inner'), dfs_filtered)\n", + "final_df\n" + ] + }, + { + "cell_type": "code", + "execution_count": 18, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
" + ], + "text/plain": [ + " date geometry_x geoaccuracy_x \\\n", + "0 2018-12-30 18:22:25 MULTIPOINT (-117.45892 33.28226) 5.088 \n", + "1 2020-05-23 19:24:27 MULTIPOINT (-117.45875 33.28242) 6.596 \n", + "0 2019-01-28 05:12:28 MULTIPOINT (-117.45881 33.28239) 5.802 \n", + "1 2020-05-23 19:24:27 MULTIPOINT (-117.45875 33.28242) 6.596 \n", + "0 2020-05-23 19:24:27 MULTIPOINT (-117.45896 33.28226) 6.596 \n", + "\n", + " satname_x cloud_cover_x geometry_y geoaccuracy_y \\\n", + "0 L8 0.000000 MULTIPOINT (-117.44480 33.26540) 5.088 \n", + "1 L8 0.263967 MULTIPOINT (-117.45896 33.28226) 6.596 \n", + "0 L8 0.230000 MULTIPOINT (-117.45894 33.28226) 5.802 \n", + "1 L8 0.263967 MULTIPOINT (-117.45891 33.28232) 6.596 \n", + "0 L8 0.263967 MULTIPOINT (-117.45891 33.28232) 6.596 \n", + "\n", + " satname_y cloud_cover_y \n", + "0 L8 0.000000 \n", + "1 L8 0.263967 \n", + "0 L9 0.100000 \n", + "1 L8 0.263967 \n", + "0 L8 0.263967 " + ] + }, + "execution_count": 18, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "from itertools import combinations\n", + "\n", + "# Put all dataframes in a list\n", + "dfs = [extracted_gdf1, extracted_gdf2, extracted_gdf3]\n", + "\n", + "# Initialize an empty list to store the merged dataframes\n", + "merged_dfs = []\n", + "\n", + "# Loop over all combinations of 2 dataframes\n", + "for df_a, df_b in combinations(dfs, 2):\n", + " # Perform an 'inner' merge and append the result to the list\n", + " merged_dfs.append(df_a.merge(df_b, on='date', how='inner'))\n", + "\n", + "# Concatenate all the merged dataframes\n", + "final_df = pd.concat(merged_dfs)\n", + "final_df" + ] + }, + { + "cell_type": "code", + "execution_count": 24, + "metadata": {}, + "outputs": [ + { + "ename": "TypeError", + "evalue": "Cannot interpret '' as a data type", + "output_type": "error", + "traceback": [ + "\u001b[1;31m---------------------------------------------------------------------------\u001b[0m", + "\u001b[1;31mTypeError\u001b[0m Traceback (most recent call last)", + "\u001b[1;32mc:\\development\\doodleverse\\coastseg\\CoastSeg\\merge_session_final_nonoverlapping_script.ipynb Cell 63\u001b[0m line \u001b[0;36m2\n\u001b[0;32m 23\u001b[0m \u001b[39mreturn\u001b[39;00m merged\n\u001b[0;32m 26\u001b[0m \u001b[39m# Perform a full outer join and average the numeric columns across all dataframes\u001b[39;00m\n\u001b[1;32m---> 27\u001b[0m result \u001b[39m=\u001b[39m reduce(merge_and_average, dfs)\n", + "\u001b[1;32mc:\\development\\doodleverse\\coastseg\\CoastSeg\\merge_session_final_nonoverlapping_script.ipynb Cell 63\u001b[0m line \u001b[0;36m1\n\u001b[0;32m 11\u001b[0m \u001b[39m# Loop over all columns\u001b[39;00m\n\u001b[0;32m 12\u001b[0m \u001b[39mfor\u001b[39;00m column \u001b[39min\u001b[39;00m \u001b[39mset\u001b[39m(df1\u001b[39m.\u001b[39mcolumns)\u001b[39m.\u001b[39mintersection(df2\u001b[39m.\u001b[39mcolumns):\n\u001b[0;32m 13\u001b[0m \u001b[39m# Skip non-numeric columns\u001b[39;00m\n\u001b[1;32m---> 14\u001b[0m \u001b[39mif\u001b[39;00m \u001b[39mnot\u001b[39;00m np\u001b[39m.\u001b[39;49missubdtype(df1[column]\u001b[39m.\u001b[39;49mdtype, np\u001b[39m.\u001b[39;49mnumber):\n\u001b[0;32m 15\u001b[0m \u001b[39mcontinue\u001b[39;00m\n\u001b[0;32m 17\u001b[0m \u001b[39m# Average the values in the two columns\u001b[39;00m\n", + "File \u001b[1;32mc:\\Users\\sf230\\anaconda3\\envs\\coastseg_transformers10\\lib\\site-packages\\numpy\\core\\numerictypes.py:417\u001b[0m, in \u001b[0;36missubdtype\u001b[1;34m(arg1, arg2)\u001b[0m\n\u001b[0;32m 359\u001b[0m 
\u001b[39m\u001b[39m\u001b[39mr\u001b[39m\u001b[39m\"\"\"\u001b[39;00m\n\u001b[0;32m 360\u001b[0m \u001b[39mReturns True if first argument is a typecode lower/equal in type hierarchy.\u001b[39;00m\n\u001b[0;32m 361\u001b[0m \n\u001b[1;32m (...)\u001b[0m\n\u001b[0;32m 414\u001b[0m \n\u001b[0;32m 415\u001b[0m \u001b[39m\"\"\"\u001b[39;00m\n\u001b[0;32m 416\u001b[0m \u001b[39mif\u001b[39;00m \u001b[39mnot\u001b[39;00m issubclass_(arg1, generic):\n\u001b[1;32m--> 417\u001b[0m arg1 \u001b[39m=\u001b[39m dtype(arg1)\u001b[39m.\u001b[39mtype\n\u001b[0;32m 418\u001b[0m \u001b[39mif\u001b[39;00m \u001b[39mnot\u001b[39;00m issubclass_(arg2, generic):\n\u001b[0;32m 419\u001b[0m arg2 \u001b[39m=\u001b[39m dtype(arg2)\u001b[39m.\u001b[39mtype\n", + "\u001b[1;31mTypeError\u001b[0m: Cannot interpret '' as a data type" + ] + } + ], + "source": [ + "from functools import reduce\n", + "import numpy as np\n", + "\n", + "# only got dates in common across all the dataframes\n", + "dfs = [extracted_gdf1, extracted_gdf2, extracted_gdf3]\n", + "\n", + "def merge_and_average(df1, df2):\n", + " # Perform a full outer join\n", + " merged = pd.merge(df1, df2, on=['satname', 'date'], how='outer', suffixes=('_df1', '_df2'))\n", + "\n", + " # Loop over all columns\n", + " for column in set(df1.columns).intersection(df2.columns):\n", + " # Skip non-numeric columns\n", + " if not np.issubdtype(df1[column].dtype, np.number):\n", + " continue\n", + "\n", + " # Average the values in the two columns\n", + " merged[column] = merged[[f'{column}_df1', f'{column}_df2']].mean(axis=1)\n", + "\n", + " # Drop the original columns\n", + " merged.drop(columns=[col for col in merged.columns if '_df1' in col or '_df2' in col], inplace=True)\n", + "\n", + " return merged\n", + "\n", + "\n", + "# Perform a full outer join and average the numeric columns across all dataframes\n", + "result = reduce(merge_and_average, dfs)" + ] + }, + { + "cell_type": "code", + "execution_count": 46, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "float64\n", + "0 0.000000\n", + "1 0.230000\n", + "2 0.263967\n", + "Name: cloud_cover, dtype: float64\n", + "float64\n", + "0 5.088\n", + "1 5.802\n", + "2 6.596\n", + "Name: geoaccuracy, dtype: float64\n", + "object\n", + "datetime64[ns]\n", + "geometry\n" + ] + }, + { + "data": { + "text/html": [ + "
" + ], + "text/plain": [ + " date satname cloud_cover geoaccuracy \\\n", + "0 2018-12-30 18:22:25 L8 0.000000 5.088 \n", + "1 2019-01-28 05:12:28 L8 0.230000 5.802 \n", + "2 2020-05-23 19:24:27 L8 0.263967 6.596 \n", + "3 2020-01-28 05:12:28 L8 0.000000 5.802 \n", + "\n", + " geometry \n", + "0 MULTIPOINT (-117.45892 33.28226, -117.44480 33... \n", + "1 POINT (-117.45881 33.28239) \n", + "2 MULTIPOINT (-117.45896 33.28226, -117.45875 33... \n", + "3 POINT (-117.45899 33.28226) " + ] + }, + "execution_count": 46, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "from shapely.ops import unary_union\n", + "merged = pd.merge(extracted_gdf1, extracted_gdf2, on=['satname', 'date'], how='outer', suffixes=('_df1', '_df2'))\n", + "# Loop over all columns\n", + "for column in set(extracted_gdf1.columns).intersection(extracted_gdf2.columns):\n", + " # Skip non-numeric columns\n", + " # print(extracted_gdf1[column])\n", + " print(extracted_gdf1[column].dtype)\n", + " if isinstance(extracted_gdf1[column].dtype, gpd.array.GeometryDtype):\n", + " columns = [col for col in merged.columns if \"geometry\" in col]\n", + " merged[\"geometry\"] = merged[columns].apply(\n", + " lambda row: unary_union(row.tolist()), axis=1\n", + " )\n", + " # drop the rows that were merged\n", + " merged.drop(columns=[f'{column}_df1', f'{column}_df2'], inplace=True)\n", + " continue\n", + " if not np.issubdtype(extracted_gdf1[column].dtype, np.number):\n", + " continue\n", + " # if not pd.api.types.is_numeric_dtype(extracted_gdf1[column]):\n", + " # continue\n", + " print(extracted_gdf1[column])\n", + " # Average the values in the two columns\n", + " merged[column] = merged[[f'{column}_df1', f'{column}_df2']].mean(axis=1)\n", + " merged.drop(columns=[f'{column}_df1', f'{column}_df2'], inplace=True)\n", + " \n", + "merged" + ] + }, + { + "cell_type": "code", + "execution_count": 50, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
" + ], + "text/plain": [ + " date satname cloud_cover geoaccuracy \\\n", + "0 2018-12-30 18:22:25 L8 0.000000 5.088 \n", + "1 2019-01-28 05:12:28 L8 0.230000 5.802 \n", + "2 2020-05-23 19:24:27 L8 0.263967 6.596 \n", + "3 2020-01-28 05:12:28 L8 0.000000 5.802 \n", + "\n", + " geometry \n", + "0 MULTIPOINT (-117.45892 33.28226, -117.44480 33... \n", + "1 POINT (-117.45881 33.28239) \n", + "2 MULTIPOINT (-117.45896 33.28226, -117.45875 33... \n", + "3 POINT (-117.45899 33.28226) " + ] + }, + "execution_count": 50, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "from shapely.ops import unary_union\n", + "from coastseg.merge_utils import merge_geometries\n", + "\n", + "merged = pd.merge(extracted_gdf1, extracted_gdf2, on=['satname', 'date'], how='outer', suffixes=('_df1', '_df2'))\n", + "# Loop over all columns\n", + "for column in set(extracted_gdf1.columns).intersection(extracted_gdf2.columns):\n", + " # merge the geometries\n", + " if isinstance(extracted_gdf1[column].dtype, gpd.array.GeometryDtype):\n", + " merged = merge_geometries(merged, columns=[f'{column}_df1', f'{column}_df2'], operation=unary_union)\n", + " continue\n", + " # Skip non-numeric columns\n", + " if not np.issubdtype(extracted_gdf1[column].dtype, np.number):\n", + " continue\n", + " # Average the values in the two columns\n", + " merged[column] = merged[[f'{column}_df1', f'{column}_df2']].mean(axis=1)\n", + " merged.drop(columns=[f'{column}_df1', f'{column}_df2'], inplace=True)\n", + " \n", + "merged" + ] + }, + { + "cell_type": "code", + "execution_count": 119, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "merging {['geometry_df1', 'geometry_df2']}\n", + "0 MULTIPOINT (-117.45892 33.28226, -118.45892 35...\n", + "1 MULTIPOINT (-117.45881 33.28239, -120.45892 40...\n", + "2 MULTIPOINT (-117.45875 33.28242)\n", + "Name: geometry, dtype: geometry\n", + "0 MULTIPOINT (-117.44480 33.26540)\n", + "1 MULTIPOINT (-117.45899 33.28226)\n", + "2 MULTIPOINT (-117.45896 33.28226)\n", + "Name: geometry, dtype: geometry\n", + "merging {['geometry_df1', 'geometry_df2']}\n", + "0 MULTIPOINT (-118.45892 35.28226, -117.45892 33...\n", + "1 MULTIPOINT (-120.45892 40.28226, -117.45881 33...\n", + "2 MULTIPOINT (-117.45896 33.28226, -117.45875 33...\n", + "3 POINT (-117.45899 33.28226)\n", + "Name: geometry, dtype: geometry\n", + "0 MULTIPOINT (-117.45896 33.28226)\n", + "1 MULTIPOINT (-117.45894 33.28226)\n", + "2 MULTIPOINT (-117.45891 33.28232)\n", + "Name: geometry, dtype: geometry\n" + ] + }, + { + "data": { + "text/html": [ + "
" + ], + "text/plain": [ + " date satname cloud_cover geoaccuracy \\\n", + "0 2018-12-30 18:22:25 L8 0.000000 5.088 \n", + "1 2019-01-28 05:12:28 L8 0.230000 5.802 \n", + "2 2020-05-23 19:24:27 L8 0.263967 6.596 \n", + "3 2020-01-28 05:12:28 L8 0.000000 5.802 \n", + "4 2015-12-30 18:22:25 L9 0.000000 5.088 \n", + "5 2019-01-28 05:12:28 L9 0.100000 5.802 \n", + "\n", + " geometry \n", + "0 MULTIPOINT (-118.45892 35.28226, -117.45892 33... \n", + "1 MULTIPOINT (-120.45892 40.28226, -117.45881 33... \n", + "2 MULTIPOINT (-117.45896 33.28226, -117.45891 33... \n", + "3 POINT (-117.45899 33.28226) \n", + "4 POINT (-117.45896 33.28226) \n", + "5 POINT (-117.45894 33.28226) " + ] + }, + "execution_count": 119, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "from shapely.ops import unary_union\n", + "from coastseg.merge_utils import merge_geometries\n", + "from functools import reduce\n", + "import pandas as pd\n", + "\n", + "\n", + "def merge_geometries(merged_gdf, columns=None, operation=unary_union):\n", + " \"\"\"\n", + " Performs a specified operation for the geometries with the same date and satname.\n", + "\n", + " Parameters:\n", + " merged_gdf : GeoDataFrame\n", + " The GeoDataFrame to perform the operation on.\n", + " columns : list of str, optional\n", + " The columns to perform the operation on. If None, all columns with 'geometry' in the name are used.\n", + " operation : function, optional\n", + " The operation to perform. If None, unary_union is used.\n", + "\n", + " Returns:\n", + " GeoDataFrame\n", + " The GeoDataFrame with the operation performed.\n", + " \"\"\"\n", + " if columns is None:\n", + " columns = [col for col in merged_gdf.columns if \"geometry\" in col]\n", + " else:\n", + " columns = [col for col in columns if col in merged_gdf.columns]\n", + "\n", + " merged_gdf[\"geometry\"] = merged_gdf[columns].apply(\n", + " lambda row: operation(row.tolist()), axis=1\n", + " )\n", + " for col in columns:\n", + " if col in merged_gdf.columns:\n", + " merged_gdf = merged_gdf.drop(columns=col)\n", + " return merged_gdf\n", + "\n", + "def merge_and_average(df1, df2):\n", + " # Perform a full outer join\n", + " merged = pd.merge(df1, df2, on=['satname', 'date'], how='outer', suffixes=('_df1', '_df2'))\n", + "\n", + " # Loop over all columns\n", + " for column in set(df1.columns).intersection(df2.columns):\n", + " # Merge the geometries\n", + " \n", + " if isinstance(df1[column].dtype, gpd.array.GeometryDtype):\n", + " print(f\"merging {{['{column}_df1', '{column}_df2']}}\")\n", + " print(df1[column])\n", + " print(df2[column])\n", + " # merged = merge_geometries(merged, columns=[f'{column}_df1', f'{column}_df2'], operation=unary_union)\n", + " merged = merge_geometries(merged)\n", + " continue\n", + " # Skip non-numeric columns\n", + " if not pd.api.types.is_numeric_dtype(df1[column]):\n", + " continue\n", + " # Average the values in the two columns\n", + " merged[column] = merged[[f'{column}_df1', f'{column}_df2']].mean(axis=1)\n", + " merged.drop(columns=[f'{column}_df1', f'{column}_df2'], inplace=True)\n", + "\n", + " return merged\n", + "\n", + "# List of GeoDataFrames\n", + "gdfs = [extracted_gdf1, extracted_gdf2, extracted_gdf3]\n", + "\n", + "# Perform a full outer join and average the numeric columns across all GeoDataFrames\n", + "result = reduce(merge_and_average, gdfs)\n", + "\n", + "result\n", + " " + ] + }, + { + "cell_type": "code", + "execution_count": 112, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": 
"stream", + "text": [ + "geometry\n", + "merging {['geometry_df1', 'geometry_df2']}\n", + "0 MULTIPOINT (-117.45892 33.28226)\n", + "1 MULTIPOINT (-117.45881 33.28239)\n", + "2 MULTIPOINT (-117.45875 33.28242)\n", + "Name: geometry, dtype: geometry\n", + "0 MULTIPOINT (-117.44480 33.26540)\n", + "1 MULTIPOINT (-117.45899 33.28226)\n", + "2 MULTIPOINT (-117.45896 33.28226)\n", + "Name: geometry, dtype: geometry\n" + ] + } + ], + "source": [ + "merged = pd.merge(extracted_gdf1, extracted_gdf2, on=['satname', 'date'], how='outer', suffixes=('_df1', '_df2'))\n", + "# Loop over all columns\n", + "for column in set(extracted_gdf1.columns).intersection(extracted_gdf2.columns):\n", + " # Merge the geometries\n", + " \n", + " \n", + " if isinstance(extracted_gdf1[column].dtype, gpd.array.GeometryDtype):\n", + " print(column)\n", + " print(f\"merging {{['{column}_df1', '{column}_df2']}}\")\n", + " print(extracted_gdf1[column])\n", + " print(extracted_gdf2[column])\n", + " # merged = merge_geometries(merged, columns=[f'{column}_df1', f'{column}_df2'], operation=unary_union)\n", + " merged = merge_geometries(merged)" + ] + }, + { + "cell_type": "code", + "execution_count": 116, + "metadata": {}, + "outputs": [], + "source": [ + "merged = pd.merge(extracted_gdf1, extracted_gdf2, on=['satname', 'date'], how='outer', suffixes=('_df1', '_df2'))" + ] + }, + { + "cell_type": "code", + "execution_count": 117, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
" + ], + "text/plain": [ + " date geometry_df1 geoaccuracy_df1 \\\n", + "0 2018-12-30 18:22:25 MULTIPOINT (-117.45892 33.28226) 5.088 \n", + "1 2019-01-28 05:12:28 MULTIPOINT (-117.45881 33.28239) 5.802 \n", + "2 2020-05-23 19:24:27 MULTIPOINT (-117.45875 33.28242) 6.596 \n", + "3 2020-01-28 05:12:28 None NaN \n", + "\n", + " satname cloud_cover_df1 geometry_df2 geoaccuracy_df2 \\\n", + "0 L8 0.000000 MULTIPOINT (-117.44480 33.26540) 5.088 \n", + "1 L8 0.230000 None NaN \n", + "2 L8 0.263967 MULTIPOINT (-117.45896 33.28226) 6.596 \n", + "3 L8 NaN MULTIPOINT (-117.45899 33.28226) 5.802 \n", + "\n", + " cloud_cover_df2 \n", + "0 0.000000 \n", + "1 NaN \n", + "2 0.263967 \n", + "3 0.000000 " + ] + }, + "execution_count": 117, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "merged" + ] + }, + { + "cell_type": "code", + "execution_count": 113, + "metadata": {}, + "outputs": [ + { + "ename": "NameError", + "evalue": "name 'merged_gdf' is not defined", + "output_type": "error", + "traceback": [ + "\u001b[1;31m---------------------------------------------------------------------------\u001b[0m", + "\u001b[1;31mNameError\u001b[0m Traceback (most recent call last)", + "\u001b[1;32mc:\\development\\doodleverse\\coastseg\\CoastSeg\\merge_session_final_nonoverlapping_script.ipynb Cell 68\u001b[0m line \u001b[0;36m4\n\u001b[0;32m 2\u001b[0m columns \u001b[39m=\u001b[39m [col \u001b[39mfor\u001b[39;00m col \u001b[39min\u001b[39;00m merged_gdf\u001b[39m.\u001b[39mcolumns \u001b[39mif\u001b[39;00m \u001b[39m\"\u001b[39m\u001b[39mgeometry\u001b[39m\u001b[39m\"\u001b[39m \u001b[39min\u001b[39;00m col]\n\u001b[0;32m 3\u001b[0m \u001b[39melse\u001b[39;00m:\n\u001b[1;32m----> 4\u001b[0m columns \u001b[39m=\u001b[39m [col \u001b[39mfor\u001b[39;00m col \u001b[39min\u001b[39;00m columns \u001b[39mif\u001b[39;00m col \u001b[39min\u001b[39;00m merged_gdf\u001b[39m.\u001b[39mcolumns]\n\u001b[0;32m 5\u001b[0m merged_gdf[\u001b[39m\"\u001b[39m\u001b[39mgeometry\u001b[39m\u001b[39m\"\u001b[39m] \u001b[39m=\u001b[39m merged_gdf[columns]\u001b[39m.\u001b[39mapply(\n\u001b[0;32m 6\u001b[0m \u001b[39mlambda\u001b[39;00m row: unary_union(row\u001b[39m.\u001b[39mtolist()), axis\u001b[39m=\u001b[39m\u001b[39m1\u001b[39m\n\u001b[0;32m 7\u001b[0m )\n\u001b[0;32m 8\u001b[0m \u001b[39mfor\u001b[39;00m col \u001b[39min\u001b[39;00m columns:\n", + "\u001b[1;32mc:\\development\\doodleverse\\coastseg\\CoastSeg\\merge_session_final_nonoverlapping_script.ipynb Cell 68\u001b[0m line \u001b[0;36m4\n\u001b[0;32m 2\u001b[0m columns \u001b[39m=\u001b[39m [col \u001b[39mfor\u001b[39;00m col \u001b[39min\u001b[39;00m merged_gdf\u001b[39m.\u001b[39mcolumns \u001b[39mif\u001b[39;00m \u001b[39m\"\u001b[39m\u001b[39mgeometry\u001b[39m\u001b[39m\"\u001b[39m \u001b[39min\u001b[39;00m col]\n\u001b[0;32m 3\u001b[0m \u001b[39melse\u001b[39;00m:\n\u001b[1;32m----> 4\u001b[0m columns \u001b[39m=\u001b[39m [col \u001b[39mfor\u001b[39;00m col \u001b[39min\u001b[39;00m columns \u001b[39mif\u001b[39;00m col \u001b[39min\u001b[39;00m merged_gdf\u001b[39m.\u001b[39mcolumns]\n\u001b[0;32m 5\u001b[0m merged_gdf[\u001b[39m\"\u001b[39m\u001b[39mgeometry\u001b[39m\u001b[39m\"\u001b[39m] \u001b[39m=\u001b[39m merged_gdf[columns]\u001b[39m.\u001b[39mapply(\n\u001b[0;32m 6\u001b[0m \u001b[39mlambda\u001b[39;00m row: unary_union(row\u001b[39m.\u001b[39mtolist()), axis\u001b[39m=\u001b[39m\u001b[39m1\u001b[39m\n\u001b[0;32m 7\u001b[0m )\n\u001b[0;32m 8\u001b[0m \u001b[39mfor\u001b[39;00m col \u001b[39min\u001b[39;00m columns:\n", + 
"\u001b[1;31mNameError\u001b[0m: name 'merged_gdf' is not defined" + ] + } + ], + "source": [ + "merged_gdf = merged\n", + "\n", + "if columns is None:\n", + " columns = [col for col in merged_gdf.columns if \"geometry\" in col]\n", + "else:\n", + " columns = [col for col in columns if col in merged_gdf.columns]\n", + "merged_gdf[\"geometry\"] = merged_gdf[columns].apply(\n", + " lambda row: unary_union(row.tolist()), axis=1\n", + ")\n", + "for col in columns:\n", + " if col in merged_gdf.columns:\n", + " merged_gdf = merged_gdf.drop(columns=col)\n", + " \n", + "merged_gdf\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": 104, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
" + ], + "text/plain": [ + " date satname cloud_cover geoaccuracy \\\n", + "0 2015-12-30 18:22:25 L9 0.000000 5.088 \n", + "1 2018-12-30 18:22:25 L8 0.000000 5.088 \n", + "2 2019-01-28 05:12:28 L8 0.230000 5.802 \n", + "3 2019-01-28 05:12:28 L9 0.100000 5.802 \n", + "4 2020-01-28 05:12:28 L8 0.000000 5.802 \n", + "5 2020-05-23 19:24:27 L8 0.263967 6.596 \n", + "\n", + " geometry \n", + "0 POINT (-117.45896 33.28226) \n", + "1 MULTIPOINT (-117.45892 33.28226, -117.44480 33... \n", + "2 POINT (-117.45881 33.28239) \n", + "3 POINT (-117.45894 33.28226) \n", + "4 POINT (-117.45899 33.28226) \n", + "5 MULTIPOINT (-117.45896 33.28226, -117.45891 33... " + ] + }, + "execution_count": 104, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "result.sort_values(by='date', inplace=True)\n", + "result.reset_index(drop=True, inplace=True)\n", + "result" + ] + }, + { + "cell_type": "code", + "execution_count": 88, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
" + ], + "text/plain": [ + " date satname cloud_cover geoaccuracy \\\n", + "0 2018-12-30 18:22:25 L8 0.000000 5.088 \n", + "1 2019-01-28 05:12:28 L8 0.230000 5.802 \n", + "2 2020-05-23 19:24:27 L8 0.263967 6.596 \n", + "3 2020-01-28 05:12:28 L8 0.000000 5.802 \n", + "4 2015-12-30 18:22:25 L9 0.000000 5.088 \n", + "5 2019-01-28 05:12:28 L9 0.100000 5.802 \n", + "\n", + " geometry \n", + "0 MULTIPOINT (-117.45892 33.28226, -117.44480 33... \n", + "1 POINT (-117.45881 33.28239) \n", + "2 MULTIPOINT (-117.45896 33.28226, -117.45891 33... \n", + "3 POINT (-117.45899 33.28226) \n", + "4 POINT (-117.45896 33.28226) \n", + "5 POINT (-117.45894 33.28226) " + ] + }, + "execution_count": 88, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "from typing import List, Optional, Union\n", + "import geopandas as gpd\n", + "import pandas as pd\n", + "from shapely.ops import unary_union\n", + "from coastseg.merge_utils import merge_geometries\n", + "\n", + "\n", + "def merge_and_average(df1: gpd.GeoDataFrame, df2: gpd.GeoDataFrame) -> gpd.GeoDataFrame:\n", + " # Perform a full outer join\n", + " merged = pd.merge(df1, df2, on=['satname', 'date'], how='outer', suffixes=('_df1', '_df2'))\n", + "\n", + " # Identify numeric columns from both dataframes\n", + " numeric_columns_df1 = df1.select_dtypes(include='number').columns\n", + " numeric_columns_df2 = df2.select_dtypes(include='number').columns\n", + " common_numeric_columns = set(numeric_columns_df1).intersection(numeric_columns_df2)\n", + "\n", + " # Average the numeric columns\n", + " for column in common_numeric_columns:\n", + " merged[column] = merged[[f'{column}_df1', f'{column}_df2']].mean(axis=1)\n", + "\n", + " # Drop the original numeric columns\n", + " merged.drop(columns=[f'{column}_df1' for column in common_numeric_columns] + [f'{column}_df2' for column in common_numeric_columns], inplace=True)\n", + "\n", + " # Merge geometries\n", + " geometry_columns = [col for col in merged.columns if 'geometry' in col]\n", + " merged = merge_geometries(merged, columns=geometry_columns)\n", + "\n", + " return merged\n", + "\n", + "# List of GeoDataFrames\n", + "gdfs = [extracted_gdf1, extracted_gdf2, extracted_gdf3]\n", + "\n", + "# Perform a full outer join and average the numeric columns across all GeoDataFrames\n", + "result = reduce(merge_and_average, gdfs)\n", + "\n", + "result\n" + ] + }, + { + "cell_type": "code", + "execution_count": 72, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
" + ], + "text/plain": [ + " date geometry_df1 satname \\\n", + "0 2018-12-30 18:22:25 MULTIPOINT (-117.45892 33.28226) L8 \n", + "1 2019-01-28 05:12:28 MULTIPOINT (-117.45881 33.28239) L8 \n", + "2 2020-05-23 19:24:27 MULTIPOINT (-117.45875 33.28242) L8 \n", + "3 2020-01-28 05:12:28 None L8 \n", + "\n", + " geometry_df2 cloud_cover geoaccuracy \n", + "0 MULTIPOINT (-117.44480 33.26540) 0.000000 5.088 \n", + "1 None 0.230000 5.802 \n", + "2 MULTIPOINT (-117.45896 33.28226) 0.263967 6.596 \n", + "3 MULTIPOINT (-117.45899 33.28226) 0.000000 5.802 " + ] + }, + "execution_count": 72, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "\n", + "# Perform a full outer join\n", + "merged = pd.merge(extracted_gdf1, extracted_gdf2, on=['satname', 'date'], how='outer', suffixes=('_df1', '_df2'))\n", + "# Identify numeric columns from both dataframes\n", + "numeric_columns_df1 = extracted_gdf1.select_dtypes(include='number').columns\n", + "numeric_columns_df2 = extracted_gdf2.select_dtypes(include='number').columns\n", + "common_numeric_columns = set(numeric_columns_df1).intersection(numeric_columns_df2)\n", + "# Average the numeric columns\n", + "for column in common_numeric_columns:\n", + " merged[column] = merged[[f'{column}_df1', f'{column}_df2']].mean(axis=1)\n", + "# Drop the original numeric columns\n", + "merged.drop(columns=[f'{column}_df1' for column in common_numeric_columns] + [f'{column}_df2' for column in common_numeric_columns], inplace=True)\n", + "# Merge geometries\n", + "geometry_columns = [col for col in merged.columns if 'geometry' in col]\n", + "merged\n", + "\n", + "\n" + ] + }, + { + "cell_type": "code", + "execution_count": 81, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
" + ], + "text/plain": [ + " geometry_df1 \\\n", + "satname date \n", + "L8 2018-12-30 18:22:25 POINT (-117.45892 33.28226) \n", + " 2019-01-28 05:12:28 POINT (-117.45881 33.28239) \n", + " 2020-01-28 05:12:28 GEOMETRYCOLLECTION EMPTY \n", + " 2020-05-23 19:24:27 POINT (-117.45875 33.28242) \n", + "\n", + " geometry_df2 \n", + "satname date \n", + "L8 2018-12-30 18:22:25 POINT (-117.44480 33.26540) \n", + " 2019-01-28 05:12:28 GEOMETRYCOLLECTION EMPTY \n", + " 2020-01-28 05:12:28 POINT (-117.45899 33.28226) \n", + " 2020-05-23 19:24:27 POINT (-117.45896 33.28226) " + ] + }, + "execution_count": 81, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "if columns is None:\n", + " columns = [col for col in merged.columns if \"geometry\" in col]\n", + "columns\n", + "# # Use groupby and agg to perform the operation more efficiently\n", + "geometries = merged.groupby(['satname', 'date'])[columns].agg(unary_union)\n", + "geometries" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "if columns is None:\n", + " columns = [col for col in merged.columns if \"geometry\" in col]\n", + "\n", + "# Concatenate the geometry columns into a list\n", + "merged['geometry_list'] = merged[columns].values.tolist()\n", + "\n", + "# Apply unary_union to each list of geometries\n", + "merged['geometry'] = merged['geometry_list'].apply(unary_union)\n", + "\n", + "# Drop the original geometry columns and the 'geometry_list' column\n", + "merged.drop(columns=columns + ['geometry_list'], inplace=True)\n", + "\n", + "merged" + ] + }, + { + "cell_type": "code", + "execution_count": 82, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "geometry_df1 POINT (-117.45892 33.28226)\n", + "geometry_df2 POINT (-117.4448 33.2654)\n", + "Name: (L8, 2018-12-30 18:22:25), dtype: object" + ] + }, + "execution_count": 82, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "geometries.iloc[0][columns].agg(unary_union)" + ] + }, + { + "cell_type": "code", + "execution_count": 83, + "metadata": {}, + "outputs": [], + "source": [ + "combined_geometry = unary_union(geometries[columns].values)" + ] + }, + { + "cell_type": "code", + "execution_count": 85, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "array([[, ],\n", + " [, ],\n", + " [, ],\n", + " [, ]],\n", + " dtype=object)" + ] + }, + "execution_count": 85, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "geometries[columns].values" + ] + }, + { + "cell_type": "code", + "execution_count": 86, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "MULTIPOINT (-117.45899 33.28226, -117.45896 33.28226, -117.45892 33.28226, -117.45881 33.28239, -117.45875 33.28242, -117.4448 33.2654)\n" + ] + } + ], + "source": [ + "print(combined_geometry)" + ] + }, + { + "cell_type": "code", + "execution_count": 77, + "metadata": {}, + "outputs": [], + "source": [ + "geometries = geometries.rename(columns={columns[0]: 'geometry'})" + ] + }, + { + "cell_type": "code", + "execution_count": 78, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
" + ], + "text/plain": [ + " geometry \\\n", + "satname date \n", + "L8 2018-12-30 18:22:25 POINT (-117.45892 33.28226) \n", + " 2019-01-28 05:12:28 POINT (-117.45881 33.28239) \n", + " 2020-01-28 05:12:28 GEOMETRYCOLLECTION EMPTY \n", + " 2020-05-23 19:24:27 POINT (-117.45875 33.28242) \n", + "\n", + " geometry_df2 \n", + "satname date \n", + "L8 2018-12-30 18:22:25 POINT (-117.44480 33.26540) \n", + " 2019-01-28 05:12:28 GEOMETRYCOLLECTION EMPTY \n", + " 2020-01-28 05:12:28 POINT (-117.45899 33.28226) \n", + " 2020-05-23 19:24:27 POINT (-117.45896 33.28226) " + ] + }, + "execution_count": 78, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "geometries" + ] + }, + { + "cell_type": "code", + "execution_count": 73, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
" + ], + "text/plain": [ + " date satname cloud_cover geoaccuracy\n", + "0 2018-12-30 18:22:25 L8 0.000000 5.088\n", + "1 2019-01-28 05:12:28 L8 0.230000 5.802\n", + "2 2020-05-23 19:24:27 L8 0.263967 6.596\n", + "3 2020-01-28 05:12:28 L8 0.000000 5.802" + ] + }, + "execution_count": 73, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "if columns is None:\n", + " columns = [col for col in merged.columns if \"geometry\" in col]\n", + "columns\n", + "# # Use groupby and agg to perform the operation more efficiently\n", + "geometries = merged.groupby(['satname', 'date'])[columns].agg(unary_union)\n", + "geometries\n", + "# # Create a new GeoDataFrame\n", + "# return gpd.GeoDataFrame(merged_gdf.drop(columns=columns).join(geometries))\n", + "\n", + "merged.drop(columns=columns)" + ] + }, + { + "cell_type": "code", + "execution_count": 75, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
" + ], + "text/plain": [ + " date satname cloud_cover geoaccuracy \\\n", + "0 2018-12-30 18:22:25 L8 0.000000 5.088 \n", + "1 2019-01-28 05:12:28 L8 0.230000 5.802 \n", + "2 2020-05-23 19:24:27 L8 0.263967 6.596 \n", + "3 2020-01-28 05:12:28 L8 0.000000 5.802 \n", + "\n", + " geometry_df1 geometry_df2 \n", + "0 POINT (-117.45892 33.28226) POINT (-117.44480 33.26540) \n", + "1 POINT (-117.45881 33.28239) GEOMETRYCOLLECTION EMPTY \n", + "2 POINT (-117.45875 33.28242) POINT (-117.45896 33.28226) \n", + "3 GEOMETRYCOLLECTION EMPTY POINT (-117.45899 33.28226) " + ] + }, + "execution_count": 75, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# Drop the geometry columns from the merged DataFrame\n", + "merged_no_geo = merged.drop(columns=columns)\n", + "\n", + "# Merge the geometries back into the DataFrame\n", + "result = pd.merge(merged_no_geo, geometries, on=['satname', 'date'], how='left')\n", + "result" + ] + }, + { + "cell_type": "code", + "execution_count": 71, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
" + ], + "text/plain": [ + " index date_merged satname_merged cloud_cover geoaccuracy \\\n", + "0 0 2018-12-30 18:22:25 L8 0.000000 5.088 \n", + "1 1 2019-01-28 05:12:28 L8 0.230000 5.802 \n", + "2 2 2020-05-23 19:24:27 L8 0.263967 6.596 \n", + "3 3 2020-01-28 05:12:28 L8 0.000000 5.802 \n", + "\n", + " satname_geometries date_geometries geometry_df1 \\\n", + "0 L8 2018-12-30 18:22:25 POINT (-117.45892 33.28226) \n", + "1 L8 2019-01-28 05:12:28 POINT (-117.45881 33.28239) \n", + "2 L8 2020-01-28 05:12:28 GEOMETRYCOLLECTION EMPTY \n", + "3 L8 2020-05-23 19:24:27 POINT (-117.45875 33.28242) \n", + "\n", + " geometry_df2 \n", + "0 POINT (-117.44480 33.26540) \n", + "1 GEOMETRYCOLLECTION EMPTY \n", + "2 POINT (-117.45899 33.28226) \n", + "3 POINT (-117.45896 33.28226) " + ] + }, + "execution_count": 71, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "merged = merged.reset_index() # Reset the index of the merged DataFrame\n", + "geometries = geometries.reset_index() # Reset the index of the geometries DataFrame\n", + "result = gpd.GeoDataFrame(merged.drop(columns=columns).join(geometries, lsuffix='_merged', rsuffix='_geometries'))\n", + "result" + ] + }, + { + "cell_type": "code", + "execution_count": 57, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "['geometry_df1', 'geometry_df2']" + ] + }, + "execution_count": 57, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "geometry_columns" + ] + }, + { + "cell_type": "code", + "execution_count": 58, + "metadata": {}, + "outputs": [], + "source": [ + "merged[\"geometry\"] = merged[geometry_columns].apply(\n", + " lambda row: unary_union(row.tolist()), axis=1\n", + " )" + ] + }, + { + "cell_type": "code", + "execution_count": 59, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
" + ], + "text/plain": [ + " date geometry_df1 satname \\\n", + "0 2018-12-30 18:22:25 MULTIPOINT (-117.45892 33.28226) L8 \n", + "1 2019-01-28 05:12:28 MULTIPOINT (-117.45881 33.28239) L8 \n", + "2 2020-05-23 19:24:27 MULTIPOINT (-117.45875 33.28242) L8 \n", + "3 2020-01-28 05:12:28 None L8 \n", + "\n", + " geometry_df2 cloud_cover geoaccuracy \\\n", + "0 MULTIPOINT (-117.44480 33.26540) 0.000000 5.088 \n", + "1 None 0.230000 5.802 \n", + "2 MULTIPOINT (-117.45896 33.28226) 0.263967 6.596 \n", + "3 MULTIPOINT (-117.45899 33.28226) 0.000000 5.802 \n", + "\n", + " geometry \n", + "0 MULTIPOINT (-117.45892 33.28226, -117.44480 33... \n", + "1 POINT (-117.45881 33.28239) \n", + "2 MULTIPOINT (-117.45896 33.28226, -117.45875 33... \n", + "3 POINT (-117.45899 33.28226) " + ] + }, + "execution_count": 59, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "merged" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from collections import Counter\n", + "\n", + "# Put all dataframes in a list\n", + "gdfs = [extracted_gdf1, extracted_gdf2, extracted_gdf3]\n", + "\n", + "# Initialize counters for dates and satellite names\n", + "date_counter = Counter()\n", + "satname_counter = Counter()\n", + "\n", + "# Loop over all dataframes\n", + "for gdf in gdfs:\n", + " # Update the counters with the dates and satellite names from the current dataframe\n", + " date_counter.update(gdf['date'])\n", + " satname_counter.update(gdf['satname'])\n", + "\n", + "# Now date_counter and satname_counter are dictionaries where the keys are the dates and satellite names,\n", + "# and the values are the number of times each date or satellite name appears across all dataframes." + ] + }, + { + "cell_type": "code", + "execution_count": 19, + "metadata": {}, + "outputs": [], + "source": [ + "from collections import Counter\n", + "\n", + "# Put all dataframes in a list\n", + "gdfs = [extracted_gdf1, extracted_gdf2, extracted_gdf3]\n", + "\n", + "# Initialize a counter for the combined values\n", + "combined_counter = Counter()\n", + "\n", + "# Loop over all dataframes\n", + "for gdf in gdfs:\n", + " # Combine the 'date' and 'satname' into a single value and update the counter with these values\n", + " combined_counter.update(gdf['date'].astype(str) + '_' + gdf['satname'])\n", + "\n", + "# Now combined_counter is a dictionary where the keys are the combined 'date' and 'satname' values,\n", + "# and the values are the number of times each combined value appears across all dataframes." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 20, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "Counter({'2020-05-23 19:24:27_L8': 3,\n", + " '2018-12-30 18:22:25_L8': 2,\n", + " '2019-01-28 05:12:28_L8': 1,\n", + " '2020-01-28 05:12:28_L8': 1,\n", + " '2015-12-30 18:22:25_L9': 1,\n", + " '2019-01-28 05:12:28_L9': 1})" + ] + }, + "execution_count": 20, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "combined_counter" + ] + }, + { + "cell_type": "code", + "execution_count": 22, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "Counter({'2020-05-23 19:24:27_L8': 3, '2018-12-30 18:22:25_L8': 2})" + ] + }, + "execution_count": 22, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# Create a new Counter that only includes items with a count of 2 or more\n", + "filtered_counter = Counter({k: v for k, v in combined_counter.items() if v >= 2})\n", + "filtered_counter" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": {}, + "outputs": [], + "source": [ + "# Perform an 'inner' merge between each pair of DataFrames\n", + "merge_df1_df2 = extracted_gdf1.merge(extracted_gdf2, on=['date', 'satname'], how='inner')\n", + "merge_df1_df3 = extracted_gdf1.merge(extracted_gdf3, on=['date', 'satname'], how='inner')\n", + "merge_df2_df3 = extracted_gdf2.merge(extracted_gdf3, on=['date', 'satname'], how='inner')\n", + "\n", + "# Concatenate the results\n", + "final_df = pd.concat([merge_df1_df2, merge_df1_df3, merge_df2_df3])\n", + "\n", + "# Drop duplicates\n", + "final_df = final_df.drop_duplicates()" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
dategeometry_xgeoaccuracy_xsatnamecloud_cover_xgeometry_ygeoaccuracy_ycloud_cover_y
02018-12-30 18:22:25MULTIPOINT (-117.45892 33.28226)5.088L80.000000MULTIPOINT (-117.44480 33.26540)5.0880.000000
12020-05-23 19:24:27MULTIPOINT (-117.45875 33.28242)6.596L80.263967MULTIPOINT (-117.45896 33.28226)6.5960.263967
02020-05-23 19:24:27MULTIPOINT (-117.45875 33.28242)6.596L80.263967MULTIPOINT (-117.45891 33.28232)6.5960.263967
02020-05-23 19:24:27MULTIPOINT (-117.45896 33.28226)6.596L80.263967MULTIPOINT (-117.45891 33.28232)6.5960.263967
\n", + "
" + ], + "text/plain": [ + " date geometry_x geoaccuracy_x \\\n", + "0 2018-12-30 18:22:25 MULTIPOINT (-117.45892 33.28226) 5.088 \n", + "1 2020-05-23 19:24:27 MULTIPOINT (-117.45875 33.28242) 6.596 \n", + "0 2020-05-23 19:24:27 MULTIPOINT (-117.45875 33.28242) 6.596 \n", + "0 2020-05-23 19:24:27 MULTIPOINT (-117.45896 33.28226) 6.596 \n", + "\n", + " satname cloud_cover_x geometry_y geoaccuracy_y \\\n", + "0 L8 0.000000 MULTIPOINT (-117.44480 33.26540) 5.088 \n", + "1 L8 0.263967 MULTIPOINT (-117.45896 33.28226) 6.596 \n", + "0 L8 0.263967 MULTIPOINT (-117.45891 33.28232) 6.596 \n", + "0 L8 0.263967 MULTIPOINT (-117.45891 33.28232) 6.596 \n", + "\n", + " cloud_cover_y \n", + "0 0.000000 \n", + "1 0.263967 \n", + "0 0.263967 \n", + "0 0.263967 " + ] + }, + "execution_count": 9, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "final_df" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.10.12" + } + }, + "nbformat": 4, + "nbformat_minor": 4 +} diff --git a/src/coastseg/merge_utils.py b/src/coastseg/merge_utils.py new file mode 100644 index 00000000..5440201b --- /dev/null +++ b/src/coastseg/merge_utils.py @@ -0,0 +1,644 @@ +from collections import defaultdict +import os +from typing import List, Union + +import geopandas as gpd +import numpy as np +import pandas as pd +from shapely.geometry import LineString, MultiLineString, MultiPoint +from shapely.ops import unary_union + +from coastseg import geodata_processing + +# from coastseg.file_utilities import to_file +# from coastseg.common import get_cross_distance_df +# from coastseg.common import convert_linestrings_to_multipoints, stringify_datetime_columns +# from coastsat import SDS_transects + + +def convert_multipoints_to_linestrings(gdf: gpd.GeoDataFrame) -> gpd.GeoDataFrame: + """ + Convert MultiPoint geometries in a GeoDataFrame to LineString geometries. + + Args: + - gdf (gpd.GeoDataFrame): The input GeoDataFrame. + + Returns: + - gpd.GeoDataFrame: A new GeoDataFrame with LineString geometries. If the input GeoDataFrame + already contains LineStrings, the original GeoDataFrame is returned. + """ + + # Create a copy of the GeoDataFrame + gdf_copy = gdf.copy() + + # Check if all geometries in the gdf are LineStrings + if all(gdf_copy.geometry.type == "LineString"): + return gdf_copy + + def multipoint_to_linestring(multipoint): + if isinstance(multipoint, MultiPoint): + return LineString(multipoint.geoms) + return multipoint + + # Convert each MultiPoint to a LineString + gdf_copy["geometry"] = gdf_copy["geometry"].apply(multipoint_to_linestring) + + return gdf_copy + + +def dataframe_to_dict(df: pd.DataFrame, key_map: dict) -> dict: + """ + Converts a DataFrame to a dictionary, with specific mapping between dictionary keys and DataFrame columns. + + Parameters: + df : DataFrame + The DataFrame to convert. + key_map : dict + A dictionary where keys are the desired dictionary keys and values are the corresponding DataFrame column names. + + Returns: + dict + The resulting dictionary. 
+ """ + result_dict = defaultdict(list) + + for dict_key, df_key in key_map.items(): + if df_key in df.columns: + if df_key == "date": + # Assumes the column to be converted to date is the one specified in the mapping with key 'date' + result_dict[dict_key] = list( + df[df_key].apply( + lambda x: x.strftime("%Y-%m-%d %H:%M:%S") + if pd.notnull(x) + else None + ) + ) + elif df_key == "geometry": + # Assumes the column to be converted to geometry is the one specified in the mapping with key 'geometry' + result_dict[dict_key] = list( + df[df_key].apply( + lambda x: np.array([list(point.coords[0]) for point in x.geoms]) + if pd.notnull(x) + else None + ) + ) + else: + result_dict[dict_key] = list(df[df_key]) + + return dict(result_dict) + + +from geopandas import GeoDataFrame +from shapely.geometry import LineString, MultiLineString, MultiPoint, Point + + +def convert_lines_to_multipoints(gdf: GeoDataFrame) -> GeoDataFrame: + """ + Convert LineString or MultiLineString geometries in a GeoDataFrame to MultiPoint geometries. + + Parameters + ---------- + gdf : GeoDataFrame + The input GeoDataFrame containing LineString or MultiLineString geometries. + + Returns + ------- + GeoDataFrame + A new GeoDataFrame with MultiPoint geometries. + + """ + # Create a copy of the input GeoDataFrame to avoid modifying it in place + gdf = gdf.copy() + + # Define a function to convert LineString or MultiLineString to MultiPoint + def line_to_multipoint(geometry): + if isinstance(geometry, LineString): + return MultiPoint(geometry.coords) + elif isinstance(geometry, MultiLineString): + points = [MultiPoint(line.coords) for line in geometry.geoms] + return MultiPoint([point for multi in points for point in multi.geoms]) + elif isinstance(geometry, MultiPoint): + return geometry + elif isinstance(geometry, Point): + return MultiPoint([geometry.coords]) + else: + raise TypeError(f"Unsupported geometry type: {type(geometry)}") + + # Apply the conversion function to each row in the GeoDataFrame + gdf["geometry"] = gdf["geometry"].apply(line_to_multipoint) + + return gdf + + +def merge_geodataframes( + on, how="inner", aggregation_funcs=None, crs="epsg:4326", *gdfs +): + """ + Merges multiple GeoDataFrames based on a common column. + + Parameters: + on : str or list of str + Column name or list of column names to merge on. + how : str, optional + Type of merge to be performed (default is 'inner'). + aggregation_funcs : dict, optional + Dictionary of column names to aggregation functions. + Example: for the columns 'cloud_cover' and 'geoaccuracy', the mean aggregation function can be specified as: + aggregation_funcs = { + 'cloud_cover': 'mean', + 'geoaccuracy': 'mean' + } + *gdfs : GeoDataFrames + Variable number of GeoDataFrames to be merged. + + Returns: + GeoDataFrame + The merged GeoDataFrame with aggregated columns as specified. 
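+
+    Example:
+        An illustrative sketch with made-up single-row GeoDataFrames; because
+        ``*gdfs`` comes last in the signature, ``how``, ``aggregation_funcs`` and
+        ``crs`` have to be passed positionally before the GeoDataFrames:
+
+        >>> import geopandas as gpd
+        >>> from shapely.geometry import MultiPoint
+        >>> gdf1 = gpd.GeoDataFrame(
+        ...     {"date": ["2018-12-30"], "satname": ["L8"], "cloud_cover": [0.0],
+        ...      "geometry": [MultiPoint([(0, 0)])]}, crs="epsg:4326")
+        >>> gdf2 = gpd.GeoDataFrame(
+        ...     {"date": ["2018-12-30"], "satname": ["L8"], "cloud_cover": [0.2],
+        ...      "geometry": [MultiPoint([(1, 1)])]}, crs="epsg:4326")
+        >>> merged = merge_geodataframes(
+        ...     ["date", "satname"], "inner", {"cloud_cover": "mean"}, "epsg:4326",
+        ...     gdf1, gdf2)
+        >>> merged["cloud_cover"].tolist()
+        [0.1]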
+    """
+    if len(gdfs) < 2:
+        raise ValueError("At least two GeoDataFrames must be provided for merging")
+
+    # Set default aggregation functions if none are provided
+    if aggregation_funcs is None:
+        aggregation_funcs = {}
+
+    # Perform the merge while applying the custom aggregation functions
+    merged_gdf = gdfs[0]
+    # set_crs returns a new GeoDataFrame, so assign the result back; only set the
+    # CRS when it is missing
+    if merged_gdf.crs is None:
+        merged_gdf = merged_gdf.set_crs(crs)
+    for gdf in gdfs[1:]:
+        merged_gdf = pd.merge(
+            merged_gdf, gdf, on=on, how=how, suffixes=("_left", "_right")
+        )
+
+        # Apply aggregation functions
+        for col, func in aggregation_funcs.items():
+            col_left = f"{col}_left"
+            col_right = f"{col}_right"
+
+            # Check if the columns exist in both GeoDataFrames
+            if col_left in merged_gdf.columns and col_right in merged_gdf.columns:
+                # Apply the aggregation function and drop the original columns
+                merged_gdf[col] = merged_gdf[[col_left, col_right]].agg(func, axis=1)
+                merged_gdf = merged_gdf.drop(columns=[col_left, col_right])
+
+    return merged_gdf
+
+
+def read_first_geojson_file(
+    directory: str,
+    filenames=["extracted_shorelines_lines.geojson", "extracted_shorelines.geojson"],
+):
+    # Loop over the filenames
+    for filename in filenames:
+        filepath = os.path.join(directory, filename)
+
+        # If the file exists, read it and return the GeoDataFrame
+        if os.path.exists(filepath):
+            return geodata_processing.read_gpd_file(filepath)
+
+    # If none of the files exist, raise an exception
+    raise FileNotFoundError(
+        f"None of the files {filenames} exist in the directory {directory}"
+    )
+
+
+def clip_gdfs(gdfs, overlap_gdf):
+    """
+    Clips GeoDataFrames to an overlapping region.
+
+    Parameters:
+    gdfs : list of GeoDataFrames
+        The GeoDataFrames to be clipped.
+    overlap_gdf : GeoDataFrame
+        The overlapping region to which the GeoDataFrames will be clipped.
+
+    Returns:
+    list of GeoDataFrames
+        The clipped GeoDataFrames.
+    """
+    clipped_gdfs = []
+    for gdf in gdfs:
+        clipped_gdf = gpd.clip(gdf, overlap_gdf)
+        if not clipped_gdf.empty:
+            clipped_gdfs.append(clipped_gdf)
+            clipped_gdf.plot()
+    return clipped_gdfs
+
+
+def calculate_overlap(gdf: gpd.GeoDataFrame) -> gpd.GeoDataFrame:
+    """
+    Calculates the intersection of all pairs of polygons in a GeoDataFrame.
+
+    Parameters:
+    -----------
+    gdf : GeoDataFrame
+        A GeoDataFrame containing polygons.
+
+    Returns:
+    --------
+    overlap_gdf : GeoDataFrame
+        A GeoDataFrame containing the intersection of all pairs of polygons in gdf.
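+
+    Example:
+        A small illustrative sketch with two overlapping squares (made-up
+        geometries, not CoastSeg ROI data):
+
+        >>> import geopandas as gpd
+        >>> from shapely.geometry import box
+        >>> rois = gpd.GeoDataFrame(
+        ...     geometry=[box(0, 0, 2, 2), box(1, 1, 3, 3)], crs="epsg:4326")
+        >>> calculate_overlap(rois).geometry.iloc[0].bounds
+        (1.0, 1.0, 2.0, 2.0)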
+ """ + # Check if the input GeoDataFrame is empty + if not hasattr(gdf, "empty"): + return gpd.GeoDataFrame() + if gdf.empty: + # Return an empty GeoDataFrame with the same CRS if it exists + return gpd.GeoDataFrame( + geometry=[], crs=gdf.crs if hasattr(gdf, "crs") else None + ) + + # Initialize a list to store the intersections + intersections = [] + + # Loop over each pair of rows in gdf + for i in range(len(gdf) - 1): + for j in range(i + 1, len(gdf)): + # Check for intersection + if gdf.iloc[i].geometry.intersects(gdf.iloc[j].geometry): + # Calculate the intersection + intersection = gdf.iloc[i].geometry.intersection(gdf.iloc[j].geometry) + # Append the intersection to the intersections list + intersections.append(intersection) + + # Create a GeoSeries from the intersections + intersection_series = gpd.GeoSeries(intersections, crs=gdf.crs) + + # Create a GeoDataFrame from the GeoSeries + overlap_gdf = gpd.GeoDataFrame(geometry=intersection_series) + + # # Loop over each pair of rows in gdf + # for i in range(len(gdf)): + # for j in range(i+1, len(gdf)): + # # Check for intersection + # if gdf.iloc[i].geometry.intersects(gdf.iloc[j].geometry): + # # Calculate the intersection + # intersection = gdf.iloc[i].geometry.intersection(gdf.iloc[j].geometry) + + # # Create a new row with the intersection and append to the result list + # overlap_list.append({'geometry': intersection}) + + # # Create a DataFrame from the results list + # overlap_df = pd.DataFrame(overlap_list) + + # # Convert the result DataFrame to a GeoDataFrame and set the CRS + # overlap_gdf = gpd.GeoDataFrame(overlap_df, geometry='geometry', crs=gdf.crs) + + return overlap_gdf + + +def average_multipoints(multipoints) -> MultiPoint: + """ + Calculate the average MultiPoint geometry from a list of MultiPoint geometries. + + This function takes a list of shapely MultiPoint geometries, ensures they all have the same number of points + by padding shorter MultiPoints with their last point, and then calculates the average coordinates + for each point position across all the input MultiPoint geometries. + + The result is a new MultiPoint geometry that represents the average shape of the input MultiPoints. + + Parameters: + multipoints (list of shapely.geometry.MultiPoint): A list of shapely MultiPoint geometries to be averaged. + + Returns: + shapely.geometry.MultiPoint: A MultiPoint geometry representing the average shape of the input MultiPoints. + + Raises: + ValueError: If the input list of MultiPoint geometries is empty. 
+
+    Example:
+    >>> from shapely.geometry import MultiPoint
+    >>> multipoint1 = MultiPoint([(0, 0), (1, 1), (2, 2)])
+    >>> multipoint2 = MultiPoint([(1, 1), (2, 2)])
+    >>> multipoint3 = MultiPoint([(0, 0), (1, 1), (2, 2), (3, 3)])
+    >>> average_mp = average_multipoints([multipoint1, multipoint2, multipoint3])
+    >>> print(average_mp)
+    MULTIPOINT (0.3333333333333333 0.3333333333333333, 1.3333333333333333 1.3333333333333333, 2 2, 2.3333333333333335 2.3333333333333335)
+    """
+    if not multipoints:
+        raise ValueError("The list of MultiPoint geometries is empty")
+
+    # Find the maximum number of points in any MultiPoint
+    max_len = max(len(mp.geoms) for mp in multipoints)
+
+    # Pad shorter MultiPoints with their last point
+    padded_multipoints = []
+    for mp in multipoints:
+        if len(mp.geoms) < max_len:
+            padded_multipoints.append(
+                MultiPoint(list(mp.geoms) + [mp.geoms[-1]] * (max_len - len(mp.geoms)))
+            )
+        else:
+            padded_multipoints.append(mp)
+
+    # Calculate the average coordinates for each point
+    num_multipoints = len(padded_multipoints)
+    average_coords = []
+    for i in range(max_len):
+        avg_left = sum(mp.geoms[i].x for mp in padded_multipoints) / num_multipoints
+        avg_right = sum(mp.geoms[i].y for mp in padded_multipoints) / num_multipoints
+        average_coords.append((avg_left, avg_right))
+
+    return MultiPoint(average_coords)
+
+
+def average_columns(df, col1, col2, new_col):
+    df[new_col] = df[[col1, col2]].mean(axis=1, skipna=True)
+    return df
+
+
+def combine_dataframes(df1, df2, join_columns):
+    # Perform an outer join and mark the origin of each row
+    all_rows = pd.merge(df1, df2, on=join_columns, how="outer", indicator=True)
+
+    # Keep only the rows that are in 'df1' but not in 'df2'
+    df1_unique = all_rows[all_rows["_merge"] == "left_only"]
+    if "cloud_cover_x" in df1_unique.columns and "cloud_cover_y" in df1_unique.columns:
+        df1_unique = average_columns(
+            df1_unique, "cloud_cover_x", "cloud_cover_y", "cloud_cover"
+        )
+        df1_unique.drop(columns=["cloud_cover_x", "cloud_cover_y"], inplace=True)
+    if "geoaccuracy_x" in df1_unique.columns and "geoaccuracy_y" in df1_unique.columns:
+        df1_unique = average_columns(
+            df1_unique, "geoaccuracy_x", "geoaccuracy_y", "geoaccuracy"
+        )
+        df1_unique.drop(columns=["geoaccuracy_x", "geoaccuracy_y"], inplace=True)
+    df1_unique.drop(columns=["_merge"], inplace=True)
+
+    # Concatenate 'df2' and the unique rows from 'df1'
+    result = pd.concat([df2, df1_unique], ignore_index=True)
+
+    def assign_geometry(row):
+        # Keep the existing geometry when present; otherwise fall back to the
+        # geometry carried over from either input dataframe
+        if pd.isnull(row["geometry"]):
+            if pd.notnull(row["geometry_x"]):
+                return row["geometry_x"]
+            elif pd.notnull(row["geometry_y"]):
+                return row["geometry_y"]
+        return row["geometry"]
+
+    if "geometry_x" in result.columns and "geometry_y" in result.columns:
+        result["geometry"] = result.apply(assign_geometry, axis=1)
+        result.drop(columns=["geometry_x", "geometry_y"], inplace=True)
+    return result
+
+
+def combine_geodataframes(gdf1, gdf2, join_columns, average_columns=None):
+    """
+    Combines two GeoDataFrames, performing an outer join and averaging specified numerical columns.
+
+    Parameters:
+    gdf1, gdf2 : GeoDataFrame
+        The GeoDataFrames to combine.
+    join_columns : list of str
+        The columns to join on.
+    average_columns : list of str, optional
+        The columns to average. If None, all numerical columns with the same name in both GeoDataFrames will be averaged.
+
+    Returns:
+    GeoDataFrame
+        The combined GeoDataFrame.
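+
+    Example:
+        An illustrative sketch with made-up single-row GeoDataFrames (any shared
+        numerical columns would be averaged the same way):
+
+        >>> import geopandas as gpd
+        >>> from shapely.geometry import Point
+        >>> g1 = gpd.GeoDataFrame({"date": ["2018-12-30"], "cloud_cover": [0.0],
+        ...                        "geometry": [Point(0, 0)]})
+        >>> g2 = gpd.GeoDataFrame({"date": ["2018-12-30"], "cloud_cover": [0.2],
+        ...                        "geometry": [Point(1, 1)]})
+        >>> combined = combine_geodataframes(g1, g2, ["date"])
+        >>> combined["cloud_cover"].tolist()
+        [0.1]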
+    """
+    # Ensure that the 'geometry' column is present in both GeoDataFrames
+    if "geometry" not in gdf1.columns or "geometry" not in gdf2.columns:
+        raise ValueError("Both GeoDataFrames must have a 'geometry' column.")
+
+    # Combine GeoDataFrames using an outer join
+    combined_gdf = pd.merge(
+        gdf1, gdf2, on=join_columns, how="outer", suffixes=("_gdf1", "_gdf2")
+    )
+
+    if average_columns is None:
+        # List of numerical columns to be averaged
+        average_columns = [
+            col
+            for col in gdf1.columns
+            if col in gdf2.columns
+            and col not in join_columns + ["geometry"]
+            and np.issubdtype(gdf1[col].dtype, np.number)
+            and np.issubdtype(gdf2[col].dtype, np.number)
+        ]
+
+    # Average specified numerical columns
+    for col in average_columns:
+        if (
+            f"{col}_gdf1" in combined_gdf.columns
+            and f"{col}_gdf2" in combined_gdf.columns
+        ):
+            combined_gdf[col] = combined_gdf[[f"{col}_gdf1", f"{col}_gdf2"]].mean(
+                axis=1
+            )
+            combined_gdf.drop(columns=[f"{col}_gdf1", f"{col}_gdf2"], inplace=True)
+
+    # Resolve geometry conflicts by prioritizing non-null values
+    combined_gdf["geometry"] = combined_gdf["geometry_gdf1"].combine_first(
+        combined_gdf["geometry_gdf2"]
+    )
+    combined_gdf.drop(columns=["geometry_gdf1", "geometry_gdf2"], inplace=True)
+
+    return gpd.GeoDataFrame(combined_gdf, geometry="geometry")
+
+
+def mergeRightUnique(
+    left_df: gpd.GeoDataFrame,
+    right_df: gpd.GeoDataFrame,
+    join_columns: Union[str, List[str]] = ["date", "satname"],
+    CRS: str = "EPSG:4326",
+) -> pd.DataFrame:
+    """
+    Merges two GeoDataFrames, keeping only the unique rows from the right GeoDataFrame based on the specified join columns.
+
+    Parameters:
+    left_df : GeoDataFrame
+        The left GeoDataFrame to merge. Its CRS is set to the specified CRS if not already set.
+    right_df : GeoDataFrame
+        The right GeoDataFrame to merge. Its CRS is set to the specified CRS if not already set.
+    join_columns : str or list of str, default ['date', 'satname']
+        The columns to join on. These columns are set as the index for both GeoDataFrames. If a string is passed, it is converted to a list.
+    CRS : str, default 'EPSG:4326'
+        The Coordinate Reference System to set for the GeoDataFrames if not already set.
+
+    Returns:
+    GeoDataFrame
+        The merged GeoDataFrame, containing all rows from the left GeoDataFrame and only the unique rows from the right GeoDataFrame based on the join columns.
+    """
+    if not left_df.crs:
+        left_df.set_crs(CRS, inplace=True)
+    if not right_df.crs:
+        right_df.set_crs(CRS, inplace=True)
+
+    if isinstance(join_columns, str):
+        join_columns = [join_columns]
+    # Ensure that the join columns are set as the index for both DataFrames
+    left_df.set_index(join_columns, inplace=True)
+    right_df.set_index(join_columns, inplace=True)
+
+    # Find the difference in the MultiIndex between right_df and left_df
+    unique_indices = right_df.index.difference(left_df.index)
+
+    # Select only those rows from right_df that have unique indices
+    unique_to_right_df = right_df.loc[unique_indices]
+    if not unique_to_right_df.crs:
+        unique_to_right_df.crs = right_df.crs
+
+    # Now concatenate the left_df with the unique_to_right_df
+    combined_gdf = pd.concat(
+        [left_df.reset_index(), unique_to_right_df.reset_index()], ignore_index=True
+    )
+    return combined_gdf
+
+
+def merge_geometries(merged_gdf, columns=None, operation=unary_union):
+    """
+    Performs a specified operation for the geometries with the same date and satname.
+
+    Parameters:
+    merged_gdf : GeoDataFrame
+        The GeoDataFrame to perform the operation on.
+ columns : list of str, optional + The columns to perform the operation on. If None, all columns with 'geometry' in the name are used. + operation : function, optional + The operation to perform. If None, unary_union is used. + + Returns: + GeoDataFrame + The GeoDataFrame with the operation performed. + """ + if columns is None: + columns = [col for col in merged_gdf.columns if "geometry" in col] + else: + columns = [col for col in columns if col in merged_gdf.columns] + + merged_gdf["geometry"] = merged_gdf[columns].apply( + lambda row: operation(row.tolist()), axis=1 + ) + for col in columns: + if col in merged_gdf.columns: + merged_gdf = merged_gdf.drop(columns=col) + return merged_gdf + + +def merge_geojson_files( + *file_paths: str, +) -> gpd.GeoDataFrame: + """ + Merges any number of GeoJSON files into a single GeoDataFrame, removing any duplicate rows. + + Parameters: + - *file_paths (str): Paths to the GeoJSON files. + + Returns: + - GeoDataFrame: A GeoDataFrame containing the merged data from all input files, with duplicates removed. + """ + merged_gdf = gpd.GeoDataFrame() + for filepath in file_paths: + gdf = geodata_processing.read_gpd_file(filepath) + # Merging the two dataframes + merged_gdf = gpd.GeoDataFrame(pd.concat([merged_gdf, gdf], ignore_index=True)) + + # Dropping any duplicated rows based on all columns + merged_gdf_cleaned = merged_gdf.drop_duplicates() + return merged_gdf_cleaned + + +def create_csv_per_transect( + save_path: str, + cross_distance_transects: dict, + extracted_shorelines_dict: dict, + roi_id: str = None, # ROI ID is now optional and defaults to None + filename_suffix: str = "_timeseries_raw.csv", +): + for key, distances in cross_distance_transects.items(): + # Initialize the dictionary for DataFrame with mandatory keys + data_dict = { + "dates": extracted_shorelines_dict["dates"], + "satname": extracted_shorelines_dict["satname"], + key: distances, + } + + # Add roi_id to the dictionary if provided + if roi_id is not None: + data_dict["roi_id"] = [roi_id] * len(extracted_shorelines_dict["dates"]) + + # Create a DataFrame directly with the data dictionary + df = pd.DataFrame(data_dict).set_index("dates") + + # Construct the full file path + csv_filename = f"{key}{filename_suffix}" + fn = os.path.join(save_path, csv_filename) + + # Save to CSV file, 'mode' set to 'w' for overwriting + try: + df.to_csv(fn, sep=",", mode="w") + print(f"Time-series for transect {key} saved to {fn}") + except Exception as e: + print(f"Failed to save time-series for transect {key}: {e}") + + +# better way of mergine multiple gdfs together +# from shapely.ops import unary_union +# from coastseg.merge_utils import merge_geometries +# from functools import reduce +# import pandas as pd + + +# def merge_geometries(merged_gdf, columns=None, operation=unary_union): +# """ +# Performs a specified operation for the geometries with the same date and satname. + +# Parameters: +# merged_gdf : GeoDataFrame +# The GeoDataFrame to perform the operation on. +# columns : list of str, optional +# The columns to perform the operation on. If None, all columns with 'geometry' in the name are used. +# operation : function, optional +# The operation to perform. If None, unary_union is used. + +# Returns: +# GeoDataFrame +# The GeoDataFrame with the operation performed. 
+# """ +# if columns is None: +# columns = [col for col in merged_gdf.columns if "geometry" in col] +# else: +# columns = [col for col in columns if col in merged_gdf.columns] + +# merged_gdf["geometry"] = merged_gdf[columns].apply( +# lambda row: operation(row.tolist()), axis=1 +# ) +# for col in columns: +# if col in merged_gdf.columns: +# merged_gdf = merged_gdf.drop(columns=col) +# return merged_gdf + +# def merge_and_average(df1, df2): +# # Perform a full outer join +# merged = pd.merge(df1, df2, on=['satname', 'date'], how='outer', suffixes=('_df1', '_df2')) + +# # Loop over all columns +# for column in set(df1.columns).intersection(df2.columns): +# # Merge the geometries + +# if isinstance(df1[column].dtype, gpd.array.GeometryDtype): +# print(f"merging {{['{column}_df1', '{column}_df2']}}") +# print(df1[column]) +# print(df2[column]) +# # merged = merge_geometries(merged, columns=[f'{column}_df1', f'{column}_df2'], operation=unary_union) +# merged = merge_geometries(merged) +# continue +# # Skip non-numeric columns +# if not pd.api.types.is_numeric_dtype(df1[column]): +# continue +# # Average the values in the two columns +# merged[column] = merged[[f'{column}_df1', f'{column}_df2']].mean(axis=1) +# merged.drop(columns=[f'{column}_df1', f'{column}_df2'], inplace=True) + +# return merged + +# # List of GeoDataFrames +# gdfs = [extracted_gdf1, extracted_gdf2, extracted_gdf3] + +# # Perform a full outer join and average the numeric columns across all GeoDataFrames +# result = reduce(merge_and_average, gdfs) + +# result diff --git a/tests/test_merge_utils.py b/tests/test_merge_utils.py new file mode 100644 index 00000000..e69de29b From 89950889ac0d59ec75773398dc62a365b61c8e32 Mon Sep 17 00:00:00 2001 From: Sharon Fitzpatrick Date: Fri, 17 Nov 2023 18:05:43 -0800 Subject: [PATCH 28/87] add more tests and update script wip --- ..._session_final_nonoverlapping_script.ipynb | 4398 ++++------------- tests/test_merge_utils.py | 470 ++ 2 files changed, 1303 insertions(+), 3565 deletions(-) diff --git a/merge_session_final_nonoverlapping_script.ipynb b/merge_session_final_nonoverlapping_script.ipynb index 486d81d6..1197e343 100644 --- a/merge_session_final_nonoverlapping_script.ipynb +++ b/merge_session_final_nonoverlapping_script.ipynb @@ -42,7 +42,7 @@ }, { "cell_type": "code", - "execution_count": 5, + "execution_count": 2, "metadata": {}, "outputs": [], "source": [ @@ -52,7 +52,7 @@ }, { "cell_type": "code", - "execution_count": 6, + "execution_count": 3, "metadata": {}, "outputs": [], "source": [ @@ -61,7 +61,7 @@ }, { "cell_type": "code", - "execution_count": 7, + "execution_count": 4, "metadata": {}, "outputs": [ { @@ -98,7 +98,7 @@ }, { "cell_type": "code", - "execution_count": 8, + "execution_count": 5, "metadata": {}, "outputs": [], "source": [ @@ -115,7 +115,7 @@ }, { "cell_type": "code", - "execution_count": 9, + "execution_count": 6, "metadata": {}, "outputs": [], "source": [ @@ -590,7 +590,7 @@ }, { "cell_type": "code", - "execution_count": 10, + "execution_count": 7, "metadata": {}, "outputs": [ { @@ -740,7 +740,7 @@ "[717 rows x 4 columns]" ] }, - "execution_count": 10, + "execution_count": 7, "metadata": {}, "output_type": "execute_result" } @@ -768,7 +768,7 @@ }, { "cell_type": "code", - "execution_count": 11, + "execution_count": 8, "metadata": {}, "outputs": [ { @@ -823,7 +823,7 @@ "1 13 roi NaN POLYGON ((-117.46847 33.26526, -117.46869 33.3..." 
] }, - "execution_count": 11, + "execution_count": 8, "metadata": {}, "output_type": "execute_result" } @@ -870,8 +870,8 @@ " \n", " \n", " \n", - " satname\n", " date\n", + " satname\n", " cloud_cover\n", " geoaccuracy\n", " geometry\n", @@ -880,424 +880,424 @@ " \n", " \n", " 0\n", - " L8\n", " 2018-12-30 18:22:25\n", + " L8\n", " 0.000000\n", " 5.088\n", " MULTIPOINT (-117.46831 33.29341, -117.46822 33...\n", " \n", " \n", - " 43\n", - " L8\n", + " 1\n", " 2019-02-16 18:22:17\n", + " L8\n", " 0.000000\n", " 5.802\n", - " MULTIPOINT (-117.45881 33.28239, -117.45891 33...\n", + " MULTIPOINT (-117.46822 33.29345, -117.46815 33...\n", " \n", " \n", - " 44\n", - " L8\n", + " 2\n", " 2019-03-20 18:22:08\n", + " L8\n", " 0.000000\n", " 6.596\n", - " MULTIPOINT (-117.45875 33.28242, -117.45889 33...\n", + " MULTIPOINT (-117.46815 33.29345, -117.46805 33...\n", " \n", " \n", - " 45\n", - " L8\n", + " 3\n", " 2019-06-08 18:22:20\n", + " L8\n", " 0.263967\n", " 4.826\n", - " MULTIPOINT (-117.44480 33.26540, -117.44481 33...\n", + " MULTIPOINT (-117.44858 33.26998, -117.44854 33...\n", " \n", " \n", - " 1\n", - " L8\n", + " 4\n", " 2019-07-10 18:22:29\n", + " L8\n", " 0.003838\n", " 4.275\n", " MULTIPOINT (-117.46834 33.29332, -117.46831 33...\n", " \n", " \n", - " 2\n", - " L8\n", + " 5\n", " 2019-07-26 18:22:33\n", + " L8\n", " 0.007632\n", " 4.286\n", " MULTIPOINT (-117.46209 33.28590, -117.46199 33...\n", " \n", " \n", - " 3\n", - " L8\n", + " 6\n", " 2019-08-11 18:22:40\n", + " L8\n", " 0.000000\n", " 4.080\n", " MULTIPOINT (-117.46831 33.29335, -117.46828 33...\n", " \n", " \n", - " 4\n", - " L8\n", + " 7\n", " 2019-08-27 18:22:44\n", + " L8\n", " 0.000000\n", " 4.208\n", " MULTIPOINT (-117.46832 33.29332, -117.46831 33...\n", " \n", " \n", - " 5\n", - " L8\n", + " 8\n", " 2019-09-12 18:22:48\n", + " L8\n", " 0.000000\n", " 4.128\n", " MULTIPOINT (-117.46829 33.29345, -117.46817 33...\n", " \n", " \n", - " 6\n", - " L8\n", + " 9\n", " 2019-10-14 18:22:56\n", + " L8\n", " 0.007924\n", " 4.002\n", " MULTIPOINT (-117.46154 33.28536, -117.46150 33...\n", " \n", " \n", - " 7\n", - " L8\n", + " 10\n", " 2019-10-30 18:22:56\n", + " L8\n", " 0.014848\n", " 4.851\n", " MULTIPOINT (-117.46817 33.29345, -117.46815 33...\n", " \n", " \n", - " 8\n", - " L8\n", + " 11\n", " 2019-11-15 18:22:53\n", + " L8\n", " 0.000000\n", " 4.661\n", " MULTIPOINT (-117.46824 33.29345, -117.46815 33...\n", " \n", " \n", - " 9\n", - " L8\n", + " 12\n", " 2019-12-17 18:22:50\n", + " L8\n", " 0.003546\n", " 5.080\n", " MULTIPOINT (-117.46827 33.29345, -117.46815 33...\n", " \n", " \n", - " 10\n", - " L8\n", + " 13\n", " 2020-01-02 18:22:45\n", + " L8\n", " 0.000000\n", " 5.318\n", " MULTIPOINT (-117.46829 33.29345, -117.46818 33...\n", " \n", " \n", - " 11\n", - " L8\n", + " 14\n", " 2020-01-18 18:22:42\n", + " L8\n", " 0.000000\n", " 4.996\n", " MULTIPOINT (-117.46842 33.29331, -117.46833 33...\n", " \n", " \n", - " 46\n", - " L8\n", + " 15\n", " 2020-03-22 18:22:20\n", + " L8\n", " 0.000000\n", " 5.147\n", - " MULTIPOINT (-117.45891 33.28236, -117.45892 33...\n", + " MULTIPOINT (-117.46831 33.29345, -117.46820 33...\n", " \n", " \n", - " 47\n", - " L8\n", + " 16\n", " 2020-04-23 18:22:05\n", + " L8\n", " 0.014904\n", " 4.790\n", - " MULTIPOINT (-117.43233 33.24960, -117.43234 33...\n", + " MULTIPOINT (-117.43574 33.25393, -117.43573 33...\n", " \n", " \n", - " 12\n", - " L8\n", + " 17\n", " 2020-06-10 18:22:09\n", + " L8\n", " 0.000000\n", " 4.624\n", " MULTIPOINT (-117.46831 33.29337, -117.46826 33...\n", " \n", " \n", - " 
13\n", - " L8\n", + " 18\n", " 2020-07-12 18:22:24\n", + " L8\n", " 0.000000\n", " 4.785\n", " MULTIPOINT (-117.46833 33.29332, -117.46831 33...\n", " \n", " \n", - " 14\n", - " L8\n", + " 19\n", " 2020-08-29 18:22:41\n", + " L8\n", " 0.003502\n", " 4.401\n", " MULTIPOINT (-117.46829 33.29345, -117.46818 33...\n", " \n", " \n", - " 15\n", - " L8\n", + " 20\n", " 2020-09-30 18:22:51\n", + " L8\n", " 0.000000\n", " 4.235\n", " MULTIPOINT (-117.46822 33.29345, -117.46815 33...\n", " \n", " \n", - " 16\n", - " L8\n", + " 21\n", " 2020-12-03 18:22:54\n", + " L8\n", " 0.000000\n", " 4.724\n", " MULTIPOINT (-117.46824 33.29345, -117.46815 33...\n", " \n", " \n", - " 17\n", - " L8\n", + " 22\n", " 2020-12-19 18:22:53\n", + " L8\n", " 0.000000\n", " 4.938\n", " MULTIPOINT (-117.46822 33.29345, -117.46815 33...\n", " \n", " \n", - " 18\n", - " L8\n", + " 23\n", " 2021-01-04 18:22:48\n", + " L8\n", " 0.000000\n", " 4.964\n", " MULTIPOINT (-117.46824 33.29345, -117.46815 33...\n", " \n", " \n", - " 19\n", - " L8\n", + " 24\n", " 2021-01-20 18:22:41\n", + " L8\n", " 0.000000\n", " 5.149\n", " MULTIPOINT (-117.46845 33.29331, -117.46836 33...\n", " \n", " \n", - " 20\n", - " L8\n", + " 25\n", " 2021-02-05 18:22:40\n", + " L8\n", " 0.000000\n", " 4.352\n", " MULTIPOINT (-117.46831 33.29341, -117.46823 33...\n", " \n", " \n", - " 21\n", - " L8\n", + " 26\n", " 2021-02-21 18:22:35\n", + " L8\n", " 0.000000\n", " 4.232\n", " MULTIPOINT (-117.46831 33.29343, -117.46820 33...\n", " \n", " \n", - " 48\n", - " L8\n", + " 27\n", " 2021-03-09 18:22:27\n", + " L8\n", " 0.060021\n", " 5.644\n", - " MULTIPOINT (-117.45881 33.28239, -117.45890 33...\n", + " MULTIPOINT (-117.46572 33.29051, -117.46569 33...\n", " \n", " \n", - " 22\n", - " L8\n", + " 28\n", " 2021-06-13 18:22:22\n", + " L8\n", " 0.010056\n", " 4.434\n", " MULTIPOINT (-117.45511 33.27808, -117.45503 33...\n", " \n", " \n", - " 23\n", - " L8\n", + " 29\n", " 2021-10-19 18:22:59\n", + " L8\n", " 0.000000\n", " 4.356\n", " MULTIPOINT (-117.46817 33.29345, -117.46815 33...\n", " \n", " \n", - " 24\n", - " L9\n", + " 30\n", " 2021-11-23 18:25:01\n", + " L9\n", " 0.001055\n", " 4.860\n", " MULTIPOINT (-117.46823 33.29345, -117.46815 33...\n", " \n", " \n", - " 25\n", - " L8\n", + " 31\n", " 2021-12-22 18:22:51\n", + " L8\n", " 0.106271\n", " 5.356\n", " MULTIPOINT (-117.45486 33.27788, -117.45479 33...\n", " \n", " \n", - " 26\n", - " L8\n", + " 32\n", " 2022-01-23 18:22:43\n", + " L8\n", " 0.000000\n", " 5.018\n", " MULTIPOINT (-117.46831 33.29334, -117.46829 33...\n", " \n", " \n", - " 27\n", - " L9\n", + " 33\n", " 2022-02-16 18:22:44\n", + " L9\n", " 0.000000\n", " 4.984\n", " MULTIPOINT (-117.46811 33.29345, -117.46803 33...\n", " \n", " \n", - " 28\n", - " L8\n", + " 34\n", " 2022-02-24 18:22:34\n", + " L8\n", " 0.000000\n", " 4.809\n", " MULTIPOINT (-117.46831 33.29332, -117.46831 33...\n", " \n", " \n", - " 50\n", - " L9\n", + " 35\n", " 2022-03-04 18:22:34\n", + " L9\n", " 0.348253\n", " 6.277\n", - " MULTIPOINT (-117.46464 33.28832, -117.46458 33...\n", + " MULTIPOINT (-117.46608 33.29048, -117.46604 33...\n", " \n", " \n", - " 29\n", - " L8\n", + " 36\n", " 2022-03-12 18:22:30\n", + " L8\n", " 0.000000\n", " 4.449\n", " MULTIPOINT (-117.46841 33.29332, -117.46831 33...\n", " \n", " \n", - " 30\n", - " L8\n", + " 37\n", " 2022-04-13 18:22:23\n", + " L8\n", " 0.000000\n", " 4.154\n", " MULTIPOINT (-117.46829 33.29345, -117.46817 33...\n", " \n", " \n", - " 31\n", - " L9\n", + " 38\n", " 2022-04-21 18:22:18\n", + " L9\n", " 0.035151\n", " 3.886\n", " 
MULTIPOINT (-117.44798 33.26931, -117.44789 33...\n", " \n", " \n", - " 32\n", - " L9\n", + " 39\n", " 2022-06-24 18:22:26\n", + " L9\n", " 0.000000\n", " 4.793\n", " MULTIPOINT (-117.46831 33.29341, -117.46824 33...\n", " \n", " \n", - " 33\n", - " L8\n", + " 40\n", " 2022-07-02 18:22:47\n", + " L8\n", " 0.000000\n", " 4.817\n", " MULTIPOINT (-117.46823 33.29345, -117.46815 33...\n", " \n", " \n", - " 34\n", - " L9\n", + " 41\n", " 2022-08-11 18:22:43\n", + " L9\n", " 0.000000\n", " 4.334\n", " MULTIPOINT (-117.46812 33.29345, -117.46804 33...\n", " \n", " \n", - " 35\n", - " L8\n", + " 42\n", " 2022-08-19 18:23:05\n", + " L8\n", " 0.019461\n", " 4.262\n", " MULTIPOINT (-117.45778 33.28118, -117.45777 33...\n", " \n", " \n", - " 36\n", - " L9\n", + " 43\n", " 2022-08-27 18:22:47\n", + " L9\n", " 0.003008\n", " 4.178\n", " MULTIPOINT (-117.46284 33.28711, -117.46280 33...\n", " \n", " \n", - " 37\n", - " L8\n", + " 44\n", " 2022-09-20 18:23:12\n", + " L8\n", " 0.004018\n", " 3.943\n", " MULTIPOINT (-117.46106 33.28482, -117.46102 33...\n", " \n", " \n", - " 49\n", - " L8\n", + " 45\n", " 2022-10-22 18:23:08\n", + " L8\n", " 0.248255\n", " 5.138\n", - " MULTIPOINT (-117.45891 33.28233, -117.45894 33...\n", + " MULTIPOINT (-117.46821 33.29345, -117.46815 33...\n", " \n", " \n", - " 51\n", - " L9\n", + " 46\n", " 2022-10-30 18:22:59\n", + " L9\n", " 0.020157\n", " 4.587\n", - " MULTIPOINT (-117.44076 33.26042, -117.44082 33...\n", + " MULTIPOINT (-117.44473 33.26540, -117.44467 33...\n", " \n", " \n", - " 38\n", - " L9\n", + " 47\n", " 2022-11-15 18:23:01\n", + " L9\n", " 0.000000\n", " 4.828\n", " MULTIPOINT (-117.46825 33.29345, -117.46815 33...\n", " \n", " \n", - " 39\n", - " L8\n", + " 48\n", " 2022-11-23 18:23:05\n", + " L8\n", " 0.000000\n", " 4.894\n", " MULTIPOINT (-117.46810 33.29345, -117.46802 33...\n", " \n", " \n", - " 40\n", - " L8\n", + " 49\n", " 2022-12-09 18:23:05\n", + " L8\n", " 0.000000\n", " 5.371\n", " MULTIPOINT (-117.46810 33.29345, -117.46805 33...\n", " \n", " \n", - " 52\n", - " L9\n", + " 50\n", " 2023-01-02 18:23:00\n", + " L9\n", " 0.059842\n", " 5.832\n", - " MULTIPOINT (-117.45891 33.28230, -117.45897 33...\n", + " MULTIPOINT (-117.46823 33.29345, -117.46815 33...\n", " \n", " \n", - " 41\n", - " L9\n", + " 51\n", " 2023-01-18 18:22:52\n", + " L9\n", " 0.149705\n", " 5.798\n", " MULTIPOINT (-117.46223 33.28630, -117.46215 33...\n", " \n", " \n", - " 42\n", - " L9\n", + " 52\n", " 2023-02-19 18:22:55\n", + " L9\n", " 0.000000\n", " 5.287\n", " MULTIPOINT (-117.46813 33.29345, -117.46805 33...\n", @@ -1307,115 +1307,115 @@ "" ], "text/plain": [ - " satname date cloud_cover geoaccuracy \\\n", - "0 L8 2018-12-30 18:22:25 0.000000 5.088 \n", - "43 L8 2019-02-16 18:22:17 0.000000 5.802 \n", - "44 L8 2019-03-20 18:22:08 0.000000 6.596 \n", - "45 L8 2019-06-08 18:22:20 0.263967 4.826 \n", - "1 L8 2019-07-10 18:22:29 0.003838 4.275 \n", - "2 L8 2019-07-26 18:22:33 0.007632 4.286 \n", - "3 L8 2019-08-11 18:22:40 0.000000 4.080 \n", - "4 L8 2019-08-27 18:22:44 0.000000 4.208 \n", - "5 L8 2019-09-12 18:22:48 0.000000 4.128 \n", - "6 L8 2019-10-14 18:22:56 0.007924 4.002 \n", - "7 L8 2019-10-30 18:22:56 0.014848 4.851 \n", - "8 L8 2019-11-15 18:22:53 0.000000 4.661 \n", - "9 L8 2019-12-17 18:22:50 0.003546 5.080 \n", - "10 L8 2020-01-02 18:22:45 0.000000 5.318 \n", - "11 L8 2020-01-18 18:22:42 0.000000 4.996 \n", - "46 L8 2020-03-22 18:22:20 0.000000 5.147 \n", - "47 L8 2020-04-23 18:22:05 0.014904 4.790 \n", - "12 L8 2020-06-10 18:22:09 0.000000 4.624 \n", - "13 L8 
2020-07-12 18:22:24 0.000000 4.785 \n", - "14 L8 2020-08-29 18:22:41 0.003502 4.401 \n", - "15 L8 2020-09-30 18:22:51 0.000000 4.235 \n", - "16 L8 2020-12-03 18:22:54 0.000000 4.724 \n", - "17 L8 2020-12-19 18:22:53 0.000000 4.938 \n", - "18 L8 2021-01-04 18:22:48 0.000000 4.964 \n", - "19 L8 2021-01-20 18:22:41 0.000000 5.149 \n", - "20 L8 2021-02-05 18:22:40 0.000000 4.352 \n", - "21 L8 2021-02-21 18:22:35 0.000000 4.232 \n", - "48 L8 2021-03-09 18:22:27 0.060021 5.644 \n", - "22 L8 2021-06-13 18:22:22 0.010056 4.434 \n", - "23 L8 2021-10-19 18:22:59 0.000000 4.356 \n", - "24 L9 2021-11-23 18:25:01 0.001055 4.860 \n", - "25 L8 2021-12-22 18:22:51 0.106271 5.356 \n", - "26 L8 2022-01-23 18:22:43 0.000000 5.018 \n", - "27 L9 2022-02-16 18:22:44 0.000000 4.984 \n", - "28 L8 2022-02-24 18:22:34 0.000000 4.809 \n", - "50 L9 2022-03-04 18:22:34 0.348253 6.277 \n", - "29 L8 2022-03-12 18:22:30 0.000000 4.449 \n", - "30 L8 2022-04-13 18:22:23 0.000000 4.154 \n", - "31 L9 2022-04-21 18:22:18 0.035151 3.886 \n", - "32 L9 2022-06-24 18:22:26 0.000000 4.793 \n", - "33 L8 2022-07-02 18:22:47 0.000000 4.817 \n", - "34 L9 2022-08-11 18:22:43 0.000000 4.334 \n", - "35 L8 2022-08-19 18:23:05 0.019461 4.262 \n", - "36 L9 2022-08-27 18:22:47 0.003008 4.178 \n", - "37 L8 2022-09-20 18:23:12 0.004018 3.943 \n", - "49 L8 2022-10-22 18:23:08 0.248255 5.138 \n", - "51 L9 2022-10-30 18:22:59 0.020157 4.587 \n", - "38 L9 2022-11-15 18:23:01 0.000000 4.828 \n", - "39 L8 2022-11-23 18:23:05 0.000000 4.894 \n", - "40 L8 2022-12-09 18:23:05 0.000000 5.371 \n", - "52 L9 2023-01-02 18:23:00 0.059842 5.832 \n", - "41 L9 2023-01-18 18:22:52 0.149705 5.798 \n", - "42 L9 2023-02-19 18:22:55 0.000000 5.287 \n", + " date satname cloud_cover geoaccuracy \\\n", + "0 2018-12-30 18:22:25 L8 0.000000 5.088 \n", + "1 2019-02-16 18:22:17 L8 0.000000 5.802 \n", + "2 2019-03-20 18:22:08 L8 0.000000 6.596 \n", + "3 2019-06-08 18:22:20 L8 0.263967 4.826 \n", + "4 2019-07-10 18:22:29 L8 0.003838 4.275 \n", + "5 2019-07-26 18:22:33 L8 0.007632 4.286 \n", + "6 2019-08-11 18:22:40 L8 0.000000 4.080 \n", + "7 2019-08-27 18:22:44 L8 0.000000 4.208 \n", + "8 2019-09-12 18:22:48 L8 0.000000 4.128 \n", + "9 2019-10-14 18:22:56 L8 0.007924 4.002 \n", + "10 2019-10-30 18:22:56 L8 0.014848 4.851 \n", + "11 2019-11-15 18:22:53 L8 0.000000 4.661 \n", + "12 2019-12-17 18:22:50 L8 0.003546 5.080 \n", + "13 2020-01-02 18:22:45 L8 0.000000 5.318 \n", + "14 2020-01-18 18:22:42 L8 0.000000 4.996 \n", + "15 2020-03-22 18:22:20 L8 0.000000 5.147 \n", + "16 2020-04-23 18:22:05 L8 0.014904 4.790 \n", + "17 2020-06-10 18:22:09 L8 0.000000 4.624 \n", + "18 2020-07-12 18:22:24 L8 0.000000 4.785 \n", + "19 2020-08-29 18:22:41 L8 0.003502 4.401 \n", + "20 2020-09-30 18:22:51 L8 0.000000 4.235 \n", + "21 2020-12-03 18:22:54 L8 0.000000 4.724 \n", + "22 2020-12-19 18:22:53 L8 0.000000 4.938 \n", + "23 2021-01-04 18:22:48 L8 0.000000 4.964 \n", + "24 2021-01-20 18:22:41 L8 0.000000 5.149 \n", + "25 2021-02-05 18:22:40 L8 0.000000 4.352 \n", + "26 2021-02-21 18:22:35 L8 0.000000 4.232 \n", + "27 2021-03-09 18:22:27 L8 0.060021 5.644 \n", + "28 2021-06-13 18:22:22 L8 0.010056 4.434 \n", + "29 2021-10-19 18:22:59 L8 0.000000 4.356 \n", + "30 2021-11-23 18:25:01 L9 0.001055 4.860 \n", + "31 2021-12-22 18:22:51 L8 0.106271 5.356 \n", + "32 2022-01-23 18:22:43 L8 0.000000 5.018 \n", + "33 2022-02-16 18:22:44 L9 0.000000 4.984 \n", + "34 2022-02-24 18:22:34 L8 0.000000 4.809 \n", + "35 2022-03-04 18:22:34 L9 0.348253 6.277 \n", + "36 2022-03-12 18:22:30 L8 0.000000 4.449 
\n", + "37 2022-04-13 18:22:23 L8 0.000000 4.154 \n", + "38 2022-04-21 18:22:18 L9 0.035151 3.886 \n", + "39 2022-06-24 18:22:26 L9 0.000000 4.793 \n", + "40 2022-07-02 18:22:47 L8 0.000000 4.817 \n", + "41 2022-08-11 18:22:43 L9 0.000000 4.334 \n", + "42 2022-08-19 18:23:05 L8 0.019461 4.262 \n", + "43 2022-08-27 18:22:47 L9 0.003008 4.178 \n", + "44 2022-09-20 18:23:12 L8 0.004018 3.943 \n", + "45 2022-10-22 18:23:08 L8 0.248255 5.138 \n", + "46 2022-10-30 18:22:59 L9 0.020157 4.587 \n", + "47 2022-11-15 18:23:01 L9 0.000000 4.828 \n", + "48 2022-11-23 18:23:05 L8 0.000000 4.894 \n", + "49 2022-12-09 18:23:05 L8 0.000000 5.371 \n", + "50 2023-01-02 18:23:00 L9 0.059842 5.832 \n", + "51 2023-01-18 18:22:52 L9 0.149705 5.798 \n", + "52 2023-02-19 18:22:55 L9 0.000000 5.287 \n", "\n", " geometry \n", "0 MULTIPOINT (-117.46831 33.29341, -117.46822 33... \n", - "43 MULTIPOINT (-117.45881 33.28239, -117.45891 33... \n", - "44 MULTIPOINT (-117.45875 33.28242, -117.45889 33... \n", - "45 MULTIPOINT (-117.44480 33.26540, -117.44481 33... \n", - "1 MULTIPOINT (-117.46834 33.29332, -117.46831 33... \n", - "2 MULTIPOINT (-117.46209 33.28590, -117.46199 33... \n", - "3 MULTIPOINT (-117.46831 33.29335, -117.46828 33... \n", - "4 MULTIPOINT (-117.46832 33.29332, -117.46831 33... \n", - "5 MULTIPOINT (-117.46829 33.29345, -117.46817 33... \n", - "6 MULTIPOINT (-117.46154 33.28536, -117.46150 33... \n", - "7 MULTIPOINT (-117.46817 33.29345, -117.46815 33... \n", - "8 MULTIPOINT (-117.46824 33.29345, -117.46815 33... \n", - "9 MULTIPOINT (-117.46827 33.29345, -117.46815 33... \n", - "10 MULTIPOINT (-117.46829 33.29345, -117.46818 33... \n", - "11 MULTIPOINT (-117.46842 33.29331, -117.46833 33... \n", - "46 MULTIPOINT (-117.45891 33.28236, -117.45892 33... \n", - "47 MULTIPOINT (-117.43233 33.24960, -117.43234 33... \n", - "12 MULTIPOINT (-117.46831 33.29337, -117.46826 33... \n", - "13 MULTIPOINT (-117.46833 33.29332, -117.46831 33... \n", - "14 MULTIPOINT (-117.46829 33.29345, -117.46818 33... \n", - "15 MULTIPOINT (-117.46822 33.29345, -117.46815 33... \n", - "16 MULTIPOINT (-117.46824 33.29345, -117.46815 33... \n", - "17 MULTIPOINT (-117.46822 33.29345, -117.46815 33... \n", - "18 MULTIPOINT (-117.46824 33.29345, -117.46815 33... \n", - "19 MULTIPOINT (-117.46845 33.29331, -117.46836 33... \n", - "20 MULTIPOINT (-117.46831 33.29341, -117.46823 33... \n", - "21 MULTIPOINT (-117.46831 33.29343, -117.46820 33... \n", - "48 MULTIPOINT (-117.45881 33.28239, -117.45890 33... \n", - "22 MULTIPOINT (-117.45511 33.27808, -117.45503 33... \n", - "23 MULTIPOINT (-117.46817 33.29345, -117.46815 33... \n", - "24 MULTIPOINT (-117.46823 33.29345, -117.46815 33... \n", - "25 MULTIPOINT (-117.45486 33.27788, -117.45479 33... \n", - "26 MULTIPOINT (-117.46831 33.29334, -117.46829 33... \n", - "27 MULTIPOINT (-117.46811 33.29345, -117.46803 33... \n", - "28 MULTIPOINT (-117.46831 33.29332, -117.46831 33... \n", - "50 MULTIPOINT (-117.46464 33.28832, -117.46458 33... \n", - "29 MULTIPOINT (-117.46841 33.29332, -117.46831 33... \n", - "30 MULTIPOINT (-117.46829 33.29345, -117.46817 33... \n", - "31 MULTIPOINT (-117.44798 33.26931, -117.44789 33... \n", - "32 MULTIPOINT (-117.46831 33.29341, -117.46824 33... \n", - "33 MULTIPOINT (-117.46823 33.29345, -117.46815 33... \n", - "34 MULTIPOINT (-117.46812 33.29345, -117.46804 33... \n", - "35 MULTIPOINT (-117.45778 33.28118, -117.45777 33... \n", - "36 MULTIPOINT (-117.46284 33.28711, -117.46280 33... \n", - "37 MULTIPOINT (-117.46106 33.28482, -117.46102 33... 
\n", - "49 MULTIPOINT (-117.45891 33.28233, -117.45894 33... \n", - "51 MULTIPOINT (-117.44076 33.26042, -117.44082 33... \n", - "38 MULTIPOINT (-117.46825 33.29345, -117.46815 33... \n", - "39 MULTIPOINT (-117.46810 33.29345, -117.46802 33... \n", - "40 MULTIPOINT (-117.46810 33.29345, -117.46805 33... \n", - "52 MULTIPOINT (-117.45891 33.28230, -117.45897 33... \n", - "41 MULTIPOINT (-117.46223 33.28630, -117.46215 33... \n", - "42 MULTIPOINT (-117.46813 33.29345, -117.46805 33... " + "1 MULTIPOINT (-117.46822 33.29345, -117.46815 33... \n", + "2 MULTIPOINT (-117.46815 33.29345, -117.46805 33... \n", + "3 MULTIPOINT (-117.44858 33.26998, -117.44854 33... \n", + "4 MULTIPOINT (-117.46834 33.29332, -117.46831 33... \n", + "5 MULTIPOINT (-117.46209 33.28590, -117.46199 33... \n", + "6 MULTIPOINT (-117.46831 33.29335, -117.46828 33... \n", + "7 MULTIPOINT (-117.46832 33.29332, -117.46831 33... \n", + "8 MULTIPOINT (-117.46829 33.29345, -117.46817 33... \n", + "9 MULTIPOINT (-117.46154 33.28536, -117.46150 33... \n", + "10 MULTIPOINT (-117.46817 33.29345, -117.46815 33... \n", + "11 MULTIPOINT (-117.46824 33.29345, -117.46815 33... \n", + "12 MULTIPOINT (-117.46827 33.29345, -117.46815 33... \n", + "13 MULTIPOINT (-117.46829 33.29345, -117.46818 33... \n", + "14 MULTIPOINT (-117.46842 33.29331, -117.46833 33... \n", + "15 MULTIPOINT (-117.46831 33.29345, -117.46820 33... \n", + "16 MULTIPOINT (-117.43574 33.25393, -117.43573 33... \n", + "17 MULTIPOINT (-117.46831 33.29337, -117.46826 33... \n", + "18 MULTIPOINT (-117.46833 33.29332, -117.46831 33... \n", + "19 MULTIPOINT (-117.46829 33.29345, -117.46818 33... \n", + "20 MULTIPOINT (-117.46822 33.29345, -117.46815 33... \n", + "21 MULTIPOINT (-117.46824 33.29345, -117.46815 33... \n", + "22 MULTIPOINT (-117.46822 33.29345, -117.46815 33... \n", + "23 MULTIPOINT (-117.46824 33.29345, -117.46815 33... \n", + "24 MULTIPOINT (-117.46845 33.29331, -117.46836 33... \n", + "25 MULTIPOINT (-117.46831 33.29341, -117.46823 33... \n", + "26 MULTIPOINT (-117.46831 33.29343, -117.46820 33... \n", + "27 MULTIPOINT (-117.46572 33.29051, -117.46569 33... \n", + "28 MULTIPOINT (-117.45511 33.27808, -117.45503 33... \n", + "29 MULTIPOINT (-117.46817 33.29345, -117.46815 33... \n", + "30 MULTIPOINT (-117.46823 33.29345, -117.46815 33... \n", + "31 MULTIPOINT (-117.45486 33.27788, -117.45479 33... \n", + "32 MULTIPOINT (-117.46831 33.29334, -117.46829 33... \n", + "33 MULTIPOINT (-117.46811 33.29345, -117.46803 33... \n", + "34 MULTIPOINT (-117.46831 33.29332, -117.46831 33... \n", + "35 MULTIPOINT (-117.46608 33.29048, -117.46604 33... \n", + "36 MULTIPOINT (-117.46841 33.29332, -117.46831 33... \n", + "37 MULTIPOINT (-117.46829 33.29345, -117.46817 33... \n", + "38 MULTIPOINT (-117.44798 33.26931, -117.44789 33... \n", + "39 MULTIPOINT (-117.46831 33.29341, -117.46824 33... \n", + "40 MULTIPOINT (-117.46823 33.29345, -117.46815 33... \n", + "41 MULTIPOINT (-117.46812 33.29345, -117.46804 33... \n", + "42 MULTIPOINT (-117.45778 33.28118, -117.45777 33... \n", + "43 MULTIPOINT (-117.46284 33.28711, -117.46280 33... \n", + "44 MULTIPOINT (-117.46106 33.28482, -117.46102 33... \n", + "45 MULTIPOINT (-117.46821 33.29345, -117.46815 33... \n", + "46 MULTIPOINT (-117.44473 33.26540, -117.44467 33... \n", + "47 MULTIPOINT (-117.46825 33.29345, -117.46815 33... \n", + "48 MULTIPOINT (-117.46810 33.29345, -117.46802 33... \n", + "49 MULTIPOINT (-117.46810 33.29345, -117.46805 33... \n", + "50 MULTIPOINT (-117.46823 33.29345, -117.46815 33... 
\n", + "51 MULTIPOINT (-117.46223 33.28630, -117.46215 33... \n", + "52 MULTIPOINT (-117.46813 33.29345, -117.46805 33... " ] }, "execution_count": 14, @@ -1424,7 +1424,9 @@ } ], "source": [ - "from coastseg.merge_utils import calculate_overlap\n", + "from coastseg.merge_utils import calculate_overlap, clip_gdfs, read_first_geojson_file, convert_lines_to_multipoints,merge_and_average\n", + "from functools import reduce\n", + "\n", "\n", "result_gdf = gpd.GeoDataFrame( geometry=[], crs='epsg:4326')\n", "combined_gdf = gpd.GeoDataFrame( geometry=[], crs='epsg:4326')\n", @@ -1448,247 +1450,58 @@ "if overlap_gdf.empty or len(clipped_shorelines_gdfs) == 0:\n", " print(\"No overlapping ROIs found. Sessions can be merged.\")\n", " # merge the geodataframes on date and satname and average the cloud_cover and geoaccuracy for the merged rows\n", - " aggregation_funcs = {\n", - " 'cloud_cover': 'mean',\n", - " 'geoaccuracy': 'mean'\n", - " }\n", - " merged_gdf = merge_geodataframes(['date', 'satname'],'inner', aggregation_funcs,'epsg:4326', *gdfs)\n", - "\n", - " # merge the geometries with the same date and satname\n", - " geometry_columns = [col for col in merged_gdf.columns if 'geometry' in col]\n", - " merged_gdf = merge_geometries(merged_gdf, columns=geometry_columns, operation=unary_union)\n", - " # combine all the extracted shorelines gdfs into a single gdf\n", - " result_gdf = gpd.GeoDataFrame( geometry=[], crs='epsg:4326')\n", - " result_gdf.set_crs(\"EPSG:4326\", inplace=True)\n", "\n", " for gdf in gdfs:\n", " if not gdf.crs:\n", " gdf.set_crs(\"EPSG:4326\", inplace=True)\n", - " result_gdf = pd.concat([gdf, result_gdf], ignore_index=True)\n", + " # result_gdf = pd.concat([gdf, result_gdf], ignore_index=True)\n", + " \n", + " # Perform a full outer join and average the numeric columns across all GeoDataFrames\n", + " result = reduce(merge_and_average, gdfs)\n", "\n", - " # combine the merged shorelines with all the extracted shorelines whose date and satname weren't in the merged shorelines\n", - " combined_gdf = mergeRightUnique(merged_gdf, result_gdf, ['satname', 'date'],CRS=\"EPSG:4326\").sort_values(by='date')\n", + " result.sort_values(by='date', inplace=True)\n", + " result.reset_index(drop=True, inplace=True)\n", "\n", - "else:\n", - " print(\"Overlapping ROIs found. This script does not support overlapping ROIs. 
Any shorelines in overlapping regions will be combined with a union resulting in multiple shorelines on the same date.\")\n", - " # merge the geodataframes on date and satname and average the cloud_cover and geoaccuracy for the merged rows\n", - " aggregation_funcs = {\n", - " 'cloud_cover': 'mean',\n", - " 'geoaccuracy': 'mean'\n", - " }\n", - " merged_gdf = merge_geodataframes(['date', 'satname'],'inner', aggregation_funcs,'epsg:4326', *gdfs)\n", + "print(f\"Combined {len(result)} rows from {len(gdfs)} GeoDataFrames\")\n", + "print(f\"The following dataframe contains the combined extracted shorelines from all sessions.\\n Shorelines that were extracted on the same dates have been combined.\")\n", "\n", - " # merge the geometries with the same date and satname\n", - " geometry_columns = [col for col in merged_gdf.columns if 'geometry' in col]\n", - " merged_gdf = merge_geometries(merged_gdf, columns=geometry_columns, operation=unary_union)\n", - " # combine all the extracted shorelines gdfs into a single gdf\n", - " result_gdf = gpd.GeoDataFrame( geometry=[], crs='epsg:4326')\n", - " result_gdf.set_crs(\"EPSG:4326\", inplace=True)\n", "\n", - " for gdf in gdfs:\n", - " if not gdf.crs:\n", - " gdf.set_crs(\"EPSG:4326\", inplace=True)\n", - " result_gdf = pd.concat([gdf, result_gdf], ignore_index=True)\n", + "combined_gdf = result\n", + "combined_gdf" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Save the Merged Extracted Shorelines to a JSON file\n", + "- This will contains all the metadata for each extracted shoreline such as \n", "\n", - " # combine the merged shorelines with all the extracted shorelines whose date and satname weren't in the merged shorelines\n", - " combined_gdf = mergeRightUnique(merged_gdf, result_gdf, ['satname', 'date'],CRS=\"EPSG:4326\").sort_values(by='date')\n", "\n", - "print(f\"Combined {len(combined_gdf)} rows from {len(gdfs)} GeoDataFrames\")\n", - "print(f\"The following dataframe contains the combined extracted shorelines from all sessions.\\n Shorelines that were extracted on the same dates have been combined.\")\n", - "combined_gdf" + " 1. cloud cover\n", + " 2. date\n", + " 3. satellite it was derived from \n", + " 4. geoaccuracy\n", + "- Filename: `extracted_shorelines_dict.json`\n", + " " ] }, { "cell_type": "code", - "execution_count": 98, + "execution_count": 15, "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
datesatnamegeoaccuracycloud_covergeometry
02018-12-30 18:22:25L85.0880.000000MULTIPOINT (-117.45892 33.28226, -117.45899 33...
12019-02-16 18:22:17L85.8020.000000MULTIPOINT (-117.45881 33.28239, -117.45891 33...
22019-03-20 18:22:08L86.5960.000000MULTIPOINT (-117.45875 33.28242, -117.45889 33...
32019-06-08 18:22:20L84.8260.263967MULTIPOINT (-117.44480 33.26540, -117.44481 33...
42019-07-10 18:22:29L84.2750.000000MULTIPOINT (-117.45899 33.28226, -117.45907 33...
52019-07-26 18:22:33L84.2860.015263MULTIPOINT (-117.45896 33.28226, -117.45904 33...
62019-08-11 18:22:40L84.0800.000000MULTIPOINT (-117.45896 33.28226, -117.45906 33...
72019-08-27 18:22:44L84.2080.000000MULTIPOINT (-117.45894 33.28226, -117.45902 33...
82019-09-12 18:22:48L84.1280.000000MULTIPOINT (-117.45891 33.28232, -117.45894 33...
92019-10-14 18:22:56L84.0020.015847MULTIPOINT (-117.45891 33.28235, -117.45892 33...
\n", - "
" - ], - "text/plain": [ - " date satname geoaccuracy cloud_cover \\\n", - "0 2018-12-30 18:22:25 L8 5.088 0.000000 \n", - "1 2019-02-16 18:22:17 L8 5.802 0.000000 \n", - "2 2019-03-20 18:22:08 L8 6.596 0.000000 \n", - "3 2019-06-08 18:22:20 L8 4.826 0.263967 \n", - "4 2019-07-10 18:22:29 L8 4.275 0.000000 \n", - "5 2019-07-26 18:22:33 L8 4.286 0.015263 \n", - "6 2019-08-11 18:22:40 L8 4.080 0.000000 \n", - "7 2019-08-27 18:22:44 L8 4.208 0.000000 \n", - "8 2019-09-12 18:22:48 L8 4.128 0.000000 \n", - "9 2019-10-14 18:22:56 L8 4.002 0.015847 \n", - "\n", - " geometry \n", - "0 MULTIPOINT (-117.45892 33.28226, -117.45899 33... \n", - "1 MULTIPOINT (-117.45881 33.28239, -117.45891 33... \n", - "2 MULTIPOINT (-117.45875 33.28242, -117.45889 33... \n", - "3 MULTIPOINT (-117.44480 33.26540, -117.44481 33... \n", - "4 MULTIPOINT (-117.45899 33.28226, -117.45907 33... \n", - "5 MULTIPOINT (-117.45896 33.28226, -117.45904 33... \n", - "6 MULTIPOINT (-117.45896 33.28226, -117.45906 33... \n", - "7 MULTIPOINT (-117.45894 33.28226, -117.45902 33... \n", - "8 MULTIPOINT (-117.45891 33.28232, -117.45894 33... \n", - "9 MULTIPOINT (-117.45891 33.28235, -117.45892 33... " - ] - }, - "execution_count": 98, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "gdfs[0].head(10)" - ] - }, - { - "cell_type": "code", - "execution_count": 95, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "Timestamp('2018-12-30 18:22:25')" - ] - }, - "execution_count": 95, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "gdfs[0]['date'].iloc[0]" - ] - }, - { - "cell_type": "code", - "execution_count": 19, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "geopandas.geodataframe.GeoDataFrame" - ] - }, - "execution_count": 19, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ - "type(combined_gdf)" + "# mapping of dictionary keys to dataframe columns\n", + "keymap ={'shorelines':'geometry',\n", + " 'dates':'date',\n", + " 'satname':'satname',\n", + " 'cloud_cover':'cloud_cover',\n", + " 'geoaccuracy':'geoaccuracy'}\n", + "# shoreline dict should have keys: dates, satname, cloud_cover, geoaccuracy, shorelines\n", + "shoreline_dict = dataframe_to_dict(combined_gdf,keymap)\n", + "# save the extracted shoreline dictionary to json file\n", + "to_file(shoreline_dict, os.path.join(merged_session_location, \"extracted_shorelines_dict.json\"))" ] }, { @@ -1699,7 +1512,7 @@ { "data": { "text/plain": [ - "Timestamp('2018-12-30 18:22:25')" + "53" ] }, "execution_count": 16, @@ -1708,78 +1521,7 @@ } ], "source": [ - "combined_gdf['date'].iloc[0]" - ] - }, - { - "cell_type": "code", - "execution_count": 94, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "Timestamp('2018-12-30 18:22:25')" - ] - }, - "execution_count": 94, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "pd.Timestamp('2018-12-30 18:22:25')" - ] - }, - { - "cell_type": "code", - "execution_count": 18, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "MULTIPOINT (-117.46831121455644 33.2934149376039, -117.46822498182809 33.293315724378125, -117.46814926544755 33.29325699482918, -117.46807955821971 33.29318097466683, -117.46801280310385 33.2930459285698, -117.46798680353123 33.29300289085092, -117.46790343007167 33.29291104288513, -117.46782472625975 33.292820680670765, -117.46778613576278 33.29277618691379, -117.46768262076851 33.29264127897712, 
... [several hundred additional coordinate pairs of this printed MULTIPOINT shoreline geometry omitted] ...
33.23675382107664, -117.42172634268137 33.23665685833546, -117.42163936412663 33.23653658615085, -117.42162445828751 33.23652190465724, -117.42150586212512 33.236387007520484, -117.42147755692491 33.23636867473098, -117.42137396383464 33.2362521553271, -117.42131546067229 33.23614036471634, -117.42129422253848 33.23611712635459, -117.42115382586492 33.23600813018987, -117.42112989823697 33.2359823837093, -117.42103915456426 33.23584739181233, -117.42099176237339 33.235786340395684, -117.42090879826648 33.23571253388786, -117.4208300483751 33.23563733623005, -117.42077957820777 33.235577671978575, -117.4206681194826 33.235443340065146)\n" - ] - } - ], - "source": [ - "print(combined_gdf['geometry'].iloc[0])" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Save the Merged Extracted Shorelines to a JSON file\n", - "- This will contains all the metadata for each extracted shoreline such as \n", - "\n", - "\n", - " 1. cloud cover\n", - " 2. date\n", - " 3. satellite it was derived from \n", - " 4. geoaccuracy\n", - "- Filename: `extracted_shorelines_dict.json`\n", - " " - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# mapping of dictionary keys to dataframe columns\n", - "keymap ={'shorelines':'geometry',\n", - " 'dates':'date',\n", - " 'satname':'satname',\n", - " 'cloud_cover':'cloud_cover',\n", - " 'geoaccuracy':'geoaccuracy'}\n", - "# shoreline dict should have keys: dates, satname, cloud_cover, geoaccuracy, shorelines\n", - "shoreline_dict = dataframe_to_dict(combined_gdf,keymap)\n", - "# save the extracted shoreline dictionary to json file\n", - "to_file(shoreline_dict, os.path.join(merged_session_location, \"extracted_shorelines_dict.json\"))" + "len(shoreline_dict['shorelines'])" ] }, { @@ -1791,7 +1533,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 17, "metadata": {}, "outputs": [], "source": [ @@ -1876,60 +1618,7 @@ }, { "cell_type": "code", - "execution_count": 20, - "metadata": {}, - "outputs": [], - "source": [ - "import geopandas as gpd\n", - "\n", - "gdf1=gpd.GeoDataFrame(\n", - " geometry=[], crs= None\n", - " )\n" - ] - }, - { - "cell_type": "code", - "execution_count": 23, - "metadata": {}, - "outputs": [], - "source": [ - "data = {\n", - " \"cloud_cover\": [0.1, 0.2, 0.3],\n", - " \"satname\": ['L8', 'L8' 'L8'],\n", - " \"date\": [\n", - " pd.Timestamp(\"2018-12-30 18:22:25\"),\n", - " pd.Timestamp(\"2018-1-30 19:22:25\"),\n", - " pd.Timestamp(\"2022-01-03 19:22:25\"),\n", - " ],\n", - " \"geometry\": [\n", - " MultiPoint([(0, 0), (1, 1)]),\n", - " MultiPoint([(2, 2), (3, 3)]),\n", - " MultiPoint([(4, 4), (5, 5)]),\n", - " ],\n", - "}\n", - "df = gpd.GeoDataFrame(geometry = data['geometry'], crs='epsg:4326')" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "data = {\n", - " \"cloud_cover\": [0.1, 0.2, 0.3],\n", - " \"satname\": ['L8', 'L8' 'L8'],\n", - " \"date\": [\n", - " pd.Timestamp(\"2018-12-30 18:22:25\"),\n", - " pd.Timestamp(\"2018-1-30 19:22:25\"),\n", - " pd.Timestamp(\"2022-01-03 19:22:25\"),\n", - " ],\n", - "}" - ] - }, - { - "cell_type": "code", - "execution_count": 26, + "execution_count": 18, "metadata": {}, "outputs": [ { @@ -1953,6 +1642,7 @@ " \n", " \n", " \n", + " geoaccuracy\n", " cloud_cover\n", " satname\n", " date\n", @@ -1962,6 +1652,7 @@ " \n", " \n", " 0\n", + " 1\n", " 0.1\n", " L8\n", " 2018-12-30 18:22:25\n", @@ -1969,6 +1660,7 @@ " \n", " \n", " 1\n", 
+ " 2\n", " 0.2\n", " L8\n", " 2018-01-30 19:22:25\n", @@ -1976,6 +1668,7 @@ " \n", " \n", " 2\n", + " 3\n", " 0.3\n", " L8\n", " 2022-01-03 19:22:25\n", @@ -1986,10 +1679,10 @@ "" ], "text/plain": [ - " cloud_cover satname date \\\n", - "0 0.1 L8 2018-12-30 18:22:25 \n", - "1 0.2 L8 2018-01-30 19:22:25 \n", - "2 0.3 L8 2022-01-03 19:22:25 \n", + " geoaccuracy cloud_cover satname date \\\n", + "0 1 0.1 L8 2018-12-30 18:22:25 \n", + "1 2 0.2 L8 2018-01-30 19:22:25 \n", + "2 3 0.3 L8 2022-01-03 19:22:25 \n", "\n", " geometry \n", "0 MULTIPOINT (0.00000 0.00000, 1.00000 1.00000) \n", @@ -1997,12 +1690,13 @@ "2 MULTIPOINT (4.00000 4.00000, 5.00000 5.00000) " ] }, - "execution_count": 26, + "execution_count": 18, "metadata": {}, "output_type": "execute_result" } ], "source": [ + "from coastseg.merge_utils import merge_geometries\n", "\n", "# create a list of geometries\n", "geometries = [\n", @@ -2010,11 +1704,11 @@ " MultiPoint([(2, 2), (3, 3)]),\n", " MultiPoint([(4, 4), (5, 5)]),\n", "]\n", - "\n", "# create a dictionary with the other columns\n", "data = {\n", + " \"geoaccuracy\": [1, 2, 3],\n", " \"cloud_cover\": [0.1, 0.2, 0.3],\n", - " \"satname\": ['L8', 'L8', 'L8'],\n", + " \"satname\": [\"L8\", \"L8\", \"L8\"],\n", " \"date\": [\n", " pd.Timestamp(\"2018-12-30 18:22:25\"),\n", " pd.Timestamp(\"2018-1-30 19:22:25\"),\n", @@ -2022,311 +1716,14 @@ " ],\n", " \"geometry\": geometries,\n", "}\n", - "\n", "# create a GeoDataFrame from the dictionary\n", - "df = gpd.GeoDataFrame(data, geometry='geometry', crs='epsg:4326')\n", - "df.set_crs('epsg:4326', inplace=True)\n" - ] - }, - { - "cell_type": "code", - "execution_count": 29, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "{'geoaccuracy': [1, 2, 3],\n", - " 'cloud_cover': [0.1, 0.2, 0.3],\n", - " 'satname': ['L8', 'L8', 'L8'],\n", - " 'dates': ['2018-12-30 18:22:25', '2018-1-30 19:22:25', '2022-01-03 19:22:25'],\n", - " 'shorelines': [,\n", - " ,\n", - " ]}" - ] - }, - "execution_count": 29, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "data = {\n", - " 'geoaccuracy': [1, 2, 3],\n", - " \"cloud_cover\": [0.1, 0.2, 0.3],\n", - " \"satname\": ['L8', 'L8', 'L8'],\n", - " \"dates\": [\n", - " \"2018-12-30 18:22:25\",\n", - " \"2018-1-30 19:22:25\",\n", - " \"2022-01-03 19:22:25\",\n", - " ],\n", - " \"shorelines\": geometries,\n", - "}\n", - "data" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "gdf1.crs" - ] - }, - { - "cell_type": "code", - "execution_count": 44, - "metadata": {}, - "outputs": [], - "source": [ - "expected = {\n", - " \"geoaccuracy\": [1, 2, 3],\n", - " \"cloud_cover\": [0.1, 0.2, 0.3],\n", - " \"satname\": [\"L8\", \"L8\", \"L8\"],\n", - " \"dates\": [\n", - " \"2018-12-30 18:22:25\",\n", - " \"2018-01-30 19:22:25\",\n", - " \"2022-01-03 19:22:25\",\n", - " ],\n", - " \"shorelines\": [\n", - " np.array([[0.0, 0.0], [1.0, 1.0]]),\n", - " np.array([[2.0, 2.0], [3.0, 3.0]]),\n", - " np.array([[4.0, 4.0], [5.0, 5.0]]),\n", - " ],\n", - "}" - ] - }, - { - "cell_type": "code", - "execution_count": 40, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "{'shorelines': [array([[0., 0.],\n", - " [1., 1.]]),\n", - " array([[2., 2.],\n", - " [3., 3.]]),\n", - " array([[4., 4.],\n", - " [5., 5.]])],\n", - " 'dates': ['2018-12-30 18:22:25',\n", - " '2018-01-30 19:22:25',\n", - " '2022-01-03 19:22:25'],\n", - " 'satname': ['L8', 'L8', 'L8'],\n", - " 'cloud_cover': [0.1, 0.2, 0.3]}" - ] - }, - 
"execution_count": 40, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "from coastseg.merge_utils import dataframe_to_dict\n", - "\n", - "key_map = {\n", - " \"shorelines\": \"geometry\",\n", - " \"dates\": \"date\",\n", - " \"satname\": \"satname\",\n", - " \"cloud_cover\": \"cloud_cover\",\n", - " \"geoaccuracy\": \"geoaccuracy\",\n", - "}\n", - "\n", - "result = dataframe_to_dict(df, key_map)\n", - "result" - ] - }, - { - "cell_type": "code", - "execution_count": 47, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "[array([[0., 0.],\n", - " [1., 1.]]),\n", - " array([[2., 2.],\n", - " [3., 3.]]),\n", - " array([[4., 4.],\n", - " [5., 5.]])]" - ] - }, - "execution_count": 47, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "result['shorelines']" - ] - }, - { - "cell_type": "code", - "execution_count": 45, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "[array([[0., 0.],\n", - " [1., 1.]]),\n", - " array([[2., 2.],\n", - " [3., 3.]]),\n", - " array([[4., 4.],\n", - " [5., 5.]])]" - ] - }, - "execution_count": 45, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "expected['shorelines']" - ] - }, - { - "cell_type": "code", - "execution_count": 46, - "metadata": {}, - "outputs": [ - { - "ename": "ValueError", - "evalue": "The truth value of an array with more than one element is ambiguous. Use a.any() or a.all()", - "output_type": "error", - "traceback": [ - "\u001b[1;31m---------------------------------------------------------------------------\u001b[0m", - "\u001b[1;31mValueError\u001b[0m Traceback (most recent call last)", - "\u001b[1;32mc:\\development\\doodleverse\\coastseg\\CoastSeg\\merge_session_final_nonoverlapping_script.ipynb Cell 36\u001b[0m line \u001b[0;36m1\n\u001b[1;32m----> 1\u001b[0m expected[\u001b[39m'\u001b[39;49m\u001b[39mshorelines\u001b[39;49m\u001b[39m'\u001b[39;49m] \u001b[39m==\u001b[39;49m result[\u001b[39m'\u001b[39;49m\u001b[39mshorelines\u001b[39;49m\u001b[39m'\u001b[39;49m]\n", - "\u001b[1;31mValueError\u001b[0m: The truth value of an array with more than one element is ambiguous. Use a.any() or a.all()" - ] - } - ], - "source": [ - "expected['shorelines'] == result['shorelines']" - ] - }, - { - "cell_type": "code", - "execution_count": 55, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "True\n" - ] - } - ], - "source": [ - "import numpy as np\n", - "\n", - "# Define the two lists of arrays\n", - "list1 = [np.array([[0., 0.], [1., 1.]]), np.array([[2., 2.], [3., 3.]]), np.array([[4., 4.], [5., 5.]])]\n", - "list2 = [np.array([[0., 0.], [1., 1.]]), np.array([[2., 2.], [3., 3.]]), np.array([[4., 4.], [5., 5.]])]\n", - "\n", - "# Check if the two lists of arrays are equal\n", - "equal = all(np.array_equal(a, b) for a, b in zip(list1, list2))\n", - "\n", - "print(equal) # This will print True if the two lists of arrays are equal, and False otherwise" - ] - }, - { - "cell_type": "code", - "execution_count": 61, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "[[0. 0.]\n", - " [1. 1.]]\n", - "True\n", - "[[2. 2.]\n", - " [3. 3.]]\n", - "True\n", - "[[4. 4.]\n", - " [5. 
5.]]\n", - "True\n" - ] - } - ], - "source": [ - "for a, b in zip(list1, list2):\n", - " print(b)\n", - " print((np.array_equal(a, b)))" - ] - }, - { - "cell_type": "code", - "execution_count": 49, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "True" - ] - }, - "execution_count": 49, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "a=list1[0]\n", - "b=list2[0]\n", - "np.array_equal(a, b)" - ] - }, - { - "cell_type": "code", - "execution_count": 51, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "False" - ] - }, - "execution_count": 51, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "all([True,False])" - ] - }, - { - "cell_type": "code", - "execution_count": 32, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "True" - ] - }, - "execution_count": 32, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "expected == expected2" + "df = gpd.GeoDataFrame(data, geometry=\"geometry\", crs=\"epsg:4326\")\n", + "df" ] }, { "cell_type": "code", - "execution_count": 66, + "execution_count": 19, "metadata": {}, "outputs": [ { @@ -2350,382 +1747,67 @@ " \n", " \n", " \n", + " geoaccuracy\n", + " cloud_cover\n", + " satname\n", + " date\n", " geometry\n", " \n", " \n", " \n", " \n", " 0\n", - " MULTILINESTRING ((0.00000 0.00000, 1.00000 1.0...\n", + " 1\n", + " 0.1\n", + " L8\n", + " 2018-12-30 18:22:25\n", + " MULTIPOINT (0.00000 0.00000, 1.00000 1.00000)\n", " \n", " \n", " 1\n", - " MULTILINESTRING ((4.00000 4.00000, 5.00000 5.0...\n", + " 2\n", + " 0.2\n", + " L8\n", + " 2018-01-30 19:22:25\n", + " MULTIPOINT (2.00000 2.00000, 3.00000 3.00000)\n", + " \n", + " \n", + " 2\n", + " 3\n", + " 0.3\n", + " L8\n", + " 2022-01-03 19:22:25\n", + " MULTIPOINT (4.00000 4.00000, 5.00000 5.00000)\n", " \n", " \n", "\n", "" ], "text/plain": [ - " geometry\n", - "0 MULTILINESTRING ((0.00000 0.00000, 1.00000 1.0...\n", - "1 MULTILINESTRING ((4.00000 4.00000, 5.00000 5.0..." 
+ " geoaccuracy cloud_cover satname date \\\n", + "0 1 0.1 L8 2018-12-30 18:22:25 \n", + "1 2 0.2 L8 2018-01-30 19:22:25 \n", + "2 3 0.3 L8 2022-01-03 19:22:25 \n", + "\n", + " geometry \n", + "0 MULTIPOINT (0.00000 0.00000, 1.00000 1.00000) \n", + "1 MULTIPOINT (2.00000 2.00000, 3.00000 3.00000) \n", + "2 MULTIPOINT (4.00000 4.00000, 5.00000 5.00000) " ] }, - "execution_count": 66, + "execution_count": 19, "metadata": {}, "output_type": "execute_result" } ], "source": [ - "data = {\n", - " \"geometry\": [\n", - " MultiLineString([[(0, 0), (1, 1)], [(2, 2), (3, 3)]]),\n", - " MultiLineString([[(4, 4), (5, 5)], [(6, 6), (7, 7)]]),\n", - " ]\n", - " }\n", - "\n", - "\n", - "gdf = gpd.GeoDataFrame(data)\n", - "gdf" + "result = merge_geometries(df)\n", + "result" ] }, { "cell_type": "code", - "execution_count": 67, - "metadata": {}, - "outputs": [ - { - "ename": "TypeError", - "evalue": "'MultiLineString' object is not iterable", - "output_type": "error", - "traceback": [ - "\u001b[1;31m---------------------------------------------------------------------------\u001b[0m", - "\u001b[1;31mTypeError\u001b[0m Traceback (most recent call last)", - "\u001b[1;32mc:\\development\\doodleverse\\coastseg\\CoastSeg\\merge_session_final_nonoverlapping_script.ipynb Cell 43\u001b[0m line \u001b[0;36m1\n\u001b[1;32m----> 1\u001b[0m result \u001b[39m=\u001b[39m convert_lines_to_multipoints(gdf)\n", - "\u001b[1;32mc:\\development\\doodleverse\\coastseg\\CoastSeg\\merge_session_final_nonoverlapping_script.ipynb Cell 43\u001b[0m line \u001b[0;36m9\n\u001b[0;32m 87\u001b[0m \u001b[39mreturn\u001b[39;00m geometry \u001b[39m# Return the original geometry if it's not a LineString or MultiLineString\u001b[39;00m\n\u001b[0;32m 89\u001b[0m \u001b[39m# Apply the conversion function to each row in the GeoDataFrame\u001b[39;00m\n\u001b[1;32m---> 90\u001b[0m gdf[\u001b[39m'\u001b[39m\u001b[39mgeometry\u001b[39m\u001b[39m'\u001b[39m] \u001b[39m=\u001b[39m gdf[\u001b[39m'\u001b[39;49m\u001b[39mgeometry\u001b[39;49m\u001b[39m'\u001b[39;49m]\u001b[39m.\u001b[39;49mapply(line_to_multipoint)\n\u001b[0;32m 92\u001b[0m \u001b[39mreturn\u001b[39;00m gdf\n", - "File \u001b[1;32mc:\\Users\\sf230\\anaconda3\\envs\\coastseg_transformers10\\lib\\site-packages\\geopandas\\geoseries.py:645\u001b[0m, in \u001b[0;36mGeoSeries.apply\u001b[1;34m(self, func, convert_dtype, args, **kwargs)\u001b[0m\n\u001b[0;32m 643\u001b[0m \u001b[39m@doc\u001b[39m(pd\u001b[39m.\u001b[39mSeries)\n\u001b[0;32m 644\u001b[0m \u001b[39mdef\u001b[39;00m \u001b[39mapply\u001b[39m(\u001b[39mself\u001b[39m, func, convert_dtype\u001b[39m=\u001b[39m\u001b[39mTrue\u001b[39;00m, args\u001b[39m=\u001b[39m(), \u001b[39m*\u001b[39m\u001b[39m*\u001b[39mkwargs):\n\u001b[1;32m--> 645\u001b[0m result \u001b[39m=\u001b[39m \u001b[39msuper\u001b[39m()\u001b[39m.\u001b[39mapply(func, convert_dtype\u001b[39m=\u001b[39mconvert_dtype, args\u001b[39m=\u001b[39margs, \u001b[39m*\u001b[39m\u001b[39m*\u001b[39mkwargs)\n\u001b[0;32m 646\u001b[0m \u001b[39mif\u001b[39;00m \u001b[39misinstance\u001b[39m(result, GeoSeries):\n\u001b[0;32m 647\u001b[0m \u001b[39mif\u001b[39;00m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39mcrs \u001b[39mis\u001b[39;00m \u001b[39mnot\u001b[39;00m \u001b[39mNone\u001b[39;00m:\n", - "File \u001b[1;32mc:\\Users\\sf230\\anaconda3\\envs\\coastseg_transformers10\\lib\\site-packages\\pandas\\core\\series.py:4630\u001b[0m, in \u001b[0;36mSeries.apply\u001b[1;34m(self, func, convert_dtype, args, **kwargs)\u001b[0m\n\u001b[0;32m 4520\u001b[0m \u001b[39mdef\u001b[39;00m 
[... intermediate pandas GeoSeries.apply / Series.apply traceback frames omitted ...]
\u001b[39mfor\u001b[39;00m multi \u001b[39min\u001b[39;00m points \u001b[39mfor\u001b[39;00m point \u001b[39min\u001b[39;00m multi])\n\u001b[0;32m 86\u001b[0m \u001b[39melse\u001b[39;00m:\n", - "\u001b[1;31mTypeError\u001b[0m: 'MultiLineString' object is not iterable" - ] - } - ], - "source": [ - "result = convert_lines_to_multipoints(gdf)" - ] - }, - { - "cell_type": "code", - "execution_count": 70, - "metadata": {}, - "outputs": [ - { - "data": { - "image/svg+xml": [ - "" - ], - "text/plain": [ - "" - ] - }, - "execution_count": 70, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "gdf.iloc[0]['geometry']" - ] - }, - { - "cell_type": "code", - "execution_count": 76, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "" - ] - }, - "execution_count": 76, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "gdf.iloc[0]['geometry'].geoms" - ] - }, - { - "cell_type": "code", - "execution_count": 78, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "[, ]\n" - ] - } - ], - "source": [ - "if isinstance(gdf.iloc[0]['geometry'], MultiLineString):\n", - " points = [MultiPoint(line.coords) for line in gdf.iloc[0]['geometry'].geoms]\n", - " print(points)" - ] - }, - { - "cell_type": "code", - "execution_count": 83, - "metadata": {}, - "outputs": [ - { - "data": { - "image/svg+xml": [ - "" - ], - "text/plain": [ - "" - ] - }, - "execution_count": 83, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "from shapely.geometry import Point,MultiPoint\n", - "\n", - "Point(0,0)\n", - "MultiPoint([Point(0,0).coords])" - ] - }, - { - "cell_type": "code", - "execution_count": 68, - "metadata": {}, - "outputs": [ - { - "ename": "TypeError", - "evalue": "'MultiLineString' object is not iterable", - "output_type": "error", - "traceback": [ - "\u001b[1;31m---------------------------------------------------------------------------\u001b[0m", - "\u001b[1;31mTypeError\u001b[0m Traceback (most recent call last)", - "\u001b[1;32mc:\\development\\doodleverse\\coastseg\\CoastSeg\\merge_session_final_nonoverlapping_script.ipynb Cell 44\u001b[0m line \u001b[0;36m1\n\u001b[0;32m 8\u001b[0m \u001b[39mreturn\u001b[39;00m geometry \u001b[39m# Return the original geometry if it's not a LineString or MultiLineString\u001b[39;00m\n\u001b[0;32m 10\u001b[0m \u001b[39m# Apply the conversion function to each row in the GeoDataFrame\u001b[39;00m\n\u001b[1;32m---> 11\u001b[0m gdf[\u001b[39m\"\u001b[39m\u001b[39mgeometry\u001b[39m\u001b[39m\"\u001b[39m] \u001b[39m=\u001b[39m gdf[\u001b[39m\"\u001b[39;49m\u001b[39mgeometry\u001b[39;49m\u001b[39m\"\u001b[39;49m]\u001b[39m.\u001b[39;49mapply(line_to_multipoint)\n", - "File \u001b[1;32mc:\\Users\\sf230\\anaconda3\\envs\\coastseg_transformers10\\lib\\site-packages\\geopandas\\geoseries.py:645\u001b[0m, in \u001b[0;36mGeoSeries.apply\u001b[1;34m(self, func, convert_dtype, args, **kwargs)\u001b[0m\n\u001b[0;32m 643\u001b[0m \u001b[39m@doc\u001b[39m(pd\u001b[39m.\u001b[39mSeries)\n\u001b[0;32m 644\u001b[0m \u001b[39mdef\u001b[39;00m \u001b[39mapply\u001b[39m(\u001b[39mself\u001b[39m, func, convert_dtype\u001b[39m=\u001b[39m\u001b[39mTrue\u001b[39;00m, args\u001b[39m=\u001b[39m(), \u001b[39m*\u001b[39m\u001b[39m*\u001b[39mkwargs):\n\u001b[1;32m--> 645\u001b[0m result \u001b[39m=\u001b[39m \u001b[39msuper\u001b[39m()\u001b[39m.\u001b[39mapply(func, convert_dtype\u001b[39m=\u001b[39mconvert_dtype, args\u001b[39m=\u001b[39margs, 
[... intermediate pandas Series.apply / map_infer traceback frames omitted ...]
"\u001b[1;32mc:\\development\\doodleverse\\coastseg\\CoastSeg\\merge_session_final_nonoverlapping_script.ipynb Cell 44\u001b[0m line \u001b[0;36m5\n\u001b[0;32m 3\u001b[0m \u001b[39mreturn\u001b[39;00m MultiPoint(geometry\u001b[39m.\u001b[39mcoords)\n\u001b[0;32m 4\u001b[0m \u001b[39melif\u001b[39;00m \u001b[39misinstance\u001b[39m(geometry, MultiLineString):\n\u001b[1;32m----> 5\u001b[0m points \u001b[39m=\u001b[39m [MultiPoint(line\u001b[39m.\u001b[39mcoords) \u001b[39mfor\u001b[39;00m line \u001b[39min\u001b[39;00m geometry]\n\u001b[0;32m 6\u001b[0m \u001b[39mreturn\u001b[39;00m MultiPoint([point \u001b[39mfor\u001b[39;00m multi \u001b[39min\u001b[39;00m points \u001b[39mfor\u001b[39;00m point \u001b[39min\u001b[39;00m multi])\n\u001b[0;32m 7\u001b[0m \u001b[39melse\u001b[39;00m:\n", - "\u001b[1;31mTypeError\u001b[0m: 'MultiLineString' object is not iterable" - ] - } - ], - "source": [ - "def line_to_multipoint(geometry):\n", - " if isinstance(geometry, LineString):\n", - " return MultiPoint(geometry.coords)\n", - " elif isinstance(geometry, MultiLineString):\n", - " points = [MultiPoint(line.coords) for line in geometry]\n", - " return MultiPoint([point for multi in points for point in multi])\n", - " else:\n", - " return geometry # Return the original geometry if it's not a LineString or MultiLineString\n", - "\n", - "# Apply the conversion function to each row in the GeoDataFrame\n", - "gdf[\"geometry\"] = gdf[\"geometry\"].apply(line_to_multipoint)\n" - ] - }, - { - "cell_type": "code", - "execution_count": 86, - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
geoaccuracycloud_coversatnamedategeometry
010.1L82018-12-30 18:22:25MULTIPOINT (0.00000 0.00000, 1.00000 1.00000)
120.2L82018-01-30 19:22:25MULTIPOINT (2.00000 2.00000, 3.00000 3.00000)
230.3L82022-01-03 19:22:25MULTIPOINT (4.00000 4.00000, 5.00000 5.00000)
\n", - "
" - ], - "text/plain": [ - " geoaccuracy cloud_cover satname date \\\n", - "0 1 0.1 L8 2018-12-30 18:22:25 \n", - "1 2 0.2 L8 2018-01-30 19:22:25 \n", - "2 3 0.3 L8 2022-01-03 19:22:25 \n", - "\n", - " geometry \n", - "0 MULTIPOINT (0.00000 0.00000, 1.00000 1.00000) \n", - "1 MULTIPOINT (2.00000 2.00000, 3.00000 3.00000) \n", - "2 MULTIPOINT (4.00000 4.00000, 5.00000 5.00000) " - ] - }, - "execution_count": 86, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "from coastseg.merge_utils import merge_geometries\n", - "\n", - "# create a list of geometries\n", - "geometries = [\n", - " MultiPoint([(0, 0), (1, 1)]),\n", - " MultiPoint([(2, 2), (3, 3)]),\n", - " MultiPoint([(4, 4), (5, 5)]),\n", - "]\n", - "# create a dictionary with the other columns\n", - "data = {\n", - " \"geoaccuracy\": [1, 2, 3],\n", - " \"cloud_cover\": [0.1, 0.2, 0.3],\n", - " \"satname\": [\"L8\", \"L8\", \"L8\"],\n", - " \"date\": [\n", - " pd.Timestamp(\"2018-12-30 18:22:25\"),\n", - " pd.Timestamp(\"2018-1-30 19:22:25\"),\n", - " pd.Timestamp(\"2022-01-03 19:22:25\"),\n", - " ],\n", - " \"geometry\": geometries,\n", - "}\n", - "# create a GeoDataFrame from the dictionary\n", - "df = gpd.GeoDataFrame(data, geometry=\"geometry\", crs=\"epsg:4326\")\n", - "df" - ] - }, - { - "cell_type": "code", - "execution_count": 88, - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
geoaccuracycloud_coversatnamedate
010.1L82018-12-30 18:22:25
120.2L82018-01-30 19:22:25
230.3L82022-01-03 19:22:25
\n", - "
" - ], - "text/plain": [ - " geoaccuracy cloud_cover satname date\n", - "0 1 0.1 L8 2018-12-30 18:22:25\n", - "1 2 0.2 L8 2018-01-30 19:22:25\n", - "2 3 0.3 L8 2022-01-03 19:22:25" - ] - }, - "execution_count": 88, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "result = merge_geometries(df)\n", - "result" - ] - }, - { - "cell_type": "code", - "execution_count": 118, + "execution_count": 20, "metadata": {}, "outputs": [ { @@ -2797,7 +1879,7 @@ "2 6.596 L8 0.263967 " ] }, - "execution_count": 118, + "execution_count": 20, "metadata": {}, "output_type": "execute_result" } @@ -2832,7 +1914,7 @@ }, { "cell_type": "code", - "execution_count": 10, + "execution_count": 21, "metadata": {}, "outputs": [ { @@ -2904,7 +1986,7 @@ "2 0.263967 " ] }, - "execution_count": 10, + "execution_count": 21, "metadata": {}, "output_type": "execute_result" } @@ -2936,7 +2018,7 @@ }, { "cell_type": "code", - "execution_count": 102, + "execution_count": 22, "metadata": {}, "outputs": [ { @@ -3008,7 +2090,7 @@ "2 0.263967 " ] }, - "execution_count": 102, + "execution_count": 22, "metadata": {}, "output_type": "execute_result" } @@ -3039,7 +2121,7 @@ }, { "cell_type": "code", - "execution_count": 114, + "execution_count": 23, "metadata": {}, "outputs": [ { @@ -3064,65 +2146,131 @@ " \n", " \n", " date\n", - " geometry_left\n", - " satname\n", - " geometry_right\n", - " cloud_cover\n", - " geoaccuracy\n", + " geometry_x\n", + " geoaccuracy_x\n", + " satname_x\n", + " cloud_cover_x\n", + " geometry_y\n", + " geoaccuracy_y\n", + " satname_y\n", + " cloud_cover_y\n", " \n", " \n", " \n", " \n", " 0\n", " 2018-12-30 18:22:25\n", - " MULTIPOINT (-117.45892 33.28226)\n", + " MULTIPOINT (-117.45892 33.28226, -118.45892 35...\n", + " 5.088\n", " L8\n", - " MULTIPOINT (-117.44480 33.26540)\n", " 0.000000\n", + " MULTIPOINT (-117.44480 33.26540)\n", " 5.088\n", + " L8\n", + " 0.000000\n", + " \n", + " \n", + " 1\n", + " 2020-05-23 19:24:27\n", + " MULTIPOINT (-117.45875 33.28242)\n", + " 6.596\n", + " L8\n", + " 0.263967\n", + " MULTIPOINT (-117.45896 33.28226)\n", + " 6.596\n", + " L8\n", + " 0.263967\n", + " \n", + " \n", + " 0\n", + " 2019-01-28 05:12:28\n", + " MULTIPOINT (-117.45881 33.28239, -120.45892 40...\n", + " 5.802\n", + " L8\n", + " 0.230000\n", + " MULTIPOINT (-117.45894 33.28226)\n", + " 5.802\n", + " L9\n", + " 0.100000\n", " \n", " \n", " 1\n", " 2020-05-23 19:24:27\n", " MULTIPOINT (-117.45875 33.28242)\n", + " 6.596\n", + " L8\n", + " 0.263967\n", + " MULTIPOINT (-117.45891 33.28232)\n", + " 6.596\n", " L8\n", + " 0.263967\n", + " \n", + " \n", + " 0\n", + " 2020-05-23 19:24:27\n", " MULTIPOINT (-117.45896 33.28226)\n", + " 6.596\n", + " L8\n", " 0.263967\n", + " MULTIPOINT (-117.45891 33.28232)\n", " 6.596\n", + " L8\n", + " 0.263967\n", " \n", " \n", "\n", "" ], "text/plain": [ - " date geometry_left satname \\\n", - "0 2018-12-30 18:22:25 MULTIPOINT (-117.45892 33.28226) L8 \n", - "1 2020-05-23 19:24:27 MULTIPOINT (-117.45875 33.28242) L8 \n", - "\n", - " geometry_right cloud_cover geoaccuracy \n", - "0 MULTIPOINT (-117.44480 33.26540) 0.000000 5.088 \n", - "1 MULTIPOINT (-117.45896 33.28226) 0.263967 6.596 " + " date geometry_x \\\n", + "0 2018-12-30 18:22:25 MULTIPOINT (-117.45892 33.28226, -118.45892 35... \n", + "1 2020-05-23 19:24:27 MULTIPOINT (-117.45875 33.28242) \n", + "0 2019-01-28 05:12:28 MULTIPOINT (-117.45881 33.28239, -120.45892 40... 
\n", + "1 2020-05-23 19:24:27 MULTIPOINT (-117.45875 33.28242) \n", + "0 2020-05-23 19:24:27 MULTIPOINT (-117.45896 33.28226) \n", + "\n", + " geoaccuracy_x satname_x cloud_cover_x geometry_y \\\n", + "0 5.088 L8 0.000000 MULTIPOINT (-117.44480 33.26540) \n", + "1 6.596 L8 0.263967 MULTIPOINT (-117.45896 33.28226) \n", + "0 5.802 L8 0.230000 MULTIPOINT (-117.45894 33.28226) \n", + "1 6.596 L8 0.263967 MULTIPOINT (-117.45891 33.28232) \n", + "0 6.596 L8 0.263967 MULTIPOINT (-117.45891 33.28232) \n", + "\n", + " geoaccuracy_y satname_y cloud_cover_y \n", + "0 5.088 L8 0.000000 \n", + "1 6.596 L8 0.263967 \n", + "0 5.802 L9 0.100000 \n", + "1 6.596 L8 0.263967 \n", + "0 6.596 L8 0.263967 " ] }, - "execution_count": 114, + "execution_count": 23, "metadata": {}, "output_type": "execute_result" } ], "source": [ - "from coastseg.merge_utils import merge_geodataframes\n", + "from itertools import combinations\n", "\n", - "# merged_gdf = merge_geodataframes(['date', 'satname'],'inner', aggregation_funcs,'epsg:4326', *gdfs)\n", - "aggregation_funcs = {\n", - " 'cloud_cover': 'mean',\n", - " 'geoaccuracy': 'mean'\n", - " }\n", - "merge_geodataframes( ['date', 'satname'],'inner',aggregation_funcs,'epsg:4326', extracted_gdf1, extracted_gdf2)" + "# Put all dataframes in a list\n", + "dfs = [extracted_gdf1, extracted_gdf2, extracted_gdf3]\n", + "\n", + "# Initialize an empty list to store the merged dataframes\n", + "merged_dfs = []\n", + "\n", + "# Loop over all combinations of 2 dataframes\n", + "for df_a, df_b in combinations(dfs, 2):\n", + " # Perform an 'inner' merge and append the result to the list\n", + " merged_dfs.append(df_a.merge(df_b, on='date', how='inner'))\n", + "\n", + "# Concatenate all the merged dataframes\n", + "final_df = pd.concat(merged_dfs)\n", + "final_df" ] }, { "cell_type": "code", - "execution_count": 115, + "execution_count": 31, "metadata": {}, "outputs": [ { @@ -3147,2229 +2295,241 @@ " \n", " \n", " date\n", - " geometry_left\n", " satname\n", - " geometry_right\n", - " geometry\n", " cloud_cover\n", " geoaccuracy\n", + " geometry\n", " \n", " \n", " \n", " \n", " 0\n", + " 2015-12-30 18:22:25\n", + " L9\n", + " 0.000000\n", + " 5.088\n", + " POINT (-117.45896 33.28226)\n", + " \n", + " \n", + " 1\n", + " 2018-12-30 18:22:25\n", + " L8\n", + " 0.000000\n", + " 5.088\n", + " MULTIPOINT (-118.45892 35.28226, -117.45892 33...\n", + " \n", + " \n", + " 2\n", + " 2019-01-28 05:12:28\n", + " L8\n", + " 0.230000\n", + " 5.802\n", + " MULTIPOINT (-120.45892 40.28226, -117.45881 33...\n", + " \n", + " \n", + " 3\n", + " 2019-01-28 05:12:28\n", + " L9\n", + " 0.100000\n", + " 5.802\n", + " POINT (-117.45894 33.28226)\n", + " \n", + " \n", + " 4\n", + " 2020-01-28 05:12:28\n", + " L8\n", + " 0.000000\n", + " 5.802\n", + " POINT (-117.45899 33.28226)\n", + " \n", + " \n", + " 5\n", " 2020-05-23 19:24:27\n", - " MULTIPOINT (-117.45875 33.28242)\n", " L8\n", - " MULTIPOINT (-117.45896 33.28226)\n", - " MULTIPOINT (-117.45891 33.28232)\n", " 0.263967\n", " 6.596\n", + " MULTIPOINT (-117.45896 33.28226, -117.45891 33...\n", " \n", " \n", "\n", "" ], "text/plain": [ - " date geometry_left satname \\\n", - "0 2020-05-23 19:24:27 MULTIPOINT (-117.45875 33.28242) L8 \n", - "\n", - " geometry_right geometry \\\n", - "0 MULTIPOINT (-117.45896 33.28226) MULTIPOINT (-117.45891 33.28232) \n", + " date satname cloud_cover geoaccuracy \\\n", + "0 2015-12-30 18:22:25 L9 0.000000 5.088 \n", + "1 2018-12-30 18:22:25 L8 0.000000 5.088 \n", + "2 2019-01-28 05:12:28 L8 0.230000 5.802 \n", + "3 2019-01-28 
05:12:28 L9 0.100000 5.802 \n", + "4 2020-01-28 05:12:28 L8 0.000000 5.802 \n", + "5 2020-05-23 19:24:27 L8 0.263967 6.596 \n", "\n", - " cloud_cover geoaccuracy \n", - "0 0.263967 6.596 " + " geometry \n", + "0 POINT (-117.45896 33.28226) \n", + "1 MULTIPOINT (-118.45892 35.28226, -117.45892 33... \n", + "2 MULTIPOINT (-120.45892 40.28226, -117.45881 33... \n", + "3 POINT (-117.45894 33.28226) \n", + "4 POINT (-117.45899 33.28226) \n", + "5 MULTIPOINT (-117.45896 33.28226, -117.45891 33... " ] }, - "execution_count": 115, + "execution_count": 31, "metadata": {}, "output_type": "execute_result" } ], "source": [ - "from coastseg.merge_utils import merge_geodataframes\n", + "from typing import List, Optional, Union\n", + "import geopandas as gpd\n", + "import pandas as pd\n", + "from shapely.ops import unary_union\n", + "from coastseg.merge_utils import merge_geometries\n", "\n", - "# merged_gdf = merge_geodataframes(['date', 'satname'],'inner', aggregation_funcs,'epsg:4326', *gdfs)\n", - "aggregation_funcs = {\n", - " 'cloud_cover': 'mean',\n", - " 'geoaccuracy': 'mean'\n", - " }\n", - "merge_geodataframes( ['date', 'satname'],'inner',aggregation_funcs,'epsg:4326', extracted_gdf1, extracted_gdf2, extracted_gdf3)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "from coastseg.merge_utils import merge_geodataframes\n", "\n", - "# merged_gdf = merge_geodataframes(['date', 'satname'],'inner', aggregation_funcs,'epsg:4326', *gdfs)\n", - "aggregation_funcs = {\n", - " 'cloud_cover': 'mean',\n", - " 'geoaccuracy': 'mean'\n", - " }\n", + "def merge_and_average(df1: gpd.GeoDataFrame, df2: gpd.GeoDataFrame) -> gpd.GeoDataFrame:\n", + " # Perform a full outer join\n", + " merged = pd.merge(df1, df2, on=['satname', 'date'], how='outer', suffixes=('_df1', '_df2'))\n", + "\n", + " # Identify numeric columns from both dataframes\n", + " numeric_columns_df1 = df1.select_dtypes(include='number').columns\n", + " numeric_columns_df2 = df2.select_dtypes(include='number').columns\n", + " common_numeric_columns = set(numeric_columns_df1).intersection(numeric_columns_df2)\n", + "\n", + " # Average the numeric columns\n", + " for column in common_numeric_columns:\n", + " merged[column] = merged[[f'{column}_df1', f'{column}_df2']].mean(axis=1)\n", + "\n", + " # Drop the original numeric columns\n", + " merged.drop(columns=[f'{column}_df1' for column in common_numeric_columns] + [f'{column}_df2' for column in common_numeric_columns], inplace=True)\n", + "\n", + " # Merge geometries\n", + " geometry_columns = [col for col in merged.columns if 'geometry' in col]\n", + " merged = merge_geometries(merged, columns=geometry_columns)\n", + "\n", + " return merged\n", + "\n", + "# List of GeoDataFrames\n", + "gdfs = [extracted_gdf1, extracted_gdf2, extracted_gdf3]\n", + "\n", + "# Perform a full outer join and average the numeric columns across all GeoDataFrames\n", + "result = reduce(merge_and_average, gdfs)\n", + "\n", + "result.sort_values(by='date', inplace=True)\n", + "result.reset_index(drop=True, inplace=True)\n", + "\n", + "assert len(result) == 6\n", + "assert result[['date', 'satname']].duplicated().sum() == 0, \"The combination of 'date' and 'satname' is not unique.\"\n", + "# assert np.all(result['cloud_cover'] == [0.0, 0.115, 0.263967, 0.0, 0.0, 0.1])\n", "\n", - "merge_geodataframes( ['date', 'satname'],'inner',aggregation_funcs,'epsg:4326', extracted_gdf1, extracted_gdf2, extracted_gdf3)" + "result \n" ] }, { "cell_type": "code", - 
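Condensed from the cell above, the pattern that finally works is: fold the extracted-shoreline GeoDataFrames with functools.reduce, full-outer-join each pair on ['satname', 'date'], average the numeric columns they share, and union the suffixed geometry columns. The sketch below is illustrative only: it inlines the geometry union instead of calling coastseg.merge_utils.merge_geometries, and the two toy GeoDataFrames (gdf_a, gdf_b) are made-up data.

from functools import reduce

import geopandas as gpd
import pandas as pd
from shapely.geometry import MultiPoint
from shapely.ops import unary_union


def merge_and_average(df1, df2):
    # Full outer join keeps rows that exist in only one of the two frames.
    merged = pd.merge(df1, df2, on=["satname", "date"], how="outer", suffixes=("_df1", "_df2"))

    # Average every numeric column the two frames have in common (NaNs are skipped by mean).
    shared_numeric = set(df1.select_dtypes("number").columns) & set(df2.select_dtypes("number").columns)
    for col in shared_numeric:
        merged[col] = merged[[f"{col}_df1", f"{col}_df2"]].mean(axis=1)
        merged = merged.drop(columns=[f"{col}_df1", f"{col}_df2"])

    # Collapse the suffixed geometry columns into one geometry per row
    # (the notebook uses coastseg.merge_utils.merge_geometries for this step).
    geom_cols = [c for c in merged.columns if "geometry" in c]
    merged["geometry"] = merged[geom_cols].apply(lambda row: unary_union(row.dropna().tolist()), axis=1)
    merged = merged.drop(columns=geom_cols)
    return gpd.GeoDataFrame(merged, geometry="geometry", crs=df1.crs)


# Two made-up frames that share one (date, satname) pair.
gdf_a = gpd.GeoDataFrame(
    {
        "date": [pd.Timestamp("2018-12-30 18:22:25")],
        "satname": ["L8"],
        "cloud_cover": [0.0],
        "geoaccuracy": [5.0],
        "geometry": [MultiPoint([(-117.4589, 33.2823)])],
    },
    crs="epsg:4326",
)
gdf_b = gpd.GeoDataFrame(
    {
        "date": [pd.Timestamp("2018-12-30 18:22:25")],
        "satname": ["L8"],
        "cloud_cover": [0.2],
        "geoaccuracy": [6.0],
        "geometry": [MultiPoint([(-117.4448, 33.2654)])],
    },
    crs="epsg:4326",
)

result = reduce(merge_and_average, [gdf_a, gdf_b])
print(result)  # one row: cloud_cover 0.1, geoaccuracy 5.5, unioned MULTIPOINT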
"execution_count": 14, + "execution_count": 29, "metadata": {}, "outputs": [ { "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
dategeometry_xgeoaccuracy_xsatname_xcloud_cover_xgeometry_ygeoaccuracy_ysatname_ycloud_cover_ygeometrygeoaccuracysatnamecloud_cover
02020-05-23 19:24:27MULTIPOINT (-117.45875 33.28242)6.596L80.263967MULTIPOINT (-117.45896 33.28226)6.596L80.263967MULTIPOINT (-117.45891 33.28232)6.596L80.263967
\n", - "
" - ], "text/plain": [ - " date geometry_x geoaccuracy_x \\\n", - "0 2020-05-23 19:24:27 MULTIPOINT (-117.45875 33.28242) 6.596 \n", - "\n", - " satname_x cloud_cover_x geometry_y geoaccuracy_y \\\n", - "0 L8 0.263967 MULTIPOINT (-117.45896 33.28226) 6.596 \n", - "\n", - " satname_y cloud_cover_y geometry geoaccuracy \\\n", - "0 L8 0.263967 MULTIPOINT (-117.45891 33.28232) 6.596 \n", - "\n", - " satname cloud_cover \n", - "0 L8 0.263967 " + "0 2015-12-30 18:22:25\n", + "1 2018-12-30 18:22:25\n", + "2 2019-01-28 05:12:28\n", + "3 2019-01-28 05:12:28\n", + "4 2020-01-28 05:12:28\n", + "5 2020-05-23 19:24:27\n", + "Name: date, dtype: datetime64[ns]" ] }, - "execution_count": 14, + "execution_count": 29, "metadata": {}, "output_type": "execute_result" } ], "source": [ - "# this code did not work it only found the dates in common across all the dataframes.\n", - "from functools import reduce\n", - "\n", - "# Step 1: Find the common dates using set intersection\n", - "common_dates = set(extracted_gdf1['date']).intersection(extracted_gdf2['date'], extracted_gdf3['date'])\n", - "\n", - "# Step 2: Filter the dataframes to only include rows with the common date\n", - "dfs_filtered = [df[df['date'].isin(common_dates)] for df in [extracted_gdf1, extracted_gdf2, extracted_gdf3]]\n", - "\n", - "# Step 3: Perform a single merge operation on the filtered dataframes\n", - "final_df = reduce(lambda left, right: pd.merge(left, right, on='date', how='inner'), dfs_filtered)\n", - "final_df" + "result['date']" ] }, { "cell_type": "code", - "execution_count": 16, + "execution_count": null, "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
dategeometry_xgeoaccuracy_xsatname_xcloud_cover_xgeometry_ygeoaccuracy_ysatname_ycloud_cover_ygeometrygeoaccuracysatnamecloud_cover
02020-05-23 19:24:27MULTIPOINT (-117.45875 33.28242)6.596L80.263967MULTIPOINT (-117.45896 33.28226)6.596L80.263967MULTIPOINT (-117.45891 33.28232)6.596L80.263967
\n", - "
" - ], - "text/plain": [ - " date geometry_x geoaccuracy_x \\\n", - "0 2020-05-23 19:24:27 MULTIPOINT (-117.45875 33.28242) 6.596 \n", - "\n", - " satname_x cloud_cover_x geometry_y geoaccuracy_y \\\n", - "0 L8 0.263967 MULTIPOINT (-117.45896 33.28226) 6.596 \n", - "\n", - " satname_y cloud_cover_y geometry geoaccuracy \\\n", - "0 L8 0.263967 MULTIPOINT (-117.45891 33.28232) 6.596 \n", - "\n", - " satname cloud_cover \n", - "0 L8 0.263967 " - ] - }, - "execution_count": 16, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "# only got dates in common across all the dataframes\n", - "dfs = [extracted_gdf1, extracted_gdf2, extracted_gdf3]\n", - "\n", - "# Assuming dfs is a list of all your dataframes\n", - "common_dates = set(dfs[0]['date'])\n", - "for df in dfs[1:]:\n", - " common_dates = common_dates.intersection(df['date'])\n", - "\n", - "dfs_filtered = [df[df['date'].isin(common_dates)] for df in dfs]\n", - "final_df = reduce(lambda left, right: pd.merge(left, right, on='date', how='inner'), dfs_filtered)\n", - "final_df\n" - ] - }, - { - "cell_type": "code", - "execution_count": 18, - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
dategeometry_xgeoaccuracy_xsatname_xcloud_cover_xgeometry_ygeoaccuracy_ysatname_ycloud_cover_y
02018-12-30 18:22:25MULTIPOINT (-117.45892 33.28226)5.088L80.000000MULTIPOINT (-117.44480 33.26540)5.088L80.000000
12020-05-23 19:24:27MULTIPOINT (-117.45875 33.28242)6.596L80.263967MULTIPOINT (-117.45896 33.28226)6.596L80.263967
02019-01-28 05:12:28MULTIPOINT (-117.45881 33.28239)5.802L80.230000MULTIPOINT (-117.45894 33.28226)5.802L90.100000
12020-05-23 19:24:27MULTIPOINT (-117.45875 33.28242)6.596L80.263967MULTIPOINT (-117.45891 33.28232)6.596L80.263967
02020-05-23 19:24:27MULTIPOINT (-117.45896 33.28226)6.596L80.263967MULTIPOINT (-117.45891 33.28232)6.596L80.263967
\n", - "
" - ], - "text/plain": [ - " date geometry_x geoaccuracy_x \\\n", - "0 2018-12-30 18:22:25 MULTIPOINT (-117.45892 33.28226) 5.088 \n", - "1 2020-05-23 19:24:27 MULTIPOINT (-117.45875 33.28242) 6.596 \n", - "0 2019-01-28 05:12:28 MULTIPOINT (-117.45881 33.28239) 5.802 \n", - "1 2020-05-23 19:24:27 MULTIPOINT (-117.45875 33.28242) 6.596 \n", - "0 2020-05-23 19:24:27 MULTIPOINT (-117.45896 33.28226) 6.596 \n", - "\n", - " satname_x cloud_cover_x geometry_y geoaccuracy_y \\\n", - "0 L8 0.000000 MULTIPOINT (-117.44480 33.26540) 5.088 \n", - "1 L8 0.263967 MULTIPOINT (-117.45896 33.28226) 6.596 \n", - "0 L8 0.230000 MULTIPOINT (-117.45894 33.28226) 5.802 \n", - "1 L8 0.263967 MULTIPOINT (-117.45891 33.28232) 6.596 \n", - "0 L8 0.263967 MULTIPOINT (-117.45891 33.28232) 6.596 \n", - "\n", - " satname_y cloud_cover_y \n", - "0 L8 0.000000 \n", - "1 L8 0.263967 \n", - "0 L9 0.100000 \n", - "1 L8 0.263967 \n", - "0 L8 0.263967 " - ] - }, - "execution_count": 18, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "from itertools import combinations\n", - "\n", - "# Put all dataframes in a list\n", - "dfs = [extracted_gdf1, extracted_gdf2, extracted_gdf3]\n", - "\n", - "# Initialize an empty list to store the merged dataframes\n", - "merged_dfs = []\n", - "\n", - "# Loop over all combinations of 2 dataframes\n", - "for df_a, df_b in combinations(dfs, 2):\n", - " # Perform an 'inner' merge and append the result to the list\n", - " merged_dfs.append(df_a.merge(df_b, on='date', how='inner'))\n", - "\n", - "# Concatenate all the merged dataframes\n", - "final_df = pd.concat(merged_dfs)\n", - "final_df" - ] - }, - { - "cell_type": "code", - "execution_count": 24, - "metadata": {}, - "outputs": [ - { - "ename": "TypeError", - "evalue": "Cannot interpret '' as a data type", - "output_type": "error", - "traceback": [ - "\u001b[1;31m---------------------------------------------------------------------------\u001b[0m", - "\u001b[1;31mTypeError\u001b[0m Traceback (most recent call last)", - "\u001b[1;32mc:\\development\\doodleverse\\coastseg\\CoastSeg\\merge_session_final_nonoverlapping_script.ipynb Cell 63\u001b[0m line \u001b[0;36m2\n\u001b[0;32m 23\u001b[0m \u001b[39mreturn\u001b[39;00m merged\n\u001b[0;32m 26\u001b[0m \u001b[39m# Perform a full outer join and average the numeric columns across all dataframes\u001b[39;00m\n\u001b[1;32m---> 27\u001b[0m result \u001b[39m=\u001b[39m reduce(merge_and_average, dfs)\n", - "\u001b[1;32mc:\\development\\doodleverse\\coastseg\\CoastSeg\\merge_session_final_nonoverlapping_script.ipynb Cell 63\u001b[0m line \u001b[0;36m1\n\u001b[0;32m 11\u001b[0m \u001b[39m# Loop over all columns\u001b[39;00m\n\u001b[0;32m 12\u001b[0m \u001b[39mfor\u001b[39;00m column \u001b[39min\u001b[39;00m \u001b[39mset\u001b[39m(df1\u001b[39m.\u001b[39mcolumns)\u001b[39m.\u001b[39mintersection(df2\u001b[39m.\u001b[39mcolumns):\n\u001b[0;32m 13\u001b[0m \u001b[39m# Skip non-numeric columns\u001b[39;00m\n\u001b[1;32m---> 14\u001b[0m \u001b[39mif\u001b[39;00m \u001b[39mnot\u001b[39;00m np\u001b[39m.\u001b[39;49missubdtype(df1[column]\u001b[39m.\u001b[39;49mdtype, np\u001b[39m.\u001b[39;49mnumber):\n\u001b[0;32m 15\u001b[0m \u001b[39mcontinue\u001b[39;00m\n\u001b[0;32m 17\u001b[0m \u001b[39m# Average the values in the two columns\u001b[39;00m\n", - "File \u001b[1;32mc:\\Users\\sf230\\anaconda3\\envs\\coastseg_transformers10\\lib\\site-packages\\numpy\\core\\numerictypes.py:417\u001b[0m, in \u001b[0;36missubdtype\u001b[1;34m(arg1, arg2)\u001b[0m\n\u001b[0;32m 359\u001b[0m 
\u001b[39m\u001b[39m\u001b[39mr\u001b[39m\u001b[39m\"\"\"\u001b[39;00m\n\u001b[0;32m 360\u001b[0m \u001b[39mReturns True if first argument is a typecode lower/equal in type hierarchy.\u001b[39;00m\n\u001b[0;32m 361\u001b[0m \n\u001b[1;32m (...)\u001b[0m\n\u001b[0;32m 414\u001b[0m \n\u001b[0;32m 415\u001b[0m \u001b[39m\"\"\"\u001b[39;00m\n\u001b[0;32m 416\u001b[0m \u001b[39mif\u001b[39;00m \u001b[39mnot\u001b[39;00m issubclass_(arg1, generic):\n\u001b[1;32m--> 417\u001b[0m arg1 \u001b[39m=\u001b[39m dtype(arg1)\u001b[39m.\u001b[39mtype\n\u001b[0;32m 418\u001b[0m \u001b[39mif\u001b[39;00m \u001b[39mnot\u001b[39;00m issubclass_(arg2, generic):\n\u001b[0;32m 419\u001b[0m arg2 \u001b[39m=\u001b[39m dtype(arg2)\u001b[39m.\u001b[39mtype\n", - "\u001b[1;31mTypeError\u001b[0m: Cannot interpret '' as a data type" - ] - } - ], - "source": [ - "from functools import reduce\n", - "import numpy as np\n", - "\n", - "# only got dates in common across all the dataframes\n", - "dfs = [extracted_gdf1, extracted_gdf2, extracted_gdf3]\n", - "\n", - "def merge_and_average(df1, df2):\n", - " # Perform a full outer join\n", - " merged = pd.merge(df1, df2, on=['satname', 'date'], how='outer', suffixes=('_df1', '_df2'))\n", - "\n", - " # Loop over all columns\n", - " for column in set(df1.columns).intersection(df2.columns):\n", - " # Skip non-numeric columns\n", - " if not np.issubdtype(df1[column].dtype, np.number):\n", - " continue\n", - "\n", - " # Average the values in the two columns\n", - " merged[column] = merged[[f'{column}_df1', f'{column}_df2']].mean(axis=1)\n", - "\n", - " # Drop the original columns\n", - " merged.drop(columns=[col for col in merged.columns if '_df1' in col or '_df2' in col], inplace=True)\n", - "\n", - " return merged\n", - "\n", - "\n", - "# Perform a full outer join and average the numeric columns across all dataframes\n", - "result = reduce(merge_and_average, dfs)" - ] - }, - { - "cell_type": "code", - "execution_count": 46, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "float64\n", - "0 0.000000\n", - "1 0.230000\n", - "2 0.263967\n", - "Name: cloud_cover, dtype: float64\n", - "float64\n", - "0 5.088\n", - "1 5.802\n", - "2 6.596\n", - "Name: geoaccuracy, dtype: float64\n", - "object\n", - "datetime64[ns]\n", - "geometry\n" - ] - }, - { - "data": { - "text/html": [ - "
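The TypeError in the traceback above appears to be triggered when the geometry column's extension dtype reaches np.issubdtype, which only understands NumPy dtypes. The later cells switch to the checks sketched below; the toy frame and its values are made up for illustration.

import geopandas as gpd
import pandas as pd
from shapely.geometry import Point

df = gpd.GeoDataFrame(
    {"cloud_cover": [0.1, 0.2], "geometry": [Point(0, 0), Point(1, 1)]},
    crs="epsg:4326",
)

# Detect the geometry column explicitly instead of feeding its dtype to numpy.
print(isinstance(df["geometry"].dtype, gpd.array.GeometryDtype))  # True

# pandas' own numeric check handles extension dtypes without raising.
print(pd.api.types.is_numeric_dtype(df["cloud_cover"]))  # True
print(pd.api.types.is_numeric_dtype(df["geometry"]))     # False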
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
datesatnamecloud_covergeoaccuracygeometry
02018-12-30 18:22:25L80.0000005.088MULTIPOINT (-117.45892 33.28226, -117.44480 33...
12019-01-28 05:12:28L80.2300005.802POINT (-117.45881 33.28239)
22020-05-23 19:24:27L80.2639676.596MULTIPOINT (-117.45896 33.28226, -117.45875 33...
32020-01-28 05:12:28L80.0000005.802POINT (-117.45899 33.28226)
\n", - "
" - ], - "text/plain": [ - " date satname cloud_cover geoaccuracy \\\n", - "0 2018-12-30 18:22:25 L8 0.000000 5.088 \n", - "1 2019-01-28 05:12:28 L8 0.230000 5.802 \n", - "2 2020-05-23 19:24:27 L8 0.263967 6.596 \n", - "3 2020-01-28 05:12:28 L8 0.000000 5.802 \n", - "\n", - " geometry \n", - "0 MULTIPOINT (-117.45892 33.28226, -117.44480 33... \n", - "1 POINT (-117.45881 33.28239) \n", - "2 MULTIPOINT (-117.45896 33.28226, -117.45875 33... \n", - "3 POINT (-117.45899 33.28226) " - ] - }, - "execution_count": 46, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "from shapely.ops import unary_union\n", - "merged = pd.merge(extracted_gdf1, extracted_gdf2, on=['satname', 'date'], how='outer', suffixes=('_df1', '_df2'))\n", - "# Loop over all columns\n", - "for column in set(extracted_gdf1.columns).intersection(extracted_gdf2.columns):\n", - " # Skip non-numeric columns\n", - " # print(extracted_gdf1[column])\n", - " print(extracted_gdf1[column].dtype)\n", - " if isinstance(extracted_gdf1[column].dtype, gpd.array.GeometryDtype):\n", - " columns = [col for col in merged.columns if \"geometry\" in col]\n", - " merged[\"geometry\"] = merged[columns].apply(\n", - " lambda row: unary_union(row.tolist()), axis=1\n", - " )\n", - " # drop the rows that were merged\n", - " merged.drop(columns=[f'{column}_df1', f'{column}_df2'], inplace=True)\n", - " continue\n", - " if not np.issubdtype(extracted_gdf1[column].dtype, np.number):\n", - " continue\n", - " # if not pd.api.types.is_numeric_dtype(extracted_gdf1[column]):\n", - " # continue\n", - " print(extracted_gdf1[column])\n", - " # Average the values in the two columns\n", - " merged[column] = merged[[f'{column}_df1', f'{column}_df2']].mean(axis=1)\n", - " merged.drop(columns=[f'{column}_df1', f'{column}_df2'], inplace=True)\n", - " \n", - "merged" - ] - }, - { - "cell_type": "code", - "execution_count": 50, - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
datesatnamecloud_covergeoaccuracygeometry
02018-12-30 18:22:25L80.0000005.088MULTIPOINT (-117.45892 33.28226, -117.44480 33...
12019-01-28 05:12:28L80.2300005.802POINT (-117.45881 33.28239)
22020-05-23 19:24:27L80.2639676.596MULTIPOINT (-117.45896 33.28226, -117.45875 33...
32020-01-28 05:12:28L80.0000005.802POINT (-117.45899 33.28226)
\n", - "
" - ], - "text/plain": [ - " date satname cloud_cover geoaccuracy \\\n", - "0 2018-12-30 18:22:25 L8 0.000000 5.088 \n", - "1 2019-01-28 05:12:28 L8 0.230000 5.802 \n", - "2 2020-05-23 19:24:27 L8 0.263967 6.596 \n", - "3 2020-01-28 05:12:28 L8 0.000000 5.802 \n", - "\n", - " geometry \n", - "0 MULTIPOINT (-117.45892 33.28226, -117.44480 33... \n", - "1 POINT (-117.45881 33.28239) \n", - "2 MULTIPOINT (-117.45896 33.28226, -117.45875 33... \n", - "3 POINT (-117.45899 33.28226) " - ] - }, - "execution_count": 50, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "from shapely.ops import unary_union\n", - "from coastseg.merge_utils import merge_geometries\n", - "\n", - "merged = pd.merge(extracted_gdf1, extracted_gdf2, on=['satname', 'date'], how='outer', suffixes=('_df1', '_df2'))\n", - "# Loop over all columns\n", - "for column in set(extracted_gdf1.columns).intersection(extracted_gdf2.columns):\n", - " # merge the geometries\n", - " if isinstance(extracted_gdf1[column].dtype, gpd.array.GeometryDtype):\n", - " merged = merge_geometries(merged, columns=[f'{column}_df1', f'{column}_df2'], operation=unary_union)\n", - " continue\n", - " # Skip non-numeric columns\n", - " if not np.issubdtype(extracted_gdf1[column].dtype, np.number):\n", - " continue\n", - " # Average the values in the two columns\n", - " merged[column] = merged[[f'{column}_df1', f'{column}_df2']].mean(axis=1)\n", - " merged.drop(columns=[f'{column}_df1', f'{column}_df2'], inplace=True)\n", - " \n", - "merged" - ] - }, - { - "cell_type": "code", - "execution_count": 119, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "merging {['geometry_df1', 'geometry_df2']}\n", - "0 MULTIPOINT (-117.45892 33.28226, -118.45892 35...\n", - "1 MULTIPOINT (-117.45881 33.28239, -120.45892 40...\n", - "2 MULTIPOINT (-117.45875 33.28242)\n", - "Name: geometry, dtype: geometry\n", - "0 MULTIPOINT (-117.44480 33.26540)\n", - "1 MULTIPOINT (-117.45899 33.28226)\n", - "2 MULTIPOINT (-117.45896 33.28226)\n", - "Name: geometry, dtype: geometry\n", - "merging {['geometry_df1', 'geometry_df2']}\n", - "0 MULTIPOINT (-118.45892 35.28226, -117.45892 33...\n", - "1 MULTIPOINT (-120.45892 40.28226, -117.45881 33...\n", - "2 MULTIPOINT (-117.45896 33.28226, -117.45875 33...\n", - "3 POINT (-117.45899 33.28226)\n", - "Name: geometry, dtype: geometry\n", - "0 MULTIPOINT (-117.45896 33.28226)\n", - "1 MULTIPOINT (-117.45894 33.28226)\n", - "2 MULTIPOINT (-117.45891 33.28232)\n", - "Name: geometry, dtype: geometry\n" - ] - }, - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
datesatnamecloud_covergeoaccuracygeometry
02018-12-30 18:22:25L80.0000005.088MULTIPOINT (-118.45892 35.28226, -117.45892 33...
12019-01-28 05:12:28L80.2300005.802MULTIPOINT (-120.45892 40.28226, -117.45881 33...
22020-05-23 19:24:27L80.2639676.596MULTIPOINT (-117.45896 33.28226, -117.45891 33...
32020-01-28 05:12:28L80.0000005.802POINT (-117.45899 33.28226)
42015-12-30 18:22:25L90.0000005.088POINT (-117.45896 33.28226)
52019-01-28 05:12:28L90.1000005.802POINT (-117.45894 33.28226)
\n", - "
" - ], - "text/plain": [ - " date satname cloud_cover geoaccuracy \\\n", - "0 2018-12-30 18:22:25 L8 0.000000 5.088 \n", - "1 2019-01-28 05:12:28 L8 0.230000 5.802 \n", - "2 2020-05-23 19:24:27 L8 0.263967 6.596 \n", - "3 2020-01-28 05:12:28 L8 0.000000 5.802 \n", - "4 2015-12-30 18:22:25 L9 0.000000 5.088 \n", - "5 2019-01-28 05:12:28 L9 0.100000 5.802 \n", - "\n", - " geometry \n", - "0 MULTIPOINT (-118.45892 35.28226, -117.45892 33... \n", - "1 MULTIPOINT (-120.45892 40.28226, -117.45881 33... \n", - "2 MULTIPOINT (-117.45896 33.28226, -117.45891 33... \n", - "3 POINT (-117.45899 33.28226) \n", - "4 POINT (-117.45896 33.28226) \n", - "5 POINT (-117.45894 33.28226) " - ] - }, - "execution_count": 119, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "from shapely.ops import unary_union\n", - "from coastseg.merge_utils import merge_geometries\n", - "from functools import reduce\n", - "import pandas as pd\n", - "\n", - "\n", - "def merge_geometries(merged_gdf, columns=None, operation=unary_union):\n", - " \"\"\"\n", - " Performs a specified operation for the geometries with the same date and satname.\n", - "\n", - " Parameters:\n", - " merged_gdf : GeoDataFrame\n", - " The GeoDataFrame to perform the operation on.\n", - " columns : list of str, optional\n", - " The columns to perform the operation on. If None, all columns with 'geometry' in the name are used.\n", - " operation : function, optional\n", - " The operation to perform. If None, unary_union is used.\n", - "\n", - " Returns:\n", - " GeoDataFrame\n", - " The GeoDataFrame with the operation performed.\n", - " \"\"\"\n", - " if columns is None:\n", - " columns = [col for col in merged_gdf.columns if \"geometry\" in col]\n", - " else:\n", - " columns = [col for col in columns if col in merged_gdf.columns]\n", - "\n", - " merged_gdf[\"geometry\"] = merged_gdf[columns].apply(\n", - " lambda row: operation(row.tolist()), axis=1\n", - " )\n", - " for col in columns:\n", - " if col in merged_gdf.columns:\n", - " merged_gdf = merged_gdf.drop(columns=col)\n", - " return merged_gdf\n", - "\n", - "def merge_and_average(df1, df2):\n", - " # Perform a full outer join\n", - " merged = pd.merge(df1, df2, on=['satname', 'date'], how='outer', suffixes=('_df1', '_df2'))\n", - "\n", - " # Loop over all columns\n", - " for column in set(df1.columns).intersection(df2.columns):\n", - " # Merge the geometries\n", - " \n", - " if isinstance(df1[column].dtype, gpd.array.GeometryDtype):\n", - " print(f\"merging {{['{column}_df1', '{column}_df2']}}\")\n", - " print(df1[column])\n", - " print(df2[column])\n", - " # merged = merge_geometries(merged, columns=[f'{column}_df1', f'{column}_df2'], operation=unary_union)\n", - " merged = merge_geometries(merged)\n", - " continue\n", - " # Skip non-numeric columns\n", - " if not pd.api.types.is_numeric_dtype(df1[column]):\n", - " continue\n", - " # Average the values in the two columns\n", - " merged[column] = merged[[f'{column}_df1', f'{column}_df2']].mean(axis=1)\n", - " merged.drop(columns=[f'{column}_df1', f'{column}_df2'], inplace=True)\n", - "\n", - " return merged\n", - "\n", - "# List of GeoDataFrames\n", - "gdfs = [extracted_gdf1, extracted_gdf2, extracted_gdf3]\n", - "\n", - "# Perform a full outer join and average the numeric columns across all GeoDataFrames\n", - "result = reduce(merge_and_average, gdfs)\n", - "\n", - "result\n", - " " - ] - }, - { - "cell_type": "code", - "execution_count": 112, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": 
"stream", - "text": [ - "geometry\n", - "merging {['geometry_df1', 'geometry_df2']}\n", - "0 MULTIPOINT (-117.45892 33.28226)\n", - "1 MULTIPOINT (-117.45881 33.28239)\n", - "2 MULTIPOINT (-117.45875 33.28242)\n", - "Name: geometry, dtype: geometry\n", - "0 MULTIPOINT (-117.44480 33.26540)\n", - "1 MULTIPOINT (-117.45899 33.28226)\n", - "2 MULTIPOINT (-117.45896 33.28226)\n", - "Name: geometry, dtype: geometry\n" - ] - } - ], - "source": [ - "merged = pd.merge(extracted_gdf1, extracted_gdf2, on=['satname', 'date'], how='outer', suffixes=('_df1', '_df2'))\n", - "# Loop over all columns\n", - "for column in set(extracted_gdf1.columns).intersection(extracted_gdf2.columns):\n", - " # Merge the geometries\n", - " \n", - " \n", - " if isinstance(extracted_gdf1[column].dtype, gpd.array.GeometryDtype):\n", - " print(column)\n", - " print(f\"merging {{['{column}_df1', '{column}_df2']}}\")\n", - " print(extracted_gdf1[column])\n", - " print(extracted_gdf2[column])\n", - " # merged = merge_geometries(merged, columns=[f'{column}_df1', f'{column}_df2'], operation=unary_union)\n", - " merged = merge_geometries(merged)" - ] - }, - { - "cell_type": "code", - "execution_count": 116, - "metadata": {}, - "outputs": [], - "source": [ - "merged = pd.merge(extracted_gdf1, extracted_gdf2, on=['satname', 'date'], how='outer', suffixes=('_df1', '_df2'))" - ] - }, - { - "cell_type": "code", - "execution_count": 117, - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
dategeometry_df1geoaccuracy_df1satnamecloud_cover_df1geometry_df2geoaccuracy_df2cloud_cover_df2
02018-12-30 18:22:25MULTIPOINT (-117.45892 33.28226)5.088L80.000000MULTIPOINT (-117.44480 33.26540)5.0880.000000
12019-01-28 05:12:28MULTIPOINT (-117.45881 33.28239)5.802L80.230000NoneNaNNaN
22020-05-23 19:24:27MULTIPOINT (-117.45875 33.28242)6.596L80.263967MULTIPOINT (-117.45896 33.28226)6.5960.263967
32020-01-28 05:12:28NoneNaNL8NaNMULTIPOINT (-117.45899 33.28226)5.8020.000000
\n", - "
" - ], - "text/plain": [ - " date geometry_df1 geoaccuracy_df1 \\\n", - "0 2018-12-30 18:22:25 MULTIPOINT (-117.45892 33.28226) 5.088 \n", - "1 2019-01-28 05:12:28 MULTIPOINT (-117.45881 33.28239) 5.802 \n", - "2 2020-05-23 19:24:27 MULTIPOINT (-117.45875 33.28242) 6.596 \n", - "3 2020-01-28 05:12:28 None NaN \n", - "\n", - " satname cloud_cover_df1 geometry_df2 geoaccuracy_df2 \\\n", - "0 L8 0.000000 MULTIPOINT (-117.44480 33.26540) 5.088 \n", - "1 L8 0.230000 None NaN \n", - "2 L8 0.263967 MULTIPOINT (-117.45896 33.28226) 6.596 \n", - "3 L8 NaN MULTIPOINT (-117.45899 33.28226) 5.802 \n", - "\n", - " cloud_cover_df2 \n", - "0 0.000000 \n", - "1 NaN \n", - "2 0.263967 \n", - "3 0.000000 " - ] - }, - "execution_count": 117, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "merged" - ] - }, - { - "cell_type": "code", - "execution_count": 113, - "metadata": {}, - "outputs": [ - { - "ename": "NameError", - "evalue": "name 'merged_gdf' is not defined", - "output_type": "error", - "traceback": [ - "\u001b[1;31m---------------------------------------------------------------------------\u001b[0m", - "\u001b[1;31mNameError\u001b[0m Traceback (most recent call last)", - "\u001b[1;32mc:\\development\\doodleverse\\coastseg\\CoastSeg\\merge_session_final_nonoverlapping_script.ipynb Cell 68\u001b[0m line \u001b[0;36m4\n\u001b[0;32m 2\u001b[0m columns \u001b[39m=\u001b[39m [col \u001b[39mfor\u001b[39;00m col \u001b[39min\u001b[39;00m merged_gdf\u001b[39m.\u001b[39mcolumns \u001b[39mif\u001b[39;00m \u001b[39m\"\u001b[39m\u001b[39mgeometry\u001b[39m\u001b[39m\"\u001b[39m \u001b[39min\u001b[39;00m col]\n\u001b[0;32m 3\u001b[0m \u001b[39melse\u001b[39;00m:\n\u001b[1;32m----> 4\u001b[0m columns \u001b[39m=\u001b[39m [col \u001b[39mfor\u001b[39;00m col \u001b[39min\u001b[39;00m columns \u001b[39mif\u001b[39;00m col \u001b[39min\u001b[39;00m merged_gdf\u001b[39m.\u001b[39mcolumns]\n\u001b[0;32m 5\u001b[0m merged_gdf[\u001b[39m\"\u001b[39m\u001b[39mgeometry\u001b[39m\u001b[39m\"\u001b[39m] \u001b[39m=\u001b[39m merged_gdf[columns]\u001b[39m.\u001b[39mapply(\n\u001b[0;32m 6\u001b[0m \u001b[39mlambda\u001b[39;00m row: unary_union(row\u001b[39m.\u001b[39mtolist()), axis\u001b[39m=\u001b[39m\u001b[39m1\u001b[39m\n\u001b[0;32m 7\u001b[0m )\n\u001b[0;32m 8\u001b[0m \u001b[39mfor\u001b[39;00m col \u001b[39min\u001b[39;00m columns:\n", - "\u001b[1;32mc:\\development\\doodleverse\\coastseg\\CoastSeg\\merge_session_final_nonoverlapping_script.ipynb Cell 68\u001b[0m line \u001b[0;36m4\n\u001b[0;32m 2\u001b[0m columns \u001b[39m=\u001b[39m [col \u001b[39mfor\u001b[39;00m col \u001b[39min\u001b[39;00m merged_gdf\u001b[39m.\u001b[39mcolumns \u001b[39mif\u001b[39;00m \u001b[39m\"\u001b[39m\u001b[39mgeometry\u001b[39m\u001b[39m\"\u001b[39m \u001b[39min\u001b[39;00m col]\n\u001b[0;32m 3\u001b[0m \u001b[39melse\u001b[39;00m:\n\u001b[1;32m----> 4\u001b[0m columns \u001b[39m=\u001b[39m [col \u001b[39mfor\u001b[39;00m col \u001b[39min\u001b[39;00m columns \u001b[39mif\u001b[39;00m col \u001b[39min\u001b[39;00m merged_gdf\u001b[39m.\u001b[39mcolumns]\n\u001b[0;32m 5\u001b[0m merged_gdf[\u001b[39m\"\u001b[39m\u001b[39mgeometry\u001b[39m\u001b[39m\"\u001b[39m] \u001b[39m=\u001b[39m merged_gdf[columns]\u001b[39m.\u001b[39mapply(\n\u001b[0;32m 6\u001b[0m \u001b[39mlambda\u001b[39;00m row: unary_union(row\u001b[39m.\u001b[39mtolist()), axis\u001b[39m=\u001b[39m\u001b[39m1\u001b[39m\n\u001b[0;32m 7\u001b[0m )\n\u001b[0;32m 8\u001b[0m \u001b[39mfor\u001b[39;00m col \u001b[39min\u001b[39;00m columns:\n", - 
"\u001b[1;31mNameError\u001b[0m: name 'merged_gdf' is not defined" - ] - } - ], - "source": [ - "merged_gdf = merged\n", - "\n", - "if columns is None:\n", - " columns = [col for col in merged_gdf.columns if \"geometry\" in col]\n", - "else:\n", - " columns = [col for col in columns if col in merged_gdf.columns]\n", - "merged_gdf[\"geometry\"] = merged_gdf[columns].apply(\n", - " lambda row: unary_union(row.tolist()), axis=1\n", - ")\n", - "for col in columns:\n", - " if col in merged_gdf.columns:\n", - " merged_gdf = merged_gdf.drop(columns=col)\n", - " \n", - "merged_gdf\n" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [] - }, - { - "cell_type": "code", - "execution_count": 104, - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
datesatnamecloud_covergeoaccuracygeometry
02015-12-30 18:22:25L90.0000005.088POINT (-117.45896 33.28226)
12018-12-30 18:22:25L80.0000005.088MULTIPOINT (-117.45892 33.28226, -117.44480 33...
22019-01-28 05:12:28L80.2300005.802POINT (-117.45881 33.28239)
32019-01-28 05:12:28L90.1000005.802POINT (-117.45894 33.28226)
42020-01-28 05:12:28L80.0000005.802POINT (-117.45899 33.28226)
52020-05-23 19:24:27L80.2639676.596MULTIPOINT (-117.45896 33.28226, -117.45891 33...
\n", - "
" - ], - "text/plain": [ - " date satname cloud_cover geoaccuracy \\\n", - "0 2015-12-30 18:22:25 L9 0.000000 5.088 \n", - "1 2018-12-30 18:22:25 L8 0.000000 5.088 \n", - "2 2019-01-28 05:12:28 L8 0.230000 5.802 \n", - "3 2019-01-28 05:12:28 L9 0.100000 5.802 \n", - "4 2020-01-28 05:12:28 L8 0.000000 5.802 \n", - "5 2020-05-23 19:24:27 L8 0.263967 6.596 \n", - "\n", - " geometry \n", - "0 POINT (-117.45896 33.28226) \n", - "1 MULTIPOINT (-117.45892 33.28226, -117.44480 33... \n", - "2 POINT (-117.45881 33.28239) \n", - "3 POINT (-117.45894 33.28226) \n", - "4 POINT (-117.45899 33.28226) \n", - "5 MULTIPOINT (-117.45896 33.28226, -117.45891 33... " - ] - }, - "execution_count": 104, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "result.sort_values(by='date', inplace=True)\n", - "result.reset_index(drop=True, inplace=True)\n", - "result" - ] - }, - { - "cell_type": "code", - "execution_count": 88, - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
datesatnamecloud_covergeoaccuracygeometry
02018-12-30 18:22:25L80.0000005.088MULTIPOINT (-117.45892 33.28226, -117.44480 33...
12019-01-28 05:12:28L80.2300005.802POINT (-117.45881 33.28239)
22020-05-23 19:24:27L80.2639676.596MULTIPOINT (-117.45896 33.28226, -117.45891 33...
32020-01-28 05:12:28L80.0000005.802POINT (-117.45899 33.28226)
42015-12-30 18:22:25L90.0000005.088POINT (-117.45896 33.28226)
52019-01-28 05:12:28L90.1000005.802POINT (-117.45894 33.28226)
\n", - "
" - ], - "text/plain": [ - " date satname cloud_cover geoaccuracy \\\n", - "0 2018-12-30 18:22:25 L8 0.000000 5.088 \n", - "1 2019-01-28 05:12:28 L8 0.230000 5.802 \n", - "2 2020-05-23 19:24:27 L8 0.263967 6.596 \n", - "3 2020-01-28 05:12:28 L8 0.000000 5.802 \n", - "4 2015-12-30 18:22:25 L9 0.000000 5.088 \n", - "5 2019-01-28 05:12:28 L9 0.100000 5.802 \n", - "\n", - " geometry \n", - "0 MULTIPOINT (-117.45892 33.28226, -117.44480 33... \n", - "1 POINT (-117.45881 33.28239) \n", - "2 MULTIPOINT (-117.45896 33.28226, -117.45891 33... \n", - "3 POINT (-117.45899 33.28226) \n", - "4 POINT (-117.45896 33.28226) \n", - "5 POINT (-117.45894 33.28226) " - ] - }, - "execution_count": 88, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "from typing import List, Optional, Union\n", - "import geopandas as gpd\n", - "import pandas as pd\n", - "from shapely.ops import unary_union\n", - "from coastseg.merge_utils import merge_geometries\n", - "\n", - "\n", - "def merge_and_average(df1: gpd.GeoDataFrame, df2: gpd.GeoDataFrame) -> gpd.GeoDataFrame:\n", - " # Perform a full outer join\n", - " merged = pd.merge(df1, df2, on=['satname', 'date'], how='outer', suffixes=('_df1', '_df2'))\n", - "\n", - " # Identify numeric columns from both dataframes\n", - " numeric_columns_df1 = df1.select_dtypes(include='number').columns\n", - " numeric_columns_df2 = df2.select_dtypes(include='number').columns\n", - " common_numeric_columns = set(numeric_columns_df1).intersection(numeric_columns_df2)\n", - "\n", - " # Average the numeric columns\n", - " for column in common_numeric_columns:\n", - " merged[column] = merged[[f'{column}_df1', f'{column}_df2']].mean(axis=1)\n", - "\n", - " # Drop the original numeric columns\n", - " merged.drop(columns=[f'{column}_df1' for column in common_numeric_columns] + [f'{column}_df2' for column in common_numeric_columns], inplace=True)\n", - "\n", - " # Merge geometries\n", - " geometry_columns = [col for col in merged.columns if 'geometry' in col]\n", - " merged = merge_geometries(merged, columns=geometry_columns)\n", - "\n", - " return merged\n", - "\n", - "# List of GeoDataFrames\n", - "gdfs = [extracted_gdf1, extracted_gdf2, extracted_gdf3]\n", - "\n", - "# Perform a full outer join and average the numeric columns across all GeoDataFrames\n", - "result = reduce(merge_and_average, gdfs)\n", - "\n", - "result\n" - ] - }, - { - "cell_type": "code", - "execution_count": 72, - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
dategeometry_df1satnamegeometry_df2cloud_covergeoaccuracy
02018-12-30 18:22:25MULTIPOINT (-117.45892 33.28226)L8MULTIPOINT (-117.44480 33.26540)0.0000005.088
12019-01-28 05:12:28MULTIPOINT (-117.45881 33.28239)L8None0.2300005.802
22020-05-23 19:24:27MULTIPOINT (-117.45875 33.28242)L8MULTIPOINT (-117.45896 33.28226)0.2639676.596
32020-01-28 05:12:28NoneL8MULTIPOINT (-117.45899 33.28226)0.0000005.802
\n", - "
" - ], - "text/plain": [ - " date geometry_df1 satname \\\n", - "0 2018-12-30 18:22:25 MULTIPOINT (-117.45892 33.28226) L8 \n", - "1 2019-01-28 05:12:28 MULTIPOINT (-117.45881 33.28239) L8 \n", - "2 2020-05-23 19:24:27 MULTIPOINT (-117.45875 33.28242) L8 \n", - "3 2020-01-28 05:12:28 None L8 \n", - "\n", - " geometry_df2 cloud_cover geoaccuracy \n", - "0 MULTIPOINT (-117.44480 33.26540) 0.000000 5.088 \n", - "1 None 0.230000 5.802 \n", - "2 MULTIPOINT (-117.45896 33.28226) 0.263967 6.596 \n", - "3 MULTIPOINT (-117.45899 33.28226) 0.000000 5.802 " - ] - }, - "execution_count": 72, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "\n", - "# Perform a full outer join\n", - "merged = pd.merge(extracted_gdf1, extracted_gdf2, on=['satname', 'date'], how='outer', suffixes=('_df1', '_df2'))\n", - "# Identify numeric columns from both dataframes\n", - "numeric_columns_df1 = extracted_gdf1.select_dtypes(include='number').columns\n", - "numeric_columns_df2 = extracted_gdf2.select_dtypes(include='number').columns\n", - "common_numeric_columns = set(numeric_columns_df1).intersection(numeric_columns_df2)\n", - "# Average the numeric columns\n", - "for column in common_numeric_columns:\n", - " merged[column] = merged[[f'{column}_df1', f'{column}_df2']].mean(axis=1)\n", - "# Drop the original numeric columns\n", - "merged.drop(columns=[f'{column}_df1' for column in common_numeric_columns] + [f'{column}_df2' for column in common_numeric_columns], inplace=True)\n", - "# Merge geometries\n", - "geometry_columns = [col for col in merged.columns if 'geometry' in col]\n", - "merged\n", - "\n", - "\n" - ] - }, - { - "cell_type": "code", - "execution_count": 81, - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
geometry_df1geometry_df2
satnamedate
L82018-12-30 18:22:25POINT (-117.45892 33.28226)POINT (-117.44480 33.26540)
2019-01-28 05:12:28POINT (-117.45881 33.28239)GEOMETRYCOLLECTION EMPTY
2020-01-28 05:12:28GEOMETRYCOLLECTION EMPTYPOINT (-117.45899 33.28226)
2020-05-23 19:24:27POINT (-117.45875 33.28242)POINT (-117.45896 33.28226)
\n", - "
" - ], - "text/plain": [ - " geometry_df1 \\\n", - "satname date \n", - "L8 2018-12-30 18:22:25 POINT (-117.45892 33.28226) \n", - " 2019-01-28 05:12:28 POINT (-117.45881 33.28239) \n", - " 2020-01-28 05:12:28 GEOMETRYCOLLECTION EMPTY \n", - " 2020-05-23 19:24:27 POINT (-117.45875 33.28242) \n", - "\n", - " geometry_df2 \n", - "satname date \n", - "L8 2018-12-30 18:22:25 POINT (-117.44480 33.26540) \n", - " 2019-01-28 05:12:28 GEOMETRYCOLLECTION EMPTY \n", - " 2020-01-28 05:12:28 POINT (-117.45899 33.28226) \n", - " 2020-05-23 19:24:27 POINT (-117.45896 33.28226) " - ] - }, - "execution_count": 81, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "if columns is None:\n", - " columns = [col for col in merged.columns if \"geometry\" in col]\n", - "columns\n", - "# # Use groupby and agg to perform the operation more efficiently\n", - "geometries = merged.groupby(['satname', 'date'])[columns].agg(unary_union)\n", - "geometries" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "if columns is None:\n", - " columns = [col for col in merged.columns if \"geometry\" in col]\n", - "\n", - "# Concatenate the geometry columns into a list\n", - "merged['geometry_list'] = merged[columns].values.tolist()\n", - "\n", - "# Apply unary_union to each list of geometries\n", - "merged['geometry'] = merged['geometry_list'].apply(unary_union)\n", - "\n", - "# Drop the original geometry columns and the 'geometry_list' column\n", - "merged.drop(columns=columns + ['geometry_list'], inplace=True)\n", - "\n", - "merged" - ] - }, - { - "cell_type": "code", - "execution_count": 82, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "geometry_df1 POINT (-117.45892 33.28226)\n", - "geometry_df2 POINT (-117.4448 33.2654)\n", - "Name: (L8, 2018-12-30 18:22:25), dtype: object" - ] - }, - "execution_count": 82, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "geometries.iloc[0][columns].agg(unary_union)" - ] - }, - { - "cell_type": "code", - "execution_count": 83, - "metadata": {}, - "outputs": [], - "source": [ - "combined_geometry = unary_union(geometries[columns].values)" - ] - }, - { - "cell_type": "code", - "execution_count": 85, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "array([[, ],\n", - " [, ],\n", - " [, ],\n", - " [, ]],\n", - " dtype=object)" - ] - }, - "execution_count": 85, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "geometries[columns].values" - ] - }, - { - "cell_type": "code", - "execution_count": 86, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "MULTIPOINT (-117.45899 33.28226, -117.45896 33.28226, -117.45892 33.28226, -117.45881 33.28239, -117.45875 33.28242, -117.4448 33.2654)\n" - ] - } - ], - "source": [ - "print(combined_geometry)" - ] - }, - { - "cell_type": "code", - "execution_count": 77, - "metadata": {}, - "outputs": [], - "source": [ - "geometries = geometries.rename(columns={columns[0]: 'geometry'})" - ] - }, - { - "cell_type": "code", - "execution_count": 78, - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
geometrygeometry_df2
satnamedate
L82018-12-30 18:22:25POINT (-117.45892 33.28226)POINT (-117.44480 33.26540)
2019-01-28 05:12:28POINT (-117.45881 33.28239)GEOMETRYCOLLECTION EMPTY
2020-01-28 05:12:28GEOMETRYCOLLECTION EMPTYPOINT (-117.45899 33.28226)
2020-05-23 19:24:27POINT (-117.45875 33.28242)POINT (-117.45896 33.28226)
\n", - "
" - ], - "text/plain": [ - " geometry \\\n", - "satname date \n", - "L8 2018-12-30 18:22:25 POINT (-117.45892 33.28226) \n", - " 2019-01-28 05:12:28 POINT (-117.45881 33.28239) \n", - " 2020-01-28 05:12:28 GEOMETRYCOLLECTION EMPTY \n", - " 2020-05-23 19:24:27 POINT (-117.45875 33.28242) \n", - "\n", - " geometry_df2 \n", - "satname date \n", - "L8 2018-12-30 18:22:25 POINT (-117.44480 33.26540) \n", - " 2019-01-28 05:12:28 GEOMETRYCOLLECTION EMPTY \n", - " 2020-01-28 05:12:28 POINT (-117.45899 33.28226) \n", - " 2020-05-23 19:24:27 POINT (-117.45896 33.28226) " - ] - }, - "execution_count": 78, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "geometries" - ] - }, - { - "cell_type": "code", - "execution_count": 73, - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
datesatnamecloud_covergeoaccuracy
02018-12-30 18:22:25L80.0000005.088
12019-01-28 05:12:28L80.2300005.802
22020-05-23 19:24:27L80.2639676.596
32020-01-28 05:12:28L80.0000005.802
\n", - "
" - ], - "text/plain": [ - " date satname cloud_cover geoaccuracy\n", - "0 2018-12-30 18:22:25 L8 0.000000 5.088\n", - "1 2019-01-28 05:12:28 L8 0.230000 5.802\n", - "2 2020-05-23 19:24:27 L8 0.263967 6.596\n", - "3 2020-01-28 05:12:28 L8 0.000000 5.802" - ] - }, - "execution_count": 73, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "if columns is None:\n", - " columns = [col for col in merged.columns if \"geometry\" in col]\n", - "columns\n", - "# # Use groupby and agg to perform the operation more efficiently\n", - "geometries = merged.groupby(['satname', 'date'])[columns].agg(unary_union)\n", - "geometries\n", - "# # Create a new GeoDataFrame\n", - "# return gpd.GeoDataFrame(merged_gdf.drop(columns=columns).join(geometries))\n", - "\n", - "merged.drop(columns=columns)" - ] - }, - { - "cell_type": "code", - "execution_count": 75, - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
datesatnamecloud_covergeoaccuracygeometry_df1geometry_df2
02018-12-30 18:22:25L80.0000005.088POINT (-117.45892 33.28226)POINT (-117.44480 33.26540)
12019-01-28 05:12:28L80.2300005.802POINT (-117.45881 33.28239)GEOMETRYCOLLECTION EMPTY
22020-05-23 19:24:27L80.2639676.596POINT (-117.45875 33.28242)POINT (-117.45896 33.28226)
32020-01-28 05:12:28L80.0000005.802GEOMETRYCOLLECTION EMPTYPOINT (-117.45899 33.28226)
\n", - "
" - ], - "text/plain": [ - " date satname cloud_cover geoaccuracy \\\n", - "0 2018-12-30 18:22:25 L8 0.000000 5.088 \n", - "1 2019-01-28 05:12:28 L8 0.230000 5.802 \n", - "2 2020-05-23 19:24:27 L8 0.263967 6.596 \n", - "3 2020-01-28 05:12:28 L8 0.000000 5.802 \n", - "\n", - " geometry_df1 geometry_df2 \n", - "0 POINT (-117.45892 33.28226) POINT (-117.44480 33.26540) \n", - "1 POINT (-117.45881 33.28239) GEOMETRYCOLLECTION EMPTY \n", - "2 POINT (-117.45875 33.28242) POINT (-117.45896 33.28226) \n", - "3 GEOMETRYCOLLECTION EMPTY POINT (-117.45899 33.28226) " - ] - }, - "execution_count": 75, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ - "# Drop the geometry columns from the merged DataFrame\n", - "merged_no_geo = merged.drop(columns=columns)\n", + "import pandas as pd\n", + "import geopandas as gpd\n", + "from shapely.geometry import MultiPoint, MultiLineString, LineString, Point\n", "\n", - "# Merge the geometries back into the DataFrame\n", - "result = pd.merge(merged_no_geo, geometries, on=['satname', 'date'], how='left')\n", - "result" - ] - }, - { - "cell_type": "code", - "execution_count": 71, - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
indexdate_mergedsatname_mergedcloud_covergeoaccuracysatname_geometriesdate_geometriesgeometry_df1geometry_df2
002018-12-30 18:22:25L80.0000005.088L82018-12-30 18:22:25POINT (-117.45892 33.28226)POINT (-117.44480 33.26540)
112019-01-28 05:12:28L80.2300005.802L82019-01-28 05:12:28POINT (-117.45881 33.28239)GEOMETRYCOLLECTION EMPTY
222020-05-23 19:24:27L80.2639676.596L82020-01-28 05:12:28GEOMETRYCOLLECTION EMPTYPOINT (-117.45899 33.28226)
332020-01-28 05:12:28L80.0000005.802L82020-05-23 19:24:27POINT (-117.45875 33.28242)POINT (-117.45896 33.28226)
\n", - "
" - ], - "text/plain": [ - " index date_merged satname_merged cloud_cover geoaccuracy \\\n", - "0 0 2018-12-30 18:22:25 L8 0.000000 5.088 \n", - "1 1 2019-01-28 05:12:28 L8 0.230000 5.802 \n", - "2 2 2020-05-23 19:24:27 L8 0.263967 6.596 \n", - "3 3 2020-01-28 05:12:28 L8 0.000000 5.802 \n", - "\n", - " satname_geometries date_geometries geometry_df1 \\\n", - "0 L8 2018-12-30 18:22:25 POINT (-117.45892 33.28226) \n", - "1 L8 2019-01-28 05:12:28 POINT (-117.45881 33.28239) \n", - "2 L8 2020-01-28 05:12:28 GEOMETRYCOLLECTION EMPTY \n", - "3 L8 2020-05-23 19:24:27 POINT (-117.45875 33.28242) \n", - "\n", - " geometry_df2 \n", - "0 POINT (-117.44480 33.26540) \n", - "1 GEOMETRYCOLLECTION EMPTY \n", - "2 POINT (-117.45899 33.28226) \n", - "3 POINT (-117.45896 33.28226) " - ] - }, - "execution_count": 71, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "merged = merged.reset_index() # Reset the index of the merged DataFrame\n", - "geometries = geometries.reset_index() # Reset the index of the geometries DataFrame\n", - "result = gpd.GeoDataFrame(merged.drop(columns=columns).join(geometries, lsuffix='_merged', rsuffix='_geometries'))\n", - "result" - ] - }, - { - "cell_type": "code", - "execution_count": 57, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "['geometry_df1', 'geometry_df2']" - ] - }, - "execution_count": 57, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "geometry_columns" + "data = {\n", + " \"date\": [\n", + " pd.Timestamp(\"2018-12-30 18:22:25\"),\n", + " ],\n", + " \"geometry\": [\n", + " MultiPoint([(-110.45892, 30.28226), (-110.45892, 31.28226)]),\n", + " ],\n", + " \"geoaccuracy\": [\n", + " 5.088,\n", + " ],\n", + " \"satname\": [ \"S2\", ],\n", + " \"cloud_cover\": [ 0.23,],\n", + " }\n", + "extracted_gdf1 = gpd.GeoDataFrame(data, crs=\"epsg:4326\")\n", + "extracted_gdf1" ] }, { "cell_type": "code", - "execution_count": 58, + "execution_count": null, "metadata": {}, "outputs": [], "source": [ - "merged[\"geometry\"] = merged[geometry_columns].apply(\n", - " lambda row: unary_union(row.tolist()), axis=1\n", - " )" + "# this is the gdf shares pd.Timestamp('2018-12-30 18:22:25') and pd.Timestamp('2020-5-23 19:24:27') with extracted_gdf1\n", + "data = {\n", + " \"date\": [\n", + " pd.Timestamp(\"2015-12-30 18:22:25\"),\n", + " pd.Timestamp(\"2020-1-28 05:12:28\"),\n", + " ],\n", + " \"geometry\": [\n", + " MultiPoint([(-117.44480, 33.26540)]),\n", + " MultiPoint([(-117.45899, 33.28226)]),\n", + " ],\n", + " \"geoaccuracy\": [\n", + " 5.088,\n", + " 6.02,\n", + " ],\n", + " \"satname\": [\"L8\", \"L8\", ],\n", + " \"cloud_cover\": [0.0,0.263967],\n", + "}\n", + "extracted_gdf2 = gpd.GeoDataFrame(data, crs=\"epsg:4326\")\n", + "extracted_gdf2 " ] }, { "cell_type": "code", - "execution_count": 59, + "execution_count": null, "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
dategeometry_df1satnamegeometry_df2cloud_covergeoaccuracygeometry
02018-12-30 18:22:25MULTIPOINT (-117.45892 33.28226)L8MULTIPOINT (-117.44480 33.26540)0.0000005.088MULTIPOINT (-117.45892 33.28226, -117.44480 33...
12019-01-28 05:12:28MULTIPOINT (-117.45881 33.28239)L8None0.2300005.802POINT (-117.45881 33.28239)
22020-05-23 19:24:27MULTIPOINT (-117.45875 33.28242)L8MULTIPOINT (-117.45896 33.28226)0.2639676.596MULTIPOINT (-117.45896 33.28226, -117.45875 33...
32020-01-28 05:12:28NoneL8MULTIPOINT (-117.45899 33.28226)0.0000005.802POINT (-117.45899 33.28226)
\n", - "
" - ], - "text/plain": [ - " date geometry_df1 satname \\\n", - "0 2018-12-30 18:22:25 MULTIPOINT (-117.45892 33.28226) L8 \n", - "1 2019-01-28 05:12:28 MULTIPOINT (-117.45881 33.28239) L8 \n", - "2 2020-05-23 19:24:27 MULTIPOINT (-117.45875 33.28242) L8 \n", - "3 2020-01-28 05:12:28 None L8 \n", - "\n", - " geometry_df2 cloud_cover geoaccuracy \\\n", - "0 MULTIPOINT (-117.44480 33.26540) 0.000000 5.088 \n", - "1 None 0.230000 5.802 \n", - "2 MULTIPOINT (-117.45896 33.28226) 0.263967 6.596 \n", - "3 MULTIPOINT (-117.45899 33.28226) 0.000000 5.802 \n", - "\n", - " geometry \n", - "0 MULTIPOINT (-117.45892 33.28226, -117.44480 33... \n", - "1 POINT (-117.45881 33.28239) \n", - "2 MULTIPOINT (-117.45896 33.28226, -117.45875 33... \n", - "3 POINT (-117.45899 33.28226) " - ] - }, - "execution_count": 59, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ - "merged" + "data = {\n", + " \"date\": [\n", + " pd.Timestamp(\"2023-12-30 18:22:25\"),\n", + " pd.Timestamp(\"1998-4-28 05:12:28\"),\n", + " pd.Timestamp(\"2001-8-23 19:24:27\"),\n", + " ],\n", + " \"geometry\": [\n", + " MultiPoint([(-117.45896, 33.28226)]),\n", + " MultiPoint([(-117.45894, 33.28226)]),\n", + " MultiPoint([(-117.45891, 33.28232)]),\n", + " ],\n", + " \"geoaccuracy\": [\n", + " 5.088,\n", + " 5.802,\n", + " 6.596,\n", + " ],\n", + " \"satname\": [\"L9\", \"L9\", \"L8\"],\n", + " \"cloud_cover\": [0.0, 0.1, 0.263967],\n", + "}\n", + "extracted_gdf3 = gpd.GeoDataFrame(data, crs=\"epsg:4326\")\n", + "extracted_gdf3" ] }, { @@ -5377,7 +2537,17 @@ "execution_count": null, "metadata": {}, "outputs": [], - "source": [] + "source": [ + "# List of GeoDataFrames\n", + "gdfs = [extracted_gdf1, extracted_gdf2, extracted_gdf3]\n", + "# Perform a full outer join and average the numeric columns across all GeoDataFrames\n", + "result = reduce(merge_and_average, gdfs)\n", + "# this merge should not have any common dates and should contain 1+2+3 = 6 rows\n", + "result.sort_values(by='date', inplace=True)\n", + "result.reset_index(drop=True, inplace=True)\n", + "\n", + "result " + ] }, { "cell_type": "code", @@ -5385,116 +2555,196 @@ "metadata": {}, "outputs": [], "source": [ - "from collections import Counter\n", - "\n", - "# Put all dataframes in a list\n", + "# List of GeoDataFrames\n", "gdfs = [extracted_gdf1, extracted_gdf2, extracted_gdf3]\n", + "# Perform a full outer join and average the numeric columns across all GeoDataFrames\n", + "result = reduce(merge_and_average, gdfs)\n", + "# this merge should not have any common dates and should contain 1+2+3 = 6 rows\n", + "result.sort_values(by='date', inplace=True)\n", + "result.reset_index(drop=True, inplace=True)\n", + "\n", + "# Concatenate the 'geoaccuracy' values from all GeoDataFrames\n", + "concated_gdf = pd.concat([gdf for gdf in gdfs])\n", "\n", - "# Initialize counters for dates and satellite names\n", - "date_counter = Counter()\n", - "satname_counter = Counter()\n", + "# Check if the values in expected_geoaccuracy are present in the 'geoaccuracy' column of the result DataFrame\n", + "assert concated_gdf['geoaccuracy'].isin(result['geoaccuracy']).all()\n", "\n", - "# Loop over all dataframes\n", - "for gdf in gdfs:\n", - " # Update the counters with the dates and satellite names from the current dataframe\n", - " date_counter.update(gdf['date'])\n", - " satname_counter.update(gdf['satname'])\n", + "assert len(result) == 6\n", + "# Check if the values in expected_geoaccuracy are present in the 'geoaccuracy' column of the result DataFrame\n", 
+ "assert concated_gdf['geoaccuracy'].isin(result['geoaccuracy']).all()\n", + "assert concated_gdf['cloud_cover'].isin(result['cloud_cover']).all()\n", + "assert concated_gdf['date'].isin(result['date']).all()\n", + "assert concated_gdf['satname'].isin(result['satname']).all()\n", + "# this test should not have merged any geometries because they were all on different dates\n", + "assert concated_gdf['date'].isin(result['date']).all()\n", "\n", - "# Now date_counter and satname_counter are dictionaries where the keys are the dates and satellite names,\n", - "# and the values are the number of times each date or satellite name appears across all dataframes." + "result " ] }, { "cell_type": "code", - "execution_count": 19, + "execution_count": null, "metadata": {}, "outputs": [], "source": [ - "from collections import Counter\n", + "import pandas as pd\n", "\n", - "# Put all dataframes in a list\n", + "# Concatenate the geoaccuracy values from all dataframes\n", + "expected_geoaccuracy = np.concatenate([extracted_gdf1['geoaccuracy'].values, extracted_gdf2['geoaccuracy'].values, extracted_gdf3['geoaccuracy'].values])\n", + "\n", + "# Convert expected_geoaccuracy to a pandas Series\n", + "expected_geoaccuracy_series = pd.Series(expected_geoaccuracy)\n", + "\n", + "# Check if the values in expected_geoaccuracy_series are present in the 'geoaccuracy' column of the result DataFrame\n", + "assert expected_geoaccuracy_series.isin(result['geoaccuracy']).all()\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import pandas as pd\n", + "\n", + "# List of GeoDataFrames\n", "gdfs = [extracted_gdf1, extracted_gdf2, extracted_gdf3]\n", "\n", - "# Initialize a counter for the combined values\n", - "combined_counter = Counter()\n", + "# Concatenate the 'geoaccuracy' values from all GeoDataFrames\n", + "expected_geoaccuracy = pd.concat([gdf['geoaccuracy'] for gdf in gdfs])\n", + "\n", + "# Check if the values in expected_geoaccuracy are present in the 'geoaccuracy' column of the result DataFrame\n", + "assert expected_geoaccuracy.isin(result['geoaccuracy']).all()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import pandas as pd\n", + "\n", + "# List of GeoDataFrames\n", + "gdfs = [extracted_gdf1, extracted_gdf2, extracted_gdf3]\n", "\n", - "# Loop over all dataframes\n", - "for gdf in gdfs:\n", - " # Combine the 'date' and 'satname' into a single value and update the counter with these values\n", - " combined_counter.update(gdf['date'].astype(str) + '_' + gdf['satname'])\n", + "# Concatenate the 'geoaccuracy' values from all GeoDataFrames\n", + "concated_gdf = pd.concat([gdf for gdf in gdfs])\n", "\n", - "# Now combined_counter is a dictionary where the keys are the combined 'date' and 'satname' values,\n", - "# and the values are the number of times each combined value appears across all dataframes." 
+ "# Check if the values in expected_geoaccuracy are present in the 'geoaccuracy' column of the result DataFrame\n", + "assert concated_gdf['geoaccuracy'].isin(result['geoaccuracy']).all()" ] }, { "cell_type": "code", - "execution_count": 20, + "execution_count": null, "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "Counter({'2020-05-23 19:24:27_L8': 3,\n", - " '2018-12-30 18:22:25_L8': 2,\n", - " '2019-01-28 05:12:28_L8': 1,\n", - " '2020-01-28 05:12:28_L8': 1,\n", - " '2015-12-30 18:22:25_L9': 1,\n", - " '2019-01-28 05:12:28_L9': 1})" - ] - }, - "execution_count": 20, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ - "combined_counter" + "mask = extracted_gdf1['geoaccuracy'].isin(result['geoaccuracy'])\n", + "assert len(extracted_gdf1[mask]) == 1" ] }, { "cell_type": "code", - "execution_count": 22, + "execution_count": null, "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "Counter({'2020-05-23 19:24:27_L8': 3, '2018-12-30 18:22:25_L8': 2})" - ] - }, - "execution_count": 22, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ - "# Create a new Counter that only includes items with a count of 2 or more\n", - "filtered_counter = Counter({k: v for k, v in combined_counter.items() if v >= 2})\n", - "filtered_counter" + "mask = extracted_gdf1['geoaccuracy'].isin(result['geoaccuracy'])\n", + "assert len(extracted_gdf1[mask]) == 1" ] }, { "cell_type": "code", - "execution_count": 8, + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "data = {\n", + " \"date\": [\n", + "\n", + " ],\n", + " \"geometry\": [\n", + "\n", + " ],\n", + " \"geoaccuracy\": [\n", + "\n", + " ],\n", + " \"satname\": [],\n", + " \"cloud_cover\": [],\n", + "}\n", + "empty_gdf = gpd.GeoDataFrame(data, crs=\"epsg:4326\")\n", + "empty_gdf" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# this is the gdf shares pd.Timestamp('2018-12-30 18:22:25') and pd.Timestamp('2020-5-23 19:24:27') with extracted_gdf1\n", + "data = {\n", + " \"date\": [\n", + " pd.Timestamp(\"2015-12-30 18:22:25\"),\n", + " pd.Timestamp(\"2020-1-28 05:12:28\"),\n", + " ],\n", + " \"geometry\": [\n", + " MultiPoint([(-117.44480, 33.26540)]),\n", + " MultiPoint([(-117.45899, 33.28226)]),\n", + " ],\n", + " \"geoaccuracy\": [\n", + " 5.088,\n", + " 6.02,\n", + " ],\n", + " \"satname\": [\"L8\", \"L8\", ],\n", + " \"cloud_cover\": [0.0,0.263967],\n", + "}\n", + "extracted_gdf2 = gpd.GeoDataFrame(data, crs=\"epsg:4326\")\n", + "extracted_gdf2 " + ] + }, + { + "cell_type": "code", + "execution_count": null, "metadata": {}, "outputs": [], "source": [ - "# Perform an 'inner' merge between each pair of DataFrames\n", - "merge_df1_df2 = extracted_gdf1.merge(extracted_gdf2, on=['date', 'satname'], how='inner')\n", - "merge_df1_df3 = extracted_gdf1.merge(extracted_gdf3, on=['date', 'satname'], how='inner')\n", - "merge_df2_df3 = extracted_gdf2.merge(extracted_gdf3, on=['date', 'satname'], how='inner')\n", + "from coastseg.merge_utils import convert_lines_to_multipoints\n", + "\n", + "# List of GeoDataFrames\n", + "gdfs = [empty_gdf, extracted_gdf2]\n", + "# Perform a full outer join and average the numeric columns across all GeoDataFrames\n", + "result = reduce(merge_and_average, gdfs)\n", + "# this merge should not have any common dates and should contain 1+2+3 = 6 rows\n", + "result.sort_values(by='date', inplace=True)\n", + "result.reset_index(drop=True, 
inplace=True)\n", "\n", - "# Concatenate the results\n", - "final_df = pd.concat([merge_df1_df2, merge_df1_df3, merge_df2_df3])\n", + "assert len(result) == len(extracted_gdf2)\n", + "assert result['date'].equals(extracted_gdf2['date'])\n", + "assert result['satname'].equals(extracted_gdf2['satname'])\n", + "assert result['cloud_cover'].equals(extracted_gdf2['cloud_cover'])\n", + "assert result['geoaccuracy'].equals(extracted_gdf2['geoaccuracy'])\n", + "# convert the result geometry to multipoint\n", + "new_result = convert_lines_to_multipoints(result)\n", + "assert new_result['geometry'].equals(extracted_gdf2['geometry'])\n", "\n", - "# Drop duplicates\n", - "final_df = final_df.drop_duplicates()" + "result " + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "assert len(result) == 2" ] }, { "cell_type": "code", - "execution_count": 9, + "execution_count": 1, "metadata": {}, "outputs": [ { @@ -5519,91 +2769,109 @@ " \n", " \n", " date\n", - " geometry_x\n", - " geoaccuracy_x\n", " satname\n", - " cloud_cover_x\n", - " geometry_y\n", - " geoaccuracy_y\n", - " cloud_cover_y\n", + " geometry\n", " \n", " \n", " \n", " \n", " 0\n", - " 2018-12-30 18:22:25\n", - " MULTIPOINT (-117.45892 33.28226)\n", - " 5.088\n", - " L8\n", - " 0.000000\n", - " MULTIPOINT (-117.44480 33.26540)\n", - " 5.088\n", - " 0.000000\n", - " \n", - " \n", - " 1\n", - " 2020-05-23 19:24:27\n", - " MULTIPOINT (-117.45875 33.28242)\n", - " 6.596\n", - " L8\n", - " 0.263967\n", - " MULTIPOINT (-117.45896 33.28226)\n", - " 6.596\n", - " 0.263967\n", - " \n", - " \n", - " 0\n", - " 2020-05-23 19:24:27\n", - " MULTIPOINT (-117.45875 33.28242)\n", - " 6.596\n", - " L8\n", - " 0.263967\n", - " MULTIPOINT (-117.45891 33.28232)\n", - " 6.596\n", - " 0.263967\n", - " \n", - " \n", - " 0\n", - " 2020-05-23 19:24:27\n", - " MULTIPOINT (-117.45896 33.28226)\n", - " 6.596\n", - " L8\n", - " 0.263967\n", - " MULTIPOINT (-117.45891 33.28232)\n", - " 6.596\n", - " 0.263967\n", + " 2022-01-01\n", + " sat1\n", + " MULTIPOINT (0.00000 0.00000, 1.00000 1.00000)\n", " \n", " \n", "\n", "" ], "text/plain": [ - " date geometry_x geoaccuracy_x \\\n", - "0 2018-12-30 18:22:25 MULTIPOINT (-117.45892 33.28226) 5.088 \n", - "1 2020-05-23 19:24:27 MULTIPOINT (-117.45875 33.28242) 6.596 \n", - "0 2020-05-23 19:24:27 MULTIPOINT (-117.45875 33.28242) 6.596 \n", - "0 2020-05-23 19:24:27 MULTIPOINT (-117.45896 33.28226) 6.596 \n", - "\n", - " satname cloud_cover_x geometry_y geoaccuracy_y \\\n", - "0 L8 0.000000 MULTIPOINT (-117.44480 33.26540) 5.088 \n", - "1 L8 0.263967 MULTIPOINT (-117.45896 33.28226) 6.596 \n", - "0 L8 0.263967 MULTIPOINT (-117.45891 33.28232) 6.596 \n", - "0 L8 0.263967 MULTIPOINT (-117.45891 33.28232) 6.596 \n", - "\n", - " cloud_cover_y \n", - "0 0.000000 \n", - "1 0.263967 \n", - "0 0.263967 \n", - "0 0.263967 " + " date satname geometry\n", + "0 2022-01-01 sat1 MULTIPOINT (0.00000 0.00000, 1.00000 1.00000)" ] }, - "execution_count": 9, + "execution_count": 1, "metadata": {}, "output_type": "execute_result" } ], "source": [ - "final_df" + "from coastseg.merge_utils import merge_geometries\n", + "import geopandas as gpd\n", + "from shapely.geometry import Point\n", + "data = {\n", + " \"date\": [\"2022-01-01\",],\n", + " \"satname\": [\"sat1\",],\n", + " \"geometry_df1\": [\n", + " Point(0, 0),\n", + " ],\n", + " \"geometry\": [Point(1, 1)],\n", + " }\n", + "gdf = gpd.GeoDataFrame(data)\n", + "result = merge_geometries(gdf)\n", + "result " + ] + }, + { + "cell_type": 
"code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "columns = None\n", + "merged_gdf = gdf\n", + "if columns is None:\n", + " columns = [col for col in merged_gdf.columns if \"geometry\" in col]\n", + "else:\n", + " columns = [col for col in columns if col in merged_gdf.columns]\n", + " \n", + "columns" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from shapely.ops import unary_union\n", + "\n", + "merged_gdf = gdf\n", + "merged_gdf[\"geometry\"] = merged_gdf[columns].apply(\n", + " lambda row: unary_union(row.tolist()), axis=1\n", + ")\n", + "merged_gdf" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "for col in columns:\n", + " if col in merged_gdf.columns and col != \"geometry\":\n", + " print(col)\n", + " merged_gdf = merged_gdf.drop(columns=col)\n", + "merged_gdf" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "data = {\n", + " \"date\": [\"2022-01-01\",],\n", + " \"satname\": [\"sat1\",],\n", + " \"geometry_df1\": [\n", + " Point(0, 0),\n", + " ],\n", + " \"geometry_df2\": [Point(1, 1)],\n", + " }\n", + "gdf = gpd.GeoDataFrame(data)\n", + "result = merge_geometries(gdf)\n", + "result " ] } ], diff --git a/tests/test_merge_utils.py b/tests/test_merge_utils.py index e69de29b..3ac953fb 100644 --- a/tests/test_merge_utils.py +++ b/tests/test_merge_utils.py @@ -0,0 +1,470 @@ +# Standard library imports +from collections import defaultdict + +# Related third party imports +import geopandas as gpd +import numpy as np +import pandas as pd +import pytest +from shapely.geometry import LineString, MultiLineString, MultiPoint, Point, Polygon +from coastseg import merge_utils +from functools import reduce + +# Local application/library specific imports +from coastseg.merge_utils import ( + calculate_overlap, + convert_lines_to_multipoints, + merge_geometries, +) + + +@pytest.fixture +def gdf_empty(): + return gpd.gpd.GeoDataFrame() + + +@pytest.fixture +def gdf_with_crs(): + # Create an empty GeoSeries with the specified CRS + geoseries = gpd.GeoSeries(crs="EPSG:4326") + # Create the gpd.GeoDataFrame using the empty GeoSeries + return gpd.gpd.GeoDataFrame(geometry=geoseries) + + +@pytest.fixture +def gdf_overlap(): + data = { + "geometry": [ + Polygon([(0, 0), (2, 0), (2, 2), (0, 2)]), + Polygon([(1, 1), (3, 1), (3, 3), (1, 3)]), + ] + } + return gpd.gpd.GeoDataFrame(data, crs="EPSG:4326") + + +@pytest.fixture +def gdf_no_overlap(): + data = { + "geometry": [ + Polygon([(0, 0), (1, 0), (1, 1), (0, 1)]), + Polygon([(2, 2), (3, 2), (3, 3), (2, 3)]), + ] + } + return gpd.gpd.GeoDataFrame(data, geometry="geometry", crs="EPSG:4326") + + +@pytest.fixture +def empty_extracted_gdf(): + data = { + "date": [], + "geometry": [], + "geoaccuracy": [], + "satname": [], + "cloud_cover": [], + } + return gpd.gpd.GeoDataFrame(data, crs="epsg:4326") + + +@pytest.fixture +def extracted_gdf1(): + data = { + "date": [ + pd.Timestamp("2018-12-30 18:22:25"), + pd.Timestamp("2019-1-28 05:12:28"), + pd.Timestamp("2020-5-23 19:24:27"), + ], + "geometry": [ + MultiPoint([(-117.45892, 33.28226), (-118.45892, 35.28226)]), + MultiPoint([(-117.45881, 33.28239), (-120.45892, 40.28226)]), + MultiPoint([(-117.45875, 33.28242)]), + ], + "geoaccuracy": [ + 5.088, + 5.802, + 6.596, + ], + "satname": ["L8", "L8", "L8"], + "cloud_cover": [0.0, 0.23, 0.263967], + } + return gpd.gpd.GeoDataFrame(data, 
crs="epsg:4326") + + +@pytest.fixture +def extracted_gdf2(): + # this is the gdf shares pd.Timestamp('2018-12-30 18:22:25') and pd.Timestamp('2020-5-23 19:24:27') with extracted_gdf1 + data = { + "date": [ + pd.Timestamp("2018-12-30 18:22:25"), + pd.Timestamp("2020-1-28 05:12:28"), + pd.Timestamp("2020-5-23 19:24:27"), + ], + "geometry": [ + MultiPoint([(-117.44480, 33.26540)]), + MultiPoint([(-117.45899, 33.28226)]), + MultiPoint([(-117.45896, 33.28226)]), + ], + "geoaccuracy": [ + 5.088, + 5.802, + 6.596, + ], + "satname": ["L8", "L8", "L8"], + "cloud_cover": [0.0, 0.0, 0.263967], + } + return gpd.gpd.GeoDataFrame(data, crs="epsg:4326") + + +@pytest.fixture +def extracted_gdf3(): + # this is the gdf shares pd.Timestamp('2018-12-30 18:22:25') and pd.Timestamp('2020-5-23 19:24:27') with extracted_gdf1 + data = { + "date": [ + pd.Timestamp("2015-12-30 18:22:25"), + pd.Timestamp("2019-1-28 05:12:28"), + pd.Timestamp("2020-5-23 19:24:27"), + ], + "geometry": [ + MultiPoint([(-117.45896, 33.28226)]), + MultiPoint([(-117.45894, 33.28226)]), + MultiPoint([(-117.45891, 33.28232)]), + ], + "geoaccuracy": [ + 5.088, + 5.802, + 6.596, + ], + "satname": ["L9", "L9", "L8"], + "cloud_cover": [0.0, 0.1, 0.263967], + } + return gpd.gpd.GeoDataFrame(data, crs="epsg:4326") + + +def test_empty_gdf(gdf_empty): + result = calculate_overlap(gdf_empty) + assert result.empty + + +def test_empty_gdf_with_crs(gdf_with_crs): + result = calculate_overlap(gdf_with_crs) + assert result.empty + assert result.crs == gdf_with_crs.crs + + +def test_overlap(gdf_overlap): + result = calculate_overlap(gdf_overlap) + assert not result.empty + assert result.crs == gdf_overlap.crs + assert len(result) == 1 + assert result.iloc[0].geometry.equals(Polygon([(1, 1), (2, 1), (2, 2), (1, 2)])) + + +def test_no_overlap(gdf_no_overlap): + result = calculate_overlap(gdf_no_overlap) + assert result.empty + + +def test_convert_multipoints_to_linestrings_with_linestrings(): + """ + Test function to check if the convert_multipoints_to_linestrings function + correctly converts a gpd.GeoDataFrame with LineString geometries to the same + gpd.GeoDataFrame. + """ + # Create a gpd.GeoDataFrame with LineString geometries + data = {"geometry": [LineString([(0, 0), (1, 1)]), LineString([(2, 2), (3, 3)])]} + gdf = gpd.gpd.GeoDataFrame(data) + result = merge_utils.convert_multipoints_to_linestrings(gdf) + assert result.equals(gdf) + + +def test_convert_multipoints_to_linestrings_with_multipoints(): + """ + Test function to check if the function `convert_multipoints_to_linestrings` correctly converts + MultiPoint geometries to LineString geometries. + """ + # Create a gpd.GeoDataFrame with MultiPoint geometries + data = {"geometry": [MultiPoint([(0, 0), (1, 1)]), MultiPoint([(2, 2), (3, 3)])]} + gdf = gpd.gpd.GeoDataFrame(data) + result = merge_utils.convert_multipoints_to_linestrings(gdf) + expected = gpd.gpd.GeoDataFrame( + {"geometry": [LineString([(0, 0), (1, 1)]), LineString([(2, 2), (3, 3)])]} + ) + assert result.equals(expected) + + +def test_convert_multipoints_to_linestrings_with_mixed_geometries(): + """ + Test function to check if the function `convert_multipoints_to_linestrings` correctly + converts a gpd.GeoDataFrame with mixed geometries (MultiPoint and LineString) to a gpd.GeoDataFrame + with only LineString geometries. 
+ """ + # Create a gpd.GeoDataFrame with mixed geometries + data = { + "geometry": [ + MultiPoint([(0, 0), (1, 1)]), + LineString([(2, 2), (3, 3)]), + MultiPoint([(4, 4), (5, 5)]), + ] + } + gdf = gpd.gpd.GeoDataFrame(data) + result = merge_utils.convert_multipoints_to_linestrings(gdf) + expected = gpd.gpd.GeoDataFrame( + { + "geometry": [ + LineString([(0, 0), (1, 1)]), + LineString([(2, 2), (3, 3)]), + LineString([(4, 4), (5, 5)]), + ] + } + ) + assert result.equals(expected) + + +def test_dataframe_to_dict(): + """ + Test function to check if the `dataframe_to_dict` function correctly converts a DataFrame to a dictionary + with specific mapping between dictionary keys and DataFrame columns. + """ + # create a list of geometries + geometries = [ + MultiPoint([(0, 0), (1, 1)]), + MultiPoint([(2, 2), (3, 3)]), + MultiPoint([(4, 4), (5, 5)]), + ] + + # create a dictionary with the other columns + data = { + "geoaccuracy": [1, 2, 3], + "cloud_cover": [0.1, 0.2, 0.3], + "satname": ["L8", "L8", "L8"], + "date": [ + pd.Timestamp("2018-12-30 18:22:25"), + pd.Timestamp("2018-1-30 19:22:25"), + pd.Timestamp("2022-01-03 19:22:25"), + ], + "geometry": geometries, + } + + # create a gpd.GeoDataFrame from the dictionary + df = gpd.gpd.GeoDataFrame(data, geometry="geometry", crs="epsg:4326") + df.set_crs("epsg:4326", inplace=True) + + # Define the key mapping + key_map = { + "shorelines": "geometry", + "dates": "date", + "satname": "satname", + "cloud_cover": "cloud_cover", + "geoaccuracy": "geoaccuracy", + } + # Convert the DataFrame to a dictionary using the `dataframe_to_dict` function + result = merge_utils.dataframe_to_dict(df, key_map) + + # Define the expected dictionary + expected = { + "geoaccuracy": [1, 2, 3], + "cloud_cover": [0.1, 0.2, 0.3], + "satname": ["L8", "L8", "L8"], + "dates": [ + "2018-12-30 18:22:25", + "2018-01-30 19:22:25", + "2022-01-03 19:22:25", + ], + "shorelines": [ + np.array([[0.0, 0.0], [1.0, 1.0]]), + np.array([[2.0, 2.0], [3.0, 3.0]]), + np.array([[4.0, 4.0], [5.0, 5.0]]), + ], + } + # Check if the resulting dictionary is equal to the expected dictionary + assert result["geoaccuracy"] == expected["geoaccuracy"] + assert result["cloud_cover"] == expected["cloud_cover"] + assert result["satname"] == expected["satname"] + assert result["dates"] == expected["dates"] + assert all( + np.array_equal(a, b) + for a, b in zip(result["shorelines"], expected["shorelines"]) + ) + + +def test_convert_lines_to_multipoints_with_linestrings(): + """ + Test function to check if the convert_lines_to_multipoints function + correctly converts a gpd.GeoDataFrame with LineString geometries to a new + gpd.GeoDataFrame with MultiPoint geometries. + """ + # Create a gpd.GeoDataFrame with LineString geometries + data = {"geometry": [LineString([(0, 0), (1, 1)]), LineString([(2, 2), (3, 3)])]} + gdf = gpd.GeoDataFrame(data) + result = convert_lines_to_multipoints(gdf) + expected = gpd.GeoDataFrame( + {"geometry": [MultiPoint([(0, 0), (1, 1)]), MultiPoint([(2, 2), (3, 3)])]} + ) + assert result.equals(expected) + + +def test_convert_lines_to_multipoints_with_multilinestrings(): + """ + Test function to check if the convert_lines_to_multipoints function + correctly converts a gpd.GeoDataFrame with MultiLineString geometries to a new + gpd.GeoDataFrame with MultiPoint geometries. 
+ """ + # Create a gpd.GeoDataFrame with MultiLineString geometries + data = { + "geometry": [ + MultiLineString([[(0, 0), (1, 1)], [(2, 2), (3, 3)]]), + MultiLineString([[(4, 4), (5, 5)], [(6, 6), (7, 7)]]), + ] + } + gdf = gpd.GeoDataFrame(data) + result = convert_lines_to_multipoints(gdf) + expected = gpd.GeoDataFrame( + { + "geometry": [ + MultiPoint([(0, 0), (1, 1), (2, 2), (3, 3)]), + MultiPoint([(4, 4), (5, 5), (6, 6), (7, 7)]), + ] + } + ) + assert result.equals(expected) + + +def test_convert_lines_to_multipoints_with_mixed_geometries(): + """ + Test function to check if the convert_lines_to_multipoints function + correctly converts a gpd.GeoDataFrame with mixed geometries (LineString and MultiLineString) + to a new gpd.GeoDataFrame with MultiPoint geometries. + """ + # Create a gpd.GeoDataFrame with mixed geometries + data = { + "geometry": [ + LineString([(0, 0), (1, 1)]), + MultiLineString([[(2, 2), (3, 3)], [(4, 4), (5, 5)]]), + ] + } + gdf = gpd.GeoDataFrame(data) + result = convert_lines_to_multipoints(gdf) + expected = gpd.GeoDataFrame( + { + "geometry": [ + MultiPoint([(0, 0), (1, 1)]), + MultiPoint([(2, 2), (3, 3), (4, 4), (5, 5)]), + ] + } + ) + assert result.equals(expected) + + +def test_convert_lines_to_multipoints_with_points(): + """ + Test function to check if the convert_lines_to_multipoints function + correctly handles a gpd.GeoDataFrame with Point geometries. + """ + # Create a gpd.GeoDataFrame with Point geometries + data = {"geometry": [Point(0, 0), Point(1, 1)]} + gdf = gpd.GeoDataFrame(data, geometry="geometry") + result = convert_lines_to_multipoints(gdf) + expected = gpd.GeoDataFrame( + {"geometry": [MultiPoint([(0, 0)]), MultiPoint([(1, 1)])]} + ) + assert result.equals(expected) + + +def test_merge_geometries_with_default_columns_and_operation(): + """ + Test function to check if the merge_geometries function correctly merges geometries + with the same date and satname using the default columns and operation. + """ + # Create a gpd.GeoDataFrame with two rows of Point geometries with the same date and satname + data = { + "date": [ + "2022-01-01", + ], + "satname": [ + "sat1", + ], + "geometry_df1": [ + Point(0, 0), + ], + "geometry": [Point(1, 1)], + } + gdf = gpd.GeoDataFrame(data) + + # Merge the geometries using the merge_geometries function + result = merge_geometries(gdf) + + # Define the expected gpd.GeoDataFrame with one row of a MultiPoint geometry + expected = gpd.GeoDataFrame( + { + "date": ["2022-01-01"], + "satname": ["sat1"], + "geometry": [ + MultiPoint([(0, 0), (1, 1)]), + ], + } + ) + + # Check if the resulting gpd.GeoDataFrame is equal to the expected gpd.GeoDataFrame + assert result.equals(expected) + + +def test_merge_geometries_with_standard_input(): + """ + Test function to check if the merge_geometries function correctly merges geometries + with the same date and satname using the default columns and operation. 
+ """ + # Create a gpd.GeoDataFrame with two rows of Point geometries with the same date and satname + data = { + "date": [ + "2022-01-01", + ], + "satname": [ + "sat1", + ], + "geometry_df1": [ + Point(0, 0), + ], + "geometry_df2": [ + MultiPoint( + [ + (1, 1), + ] + ) + ], + } + gdf = gpd.GeoDataFrame(data) + + # Merge the geometries using the merge_geometries function + result = merge_geometries(gdf) + + # Define the expected gpd.GeoDataFrame with one row of a MultiPoint geometry + expected = gpd.GeoDataFrame( + { + "date": ["2022-01-01"], + "satname": ["sat1"], + "geometry": [ + MultiPoint([(0, 0), (1, 1)]), + ], + } + ) + + # Check if the resulting gpd.GeoDataFrame is equal to the expected gpd.GeoDataFrame + assert result.equals(expected) + + +def test_merge_and_average(extracted_gdf1, extracted_gdf2, extracted_gdf3): + # List of GeoDataFrames + gdfs = [extracted_gdf1, extracted_gdf2, extracted_gdf3] + + # Perform a full outer join and average the numeric columns across all GeoDataFrames + result = reduce(merge_utils.merge_and_average, gdfs) + + result.sort_values(by="date", inplace=True) + result.reset_index(drop=True, inplace=True) + + assert len(result) == 6 + assert ( + result[["date", "satname"]].duplicated().sum() == 0 + ), "The combination of 'date' and 'satname' is not unique." + # assert np.all(result['cloud_cover'] == [0.0, 0.115, 0.263967, 0.0, 0.0, 0.1]) + + result From d6da585d0e002e63191499e7558f5b506484246f Mon Sep 17 00:00:00 2001 From: Sharon Fitzpatrick Date: Mon, 20 Nov 2023 10:20:33 -0800 Subject: [PATCH 29/87] #179 add unit tests merge_utils.merge_and_average --- tests/test_merge_utils.py | 182 ++++++++++++++++++++++++++++++++++++-- 1 file changed, 175 insertions(+), 7 deletions(-) diff --git a/tests/test_merge_utils.py b/tests/test_merge_utils.py index 3ac953fb..3b47517a 100644 --- a/tests/test_merge_utils.py +++ b/tests/test_merge_utils.py @@ -20,7 +20,20 @@ @pytest.fixture def gdf_empty(): - return gpd.gpd.GeoDataFrame() + data = { + "date": [], + "geometry": [], + "geoaccuracy": [], + "satname": [], + "cloud_cover": [], + } + empty_gdf = gpd.GeoDataFrame(data, crs="epsg:4326") + return empty_gdf + + +# @pytest.fixture +# def gdf_empty(): +# return gpd.gpd.GeoDataFrame() @pytest.fixture @@ -139,18 +152,18 @@ def extracted_gdf3(): return gpd.gpd.GeoDataFrame(data, crs="epsg:4326") -def test_empty_gdf(gdf_empty): +def test_calculate_overlap_empty_gdf(gdf_empty): result = calculate_overlap(gdf_empty) assert result.empty -def test_empty_gdf_with_crs(gdf_with_crs): +def test_calculate_overlap_empty_gdf_with_crs(gdf_with_crs): result = calculate_overlap(gdf_with_crs) assert result.empty assert result.crs == gdf_with_crs.crs -def test_overlap(gdf_overlap): +def test_calculate_overlap(gdf_overlap): result = calculate_overlap(gdf_overlap) assert not result.empty assert result.crs == gdf_overlap.crs @@ -158,7 +171,7 @@ def test_overlap(gdf_overlap): assert result.iloc[0].geometry.equals(Polygon([(1, 1), (2, 1), (2, 2), (1, 2)])) -def test_no_overlap(gdf_no_overlap): +def test_calculate_overlap_no_overlap(gdf_no_overlap): result = calculate_overlap(gdf_no_overlap) assert result.empty @@ -465,6 +478,161 @@ def test_merge_and_average(extracted_gdf1, extracted_gdf2, extracted_gdf3): assert ( result[["date", "satname"]].duplicated().sum() == 0 ), "The combination of 'date' and 'satname' is not unique." 
- # assert np.all(result['cloud_cover'] == [0.0, 0.115, 0.263967, 0.0, 0.0, 0.1]) + # Concatenate the 'geoaccuracy' values from all GeoDataFrames + expected_geoaccuracy = pd.concat([gdf["geoaccuracy"] for gdf in gdfs]) + + # Check if the values in expected_geoaccuracy are present in the 'geoaccuracy' column of the result DataFrame + assert expected_geoaccuracy.isin(result["geoaccuracy"]).all() + + +def test_merge_and_average_empty_gdf_and_non_empty( + gdf_empty, + extracted_gdf2, +): + # List of GeoDataFrames + gdfs = [gdf_empty, extracted_gdf2] + + # Perform a full outer join and average the numeric columns across all GeoDataFrames + result = reduce(merge_utils.merge_and_average, gdfs) + # this merge should not have any common dates and should contain 1+2+3 = 6 rows + result.sort_values(by="date", inplace=True) + result.reset_index(drop=True, inplace=True) + + assert len(result) == len(extracted_gdf2) + assert result["date"].equals(extracted_gdf2["date"]) + assert result["satname"].equals(extracted_gdf2["satname"]) + assert result["cloud_cover"].equals(extracted_gdf2["cloud_cover"]) + assert result["geoaccuracy"].equals(extracted_gdf2["geoaccuracy"]) + # convert the result geometry to multipoint + new_result = convert_lines_to_multipoints(result) + assert new_result["geometry"].equals(extracted_gdf2["geometry"]) + + +def test_merge_and_average_different_sized_gdfs(extracted_gdf1, extracted_gdf3): + # make the geodataframes different sizes + # Create a new GeoDataFrame with just the top row + extracted_gdf1_1_row = extracted_gdf1.head(1) + + # this is the gdf shares pd.Timestamp('2018-12-30 18:22:25') and pd.Timestamp('2020-5-23 19:24:27') with extracted_gdf1 + data = { + "date": [ + pd.Timestamp("2015-12-30 18:22:25"), + pd.Timestamp("2020-1-28 05:12:28"), + ], + "geometry": [ + MultiPoint([(-117.44480, 33.26540)]), + MultiPoint([(-117.45899, 33.28226)]), + ], + "geoaccuracy": [ + 5.088, + 6.02, + ], + "satname": [ + "L8", + "L8", + ], + "cloud_cover": [0.0, 0.263967], + } + extracted_gdf2_2_row = gpd.GeoDataFrame(data, crs="epsg:4326") + + # List of GeoDataFrames + gdfs = [extracted_gdf1_1_row, extracted_gdf2_2_row, extracted_gdf3] + + # Perform a full outer join and average the numeric columns across all GeoDataFrames + result = reduce(merge_utils.merge_and_average, gdfs) + # this merge should not have any common dates and should contain 1+2+3 = 6 rows + result.sort_values(by="date", inplace=True) + result.reset_index(drop=True, inplace=True) + + # Concatenate the 'geoaccuracy' values from all GeoDataFrames + concated_gdf = pd.concat([gdf for gdf in gdfs]) + + # Check if the values in expected_geoaccuracy are present in the 'geoaccuracy' column of the result DataFrame + assert concated_gdf["geoaccuracy"].isin(result["geoaccuracy"]).all() + + assert len(result) == 6 + # Check if the values in expected_geoaccuracy are present in the 'geoaccuracy' column of the result DataFrame + assert concated_gdf["geoaccuracy"].isin(result["geoaccuracy"]).all() + assert concated_gdf["cloud_cover"].isin(result["cloud_cover"]).all() + assert concated_gdf["date"].isin(result["date"]).all() + assert concated_gdf["satname"].isin(result["satname"]).all() + # this test should not have merged any geometries because they were all on different dates + assert concated_gdf["date"].isin(result["date"]).all() + + +def test_merge_and_average_2_overlapping_gdfs(extracted_gdf1, extracted_gdf2): + # List of GeoDataFrames + # these gdfs have 2 dates with the same satellite in common + gdfs = [extracted_gdf1, 
extracted_gdf2] + + # Perform a full outer join and average the numeric columns across all GeoDataFrames + result = reduce(merge_utils.merge_and_average, gdfs) + # this merge should not have any common dates and should contain 1+2+3 = 6 rows + result.sort_values(by="date", inplace=True) + result.reset_index(drop=True, inplace=True) + + # Concatenate the 'geoaccuracy' values from all GeoDataFrames + concated_gdf = pd.concat([gdf for gdf in gdfs]) + + # Check if the values in expected_geoaccuracy are present in the 'geoaccuracy' column of the result DataFrame + assert concated_gdf["geoaccuracy"].isin(result["geoaccuracy"]).all() + + # from the original 6 rows with 2 overlapping dates, the result should have 4 rows + assert len(result) == 4 + assert ( + result[["date", "satname"]].duplicated().sum() == 0 + ), "The combination of 'date' and 'satname' is not unique." + # Concatenate the 'geoaccuracy' values from all GeoDataFrames + expected_geoaccuracy = pd.concat([gdf["geoaccuracy"] for gdf in gdfs]) + + # Check if the values in expected_geoaccuracy are present in the 'geoaccuracy' column of the result DataFrame + assert expected_geoaccuracy.isin(result["geoaccuracy"]).all() + assert isinstance( + result[result["date"] == pd.Timestamp("2018-12-30 18:22:25")]["geometry"].iloc[ + 0 + ], + MultiPoint, + ) + assert ( + len( + result[result["date"] == pd.Timestamp("2018-12-30 18:22:25")]["geometry"] + .iloc[0] + .geoms + ) + == 3 + ) + assert np.isin(["L8"], result["satname"]).all() + + +def test_merge_and_average_1_gdf(extracted_gdf1): + # List of GeoDataFrames + # these gdfs have 2 dates with the same satellite in common + gdfs = [ + extracted_gdf1, + ] + + # Perform a full outer join and average the numeric columns across all GeoDataFrames + result = reduce(merge_utils.merge_and_average, gdfs) + + result.sort_values(by="date", inplace=True) + result.reset_index(drop=True, inplace=True) - result + assert len(result) == 3 + assert ( + result[["date", "satname"]].duplicated().sum() == 0 + ), "The combination of 'date' and 'satname' is not unique." 
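+    # Merging a single GeoDataFrame should leave it unchanged: the 2018-12-30 geometry
+    # is still a MultiPoint and the non-geometry columns match extracted_gdf1 exactly.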
+ assert isinstance( + result[result["date"] == pd.Timestamp("2018-12-30 18:22:25")]["geometry"].iloc[ + 0 + ], + MultiPoint, + ) + assert extracted_gdf1["geoaccuracy"].isin(result["geoaccuracy"]).all() + assert result["date"].equals(extracted_gdf1["date"]) + assert result["satname"].equals(extracted_gdf1["satname"]) + assert result["cloud_cover"].equals(extracted_gdf1["cloud_cover"]) + # convert the result geometry to multipoint + from coastseg.merge_utils import convert_lines_to_multipoints + + new_result = convert_lines_to_multipoints(result) + assert new_result["geometry"].equals(extracted_gdf1["geometry"]) From bf4ad7fd39db6ab4163867ce8fe6ada58a2e9d27 Mon Sep 17 00:00:00 2001 From: Sharon Fitzpatrick Date: Mon, 20 Nov 2023 10:23:39 -0800 Subject: [PATCH 30/87] #179 remove funcs that are no longer used --- src/coastseg/merge_utils.py | 332 ++++-------------------------------- 1 file changed, 34 insertions(+), 298 deletions(-) diff --git a/src/coastseg/merge_utils.py b/src/coastseg/merge_utils.py index 5440201b..f9a52d5b 100644 --- a/src/coastseg/merge_utils.py +++ b/src/coastseg/merge_utils.py @@ -1,20 +1,18 @@ +# Standard library imports from collections import defaultdict import os -from typing import List, Union +from typing import List, Optional, Union +# Related third party imports import geopandas as gpd import numpy as np import pandas as pd -from shapely.geometry import LineString, MultiLineString, MultiPoint +from shapely.geometry import LineString, MultiLineString, MultiPoint, Point from shapely.ops import unary_union +# Local application/library specific imports from coastseg import geodata_processing -# from coastseg.file_utilities import to_file -# from coastseg.common import get_cross_distance_df -# from coastseg.common import convert_linestrings_to_multipoints, stringify_datetime_columns -# from coastsat import SDS_transects - def convert_multipoints_to_linestrings(gdf: gpd.GeoDataFrame) -> gpd.GeoDataFrame: """ @@ -88,11 +86,7 @@ def dataframe_to_dict(df: pd.DataFrame, key_map: dict) -> dict: return dict(result_dict) -from geopandas import GeoDataFrame -from shapely.geometry import LineString, MultiLineString, MultiPoint, Point - - -def convert_lines_to_multipoints(gdf: GeoDataFrame) -> GeoDataFrame: +def convert_lines_to_multipoints(gdf: gpd.GeoDataFrame) -> gpd.GeoDataFrame: """ Convert LineString or MultiLineString geometries in a GeoDataFrame to MultiPoint geometries. @@ -130,60 +124,6 @@ def line_to_multipoint(geometry): return gdf -def merge_geodataframes( - on, how="inner", aggregation_funcs=None, crs="epsg:4326", *gdfs -): - """ - Merges multiple GeoDataFrames based on a common column. - - Parameters: - on : str or list of str - Column name or list of column names to merge on. - how : str, optional - Type of merge to be performed (default is 'inner'). - aggregation_funcs : dict, optional - Dictionary of column names to aggregation functions. - Example: for the columns 'cloud_cover' and 'geoaccuracy', the mean aggregation function can be specified as: - aggregation_funcs = { - 'cloud_cover': 'mean', - 'geoaccuracy': 'mean' - } - *gdfs : GeoDataFrames - Variable number of GeoDataFrames to be merged. - - Returns: - GeoDataFrame - The merged GeoDataFrame with aggregated columns as specified. 
- """ - if len(gdfs) < 2: - raise ValueError("At least two GeoDataFrames must be provided for merging") - - # Set default aggregation functions if none are provided - if aggregation_funcs is None: - aggregation_funcs = {} - - # Perform the merge while applying the custom aggregation functions - merged_gdf = gdfs[0] - merged_gdf.set_crs(crs) - for gdf in gdfs[1:]: - merged_gdf = pd.merge( - merged_gdf, gdf, on=on, how=how, suffixes=("_left", "_right") - ) - - # Apply aggregation functions - for col, func in aggregation_funcs.items(): - col_left = f"{col}_left" - col_right = f"{col}_right" - - # Check if the columns exist in both GeoDataFrames - if col_left in merged_gdf.columns and col_right in merged_gdf.columns: - # Apply the aggregation function and drop the original columns - merged_gdf[col] = merged_gdf[[col_left, col_right]].agg(func, axis=1) - merged_gdf = merged_gdf.drop(columns=[col_left, col_right]) - - return merged_gdf - - def read_first_geojson_file( directory: str, filenames=["extracted_shorelines_lines.geojson", "extracted_shorelines.geojson"], @@ -266,24 +206,6 @@ def calculate_overlap(gdf: gpd.GeoDataFrame) -> gpd.GeoDataFrame: # Create a GeoDataFrame from the GeoSeries overlap_gdf = gpd.GeoDataFrame(geometry=intersection_series) - - # # Loop over each pair of rows in gdf - # for i in range(len(gdf)): - # for j in range(i+1, len(gdf)): - # # Check for intersection - # if gdf.iloc[i].geometry.intersects(gdf.iloc[j].geometry): - # # Calculate the intersection - # intersection = gdf.iloc[i].geometry.intersection(gdf.iloc[j].geometry) - - # # Create a new row with the intersection and append to the result list - # overlap_list.append({'geometry': intersection}) - - # # Create a DataFrame from the results list - # overlap_df = pd.DataFrame(overlap_list) - - # # Convert the result DataFrame to a GeoDataFrame and set the CRS - # overlap_gdf = gpd.GeoDataFrame(overlap_df, geometry='geometry', crs=gdf.crs) - return overlap_gdf @@ -342,152 +264,6 @@ def average_multipoints(multipoints) -> MultiPoint: return MultiPoint(average_coords) -def average_columns(df, col1, col2, new_col): - df[new_col] = df[[col1, col2]].mean(axis=1, skipna=True) - return df - - -def combine_dataframes(df1, df2, join_columns): - # Perform an outer join and mark the origin of each row - all_rows = pd.merge(df1, df2, on=join_columns, how="outer", indicator=True) - - # Keep only the rows that are in 'df1' but not in 'df2' - df1_unique = all_rows[all_rows["_merge"] == "left_only"] - if "cloud_cover_x" in df1_unique.columns and "cloud_cover_y" in df1_unique.columns: - df1_unique = average_columns( - df1_unique, "cloud_cover_x", "cloud_cover_y", "cloud_cover" - ) - df1_unique.drop(columns=["cloud_cover_x", "cloud_cover_y"], inplace=True) - if "geoaccuracy_x" in df1_unique.columns and "geoaccuracy_y" in df1_unique.columns: - df1_unique = average_columns( - df1_unique, "geoaccuracy_x", "geoaccuracy_y", "geoaccuracy" - ) - df1_unique.drop(columns=["geoaccuracy_x", "geoaccuracy_y"], inplace=True) - df1_unique.drop(columns=["_merge"], inplace=True) - - # Concatenate 'df2' and the unique rows from 'df1' - result = pd.concat([df2, df1_unique], ignore_index=True) - - def assign_geometry(row): - if pd.isnull(row["geometry"]): - if pd.notnull(row["geometry_x"]): - return row["geometry_x"] - elif pd.notnull(row["geometry_y"]): - return row["geometry_y"] - else: - return row["geometry"] - - if "geometry_x" in result.columns and "geometry_y" in result.columns: - result["geometry"] = result.apply(assign_geometry, axis=1) - 
result.drop(columns=["geometry_x", "geometry_y"], inplace=True) - return result - - -def combine_geodataframes(gdf1, gdf2, join_columns, average_columns=None): - """ - Combines two GeoDataFrames, performing an outer join and averaging specified numerical columns. - - Parameters: - gdf1, gdf2 : GeoDataFrame - The GeoDataFrames to combine. - join_columns : list of str - The columns to join on. - average_columns : list of str, optional - The columns to average. If None, all numerical columns with the same name in both GeoDataFrames will be averaged. - - Returns: - GeoDataFrame - The combined GeoDataFrame. - """ - # Ensure that the 'geometry' column is present in both GeoDataFrames - if "geometry" not in gdf1.columns or "geometry" not in gdf2.columns: - raise ValueError("Both GeoDataFrames must have a 'geometry' column.") - - # Combine GeoDataFrames using an outer join - combined_gdf = pd.merge( - gdf1, gdf2, on=join_columns, how="outer", suffixes=("_gdf1", "_gdf2") - ) - - if average_columns is None: - # List of numerical columns to be averaged - average_columns = [ - col - for col in gdf1.columns - if col in gdf2.columns - and col not in join_columns + ["geometry"] - and np.issubdtype(gdf1[col].dtype, np.number) - and np.issubdtype(gdf2[col].dtype, np.number) - ] - - # Average specified numerical columns - for col in average_columns: - if ( - f"{col}_gdf1" in combined_gdf.columns - and f"{col}_gdf2" in combined_gdf.columns - ): - combined_gdf[col] = combined_gdf[[f"{col}_gdf1", f"{col}_gdf2"]].mean( - axis=1 - ) - combined_gdf.drop(columns=[f"{col}_gdf1", f"{col}_gdf2"], inplace=True) - - # Resolve geometry conflicts by prioritizing non-null values - combined_gdf["geometry"] = combined_gdf["geometry_gdf1"].combine_first( - combined_gdf["geometry_gdf2"] - ) - combined_gdf.drop(columns=["geometry_gdf1", "geometry_gdf2"], inplace=True) - - return gpd.GeoDataFrame(combined_gdf, geometry="geometry") - - -def mergeRightUnique( - left_df: gpd.GeoDataFrame, - right_df: gpd.GeoDataFrame, - join_columns: Union[str, List[str]] = ["date", "satname"], - CRS: str = "EPSG:4326", -) -> pd.DataFrame: - """ - Merges two GeoDataFrames, keeping only the unique rows from the right GeoDataFrame based on the specified join columns. - - Parameters: - left_df : GeoDataFrame - The left GeoDataFrame to merge. Its CRS is set to the specified CRS if not already set. - right_df : GeoDataFrame - The right GeoDataFrame to merge. Its CRS is set to the specified CRS if not already set. - join_columns : str or list of str, default ['date', 'satname'] - The columns to join on. These columns are set as the index for both GeoDataFrames. If a string is passed, it is converted to a list. - CRS : str, default 'EPSG:4326' - The Coordinate Reference System to set for the GeoDataFrames if not already set. - - Returns: - GeoDataFrame - The merged GeoDataFrame, containing all rows from the left GeoDataFrame and only the unique rows from the right GeoDataFrame based on the join columns. 
- """ - if not left_df.crs: - left_df.set_crs(CRS, inplace=True) - if not right_df.crs: - right_df.set_crs(CRS, inplace=True) - - if isinstance(join_columns, str): - join_columns = [join_columns] - # Ensure that join are set as the index for both DataFrames - left_df.set_index(join_columns, inplace=True) - right_df.set_index(join_columns, inplace=True) - - # Find the difference in the MultiIndex between right_df and merged_gdf - unique_indices = right_df.index.difference(merged_gdf.index) - - # Select only those rows from right_df that have unique indices - unique_to_right_df = right_df.loc[unique_indices] - if unique_to_right_df.crs: - unique_to_right_df.crs = right_df.crs - - # Now concatenate the merged_gdf with the unique_to_right_df - combined_gdf = pd.concat( - [merged_gdf.reset_index(), unique_to_right_df.reset_index()], ignore_index=True - ) - return combined_gdf - - def merge_geometries(merged_gdf, columns=None, operation=unary_union): """ Performs a specified operation for the geometries with the same date and satname. @@ -513,7 +289,7 @@ def merge_geometries(merged_gdf, columns=None, operation=unary_union): lambda row: operation(row.tolist()), axis=1 ) for col in columns: - if col in merged_gdf.columns: + if col in merged_gdf.columns and col != "geometry": merged_gdf = merged_gdf.drop(columns=col) return merged_gdf @@ -575,70 +351,30 @@ def create_csv_per_transect( print(f"Failed to save time-series for transect {key}: {e}") -# better way of mergine multiple gdfs together -# from shapely.ops import unary_union -# from coastseg.merge_utils import merge_geometries -# from functools import reduce -# import pandas as pd - - -# def merge_geometries(merged_gdf, columns=None, operation=unary_union): -# """ -# Performs a specified operation for the geometries with the same date and satname. - -# Parameters: -# merged_gdf : GeoDataFrame -# The GeoDataFrame to perform the operation on. -# columns : list of str, optional -# The columns to perform the operation on. If None, all columns with 'geometry' in the name are used. -# operation : function, optional -# The operation to perform. If None, unary_union is used. - -# Returns: -# GeoDataFrame -# The GeoDataFrame with the operation performed. 
-# """ -# if columns is None: -# columns = [col for col in merged_gdf.columns if "geometry" in col] -# else: -# columns = [col for col in columns if col in merged_gdf.columns] - -# merged_gdf["geometry"] = merged_gdf[columns].apply( -# lambda row: operation(row.tolist()), axis=1 -# ) -# for col in columns: -# if col in merged_gdf.columns: -# merged_gdf = merged_gdf.drop(columns=col) -# return merged_gdf - -# def merge_and_average(df1, df2): -# # Perform a full outer join -# merged = pd.merge(df1, df2, on=['satname', 'date'], how='outer', suffixes=('_df1', '_df2')) - -# # Loop over all columns -# for column in set(df1.columns).intersection(df2.columns): -# # Merge the geometries - -# if isinstance(df1[column].dtype, gpd.array.GeometryDtype): -# print(f"merging {{['{column}_df1', '{column}_df2']}}") -# print(df1[column]) -# print(df2[column]) -# # merged = merge_geometries(merged, columns=[f'{column}_df1', f'{column}_df2'], operation=unary_union) -# merged = merge_geometries(merged) -# continue -# # Skip non-numeric columns -# if not pd.api.types.is_numeric_dtype(df1[column]): -# continue -# # Average the values in the two columns -# merged[column] = merged[[f'{column}_df1', f'{column}_df2']].mean(axis=1) -# merged.drop(columns=[f'{column}_df1', f'{column}_df2'], inplace=True) - -# return merged - -# # List of GeoDataFrames -# gdfs = [extracted_gdf1, extracted_gdf2, extracted_gdf3] - -# # Perform a full outer join and average the numeric columns across all GeoDataFrames -# result = reduce(merge_and_average, gdfs) - -# result +def merge_and_average(df1: gpd.GeoDataFrame, df2: gpd.GeoDataFrame) -> gpd.GeoDataFrame: + # Perform a full outer join + merged = pd.merge( + df1, df2, on=["satname", "date"], how="outer", suffixes=("_df1", "_df2") + ) + + # Identify numeric columns from both dataframes + numeric_columns_df1 = df1.select_dtypes(include="number").columns + numeric_columns_df2 = df2.select_dtypes(include="number").columns + common_numeric_columns = set(numeric_columns_df1).intersection(numeric_columns_df2) + + # Average the numeric columns + for column in common_numeric_columns: + merged[column] = merged[[f"{column}_df1", f"{column}_df2"]].mean(axis=1) + + # Drop the original numeric columns + merged.drop( + columns=[f"{column}_df1" for column in common_numeric_columns] + + [f"{column}_df2" for column in common_numeric_columns], + inplace=True, + ) + + # Merge geometries + geometry_columns = [col for col in merged.columns if "geometry" in col] + merged = merge_geometries(merged, columns=geometry_columns) + + return merged From e80bdddace2d6ca4f09abdc3e1ea5ea6adf9fa2e Mon Sep 17 00:00:00 2001 From: Sharon Fitzpatrick Date: Tue, 21 Nov 2023 22:43:43 -0800 Subject: [PATCH 31/87] #179 allow same ROI w/ diff dates to merge configs --- src/coastseg/merge_utils.py | 98 ++++++++++++++++++++++++++++++------- 1 file changed, 79 insertions(+), 19 deletions(-) diff --git a/src/coastseg/merge_utils.py b/src/coastseg/merge_utils.py index f9a52d5b..c5283174 100644 --- a/src/coastseg/merge_utils.py +++ b/src/coastseg/merge_utils.py @@ -294,27 +294,87 @@ def merge_geometries(merged_gdf, columns=None, operation=unary_union): return merged_gdf -def merge_geojson_files( - *file_paths: str, -) -> gpd.GeoDataFrame: - """ - Merges any number of GeoJSON files into a single GeoDataFrame, removing any duplicate rows. 
+def read_geojson_files(filepaths): + """Read GeoJSON files into GeoDataFrames and return a list.""" + return [gpd.read_file(path) for path in filepaths] - Parameters: - - *file_paths (str): Paths to the GeoJSON files. - Returns: - - GeoDataFrame: A GeoDataFrame containing the merged data from all input files, with duplicates removed. - """ - merged_gdf = gpd.GeoDataFrame() - for filepath in file_paths: - gdf = geodata_processing.read_gpd_file(filepath) - # Merging the two dataframes - merged_gdf = gpd.GeoDataFrame(pd.concat([merged_gdf, gdf], ignore_index=True)) - - # Dropping any duplicated rows based on all columns - merged_gdf_cleaned = merged_gdf.drop_duplicates() - return merged_gdf_cleaned +def concatenate_gdfs(gdfs): + """Concatenate a list of GeoDataFrames into a single GeoDataFrame.""" + return pd.concat(gdfs, ignore_index=True) + + +def filter_and_join_gdfs(gdf, feature_type, predicate="intersects"): + """Filter GeoDataFrame by feature type, ensure spatial index, and perform a spatial join.""" + filtered_gdf = gdf[gdf["type"] == feature_type].copy()[["geometry"]] + filtered_gdf["geometry"] = filtered_gdf["geometry"].simplify( + tolerance=0.001 + ) # Simplify geometry if possible to improve performance + filtered_gdf.sindex # Ensure spatial index + return gpd.sjoin(gdf, filtered_gdf[["geometry"]], how="inner", predicate=predicate) + + +def aggregate_gdf(gdf, group_fields): + """Aggregate a GeoDataFrame by specified fields using a custom combination function.""" + + def combine_non_nulls(series): + unique_values = series.dropna().unique() + return ( + unique_values[0] + if len(unique_values) == 1 + else ", ".join(map(str, unique_values)) + ) + + return ( + gdf.drop(columns=["index_right"]) + .drop_duplicates() + .groupby(group_fields, as_index=False) + .agg(combine_non_nulls) + ) + + +def merge_geojson_files(session_locations, merged_session_location): + """Main function to merge GeoJSON files from different session locations.""" + filepaths = [ + os.path.join(location, "config_gdf.geojson") for location in session_locations + ] + gdfs = read_geojson_files(filepaths) + merged_gdf = gpd.GeoDataFrame(concatenate_gdfs(gdfs), geometry="geometry") + + # Filter the geodataframe to only elements that intersect with the rois (dramatically drops the size of the geodataframe) + merged_config = filter_and_join_gdfs(merged_gdf, "roi", predicate="intersects") + # apply a group by operation to combine the rows with the same type and geometry into a single row + merged_config = aggregate_gdf(merged_config, ["type", "geometry"]) + # applying the group by function in aggregate_gdf() turns the geodataframe into a dataframe + merged_config = gpd.GeoDataFrame(merged_config, geometry="geometry") + + output_path = os.path.join(merged_session_location, "merged_config.geojson") + merged_config.to_file(output_path, driver="GeoJSON") + + return merged_config + + +# def merge_geojson_files( +# *file_paths: str, +# ) -> gpd.GeoDataFrame: +# """ +# Merges any number of GeoJSON files into a single GeoDataFrame, removing any duplicate rows. + +# Parameters: +# - *file_paths (str): Paths to the GeoJSON files. + +# Returns: +# - GeoDataFrame: A GeoDataFrame containing the merged data from all input files, with duplicates removed. 
+# """ +# merged_gdf = gpd.GeoDataFrame() +# for filepath in file_paths: +# gdf = geodata_processing.read_gpd_file(filepath) +# # Merging the two dataframes +# merged_gdf = gpd.GeoDataFrame(pd.concat([merged_gdf, gdf], ignore_index=True)) + +# # Dropping any duplicated rows based on all columns +# merged_gdf_cleaned = merged_gdf.drop_duplicates() +# return merged_gdf_cleaned def create_csv_per_transect( From c4c1fdfcb1d8a6917f4688ddec81845d2d6fbf0e Mon Sep 17 00:00:00 2001 From: Sharon Fitzpatrick Date: Wed, 22 Nov 2023 09:21:18 -0800 Subject: [PATCH 32/87] #179 add more tests and fix aggrgate --- src/coastseg/merge_utils.py | 44 ++++------- tests/test_merge_utils.py | 152 ++++++++++++++++++++++++++++++++++++ 2 files changed, 169 insertions(+), 27 deletions(-) diff --git a/src/coastseg/merge_utils.py b/src/coastseg/merge_utils.py index c5283174..ff066f0f 100644 --- a/src/coastseg/merge_utils.py +++ b/src/coastseg/merge_utils.py @@ -306,6 +306,8 @@ def concatenate_gdfs(gdfs): def filter_and_join_gdfs(gdf, feature_type, predicate="intersects"): """Filter GeoDataFrame by feature type, ensure spatial index, and perform a spatial join.""" + if "type" not in gdf.columns: + raise ValueError("The GeoDataFrame must contain a column named 'type'") filtered_gdf = gdf[gdf["type"] == feature_type].copy()[["geometry"]] filtered_gdf["geometry"] = filtered_gdf["geometry"].simplify( tolerance=0.001 @@ -314,8 +316,17 @@ def filter_and_join_gdfs(gdf, feature_type, predicate="intersects"): return gpd.sjoin(gdf, filtered_gdf[["geometry"]], how="inner", predicate=predicate) -def aggregate_gdf(gdf, group_fields): - """Aggregate a GeoDataFrame by specified fields using a custom combination function.""" +def aggregate_gdf(gdf: gpd.GeoDataFrame, group_fields: list) -> gpd.GeoDataFrame: + """ + Aggregate a GeoDataFrame by specified fields using a custom combination function. + + Parameters: + gdf (GeoDataFrame): The input GeoDataFrame to be aggregated. + group_fields (list): The fields to group the GeoDataFrame by. + + Returns: + GeoDataFrame: The aggregated GeoDataFrame. + """ def combine_non_nulls(series): unique_values = series.dropna().unique() @@ -325,9 +336,11 @@ def combine_non_nulls(series): else ", ".join(map(str, unique_values)) ) + if "index_right" in gdf.columns: + gdf = gdf.drop(columns=["index_right"]) + return ( - gdf.drop(columns=["index_right"]) - .drop_duplicates() + gdf.drop_duplicates() .groupby(group_fields, as_index=False) .agg(combine_non_nulls) ) @@ -354,29 +367,6 @@ def merge_geojson_files(session_locations, merged_session_location): return merged_config -# def merge_geojson_files( -# *file_paths: str, -# ) -> gpd.GeoDataFrame: -# """ -# Merges any number of GeoJSON files into a single GeoDataFrame, removing any duplicate rows. - -# Parameters: -# - *file_paths (str): Paths to the GeoJSON files. - -# Returns: -# - GeoDataFrame: A GeoDataFrame containing the merged data from all input files, with duplicates removed. 
-# """ -# merged_gdf = gpd.GeoDataFrame() -# for filepath in file_paths: -# gdf = geodata_processing.read_gpd_file(filepath) -# # Merging the two dataframes -# merged_gdf = gpd.GeoDataFrame(pd.concat([merged_gdf, gdf], ignore_index=True)) - -# # Dropping any duplicated rows based on all columns -# merged_gdf_cleaned = merged_gdf.drop_duplicates() -# return merged_gdf_cleaned - - def create_csv_per_transect( save_path: str, cross_distance_transects: dict, diff --git a/tests/test_merge_utils.py b/tests/test_merge_utils.py index 3b47517a..7c2b8f8d 100644 --- a/tests/test_merge_utils.py +++ b/tests/test_merge_utils.py @@ -636,3 +636,155 @@ def test_merge_and_average_1_gdf(extracted_gdf1): new_result = convert_lines_to_multipoints(result) assert new_result["geometry"].equals(extracted_gdf1["geometry"]) + + +def test_aggregate_gdf(): + # Create a sample GeoDataFrame + data = { + "field1": [1, 1, 2, 2, 3], + "field2": ["A", "A", "B", "B", "C"], + "field3": [10, 20, 30, 40, 50], + } + gdf = gpd.GeoDataFrame(data) + + # Define the group fields + group_fields = ["field1", "field2"] + + # Call the aggregate_gdf function + result = merge_utils.aggregate_gdf(gdf, group_fields) + + # Define the expected result + expected_data = { + "field1": [1, 2, 3], + "field2": ["A", "B", "C"], + "field3": ["10, 20", "30, 40", "50"], + } + expected_result = gpd.GeoDataFrame(expected_data) + + # Check if the resulting GeoDataFrame is equal to the expected GeoDataFrame + assert result.equals(expected_result) + + +@pytest.fixture +def merged_config_no_nulls_no_index_right(): + data = { + "type": ["bbox", "bbox", "roi", "roi", "shoreline", "shoreline"], + "id": ["1", "1", "B", "B", "D", "C"], + "geometry": [ + Point(0, 0), + Point(0, 0), + Polygon([(0, 0), (1, 1), (2, 2), (0, 0)]), + Polygon([(0, 0), (1, 1), (2, 2), (0, 0)]), + LineString([(0, 0), (1, 1), (2, 2)]), + LineString([(0, 0), (1, 1), (2, 2)]), + ], + } + return gpd.GeoDataFrame(data) + + +@pytest.fixture +def merged_config_nulls(): + data = { + "type": ["bbox", "bbox", "roi", "roi", "shoreline", "shoreline"], + "id": [None, np.NaN, "B", "B", "D", "C"], + "geometry": [ + Point(0, 0), + Point(0, 0), + Polygon([(0, 0), (1, 1), (2, 2), (0, 0)]), + Polygon([(0, 0), (1, 1), (2, 2), (0, 0)]), + LineString([(0, 0), (1, 1), (2, 2)]), + LineString([(0, 0), (1, 1), (2, 2)]), + ], + "index_right": [0, 1, 2, 3, 4, 5], + } + return gpd.GeoDataFrame(data) + + +@pytest.fixture +def merged_config_nulls_all_unique(): + data = { + "type": ["bbox", "bbox", "roi", "roi", "shoreline", "shoreline"], + "id": [None, np.NaN, "Z", "B", "D", "C"], + "geometry": [ + Point(0, 0), + Point(1, 1), + Polygon([(0, 0), (1, 1), (2, 2), (0, 0)]), + Polygon([(2, 2), (3, 4), (6, 5), (7, 8)]), + LineString([(0, 0), (1, 1), (2, 2)]), + LineString([(8, 8), (8, 5), (9, 4)]), + ], + "index_right": [0, 1, 2, 3, 4, 5], + } + return gpd.GeoDataFrame(data) + + +def test_aggregate_gdf_merged_config_with_nulls(merged_config_nulls): + group_fields = ["type", "geometry"] + result = merge_utils.aggregate_gdf(merged_config_nulls, group_fields) + + # Check if null values are filtered out + assert result["id"].isnull().sum() == 0 + assert len(result) == 3 + # very the ids got combined for rows with the same type and geometry + assert result[result["type"] == "shoreline"]["id"].values[0] == "D, C" + assert result[result["type"] == "roi"]["id"].values[0] == "B" + assert result[result["type"] == "bbox"]["id"].values[0] == "" + + +def test_aggregate_gdf_merged_config_no_nulls(merged_config_no_nulls_no_index_right): + 
group_fields = ["type", "geometry"] + result = merge_utils.aggregate_gdf( + merged_config_no_nulls_no_index_right, group_fields + ) + + # Check if null values are filtered out + assert result["id"].isnull().sum() == 0 + assert len(result) == 3 + # very the ids got combined for rows with the same type and geometry + assert result[result["type"] == "shoreline"]["id"].values[0] == "D, C" + assert result[result["type"] == "roi"]["id"].values[0] == "B" + assert result[result["type"] == "bbox"]["id"].values[0] == "1" + + +def test_aggregate_gdf_merged_config_all_unique(merged_config_nulls_all_unique): + group_fields = ["type", "geometry"] + result = merge_utils.aggregate_gdf(merged_config_nulls_all_unique, group_fields) + + # Check if null values are filtered out + assert result["id"].isnull().sum() == 0 + assert len(result) == 6 + # very the ids got combined for rows with the same type and geometry + assert len(result[result["type"] == "shoreline"]) == 2 + assert len(result[result["type"] == "roi"]) == 2 + assert len(result[result["type"] == "bbox"]) == 2 + assert result[result["type"] == "shoreline"]["id"].isin(["D", "C"]).all() + assert result[result["type"] == "roi"]["id"].isin(["B", "Z"]).all() + + +def test_filter_and_join_gdfs(): + # Create a sample GeoDataFrame + data = { + "type": ["roi", "poi", "roi", "poi"], + "geometry": [Point(0, 0), Point(1, 1), Point(2, 2), Point(3, 3)], + } + gdf = gpd.GeoDataFrame(data, crs="EPSG:4326") + + # Define the feature type to filter by + feature_type = "roi" + + # Call the function with the sample data + result = merge_utils.filter_and_join_gdfs(gdf, feature_type) + + # # Check that the result is a GeoDataFrame + assert isinstance(result, gpd.GeoDataFrame), "The result should be a GeoDataFrame" + + # # Check that the result only contains 'roi' type features + assert ( + result["type"].eq(feature_type).all() + ), "The result should only contain 'roi' type features" + + # # Check that the spatial join keeps only the intersecting geometries + # # For this, we'll need to make sure the original 'roi' points intersect with themselves + assert ( + len(result) == 2 + ), "The result should only contain intersecting 'roi' geometries" From b277b866c8273832bb8a106b198219c410c10a6b Mon Sep 17 00:00:00 2001 From: Sharon Fitzpatrick Date: Wed, 22 Nov 2023 09:34:25 -0800 Subject: [PATCH 33/87] #179 notebook merge sessions works for same ROI --- ...e_roi_across_multiple_sessions_final.ipynb | 810 ++++++++++++++++++ 1 file changed, 810 insertions(+) create mode 100644 merge_session_same_roi_across_multiple_sessions_final.ipynb diff --git a/merge_session_same_roi_across_multiple_sessions_final.ipynb b/merge_session_same_roi_across_multiple_sessions_final.ipynb new file mode 100644 index 00000000..7140b9d1 --- /dev/null +++ b/merge_session_same_roi_across_multiple_sessions_final.ipynb @@ -0,0 +1,810 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Merging Sessions Script\n", + "\n", + "This script is used to merge two or more sessions, provided they do not contain overlapping regions of interest (ROIs).\n", + "\n", + "### Prerequisites:\n", + "- Paths to two session directories with extracted shorelines.\n", + "- The desired name for the merged session directory that will be saved in the `sessions` directory.\n", + "\n", + "### Optional:\n", + "- A `config.json` file with transect settings for calculating shoreline-transect intersections.\n", + "\n", + "### Instructions:\n", + "1. 
Enter the paths to the session directories below:\n",
+    "    ``` python\n",
+    "    session_locations=[\n",
+    "    '',\n",
+    "    ''\n",
+    "    ]\n",
+    "    ```\n",
+    "    Example:\n",
+    "    - Notice that because these are Windows locations we put `r` at the beginning of each location\n",
+    "    ``` python\n",
+    "    session_locations=[\n",
+    "    r'C:\\development\\doodleverse\\coastseg\\CoastSeg\\sessions\\es1\\ID_13_datetime06-05-23__04_16_45',\n",
+    "    r'C:\\development\\doodleverse\\coastseg\\CoastSeg\\sessions\\es1\\ID_12_datetime06-05-23__04_16_45'\n",
+    "    ]\n",
+    "    ```\n",
+    "2. Specify the name for the merged session directory:\n",
+    "   - `merged_session_directory`: `\"\"`\n",
+    "\n",
+    "3. (Optional) If you want to use your own advanced settings in a `config.json` file, include its path:\n",
+    "   - `config_file`: `\"\"`\n",
+    "\n",
+    "With the above information, the script can be executed to merge the specified sessions into a single session directory.\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# replace these with the ROI directories from your own extract shorelines sessions\n",
+    "\n",
+    "session_locations=[r'C:\\development\\doodleverse\\coastseg\\CoastSeg\\sessions\\ID_rrw15_datetime11-21-23__11_32_09\\ID_rrw15_datetime11-21-23__11_32_09',\n",
+    "                   r'C:\\development\\doodleverse\\coastseg\\CoastSeg\\sessions\\ID_rrw15_datetime11-21-23__11_35_25_es3\\ID_rrw15_datetime11-21-23__11_35_25']\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "merged_session_directory='merged_session_name'"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Create the merged session directory under sessions"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import os\n",
+    "# enter the location of your sessions directory if this is not correct\n",
+    "sessions_directory = os.path.join(os.getcwd(), 'sessions')\n",
+    "print(sessions_directory)\n",
+    "merged_session_location = os.path.join(sessions_directory, merged_session_directory)\n",
+    "os.makedirs(merged_session_location, exist_ok=True)\n",
+    "\n",
+    "print(f\"Merged session will be saved to {merged_session_location}\")"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Shoreline-Transect Intersection Analysis Settings\n",
+    "\n",
+    "The default settings listed below should suffice for most use cases when finding where extracted shorelines intersect transects. However, if you modified the advanced settings, you will need to adjust these settings to match.\n",
+    "\n",
+    "\n",
+    "Most users will want to just use the default settings listed below."
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "settings_transects ={\n", + " \"along_dist\": 25, # along-shore distance to use for computing the intersection\n", + " \"min_points\": 3, # minimum number of shoreline points to calculate an intersection\n", + " \"max_std\": 15, # max std for points around transect\n", + " \"max_range\": 30, # max range for points around transect\n", + " \"min_chainage\": -100, # largest negative value along transect (landwards of transect origin)\n", + " \"multiple_inter\": \"auto\", # mode for removing outliers ('auto', 'nan', 'max')\n", + " \"prc_multiple\": 0.1, # percentage of the time that multiple intersects are present to use the max\n", + "}" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Standard library imports\n", + "from collections import defaultdict\n", + "import os\n", + "from typing import List, Optional, Union\n", + "\n", + "# Related third party imports\n", + "import geopandas as gpd\n", + "import numpy as np\n", + "import pandas as pd\n", + "from shapely.geometry import LineString, MultiLineString, MultiPoint, Point\n", + "from shapely.ops import unary_union\n", + "\n", + "# Local application/library specific imports\n", + "from coastseg import geodata_processing\n", + "\n", + "\n", + "def convert_multipoints_to_linestrings(gdf: gpd.GeoDataFrame) -> gpd.GeoDataFrame:\n", + " \"\"\"\n", + " Convert MultiPoint geometries in a GeoDataFrame to LineString geometries.\n", + "\n", + " Args:\n", + " - gdf (gpd.GeoDataFrame): The input GeoDataFrame.\n", + "\n", + " Returns:\n", + " - gpd.GeoDataFrame: A new GeoDataFrame with LineString geometries. If the input GeoDataFrame\n", + " already contains LineStrings, the original GeoDataFrame is returned.\n", + " \"\"\"\n", + "\n", + " # Create a copy of the GeoDataFrame\n", + " gdf_copy = gdf.copy()\n", + "\n", + " # Check if all geometries in the gdf are LineStrings\n", + " if all(gdf_copy.geometry.type == \"LineString\"):\n", + " return gdf_copy\n", + "\n", + " def multipoint_to_linestring(multipoint):\n", + " if isinstance(multipoint, MultiPoint):\n", + " return LineString(multipoint.geoms)\n", + " return multipoint\n", + "\n", + " # Convert each MultiPoint to a LineString\n", + " gdf_copy[\"geometry\"] = gdf_copy[\"geometry\"].apply(multipoint_to_linestring)\n", + "\n", + " return gdf_copy\n", + "\n", + "\n", + "def dataframe_to_dict(df: pd.DataFrame, key_map: dict) -> dict:\n", + " \"\"\"\n", + " Converts a DataFrame to a dictionary, with specific mapping between dictionary keys and DataFrame columns.\n", + "\n", + " Parameters:\n", + " df : DataFrame\n", + " The DataFrame to convert.\n", + " key_map : dict\n", + " A dictionary where keys are the desired dictionary keys and values are the corresponding DataFrame column names.\n", + "\n", + " Returns:\n", + " dict\n", + " The resulting dictionary.\n", + " \"\"\"\n", + " result_dict = defaultdict(list)\n", + "\n", + " for dict_key, df_key in key_map.items():\n", + " if df_key in df.columns:\n", + " if df_key == \"date\":\n", + " # Assumes the column to be converted to date is the one specified in the mapping with key 'date'\n", + " result_dict[dict_key] = list(\n", + " df[df_key].apply(\n", + " lambda x: x.strftime(\"%Y-%m-%d %H:%M:%S\")\n", + " if pd.notnull(x)\n", + " else None\n", + " )\n", + " )\n", + " elif df_key == \"geometry\":\n", + " # Assumes the column to be converted to geometry is the one specified in 
the mapping with key 'geometry'\n", + " result_dict[dict_key] = list(\n", + " df[df_key].apply(\n", + " lambda x: np.array([list(point.coords[0]) for point in x.geoms])\n", + " if pd.notnull(x)\n", + " else None\n", + " )\n", + " )\n", + " else:\n", + " result_dict[dict_key] = list(df[df_key])\n", + "\n", + " return dict(result_dict)\n", + "\n", + "\n", + "def convert_lines_to_multipoints(gdf: gpd.GeoDataFrame) -> gpd.GeoDataFrame:\n", + " \"\"\"\n", + " Convert LineString or MultiLineString geometries in a GeoDataFrame to MultiPoint geometries.\n", + "\n", + " Parameters\n", + " ----------\n", + " gdf : GeoDataFrame\n", + " The input GeoDataFrame containing LineString or MultiLineString geometries.\n", + "\n", + " Returns\n", + " -------\n", + " GeoDataFrame\n", + " A new GeoDataFrame with MultiPoint geometries.\n", + "\n", + " \"\"\"\n", + " # Create a copy of the input GeoDataFrame to avoid modifying it in place\n", + " gdf = gdf.copy()\n", + "\n", + " # Define a function to convert LineString or MultiLineString to MultiPoint\n", + " def line_to_multipoint(geometry):\n", + " if isinstance(geometry, LineString):\n", + " return MultiPoint(geometry.coords)\n", + " elif isinstance(geometry, MultiLineString):\n", + " points = [MultiPoint(line.coords) for line in geometry.geoms]\n", + " return MultiPoint([point for multi in points for point in multi.geoms])\n", + " elif isinstance(geometry, MultiPoint):\n", + " return geometry\n", + " elif isinstance(geometry, Point):\n", + " return MultiPoint([geometry.coords])\n", + " else:\n", + " raise TypeError(f\"Unsupported geometry type: {type(geometry)}\")\n", + "\n", + " # Apply the conversion function to each row in the GeoDataFrame\n", + " gdf[\"geometry\"] = gdf[\"geometry\"].apply(line_to_multipoint)\n", + "\n", + " return gdf\n", + "\n", + "\n", + "def read_first_geojson_file(\n", + " directory: str,\n", + " filenames=[\"extracted_shorelines_lines.geojson\", \"extracted_shorelines.geojson\"],\n", + "):\n", + " # Loop over the filenames\n", + " for filename in filenames:\n", + " filepath = os.path.join(directory, filename)\n", + "\n", + " # If the file exists, read it and return the GeoDataFrame\n", + " if os.path.exists(filepath):\n", + " return geodata_processing.read_gpd_file(filepath)\n", + "\n", + " # If none of the files exist, raise an exception\n", + " raise FileNotFoundError(\n", + " f\"None of the files {filenames} exist in the directory {directory}\"\n", + " )\n", + "\n", + "\n", + "def clip_gdfs(gdfs, overlap_gdf):\n", + " \"\"\"\n", + " Clips GeoDataFrames to an overlapping region.\n", + "\n", + " Parameters:\n", + " gdfs : list of GeoDataFrames\n", + " The GeoDataFrames to be clipped.\n", + " overlap_gdf : GeoDataFrame\n", + " The overlapping region to which the GeoDataFrames will be clipped.\n", + "\n", + " Returns:\n", + " list of GeoDataFrames\n", + " The clipped GeoDataFrames.\n", + " \"\"\"\n", + " clipped_gdfs = []\n", + " for gdf in gdfs:\n", + " clipped_gdf = gpd.clip(gdf, overlap_gdf)\n", + " if not clipped_gdf.empty:\n", + " clipped_gdfs.append(clipped_gdf)\n", + " clipped_gdf.plot()\n", + " return clipped_gdfs\n", + "\n", + "\n", + "def calculate_overlap(gdf: gpd.GeoDataFrame) -> gpd.GeoDataFrame:\n", + " \"\"\"\n", + " Calculates the intersection of all pairs of polygons in a GeoDataFrame.\n", + "\n", + " Parameters:\n", + " -----------\n", + " gdf : GeoDataFrame\n", + " A GeoDataFrame containing polygons.\n", + "\n", + " Returns:\n", + " --------\n", + " overlap_gdf : GeoDataFrame\n", + " A GeoDataFrame containing the 
intersection of all pairs of polygons in gdf.\n", + " \"\"\"\n", + " # Check if the input GeoDataFrame is empty\n", + " if not hasattr(gdf, \"empty\"):\n", + " return gpd.GeoDataFrame()\n", + " if gdf.empty:\n", + " # Return an empty GeoDataFrame with the same CRS if it exists\n", + " return gpd.GeoDataFrame(\n", + " geometry=[], crs=gdf.crs if hasattr(gdf, \"crs\") else None\n", + " )\n", + "\n", + " # Initialize a list to store the intersections\n", + " intersections = []\n", + "\n", + " # Loop over each pair of rows in gdf\n", + " for i in range(len(gdf) - 1):\n", + " for j in range(i + 1, len(gdf)):\n", + " # Check for intersection\n", + " if gdf.iloc[i].geometry.intersects(gdf.iloc[j].geometry):\n", + " # Calculate the intersection\n", + " intersection = gdf.iloc[i].geometry.intersection(gdf.iloc[j].geometry)\n", + " # Append the intersection to the intersections list\n", + " intersections.append(intersection)\n", + "\n", + " # Create a GeoSeries from the intersections\n", + " intersection_series = gpd.GeoSeries(intersections, crs=gdf.crs)\n", + "\n", + " # Create a GeoDataFrame from the GeoSeries\n", + " overlap_gdf = gpd.GeoDataFrame(geometry=intersection_series)\n", + " return overlap_gdf\n", + "\n", + "\n", + "def average_multipoints(multipoints) -> MultiPoint:\n", + " \"\"\"\n", + " Calculate the average MultiPoint geometry from a list of MultiPoint geometries.\n", + "\n", + " This function takes a list of shapely MultiPoint geometries, ensures they all have the same number of points\n", + " by padding shorter MultiPoints with their last point, and then calculates the average coordinates\n", + " for each point position across all the input MultiPoint geometries.\n", + "\n", + " The result is a new MultiPoint geometry that represents the average shape of the input MultiPoints.\n", + "\n", + " Parameters:\n", + " multipoints (list of shapely.geometry.MultiPoint): A list of shapely MultiPoint geometries to be averaged.\n", + "\n", + " Returns:\n", + " shapely.geometry.MultiPoint: A MultiPoint geometry representing the average shape of the input MultiPoints.\n", + "\n", + " Raises:\n", + " ValueError: If the input list of MultiPoint geometries is empty.\n", + "\n", + " Example:\n", + " >>> from shapely.geometry import MultiPoint\n", + " >>> multipoint1 = MultiPoint([(0, 0), (1, 1), (2, 2)])\n", + " >>> multipoint2 = MultiPoint([(1, 1), (2, 2)])\n", + " >>> multipoint3 = MultiPoint([(0, 0), (1, 1), (2, 2), (3, 3)])\n", + " >>> average_mp = average_multipoints([multipoint1, multipoint2, multipoint3])\n", + " >>> print(average_mp)\n", + " MULTIPOINT (0.3333333333333333 0.3333333333333333, 1.3333333333333333 1.3333333333333333, 2 2, 3 3)\n", + " \"\"\"\n", + " if not multipoints:\n", + " raise ValueError(\"The list of MultiPoint geometries is empty\")\n", + "\n", + " # Find the maximum number of points in any MultiPoint\n", + " max_len = max(len(mp.geoms) for mp in multipoints)\n", + "\n", + " # Pad shorter MultiPoints with their last point\n", + " padded_multipoints = []\n", + " for mp in multipoints:\n", + " if len(mp.geoms) < max_len:\n", + " padded_multipoints.append(\n", + " MultiPoint(list(mp.geoms) + [mp.geoms[-1]] * (max_len - len(mp.geoms)))\n", + " )\n", + " else:\n", + " padded_multipoints.append(mp)\n", + "\n", + " # Calculate the average coordinates for each point\n", + " num_multipoints = len(padded_multipoints)\n", + " average_coords = []\n", + " for i in range(max_len):\n", + " avg_left = sum(mp.geoms[i].x for mp in padded_multipoints) / num_multipoints\n", + " avg_right 
= sum(mp.geoms[i].y for mp in padded_multipoints) / num_multipoints\n", + " average_coords.append((avg_left, avg_right))\n", + "\n", + " return MultiPoint(average_coords)\n", + "\n", + "\n", + "def merge_geometries(merged_gdf, columns=None, operation=unary_union):\n", + " \"\"\"\n", + " Performs a specified operation for the geometries with the same date and satname.\n", + "\n", + " Parameters:\n", + " merged_gdf : GeoDataFrame\n", + " The GeoDataFrame to perform the operation on.\n", + " columns : list of str, optional\n", + " The columns to perform the operation on. If None, all columns with 'geometry' in the name are used.\n", + " operation : function, optional\n", + " The operation to perform. If None, unary_union is used.\n", + "\n", + " Returns:\n", + " GeoDataFrame\n", + " The GeoDataFrame with the operation performed.\n", + " \"\"\"\n", + " if columns is None:\n", + " columns = [col for col in merged_gdf.columns if \"geometry\" in col]\n", + " else:\n", + " columns = [col for col in columns if col in merged_gdf.columns]\n", + "\n", + " merged_gdf[\"geometry\"] = merged_gdf[columns].apply(\n", + " lambda row: operation(row.tolist()), axis=1\n", + " )\n", + " for col in columns:\n", + " if col in merged_gdf.columns and col != \"geometry\":\n", + " merged_gdf = merged_gdf.drop(columns=col)\n", + " return merged_gdf\n", + "\n", + "\n", + "def read_geojson_files(filepaths):\n", + " \"\"\"Read GeoJSON files into GeoDataFrames and return a list.\"\"\"\n", + " return [gpd.read_file(path) for path in filepaths]\n", + "\n", + "\n", + "def concatenate_gdfs(gdfs):\n", + " \"\"\"Concatenate a list of GeoDataFrames into a single GeoDataFrame.\"\"\"\n", + " return pd.concat(gdfs, ignore_index=True)\n", + "\n", + "\n", + "def filter_and_join_gdfs(gdf, feature_type, predicate=\"intersects\"):\n", + " \"\"\"Filter GeoDataFrame by feature type, ensure spatial index, and perform a spatial join.\"\"\"\n", + " if \"type\" not in gdf.columns:\n", + " raise ValueError(\"The GeoDataFrame must contain a column named 'type'\")\n", + " filtered_gdf = gdf[gdf[\"type\"] == feature_type].copy()[[\"geometry\"]]\n", + " filtered_gdf[\"geometry\"] = filtered_gdf[\"geometry\"].simplify(\n", + " tolerance=0.001\n", + " ) # Simplify geometry if possible to improve performance\n", + " filtered_gdf.sindex # Ensure spatial index\n", + " return gpd.sjoin(gdf, filtered_gdf[[\"geometry\"]], how=\"inner\", predicate=predicate)\n", + "\n", + "\n", + "def aggregate_gdf(gdf: gpd.GeoDataFrame, group_fields: list) -> gpd.GeoDataFrame:\n", + " \"\"\"\n", + " Aggregate a GeoDataFrame by specified fields using a custom combination function.\n", + "\n", + " Parameters:\n", + " gdf (GeoDataFrame): The input GeoDataFrame to be aggregated.\n", + " group_fields (list): The fields to group the GeoDataFrame by.\n", + "\n", + " Returns:\n", + " GeoDataFrame: The aggregated GeoDataFrame.\n", + " \"\"\"\n", + "\n", + " def combine_non_nulls(series):\n", + " unique_values = series.dropna().unique()\n", + " return (\n", + " unique_values[0]\n", + " if len(unique_values) == 1\n", + " else \", \".join(map(str, unique_values))\n", + " )\n", + "\n", + " if \"index_right\" in gdf.columns:\n", + " gdf = gdf.drop(columns=[\"index_right\"])\n", + "\n", + " return (\n", + " gdf.drop_duplicates()\n", + " .groupby(group_fields, as_index=False)\n", + " .agg(combine_non_nulls)\n", + " )\n", + "\n", + "\n", + "def merge_geojson_files(session_locations, merged_session_location):\n", + " \"\"\"Main function to merge GeoJSON files from different session 
locations.\"\"\"\n", + " filepaths = [\n", + " os.path.join(location, \"config_gdf.geojson\") for location in session_locations\n", + " ]\n", + " gdfs = read_geojson_files(filepaths)\n", + " merged_gdf = gpd.GeoDataFrame(concatenate_gdfs(gdfs), geometry=\"geometry\")\n", + "\n", + " # Filter the geodataframe to only elements that intersect with the rois (dramatically drops the size of the geodataframe)\n", + " merged_config = filter_and_join_gdfs(merged_gdf, \"roi\", predicate=\"intersects\")\n", + " # apply a group by operation to combine the rows with the same type and geometry into a single row\n", + " merged_config = aggregate_gdf(merged_config, [\"type\", \"geometry\"])\n", + " # applying the group by function in aggregate_gdf() turns the geodataframe into a dataframe\n", + " merged_config = gpd.GeoDataFrame(merged_config, geometry=\"geometry\")\n", + "\n", + " output_path = os.path.join(merged_session_location, \"merged_config.geojson\")\n", + " merged_config.to_file(output_path, driver=\"GeoJSON\")\n", + "\n", + " return merged_config\n", + "\n", + "\n", + "def create_csv_per_transect(\n", + " save_path: str,\n", + " cross_distance_transects: dict,\n", + " extracted_shorelines_dict: dict,\n", + " roi_id: str = None, # ROI ID is now optional and defaults to None\n", + " filename_suffix: str = \"_timeseries_raw.csv\",\n", + "):\n", + " for key, distances in cross_distance_transects.items():\n", + " # Initialize the dictionary for DataFrame with mandatory keys\n", + " data_dict = {\n", + " \"dates\": extracted_shorelines_dict[\"dates\"],\n", + " \"satname\": extracted_shorelines_dict[\"satname\"],\n", + " key: distances,\n", + " }\n", + "\n", + " # Add roi_id to the dictionary if provided\n", + " if roi_id is not None:\n", + " data_dict[\"roi_id\"] = [roi_id] * len(extracted_shorelines_dict[\"dates\"])\n", + "\n", + " # Create a DataFrame directly with the data dictionary\n", + " df = pd.DataFrame(data_dict).set_index(\"dates\")\n", + "\n", + " # Construct the full file path\n", + " csv_filename = f\"{key}{filename_suffix}\"\n", + " fn = os.path.join(save_path, csv_filename)\n", + "\n", + " # Save to CSV file, 'mode' set to 'w' for overwriting\n", + " try:\n", + " df.to_csv(fn, sep=\",\", mode=\"w\")\n", + " print(f\"Time-series for transect {key} saved to {fn}\")\n", + " except Exception as e:\n", + " print(f\"Failed to save time-series for transect {key}: {e}\")\n", + "\n", + "\n", + "def merge_and_average(df1: gpd.GeoDataFrame, df2: gpd.GeoDataFrame) -> gpd.GeoDataFrame:\n", + " # Perform a full outer join\n", + " merged = pd.merge(\n", + " df1, df2, on=[\"satname\", \"date\"], how=\"outer\", suffixes=(\"_df1\", \"_df2\")\n", + " )\n", + "\n", + " # Identify numeric columns from both dataframes\n", + " numeric_columns_df1 = df1.select_dtypes(include=\"number\").columns\n", + " numeric_columns_df2 = df2.select_dtypes(include=\"number\").columns\n", + " common_numeric_columns = set(numeric_columns_df1).intersection(numeric_columns_df2)\n", + "\n", + " # Average the numeric columns\n", + " for column in common_numeric_columns:\n", + " merged[column] = merged[[f\"{column}_df1\", f\"{column}_df2\"]].mean(axis=1)\n", + "\n", + " # Drop the original numeric columns\n", + " merged.drop(\n", + " columns=[f\"{column}_df1\" for column in common_numeric_columns]\n", + " + [f\"{column}_df2\" for column in common_numeric_columns],\n", + " inplace=True,\n", + " )\n", + "\n", + " # Merge geometries\n", + " geometry_columns = [col for col in merged.columns if \"geometry\" in col]\n", + " merged = 
merge_geometries(merged, columns=geometry_columns)\n",
+    "\n",
+    "    return merged\n"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Merge all the config_gdf.geojson files together"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# if the shorelines or transects are at the exact same location, they will be merged into one\n",
+    "# if transects have different ids for the same location, they will be merged into one and both ids will be saved\n",
+    "\n",
+    "merged_config = merge_geojson_files(session_locations, merged_session_location)\n",
+    "merged_config "
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "### The ROI Listed Below Will be Merged Together"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "roi_rows = merged_config[merged_config['type'] == 'roi']\n",
+    "roi_rows"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Merge the Extracted Shorelines Together"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from coastseg.merge_utils import calculate_overlap, clip_gdfs, read_first_geojson_file, convert_lines_to_multipoints,merge_and_average\n",
+    "from functools import reduce\n",
+    "\n",
+    "combined_gdf = gpd.GeoDataFrame( geometry=[], crs='epsg:4326')\n",
+    "# calculate the overlapping regions between the ROIs\n",
+    "overlap_gdf=calculate_overlap(roi_rows)\n",
+    "\n",
+    "# read all the extracted shorelines from the session locations\n",
+    "gdfs = []\n",
+    "for session_dir in session_locations:\n",
+    "    # attempt to read the extracted shoreline files\n",
+    "    es_gdf = read_first_geojson_file(session_dir,['extracted_shorelines_points.geojson', 'extracted_shorelines.geojson'])\n",
+    "    es_gdf = convert_lines_to_multipoints(es_gdf)\n",
+    "    es_gdf = es_gdf.to_crs('epsg:4326')\n",
+    "    gdfs.append(es_gdf)\n",
+    "print(f\"Read {len(gdfs)} extracted shorelines GeoDataFrames\")\n",
+    "\n",
+    "# clip the extracted shorelines to the overlapping regions\n",
+    "clipped_shorelines_gdfs=clip_gdfs(gdfs, overlap_gdf)\n",
+    "\n",
+    "# sometimes there are no shorelines in the overlapping regions\n",
+    "if overlap_gdf.empty or len(clipped_shorelines_gdfs) == 0:\n",
+    "    print(\"No overlapping ROIs found. Sessions can be merged.\")\n",
+    "    # merge the geodataframes on date and satname and average the cloud_cover and geoaccuracy for the merged rows\n",
+    "\n",
+    "    for gdf in gdfs:\n",
+    "        if not gdf.crs:\n",
+    "            gdf.set_crs(\"EPSG:4326\", inplace=True)\n",
+    "    \n",
+    "    # Perform a full outer join and average the numeric columns across all GeoDataFrames\n",
+    "    result = reduce(merge_and_average, gdfs)\n",
+    "\n",
+    "    result.sort_values(by='date', inplace=True)\n",
+    "    result.reset_index(drop=True, inplace=True)\n",
+    "\n",
+    "print(f\"Combined {len(result)} rows from {len(gdfs)} GeoDataFrames\")\n",
+    "print(f\"The following dataframe contains the combined extracted shorelines from all sessions.\\n Shorelines that were extracted on the same dates have been combined.\")\n",
+    "\n",
+    "\n",
+    "combined_gdf = result\n",
+    "combined_gdf"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "### Save the Merged Extracted Shorelines to a JSON file\n",
+    "- This will contain all the metadata for each extracted shoreline such as \n",
+    "\n",
+    "\n",
+    "    1. cloud cover\n",
+    "    2. date\n",
+    "    3. 
satellite it was derived from \n",
+    "    4. geoaccuracy\n",
+    "- Filename: `extracted_shorelines_dict.json`\n",
+    "    "
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from coastseg import file_utilities\n",
+    "\n",
+    "# mapping of dictionary keys to dataframe columns\n",
+    "keymap ={'shorelines':'geometry',\n",
+    "         'dates':'date',\n",
+    "         'satname':'satname',\n",
+    "         'cloud_cover':'cloud_cover',\n",
+    "         'geoaccuracy':'geoaccuracy'}\n",
+    "# shoreline dict should have keys: dates, satname, cloud_cover, geoaccuracy, shorelines\n",
+    "shoreline_dict = dataframe_to_dict(combined_gdf,keymap)\n",
+    "# save the extracted shoreline dictionary to json file\n",
+    "file_utilities.to_file(shoreline_dict, os.path.join(merged_session_location, \"extracted_shorelines_dict.json\"))"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## New Number of Extracted Shorelines Across All ROIs"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "len(shoreline_dict['shorelines'])"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "### Save the Merged Extracted Shorelines to GeoJSON Files\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from coastseg.common import convert_linestrings_to_multipoints, stringify_datetime_columns\n",
+    "import os\n",
+    "# Save extracted shorelines as a GeoJSON file\n",
+    "es_line_path = os.path.join(merged_session_location, \"extracted_shorelines_lines.geojson\")\n",
+    "es_pts_path = os.path.join(merged_session_location, \"extracted_shorelines_points.geojson\")\n",
+    "\n",
+    "es_lines_gdf = convert_multipoints_to_linestrings(combined_gdf)\n",
+    "# save extracted shorelines as interpolated linestrings\n",
+    "es_lines_gdf.to_file(es_line_path, driver='GeoJSON')\n",
+    "\n",
+    "\n",
+    "points_gdf = convert_linestrings_to_multipoints(combined_gdf)\n",
+    "points_gdf = stringify_datetime_columns(points_gdf)\n",
+    "# Save extracted shorelines as multipoints GeoJSON file\n",
+    "points_gdf.to_file(es_pts_path, driver='GeoJSON')\n"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "# Find when the Transects and Shorelines intersect\n",
+    "1. Load the Transects for all the ROIs \n",
+    "2. Get the shoreline dictionary we created earlier and read the shorelines from it\n",
+    "3. Find where the shorelines and transects intersect\n",
+    "4. Save the shoreline and transect intersections as a timeseries to a csv file\n",
+    "5. Save the timeseries of intersections between the shoreline and a single transect to csv file"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from coastsat import SDS_transects\n",
+    "# 1. load transects for all ROIs\n",
+    "transect_rows = merged_config[merged_config['type'] == 'transect']\n",
+    "transects_dict = {row['id']: np.array(row[\"geometry\"].coords) for i, row in transect_rows.iterrows()}\n",
+    "# 2. 
compute the intersection between the transects and the extracted shorelines\n", + "cross_distance = SDS_transects.compute_intersection_QC(shoreline_dict, transects_dict, settings_transects)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from coastseg.common import get_cross_distance_df\n", + "# use coastseg.common to get the cross_distance_df\n", + "transects_df = get_cross_distance_df(shoreline_dict,cross_distance)\n", + "# save the transect shoreline intersections to csv timeseries file\n", + "filepath = os.path.join(merged_session_location, \"transect_time_series.csv\")\n", + "transects_df.to_csv(filepath, sep=\",\")\n", + "transects_df.head(5)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Save a CSV for Each Transect \n", + "- WARNING some of these transects will contain a lot of null values because they don't intersect with other ROI's extracted shorelines" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Save the timeseries of intersections between the shoreline and a single tranesct to csv file\n", + "create_csv_per_transect(merged_session_location,cross_distance,shoreline_dict,)" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.10.12" + } + }, + "nbformat": 4, + "nbformat_minor": 4 +} From be7a7404241d5d318361b5d39463a323de2c3f00 Mon Sep 17 00:00:00 2001 From: Sharon Fitzpatrick Date: Mon, 27 Nov 2023 14:33:19 -0800 Subject: [PATCH 34/87] add set_settings() and tests for settings_ui.py --- src/coastseg/settings_UI.py | 41 +++- tests/test_settings_UI.py | 366 ++++++++++++++++++++++++++++++++++++ 2 files changed, 406 insertions(+), 1 deletion(-) create mode 100644 tests/test_settings_UI.py diff --git a/src/coastseg/settings_UI.py b/src/coastseg/settings_UI.py index cba4afa6..174ba736 100644 --- a/src/coastseg/settings_UI.py +++ b/src/coastseg/settings_UI.py @@ -14,7 +14,7 @@ class ButtonColors: def str_to_bool(var: str) -> bool: - return var == "True" + return var.lower().strip() == "true" def convert_date(date_str): @@ -47,6 +47,21 @@ def __init__(self, start_date=None, end_date=None, **kwargs): def value(self): return [str(self.start_date.value), str(self.end_date.value)] + @value.setter + def value(self, values): + if len(values) != 2: + raise ValueError("You must provide a list of two dates.") + + start_date, end_date = values + + if isinstance(start_date, str): + start_date = datetime.date.fromisoformat(start_date) + if isinstance(end_date, str): + end_date = datetime.date.fromisoformat(end_date) + + self.start_date.value = start_date + self.end_date.value = end_date + @property def options(self): return [self.start_date.value, self.end_date.value] @@ -369,6 +384,30 @@ def create_setting_widget( return widget, instructions + def set_settings(self, settings: dict) -> None: + """ + Set the settings of the UI widgets based on the provided dictionary. + + Args: + settings (dict): A dictionary containing the settings to be applied. 
+ + Returns: + None + """ + for setting_name, widget in self.settings_widgets.items(): + if setting_name in settings: + if isinstance(widget, DateBox): + widget.value = list(map(convert_date, settings[setting_name])) + elif isinstance(widget.value, str): + widget.value = str(settings[setting_name]) + elif isinstance(widget.value, bool): + if isinstance(settings[setting_name], str): + widget.value = str_to_bool(settings[setting_name]) + else: + widget.value = bool(settings[setting_name]) + else: + widget.value = settings[setting_name] + def get_settings(self) -> dict: for setting_name, widget in self.settings_widgets.items(): self.settings[setting_name] = widget.value diff --git a/tests/test_settings_UI.py b/tests/test_settings_UI.py new file mode 100644 index 00000000..25592236 --- /dev/null +++ b/tests/test_settings_UI.py @@ -0,0 +1,366 @@ +import pytest +from coastseg.settings_UI import Settings_UI +import ipywidgets + + +@pytest.fixture +def settings_dashboard(): + basic_settings = [ + "dates", + "max_dist_ref", + "min_length_sl", + "min_beach_area", + "dist_clouds", + "apply_cloud_mask", + "cloud_thresh", + "percent_no_data", + ] + + settings_dashboard = Settings_UI(basic_settings) + return settings_dashboard + + +def test_set_settings_with_datebox(settings_dashboard): + settings = { + "dates": ["2022-01-01", "2022-01-02"], + "max_dist_ref": 30, + "bogus_settings": True, # this settings is not in the basic settings + "cloud_thresh": 0.8, + } + settings_dashboard.set_settings(settings) + assert settings_dashboard.settings_widgets["dates"].value == [ + "2022-01-01", + "2022-01-02", + ] + assert settings_dashboard.settings_widgets["max_dist_ref"].value == 30 + assert settings_dashboard.settings_widgets["cloud_thresh"].value == 0.8 + assert "bogus_settings" not in settings_dashboard.settings_widgets.keys() + # assert settings_ui.settings_widgets["bogus_settings"].value is True + + +def test_set_settings_with_string(settings_dashboard): + settings = { + "dates": ["2022-01-01", "2022-01-02"], + "max_dist_ref": 30, + "apply_cloud_mask": True, # this settings should be converted to a str to be rendered in the UI + "cloud_thresh": 0.8, + } + settings_dashboard.set_settings(settings) + assert settings_dashboard.settings_widgets["dates"].value == [ + "2022-01-01", + "2022-01-02", + ] + assert settings_dashboard.settings_widgets["max_dist_ref"].value == 30 + assert settings_dashboard.settings_widgets["cloud_thresh"].value == 0.8 + assert settings_dashboard.settings_widgets["apply_cloud_mask"].value == "True" + + +def test_set_settings_with_bool(settings_dashboard): + settings = { + "dates": ["2022-01-01", "2022-01-02"], + "max_dist_ref": 30, + "apply_cloud_mask": True, # this settings should be converted to a str to be rendered in the UI + "cloud_thresh": 0.8, + "image_size_filter": "False", + } + # add a custom widget which only accepts bools + image_size_filter_checkbox = ipywidgets.Checkbox( + value=True, + description="Enable Image Size Filter", + indent=False, # To align the description with the label + ) + settings_dashboard.add_custom_widget( + image_size_filter_checkbox, + "image_size_filter", + "Image Size Filter", + "Activate to filter out images that are smaller than 60% of the Region of Interest (ROI).", + advanced=False, + index=-1, + ) + settings_dashboard.set_settings(settings) + + assert "image_size_filter" in settings_dashboard.settings_widgets.keys() + assert settings_dashboard.settings_widgets["dates"].value == [ + "2022-01-01", + "2022-01-02", + ] + assert 
settings_dashboard.settings_widgets["max_dist_ref"].value == 30 + assert settings_dashboard.settings_widgets["cloud_thresh"].value == 0.8 + assert settings_dashboard.settings_widgets["apply_cloud_mask"].value == "True" + assert settings_dashboard.settings_widgets["image_size_filter"].value == False + + +def test_add_custom_widget(settings_dashboard): + settings = { + "dates": ["2022-01-01", "2022-01-02"], + "max_dist_ref": 30, + "apply_cloud_mask": True, # this settings should be converted to a str to be rendered in the UI + "cloud_thresh": 0.8, + "image_size_filter": "False", + } + + instructions = ( + "Sand color on beach for model to detect 'dark' (grey/black) 'bright' (white)" + ) + sand_widget = ipywidgets.Dropdown( + options=["default", "latest", "dark", "bright"], + value="default", + description="sand_color :", + disabled=False, + ) + + settings_dashboard.add_custom_widget( + sand_widget, + "sand_color", + "Select Sand Color", + instructions, + advanced=True, + index=0, + ) + satellite_selection = ipywidgets.SelectMultiple( + options=["L5", "L7", "L8", "L9", "S2"], + value=["L8"], + description="Satellites", + disabled=False, + ) + cloud_mask_issue = ipywidgets.ToggleButtons( + options=["False", "True"], + description=" Switch to True if sand pixels are masked (in black) on many images", + disabled=False, + button_style="", + tooltips=[ + "No cloud mask issue", + "Fix cloud masking", + ], + ) + settings_dashboard.add_custom_widget( + cloud_mask_issue, + "cloud_mask_issue", + "Cloud Mask Issue", + "Switch to True if sand pixels are masked (in black) on many images", + advanced=True, + index=-1, + ) + + settings_dashboard.add_custom_widget( + satellite_selection, + "sat_list", + "Select Satellites", + "Pick multiple satellites by holding the control key", + advanced=False, + index=1, + ) + + # add a custom widget which only accepts bools + image_size_filter_checkbox = ipywidgets.Checkbox( + value=True, + description="Enable Image Size Filter", + indent=False, # To align the description with the label + ) + settings_dashboard.add_custom_widget( + image_size_filter_checkbox, + "image_size_filter", + "Image Size Filter", + "Activate to filter out images that are smaller than 60% of the Region of Interest (ROI).", + advanced=False, + index=-1, + ) + + assert "image_size_filter" in settings_dashboard.settings_widgets.keys() + assert "sat_list" in settings_dashboard.settings_widgets.keys() + assert "cloud_mask_issue" in settings_dashboard.settings_widgets.keys() + assert "sand_color" in settings_dashboard.settings_widgets.keys() + + +def test_add_custom_widget_set_custom_settings(settings_dashboard): + settings = { + "dates": ["2022-01-01", "2022-01-02"], + "max_dist_ref": 30, + "apply_cloud_mask": True, # custom widget added with add_custom_widget + "cloud_thresh": 0.8, + "image_size_filter": "False", + "sand_color": "dark", # custom widget added with add_custom_widget + "sat_list": ["L9", "S2"], # custom widget added with add_custom_widget + "cloud_mask_issue": "True", # custom widget added with add_custom_widget + } + + instructions = ( + "Sand color on beach for model to detect 'dark' (grey/black) 'bright' (white)" + ) + sand_widget = ipywidgets.Dropdown( + options=["default", "latest", "dark", "bright"], + value="default", + description="sand_color :", + disabled=False, + ) + + settings_dashboard.add_custom_widget( + sand_widget, + "sand_color", + "Select Sand Color", + instructions, + advanced=True, + index=0, + ) + satellite_selection = ipywidgets.SelectMultiple( + options=["L5", 
"L7", "L8", "L9", "S2"], + value=["L8"], + description="Satellites", + disabled=False, + ) + cloud_mask_issue = ipywidgets.ToggleButtons( + options=["False", "True"], + description=" Switch to True if sand pixels are masked (in black) on many images", + disabled=False, + button_style="", + tooltips=[ + "No cloud mask issue", + "Fix cloud masking", + ], + ) + settings_dashboard.add_custom_widget( + cloud_mask_issue, + "cloud_mask_issue", + "Cloud Mask Issue", + "Switch to True if sand pixels are masked (in black) on many images", + advanced=True, + index=-1, + ) + + settings_dashboard.add_custom_widget( + satellite_selection, + "sat_list", + "Select Satellites", + "Pick multiple satellites by holding the control key", + advanced=False, + index=1, + ) + + # add a custom widget which only accepts bools + image_size_filter_checkbox = ipywidgets.Checkbox( + value=True, + description="Enable Image Size Filter", + indent=False, # To align the description with the label + ) + settings_dashboard.add_custom_widget( + image_size_filter_checkbox, + "image_size_filter", + "Image Size Filter", + "Activate to filter out images that are smaller than 60% of the Region of Interest (ROI).", + advanced=False, + index=-1, + ) + settings_dashboard.set_settings(settings) + + assert "image_size_filter" in settings_dashboard.settings_widgets.keys() + assert "sat_list" in settings_dashboard.settings_widgets.keys() + assert "cloud_mask_issue" in settings_dashboard.settings_widgets.keys() + assert "sand_color" in settings_dashboard.settings_widgets.keys() + assert settings_dashboard.settings_widgets["image_size_filter"].value == False + assert settings_dashboard.settings_widgets["sat_list"].value == ("L9", "S2") + assert settings_dashboard.settings_widgets["cloud_mask_issue"].value == "True" + assert settings_dashboard.settings_widgets["sand_color"].value == "dark" + + +def test_get_settings_custom_widgets(settings_dashboard): + settings = { + "dates": ["2022-01-01", "2022-01-02"], + "max_dist_ref": 30, + "apply_cloud_mask": True, # custom widget added with add_custom_widget + "cloud_thresh": 0.8, + "image_size_filter": "False", + "sand_color": "dark", # custom widget added with add_custom_widget + "sat_list": ["L9", "S2"], # custom widget added with add_custom_widget + "cloud_mask_issue": "True", # custom widget added with add_custom_widget + } + + instructions = ( + "Sand color on beach for model to detect 'dark' (grey/black) 'bright' (white)" + ) + sand_widget = ipywidgets.Dropdown( + options=["default", "latest", "dark", "bright"], + value="default", + description="sand_color :", + disabled=False, + ) + + settings_dashboard.add_custom_widget( + sand_widget, + "sand_color", + "Select Sand Color", + instructions, + advanced=True, + index=0, + ) + satellite_selection = ipywidgets.SelectMultiple( + options=["L5", "L7", "L8", "L9", "S2"], + value=["L8"], + description="Satellites", + disabled=False, + ) + cloud_mask_issue = ipywidgets.ToggleButtons( + options=["False", "True"], + description=" Switch to True if sand pixels are masked (in black) on many images", + disabled=False, + button_style="", + tooltips=[ + "No cloud mask issue", + "Fix cloud masking", + ], + ) + settings_dashboard.add_custom_widget( + cloud_mask_issue, + "cloud_mask_issue", + "Cloud Mask Issue", + "Switch to True if sand pixels are masked (in black) on many images", + advanced=True, + index=-1, + ) + + settings_dashboard.add_custom_widget( + satellite_selection, + "sat_list", + "Select Satellites", + "Pick multiple satellites by holding the 
control key", + advanced=False, + index=1, + ) + + # add a custom widget which only accepts bools + image_size_filter_checkbox = ipywidgets.Checkbox( + value=True, + description="Enable Image Size Filter", + indent=False, # To align the description with the label + ) + settings_dashboard.add_custom_widget( + image_size_filter_checkbox, + "image_size_filter", + "Image Size Filter", + "Activate to filter out images that are smaller than 60% of the Region of Interest (ROI).", + advanced=False, + index=-1, + ) + settings_dashboard.set_settings(settings) + + expected_settings = { + "dates": ["2022-01-01", "2022-01-02"], + "max_dist_ref": 30, + "min_length_sl": 500, + "min_beach_area": 10, + "dist_clouds": 300, + "apply_cloud_mask": True, + "cloud_thresh": 0.8, + "percent_no_data": 50.0, + "along_dist": 25, + "min_points": 3, + "max_std": 15.0, + "max_range": 30.0, + "min_chainage": -100.0, + "multiple_inter": "auto", + "prc_multiple": 0.1, + "sand_color": "dark", + "cloud_mask_issue": True, + "sat_list": ["L9", "S2"], + "image_size_filter": False, + } + assert expected_settings == settings_dashboard.get_settings() From 0e8bc85cde702cc2a4eb40a80724ee3352299b81 Mon Sep 17 00:00:00 2001 From: Sharon Fitzpatrick Date: Mon, 27 Nov 2023 16:53:04 -0800 Subject: [PATCH 35/87] update setting_ui notebook --- settings_ui.ipynb | 478 ++++++++++++++++++++++++++++++++++++++++++---- 1 file changed, 444 insertions(+), 34 deletions(-) diff --git a/settings_ui.ipynb b/settings_ui.ipynb index b66e16ea..88152809 100644 --- a/settings_ui.ipynb +++ b/settings_ui.ipynb @@ -2,9 +2,25 @@ "cells": [ { "cell_type": "code", - "execution_count": null, + "execution_count": 1, "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "application/vnd.jupyter.widget-view+json": { + "model_id": "3965ac6a839146d096633d96ce72e494", + "version_major": 2, + "version_minor": 0 + }, + "text/plain": [ + "Accordion(children=(Tab(children=(VBox(children=(VBox(children=(HTML(value='Pick a date:'), DateBox(chiā€¦" + ] + }, + "execution_count": 1, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "from coastseg.settings_UI import Settings_UI\n", "import ipywidgets\n", @@ -53,7 +69,7 @@ " ]\n", "\n", "settings_dashboard=Settings_UI(basic_settings)\n", - "settings_dashboard.add_custom_widget(widget,'sand_dropbox','Select Sand Color',instructions,advanced=True,index=0)\n", + "settings_dashboard.add_custom_widget(widget,'sand_color','Select Sand Color',instructions,advanced=True,index=0)\n", "settings_dashboard.add_custom_widget(cloud_mask_issue,\"cloud_mask_issue\",'Cloud Mask Issue',\"Switch to True if sand pixels are masked (in black) on many images\",advanced=True,index=-1)\n", "\n", "settings_dashboard.add_custom_widget(satellite_selection,'sat_list','Select Satellites',\"Pick multiple satellites by holding the control key\",advanced=False,index=1)\n", @@ -61,6 +77,278 @@ "settings_dashboard.render()" ] }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [], + "source": [ + "basic_settings = [\n", + " \"dates\",\n", + " \"max_dist_ref\",\n", + " \"min_length_sl\",\n", + " \"min_beach_area\",\n", + " \"dist_clouds\",\n", + " \"apply_cloud_mask\",\n", + " \"cloud_thresh\",\n", + " \"percent_no_data\",\n", + " ]\n", + "\n", + "settings_dashboard = Settings_UI(basic_settings)\n", + "\n", + "settings = {\n", + " \"dates\": [\"2022-01-01\", \"2022-01-02\"],\n", + " \"max_dist_ref\": 30,\n", + " \"apply_cloud_mask\": True, # custom widget added with add_custom_widget\n", + " 
\"cloud_thresh\": 0.8,\n", + " \"image_size_filter\": \"False\",\n", + " \"sand_color\": \"dark\", # custom widget added with add_custom_widget\n", + " \"sat_list\": [\"L9\", \"S2\"], # custom widget added with add_custom_widget\n", + " \"cloud_mask_issue\": \"True\", # custom widget added with add_custom_widget\n", + "}\n", + "instructions = (\n", + " \"Sand color on beach for model to detect 'dark' (grey/black) 'bright' (white)\"\n", + ")\n", + "sand_widget = ipywidgets.Dropdown(\n", + " options=[\"default\", \"latest\", \"dark\", \"bright\"],\n", + " value=\"default\",\n", + " description=\"sand_color :\",\n", + " disabled=False,\n", + ")\n", + "settings_dashboard.add_custom_widget(\n", + " sand_widget,\n", + " \"sand_color\",\n", + " \"Select Sand Color\",\n", + " instructions,\n", + " advanced=True,\n", + " index=0,\n", + ")\n", + "satellite_selection = ipywidgets.SelectMultiple(\n", + " options=[\"L5\", \"L7\", \"L8\", \"L9\", \"S2\"],\n", + " value=[\"L8\"],\n", + " description=\"Satellites\",\n", + " disabled=False,\n", + ")\n", + "cloud_mask_issue = ipywidgets.ToggleButtons(\n", + " options=[\"False\", \"True\"],\n", + " description=\" Switch to True if sand pixels are masked (in black) on many images\",\n", + " disabled=False,\n", + " button_style=\"\",\n", + " tooltips=[\n", + " \"No cloud mask issue\",\n", + " \"Fix cloud masking\",\n", + " ],\n", + ")\n", + "settings_dashboard.add_custom_widget(\n", + " cloud_mask_issue,\n", + " \"cloud_mask_issue\",\n", + " \"Cloud Mask Issue\",\n", + " \"Switch to True if sand pixels are masked (in black) on many images\",\n", + " advanced=True,\n", + " index=-1,\n", + ")\n", + "settings_dashboard.add_custom_widget(\n", + " satellite_selection,\n", + " \"sat_list\",\n", + " \"Select Satellites\",\n", + " \"Pick multiple satellites by holding the control key\",\n", + " advanced=False,\n", + " index=1,\n", + ")\n", + "# add a custom widget which only accepts bools\n", + "image_size_filter_checkbox = ipywidgets.Checkbox(\n", + " value=True,\n", + " description=\"Enable Image Size Filter\",\n", + " indent=False, # To align the description with the label\n", + ")\n", + "settings_dashboard.add_custom_widget(\n", + " image_size_filter_checkbox,\n", + " \"image_size_filter\",\n", + " \"Image Size Filter\",\n", + " \"Activate to filter out images that are smaller than 60% of the Region of Interest (ROI).\",\n", + " advanced=False,\n", + " index=-1,\n", + ")\n", + "\n", + "\n", + "# settings_dashboard.set_settings(settings)\n" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "{'dates': ['2022-01-01', '2022-01-02'],\n", + " 'max_dist_ref': 30,\n", + " 'apply_cloud_mask': True,\n", + " 'cloud_thresh': 0.8,\n", + " 'image_size_filter': 'False',\n", + " 'sand_color': 'dark',\n", + " 'sat_list': ['L9', 'S2'],\n", + " 'cloud_mask_issue': 'True'}" + ] + }, + "execution_count": 3, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "settings" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [], + "source": [ + "settings_dashboard.set_settings(settings)" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "{'dates': ['2022-01-01', '2022-01-02'],\n", + " 'max_dist_ref': 30,\n", + " 'min_length_sl': 500,\n", + " 'min_beach_area': 10,\n", + " 'dist_clouds': 300,\n", + " 'apply_cloud_mask': True,\n", + " 'cloud_thresh': 0.8,\n", + " 
'percent_no_data': 50.0,\n", + " 'along_dist': 25,\n", + " 'min_points': 3,\n", + " 'max_std': 15.0,\n", + " 'max_range': 30.0,\n", + " 'min_chainage': -100.0,\n", + " 'multiple_inter': 'auto',\n", + " 'prc_multiple': 0.1,\n", + " 'sand_color': 'dark',\n", + " 'cloud_mask_issue': True,\n", + " 'sat_list': ['L9', 'S2'],\n", + " 'image_size_filter': False}" + ] + }, + "execution_count": 5, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "settings_dashboard.get_settings()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "settings_dashboard.set_settings()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# the keys returned by get_settings() are the same as the keys in settings_widgets attribute\n", + "settings_dashboard.settings_widgets.keys() == settings_dashboard.get_settings().keys()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# the keys for the custom widgets are in the settings_widgets attribute\n", + "print('sat_list' in settings_dashboard.settings_widgets.keys())\n", + "print('sand_dropbox' in settings_dashboard.settings_widgets.keys())\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from coastseg import coastseg_map\n", + "coastsegmap=coastseg_map.CoastSeg_Map()\n", + "coastsegmap" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "settings = coastsegmap.settings\n", + "settings['image_size_filter']='False'\n", + "settings" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# settings_dashboard.set_settings(**settings)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "def str_to_bool(var: str) -> bool:\n", + " return var.lower().strip() == \"true\"" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "str_to_bool('TRue ')" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from coastseg.settings_UI import convert_date,DateBox\n", + "for setting_name, widget in settings_dashboard.settings_widgets.items():\n", + " print(setting_name)\n", + " print(type(widget.value))\n", + " if setting_name in settings:\n", + " if isinstance(widget,DateBox):\n", + " widget.value=list(map(convert_date,settings[\"dates\"]))\n", + " elif isinstance(widget.value,str):\n", + " widget.value = str(settings[setting_name])\n", + " elif isinstance(widget.value,bool):\n", + " widget.value = bool(settings[setting_name])\n", + " else:\n", + " widget.value = settings[setting_name]" + ] + }, { "cell_type": "code", "execution_count": null, @@ -76,7 +364,7 @@ "metadata": {}, "outputs": [], "source": [ - "settings_dashboard.render()" + "settings_dashboard.get_settings()" ] }, { @@ -85,7 +373,7 @@ "metadata": {}, "outputs": [], "source": [ - "settings_dashboard.settings_widgets" + "settings" ] }, { @@ -94,7 +382,7 @@ "metadata": {}, "outputs": [], "source": [ - "settings_dashboard.advanced_settings.insert(0, 'sand_dropbox')" + "settings_dashboard.settings_widgets['sat_list'].value=['L5', 'L7', 'L8', 'L9', 'S2']" ] }, { @@ -103,7 +391,7 @@ "metadata": {}, "outputs": [], "source": [ - 
"settings_dashboard.advanced_settings" + "settings_dashboard.settings_widgets['sat_list']" ] }, { @@ -112,8 +400,8 @@ "metadata": {}, "outputs": [], "source": [ - "settings_dashboard.settings_widgets['sand_dropbox'] = widget\n", - "settings_dashboard.settings_widgets" + "start_date_str, end_date_str = settings[\"dates\"]\n", + "start_date_str, end_date_str" ] }, { @@ -122,12 +410,48 @@ "metadata": {}, "outputs": [], "source": [ - "index=0\n", - "settings_dashboard.advanced_settings_tab.children = (\n", - " settings_dashboard.advanced_settings_tab.children[:index]\n", - " + (ipywidgets.HTML(value=f\"{instructions}\"),)\n", - " + settings_dashboard.advanced_settings_tab.children[index:]\n", - " )" + "from coastseg.settings_UI import convert_date\n", + "settings_dashboard.settings_widgets['dates'].options=list(map(convert_date,settings[\"dates\"]))" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "settings_dashboard.settings_widgets['dates'].value" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from coastseg.settings_UI import convert_date\n", + "\n", + "start_date= convert_date(start_date_str)\n", + "end_date= convert_date(end_date_str)\n", + "start_date, end_date" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "settings_dashboard.settings_widgets['dates'].options = [start_date, end_date]" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "settings_dashboard.settings_widgets['dates'].value" ] }, { @@ -136,9 +460,56 @@ "metadata": {}, "outputs": [], "source": [ - "settings_dashboard.advanced_settings_tab" + "settings_dashboard.settings_widgets['sand_color'].value='dark'" ] }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "settings_dashboard.settings_widgets['sand_color'].value" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "settings_dashboard.settings_widgets['sand_color']" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "settings = settings_dashboard.get_settings()\n", + "coastsegmap.set_settings(**settings)\n", + "coastsegmap.settings" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# there needs to be a way to load the settings widgets from a dictionary\n", + "# it needs to match each dictionay key to the widget name\n", + "# and if the dictionary key is not in the widget name, the it is ignored\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + }, { "cell_type": "code", "execution_count": null, @@ -154,12 +525,48 @@ "metadata": {}, "outputs": [], "source": [ - "\n", - " if instructions is not None:\n", - " self.advanced_settings_tab.children = (\n", - " self.advanced_settings_tab.children[:index]\n", + "settings_dashboard.settings_widgets['max_dist_ref']" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "settings_dashboard.advanced_settings.insert(0, 'sand_dropbox')" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "settings_dashboard.advanced_settings" + ] + }, + { + "cell_type": 
"code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "settings_dashboard.settings_widgets['sand_dropbox'] = widget\n", + "settings_dashboard.settings_widgets" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "index=0\n", + "settings_dashboard.advanced_settings_tab.children = (\n", + " settings_dashboard.advanced_settings_tab.children[:index]\n", " + (ipywidgets.HTML(value=f\"{instructions}\"),)\n", - " + self.advanced_settings_tab.children[index:]\n", + " + settings_dashboard.advanced_settings_tab.children[index:]\n", " )" ] }, @@ -169,7 +576,7 @@ "metadata": {}, "outputs": [], "source": [ - "settings_dashboard.add_custom_widget(widget,instructions,advanced=True,index=0)" + "settings_dashboard.render()" ] }, { @@ -178,18 +585,21 @@ "metadata": {}, "outputs": [], "source": [ - " def get_sand_dropbox(self):\n", - " sand_color_instr = HTML(\n", - " value=\"Sand Color\\\n", - "
- Sand color on beach for model to detect 'dark' (grey/black) 'bright' (white)
\"\n", - " )\n", - " self.sand_dropdown = ipywidgets.Dropdown(\n", - " options=[\"default\", \"latest\", \"dark\", \"bright\"],\n", - " value=\"default\",\n", - " description=\"sand_color :\",\n", - " disabled=False,\n", - " )\n", - " return VBox([sand_color_instr, self.sand_dropdown])" + "if instructions is not None:\n", + " self.advanced_settings_tab.children = (\n", + " self.advanced_settings_tab.children[:index]\n", + " + (ipywidgets.HTML(value=f\"{instructions}\"),)\n", + " + self.advanced_settings_tab.children[index:]\n", + " )" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "settings_dashboard.add_custom_widget(widget,instructions,advanced=True,index=0)" ] }, { From 5f687d4965f646da4b5560485bcf1f36c85383c4 Mon Sep 17 00:00:00 2001 From: Sharon Fitzpatrick Date: Mon, 27 Nov 2023 16:57:18 -0800 Subject: [PATCH 36/87] add more documentation to tide_correction.py --- src/coastseg/tide_correction.py | 37 ++++++++++++++++++++++++++++++--- 1 file changed, 34 insertions(+), 3 deletions(-) diff --git a/src/coastseg/tide_correction.py b/src/coastseg/tide_correction.py index 39ba695a..01010b97 100644 --- a/src/coastseg/tide_correction.py +++ b/src/coastseg/tide_correction.py @@ -67,6 +67,19 @@ def correct_all_tides( beach_slope: float, use_progress_bar: bool = True, ): + """ + Corrects the tides for all regions of interest (ROIs). + + This function validates the existence of a tide model, loads the regions the tide model was clipped to from a geojson file, + and corrects the tides for each ROI. It logs the progress and updates a progress bar if use_progress_bar is True. + + Args: + roi_ids (Collection): The IDs of the ROIs to correct the tides for. + session_name (str): The name of the session containing the extracted shorelines. + reference_elevation (float): The reference elevation to use for the tide correction. + beach_slope (float): The beach slope to use for the tide correction. + use_progress_bar (bool, optional): Whether to display a progress bar. Defaults to True. + """ # validate tide model exists at CoastSeg/tide_model model_location = get_tide_model_location() # load the regions the tide model was clipped to from geojson file @@ -127,9 +140,12 @@ def save_transect_settings( FileNotFoundError: If the specified settings file does not exist in the given session path. """ - transects_settings = file_utilities.read_json_file( - os.path.join(session_path, filename), raise_error=True - ) + filepath = os.path.join(session_path, filename) + transects_settings = {} + if os.path.exists(filepath): + transects_settings = file_utilities.read_json_file( + os.path.join(session_path, filename), raise_error=True + ) transects_settings["reference_elevation"] = reference_elevation transects_settings["beach_slope"] = beach_slope file_utilities.to_file(transects_settings, os.path.join(session_path, filename)) @@ -292,6 +308,21 @@ def setup_tide_model_config(model_path: str) -> dict: def get_tide_model_location(location: str = "tide_model"): + """ + Validates the existence of a tide model at the specified location and returns the absolute path of the location. + + This function checks if a tide model exists at the given location. If the model exists, it returns the absolute path + of the location. If the model does not exist, it raises an exception. + + Args: + location (str, optional): The location to check for the tide model. Defaults to "tide_model". + + Returns: + str: The absolute path of the location if the tide model exists. 
+ + Raises: + Exception: If the tide model does not exist at the specified location. + """ logger.info(f"Checking if tide model exists at {location}") if validate_tide_model_exists(location): return os.path.abspath(location) From d4a0b6a18b914675db2883df7d58231d67c40ee6 Mon Sep 17 00:00:00 2001 From: Sharon Fitzpatrick Date: Tue, 28 Nov 2023 21:49:39 -0800 Subject: [PATCH 37/87] update dist cloud min to 0 --- src/coastseg/settings_UI.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/coastseg/settings_UI.py b/src/coastseg/settings_UI.py index 174ba736..789793de 100644 --- a/src/coastseg/settings_UI.py +++ b/src/coastseg/settings_UI.py @@ -249,7 +249,7 @@ def create_setting_widget( widget = ipywidgets.IntSlider( description="Distance to Clouds", value=300, - min=1, + min=0, max=1000, step=1, style={"description_width": "initial"}, @@ -261,8 +261,8 @@ def create_setting_widget( widget = ipywidgets.IntSlider( description="Minimum Beach Area", min=10, - max=100, - value=10, + max=10000, + value=1000, style={"description_width": "initial"}, ) instructions = ipywidgets.HTML( From 0763eecfc2784ccaece1af1b91717fcf339ed039 Mon Sep 17 00:00:00 2001 From: Sharon Fitzpatrick Date: Tue, 28 Nov 2023 21:50:00 -0800 Subject: [PATCH 38/87] update file utilites read_json_file to return {} --- src/coastseg/file_utilities.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/coastseg/file_utilities.py b/src/coastseg/file_utilities.py index 41fafc6f..3e3627d4 100644 --- a/src/coastseg/file_utilities.py +++ b/src/coastseg/file_utilities.py @@ -132,7 +132,7 @@ def read_json_file(json_file_path: str, raise_error=False, encoding="utf-8") -> f"Model settings file does not exist at {json_file_path}" ) else: - return None + return {} with open(json_file_path, "r", encoding=encoding) as f: data = json.load(f) return data From ce7ae7a99d24e2956e70860a33fd761dc9c0d7ea Mon Sep 17 00:00:00 2001 From: Sharon Fitzpatrick Date: Tue, 28 Nov 2023 21:50:24 -0800 Subject: [PATCH 39/87] update map_UI to use settingsUI --- src/coastseg/map_UI.py | 612 ++++++++++------------------------------- 1 file changed, 148 insertions(+), 464 deletions(-) diff --git a/src/coastseg/map_UI.py b/src/coastseg/map_UI.py index 00ed9978..0d543173 100644 --- a/src/coastseg/map_UI.py +++ b/src/coastseg/map_UI.py @@ -19,17 +19,14 @@ from ipywidgets import HBox from ipywidgets import VBox from ipywidgets import Layout -from ipywidgets import DatePicker from ipywidgets import HTML from ipywidgets import BoundedFloatText from ipywidgets import SelectMultiple from ipywidgets import Output -from ipywidgets import Select -from ipywidgets import BoundedIntText from ipywidgets import FloatText from ipywidgets import Accordion -from ipywidgets import Checkbox +from coastseg.settings_UI import Settings_UI logger = logging.getLogger(__name__) @@ -58,6 +55,40 @@ def convert_date(date_str): raise ValueError(f"Invalid date: {date_str}. Expected format: 'YYYY-MM-DD'.{e}") +def format_as_html(settings: dict): + """ + Generates HTML content displaying the settings. + Args: + settings (dict): The dictionary containing the settings. + Returns: + str: The HTML content representing the settings. + """ + return f""" +

    <h2>Settings</h2>
+    <p>sat_list: {settings.get("sat_list", "unknown")}</p>
+    <p>dates: {settings.get("dates", "unknown")}</p>
+    <p>landsat_collection: {settings.get("landsat_collection", "unknown")}</p>
+    <p>cloud_thresh: {settings.get("cloud_thresh", "unknown")}</p>
+    <p>dist_clouds: {settings.get("dist_clouds", "unknown")}</p>
+    <p>output_epsg: {settings.get("output_epsg", "unknown")}</p>
+    <p>save_figure: {settings.get("save_figure", "unknown")}</p>
+    <p>min_beach_area: {settings.get("min_beach_area", "unknown")}</p>
+    <p>min_length_sl: {settings.get("min_length_sl", "unknown")}</p>
+    <p>apply_cloud_mask: {settings.get("apply_cloud_mask", "unknown")}</p>
+    <p>image_size_filter: {settings.get("image_size_filter", "unknown")}</p>
+    <p>cloud_mask_issue: {settings.get("cloud_mask_issue", "unknown")}</p>
+    <p>sand_color: {settings.get("sand_color", "unknown")}</p>
+    <p>max_dist_ref: {settings.get("max_dist_ref", "unknown")}</p>
+    <p>along_dist: {settings.get("along_dist", "unknown")}</p>
+    <p>min_points: {settings.get("min_points", "unknown")}</p>
+    <p>max_std: {settings.get("max_std", "unknown")}</p>
+    <p>max_range: {settings.get("max_range", "unknown")}</p>
+    <p>min_chainage: {settings.get("min_chainage", "unknown")}</p>
+    <p>multiple_inter: {settings.get("multiple_inter", "unknown")}</p>
+    <p>prc_multiple: {settings.get("prc_multiple", "unknown")}</p>

+ """ + + class UI: # all instances of UI will share the same debug_view # this means that UI and coastseg_map must have a 1:1 relationship @@ -67,9 +98,98 @@ class UI: download_view = Output(layout={"border": "1px solid black"}) preview_view = Output() + def get_settings_dashboard(self, basic_settings: dict = {}): + if not basic_settings: + basic_settings = [ + "dates", + "max_dist_ref", + "min_length_sl", + "min_beach_area", + "dist_clouds", + "apply_cloud_mask", + "cloud_thresh", + "percent_no_data", + ] + if not self.settings_dashboard: + self.settings_dashboard = Settings_UI(basic_settings) + return self.settings_dashboard + + def add_custom_widgets(self, settings_dashboard: Settings_UI): + # create dropdown to select sand color + instructions = "Sand color on beach for model to detect 'dark' (grey/black) 'bright' (white)" + sand_dropdown = ipywidgets.Dropdown( + options=["default", "latest", "dark", "bright"], + value="default", + description="sand_color :", + disabled=False, + ) + # create dropdown to select mulitple satellites + satellite_selection = ipywidgets.SelectMultiple( + options=["L5", "L7", "L8", "L9", "S2"], + value=["L8"], + description="Satellites", + disabled=False, + ) + # create checkbox to control image size filter + image_size_filter_checkbox = ipywidgets.Checkbox( + value=True, + description="Enable Image Size Filter", + indent=False, # To align the description with the label + ) + # create toggle to select cloud mask issue + cloud_mask_issue = ipywidgets.ToggleButtons( + options=["False", "True"], + description=" Switch to True if sand pixels are masked (in black) on many images", + disabled=False, + button_style="", + tooltips=[ + "No cloud mask issue", + "Fix cloud masking", + ], + ) + settings_dashboard.add_custom_widget( + sand_dropdown, + "sand_color", + "Select Sand Color", + instructions, + advanced=True, + index=0, + ) + settings_dashboard.add_custom_widget( + cloud_mask_issue, + "cloud_mask_issue", + "Cloud Mask Issue", + "Switch to True if sand pixels are masked (in black) on many images", + advanced=True, + index=-1, + ) + + settings_dashboard.add_custom_widget( + satellite_selection, + "sat_list", + "Select Satellites", + "Pick multiple satellites by holding the control key", + advanced=False, + index=1, + ) + settings_dashboard.add_custom_widget( + image_size_filter_checkbox, + "image_size_filter", + "Image Size Filter", + "Activate to filter out images that are smaller than 60% of the Region of Interest (ROI).", + advanced=False, + index=-1, + ) + return settings_dashboard + def __init__(self, coastseg_map, **kwargs): # save an instance of coastseg_map self.coastseg_map = coastseg_map + # create the settings UI controller + self.settings_dashboard = None + self.settings_dashboard = self.get_settings_dashboard() + # create custom widgets and add to settings dashboard + self.settings_dashboard = self.add_custom_widgets(self.settings_dashboard) self.session_name = "" self.session_directory = "" @@ -108,16 +228,10 @@ def __init__(self, coastseg_map, **kwargs): ) self.load_session_button.on_click(self.on_load_session_clicked) - self.load_settings_button = Button( - description="Load settings", icon="file-o", style=self.load_style - ) - self.load_settings_button.on_click(self.on_load_settings_clicked) - self.settings_button = Button( description="Save Settings", icon="floppy-o", style=self.action_style ) self.settings_button.on_click(self.save_settings_clicked) - self.settings_btn_row = VBox([self.settings_button, self.load_settings_button]) 
self.load_file_instr = HTML( value="<h2>Load Feature from File</h2>
\ @@ -395,383 +509,12 @@ def get_view_settings_vbox(self) -> VBox: ) update_settings_btn.on_click(self.update_settings_btn_clicked) self.settings_html = HTML() - self.settings_html.value = self.get_settings_html( - self.coastseg_map.get_settings() - ) + self.settings_html.value = format_as_html(self.coastseg_map.get_settings()) view_settings_vbox = VBox([self.settings_html, update_settings_btn]) html_settings_accordion = Accordion(children=[view_settings_vbox]) html_settings_accordion.set_title(0, "View Settings") return html_settings_accordion - def get_advanced_settings_section(self): - # declare settings widgets - settings = { - "sand_dropbox": self.get_sand_dropbox(), - "cloud_mask_issue": self.get_cloud_issue_toggle(), - "cloud_slider": self.get_cloud_slider(), - "along_dist": self.get_alongshore_distance_slider(), - "min_points": self.get_min_points_text(), - "max_std": self.get_max_std_text(), - "max_range": self.get_max_range_text(), - "min_chainage": self.get_min_chainage_text(), - "multiple_inter": self.get_outliers_mode(), - "prc_multiple": self.get_prc_multiple_text(), - } - - # create settings vbox - settings_vbox = VBox([widget for widget_name, widget in settings.items()]) - return settings_vbox - - def get_basic_settings_section(self): - # declare settings widgets - settings = { - "dates_picker": self.get_dates_picker(), - "satellite_radio": self.get_satellite_radio(), - "min_length_sl_slider": self.get_min_length_sl_slider(), - "beach_area_slider": self.get_beach_area_slider(), - "shoreline_buffer_slider": self.get_shoreline_buffer_slider(), - "apply_cloud_mask": self.get_apply_could_mask_toggle(), - "cloud_threshold_slider": self.get_cloud_threshold_slider(), - "image_size_filter": self.get_image_size_filter(), - } - - # create settings vbox - settings_vbox = VBox([widget for widget_name, widget in settings.items()]) - return settings_vbox - - def get_dates_picker(self): - # Date Widgets - self.start_date = DatePicker( - description="Start Date", - value=datetime.date(2018, 12, 1), - disabled=False, - ) - self.end_date = DatePicker( - description="End Date", - value=datetime.date(2019, 3, 1), # 2019, 1, 1 - disabled=False, - ) - date_instr = HTML(value="Pick a date:", layout=Layout(padding="10px")) - dates_box = HBox([self.start_date, self.end_date]) - dates_vbox = VBox([date_instr, dates_box]) - return dates_vbox - - def get_cloud_threshold_slider(self): - instr = HTML( - value="Cloud Threshold \ -
- Maximum percentage of cloud pixels allowed" - ) - self.cloud_threshold_slider = ipywidgets.FloatSlider( - value=0.5, - min=0, - max=1, - step=0.01, - description="cloud_thres :", - disabled=False, - continuous_update=False, - orientation="horizontal", - readout=True, - readout_format=".2f", - style={"description_width": "initial"}, - ) - return VBox([instr, self.cloud_threshold_slider]) - - def get_image_size_filter(self): - instr = HTML( - value='
' - "Image Size Filter Control:
Activate to filter out images that are smaller than 80% of the Region of Interest (ROI)." - "
" - ) - # Create a checkbox with a clear and concise description - self.image_size_filter_checkbox = Checkbox( - value=True, # Initially unchecked - description="Enable Image Size Filter", - disabled=False, # Enable user interaction - indent=False, # To align the description with the label - ) - return VBox([instr, self.image_size_filter_checkbox]) - - def get_cloud_issue_toggle(self): - instr = HTML( - value="Cloud Mask Issue Toggle \ -
- Defaults to False. Switch to True if sand pixels are masked (in black) on many images" - ) - self.cloud_issue_toggle = ipywidgets.ToggleButtons( - options=["False", "True"], - description=" Switch to True if sand pixels are masked (in black) on many images", - disabled=False, - button_style="", - tooltips=[ - "No cloud mask issue", - "Fix cloud masking", - ], - ) - return VBox([instr, self.cloud_issue_toggle]) - - def get_apply_could_mask_toggle(self): - instr = HTML( - value="Cloud Mask Toggle \ -
- Defaults to True. Switch to False to turn off cloud masking." - ) - self.apply_cloud_mask_toggle = ipywidgets.ToggleButtons( - options=["True", "False"], - description="Apply Cloud Masking", - disabled=False, - tooltips=[ - "Cloud Masking On", - "Cloud Masking Off", - ], - ) - return VBox([instr, self.apply_cloud_mask_toggle]) - - def get_sand_dropbox(self): - sand_color_instr = HTML( - value="Sand Color\ -
- Sand color on beach for model to detect 'dark' (grey/black) 'bright' (white)
" - ) - self.sand_dropdown = ipywidgets.Dropdown( - options=["default", "latest", "dark", "bright"], - value="default", - description="sand_color :", - disabled=False, - ) - return VBox([sand_color_instr, self.sand_dropdown]) - - def get_alongshore_distance_slider(self): - # returns slider to control beach area slider - instr = HTML( - value="Alongshore Distance:\ -
- Along-shore distance over which to consider shoreline points to compute median intersection with transects" - ) - self.alongshore_distance_slider = ipywidgets.IntSlider( - value=25, - min=10, - max=100, - step=1, - description="along_dist (m):", - disabled=False, - continuous_update=False, - orientation="horizontal", - readout=True, - readout_format="d", - style={"description_width": "initial"}, - ) - return VBox([instr, self.alongshore_distance_slider]) - - def get_cloud_slider(self): - # returns slider to control beach area slider - cloud_instr = HTML( - value=" Cloud Distance\ -
- Allowed distance from extracted shoreline to detected clouds\ -
- Any extracted shorelines within this distance to any clouds will be dropped" - ) - - self.cloud_slider = ipywidgets.IntSlider( - value=300, - min=0, - max=1000, - step=1, - description="dist_clouds (m):", - disabled=False, - continuous_update=False, - orientation="horizontal", - readout=True, - readout_format="d", - style={"description_width": "initial"}, - ) - return VBox([cloud_instr, self.cloud_slider]) - - def get_shoreline_buffer_slider(self): - # returns slider to control beach area slider - shoreline_buffer_instr = HTML( - value="Reference Shoreline Buffer:\ -
- Buffer around reference shorelines in which shorelines can be extracted" - ) - - self.shoreline_buffer_slider = ipywidgets.IntSlider( - value=100, - min=5, - max=1000, - step=1, - description="max_dist_ref (m):", - disabled=False, - continuous_update=False, - orientation="horizontal", - readout=True, - readout_format="d", - style={"description_width": "initial"}, - ) - return VBox([shoreline_buffer_instr, self.shoreline_buffer_slider]) - - def get_beach_area_slider(self): - # returns slider to control beach area slider - beach_area_instr = HTML( - value="Minimum Beach Area \ -
- Minimum area (sqm) for object to be labelled as beach" - ) - - self.beach_area_slider = ipywidgets.IntSlider( - value=1000, - min=10, - max=10000, - step=10, - description="min_beach_area (sqm):", - disabled=False, - continuous_update=False, - orientation="horizontal", - readout=True, - readout_format="d", - style={"description_width": "initial"}, - ) - return VBox([beach_area_instr, self.beach_area_slider]) - - def get_min_chainage_text(self) -> VBox: - # returns slider to control beach area slider - label = HTML( - value=" Max Landward Distance \ -
- Max distance landward of the transect origin that an intersection is accepted, beyond this point a NaN is returned." - ) - - # min_chainage: (in metres) furthest distance landward of the transect origin that an intersection is accepted, beyond this point a NaN is returned. - self.min_chainage_text = BoundedFloatText( - value=-100.0, - min=-500.0, - max=-1.0, - step=-1.0, - description="min_chainage (m)", - style={"description_width": "initial"}, - disabled=False, - ) - return VBox([label, self.min_chainage_text]) - - def get_prc_multiple_text(self) -> VBox: - # returns slider to control beach area slider - label = HTML( - value="Percentage of points std > max_std\ -
- Percentage of points whose std > max_std that will be set to 'max'.Only in 'auto' mode." - ) - # percentage of points whose std > max_std that will be set to 'max' - # percentage of data points where the std is larger than the user-defined max - # 'prc_multiple': percentage to use in 'auto' mode to switch from 'nan' to 'max' - self.prc_multiple_text = BoundedFloatText( - value=0.1, - min=0.0, - max=1.0, - step=0.01, - description="prc_multiple :", - style={"description_width": "initial"}, - disabled=False, - ) - return VBox([label, self.prc_multiple_text]) - - def get_max_range_text(self) -> VBox: - # returns slider to control beach area slider - label = HTML( - value="Max Range \ -
- Max range for shoreline points within the alongshore range, if range is above this value a NaN is returned for this intersection" - ) - # max_range: (in metres) maximum RANGE for the shoreline points within the alongshore range, if RANGE is above this value a NaN is returned for this intersection. - self.max_range_text = BoundedFloatText( - value=30.0, - min=1.0, - max=100.0, - step=1.0, - description="max_range (m)", - style={"description_width": "initial"}, - disabled=False, - ) - return VBox([label, self.max_range_text]) - - def get_outliers_mode(self) -> VBox: - # returns slider to control beach area slider - label = HTML( - value="Outliers Mode\ -
-How to deal with multiple shoreline intersections." - ) - # controls multiple_inter: ('auto','nan','max') defines how to deal with multiple shoreline intersections - self.outliers_mode = Select( - options=["auto", "nan", "max"], - value="auto", - description="multiple_inter :", - style={"description_width": "initial"}, - ) - return VBox([label, self.outliers_mode]) - - def get_max_std_text(self) -> VBox: - # returns slider to control beach area slider - label = HTML( - value="Maximum STD \ -
- Maximum STD for the shoreline points within the alongshore range" - ) - - # max_std: (in metres) maximum STD for the shoreline points within the alongshore range, if STD is above this value a NaN is returned for this intersection. - self.max_std_text = BoundedFloatText( - value=15.0, - min=1.0, - max=100.0, - step=1.0, - description="max_std (m):", - style={"description_width": "initial"}, - disabled=False, - ) - return VBox([label, self.max_std_text]) - - def get_min_points_text(self) -> VBox: - # returns slider to control beach area slider - label = HTML( - value="Minimum Number Shoreline of Points \ -
- Minimum number of shoreline points to calculate an intersection" - ) - - # min_points: minimum number of shoreline points to calculate an intersection. - self.min_points_text = BoundedIntText( - value=3, - min=1, - max=100, - step=1, - description="min_points :", - style={"description_width": "initial"}, - disabled=False, - ) - return VBox([label, self.min_points_text]) - - def get_min_length_sl_slider(self): - # returns slider to control beach area slider - min_length_sl_instr = HTML(value="Minimum shoreline length") - - self.min_length_sl_slider = ipywidgets.IntSlider( - value=500, - min=50, - max=1000, - step=1, - description="min_length_sl (m):", - disabled=False, - continuous_update=False, - orientation="horizontal", - readout=True, - readout_format="d", - style={"description_width": "initial"}, - ) - return VBox([min_length_sl_instr, self.min_length_sl_slider]) - - def get_satellite_radio(self): - # satellite selection widgets - satellite_instr = HTML( - value="Pick multiple satellites:\ -
- Pick multiple satellites by holding the control key \ -
- images after 2022/01/01 will be automatically downloaded from Collection 2 ", - layout=Layout(padding="10px"), - ) - - self.satellite_selection = SelectMultiple( - options=["L5", "L7", "L8", "L9", "S2"], - value=["L8"], - description="Satellites", - disabled=False, - ) - satellite_vbox = VBox([satellite_instr, self.satellite_selection]) - return satellite_vbox - def save_to_file_buttons(self): # save to file buttons save_instr = HTML( @@ -949,7 +692,7 @@ def update_settings_selection( if "min_points" in settings: self.min_points_text.value = settings.get("min_points", 3) - def get_settings_html(self, settings: dict): + def format_as_html(self, settings: dict): """ Generates HTML content displaying the settings. @@ -1068,24 +811,12 @@ def create_dashboard(self): [self.instr_create_roi, ROI_btns_box, load_buttons], layout=Layout(margin="0px 5px 5px 0px"), ) - # option 1 - settings_accordion = Accordion( - children=[ - self.get_basic_settings_section(), - self.get_advanced_settings_section(), - ] - ) - # settings_accordion.set_title(0, "Settings") - settings_accordion.set_title(0, "Basic Settings") - settings_accordion.set_title(1, "Advanced Settings") - settings_accordion.selected_index = 0 - self.settings_row = HBox( [ VBox( [ - settings_accordion, - self.settings_btn_row, + self.settings_dashboard.render(), + self.settings_button, ] ), self.get_view_settings_vbox(), @@ -1114,9 +845,7 @@ def update_settings_btn_clicked(self, btn): UI.debug_view.clear_output(wait=True) # Update settings in view settings section try: - self.settings_html.value = self.get_settings_html( - self.coastseg_map.get_settings() - ) + self.settings_html.value = format_as_html(self.coastseg_map.get_settings()) except Exception as error: exception_handler.handle_exception(error, self.coastseg_map.warning_box) @@ -1163,38 +892,19 @@ def load_button_clicked(self, btn): @debug_view.capture(clear_output=True) def save_settings_clicked(self, btn): - if not self.satellite_selection.value: + # get the settings from the settings dashboard + settings = self.settings_dashboard.get_settings() + sat_list = settings.get("sat_list", []) + if not sat_list: try: raise Exception("Must select at least one satellite first") except Exception as error: # renders error message as a box on map exception_handler.handle_exception(error, self.coastseg_map.warning_box) - settings = { - "sat_list": list(self.satellite_selection.value), - "dates": [str(self.start_date.value), str(self.end_date.value)], - "apply_cloud_mask": str_to_bool(self.apply_cloud_mask_toggle.value), - "image_size_filter": bool(self.image_size_filter_checkbox.value), - "max_dist_ref": self.shoreline_buffer_slider.value, - "along_dist": self.alongshore_distance_slider.value, - "dist_clouds": self.cloud_slider.value, - "min_beach_area": self.beach_area_slider.value, - "cloud_mask_issue": str_to_bool(self.cloud_issue_toggle.value), - "min_length_sl": self.min_length_sl_slider.value, - "sand_color": str(self.sand_dropdown.value), - "cloud_thresh": self.cloud_threshold_slider.value, - "min_points": self.min_points_text.value, - "max_std": self.max_std_text.value, - "max_range": self.max_range_text.value, - "min_chainage": self.min_chainage_text.value, - "multiple_inter": self.outliers_mode.value, - "prc_multiple": self.prc_multiple_text.value, - } + # save the settings to coastseg_map try: self.coastseg_map.set_settings(**settings) - self.settings_html.value = self.get_settings_html( - self.coastseg_map.get_settings() - ) - 
self.update_settings_selection(self.coastseg_map.get_settings()) + self.settings_html.value = format_as_html(self.coastseg_map.get_settings()) except Exception as error: # renders error message as a box on map exception_handler.handle_exception(error, self.coastseg_map.warning_box) @@ -1271,38 +981,6 @@ def clear_row(self, row: HBox): row.children[index].close() row.children = [] - @debug_view.capture(clear_output=True) - def on_load_settings_clicked(self, button): - self.settings_chooser_row = HBox([]) - - # Prompt user to select a config geojson file - def load_callback(filechooser: FileChooser) -> None: - try: - if filechooser.selected: - self.coastseg_map.load_settings(filechooser.selected) - self.settings_html.value = self.get_settings_html( - self.coastseg_map.get_settings() - ) - self.update_settings_selection(self.coastseg_map.get_settings()) - except Exception as error: - # renders error message as a box on map - exception_handler.handle_exception(error, self.coastseg_map.warning_box) - - # create instance of chooser that calls load_callback - file_chooser = common.create_file_chooser( - load_callback, title="Select a settings json file", filter_pattern="*.json" - ) - # clear row and close all widgets in row_4 before adding new file_chooser - self.clear_row(self.settings_chooser_row) - - # add instance of file_chooser to row 4 - self.settings_chooser_row.children = [file_chooser] - self.settings_btn_row.children = [ - self.settings_button, - self.load_settings_button, - self.settings_chooser_row, - ] - @debug_view.capture(clear_output=True) def on_load_session_clicked(self, button): # Prompt user to select a config geojson file @@ -1310,15 +988,21 @@ def load_callback(filechooser: FileChooser) -> None: try: if filechooser.selected: self.coastseg_map.map.default_style = {"cursor": "wait"} + print(f"filechooser.selected: {filechooser.selected}") + print(f"Loading session: {os.path.abspath(filechooser.selected)}") + print(f"Settings before \n {self.coastseg_map.get_settings()}") + # load the session into coastseg_map and this should update the settings in coastseg_map self.coastseg_map.load_fresh_session(filechooser.selected) - # self.session_name_text.value = os.path.basename( - # os.path.abspath(filechooser.selected) - # ) - self.session_name_text.value = self.coastseg_map.get_session_name() - self.settings_html.value = self.get_settings_html( - self.coastseg_map.get_settings() + print( + f"Session Loaded and settings\n {self.coastseg_map.get_settings()}" ) - self.update_settings_selection(self.coastseg_map.get_settings()) + # update the session name text box with the session name + self.session_name_text.value = self.coastseg_map.get_session_name() + # update the settings dashboard with the settings from the loaded session + settings = self.coastseg_map.get_settings() + self.settings_dashboard.set_settings(settings) + self.settings_html.value = format_as_html(settings) + # self.update_settings_selection(self.coastseg_map.get_settings()) self.coastseg_map.map.default_style = {"cursor": "default"} except Exception as error: # renders error message as a box on map From 0f5569a46311c803698a12f18dd5d9b50569f90e Mon Sep 17 00:00:00 2001 From: Sharon Fitzpatrick Date: Tue, 28 Nov 2023 21:51:09 -0800 Subject: [PATCH 40/87] update load settings function --- src/coastseg/coastseg_map.py | 230 ++++++++++++++++------------------- src/coastseg/common.py | 87 ++++++++++++- 2 files changed, 188 insertions(+), 129 deletions(-) diff --git a/src/coastseg/coastseg_map.py 
b/src/coastseg/coastseg_map.py index b0ffc980..d8093436 100644 --- a/src/coastseg/coastseg_map.py +++ b/src/coastseg/coastseg_map.py @@ -76,6 +76,64 @@ def link_trash_list(self, widget): traitlets.dlink((self, "trash_list"), (widget, "options")) +def filter_settings(**kwargs): + # Check if any of the keys are missing + # if any keys are missing set the default value + default_settings = { + "landsat_collection": "C02", + "dates": ["2017-12-01", "2018-01-01"], + "sat_list": ["L8"], + "cloud_thresh": 0.5, + "dist_clouds": 300, + "output_epsg": 4326, + "check_detection": False, + "adjust_detection": False, + "save_figure": True, + "min_beach_area": 4500, + "min_length_sl": 100, + "cloud_mask_issue": False, + "sand_color": "default", + "pan_off": "False", + "max_dist_ref": 25, + "along_dist": 25, + "min_points": 3, + "max_std": 15, + "max_range": 30, + "min_chainage": -100, + "multiple_inter": "auto", + "prc_multiple": 0.1, + "apply_cloud_mask": True, + "image_size_filter": True, + } + + # Function to parse dates with flexibility for different formats + def parse_date(date_str): + for fmt in ("%Y-%m-%dT%H:%M:%S", "%Y-%m-%d"): + try: + return datetime.strptime(date_str, fmt).strftime("%Y-%m-%d") + except ValueError: + continue + raise ValueError(f"Date format for {date_str} not recognized.") + + settings = {} + + # Filter kwargs to keep only keys that are in default_settings + filtered_kwargs = {k: v for k, v in kwargs.items() if k in default_settings} + + # Update settings with filtered kwargs + settings.update(filtered_kwargs) + + # Special handling for 'dates' + if "dates" in filtered_kwargs: + settings["dates"] = [parse_date(d) for d in filtered_kwargs["dates"]] + + # Set default values for missing keys + for key, value in default_settings.items(): + settings.setdefault(key, value) + + return settings + + class CoastSeg_Map: def __init__(self, **kwargs): # Basic settings and configurations @@ -343,7 +401,7 @@ def load_session_files(self, dir_path: str) -> None: self.load_metadata(ids=list(self.rois.roi_settings.keys())) else: logger.warning(f"No ROIs were able to have their metadata loaded.") - # load in settings files + # load in setting from shoreline_settings.json and transects_settings.json for file_name in os.listdir(dir_path): file_path = os.path.join(dir_path, file_name) if not os.path.isfile(file_path): @@ -361,7 +419,8 @@ def load_session_files(self, dir_path: str) -> None: "dist_clouds", "percent_no_data", ] - self.load_settings(file_path, keys) + settings = common.load_settings(file_path, keys) + self.set_settings(**settings) elif file_name == "transects_settings.json": keys = [ "max_std", @@ -372,7 +431,8 @@ def load_session_files(self, dir_path: str) -> None: "multiple_inter", "prc_multiple", ] - self.load_settings(file_path, keys) + settings = common.load_settings(file_path, keys) + self.set_settings(**settings) if not config_loaded: logger.info(f"Not all config files not found at {dir_path}") @@ -414,6 +474,15 @@ def load_session_from_directory(self, dir_path: str) -> None: self.load_extracted_shorelines_to_map(1) def load_fresh_session(self, session_path: str) -> None: + """ + Load a fresh session by removing all the old features from the map and loading a new session. 
+ + Args: + session_path (str): The path to the session directory + + Returns: + None + """ # remove all the old features from the map self.remove_all() self.load_session(session_path) @@ -455,7 +524,6 @@ def get_parent_session_name(session_path: str) -> str: session_name = get_parent_session_name(session_path) logger.info(f"session_name: {session_name} session_path: {session_path}") - # session_name = os.path.basename(session_path) self.set_session_name(session_name) logger.info(f"Loading session from session directory: {session_path}") @@ -482,97 +550,6 @@ def update_roi_ids_with_extracted_shorelines(self, rois: ROI): else: self.id_container.ids = ids_with_extracted_shorelines - def load_settings( - self, - filepath: str = "", - keys: set = ( - "sat_list", - "dates", - "sand_color", - "cloud_thresh", - "cloud_mask_issue", - "min_beach_area", - "min_length_sl", - "output_epsg", - "sand_color", - "pan_off", - "max_dist_ref", - "dist_clouds", - "percent_no_data", - "max_std", - "min_points", - "along_dist", - "max_range", - "min_chainage", - "multiple_inter", - "prc_multiple", - ), - load_nested_settings: bool = True, - ): - """ - Loads settings from a JSON file and applies them to the object. - - Args: - filepath (str, optional): The filepath to the JSON file containing the settings. Defaults to an empty string. - load_nested_setting (bool, optional): Load settings from a nest subdictionary 'settings' or not. - keys (list or set, optional): A list of keys specifying which settings to load from the JSON file. If empty, no settings are loaded. Defaults to a set with the following - "sat_list", - "dates", - "cloud_thresh", - "cloud_mask_issue", - "min_beach_area", - "min_length_sl", - "output_epsg", - "sand_color", - "pan_off", - "max_dist_ref", - "dist_clouds", - "percent_no_data", - "max_std", - "min_points", - "along_dist", - "max_range", - "min_chainage", - "multiple_inter", - "prc_multiple". - - Returns: - None - - """ - # Convert keys to a list if a set is passed - if isinstance(keys, set): - keys = list(keys) - - new_settings = file_utilities.read_json_file(filepath, raise_error=False) - logger.info( - f"all of new settings read from file : {filepath} \n {new_settings}" - ) - - nested_settings = new_settings.get("settings", {}) - logger.info( - f"all of new nested settings read from file : {filepath} \n {nested_settings }" - ) - - if new_settings is None: - new_settings = {} - - # Load only settings with provided keys - if keys: - new_settings = {k: new_settings[k] for k in keys if k in new_settings} - if nested_settings: - nested_settings = { - k: nested_settings[k] for k in keys if k in nested_settings - } - - if new_settings != {}: - self.set_settings(**new_settings) - if nested_settings != {} and load_nested_settings: - self.set_settings(**nested_settings) - logger.info( - f"Loaded new_settings from {filepath}:\n new self.settings {self.settings}" - ) - def load_gdf_config(self, filepath: str) -> None: """Load features from geodataframe located in geojson file at filepath onto map. 
@@ -848,14 +825,17 @@ def load_json_config(self, filepath: str, data_path: str) -> None: MissingDirectoriesError: If one or more directories specified in the config file are missing """ - logger.info(f"filepath: {filepath}") + logger.info(f"Loading json config from filepath: {filepath}") exception_handler.check_if_None(self.rois) json_data = file_utilities.read_json_file(filepath, raise_error=True) json_data = json_data or {} # Replace coastseg_map.settings with settings from config file - self.set_settings(**json_data.get("settings", {})) + settings = common.load_settings( + filepath, + ) + self.set_settings(**settings) # creates a dictionary mapping ROI IDs to their extracted settings from json_data roi_settings = self._extract_and_validate_roi_settings(json_data, data_path) @@ -999,46 +979,44 @@ def set_settings(self, **kwargs): "landsat_collection": "C02", "dates": ["2017-12-01", "2018-01-01"], "sat_list": ["L8"], - "cloud_thresh": 0.5, # threshold on maximum cloud cover - "dist_clouds": 300, # ditance around clouds where shoreline can't be mapped - "output_epsg": 4326, # epsg code of spatial reference system desired for the output - # quality control: - # if True, shows each shoreline detection to the user for validation + "cloud_thresh": 0.5, + "dist_clouds": 300, + "output_epsg": 4326, "check_detection": False, - # if True, allows user to adjust the position of each shoreline by changing the threshold "adjust_detection": False, - "save_figure": True, # if True, saves a figure showing the mapped shoreline for each image - # minimum area (in metres^2) for an object to be labelled as a beach + "save_figure": True, "min_beach_area": 4500, - # minimum length (in metres) of shoreline perimeter to be valid "min_length_sl": 100, - # switch this parameter to True if sand pixels are masked (in black) on many images "cloud_mask_issue": False, - # 'default', 'dark' (for grey/black sand beaches) or 'bright' (for white sand beaches) "sand_color": "default", - "pan_off": "False", # if True, no pan-sharpening is performed on Landsat 7,8 and 9 imagery + "pan_off": "False", "max_dist_ref": 25, - "along_dist": 25, # along-shore distance to use for computing the intersection - "min_points": 3, # minimum number of shoreline points to calculate an intersection - "max_std": 15, # max std for points around transect - "max_range": 30, # max range for points around transect - "min_chainage": -100, # largest negative value along transect (landwards of transect origin) - "multiple_inter": "auto", # mode for removing outliers ('auto', 'nan', 'max') - "prc_multiple": 0.1, # percentage of the time that multiple intersects are present to use the max + "along_dist": 25, + "min_points": 3, + "max_std": 15, + "max_range": 30, + "min_chainage": -100, + "multiple_inter": "auto", + "prc_multiple": 0.1, "apply_cloud_mask": True, - "image_size_filter": True, # True means images are filtered out by size + "image_size_filter": True, } - self.settings.update(kwargs) - if "dates" in kwargs.keys(): - updated_dates = [] - self.settings["dates"] = kwargs["dates"] - for date_str in kwargs["dates"]: + + # Function to parse dates with flexibility for different formats + def parse_date(date_str): + for fmt in ("%Y-%m-%dT%H:%M:%S", "%Y-%m-%d"): try: - dt = datetime.strptime(date_str, "%Y-%m-%dT%H:%M:%S") + return datetime.strptime(date_str, fmt).strftime("%Y-%m-%d") except ValueError: - dt = datetime.strptime(date_str, "%Y-%m-%d") - updated_dates.append(dt.strftime("%Y-%m-%d")) - self.settings["dates"] = updated_dates + continue + 
raise ValueError(f"Date format for {date_str} not recognized.") + + # Update the settings with the new key-value pairs + self.settings.update(kwargs) + + # Special handling for 'dates' + if "dates" in kwargs: + self.settings["dates"] = [parse_date(d) for d in kwargs["dates"]] for key, value in self.default_settings.items(): self.settings.setdefault(key, value) diff --git a/src/coastseg/common.py b/src/coastseg/common.py index 387c2d8f..584ffd54 100644 --- a/src/coastseg/common.py +++ b/src/coastseg/common.py @@ -41,6 +41,87 @@ logger = logging.getLogger(__name__) +def load_settings( + filepath: str = "", + keys: set = ( + "model_session_path", + "apply_cloud_mask", + "image_size_filter", + "pan_off", + "save_figure", + "adjust_detection", + "check_detection", + "landsat_collection", + "sat_list", + "dates", + "sand_color", + "cloud_thresh", + "cloud_mask_issue", + "min_beach_area", + "min_length_sl", + "output_epsg", + "sand_color", + "pan_off", + "max_dist_ref", + "dist_clouds", + "percent_no_data", + "max_std", + "min_points", + "along_dist", + "max_range", + "min_chainage", + "multiple_inter", + "prc_multiple", + ), +): + """ + Loads settings from a JSON file and applies them to the object. + Args: + filepath (str, optional): The filepath to the JSON file containing the settings. Defaults to an empty string. + keys (list or set, optional): A list of keys specifying which settings to load from the JSON file. If empty, no settings are loaded. Defaults to a set with the following + "sat_list", + "dates", + "cloud_thresh", + "cloud_mask_issue", + "min_beach_area", + "min_length_sl", + "output_epsg", + "sand_color", + "pan_off", + "max_dist_ref", + "dist_clouds", + "percent_no_data", + "max_std", + "min_points", + "along_dist", + "max_range", + "min_chainage", + "multiple_inter", + "prc_multiple". + Returns: + None + """ + # Convert keys to a list if a set is passed + if isinstance(keys, set): + keys = list(keys) + new_settings = file_utilities.read_json_file(filepath, raise_error=False) + logger.info(f"all of new settings read from file : {filepath} \n {new_settings}") + # if no keys are passed then use all of the keys in the settings file + if not keys: + keys = new_settings.keys() + # filter the settings to keep only the keys passed + filtered_settings = {k: new_settings[k] for k in keys if k in new_settings} + # read the nested settings located in the sub dictionary "settings" and keep only the keys passed + nested_settings = new_settings.get("settings", {}) + nested_settings = {k: nested_settings[k] for k in keys if k in nested_settings} + logger.info( + f"all of new nested settings read from file : {filepath} \n {nested_settings }" + ) + # combine the settings into one dictionary WARNING this could overwrite items in both settings + filtered_settings.update(**nested_settings) + return filtered_settings + + def create_new_config(roi_ids: list, settings: dict, roi_settings: dict) -> dict: """ Creates a new configuration dictionary by combining the given settings and ROI settings. @@ -1330,7 +1411,7 @@ def extract_roi_data(json_data: dict, roi_id: str, fields_of_interest: list = [] return roi_data -def extract_fields(data, key=None, fields_of_interest=None): +def extract_fields(data: dict, key=None, fields_of_interest=None): """ Extracts specified fields from a given dictionary. 
@@ -1354,12 +1435,12 @@ def extract_fields(data, key=None, fields_of_interest=None): "landsat_collection", "filepath", } - + # extract the data from a sub dictionary with a specified key if it exists if key and key in data: for field in fields_of_interest: if field in data[key]: extracted_data[field] = data[key][field] - else: + else: # extract all the fields of interest from the data for field in fields_of_interest: if field in data: extracted_data[field] = data[field] From 88997ac94f5b2cb634d9472716e4178f048d961a Mon Sep 17 00:00:00 2001 From: Sharon Fitzpatrick Date: Tue, 28 Nov 2023 21:51:30 -0800 Subject: [PATCH 41/87] add tests for load_settings --- tests/test_coastseg_map.py | 25 ------- tests/test_common.py | 132 +++++++++++++++++++++++++++++++++++++ 2 files changed, 132 insertions(+), 25 deletions(-) diff --git a/tests/test_coastseg_map.py b/tests/test_coastseg_map.py index eac168ac..e311c436 100644 --- a/tests/test_coastseg_map.py +++ b/tests/test_coastseg_map.py @@ -155,31 +155,6 @@ def test_load_json_config_without_rois(valid_coastseg_map_with_settings, tmp_dat actual_coastsegmap.load_json_config("", tmp_data_path) -def test_load_settings( - downloaded_config_json_filepath, valid_coastseg_map_with_settings -): - # create instance of Coastseg_Map - actual_coastsegmap = valid_coastseg_map_with_settings - actual_coastsegmap.load_settings(downloaded_config_json_filepath) - - actual_coastsegmap.settings["sat_list"] = ["L8"] - actual_coastsegmap.settings["landsat_collection"] = "C02" - actual_coastsegmap.settings["dates"] = ["2018-12-01", "2019-03-01"] - actual_coastsegmap.settings["cloud_thresh"] = 0.5 - actual_coastsegmap.settings["dist_clouds"] = 300 - actual_coastsegmap.settings["output_epsg"] = 3857 - actual_coastsegmap.settings["check_detection"] = False - actual_coastsegmap.settings["adjust_detection"] = False - actual_coastsegmap.settings["save_figure"] = True - actual_coastsegmap.settings["min_beach_area"] = 4500 - actual_coastsegmap.settings["min_length_sl"] = 100 - actual_coastsegmap.settings["cloud_mask_issue"] = False - actual_coastsegmap.settings["sand_color"] = "default" - actual_coastsegmap.settings["pan_off"] = "False" - actual_coastsegmap.settings["max_dist_ref"] = 25 - actual_coastsegmap.settings["along_dist"] = 25 - - # def test_load_json_config_downloaded( # valid_coastseg_map_with_settings, # valid_rois_filepath, diff --git a/tests/test_common.py b/tests/test_common.py index 6d71c35b..fa578bf8 100644 --- a/tests/test_common.py +++ b/tests/test_common.py @@ -16,6 +16,8 @@ import pandas as pd import pytest from unittest.mock import patch +import pytest +from coastseg import common def test_filter_partial_images(): @@ -877,3 +879,133 @@ def test_extract_roi_by_id(valid_rois_gdf): expected_roi = valid_rois_gdf[valid_rois_gdf["id"].astype(int) == roi_id] assert actual_roi["geometry"][0] == expected_roi["geometry"][0] assert actual_roi["id"][0] == expected_roi["id"][0] + + +def test_load_settings_empty_filepath(): + # Test loading all settings from an empty filepath + settings = common.load_settings() + assert isinstance(settings, dict) + assert len(settings) == 0 + + +def test_load_settings_with_invalid_filepath(): + # Test loading settings from an invalid filepath + filepath = "/path/to/invalid.json" + keys = { + "sat_list", + "dates", + "cloud_thresh", + "min_beach_area", + "output_epsg", + "max_dist_ref", + } + settings = common.load_settings(filepath, keys) + assert isinstance(settings, dict) + assert len(settings) == 0 + + +def 
test_load_settings_with_nested_settings(config_json): + # Test loading specific settings from a JSON file with nested settings + keys = { + "model_session_path", + "apply_cloud_mask", + "image_size_filter", + "pan_off", + "save_figure", + "adjust_detection", + "check_detection", + "landsat_collection", + "sat_list", + "dates", + "sand_color", + "cloud_thresh", + "cloud_mask_issue", + "min_beach_area", + "min_length_sl", + "output_epsg", + "sand_color", + "pan_off", + "max_dist_ref", + "dist_clouds", + "percent_no_data", + "max_std", + "min_points", + "along_dist", + "max_range", + "min_chainage", + "multiple_inter", + "prc_multiple", + } + settings = common.load_settings(config_json, keys) + assert isinstance(settings, dict) + assert settings["landsat_collection"] == "C02" + assert settings["dates"] == ["2018-12-01", "2019-03-01"] + assert settings["sat_list"] == ["L5", "L7", "L8", "L9", "S2"] + assert settings["cloud_thresh"] == 0.8 + assert settings["dist_clouds"] == 350 + assert settings["output_epsg"] == 32610 + assert settings["check_detection"] is False + assert settings["adjust_detection"] is False + assert settings["save_figure"] is True + assert settings["min_beach_area"] == 1050 + assert settings["min_length_sl"] == 600 + assert settings["cloud_mask_issue"] is True + assert settings["sand_color"] == "default" + assert settings["pan_off"] == "False" + assert settings["max_dist_ref"] == 200 + assert settings["along_dist"] == 28 + assert settings["min_points"] == 4 + assert settings["max_std"] == 16.0 + assert settings["max_range"] == 38.0 + assert settings["min_chainage"] == -105.0 + assert settings["multiple_inter"] == "auto" + assert settings["prc_multiple"] == 0.2 + assert settings["apply_cloud_mask"] is False + assert settings["image_size_filter"] is False + + +def test_load_settings_with_empty_keys(config_json): + # Test loading all settings from a JSON file + settings = common.load_settings(config_json, set()) + assert isinstance(settings, dict) + assert len(settings) > 0 + + +def test_load_settings_with_set_keys(config_json): + # Test loading specific settings from a JSON file using a set of keys + keys = { + "sat_list", + "dates", + "cloud_thresh", + "min_beach_area", + "output_epsg", + "max_dist_ref", + } + settings = common.load_settings(config_json, keys) + assert isinstance(settings, dict) + assert settings["dates"] == ["2018-12-01", "2019-03-01"] + assert settings["sat_list"] == ["L5", "L7", "L8", "L9", "S2"] + assert settings["min_beach_area"] == 1050 + assert settings["max_dist_ref"] == 200 + assert len(settings) == len(keys) + assert all(key in settings for key in keys) + + +def test_load_settings_with_list_keys(config_json): + # Test loading specific settings from a JSON file using a list of keys + keys = [ + "sat_list", + "dates", + "cloud_thresh", + "min_beach_area", + "output_epsg", + "max_dist_ref", + ] + settings = common.load_settings(config_json, keys) + assert settings["dates"] == ["2018-12-01", "2019-03-01"] + assert settings["sat_list"] == ["L5", "L7", "L8", "L9", "S2"] + assert settings["min_beach_area"] == 1050 + assert settings["max_dist_ref"] == 200 + assert isinstance(settings, dict) + assert len(settings) == len(keys) + assert all(key in settings for key in keys) From 48d380105972fa2114704516d8deb399a6065061 Mon Sep 17 00:00:00 2001 From: Sharon Fitzpatrick Date: Tue, 28 Nov 2023 21:51:56 -0800 Subject: [PATCH 42/87] add fixture for config_json --- tests/conftest.py | 67 +++++++++++++++++++++++++++++++++++++++ tests/test_settings_UI.py | 2 +- 2 
files changed, 68 insertions(+), 1 deletion(-) diff --git a/tests/conftest.py b/tests/conftest.py index 782687ec..4d4c039e 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -3,6 +3,7 @@ import os import json import pytest +import tempfile from PIL import Image from shutil import rmtree import geopandas as gpd @@ -16,6 +17,70 @@ script_dir = os.path.dirname(os.path.abspath(__file__)) +@pytest.fixture(scope="session") +def config_json(): + # The dictionary you want to write to the JSON file + config_data = { + "zih2": { + "dates": ["2018-12-01", "2019-03-01"], + "sat_list": ["L5", "L7", "L8", "L9", "S2"], + "roi_id": "zih2", + "polygon": [ + [ + [-121.84020033533233, 36.74441575726833], + [-121.83959312681607, 36.784722827004146], + [-121.78948275983468, 36.78422337939962], + [-121.79011617443447, 36.74391703739083], + [-121.84020033533233, 36.74441575726833], + ] + ], + "landsat_collection": "C02", + "sitename": "ID_zih2_datetime11-15-23__09_56_01", + "filepath": "C:\\development\\doodleverse\\coastseg\\CoastSeg\\data", + }, + "roi_ids": ["zih2"], + "settings": { + "landsat_collection": "C02", + "dates": ["2018-12-01", "2019-03-01"], + "sat_list": ["L5", "L7", "L8", "L9", "S2"], + "cloud_thresh": 0.8, + "dist_clouds": 350, + "output_epsg": 32610, + "check_detection": False, + "adjust_detection": False, + "save_figure": True, + "min_beach_area": 1050, + "min_length_sl": 600, + "cloud_mask_issue": True, + "sand_color": "default", + "pan_off": "False", + "max_dist_ref": 200, + "along_dist": 28, + "min_points": 4, + "max_std": 16.0, + "max_range": 38.0, + "min_chainage": -105.0, + "multiple_inter": "auto", + "prc_multiple": 0.2, + "apply_cloud_mask": False, + "image_size_filter": False, + }, + } + + # Create a temporary file + with tempfile.NamedTemporaryFile( + mode="w+", delete=False, suffix=".json" + ) as tmpfile: + json.dump(config_data, tmpfile) + tmpfile_path = tmpfile.name # Save the filepath + + # Yield the filepath to the test + yield tmpfile_path + + # Cleanup - delete the file after tests are done + os.remove(tmpfile_path) + + @pytest.fixture(scope="session") def geojson_directory(): """ @@ -56,6 +121,7 @@ def valid_geojson_path(geojson_directory): gdf.to_file(file_path, driver="GeoJSON") return file_path + @pytest.fixture(scope="session") def config_gdf_missing_rois_path(geojson_directory): """Create a valid geojson file and return its path.""" @@ -82,6 +148,7 @@ def config_gdf_missing_rois_path(geojson_directory): gdf.to_file(file_path, driver="GeoJSON") return file_path + @pytest.fixture(scope="session") def empty_geojson_path(geojson_directory): """Create an empty geojson file and return its path.""" diff --git a/tests/test_settings_UI.py b/tests/test_settings_UI.py index 25592236..6b1dc564 100644 --- a/tests/test_settings_UI.py +++ b/tests/test_settings_UI.py @@ -346,7 +346,7 @@ def test_get_settings_custom_widgets(settings_dashboard): "dates": ["2022-01-01", "2022-01-02"], "max_dist_ref": 30, "min_length_sl": 500, - "min_beach_area": 10, + "min_beach_area": 1000, "dist_clouds": 300, "apply_cloud_mask": True, "cloud_thresh": 0.8, From ce8ab9baa00400651bff302fb6a07cc489f974f1 Mon Sep 17 00:00:00 2001 From: Sharon Fitzpatrick Date: Tue, 28 Nov 2023 21:53:08 -0800 Subject: [PATCH 43/87] save_transect_settings works if file not exist --- src/coastseg/tide_correction.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/coastseg/tide_correction.py b/src/coastseg/tide_correction.py index 39ba695a..6add1ad7 100644 --- a/src/coastseg/tide_correction.py +++ 
b/src/coastseg/tide_correction.py @@ -128,7 +128,7 @@ def save_transect_settings( If the specified settings file does not exist in the given session path. """ transects_settings = file_utilities.read_json_file( - os.path.join(session_path, filename), raise_error=True + os.path.join(session_path, filename), raise_error=False ) transects_settings["reference_elevation"] = reference_elevation transects_settings["beach_slope"] = beach_slope From d338ee5bd7a870f9dc860e0d50db2cf97bbdb74e Mon Sep 17 00:00:00 2001 From: Sharon Fitzpatrick Date: Tue, 28 Nov 2023 21:54:08 -0800 Subject: [PATCH 44/87] add test file_utilities and test_tide_correction --- tests/test_file_utilities.py | 48 +++++++++++++++++++++++++++++++++++ tests/test_tide_correction.py | 38 +++++++++++++++++++++++++++ 2 files changed, 86 insertions(+) create mode 100644 tests/test_file_utilities.py create mode 100644 tests/test_tide_correction.py diff --git a/tests/test_file_utilities.py b/tests/test_file_utilities.py new file mode 100644 index 00000000..6e8d89ea --- /dev/null +++ b/tests/test_file_utilities.py @@ -0,0 +1,48 @@ +import os +import json +import pytest +from coastseg.file_utilities import read_json_file + + +def test_read_existing_json_file(tmpdir): + # Create a temporary JSON file + json_data = {"key": "value"} + json_file = tmpdir.join("test.json") + json_file.write(json.dumps(json_data)) + + # Read the JSON file + result = read_json_file(str(json_file)) + + # Check that the result matches the expected data + assert result == json_data + + +def test_read_non_existing_json_file(tmpdir): + # Create a temporary directory + directory = tmpdir.mkdir("test_directory") + + # Attempt to read a non-existing JSON file + with pytest.raises(FileNotFoundError): + read_json_file(str(directory.join("non_existing.json")), raise_error=True) + + +def test_read_non_existing_json_file_no_error(tmpdir): + # Create a temporary directory + directory = tmpdir.mkdir("test_directory") + + # Attempt to read a non-existing JSON file without raising an error + result = read_json_file(str(directory.join("non_existing.json")), raise_error=False) + + # Check that an empty dictionary is returned + assert result == {} + + +def test_read_invalid_json_file(tmpdir): + # Create a temporary JSON file with invalid JSON data + invalid_json = "not a valid JSON" + json_file = tmpdir.join("test.json") + json_file.write(invalid_json) + + # Attempt to read the invalid JSON file + with pytest.raises(json.JSONDecodeError): + read_json_file(str(json_file), raise_error=True) diff --git a/tests/test_tide_correction.py b/tests/test_tide_correction.py new file mode 100644 index 00000000..a1d75685 --- /dev/null +++ b/tests/test_tide_correction.py @@ -0,0 +1,38 @@ +import os +import json +import tempfile +from coastseg.tide_correction import save_transect_settings + + +def test_save_transect_settings(): + # Create a temporary directory + with tempfile.TemporaryDirectory() as tmpdir: + # Create a settings file in the temporary directory + settings_file = os.path.join(tmpdir, "transects_settings.json") + with open(settings_file, "w") as f: + json.dump({"reference_elevation": 0, "beach_slope": 0}, f) + + # Call the function to update the settings + save_transect_settings(tmpdir, 1.23, 4.56) + + # Check that the settings were updated correctly + with open(settings_file, "r") as f: + settings = json.load(f) + assert settings["reference_elevation"] == 1.23 + assert settings["beach_slope"] == 4.56 + + +def test_save_transect_settings_no_file(): + # Create a temporary directory + 
with tempfile.TemporaryDirectory() as tmpdir: + # The settings file does not exist initially + settings_file = os.path.join(tmpdir, "transects_settings.json") + + # Call the function to create and update the settings + save_transect_settings(tmpdir, 1.23, 4.56) + + # Check that the settings file was created with the correct values + with open(settings_file, "r") as f: + settings = json.load(f) + assert settings["reference_elevation"] == 1.23 + assert settings["beach_slope"] == 4.56 From ca90a5ab9b63231082d849b74fdec3c3faa82f50 Mon Sep 17 00:00:00 2001 From: Sharon Fitzpatrick Date: Wed, 29 Nov 2023 18:28:12 -0800 Subject: [PATCH 45/87] remove prints in map_ui --- src/coastseg/map_UI.py | 6 ------ 1 file changed, 6 deletions(-) diff --git a/src/coastseg/map_UI.py b/src/coastseg/map_UI.py index 726ffa30..396a58d4 100644 --- a/src/coastseg/map_UI.py +++ b/src/coastseg/map_UI.py @@ -972,14 +972,8 @@ def load_callback(filechooser: FileChooser) -> None: try: if filechooser.selected: self.coastseg_map.map.default_style = {"cursor": "wait"} - print(f"filechooser.selected: {filechooser.selected}") - print(f"Loading session: {os.path.abspath(filechooser.selected)}") - print(f"Settings before \n {self.coastseg_map.get_settings()}") # load the session into coastseg_map and this should update the settings in coastseg_map self.coastseg_map.load_fresh_session(filechooser.selected) - print( - f"Session Loaded and settings\n {self.coastseg_map.get_settings()}" - ) # update the session name text box with the session name self.session_name_text.value = self.coastseg_map.get_session_name() # update the settings dashboard with the settings from the loaded session From 80921edf525ec80ff6d547d08df07e67ca919ffd Mon Sep 17 00:00:00 2001 From: Sharon Fitzpatrick Date: Wed, 29 Nov 2023 18:29:15 -0800 Subject: [PATCH 46/87] add move_report_files --- src/coastseg/coastseg_map.py | 9 +++++-- src/coastseg/common.py | 47 ++++++++++++++++++++++++++++++---- src/coastseg/file_utilities.py | 5 ++-- 3 files changed, 52 insertions(+), 9 deletions(-) diff --git a/src/coastseg/coastseg_map.py b/src/coastseg/coastseg_map.py index 7a1fdc17..407af7dd 100644 --- a/src/coastseg/coastseg_map.py +++ b/src/coastseg/coastseg_map.py @@ -843,7 +843,6 @@ def save_config(self, filepath: str = None) -> None: exception_handler.check_empty_roi_layer(selected_layer) logger.info(f"self.rois.roi_settings: {self.rois.roi_settings}") - # @todo can this be removed since we don't have a save_config button anymore? 
# if the rois do not have any settings then save the currently loaded settings to the ROIs if not self.rois.roi_settings: filepath = filepath or os.path.abspath(os.getcwd()) @@ -1406,6 +1405,7 @@ def extract_all_shorelines(self) -> None: else: self.id_container.ids = ids_with_extracted_shorelines + # save a session for each ROI under one session name self.save_session(roi_ids, save_transects=False) # Get ROI ids that are selected on map and have had their shorelines extracted @@ -1591,8 +1591,13 @@ def save_session(self, roi_ids: list[str], save_transects: bool = True): logger.info(f"No extracted shorelines for ROI: {roi_id}") continue # move extracted shoreline figures to session directory - common.save_extracted_shoreline_figures(extracted_shoreline, session_path) + shoreline_settings = extracted_shoreline.shoreline_settings + common.save_extracted_shoreline_figures(shoreline_settings, session_path) + # move extracted shoreline reports to session directory + common.move_report_files(shoreline_settings, session_path,'extract_shorelines*.txt') + # save the geojson and json files for extracted shorelines common.save_extracted_shorelines(extracted_shoreline, session_path) + # save transects to session folder if save_transects: diff --git a/src/coastseg/common.py b/src/coastseg/common.py index d31210be..a3f7bb74 100644 --- a/src/coastseg/common.py +++ b/src/coastseg/common.py @@ -1682,10 +1682,40 @@ def create_csv_per_transect( f"ROI: {roi_id}Time-series of the shoreline change along the transects saved as:{fn}" ) +def move_report_files(settings: dict, dest: str, filename_pattern='extract_shorelines*.txt'): + """ + Move report files matching a specific pattern from the source directory to the destination. -def save_extracted_shoreline_figures( - extracted_shorelines: "Extracted_Shoreline", save_path: str -): + :param settings: Dictionary containing 'filepath' and 'sitename'. + :param dest: The destination path where the report files will be moved. + :param filename_pattern: Pattern of the filenames to search for, defaults to 'extract_shorelines*.txt'. + """ + # Attempt to get the data_path and sitename + filepath = settings.get("filepath") or settings.get("inputs", {}).get("filepath") + sitename = settings.get("sitename") or settings.get("inputs", {}).get("sitename") + + # Check if data_path and sitename were successfully retrieved + if not filepath or not sitename: + logger.error("Data path or sitename not found in settings.") + return + + # Construct the pattern to match files + pattern = os.path.join(filepath, sitename, filename_pattern) + matching_files = glob.glob(pattern) + + # Check if there are files to move + if not matching_files: + logger.warning(f"No files found matching the pattern: {pattern}") + return + + # Move the files + try: + file_utilities.move_files(matching_files, dest, delete_src=True) + logger.info(f"Files moved successfully to {dest}") + except Exception as e: + logger.error(f"Error moving files: {e}") + +def save_extracted_shoreline_figures(settings: dict, save_path: str): """ Save extracted shoreline figures to a specified save path. @@ -1696,8 +1726,15 @@ def save_extracted_shoreline_figures( :param extracted_shorelines:An Extracted_Shoreline object containing the extracted shorelines and shoreline settings. :param save_path: The path where the output figures will be saved. 
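
    Note: the settings dict is expected to supply "filepath" and "sitename", either at the top
    level or under an "inputs" key (e.g. a shoreline_settings dict). A minimal illustrative call,
    with placeholder paths:

        settings = {"filepath": "/path/to/data", "sitename": "ID_zih2_datetime11-15-23__09_56_01"}
        save_extracted_shoreline_figures(settings, session_path)
        # figures found under <filepath>/<sitename>/jpg_files/detection are moved into
        # <session_path>/jpg_files/detection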
""" - data_path = extracted_shorelines.shoreline_settings["inputs"]["filepath"] - sitename = extracted_shorelines.shoreline_settings["inputs"]["sitename"] + # Attempt to get the data_path and sitename + data_path = settings.get("filepath") or settings.get("inputs", {}).get("filepath") + sitename = settings.get("sitename") or settings.get("inputs", {}).get("sitename") + + # Check if data_path and sitename were successfully retrieved + if not data_path or not sitename: + logger.error(f"Data path or sitename not found in settings.{settings}") + return + extracted_shoreline_figure_path = os.path.join( data_path, sitename, "jpg_files", "detection" ) diff --git a/src/coastseg/file_utilities.py b/src/coastseg/file_utilities.py index 3e3627d4..b06e31fd 100644 --- a/src/coastseg/file_utilities.py +++ b/src/coastseg/file_utilities.py @@ -229,6 +229,9 @@ def move_files( logger.info( f"Moving all files from directory {src} to {dst_dir}. Delete Source: {delete_src}" ) + elif os.path.isfile(src): + src_files = [src] + logger.info(f"Moving file {src} to {dst_dir}. Delete Source: {delete_src}") else: logger.error( f"Provided src is a string but not a valid directory path: {src}" @@ -547,8 +550,6 @@ def write_to_json(filepath: str, settings: dict): """ "Write the settings dictionary to json file""" os.makedirs(os.path.dirname(filepath), exist_ok=True) to_file(settings, filepath) - # with open(filepath, "w", encoding="utf-8") as output_file: - # json.dump(settings, output_file) def to_file(data: dict, filepath: str) -> None: From 80b9193378cbde3572f5b510ca80f625be31f77c Mon Sep 17 00:00:00 2001 From: Sharon Fitzpatrick Date: Thu, 30 Nov 2023 08:32:46 -0800 Subject: [PATCH 47/87] add tests for move_files save_extracted shorelines --- tests/conftest.py | 56 ++++++++++++++++++++++++++++++++++++ tests/test_common.py | 34 ++++++++++++++++++++++ tests/test_file_utilities.py | 44 ++++++++++++++++++++++++++++ 3 files changed, 134 insertions(+) diff --git a/tests/conftest.py b/tests/conftest.py index 61f03944..06b423ee 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -17,6 +17,62 @@ script_dir = os.path.dirname(os.path.abspath(__file__)) +@pytest.fixture +def temp_jpg_dir_structure(): + # Create a temporary directory + with tempfile.TemporaryDirectory() as tmpdirname: + # Create subdirectories + # creates a directory structure like this: tmpdir/sitename/jpg_files/detection + sitename_dir = os.path.join(tmpdirname, "sitename","jpg_files","detection") + os.makedirs(sitename_dir) + + # Add JPG files to the subdirectories + for i in range(5): # Creating 5 JPG files for example + image = Image.new("RGB", (100, 100), color="blue") # Simple blue image + image_path = os.path.join(sitename_dir, f"test_image_{i}.jpg") + image.save(image_path) + + yield tmpdirname + # Cleanup is handled by TemporaryDirectory context manager + + +@pytest.fixture +def temp_src_dir(): + # Create a temporary directory + with tempfile.TemporaryDirectory() as tmpdirname: + # Add some files to the directory + for i in range(5): # Creating 5 files for example + with open(os.path.join(tmpdirname, f"test_file_{i}.txt"), "w") as f: + f.write("This is a test file") + yield tmpdirname + # Cleanup is handled by TemporaryDirectory context manager + + +@pytest.fixture +def temp_dst_dir(): + # Create another temporary directory for destination + with tempfile.TemporaryDirectory() as tmpdirname: + yield tmpdirname + # Cleanup is handled by TemporaryDirectory context manager + + +@pytest.fixture +def temp_src_files(): + # Create a list of temporary 
files + files = [] + for i in range(5): + fd, path = tempfile.mkstemp(suffix=".txt", prefix="test_file_", text=True) + os.write(fd, b"This is a test file") + os.close(fd) + files.append(path) + + yield files + + # Cleanup + for f in files: + os.remove(f) + + @pytest.fixture(scope="session") def config_json(): # The dictionary you want to write to the JSON file diff --git a/tests/test_common.py b/tests/test_common.py index cb065055..38ff6367 100644 --- a/tests/test_common.py +++ b/tests/test_common.py @@ -1069,3 +1069,37 @@ def test_load_settings_with_list_keys(config_json): assert isinstance(settings, dict) assert len(settings) == len(keys) assert all(key in settings for key in keys) + + +import pytest +import os +import shutil +from coastseg import common +from coastseg import file_utilities + + +def test_save_extracted_shoreline_figures(temp_jpg_dir_structure, temp_dst_dir): + # Create a settings dictionary + settings = {"filepath": str(temp_jpg_dir_structure), "sitename": "sitename"} + + # Create a directory for extracted shoreline figures + extracted_shoreline_figure_path = os.path.join( + settings["filepath"], settings["sitename"], "jpg_files", "detection" + ) + assert os.path.exists(extracted_shoreline_figure_path) == True + + # Call the function under test + common.save_extracted_shoreline_figures(settings, str(temp_dst_dir)) + assert os.path.exists(os.path.join(temp_dst_dir, "jpg_files", "detection")) == True + assert len(os.listdir(os.path.join(temp_dst_dir, "jpg_files", "detection"))) == 5 + # Check if the extracted shoreline figures directory is empty + assert os.path.exists(extracted_shoreline_figure_path) == False + + # Check if the files are moved to the save path + for i in range(5): + print( + os.path.join(temp_dst_dir, "jpg_files", "detection", f"test_image_{i}.jpg") + ) + assert os.path.exists( + os.path.join(temp_dst_dir, "jpg_files", "detection", f"test_image_{i}.jpg") + ) diff --git a/tests/test_file_utilities.py b/tests/test_file_utilities.py index 6e8d89ea..4d77dae1 100644 --- a/tests/test_file_utilities.py +++ b/tests/test_file_utilities.py @@ -2,6 +2,50 @@ import json import pytest from coastseg.file_utilities import read_json_file +from coastseg import file_utilities + + +def test_move_from_dir_to_dir(temp_src_dir, temp_dst_dir): + """Test moving all files from one directory to another""" + file_utilities.move_files(temp_src_dir, temp_dst_dir) + assert len(os.listdir(temp_dst_dir)) == 5 + assert len(os.listdir(temp_src_dir)) == 0 # Source dir should be empty + assert os.path.exists(temp_src_dir) + + +def test_move_from_file_list_to_dir(temp_src_files, temp_dst_dir): + """Test moving a list of files to a directory""" + file_utilities.move_files(temp_src_files, temp_dst_dir) + assert len(os.listdir(temp_dst_dir)) == 5 + # Check if source files are deleted + for file_path in temp_src_files: + assert not os.path.exists(file_path) + + +def test_move_from_dir_path_to_dir(temp_src_dir, temp_dst_dir): + """Test moving all files from a source directory path to a destination directory""" + file_utilities.move_files(str(temp_src_dir), temp_dst_dir) + assert ( + len(os.listdir(temp_dst_dir)) == 5 + ), "All files should be moved to the destination directory" + assert ( + len(os.listdir(temp_src_dir)) == 0 + ), "Source directory should be empty after moving files" + + +def test_delete_source_directory(temp_src_dir, temp_dst_dir): + """Test deleting the source directory after moving files""" + file_utilities.move_files(temp_src_dir, temp_dst_dir, delete_src=True) + assert 
len(os.listdir(temp_dst_dir)) == 5 + assert not os.path.exists(temp_src_dir) # Source dir should be deleted + + +def test_delete_source_files(temp_src_files, temp_dst_dir): + """Test deleting source files after moving them""" + file_utilities.move_files(temp_src_files, temp_dst_dir, delete_src=True) + assert len(os.listdir(temp_dst_dir)) == 5 + for file_path in temp_src_files: + assert not os.path.exists(file_path) # Source files should be deleted def test_read_existing_json_file(tmpdir): From 0ce80b5faacf751f3fb7269e27594c6077ccef81 Mon Sep 17 00:00:00 2001 From: Sharon Fitzpatrick Date: Thu, 30 Nov 2023 09:05:14 -0800 Subject: [PATCH 48/87] update coastsat-package>=0.1.37 --- pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index 34ecae3e..ae74db1a 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -12,7 +12,7 @@ authors = [ # find` directive with `include` or `exclude` description = "An interactive jupyter notebook for downloading satellite imagery" dependencies = [ - "coastsat-package>=0.1.34", + "coastsat-package>=0.1.37", "area", "doodleverse-utils>=0.0.35", "ipyfilechooser>=0.6.0", From 76f19384a1cd7350b9f8b574856a9bc9c716cd73 Mon Sep 17 00:00:00 2001 From: Sharon Fitzpatrick Date: Fri, 1 Dec 2023 17:12:19 -0800 Subject: [PATCH 49/87] #179 add more function to merge_utils --- src/coastseg/merge_utils.py | 146 +++++++++++++++++++++++++++++++++--- 1 file changed, 135 insertions(+), 11 deletions(-) diff --git a/src/coastseg/merge_utils.py b/src/coastseg/merge_utils.py index ff066f0f..6b189610 100644 --- a/src/coastseg/merge_utils.py +++ b/src/coastseg/merge_utils.py @@ -128,6 +128,19 @@ def read_first_geojson_file( directory: str, filenames=["extracted_shorelines_lines.geojson", "extracted_shorelines.geojson"], ): + """ + Reads the first available GeoJSON file from the given directory. + + Args: + directory (str): The directory path where the files are located. + filenames (list, optional): List of filenames to search for. Defaults to ["extracted_shorelines_lines.geojson", "extracted_shorelines.geojson"]. + + Returns: + geopandas.GeoDataFrame: The GeoDataFrame read from the first available file. + + Raises: + FileNotFoundError: If none of the specified files exist in the directory. + """ # Loop over the filenames for filename in filenames: filepath = os.path.join(directory, filename) @@ -161,7 +174,6 @@ def clip_gdfs(gdfs, overlap_gdf): clipped_gdf = gpd.clip(gdf, overlap_gdf) if not clipped_gdf.empty: clipped_gdfs.append(clipped_gdf) - clipped_gdf.plot() return clipped_gdfs @@ -181,7 +193,7 @@ def calculate_overlap(gdf: gpd.GeoDataFrame) -> gpd.GeoDataFrame: """ # Check if the input GeoDataFrame is empty if not hasattr(gdf, "empty"): - return gpd.GeoDataFrame() + return gpd.GeoDataFrame(geometry=[]) if gdf.empty: # Return an empty GeoDataFrame with the same CRS if it exists return gpd.GeoDataFrame( @@ -209,6 +221,31 @@ def calculate_overlap(gdf: gpd.GeoDataFrame) -> gpd.GeoDataFrame: return overlap_gdf +def get_overlapping_features( + roi_gdf: gpd.GeoDataFrame, gdfs: list[gpd.GeoDataFrame] +) -> list[gpd.GeoDataFrame]: + """ + Get the overlapping features between the region of interest (ROI) and the provided GeoDataFrames (gdfs). + + Parameters: + - roi_gdf (GeoDataFrame): The region of interest GeoDataFrame. + - gdfs (GeoDataFrame or list): The GeoDataFrame(s) to check for overlap with the ROI. + + Returns: + - list: The overlapping features as a list of GeoDataFrames. 
Where each element in the list is a GeoDataFrame corresponds + to the overlapping features between the ROI and one of the provided GeoDataFrames (gdfs). + + """ + # calculate the overlapping regions between the ROIs + overlap_gdf = calculate_overlap(roi_gdf) + if overlap_gdf.empty: + return [] + if isinstance(gdfs, gpd.GeoDataFrame): + gdfs = [gdfs] + # clip the gdfs to the overlapping regions ex. clip the extracted shorelines to the overlapping regions + return clip_gdfs(gdfs, overlap_gdf) + + def average_multipoints(multipoints) -> MultiPoint: """ Calculate the average MultiPoint geometry from a list of MultiPoint geometries. @@ -294,9 +331,19 @@ def merge_geometries(merged_gdf, columns=None, operation=unary_union): return merged_gdf -def read_geojson_files(filepaths): +def read_geojson_files(filepaths, column="type", value=None, keep_columns=None): """Read GeoJSON files into GeoDataFrames and return a list.""" - return [gpd.read_file(path) for path in filepaths] + gdfs = [] + for path in filepaths: + gdf = gpd.read_file(path) + print(f"Read {len(gdf)} features from {path}") + # print(gdf[gdf[column] == value]) + if column in gdf.columns and value is not None: + gdf = gdf[gdf[column] == value] + if keep_columns is not None: + gdf = gdf[keep_columns] + gdfs.append(gdf) + return gdfs def concatenate_gdfs(gdfs): @@ -308,11 +355,10 @@ def filter_and_join_gdfs(gdf, feature_type, predicate="intersects"): """Filter GeoDataFrame by feature type, ensure spatial index, and perform a spatial join.""" if "type" not in gdf.columns: raise ValueError("The GeoDataFrame must contain a column named 'type'") + # Filter GeoDataFrame by feature type filtered_gdf = gdf[gdf["type"] == feature_type].copy()[["geometry"]] - filtered_gdf["geometry"] = filtered_gdf["geometry"].simplify( - tolerance=0.001 - ) # Simplify geometry if possible to improve performance filtered_gdf.sindex # Ensure spatial index + # perform a spatial join return gpd.sjoin(gdf, filtered_gdf[["geometry"]], how="inner", predicate=predicate) @@ -346,8 +392,63 @@ def combine_non_nulls(series): ) -def merge_geojson_files(session_locations, merged_session_location): - """Main function to merge GeoJSON files from different session locations.""" +def process_geojson_files( + session_locations, + filenames, + transform_funcs=None, + read_func=None, + crs="epsg:4326", +): + """ + Reads and optionally transforms GeoDataFrames from given session locations. + + Args: + session_locations (list): List of paths to session directories. + filenames (list): List of filenames to read in each session directory. + transform_funcs (list, optional): List of functions to apply to each file. + read_func (callable, optional): Function to use for reading files. + crs (str, optional): Coordinate reference system to convert GeoDataFrames to. Defaults to 'epsg:4326'. + + Returns: + list: List of processed GeoDataFrames. 
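+
+    Example:
+        Typical usage, mirroring scripts/merge_sessions.py, reads each ROI session's extracted
+        shorelines and converts any LineStrings to MultiPoints as they are read:
+
+            gdfs = process_geojson_files(
+                session_locations,
+                ["extracted_shorelines_points.geojson", "extracted_shorelines.geojson"],
+                transform_funcs=convert_lines_to_multipoints,
+                read_func=read_first_geojson_file,
+            )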
+ """ + if transform_funcs is None: + transform_funcs = [] + if transform_funcs and not isinstance(transform_funcs, list): + transform_funcs = [transform_funcs] + if read_func is None: + raise ValueError("read_func must be specified") + + gdfs = [] + for session_dir in session_locations: + try: + gdf = read_func(session_dir, filenames) + for func in transform_funcs: + gdf = func(gdf) + if isinstance(gdf, gpd.GeoDataFrame): + if "geometry" in gdf.columns and not gdf.crs: + gdf.set_crs(crs, inplace=True) + gdf = gdf.to_crs(crs) + gdfs.append(gdf) + except Exception as e: + print(f"Error processing {session_dir}: {e}") + continue + + return gdfs + + +def merge_geojson_files(session_locations, dest): + """ + Merge GeoJSON files from different session locations. + + Args: + session_locations (list): List of session locations containing GeoJSON files. + dest (str): Path to the location where the merged GeoJSON file will be saved. + + Returns: + merged_config (GeoDataFrame): Merged GeoDataFrame containing the merged GeoJSON data. + + """ filepaths = [ os.path.join(location, "config_gdf.geojson") for location in session_locations ] @@ -361,8 +462,11 @@ def merge_geojson_files(session_locations, merged_session_location): # applying the group by function in aggregate_gdf() turns the geodataframe into a dataframe merged_config = gpd.GeoDataFrame(merged_config, geometry="geometry") - output_path = os.path.join(merged_session_location, "merged_config.geojson") - merged_config.to_file(output_path, driver="GeoJSON") + if os.path.isdir(dest): + output_path = os.path.join(dest, "merged_config.geojson") + merged_config.to_file(output_path, driver="GeoJSON") + else: + raise ValueError(f"Output directory {dest} does not exist.") return merged_config @@ -374,6 +478,16 @@ def create_csv_per_transect( roi_id: str = None, # ROI ID is now optional and defaults to None filename_suffix: str = "_timeseries_raw.csv", ): + """ + Create a CSV file for each transect containing time-series data. + + Args: + save_path (str): The directory path where the CSV files will be saved. + cross_distance_transects (dict): A dictionary containing cross-distance transects. + extracted_shorelines_dict (dict): A dictionary containing extracted shorelines data. + roi_id (str, optional): The ROI ID. Defaults to None. + filename_suffix (str, optional): The suffix to be added to the CSV filenames. Defaults to "_timeseries_raw.csv". + """ for key, distances in cross_distance_transects.items(): # Initialize the dictionary for DataFrame with mandatory keys data_dict = { @@ -402,6 +516,16 @@ def create_csv_per_transect( def merge_and_average(df1: gpd.GeoDataFrame, df2: gpd.GeoDataFrame) -> gpd.GeoDataFrame: + """ + Merge two GeoDataFrames based on the 'satname' and 'date' columns, and average the common numeric columns. + + Args: + df1 (gpd.GeoDataFrame): The first GeoDataFrame. + df2 (gpd.GeoDataFrame): The second GeoDataFrame. + + Returns: + gpd.GeoDataFrame: The merged GeoDataFrame with averaged numeric columns. 
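+
+    Rows sharing the same ("satname", "date") pair are combined and their common numeric
+    columns (e.g. cloud_cover, geoaccuracy) are averaged. Example, as used in
+    scripts/merge_sessions.py to collapse a list of extracted-shoreline GeoDataFrames
+    into a single one:
+
+        from functools import reduce
+        merged_shorelines = reduce(merge_and_average, gdfs)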
+ """ # Perform a full outer join merged = pd.merge( df1, df2, on=["satname", "date"], how="outer", suffixes=("_df1", "_df2") From 0f016193af71f3bd536518d358a345aa25ac83d2 Mon Sep 17 00:00:00 2001 From: Sharon Fitzpatrick Date: Fri, 1 Dec 2023 17:13:00 -0800 Subject: [PATCH 50/87] #179 add test for merge_geojson_files --- tests/test_merge_utils.py | 89 ++++++++++++++++++++++++++++++++++++++- 1 file changed, 88 insertions(+), 1 deletion(-) diff --git a/tests/test_merge_utils.py b/tests/test_merge_utils.py index 7c2b8f8d..c096f24f 100644 --- a/tests/test_merge_utils.py +++ b/tests/test_merge_utils.py @@ -1,5 +1,6 @@ # Standard library imports from collections import defaultdict +import os # Related third party imports import geopandas as gpd @@ -9,6 +10,7 @@ from shapely.geometry import LineString, MultiLineString, MultiPoint, Point, Polygon from coastseg import merge_utils from functools import reduce +import tempfile # Local application/library specific imports from coastseg.merge_utils import ( @@ -18,6 +20,52 @@ ) +@pytest.fixture(scope="session") +def overlapping_roi_gdf_fixture(): + data = { + "id": ["gac1", "gac6"], + "type": ["roi", "roi"], + "dummy": ["dummy1", "dummy2"], + "geometry": [ + Polygon( + [ + (-121.89294822609095, 36.87805982149002), + (-121.892296987737, 36.923124285162714), + (-121.83616982432659, 36.922587577051516), + (-121.83685404313914, 36.87752398639186), + (-121.89294822609095, 36.87805982149002), + ] + ), + Polygon( + [ + (-121.89236580918869, 36.91836671978996), + (-121.89178223277345, 36.95867333604402), + (-121.8415571844796, 36.95819392646526), + (-121.8421671948108, 36.917888007480336), + (-121.89236580918869, 36.91836671978996), + ] + ), + ], + } + + overlapping_roi_gdf = gpd.GeoDataFrame(data, geometry="geometry") + return overlapping_roi_gdf + + +@pytest.fixture(scope="session") +def temp_geojson_file(overlapping_roi_gdf_fixture): + # Create a temporary file and immediately close it to avoid lock issues on Windows + tmpfile = tempfile.NamedTemporaryFile(suffix=".geojson", mode="w+", delete=False) + tmpfile.close() + + # Now open the file again to write the data + overlapping_roi_gdf_fixture.to_file(tmpfile.name, driver="GeoJSON") + + yield tmpfile.name # This will provide the file path to the test function + # Teardown code: delete the temporary file after the test session + os.remove(tmpfile.name) + + @pytest.fixture def gdf_empty(): data = { @@ -764,7 +812,7 @@ def test_aggregate_gdf_merged_config_all_unique(merged_config_nulls_all_unique): def test_filter_and_join_gdfs(): # Create a sample GeoDataFrame data = { - "type": ["roi", "poi", "roi", "poi"], + "type": ["roi", "shoreline", "roi", "shoreline"], "geometry": [Point(0, 0), Point(1, 1), Point(2, 2), Point(3, 3)], } gdf = gpd.GeoDataFrame(data, crs="EPSG:4326") @@ -788,3 +836,42 @@ def test_filter_and_join_gdfs(): assert ( len(result) == 2 ), "The result should only contain intersecting 'roi' geometries" + + +def test_read_geojson_files( + temp_geojson_file, +): + # Test reading a single GeoJSON file + filepaths = [temp_geojson_file] + result = merge_utils.read_geojson_files(filepaths) + assert len(result) == 1 + assert isinstance(result[0], gpd.GeoDataFrame) + assert result[0].shape[0] > 0 + + # # Test reading multiple GeoJSON files + # filepaths = ['/path/to/file1.geojson', '/path/to/file2.geojson'] + # result = merge_utils.read_geojson_files(filepaths) + # assert len(result) == 2 + # assert isinstance(result[0], gpd.GeoDataFrame) + # assert isinstance(result[1], gpd.GeoDataFrame) + # assert 
result[0].shape[0] > 0 + # assert result[1].shape[0] > 0 + + # Test filtering by column value + filepaths = [temp_geojson_file] + column = "type" + value = "roi" + result = merge_utils.read_geojson_files(filepaths, column, value) + assert len(result) == 1 + assert isinstance(result[0], gpd.GeoDataFrame) + assert result[0].shape[0] > 0 + assert all(result[0][column] == value) + + # Test keeping specific columns + filepaths = [temp_geojson_file] + keep_columns = ["geometry", "id", "type"] + result = merge_utils.read_geojson_files(filepaths, keep_columns=keep_columns) + assert len(result) == 1 + assert isinstance(result[0], gpd.GeoDataFrame) + assert result[0].shape[0] > 0 + assert set(result[0].columns) == set(keep_columns) From 8c79f04afec3a23fd15364092c5af6d57e42ab65 Mon Sep 17 00:00:00 2001 From: Sharon Fitzpatrick Date: Fri, 1 Dec 2023 17:15:03 -0800 Subject: [PATCH 51/87] #179 add merge_sessions.py --- scripts/merge_sessions.py | 153 ++++++++++++++++++++++++++++++++++++++ 1 file changed, 153 insertions(+) create mode 100644 scripts/merge_sessions.py diff --git a/scripts/merge_sessions.py b/scripts/merge_sessions.py new file mode 100644 index 00000000..599acedd --- /dev/null +++ b/scripts/merge_sessions.py @@ -0,0 +1,153 @@ +import os +from coastseg import merge_utils, file_utilities +from coastseg.common import ( + convert_linestrings_to_multipoints, + stringify_datetime_columns, + get_cross_distance_df, +) +from functools import reduce +import geopandas as gpd +from coastsat import SDS_transects +import numpy as np + +settings_transects = { + "along_dist": 25, # along-shore distance to use for computing the intersection + "min_points": 3, # minimum number of shoreline points to calculate an intersection + "max_std": 15, # max std for points around transect + "max_range": 30, # max range for points around transect + "min_chainage": -100, # largest negative value along transect (landwards of transect origin) + "multiple_inter": "auto", # mode for removing outliers ('auto', 'nan', 'max') + "prc_multiple": 0.1, # percentage of the time that multiple intersects are present to use the max +} + +# Enter ROI session locations here +# session_locations = [ +# r"C:\development\doodleverse\coastseg\CoastSeg\test_data\test_case4_overlapping\ID_gac6_datetime10-30-23__01_44_50", +# r"C:\development\doodleverse\coastseg\CoastSeg\test_data\test_case4_overlapping\ID_gac1_datetime10-30-23__01_44_50", +# ] + +session_locations = [ + r"C:\Users\sf230\Downloads\AK_shoreline1-20231127T221704Z-001\AK_shoreline1\ID_egw1_datetime09-19-23__11_37_20", + r"C:\Users\sf230\Downloads\AK_shoreline1-20231127T221704Z-001\AK_shoreline1\ID_egw3_datetime09-19-23__11_37_20", + r"C:\Users\sf230\Downloads\AK_shoreline1-20231127T221704Z-001\AK_shoreline1\ID_egw2_datetime09-19-23__11_37_20", + r"C:\Users\sf230\Downloads\AK_shoreline1-20231127T221704Z-001\AK_shoreline1\ID_egw4_datetime09-19-23__11_37_20", +] + +# enter directory to save the merged session +save_location = r"C:\development\doodleverse\coastseg\CoastSeg\test_results" +# enter the name of the merged session +merged_session_name = "large_dataset" + +# Script execution begins here +# ---------------------------- + +merged_session_location = os.path.join(save_location, merged_session_name) +# make the location to store the merged session +os.makedirs(merged_session_location, exist_ok=True) + +# Merge the config_gdf.geojson files from each session into a single geodataframe +# - if the shorelines or transects are at the exact same location, they will be merged into 
one +# - if transects have different ids for the same location, they will be merged into one and both ids will be saved +merged_config = merge_utils.merge_geojson_files( + session_locations, merged_session_location +) + +# read the extracted shorelines from the session locations +gdfs = merge_utils.process_geojson_files( + session_locations, + ["extracted_shorelines_points.geojson", "extracted_shorelines.geojson"], + merge_utils.convert_lines_to_multipoints, + merge_utils.read_first_geojson_file, +) + +# get all the ROIs from all the sessions +roi_rows = merged_config[merged_config["type"] == "roi"] + +# Determine if any of the extracted shorelines are in the overlapping regions between the ROIs +overlap_list = merge_utils.get_overlapping_features(roi_rows, gdfs) + +if len(overlap_list) > 0: + print("No overlapping ROIs found. Sessions can be merged.") +else: + print( + "Overlapping ROIs found. Overlapping regions may have double shorelines if the shorelines were detected on the same dates." + ) + +# merge the extracted shorelin geodataframes on date and satname, then average the cloud_cover and geoaccuracy for the merged rows + +# Perform a full outer join and average the numeric columns across all GeoDataFrames +merged_shorelines = reduce(merge_utils.merge_and_average, gdfs) +# sort by date and reset the index +merged_shorelines.sort_values(by="date", inplace=True) +merged_shorelines.reset_index(drop=True, inplace=True) + +# Save the merged extracted shorelines to `extracted_shorelines_dict.json` +# -------------------------------------------------------------------------- +# mapping of dictionary keys to dataframe columns +keymap = { + "shorelines": "geometry", + "dates": "date", + "satname": "satname", + "cloud_cover": "cloud_cover", + "geoaccuracy": "geoaccuracy", +} + +# shoreline dict should have keys: dates, satname, cloud_cover, geoaccuracy, shorelines +shoreline_dict = merge_utils.dataframe_to_dict(merged_shorelines, keymap) +# save the extracted shoreline dictionary to json file +file_utilities.to_file( + shoreline_dict, + os.path.join(merged_session_location, "extracted_shorelines_dict.json"), +) + +print("Extracted shorelines merged and saved to extracted_shorelines_dict.json") +print(f"Saved {len(shoreline_dict['shorelines'])} extracted shorelines") + +# Save extracted shorelines to GeoJSON file +# ----------------------------------------- + +# 1. convert datetime columns to strings +merged_shorelines = stringify_datetime_columns(merged_shorelines) + +# 2. Save the shorelines that are formatted as mulitpoints a to GeoJSON file +# Save extracted shorelines as mulitpoints GeoJSON file +merged_shorelines.to_file( + os.path.join(merged_session_location, "extracted_shorelines_points.geojson"), + driver="GeoJSON", +) +print("Extracted shorelines saved to extracted_shorelines_points.geojson") +# 3. Convert the multipoints to linestrings and save to GeoJSON file +es_lines_gdf = merge_utils.convert_multipoints_to_linestrings(merged_shorelines) +# save extracted shorelines as interpolated linestrings +es_lines_gdf.to_file( + os.path.join(merged_session_location, "extracted_shorelines_lines.geojson"), + driver="GeoJSON", +) +print("Extracted shorelines saved to extracted_shorelines_lines.geojson") + +# Compute the timeseries of where transects and new merged shorelines intersect +# --------------------------------------------------------------------- + +# 1. 
load transects for from all the sessions +transect_rows = merged_config[merged_config["type"] == "transect"] +transects_dict = { + row["id"]: np.array(row["geometry"].coords) for i, row in transect_rows.iterrows() +} +# 2. compute the intersection between the transects and the extracted shorelines +cross_distance = SDS_transects.compute_intersection_QC( + shoreline_dict, transects_dict, settings_transects +) + +# use coastseg.common to get the cross_distance_df +transects_df = get_cross_distance_df(shoreline_dict, cross_distance) +# 3. save the timeseries of where all the transects and shorelines intersected to a csv file +filepath = os.path.join(merged_session_location, "transect_time_series.csv") +transects_df.to_csv(filepath, sep=",") + +# 4. Save a CSV file for each transect +# - Save the timeseries of intersections between the shoreline and a single tranesct to csv file +merge_utils.create_csv_per_transect( + merged_session_location, + cross_distance, + shoreline_dict, +) From e988372534383fdd2cc8550143ef8a16af5b140f Mon Sep 17 00:00:00 2001 From: Sharon Fitzpatrick Date: Mon, 4 Dec 2023 10:27:28 -0800 Subject: [PATCH 52/87] #179 update merge sessions.py with instructions --- scripts/merge_sessions.py | 27 +++++++++++++++------------ 1 file changed, 15 insertions(+), 12 deletions(-) diff --git a/scripts/merge_sessions.py b/scripts/merge_sessions.py index 599acedd..79fa51dc 100644 --- a/scripts/merge_sessions.py +++ b/scripts/merge_sessions.py @@ -10,6 +10,8 @@ from coastsat import SDS_transects import numpy as np +# Step 1: OPTIONAL +# Modify the settings_transects dictionary to change the parameters for computing the intersection between the transects and the extracted shorelines settings_transects = { "along_dist": 25, # along-shore distance to use for computing the intersection "min_points": 3, # minimum number of shoreline points to calculate an intersection @@ -20,23 +22,24 @@ "prc_multiple": 0.1, # percentage of the time that multiple intersects are present to use the max } +# Step 2: REQUIRED # Enter ROI session locations here -# session_locations = [ -# r"C:\development\doodleverse\coastseg\CoastSeg\test_data\test_case4_overlapping\ID_gac6_datetime10-30-23__01_44_50", -# r"C:\development\doodleverse\coastseg\CoastSeg\test_data\test_case4_overlapping\ID_gac1_datetime10-30-23__01_44_50", -# ] +# - Replace the session_locations list with the locations of the ROI sessions you want to merge +# - Each of these sessions should have extracted shorelines and transects +# - The order of the sessions in the list does not matter session_locations = [ - r"C:\Users\sf230\Downloads\AK_shoreline1-20231127T221704Z-001\AK_shoreline1\ID_egw1_datetime09-19-23__11_37_20", - r"C:\Users\sf230\Downloads\AK_shoreline1-20231127T221704Z-001\AK_shoreline1\ID_egw3_datetime09-19-23__11_37_20", - r"C:\Users\sf230\Downloads\AK_shoreline1-20231127T221704Z-001\AK_shoreline1\ID_egw2_datetime09-19-23__11_37_20", - r"C:\Users\sf230\Downloads\AK_shoreline1-20231127T221704Z-001\AK_shoreline1\ID_egw4_datetime09-19-23__11_37_20", + r"C:\development\doodleverse\coastseg\CoastSeg\test_data\test_case4_overlapping\ID_gac6_datetime10-30-23__01_44_50", + r"C:\development\doodleverse\coastseg\CoastSeg\test_data\test_case4_overlapping\ID_gac1_datetime10-30-23__01_44_50", ] -# enter directory to save the merged session -save_location = r"C:\development\doodleverse\coastseg\CoastSeg\test_results" -# enter the name of the merged session -merged_session_name = "large_dataset" +# Step 3: REQUIRED +# Enter directory to save the 
merged session folder in +save_location = r"C:\development\doodleverse\coastseg\CoastSeg\merged_sessions" + +# Step 4: REQUIRED +# Enter the name of the merged session +merged_session_name = "merged_session" # Script execution begins here # ---------------------------- From 818001010d0622a5b3c935e86db734f40d8cc2d9 Mon Sep 17 00:00:00 2001 From: Sharon Fitzpatrick Date: Mon, 4 Dec 2023 11:25:48 -0800 Subject: [PATCH 53/87] #179 update merge_sessions.py to use arg parse --- scripts/merge_sessions.py | 355 +++++++++++++++++++++++--------------- 1 file changed, 213 insertions(+), 142 deletions(-) diff --git a/scripts/merge_sessions.py b/scripts/merge_sessions.py index 79fa51dc..3cf214f5 100644 --- a/scripts/merge_sessions.py +++ b/scripts/merge_sessions.py @@ -1,156 +1,227 @@ import os +import argparse +from functools import reduce + +import numpy as np +import geopandas as gpd +from coastsat import SDS_transects + from coastseg import merge_utils, file_utilities from coastseg.common import ( - convert_linestrings_to_multipoints, stringify_datetime_columns, get_cross_distance_df, ) -from functools import reduce -import geopandas as gpd -from coastsat import SDS_transects -import numpy as np -# Step 1: OPTIONAL -# Modify the settings_transects dictionary to change the parameters for computing the intersection between the transects and the extracted shorelines -settings_transects = { - "along_dist": 25, # along-shore distance to use for computing the intersection - "min_points": 3, # minimum number of shoreline points to calculate an intersection - "max_std": 15, # max std for points around transect - "max_range": 30, # max range for points around transect - "min_chainage": -100, # largest negative value along transect (landwards of transect origin) - "multiple_inter": "auto", # mode for removing outliers ('auto', 'nan', 'max') - "prc_multiple": 0.1, # percentage of the time that multiple intersects are present to use the max -} - -# Step 2: REQUIRED -# Enter ROI session locations here -# - Replace the session_locations list with the locations of the ROI sessions you want to merge -# - Each of these sessions should have extracted shorelines and transects -# - The order of the sessions in the list does not matter - -session_locations = [ - r"C:\development\doodleverse\coastseg\CoastSeg\test_data\test_case4_overlapping\ID_gac6_datetime10-30-23__01_44_50", - r"C:\development\doodleverse\coastseg\CoastSeg\test_data\test_case4_overlapping\ID_gac1_datetime10-30-23__01_44_50", -] - -# Step 3: REQUIRED -# Enter directory to save the merged session folder in -save_location = r"C:\development\doodleverse\coastseg\CoastSeg\merged_sessions" - -# Step 4: REQUIRED -# Enter the name of the merged session -merged_session_name = "merged_session" - -# Script execution begins here -# ---------------------------- - -merged_session_location = os.path.join(save_location, merged_session_name) -# make the location to store the merged session -os.makedirs(merged_session_location, exist_ok=True) - -# Merge the config_gdf.geojson files from each session into a single geodataframe -# - if the shorelines or transects are at the exact same location, they will be merged into one -# - if transects have different ids for the same location, they will be merged into one and both ids will be saved -merged_config = merge_utils.merge_geojson_files( - session_locations, merged_session_location -) -# read the extracted shorelines from the session locations -gdfs = merge_utils.process_geojson_files( - session_locations, - 
["extracted_shorelines_points.geojson", "extracted_shorelines.geojson"], - merge_utils.convert_lines_to_multipoints, - merge_utils.read_first_geojson_file, -) +def main(args): + # Use args to access the command-line arguments + session_locations = args.session_locations + save_location = args.save_location + merged_session_name = args.merged_session_name + settings_transects = { + "along_dist": args.along_dist, # along-shore distance to use for computing the intersection + "min_points": args.min_points, # minimum number of shoreline points to calculate an intersection + "max_std": args.max_std, # max std for points around transect + "max_range": args.max_range, # max range for points around transect + "min_chainage": args.min_chainage, # largest negative value along transect (landwards of transect origin) + "multiple_inter": args.multiple_inter, # mode for removing outliers ('auto', 'nan', 'max') + "prc_multiple": args.prc_multiple, # percentage of the time that multiple intersects are present to use the max + } + # @DEBUG only + # print(f"settings_transects: {settings_transects}") + + merged_session_location = os.path.join(save_location, merged_session_name) + # make the location to store the merged session + os.makedirs(merged_session_location, exist_ok=True) + + # Merge the config_gdf.geojson files from each session into a single geodataframe + # - if the shorelines or transects are at the exact same location, they will be merged into one + # - if transects have different ids for the same location, they will be merged into one and both ids will be saved + merged_config = merge_utils.merge_geojson_files( + session_locations, merged_session_location + ) -# get all the ROIs from all the sessions -roi_rows = merged_config[merged_config["type"] == "roi"] - -# Determine if any of the extracted shorelines are in the overlapping regions between the ROIs -overlap_list = merge_utils.get_overlapping_features(roi_rows, gdfs) - -if len(overlap_list) > 0: - print("No overlapping ROIs found. Sessions can be merged.") -else: - print( - "Overlapping ROIs found. Overlapping regions may have double shorelines if the shorelines were detected on the same dates." 
- ) - -# merge the extracted shorelin geodataframes on date and satname, then average the cloud_cover and geoaccuracy for the merged rows - -# Perform a full outer join and average the numeric columns across all GeoDataFrames -merged_shorelines = reduce(merge_utils.merge_and_average, gdfs) -# sort by date and reset the index -merged_shorelines.sort_values(by="date", inplace=True) -merged_shorelines.reset_index(drop=True, inplace=True) - -# Save the merged extracted shorelines to `extracted_shorelines_dict.json` -# -------------------------------------------------------------------------- -# mapping of dictionary keys to dataframe columns -keymap = { - "shorelines": "geometry", - "dates": "date", - "satname": "satname", - "cloud_cover": "cloud_cover", - "geoaccuracy": "geoaccuracy", -} - -# shoreline dict should have keys: dates, satname, cloud_cover, geoaccuracy, shorelines -shoreline_dict = merge_utils.dataframe_to_dict(merged_shorelines, keymap) -# save the extracted shoreline dictionary to json file -file_utilities.to_file( - shoreline_dict, - os.path.join(merged_session_location, "extracted_shorelines_dict.json"), -) + # read the extracted shorelines from the session locations + gdfs = merge_utils.process_geojson_files( + session_locations, + ["extracted_shorelines_points.geojson", "extracted_shorelines.geojson"], + merge_utils.convert_lines_to_multipoints, + merge_utils.read_first_geojson_file, + ) -print("Extracted shorelines merged and saved to extracted_shorelines_dict.json") -print(f"Saved {len(shoreline_dict['shorelines'])} extracted shorelines") + # get all the ROIs from all the sessions + roi_rows = merged_config[merged_config["type"] == "roi"] + + # Determine if any of the extracted shorelines are in the overlapping regions between the ROIs + overlap_list = merge_utils.get_overlapping_features(roi_rows, gdfs) + + if len(overlap_list) > 0: + print("No overlapping ROIs found. Sessions can be merged.") + else: + print( + "Overlapping ROIs found. Overlapping regions may have double shorelines if the shorelines were detected on the same dates." + ) + + # merge the extracted shorelin geodataframes on date and satname, then average the cloud_cover and geoaccuracy for the merged rows + + # Perform a full outer join and average the numeric columns across all GeoDataFrames + merged_shorelines = reduce(merge_utils.merge_and_average, gdfs) + # sort by date and reset the index + merged_shorelines.sort_values(by="date", inplace=True) + merged_shorelines.reset_index(drop=True, inplace=True) + + # Save the merged extracted shorelines to `extracted_shorelines_dict.json` + # -------------------------------------------------------------------------- + # mapping of dictionary keys to dataframe columns + keymap = { + "shorelines": "geometry", + "dates": "date", + "satname": "satname", + "cloud_cover": "cloud_cover", + "geoaccuracy": "geoaccuracy", + } + + # shoreline dict should have keys: dates, satname, cloud_cover, geoaccuracy, shorelines + shoreline_dict = merge_utils.dataframe_to_dict(merged_shorelines, keymap) + # save the extracted shoreline dictionary to json file + file_utilities.to_file( + shoreline_dict, + os.path.join(merged_session_location, "extracted_shorelines_dict.json"), + ) -# Save extracted shorelines to GeoJSON file -# ----------------------------------------- + print("Extracted shorelines merged and saved to extracted_shorelines_dict.json") + print(f"Saved {len(shoreline_dict['shorelines'])} extracted shorelines") -# 1. 
convert datetime columns to strings -merged_shorelines = stringify_datetime_columns(merged_shorelines) + # Save extracted shorelines to GeoJSON file + # ----------------------------------------- -# 2. Save the shorelines that are formatted as mulitpoints a to GeoJSON file -# Save extracted shorelines as mulitpoints GeoJSON file -merged_shorelines.to_file( - os.path.join(merged_session_location, "extracted_shorelines_points.geojson"), - driver="GeoJSON", -) -print("Extracted shorelines saved to extracted_shorelines_points.geojson") -# 3. Convert the multipoints to linestrings and save to GeoJSON file -es_lines_gdf = merge_utils.convert_multipoints_to_linestrings(merged_shorelines) -# save extracted shorelines as interpolated linestrings -es_lines_gdf.to_file( - os.path.join(merged_session_location, "extracted_shorelines_lines.geojson"), - driver="GeoJSON", -) -print("Extracted shorelines saved to extracted_shorelines_lines.geojson") - -# Compute the timeseries of where transects and new merged shorelines intersect -# --------------------------------------------------------------------- - -# 1. load transects for from all the sessions -transect_rows = merged_config[merged_config["type"] == "transect"] -transects_dict = { - row["id"]: np.array(row["geometry"].coords) for i, row in transect_rows.iterrows() -} -# 2. compute the intersection between the transects and the extracted shorelines -cross_distance = SDS_transects.compute_intersection_QC( - shoreline_dict, transects_dict, settings_transects -) + # 1. convert datetime columns to strings + merged_shorelines = stringify_datetime_columns(merged_shorelines) -# use coastseg.common to get the cross_distance_df -transects_df = get_cross_distance_df(shoreline_dict, cross_distance) -# 3. save the timeseries of where all the transects and shorelines intersected to a csv file -filepath = os.path.join(merged_session_location, "transect_time_series.csv") -transects_df.to_csv(filepath, sep=",") - -# 4. Save a CSV file for each transect -# - Save the timeseries of intersections between the shoreline and a single tranesct to csv file -merge_utils.create_csv_per_transect( - merged_session_location, - cross_distance, - shoreline_dict, -) + # 2. Save the shorelines that are formatted as mulitpoints a to GeoJSON file + # Save extracted shorelines as mulitpoints GeoJSON file + merged_shorelines.to_file( + os.path.join(merged_session_location, "extracted_shorelines_points.geojson"), + driver="GeoJSON", + ) + print("Extracted shorelines saved to extracted_shorelines_points.geojson") + # 3. Convert the multipoints to linestrings and save to GeoJSON file + es_lines_gdf = merge_utils.convert_multipoints_to_linestrings(merged_shorelines) + # save extracted shorelines as interpolated linestrings + es_lines_gdf.to_file( + os.path.join(merged_session_location, "extracted_shorelines_lines.geojson"), + driver="GeoJSON", + ) + print("Extracted shorelines saved to extracted_shorelines_lines.geojson") + + # Compute the timeseries of where transects and new merged shorelines intersect + # --------------------------------------------------------------------- + + # 1. load transects for from all the sessions + transect_rows = merged_config[merged_config["type"] == "transect"] + transects_dict = { + row["id"]: np.array(row["geometry"].coords) + for i, row in transect_rows.iterrows() + } + # 2. 
compute the intersection between the transects and the extracted shorelines + cross_distance = SDS_transects.compute_intersection_QC( + shoreline_dict, transects_dict, settings_transects + ) + + # use coastseg.common to get the cross_distance_df + transects_df = get_cross_distance_df(shoreline_dict, cross_distance) + # 3. save the timeseries of where all the transects and shorelines intersected to a csv file + filepath = os.path.join(merged_session_location, "transect_time_series.csv") + transects_df.to_csv(filepath, sep=",") + + # 4. Save a CSV file for each transect + # - Save the timeseries of intersections between the shoreline and a single tranesct to csv file + merge_utils.create_csv_per_transect( + merged_session_location, + cross_distance, + shoreline_dict, + ) + + +if __name__ == "__main__": + # Create the parser + parser = argparse.ArgumentParser(description="Merge sessions script.") + + # Add mandatory arguments + parser.add_argument( + "-i", + "--session_locations", + nargs="+", + required=True, + help="Locations for the session folders to be merged", + ) + + parser.add_argument( + "-n", + "--merged_session_name", + required=True, + help="Name for the merged session folder", + ) + + # Add optional argument with default value + parser.add_argument( + "-s", + "--save_location", + default=os.path.join(os.getcwd(), "merged_sessions"), + help="Location to save the merged session (default: current directory/merged_sessions)", + ) + + # Settings for transects + parser.add_argument( + "-ad", + "--along_dist", + type=int, + default=25, + help="Along-shore distance for computing the intersection (default: 25)", + ) + parser.add_argument( + "-mp", + "--min_points", + type=int, + default=3, + help="Minimum number of shoreline points to calculate an intersection (default: 3)", + ) + parser.add_argument( + "-ms", + "--max_std", + type=int, + default=15, + help="Maximum standard deviation for points around transect (default: 15)", + ) + parser.add_argument( + "-mr", + "--max_range", + type=int, + default=30, + help="Maximum range for points around transect (default: 30)", + ) + parser.add_argument( + "-mc", + "--min_chainage", + type=int, + default=-100, + help="Largest negative value along transect (landwards of transect origin) (default: -100)", + ) + parser.add_argument( + "-mi", + "--multiple_inter", + default="auto", + choices=["auto", "nan", "max"], + help="Mode for removing outliers ('auto', 'nan', 'max') (default: 'auto')", + ) + parser.add_argument( + "-pm", + "--prc_multiple", + type=float, + default=0.1, + help="Percentage of the time that multiple intersects are present to use the max (default: 0.1)", + ) + + # Parse the arguments + args = parser.parse_args() + main(args) From 6c1ea10d010b487fdbd4b1f8cf9727ff19a117dc Mon Sep 17 00:00:00 2001 From: Sharon Fitzpatrick Date: Mon, 4 Dec 2023 17:35:12 -0800 Subject: [PATCH 54/87] add tests for load_json_config --- tests/conftest.py | 243 +++++++++++++++++++++++++++++-------- tests/test_coastseg_map.py | 130 ++++++++++++-------- tests/test_common.py | 12 +- 3 files changed, 277 insertions(+), 108 deletions(-) diff --git a/tests/conftest.py b/tests/conftest.py index 06b423ee..9ca597d5 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -16,65 +16,82 @@ script_dir = os.path.dirname(os.path.abspath(__file__)) +@pytest.fixture(scope="session") +def config_json_no_sitename_dir(): + # create a temporary directory that will represent the downloaded ROI directory + temp_dir = tempfile.mkdtemp() + # Create don't create the subdirectory in 
this temporary directory -@pytest.fixture -def temp_jpg_dir_structure(): - # Create a temporary directory - with tempfile.TemporaryDirectory() as tmpdirname: - # Create subdirectories - # creates a directory structure like this: tmpdir/sitename/jpg_files/detection - sitename_dir = os.path.join(tmpdirname, "sitename","jpg_files","detection") - os.makedirs(sitename_dir) - - # Add JPG files to the subdirectories - for i in range(5): # Creating 5 JPG files for example - image = Image.new("RGB", (100, 100), color="blue") # Simple blue image - image_path = os.path.join(sitename_dir, f"test_image_{i}.jpg") - image.save(image_path) - - yield tmpdirname - # Cleanup is handled by TemporaryDirectory context manager - - -@pytest.fixture -def temp_src_dir(): - # Create a temporary directory - with tempfile.TemporaryDirectory() as tmpdirname: - # Add some files to the directory - for i in range(5): # Creating 5 files for example - with open(os.path.join(tmpdirname, f"test_file_{i}.txt"), "w") as f: - f.write("This is a test file") - yield tmpdirname - # Cleanup is handled by TemporaryDirectory context manager - - -@pytest.fixture -def temp_dst_dir(): - # Create another temporary directory for destination - with tempfile.TemporaryDirectory() as tmpdirname: - yield tmpdirname - # Cleanup is handled by TemporaryDirectory context manager - + # The dictionary you want to write to the JSON file + config_data = { + "zih2": { + "dates": ["2018-12-01", "2019-03-01"], + "sat_list": ["L5", "L7", "L8", "L9", "S2"], + "roi_id": "zih2", + "polygon": [ + [ + [-121.84020033533233, 36.74441575726833], + [-121.83959312681607, 36.784722827004146], + [-121.78948275983468, 36.78422337939962], + [-121.79011617443447, 36.74391703739083], + [-121.84020033533233, 36.74441575726833], + ] + ], + "landsat_collection": "C02", + "sitename": "ID_zih2_datetime11-15-23__09_56_01", + "filepath": str(temp_dir), + }, + "roi_ids": ["zih2"], + "settings": { + "landsat_collection": "C02", + "dates": ["2018-12-01", "2019-03-01"], + "sat_list": ["L5", "L7", "L8", "L9", "S2"], + "cloud_thresh": 0.8, + "dist_clouds": 350, + "output_epsg": 32610, + "check_detection": False, + "adjust_detection": False, + "save_figure": True, + "min_beach_area": 1050, + "min_length_sl": 600, + "cloud_mask_issue": True, + "sand_color": "default", + "pan_off": "False", + "max_dist_ref": 200, + "along_dist": 28, + "min_points": 4, + "max_std": 16.0, + "max_range": 38.0, + "min_chainage": -105.0, + "multiple_inter": "auto", + "prc_multiple": 0.2, + "apply_cloud_mask": False, + "image_size_filter": False, + }, + } -@pytest.fixture -def temp_src_files(): - # Create a list of temporary files - files = [] - for i in range(5): - fd, path = tempfile.mkstemp(suffix=".txt", prefix="test_file_", text=True) - os.write(fd, b"This is a test file") - os.close(fd) - files.append(path) + # Create a temporary file + with tempfile.NamedTemporaryFile( + mode="w+", delete=False, suffix=".json" + ) as tmpfile: + json.dump(config_data, tmpfile) + tmpfile_path = tmpfile.name # Save the filepath - yield files + # Yield the filepath to the test + yield tmpfile_path,temp_dir - # Cleanup - for f in files: - os.remove(f) + # Cleanup - delete the file after tests are done + os.remove(tmpfile_path) @pytest.fixture(scope="session") def config_json(): + # create a temporary directory that will represent the downloaded ROI directory + temp_dir = tempfile.mkdtemp() + # Create a subdirectory in this temporary directory + sub_dir = os.path.join(temp_dir, "ID_zih2_datetime11-15-23__09_56_01") + 
os.makedirs(sub_dir, exist_ok=True) + # The dictionary you want to write to the JSON file config_data = { "zih2": { @@ -92,7 +109,7 @@ def config_json(): ], "landsat_collection": "C02", "sitename": "ID_zih2_datetime11-15-23__09_56_01", - "filepath": "C:\\development\\doodleverse\\coastseg\\CoastSeg\\data", + "filepath": str(temp_dir), }, "roi_ids": ["zih2"], "settings": { @@ -131,12 +148,132 @@ def config_json(): tmpfile_path = tmpfile.name # Save the filepath # Yield the filepath to the test - yield tmpfile_path + yield tmpfile_path,temp_dir # Cleanup - delete the file after tests are done os.remove(tmpfile_path) +@pytest.fixture +def temp_jpg_dir_structure(): + # Create a temporary directory + with tempfile.TemporaryDirectory() as tmpdirname: + # Create subdirectories + # creates a directory structure like this: tmpdir/sitename/jpg_files/detection + sitename_dir = os.path.join(tmpdirname, "sitename", "jpg_files", "detection") + os.makedirs(sitename_dir) + + # Add JPG files to the subdirectories + for i in range(5): # Creating 5 JPG files for example + image = Image.new("RGB", (100, 100), color="blue") # Simple blue image + image_path = os.path.join(sitename_dir, f"test_image_{i}.jpg") + image.save(image_path) + + yield tmpdirname + # Cleanup is handled by TemporaryDirectory context manager + + +@pytest.fixture +def temp_src_dir(): + # Create a temporary directory + with tempfile.TemporaryDirectory() as tmpdirname: + # Add some files to the directory + for i in range(5): # Creating 5 files for example + with open(os.path.join(tmpdirname, f"test_file_{i}.txt"), "w") as f: + f.write("This is a test file") + yield tmpdirname + # Cleanup is handled by TemporaryDirectory context manager + + +@pytest.fixture +def temp_dst_dir(): + # Create another temporary directory for destination + with tempfile.TemporaryDirectory() as tmpdirname: + yield tmpdirname + # Cleanup is handled by TemporaryDirectory context manager + + +@pytest.fixture +def temp_src_files(): + # Create a list of temporary files + files = [] + for i in range(5): + fd, path = tempfile.mkstemp(suffix=".txt", prefix="test_file_", text=True) + os.write(fd, b"This is a test file") + os.close(fd) + files.append(path) + + yield files + + # Cleanup + for f in files: + os.remove(f) + + +# @pytest.fixture(scope="session") +# def config_json(): +# # The dictionary you want to write to the JSON file +# config_data = { +# "zih2": { +# "dates": ["2018-12-01", "2019-03-01"], +# "sat_list": ["L5", "L7", "L8", "L9", "S2"], +# "roi_id": "zih2", +# "polygon": [ +# [ +# [-121.84020033533233, 36.74441575726833], +# [-121.83959312681607, 36.784722827004146], +# [-121.78948275983468, 36.78422337939962], +# [-121.79011617443447, 36.74391703739083], +# [-121.84020033533233, 36.74441575726833], +# ] +# ], +# "landsat_collection": "C02", +# "sitename": "ID_zih2_datetime11-15-23__09_56_01", +# "filepath": "C:\\development\\doodleverse\\coastseg\\CoastSeg\\data", +# }, +# "roi_ids": ["zih2"], +# "settings": { +# "landsat_collection": "C02", +# "dates": ["2018-12-01", "2019-03-01"], +# "sat_list": ["L5", "L7", "L8", "L9", "S2"], +# "cloud_thresh": 0.8, +# "dist_clouds": 350, +# "output_epsg": 32610, +# "check_detection": False, +# "adjust_detection": False, +# "save_figure": True, +# "min_beach_area": 1050, +# "min_length_sl": 600, +# "cloud_mask_issue": True, +# "sand_color": "default", +# "pan_off": "False", +# "max_dist_ref": 200, +# "along_dist": 28, +# "min_points": 4, +# "max_std": 16.0, +# "max_range": 38.0, +# "min_chainage": -105.0, +# 
"multiple_inter": "auto", +# "prc_multiple": 0.2, +# "apply_cloud_mask": False, +# "image_size_filter": False, +# }, +# } + +# # Create a temporary file +# with tempfile.NamedTemporaryFile( +# mode="w+", delete=False, suffix=".json" +# ) as tmpfile: +# json.dump(config_data, tmpfile) +# tmpfile_path = tmpfile.name # Save the filepath + +# # Yield the filepath to the test +# yield tmpfile_path + +# # Cleanup - delete the file after tests are done +# os.remove(tmpfile_path) + + @pytest.fixture(scope="session") def geojson_directory(): """ diff --git a/tests/test_coastseg_map.py b/tests/test_coastseg_map.py index e311c436..b2e10899 100644 --- a/tests/test_coastseg_map.py +++ b/tests/test_coastseg_map.py @@ -155,49 +155,45 @@ def test_load_json_config_without_rois(valid_coastseg_map_with_settings, tmp_dat actual_coastsegmap.load_json_config("", tmp_data_path) -# def test_load_json_config_downloaded( -# valid_coastseg_map_with_settings, -# valid_rois_filepath, -# downloaded_config_json_filepath, -# tmp_data_path, -# ): -# # tests if load_json_config will load contents into rois.roi_settings -# # create instance of Coastseg_Map with settings and ROIs initially loaded -# actual_coastsegmap = valid_coastseg_map_with_settings -# actual_coastsegmap.load_feature_on_map("rois", file=valid_rois_filepath) - -# # import os -# # data_path = os.path.join(tmp_path,'data') -# # os.mkdir(data_path) -# # # simulate the ROI directories -# # os.mkdir(os.path.join(data_path,"ID_2_datetime10-19-22__04_00_34")) -# # os.mkdir(os.path.join(data_path,"ID_3_datetime10-19-22__04_00_34")) -# # os.mkdir(os.path.join(data_path,"ID_5_datetime10-19-22__04_00_34")) - -# # test if settings are correctly loaded when valid json config loaded with 'filepath' & 'sitename' keys is loaded -# actual_coastsegmap.load_json_config(downloaded_config_json_filepath, tmp_data_path) -# assert isinstance(actual_coastsegmap.rois.roi_settings, dict) -# actual_config = file_utilities.read_json_file(downloaded_config_json_filepath) -# for key in actual_config["roi_ids"]: -# assert key in actual_coastsegmap.rois.roi_settings - - -# def test_load_json_config( -# valid_coastseg_map_with_settings, -# valid_rois_filepath, -# config_json_filepath, -# tmp_data_path, -# ): -# # tests if load_json_config will load contents into rois.roi_settings when rois have not been downloaded before -# # create instance of Coastseg_Map with settings and ROIs initially loaded -# actual_coastsegmap = valid_coastseg_map_with_settings -# actual_coastsegmap.load_feature_on_map("rois", file=valid_rois_filepath) -# # test if settings are correctly loaded when valid json config without 'filepath' & 'sitename' keys is loaded -# actual_coastsegmap.load_json_config(config_json_filepath, tmp_data_path) -# assert isinstance(actual_coastsegmap.rois.roi_settings, dict) -# actual_config = file_utilities.read_json_file(config_json_filepath) -# for key in actual_config["roi_ids"]: -# assert key in actual_coastsegmap.rois.roi_settings +def test_load_json_config_downloaded( + valid_coastseg_map_with_settings, + valid_rois_filepath, + config_json, +): + config_path, temp_dir = config_json + # tests if load_json_config will load contents into rois.roi_settings + # create instance of Coastseg_Map with settings and ROIs initially loaded + actual_coastsegmap = valid_coastseg_map_with_settings + actual_coastsegmap.load_feature_on_map("rois", file=valid_rois_filepath) + + # test if settings are correctly loaded when valid json config loaded with 'filepath' & 'sitename' keys is loaded + 
actual_coastsegmap.load_json_config(config_path, temp_dir) + assert isinstance(actual_coastsegmap.rois.roi_settings, dict) + actual_config = file_utilities.read_json_file(config_path) + for roi_id in actual_config["roi_ids"]: + assert roi_id in actual_coastsegmap.rois.roi_settings + for key in actual_config[roi_id]: + assert ( + actual_coastsegmap.rois.roi_settings[roi_id][key] + == actual_config[roi_id][key] + ) + for roi_id, item in actual_config.get("settings", {}).items(): + assert actual_coastsegmap.settings[roi_id] == item + + +def test_load_json_config_when_data_path_not_exist( + valid_coastseg_map_with_settings, + valid_rois_filepath, + config_json_no_sitename_dir, +): + config_path, temp_dir = config_json_no_sitename_dir + # tests if load_json_config will load contents into rois.roi_settings when rois have not been downloaded before + # create instance of Coastseg_Map with settings and ROIs initially loaded + actual_coastsegmap = valid_coastseg_map_with_settings + actual_coastsegmap.load_feature_on_map("rois", file=valid_rois_filepath) + # test if settings are correctly loaded when valid json config without 'filepath' & 'sitename' keys is loaded + with pytest.raises(exceptions.WarningException): + actual_coastsegmap.load_json_config(config_path, temp_dir) def test_valid_shoreline_gdf(valid_shoreline_gdf: gpd.GeoDataFrame): @@ -250,9 +246,39 @@ def test_coastseg_map(): assert isinstance(coastsegmap, coastseg_map.CoastSeg_Map) assert isinstance(coastsegmap.map, Map) assert hasattr(coastsegmap, "draw_control") + assert hasattr(coastsegmap, "settings") + default_settings = { + "landsat_collection": "C02", + "dates": ["2017-12-01", "2018-01-01"], + "sat_list": ["L8"], + "cloud_thresh": 0.5, + "dist_clouds": 300, + "output_epsg": 4326, + "check_detection": False, + "adjust_detection": False, + "save_figure": True, + "min_beach_area": 4500, + "min_length_sl": 100, + "cloud_mask_issue": False, + "sand_color": "default", + "pan_off": "False", + "max_dist_ref": 25, + "along_dist": 25, + "min_points": 3, + "max_std": 15, + "max_range": 30, + "min_chainage": -100, + "multiple_inter": "auto", + "prc_multiple": 0.1, + "apply_cloud_mask": True, + "image_size_filter": True, + } + for key in default_settings: + assert key in coastsegmap.settings + assert coastsegmap.settings[key] == default_settings[key] -def test_coastseg_map_settings(): +def test_set_settings(): """tests if a ROI will be created from valid rois thats a gpd.GeoDataFrame Args: valid_bbox_gdf (gpd.GeoDataFrame): alid rois as a gpd.GeoDataFrame @@ -261,21 +287,21 @@ def test_coastseg_map_settings(): pre_process_settings = { # general parameters: "dates": ["2018-12-01", "2019-03-01"], - "sat_list": ["L8"], - "cloud_thresh": 0.5, # threshold on maximum cloud cover - "dist_clouds": 300, # ditance around clouds where shoreline can't be mapped + "sat_list": ["L9"], + "cloud_thresh": 0.9, # threshold on maximum cloud cover + "dist_clouds": 400, # ditance around clouds where shoreline can't be mapped "output_epsg": 3857, # epsg code of spatial reference system desired for the output # quality control: "check_detection": True, # if True, shows each shoreline detection to the user for validation "adjust_detection": False, # if True, allows user to adjust the position of each shoreline by changing the threshold "save_figure": True, # if True, saves a figure showing the mapped shoreline for each image # [ONLY FOR ADVANCED USERS] shoreline detection parameters: - "min_beach_area": 4500, # minimum area (in metres^2) for an object to be labelled 
as a beach - "min_length_sl": 200, # minimum length (in metres) of shoreline perimeter to be valid - "cloud_mask_issue": False, # switch this parameter to True if sand pixels are masked (in black) on many images + "min_beach_area": 400, # minimum area (in metres^2) for an object to be labelled as a beach + "min_length_sl": 100, # minimum length (in metres) of shoreline perimeter to be valid + "cloud_mask_issue": True, # switch this parameter to True if sand pixels are masked (in black) on many images "sand_color": "default", # 'default', 'dark' (for grey/black sand beaches) or 'bright' (for white sand beaches) "pan_off": "False", # if True, no pan-sharpening is performed on Landsat 7,8 and 9 imagery - "max_dist_ref": 25, + "max_dist_ref": 20, "landsat_collection": "C02", } coastsegmap.set_settings(**pre_process_settings) @@ -283,6 +309,8 @@ def test_coastseg_map_settings(): expected_settings = set(list(pre_process_settings.keys())) assert expected_settings.issubset(actual_settings) assert set(["dates", "landsat_collection", "sat_list"]).issubset(actual_settings) + for key in pre_process_settings: + assert coastsegmap.settings[key] == pre_process_settings[key] def test_select_roi_layer( diff --git a/tests/test_common.py b/tests/test_common.py index 38ff6367..6b7cbea2 100644 --- a/tests/test_common.py +++ b/tests/test_common.py @@ -965,6 +965,7 @@ def test_load_settings_with_invalid_filepath(): def test_load_settings_with_nested_settings(config_json): + config_path, _ = config_json # Test loading specific settings from a JSON file with nested settings keys = { "model_session_path", @@ -996,7 +997,7 @@ def test_load_settings_with_nested_settings(config_json): "multiple_inter", "prc_multiple", } - settings = common.load_settings(config_json, keys) + settings = common.load_settings(config_path, keys) assert isinstance(settings, dict) assert settings["landsat_collection"] == "C02" assert settings["dates"] == ["2018-12-01", "2019-03-01"] @@ -1026,12 +1027,14 @@ def test_load_settings_with_nested_settings(config_json): def test_load_settings_with_empty_keys(config_json): # Test loading all settings from a JSON file - settings = common.load_settings(config_json, set()) + config_path, tmpdir = config_json + settings = common.load_settings(config_path, set()) assert isinstance(settings, dict) assert len(settings) > 0 def test_load_settings_with_set_keys(config_json): + config_path, tmpdir = config_json # Test loading specific settings from a JSON file using a set of keys keys = { "sat_list", @@ -1041,7 +1044,7 @@ def test_load_settings_with_set_keys(config_json): "output_epsg", "max_dist_ref", } - settings = common.load_settings(config_json, keys) + settings = common.load_settings(config_path, keys) assert isinstance(settings, dict) assert settings["dates"] == ["2018-12-01", "2019-03-01"] assert settings["sat_list"] == ["L5", "L7", "L8", "L9", "S2"] @@ -1052,6 +1055,7 @@ def test_load_settings_with_set_keys(config_json): def test_load_settings_with_list_keys(config_json): + config_path, tmpdir = config_json # Test loading specific settings from a JSON file using a list of keys keys = [ "sat_list", @@ -1061,7 +1065,7 @@ def test_load_settings_with_list_keys(config_json): "output_epsg", "max_dist_ref", ] - settings = common.load_settings(config_json, keys) + settings = common.load_settings(config_path, keys) assert settings["dates"] == ["2018-12-01", "2019-03-01"] assert settings["sat_list"] == ["L5", "L7", "L8", "L9", "S2"] assert settings["min_beach_area"] == 1050 From 
7df17691d5fe56b6b151b0ef7d6782a4b559bf47 Mon Sep 17 00:00:00 2001 From: Sharon Fitzpatrick Date: Mon, 4 Dec 2023 17:35:39 -0800 Subject: [PATCH 55/87] remove filter settings unused function --- src/coastseg/coastseg_map.py | 62 ++---------------------------------- 1 file changed, 2 insertions(+), 60 deletions(-) diff --git a/src/coastseg/coastseg_map.py b/src/coastseg/coastseg_map.py index 407af7dd..10338ec8 100644 --- a/src/coastseg/coastseg_map.py +++ b/src/coastseg/coastseg_map.py @@ -93,65 +93,6 @@ def _on_geo_data_changed(self, change): self.satname = properties.get("satname", "") self.date = properties.get("date", "") - -def filter_settings(**kwargs): - # Check if any of the keys are missing - # if any keys are missing set the default value - default_settings = { - "landsat_collection": "C02", - "dates": ["2017-12-01", "2018-01-01"], - "sat_list": ["L8"], - "cloud_thresh": 0.5, - "dist_clouds": 300, - "output_epsg": 4326, - "check_detection": False, - "adjust_detection": False, - "save_figure": True, - "min_beach_area": 4500, - "min_length_sl": 100, - "cloud_mask_issue": False, - "sand_color": "default", - "pan_off": "False", - "max_dist_ref": 25, - "along_dist": 25, - "min_points": 3, - "max_std": 15, - "max_range": 30, - "min_chainage": -100, - "multiple_inter": "auto", - "prc_multiple": 0.1, - "apply_cloud_mask": True, - "image_size_filter": True, - } - - # Function to parse dates with flexibility for different formats - def parse_date(date_str): - for fmt in ("%Y-%m-%dT%H:%M:%S", "%Y-%m-%d"): - try: - return datetime.strptime(date_str, fmt).strftime("%Y-%m-%d") - except ValueError: - continue - raise ValueError(f"Date format for {date_str} not recognized.") - - settings = {} - - # Filter kwargs to keep only keys that are in default_settings - filtered_kwargs = {k: v for k, v in kwargs.items() if k in default_settings} - - # Update settings with filtered kwargs - settings.update(filtered_kwargs) - - # Special handling for 'dates' - if "dates" in filtered_kwargs: - settings["dates"] = [parse_date(d) for d in filtered_kwargs["dates"]] - - # Set default values for missing keys - for key, value in default_settings.items(): - settings.setdefault(key, value) - - return settings - - class CoastSeg_Map: def __init__(self): # Basic settings and configurations @@ -789,7 +730,8 @@ def load_json_config(self, filepath: str, data_path: str) -> None: # creates a dictionary mapping ROI IDs to their extracted settings from json_data roi_settings = self._extract_and_validate_roi_settings(json_data, data_path) # Make sure each ROI has the specific settings for its save location, its ID, coordinates etc. 
- self.rois.roi_settings = roi_settings + if hasattr(self, "rois"): + self.rois.roi_settings = roi_settings logger.info(f"roi_settings: {roi_settings}") def load_config_files(self, dir_path: str, data_path: str) -> None: From 04df8da44b19a18aa83dde2680c0ac5b7a65d139 Mon Sep 17 00:00:00 2001 From: Sharon Fitzpatrick Date: Mon, 4 Dec 2023 17:41:54 -0800 Subject: [PATCH 56/87] v1.1.20 #199 #125 --- pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index ae74db1a..2d377841 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -5,7 +5,7 @@ build-backend = "setuptools.build_meta" [project] name = "coastseg" dynamic = ["readme"] -version = "1.1.19" +version = "1.1.20" authors = [ { name=" Sharon Fitzpatrick", email="sharon.fitzpatrick23@gmail.com" }, ] From 9d718fc6e0e35ae8eaaae877b3ebe1a62e17e660 Mon Sep 17 00:00:00 2001 From: Sharon Fitzpatrick Date: Tue, 5 Dec 2023 17:59:51 -0800 Subject: [PATCH 57/87] #205 save the config.json in the correct format --- src/coastseg/common.py | 46 ++++++++------------------------------- src/coastseg/zoo_model.py | 4 ++-- 2 files changed, 11 insertions(+), 39 deletions(-) diff --git a/src/coastseg/common.py b/src/coastseg/common.py index a3f7bb74..bc2e2179 100644 --- a/src/coastseg/common.py +++ b/src/coastseg/common.py @@ -122,40 +122,6 @@ def load_settings( return filtered_settings -def create_new_config(roi_ids: list, settings: dict, roi_settings: dict) -> dict: - """ - Creates a new configuration dictionary by combining the given settings and ROI settings. - - Arguments: - ----------- - roi_ids: list - A list of ROI IDs to include in the new configuration. - settings: dict - A dictionary containing general settings for the configuration. - roi_settings: dict - A dictionary containing ROI-specific settings for the configuration. - example: - {'example_roi_id': {'dates':[]} - - Returns: - ----------- - new_config: dict - A dictionary containing the combined settings and ROI settings, as well as the ROI IDs. - """ - new_config = { - "settings": {}, - "roi_ids": [], - } - if isinstance(roi_ids, str): - roi_ids = [roi_ids] - if not all(roi_id in roi_settings.keys() for roi_id in roi_ids): - raise ValueError(f"roi_ids {roi_ids} not in roi_settings {roi_settings.keys()}") - new_config = {**new_config, **roi_settings} - new_config["roi_ids"].extend(roi_ids) - new_config["settings"] = settings - return new_config - - def save_new_config(path: str, roi_ids: list, destination: str) -> dict: """Save a new config file to a path. @@ -175,7 +141,7 @@ def save_new_config(path: str, roi_ids: list, destination: str) -> dict: if roi_id in config.keys(): roi_settings[roi_id] = config[roi_id] - new_config = create_new_config(roi_ids, config["settings"], roi_settings) + new_config = create_json_config(roi_settings, config["settings"], roi_ids) with open(destination, "w") as f: json.dump(new_config, f) @@ -1682,7 +1648,10 @@ def create_csv_per_transect( f"ROI: {roi_id}Time-series of the shoreline change along the transects saved as:{fn}" ) -def move_report_files(settings: dict, dest: str, filename_pattern='extract_shorelines*.txt'): + +def move_report_files( + settings: dict, dest: str, filename_pattern="extract_shorelines*.txt" +): """ Move report files matching a specific pattern from the source directory to the destination. 
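For reference, the hunk above replaces the removed create_new_config helper with a direct call to create_json_config, whose argument order is (inputs, settings, roi_ids). A minimal sketch of that call pattern, assuming the ROI id and settings keys borrowed from the test fixtures in this same patch series and a placeholder filepath, might look like the following; it is illustrative only, not the authoritative usage.

from coastseg.common import create_json_config

# Sketch only: "zih2" and the settings keys mirror the config_json test fixture;
# "path/to/data" is a placeholder, not a real path.
roi_settings = {
    "zih2": {
        "dates": ["2018-12-01", "2019-03-01"],
        "sitename": "ID_zih2_datetime11-15-23__09_56_01",
        "filepath": "path/to/data",
    }
}
settings = {"landsat_collection": "C02", "output_epsg": 32610}

# inputs (per-ROI settings) first, then the shared settings, then the ROI ids to record
new_config = create_json_config(roi_settings, settings, roi_ids=["zih2"])
# Expected shape: {"zih2": {...}, "roi_ids": ["zih2"], "settings": {...}}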
@@ -1715,6 +1684,7 @@ def move_report_files(settings: dict, dest: str, filename_pattern='extract_shore except Exception as e: logger.error(f"Error moving files: {e}") + def save_extracted_shoreline_figures(settings: dict, save_path: str): """ Save extracted shoreline figures to a specified save path. @@ -1848,7 +1818,9 @@ def stringify_datetime_columns(gdf: gpd.GeoDataFrame) -> gpd.GeoDataFrame: return gdf -def create_json_config(inputs: dict, settings: dict, roi_ids: list[str] = []) -> dict: +def create_json_config( + inputs: dict, settings: dict = {}, roi_ids: list[str] = [] +) -> dict: """returns config dictionary with the settings, currently selected_roi ids, and each of the inputs specified by roi id. sample config: diff --git a/src/coastseg/zoo_model.py b/src/coastseg/zoo_model.py index e67a00c8..8da7fb48 100644 --- a/src/coastseg/zoo_model.py +++ b/src/coastseg/zoo_model.py @@ -850,10 +850,10 @@ def postprocess_data( # if configs do not exist then raise an error and do not save the session if not file_utilities.validate_config_files_exist(roi_directory): logger.warning( - f"Config files config.json or config_gdf.geojson do not exist in roi directory {roi_directory}\n This means that the download did not complete successfully." + f"Config files config.json or config_gdf.geojson do not exist in roi directory {roi_directory}" ) raise FileNotFoundError( - f"Config files config.json or config_gdf.geojson do not exist in roi directory {roi_directory}\n This means that the download did not complete successfully." + f"Config files config.json or config_gdf.geojson do not exist in roi directory {roi_directory}" ) # modify the config.json to only have the ROI ID that was used and save to session directory roi_id = file_utilities.extract_roi_id(roi_directory) From 5dfdc49e0cab23f2b9716ebe163371ebeaed2574 Mon Sep 17 00:00:00 2001 From: Sharon Fitzpatrick Date: Wed, 6 Dec 2023 18:28:01 -0800 Subject: [PATCH 58/87] #179 add test_merge_sessions --- scripts/test_merge_sessions.py | 298 +++++++++++++++++++++++++++++++++ 1 file changed, 298 insertions(+) create mode 100644 scripts/test_merge_sessions.py diff --git a/scripts/test_merge_sessions.py b/scripts/test_merge_sessions.py new file mode 100644 index 00000000..c7736b5f --- /dev/null +++ b/scripts/test_merge_sessions.py @@ -0,0 +1,298 @@ +import os +from coastseg import merge_utils, file_utilities +from coastseg.common import ( + convert_linestrings_to_multipoints, + stringify_datetime_columns, + get_cross_distance_df, +) +from functools import reduce +import geopandas as gpd +from coastsat import SDS_transects +import numpy as np +from merge_sessions import main, parse_arguments +import pytest +import sys +import argparse +import os +import shutil + +TEST_DATA_LOCATION = r"C:\development\doodleverse\coastseg\CoastSeg\test_data" +SAVE_LOCATION = ( + r"C:\development\doodleverse\coastseg\CoastSeg\test_data\merged_sessions" +) + + +def clear_directory(directory): + """ + Deletes all the contents of the specified directory. + + :param directory: The path to the directory to be cleared. + """ + for filename in os.listdir(directory): + file_path = os.path.join(directory, filename) + try: + if os.path.isfile(file_path) or os.path.islink(file_path): + os.unlink(file_path) # Remove files or links + elif os.path.isdir(file_path): + shutil.rmtree(file_path) # Remove directories + except Exception as e: + print(f"Failed to delete {file_path}. 
Reason: {e}") + + +def test_with_all_arguments(monkeypatch): + # Test case 1: Required arguments provided + test_args = [ + "program_name", + "-i", + "session1", + "session2", + "-n", + "merged_session", + "-s", + "save_location", + "-ad", + "30", + "-mp", + "5", + "-ms", + "20", + "-mr", + "20", + ] + monkeypatch.setattr(sys, "argv", test_args) + args = parse_arguments() + assert args.session_locations == ["session1", "session2"] + assert args.merged_session_name == "merged_session" + assert args.save_location == "save_location" + assert args.along_dist == 30 + assert args.min_points == 5 + assert args.max_std == 20 + assert args.max_range == 20 + assert args.min_chainage == -100 + assert args.multiple_inter == "auto" + assert args.prc_multiple == 0.1 + + +def test_with_mandatory_arguments_only(monkeypatch): + test_args = ["program_name", "-i", "session1", "session2", "-n", "merged_session"] + monkeypatch.setattr(sys, "argv", test_args) + args = parse_arguments() + assert args.session_locations == ["session1", "session2"] + assert args.merged_session_name == "merged_session" + # Check if defaults are correctly set for optional arguments + assert args.save_location == os.path.join(os.getcwd(), "merged_sessions") + assert args.along_dist == 25 + assert args.min_points == 3 + assert args.max_std == 15 + assert args.max_range == 30 + assert args.min_chainage == -100 + assert args.multiple_inter == "auto" + assert args.prc_multiple == 0.1 + + +def test_main_with_overlapping(): + # Create a Namespace object with your arguments + source_dest = os.path.join(TEST_DATA_LOCATION, "test_case4_overlapping") + session_locations = [ + os.path.join(source_dest, session) for session in os.listdir(source_dest) + ] + if not all([os.path.exists(session) for session in session_locations]): + raise Exception("Test data not found. 
Please download the test data") + + merged_session_name = "merged_session" + dest = os.path.join(SAVE_LOCATION, merged_session_name) + if os.path.exists(dest): + clear_directory(dest) + + mock_args = argparse.Namespace( + session_locations=session_locations, + save_location=SAVE_LOCATION, + merged_session_name=merged_session_name, + along_dist=25, + min_points=3, + max_std=15, + max_range=30, + min_chainage=-100, + multiple_inter="auto", + prc_multiple=0.1, + ) + + main(mock_args) + assert os.path.exists(dest) + assert os.path.exists(os.path.join(dest, "extracted_shorelines_dict.json")) + assert os.path.exists(os.path.join(dest, "extracted_shorelines_lines.geojson")) + assert os.path.exists(os.path.join(dest, "extracted_shorelines_points.geojson")) + assert os.path.exists(os.path.join(dest, "merged_config.geojson")) + assert os.path.exists(os.path.join(dest, "transect_time_series.csv")) + + # read all the shoreline geojson files to get the dates + gdfs = merge_utils.process_geojson_files( + session_locations, + ["extracted_shorelines_points.geojson", "extracted_shorelines.geojson"], + merge_utils.convert_lines_to_multipoints, + merge_utils.read_first_geojson_file, + ) + # get all the dates before merging + total_set = set() + for gdf in gdfs: + total_set.update(gdf.date) + # get the merged shorelines + merged_shorelines = reduce(merge_utils.merge_and_average, gdfs) + merged_set = set(merged_shorelines["date"]) + assert total_set == merged_set + + +def test_main_with_same_rois(): + # Create a Namespace object with your arguments + source_dest = os.path.join(TEST_DATA_LOCATION, "test_case1_same_rois") + session_locations = [ + os.path.join(source_dest, session) for session in os.listdir(source_dest) + ] + if not all([os.path.exists(session) for session in session_locations]): + raise Exception("Test data not found. 
Please download the test data") + + merged_session_name = "merged_session" + dest = os.path.join(SAVE_LOCATION, merged_session_name) + if os.path.exists(dest): + clear_directory(dest) + + mock_args = argparse.Namespace( + session_locations=session_locations, + save_location=SAVE_LOCATION, + merged_session_name=merged_session_name, + along_dist=25, + min_points=3, + max_std=15, + max_range=30, + min_chainage=-100, + multiple_inter="auto", + prc_multiple=0.1, + ) + + main(mock_args) + assert os.path.exists(dest) + assert os.path.exists(os.path.join(dest, "extracted_shorelines_dict.json")) + assert os.path.exists(os.path.join(dest, "extracted_shorelines_lines.geojson")) + assert os.path.exists(os.path.join(dest, "extracted_shorelines_points.geojson")) + assert os.path.exists(os.path.join(dest, "merged_config.geojson")) + assert os.path.exists(os.path.join(dest, "transect_time_series.csv")) + + # read all the shoreline geojson files to get the dates + gdfs = merge_utils.process_geojson_files( + session_locations, + ["extracted_shorelines_points.geojson", "extracted_shorelines.geojson"], + merge_utils.convert_lines_to_multipoints, + merge_utils.read_first_geojson_file, + ) + # get all the dates before merging + total_set = set() + for gdf in gdfs: + total_set.update(gdf.date) + # get the merged shorelines + merged_shorelines = reduce(merge_utils.merge_and_average, gdfs) + merged_set = set(merged_shorelines["date"]) + assert total_set == merged_set + + +def test_main_with_different_rois(): + # Create a Namespace object with your arguments + source_dest = os.path.join(TEST_DATA_LOCATION, "test_case2_different_rois") + session_locations = [ + os.path.join(source_dest, session) for session in os.listdir(source_dest) + ] + if not all([os.path.exists(session) for session in session_locations]): + raise Exception("Test data not found. 
Please download the test data") + + merged_session_name = "merged_session" + dest = os.path.join(SAVE_LOCATION, merged_session_name) + if os.path.exists(dest): + clear_directory(dest) + + mock_args = argparse.Namespace( + session_locations=session_locations, + save_location=SAVE_LOCATION, + merged_session_name=merged_session_name, + along_dist=25, + min_points=3, + max_std=15, + max_range=30, + min_chainage=-100, + multiple_inter="auto", + prc_multiple=0.1, + ) + + main(mock_args) + assert os.path.exists(dest) + assert os.path.exists(os.path.join(dest, "extracted_shorelines_dict.json")) + assert os.path.exists(os.path.join(dest, "extracted_shorelines_lines.geojson")) + assert os.path.exists(os.path.join(dest, "extracted_shorelines_points.geojson")) + assert os.path.exists(os.path.join(dest, "merged_config.geojson")) + assert os.path.exists(os.path.join(dest, "transect_time_series.csv")) + + # read all the shoreline geojson files to get the dates + gdfs = merge_utils.process_geojson_files( + session_locations, + ["extracted_shorelines_points.geojson", "extracted_shorelines.geojson"], + merge_utils.convert_lines_to_multipoints, + merge_utils.read_first_geojson_file, + ) + # get all the dates before merging + total_set = set() + for gdf in gdfs: + total_set.update(gdf.date) + # get the merged shorelines + merged_shorelines = reduce(merge_utils.merge_and_average, gdfs) + merged_set = set(merged_shorelines["date"]) + assert total_set == merged_set + + +def test_main_with_overlapping_dates(): + # Create a Namespace object with your arguments + source_dest = os.path.join(TEST_DATA_LOCATION, "test_case3_rois_overlapping_dates") + session_locations = [ + os.path.join(source_dest, session) for session in os.listdir(source_dest) + ] + if not all([os.path.exists(session) for session in session_locations]): + raise Exception("Test data not found. 
Please download the test data") + + merged_session_name = "merged_session" + dest = os.path.join(SAVE_LOCATION, merged_session_name) + if os.path.exists(dest): + clear_directory(dest) + + mock_args = argparse.Namespace( + session_locations=session_locations, + save_location=SAVE_LOCATION, + merged_session_name=merged_session_name, + along_dist=25, + min_points=3, + max_std=15, + max_range=30, + min_chainage=-100, + multiple_inter="auto", + prc_multiple=0.1, + ) + + main(mock_args) + assert os.path.exists(dest) + assert os.path.exists(os.path.join(dest, "extracted_shorelines_dict.json")) + assert os.path.exists(os.path.join(dest, "extracted_shorelines_lines.geojson")) + assert os.path.exists(os.path.join(dest, "extracted_shorelines_points.geojson")) + assert os.path.exists(os.path.join(dest, "merged_config.geojson")) + assert os.path.exists(os.path.join(dest, "transect_time_series.csv")) + + # read all the shoreline geojson files to get the dates + gdfs = merge_utils.process_geojson_files( + session_locations, + ["extracted_shorelines_points.geojson", "extracted_shorelines.geojson"], + merge_utils.convert_lines_to_multipoints, + merge_utils.read_first_geojson_file, + ) + # get all the dates before merging + total_set = set() + for gdf in gdfs: + total_set.update(gdf.date) + # get the merged shorelines + merged_shorelines = reduce(merge_utils.merge_and_average, gdfs) + merged_set = set(merged_shorelines["date"]) + assert total_set == merged_set From ba42b796a50f94bc83ac66dc8e3c18047a93e5f4 Mon Sep 17 00:00:00 2001 From: Sharon Fitzpatrick Date: Wed, 6 Dec 2023 18:29:48 -0800 Subject: [PATCH 59/87] #179 update merge_sessions.py w/ parse_arguments --- scripts/merge_sessions.py | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/scripts/merge_sessions.py b/scripts/merge_sessions.py index 3cf214f5..c14416e8 100644 --- a/scripts/merge_sessions.py +++ b/scripts/merge_sessions.py @@ -143,7 +143,7 @@ def main(args): ) -if __name__ == "__main__": +def parse_arguments(): # Create the parser parser = argparse.ArgumentParser(description="Merge sessions script.") @@ -223,5 +223,10 @@ def main(args): ) # Parse the arguments - args = parser.parse_args() + return parser.parse_args() + + +if __name__ == "__main__": + # Parse the arguments + args = parse_arguments() main(args) From d1af72456987174e9b02fe1708d904965d5d748c Mon Sep 17 00:00:00 2001 From: Sharon Fitzpatrick Date: Thu, 7 Dec 2023 07:05:31 -0800 Subject: [PATCH 60/87] testing merge_sessions --- ...e_roi_across_multiple_sessions_final.ipynb | 254 ++++++++++++++++++ 1 file changed, 254 insertions(+) diff --git a/merge_session_same_roi_across_multiple_sessions_final.ipynb b/merge_session_same_roi_across_multiple_sessions_final.ipynb index 7140b9d1..dd82a281 100644 --- a/merge_session_same_roi_across_multiple_sessions_final.ipynb +++ b/merge_session_same_roi_across_multiple_sessions_final.ipynb @@ -52,6 +52,52 @@ " r'C:\\development\\doodleverse\\coastseg\\CoastSeg\\sessions\\ID_rrw15_datetime11-21-23__11_35_25_es3\\ID_rrw15_datetime11-21-23__11_35_25']\n" ] }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "['C:\\\\development\\\\doodleverse\\\\coastseg\\\\CoastSeg\\\\test_data\\\\test_case4_overlapping\\\\ID_gac1_datetime10-30-23__01_44_50',\n", + " 'C:\\\\development\\\\doodleverse\\\\coastseg\\\\CoastSeg\\\\test_data\\\\test_case4_overlapping\\\\ID_gac6_datetime10-30-23__01_44_50']" + ] + }, + "execution_count": 2, + "metadata": {}, + "output_type": 
"execute_result" + } + ], + "source": [ + "import os\n", + "TEST_DATA_LOCATION = r'C:\\development\\doodleverse\\coastseg\\CoastSeg\\test_data'\n", + "source_dest = os.path.join(TEST_DATA_LOCATION, \"test_case4_overlapping\")\n", + "session_locations = [ os.path.join(source_dest,session) for session in os.listdir(source_dest) ]\n", + "all()\n", + "session_locations" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "True" + ] + }, + "execution_count": 4, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "all([os.path.exists(session) for session in session_locations])" + ] + }, { "cell_type": "code", "execution_count": null, @@ -784,6 +830,214 @@ "# Save the timeseries of intersections between the shoreline and a single tranesct to csv file\n", "create_csv_per_transect(merged_session_location,cross_distance,shoreline_dict,)" ] + }, + { + "cell_type": "code", + "execution_count": 16, + "metadata": {}, + "outputs": [], + "source": [ + "from coastseg import merge_utils\n", + "import numpy as np\n", + "from functools import reduce" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": {}, + "outputs": [], + "source": [ + "\n", + "TEST_DATA_LOCATION = r\"C:\\development\\doodleverse\\coastseg\\CoastSeg\\test_data\"\n", + "SAVE_LOCATION = r\"C:\\development\\doodleverse\\coastseg\\CoastSeg\\merged_sessions2\"\n", + "\n", + "source_dest = os.path.join(TEST_DATA_LOCATION, \"test_case4_overlapping\")\n", + "session_locations = [\n", + " os.path.join(source_dest, session) for session in os.listdir(source_dest)\n", + "]\n", + "\n", + "gdfs = merge_utils.process_geojson_files(\n", + " session_locations,\n", + " [\"extracted_shorelines_points.geojson\", \"extracted_shorelines.geojson\"],\n", + " merge_utils.convert_lines_to_multipoints,\n", + " merge_utils.read_first_geojson_file,\n", + " )\n" + ] + }, + { + "cell_type": "code", + "execution_count": 17, + "metadata": {}, + "outputs": [], + "source": [ + "merged_shorelines = reduce(merge_utils.merge_and_average, gdfs)" + ] + }, + { + "cell_type": "code", + "execution_count": 18, + "metadata": {}, + "outputs": [], + "source": [ + "merged_set = set(merged_shorelines[\"date\"])" + ] + }, + { + "cell_type": "code", + "execution_count": 13, + "metadata": {}, + "outputs": [], + "source": [ + "total_set = set()\n", + "for gdf in gdfs:\n", + " total_set.update(gdf.date)" + ] + }, + { + "cell_type": "code", + "execution_count": 19, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "True" + ] + }, + "execution_count": 19, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "total_set == merged_set" + ] + }, + { + "cell_type": "code", + "execution_count": 14, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "{Timestamp('2021-12-02 18:46:26'),\n", + " Timestamp('2021-12-03 18:40:18'),\n", + " Timestamp('2021-12-10 18:46:25'),\n", + " Timestamp('2021-12-18 18:46:24'),\n", + " Timestamp('2021-12-26 18:46:22'),\n", + " Timestamp('2022-01-11 18:46:19'),\n", + " Timestamp('2022-01-19 18:46:16'),\n", + " Timestamp('2022-01-27 18:46:21'),\n", + " Timestamp('2022-02-04 18:46:12'),\n", + " Timestamp('2022-02-05 18:40:33'),\n", + " Timestamp('2022-02-12 18:46:17'),\n", + " Timestamp('2022-02-20 18:46:06'),\n", + " Timestamp('2022-02-28 18:46:06'),\n", + " Timestamp('2022-03-08 18:46:04'),\n", + " Timestamp('2022-03-16 18:46:05'),\n", + " Timestamp('2022-03-24 18:45:53'),\n", + " 
Timestamp('2022-04-01 18:46:01'),\n", + " Timestamp('2022-04-09 18:45:53'),\n", + " Timestamp('2022-04-10 18:40:11'),\n", + " Timestamp('2022-04-17 18:45:53'),\n", + " Timestamp('2022-04-18 18:39:43'),\n", + " Timestamp('2022-04-25 18:45:51'),\n", + " Timestamp('2022-04-26 18:39:39'),\n", + " Timestamp('2022-05-03 18:45:48'),\n", + " Timestamp('2022-05-11 18:45:59'),\n", + " Timestamp('2022-05-12 18:39:31'),\n", + " Timestamp('2022-05-12 18:39:55'),\n", + " Timestamp('2022-05-19 18:45:35'),\n", + " Timestamp('2022-05-27 18:45:59'),\n", + " Timestamp('2022-06-12 18:46:13'),\n", + " Timestamp('2022-06-20 18:45:49'),\n", + " Timestamp('2022-06-28 18:46:19'),\n", + " Timestamp('2022-06-29 18:40:10'),\n", + " Timestamp('2022-07-06 18:45:55'),\n", + " Timestamp('2022-07-14 18:46:17'),\n", + " Timestamp('2022-07-22 18:46:00'),\n", + " Timestamp('2022-07-30 18:46:29'),\n", + " Timestamp('2022-08-07 18:46:13'),\n", + " Timestamp('2022-08-15 18:46:36'),\n", + " Timestamp('2022-08-16 18:40:30'),\n", + " Timestamp('2022-08-23 18:46:18'),\n", + " Timestamp('2022-08-31 18:46:36'),\n", + " Timestamp('2022-09-08 18:46:17'),\n", + " Timestamp('2022-09-16 18:46:43'),\n", + " Timestamp('2022-09-24 18:46:25'),\n", + " Timestamp('2022-10-02 18:46:42'),\n", + " Timestamp('2022-10-10 18:46:28'),\n", + " Timestamp('2022-10-18 18:46:38'),\n", + " Timestamp('2022-10-26 18:46:31'),\n", + " Timestamp('2022-11-03 18:46:43'),\n", + " Timestamp('2022-11-11 18:46:32'),\n", + " Timestamp('2022-11-19 18:46:36'),\n", + " Timestamp('2022-11-27 18:46:30')}" + ] + }, + "execution_count": 14, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "total_set " + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "array(['2021-12-02T18:46:26.000000000', '2021-12-03T18:40:18.000000000',\n", + " '2021-12-10T18:46:25.000000000', '2021-12-18T18:46:24.000000000',\n", + " '2021-12-26T18:46:22.000000000', '2022-01-11T18:46:19.000000000',\n", + " '2022-01-19T18:46:16.000000000', '2022-01-27T18:46:21.000000000',\n", + " '2022-02-04T18:46:12.000000000', '2022-02-05T18:40:33.000000000',\n", + " '2022-02-12T18:46:17.000000000', '2022-02-20T18:46:06.000000000',\n", + " '2022-02-28T18:46:06.000000000', '2022-03-08T18:46:04.000000000',\n", + " '2022-03-16T18:46:05.000000000', '2022-03-24T18:45:53.000000000',\n", + " '2022-04-01T18:46:01.000000000', '2022-04-09T18:45:53.000000000',\n", + " '2022-04-10T18:40:11.000000000', '2022-04-17T18:45:53.000000000',\n", + " '2022-04-18T18:39:43.000000000', '2022-04-25T18:45:51.000000000',\n", + " '2022-04-26T18:39:39.000000000', '2022-05-03T18:45:48.000000000',\n", + " '2022-05-11T18:45:59.000000000', '2022-05-12T18:39:31.000000000',\n", + " '2022-05-19T18:45:35.000000000', '2022-05-27T18:45:59.000000000',\n", + " '2022-06-12T18:46:13.000000000', '2022-06-20T18:45:49.000000000',\n", + " '2022-06-28T18:46:19.000000000', '2022-06-29T18:40:10.000000000',\n", + " '2022-07-06T18:45:55.000000000', '2022-07-14T18:46:17.000000000',\n", + " '2022-07-22T18:46:00.000000000', '2022-07-30T18:46:29.000000000',\n", + " '2022-08-07T18:46:13.000000000', '2022-08-15T18:46:36.000000000',\n", + " '2022-08-16T18:40:30.000000000', '2022-08-23T18:46:18.000000000',\n", + " '2022-08-31T18:46:36.000000000', '2022-09-08T18:46:17.000000000',\n", + " '2022-09-16T18:46:43.000000000', '2022-09-24T18:46:25.000000000',\n", + " '2022-10-02T18:46:42.000000000', '2022-10-10T18:46:28.000000000',\n", + " '2022-10-18T18:46:38.000000000', 
'2022-10-26T18:46:31.000000000',\n", + " '2022-11-03T18:46:43.000000000', '2022-11-11T18:46:32.000000000',\n", + " '2022-11-19T18:46:36.000000000', '2022-11-27T18:46:30.000000000'],\n", + " dtype='datetime64[ns]')" + ] + }, + "execution_count": 11, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "np.array(gdfs[0].date)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] } ], "metadata": { From b87588461908ff6a35dbd4cdba4978de73f0b65b Mon Sep 17 00:00:00 2001 From: Sharon Fitzpatrick Date: Thu, 7 Dec 2023 09:27:15 -0800 Subject: [PATCH 61/87] #179 fix fixture + remove unused test --- tests/conftest.py | 8 +++++--- tests/test_merge_utils.py | 27 --------------------------- 2 files changed, 5 insertions(+), 30 deletions(-) diff --git a/tests/conftest.py b/tests/conftest.py index 9ca597d5..cfecd992 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -16,6 +16,7 @@ script_dir = os.path.dirname(os.path.abspath(__file__)) + @pytest.fixture(scope="session") def config_json_no_sitename_dir(): # create a temporary directory that will represent the downloaded ROI directory @@ -78,7 +79,7 @@ def config_json_no_sitename_dir(): tmpfile_path = tmpfile.name # Save the filepath # Yield the filepath to the test - yield tmpfile_path,temp_dir + yield tmpfile_path, temp_dir # Cleanup - delete the file after tests are done os.remove(tmpfile_path) @@ -148,7 +149,7 @@ def config_json(): tmpfile_path = tmpfile.name # Save the filepath # Yield the filepath to the test - yield tmpfile_path,temp_dir + yield tmpfile_path, temp_dir # Cleanup - delete the file after tests are done os.remove(tmpfile_path) @@ -207,7 +208,8 @@ def temp_src_files(): # Cleanup for f in files: - os.remove(f) + if os.path.exists(f): + os.remove(f) # @pytest.fixture(scope="session") diff --git a/tests/test_merge_utils.py b/tests/test_merge_utils.py index c096f24f..fe254d33 100644 --- a/tests/test_merge_utils.py +++ b/tests/test_merge_utils.py @@ -686,33 +686,6 @@ def test_merge_and_average_1_gdf(extracted_gdf1): assert new_result["geometry"].equals(extracted_gdf1["geometry"]) -def test_aggregate_gdf(): - # Create a sample GeoDataFrame - data = { - "field1": [1, 1, 2, 2, 3], - "field2": ["A", "A", "B", "B", "C"], - "field3": [10, 20, 30, 40, 50], - } - gdf = gpd.GeoDataFrame(data) - - # Define the group fields - group_fields = ["field1", "field2"] - - # Call the aggregate_gdf function - result = merge_utils.aggregate_gdf(gdf, group_fields) - - # Define the expected result - expected_data = { - "field1": [1, 2, 3], - "field2": ["A", "B", "C"], - "field3": ["10, 20", "30, 40", "50"], - } - expected_result = gpd.GeoDataFrame(expected_data) - - # Check if the resulting GeoDataFrame is equal to the expected GeoDataFrame - assert result.equals(expected_result) - - @pytest.fixture def merged_config_no_nulls_no_index_right(): data = { From 37578afb78e8d26e39f0040b6a1e905768447057 Mon Sep 17 00:00:00 2001 From: Sharon Fitzpatrick Date: Thu, 7 Dec 2023 09:29:46 -0800 Subject: [PATCH 62/87] v1.1.21 --- pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index 2d377841..5bd7eac2 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -5,7 +5,7 @@ build-backend = "setuptools.build_meta" [project] name = "coastseg" dynamic = ["readme"] -version = "1.1.20" +version = "1.1.21" authors = [ { name=" Sharon Fitzpatrick", email="sharon.fitzpatrick23@gmail.com" }, ] From dfe34e899946c955f40331c9a2bf7cce28b64551 Mon 
Sep 17 00:00:00 2001 From: Sharon Fitzpatrick Date: Thu, 7 Dec 2023 16:18:50 -0800 Subject: [PATCH 63/87] update extract_feature_from_geodataframe --- src/coastseg/common.py | 3 -- src/coastseg/extracted_shoreline.py | 1 - src/coastseg/geodata_processing.py | 66 ++++++++++++++++++++++------- 3 files changed, 51 insertions(+), 19 deletions(-) diff --git a/src/coastseg/common.py b/src/coastseg/common.py index 2686ec88..ddc56da5 100644 --- a/src/coastseg/common.py +++ b/src/coastseg/common.py @@ -406,9 +406,6 @@ def validate_geometry_types( wrong_geom_type=geom_type, help_msg=help_message, ) - # raise ValueError( - # f"The {feature_type} contained a geometry of type '{geom_type}' which is not in the list of valid types: {valid_types}" - # ) def get_roi_polygon( diff --git a/src/coastseg/extracted_shoreline.py b/src/coastseg/extracted_shoreline.py index 2b478ddc..85732f2e 100644 --- a/src/coastseg/extracted_shoreline.py +++ b/src/coastseg/extracted_shoreline.py @@ -1979,7 +1979,6 @@ def get_reference_shoreline( """ # project shorelines's espg from map's espg to output espg given in settings reprojected_shorlines = shoreline_gdf.to_crs(output_crs) - logger.info(f"reprojected_shorlines: {reprojected_shorlines}") # convert shoreline_in_roi gdf to coastsat compatible format np.array([[lat,lon,0],[lat,lon,0]...]) shorelines = make_coastsat_compatible(reprojected_shorlines) # shorelines = [([lat,lon],[lat,lon],[lat,lon]),([lat,lon],[lat,lon],[lat,lon])...] diff --git a/src/coastseg/geodata_processing.py b/src/coastseg/geodata_processing.py index 546528e0..43551c1a 100644 --- a/src/coastseg/geodata_processing.py +++ b/src/coastseg/geodata_processing.py @@ -165,35 +165,71 @@ def extract_feature_from_geodataframe( gdf: gpd.GeoDataFrame, feature_type: str, type_column: str = "type" ) -> gpd.GeoDataFrame: """ - Extracts a GeoDataFrame of features of a given type and specified columns from a larger GeoDataFrame. + Extracts a GeoDataFrame of features of a given type from a larger GeoDataFrame. Args: gdf (gpd.GeoDataFrame): The GeoDataFrame containing the features to extract. - feature_type (str): The type of feature to extract. Typically one of the following 'shoreline','rois','transects','bbox' + feature_type (str): The type of feature to extract. Typically one of the following 'shoreline', 'rois', 'transects', 'bbox'. type_column (str, optional): The name of the column containing feature types. Defaults to 'type'. Returns: - gpd.GeoDataFrame: A new GeoDataFrame containing only the features of the specified type and columns. + gpd.GeoDataFrame: A new GeoDataFrame containing only the features of the specified type. Raises: - ValueError: Raised when feature_type or any of the columns specified do not exist in the GeoDataFrame. + ValueError: Raised when feature_type or the type_column do not exist in the GeoDataFrame. """ + # Convert column names to lower case for case-insensitive matching + gdf.columns = gdf.columns.str.lower() + type_column = type_column.lower() + # Check if type_column exists in the GeoDataFrame if type_column not in gdf.columns: raise ValueError( f"Column '{type_column}' does not exist in the GeoDataFrame. 
Incorrect config_gdf.geojson loaded" ) - # Check if feature_type ends with 's' and define alternative feature_type - if feature_type.endswith("s"): - alt_feature_type = feature_type[:-1] - else: - alt_feature_type = feature_type + "s" + # Handling pluralization of feature_type + feature_types = {feature_type.lower(), (feature_type + 's').lower(), (feature_type.rstrip('s')).lower()} + + # Filter the GeoDataFrame for the specified types + filtered_gdf = gdf[gdf[type_column].str.lower().isin(feature_types)] + + return filtered_gdf + + +# def extract_feature_from_geodataframe( +# gdf: gpd.GeoDataFrame, feature_type: str, type_column: str = "type" +# ) -> gpd.GeoDataFrame: +# """ +# Extracts a GeoDataFrame of features of a given type and specified columns from a larger GeoDataFrame. + +# Args: +# gdf (gpd.GeoDataFrame): The GeoDataFrame containing the features to extract. +# feature_type (str): The type of feature to extract. Typically one of the following 'shoreline','rois','transects','bbox' +# type_column (str, optional): The name of the column containing feature types. Defaults to 'type'. + +# Returns: +# gpd.GeoDataFrame: A new GeoDataFrame containing only the features of the specified type and columns. + +# Raises: +# ValueError: Raised when feature_type or any of the columns specified do not exist in the GeoDataFrame. +# """ +# # Check if type_column exists in the GeoDataFrame +# if type_column not in gdf.columns: +# raise ValueError( +# f"Column '{type_column}' does not exist in the GeoDataFrame. Incorrect config_gdf.geojson loaded" +# ) + +# # Check if feature_type ends with 's' and define alternative feature_type +# if feature_type.endswith("s"): +# alt_feature_type = feature_type[:-1] +# else: +# alt_feature_type = feature_type + "s" - # Filter using both feature_types - main_feature_gdf = gdf[gdf[type_column] == feature_type] - alt_feature_gdf = gdf[gdf[type_column] == alt_feature_type] +# # Filter using both feature_types +# main_feature_gdf = gdf[gdf[type_column] == feature_type] +# alt_feature_gdf = gdf[gdf[type_column] == alt_feature_type] - # Combine both GeoDataFrames - combined_gdf = pd.concat([main_feature_gdf, alt_feature_gdf]) - return combined_gdf +# # Combine both GeoDataFrames +# combined_gdf = pd.concat([main_feature_gdf, alt_feature_gdf]) +# return combined_gdf From 5a416bcfa62ad86be0525d661a3d92d77db04085 Mon Sep 17 00:00:00 2001 From: Sharon Fitzpatrick Date: Thu, 7 Dec 2023 16:19:05 -0800 Subject: [PATCH 64/87] update load_intersecting_transects --- src/coastseg/transects.py | 60 ++++++++++++++++++++++++++------------- 1 file changed, 41 insertions(+), 19 deletions(-) diff --git a/src/coastseg/transects.py b/src/coastseg/transects.py index b7e98da1..ceb0fd20 100644 --- a/src/coastseg/transects.py +++ b/src/coastseg/transects.py @@ -24,6 +24,7 @@ def load_intersecting_transects( transect_files: List[str], transect_dir: str, columns_to_keep: set = set(["id", "geometry", "slope"]), + **kwargs, ) -> gpd.GeoDataFrame: """ Loads transects from a list of GeoJSON files in the transect directory, selects the transects that intersect with @@ -33,37 +34,59 @@ def load_intersecting_transects( rectangle (gpd.GeoDataFrame): A GeoDataFrame defining the rectangle to select transects within. transect_files (List[str]): A list of filenames of the GeoJSON transect files to load. transect_dir (str): The directory where the GeoJSON transect files are located. + columns_to_keep (set, optional): A set of column names to keep in the resulting GeoDataFrame. 
Defaults to set(["id", "geometry", "slope"]). + **kwargs: Additional keyword arguments. + + Keyword Args: + crs (str, optional): The coordinate reference system (CRS) to use. Defaults to "EPSG:4326". Returns: gpd.GeoDataFrame: A new GeoDataFrame with the selected columns ('id', 'geometry', 'slope') containing the transects that intersect with the rectangle. """ - # Create an empty GeoDataFrame to hold the selected transects - selected_transects = gpd.GeoDataFrame(columns=list(columns_to_keep)) + crs = kwargs.get("crs", "EPSG:4326") - # Get the bounding box of the rectangle + # Create an empty GeoDataFrame to hold the selected transects + selected_transects = gpd.GeoDataFrame(columns=list(columns_to_keep), crs=crs) + + # Get the bounding box of the rectangle in the same CRS as the transects + if hasattr(rectangle, "crs") and rectangle.crs: + rectangle = rectangle.copy().to_crs(crs) + else: + rectangle = rectangle.copy().set_crs(crs) + # get the bounding box of the rectangle bbox = rectangle.bounds.iloc[0].tolist() + # Create a list to store the GeoDataFrames + gdf_list = [] + # Iterate over each transect file and select the transects that intersect with the rectangle for transect_file in transect_files: transects_name = os.path.splitext(transect_file)[0] transect_path = os.path.join(transect_dir, transect_file) transects = gpd.read_file(transect_path, bbox=bbox) - if transects.empty: - logger.info("Skipping %s", transects_name) - continue - elif not transects.empty: + # drop any columns that are not in columns_to_keep + columns_to_keep = set(col.lower() for col in columns_to_keep) + transects = transects[ + [col for col in transects.columns if col.lower() in columns_to_keep] + ] + # if the transects are not empty then add them to the list + if not transects.empty: logger.info("Adding transects from %s", transects_name) - transects = preprocess_geodataframe( - transects, columns_to_keep=list(columns_to_keep), create_ids=False - ) - # Append the selected transects to the output GeoDataFrame - selected_transects = pd.concat( - [selected_transects, transects], ignore_index=True - ) - selected_transects = preprocess_geodataframe( - selected_transects, columns_to_keep=list(columns_to_keep), create_ids=True - ) + gdf_list.append(transects) + + # Concatenate all the GeoDataFrames in the list into one GeoDataFrame + if gdf_list: + selected_transects = pd.concat(gdf_list, ignore_index=True) + + if not selected_transects.empty: + selected_transects = preprocess_geodataframe( + selected_transects, + columns_to_keep=list(columns_to_keep), + create_ids=True, + output_crs=crs, + ) + # ensure that the transects are either LineStrings or MultiLineStrings validate_geometry_types( selected_transects, set(["LineString", "MultiLineString"]), @@ -101,6 +124,7 @@ class Transects: # nearest_x: x-coordinate of the nearest slope location to the transect # nearest_y: y-coordinate of the nearest slope location to the transect + def __init__( self, bbox: gpd.GeoDataFrame = None, @@ -156,8 +180,6 @@ def initialize_transects_with_transects(self, transects: gpd.GeoDataFrame): ) # if not all the ids in transects are unique then create unique ids transects = create_unique_ids(transects, prefix_length=3) - # @todo add the transects to the current dataframe - # @todo make sure none of the ids already exist in the dataframe. 
this can be a flag to turn an exception on/off self.gdf = transects def initialize_transects_with_bbox(self, bbox: gpd.GeoDataFrame): From e7a3e56cc8cdcbc896e88895d813a6109ab7eec8 Mon Sep 17 00:00:00 2001 From: Sharon Fitzpatrick Date: Thu, 7 Dec 2023 16:19:27 -0800 Subject: [PATCH 65/87] update merge_geometries in merge_utils --- src/coastseg/merge_utils.py | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/src/coastseg/merge_utils.py b/src/coastseg/merge_utils.py index 6b189610..e7b12487 100644 --- a/src/coastseg/merge_utils.py +++ b/src/coastseg/merge_utils.py @@ -322,9 +322,13 @@ def merge_geometries(merged_gdf, columns=None, operation=unary_union): else: columns = [col for col in columns if col in merged_gdf.columns] - merged_gdf["geometry"] = merged_gdf[columns].apply( - lambda row: operation(row.tolist()), axis=1 + # set the geometry of th merged_gdf to the result of the operation + # if no operation is provided unary_union is used to combine the geometries for the provided columns + merged_gdf.set_geometry( + merged_gdf[columns].apply(lambda row: operation(row.tolist()), axis=1), + inplace=True, ) + for col in columns: if col in merged_gdf.columns and col != "geometry": merged_gdf = merged_gdf.drop(columns=col) From 5f02ad5278149839fa3ff05f72d3aff08a1f9998 Mon Sep 17 00:00:00 2001 From: Sharon Fitzpatrick Date: Thu, 7 Dec 2023 16:20:12 -0800 Subject: [PATCH 66/87] remove old code from shoreline.py --- src/coastseg/shoreline.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/src/coastseg/shoreline.py b/src/coastseg/shoreline.py index bf992713..47d05f3f 100644 --- a/src/coastseg/shoreline.py +++ b/src/coastseg/shoreline.py @@ -11,7 +11,6 @@ preprocess_geodataframe, create_unique_ids, ) - from coastseg.common import validate_geometry_types # External dependencies imports @@ -130,6 +129,7 @@ def initialize_shorelines_with_shorelines(self, shorelines: gpd.GeoDataFrame): "TIDAL_RANGE", "MEAN_SIG_WAVEHEIGHT", ] + if not shorelines.crs: logger.warning( f"shorelines did not have a crs converting to crs 4326 \n {shorelines}" @@ -146,8 +146,6 @@ def initialize_shorelines_with_shorelines(self, shorelines: gpd.GeoDataFrame): ) # make sure all the ids are unique with 3 random chars in front of id number shorelines = self.create_ids_service(shorelines, 3) - # @todo add the shorelines to the current dataframe - # @todo make sure none of the ids already exist in the dataframe. 
this can be a flag to turn an exception on/off self.gdf = shorelines def initialize_shorelines_with_bbox(self, bbox: gpd.GeoDataFrame): From 174438babc5cb8a098c107982f2e1211038a5672 Mon Sep 17 00:00:00 2001 From: Sharon Fitzpatrick Date: Mon, 11 Dec 2023 10:36:27 -0800 Subject: [PATCH 67/87] no print ref shoreline to log --- src/coastseg/extracted_shoreline.py | 18 +++++++++++++++++- 1 file changed, 17 insertions(+), 1 deletion(-) diff --git a/src/coastseg/extracted_shoreline.py b/src/coastseg/extracted_shoreline.py index 85732f2e..1e08ab3f 100644 --- a/src/coastseg/extracted_shoreline.py +++ b/src/coastseg/extracted_shoreline.py @@ -573,7 +573,23 @@ def process_satellite_image( do_cloud_mask=apply_cloud_mask, ) - logger.info(f"process_satellite_image_settings: {settings}") + # Log all items except 'reference shoreline' and handle 'reference shoreline' separately + logger.info( + "process_satellite_image_settings : " + + ", ".join( + f"{key}: {value}" + for key, value in settings.items() + if key != "reference shoreline" + ) + ) + + # Check and log 'reference shoreline' if it exists + ref_sl = settings.get("reference shoreline", np.array([])) + if isinstance(ref_sl, np.ndarray): + logger.info(f"reference shoreline.shape: {ref_sl.shape}") + logger.info( + f"Number of 'reference_shoreline': {len(settings.get('reference_shoreline', np.array([])))}" + ) # if percentage of no data pixels are greater than allowed, skip percent_no_data_allowed = settings.get("percent_no_data", None) From 9ed1419719188e59de863765c9d0ffb843ccb4de Mon Sep 17 00:00:00 2001 From: Sharon Fitzpatrick Date: Mon, 11 Dec 2023 17:59:12 -0800 Subject: [PATCH 68/87] #207 improve logging zoo workflow --- src/coastseg/coastseg_map.py | 11 +- src/coastseg/common.py | 49 +++---- src/coastseg/extracted_shoreline.py | 212 ++++++++++++++++++---------- src/coastseg/file_utilities.py | 3 +- src/coastseg/filters.py | 6 +- src/coastseg/models_UI.py | 37 +++-- src/coastseg/zoo_model.py | 55 ++++---- 7 files changed, 217 insertions(+), 156 deletions(-) diff --git a/src/coastseg/coastseg_map.py b/src/coastseg/coastseg_map.py index 3ed13c85..2b5bb753 100644 --- a/src/coastseg/coastseg_map.py +++ b/src/coastseg/coastseg_map.py @@ -17,8 +17,6 @@ from ipywidgets import Layout, HTML, HBox from tqdm.auto import tqdm import traitlets -from shapely.geometry import Polygon -from pandas import to_datetime # Internal/Local imports: specific classes/functions from coastseg.bbox import Bounding_Box @@ -94,6 +92,7 @@ def _on_geo_data_changed(self, change): self.satname = properties.get("satname", "") self.date = properties.get("date", "") + class CoastSeg_Map: def __init__(self): # Basic settings and configurations @@ -1535,10 +1534,11 @@ def save_session(self, roi_ids: list[str], save_transects: bool = True): shoreline_settings = extracted_shoreline.shoreline_settings common.save_extracted_shoreline_figures(shoreline_settings, session_path) # move extracted shoreline reports to session directory - common.move_report_files(shoreline_settings, session_path,'extract_shorelines*.txt') + common.move_report_files( + shoreline_settings, session_path, "extract_shorelines*.txt" + ) # save the geojson and json files for extracted shorelines common.save_extracted_shorelines(extracted_shoreline, session_path) - # save transects to session folder if save_transects: @@ -1654,11 +1654,8 @@ def save_csv_per_transect(self, roi_ids: list, rois: ROI) -> None: # save source data self.save_config(session_path) # save to csv file session path - fn = 
os.path.join(session_path, "%s_timeseries_raw.csv" % key) - logger.info(f"Save time series to {fn}") if os.path.exists(fn): - logger.info(f"Overwriting:{fn}") os.remove(fn) df.to_csv(fn, sep=",") logger.info( diff --git a/src/coastseg/common.py b/src/coastseg/common.py index ddc56da5..d5286616 100644 --- a/src/coastseg/common.py +++ b/src/coastseg/common.py @@ -459,8 +459,6 @@ def get_cert_path_from_config(config_file="certifications.json"): # Get the cert path cert_path = config.get("cert_path") - logger.info(f"certifications.json cert_path: {cert_path}") - # If the cert path is a valid file, return it if cert_path and os.path.isfile(cert_path): logger.info(f"certifications.json cert_path isfile: {cert_path}") @@ -793,7 +791,6 @@ def get_downloaded_models_dir() -> str: ) if not os.path.exists(downloaded_models_path): os.mkdir(downloaded_models_path) - logger.info(f"downloaded_models_path: {downloaded_models_path}") return downloaded_models_path @@ -1131,7 +1128,6 @@ def create_hover_box(title: str, feature_html: HTML = HTML("")) -> VBox: container = VBox([container_header]) def uncollapse_click(change: dict): - logger.info(change) if feature_html.value == "": container_content.children = [msg] elif feature_html.value != "": @@ -1140,7 +1136,6 @@ def uncollapse_click(change: dict): container.children = [container_header, container_content] def collapse_click(change: dict): - logger.info(change) container_header.children = [title, uncollapse_button] container.children = [container_header] @@ -1249,35 +1244,38 @@ def download_url(url: str, save_path: str, filename: str = None, chunk_size: int with response as r: logger.info(r) if r.status_code == 404: - logger.error(f"Error {r.status_code}. DownloadError: {save_path}") + logger.error(f"Error {r.status_code}. DownloadError: {save_path} {r}") raise exceptions.DownloadError(os.path.basename(save_path)) if r.status_code == 429: - logger.error(f"Error {r.status_code}.DownloadError: {save_path}") + logger.error(f"Error {r.status_code}.DownloadError: {save_path} {r}") raise Exception( "Zenodo has denied the request. You may have requested too many files at once." ) if r.status_code != 200: - logger.error(f"Error {r.status_code}. DownloadError: {save_path}") + logger.error(f"Error {r.status_code}. 
DownloadError: {save_path} {r}") raise exceptions.DownloadError(os.path.basename(save_path)) # check header to get content length, in bytes content_length = r.headers.get("Content-Length") if content_length: total_length = int(content_length) - with open(save_path, "wb") as fd: - with tqdm( - total=total_length, - unit="B", - unit_scale=True, - unit_divisor=1024, - desc=f"Downloading {filename}", - initial=0, - ascii=True, - ) as pbar: - for chunk in r.iter_content(chunk_size=chunk_size): - fd.write(chunk) - pbar.update(len(chunk)) else: + # Content-Length not available logger.warning("Content length not found in response headers") + total_length = None + + with open(save_path, "wb") as fd: + with tqdm( + total=total_length, + unit="B", + unit_scale=True, + unit_divisor=1024, + desc=f"Downloading {filename}", + initial=0, + ascii=True, + ) as pbar: + for chunk in r.iter_content(chunk_size=chunk_size): + fd.write(chunk) + pbar.update(len(chunk)) def get_center_point(coords: list) -> tuple: @@ -1639,29 +1637,22 @@ def create_csv_per_transect( out_dict = dict([]) # copy shoreline intersects for each transect out_dict[key] = cross_distance_transects[key] - logger.info( - f"out dict roi_ids columns : {[roi_id for _ in range(len(extracted_shorelines_dict['dates']))]}" - ) out_dict["roi_id"] = [ roi_id for _ in range(len(extracted_shorelines_dict["dates"])) ] out_dict["dates"] = extracted_shorelines_dict["dates"] out_dict["satname"] = extracted_shorelines_dict["satname"] - logger.info(f"out_dict : {out_dict}") + # logger.info(f"out_dict : {out_dict}") df = pd.DataFrame(out_dict) df.index = df["dates"] df.pop("dates") # save to csv file session path csv_filename = f"{key}{filename}" fn = os.path.join(save_path, csv_filename) - logger.info(f"Save time series to {fn}") if os.path.exists(fn): logger.info(f"Overwriting:{fn}") os.remove(fn) df.to_csv(fn, sep=",") - logger.info( - f"ROI: {roi_id}Time-series of the shoreline change along the transects saved as:{fn}" - ) def move_report_files( diff --git a/src/coastseg/extracted_shoreline.py b/src/coastseg/extracted_shoreline.py index 1e08ab3f..577f0fca 100644 --- a/src/coastseg/extracted_shoreline.py +++ b/src/coastseg/extracted_shoreline.py @@ -81,7 +81,7 @@ def wrapper(*args, **kwargs): result = func(*args, **kwargs) end = perf_counter() print(f"{func.__name__} took {end - start:.6f} seconds to run.") - logger.debug(f"{func.__name__} took {end - start:.6f} seconds to run.") + # logger.debug(f"{func.__name__} took {end - start:.6f} seconds to run.") return result return wrapper @@ -237,9 +237,9 @@ def compute_transects_from_roi( Not tidally corrected. 
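The cross-shore time series described here is raw, i.e. not tidally corrected. As a rough illustration of what a later correction typically does (a sketch of the standard approach, not CoastSeg's exact routine), the tide level is converted into a horizontal shift using an estimated beach-face slope:

import numpy as np


def tidally_correct(cross_distance, tide_level, beach_slope, reference_elevation=0.0):
    # horizontal shift = vertical offset between tide and datum, divided by the slope (tan beta)
    cross_distance = np.asarray(cross_distance, dtype=float)
    tide_level = np.asarray(tide_level, dtype=float)
    correction = (tide_level - reference_elevation) / beach_slope
    return cross_distance + correction


# a 0.5 m tide over a 0.1 slope shifts the estimate by about 5 m
print(tidally_correct([100.0, 102.0], [0.5, -0.3], beach_slope=0.1))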
""" # create dict of numpy arrays of transect start and end points - logger.info(f"transects.crs: {transects_gdf.crs} transects: {transects_gdf}") + # logger.info(f"transects.crs: {transects_gdf.crs} transects: {transects_gdf}") transects = common.get_transect_points_dict(transects_gdf) - logger.info(f"transects as dictionary for coastsat: {transects}") + # logger.info(f"transects as dictionary for coastsat: {transects}") # print(f'settings to extract transects: {settings}') # cross_distance: along-shore distance over which to consider shoreline points to compute median intersection (robust to outliers) cross_distance = compute_intersection_QC(extracted_shorelines, transects, settings) @@ -429,7 +429,6 @@ def process_satellite( espg_list.append(image_epsg) geoaccuracy_list.append(metadata[satname]["acc_georef"][index]) timestamps.append(metadata[satname]["dates"][index]) - logger.info(f"settings[apply_cloud_mask]: {settings['apply_cloud_mask']}") tasks.append( dask.delayed(process_satellite_image)( filenames[index], @@ -572,25 +571,6 @@ def process_satellite_image( collection, do_cloud_mask=apply_cloud_mask, ) - - # Log all items except 'reference shoreline' and handle 'reference shoreline' separately - logger.info( - "process_satellite_image_settings : " - + ", ".join( - f"{key}: {value}" - for key, value in settings.items() - if key != "reference shoreline" - ) - ) - - # Check and log 'reference shoreline' if it exists - ref_sl = settings.get("reference shoreline", np.array([])) - if isinstance(ref_sl, np.ndarray): - logger.info(f"reference shoreline.shape: {ref_sl.shape}") - logger.info( - f"Number of 'reference_shoreline': {len(settings.get('reference_shoreline', np.array([])))}" - ) - # if percentage of no data pixels are greater than allowed, skip percent_no_data_allowed = settings.get("percent_no_data", None) if not check_percent_no_data_allowed( @@ -618,7 +598,7 @@ def process_satellite_image( npz_file = find_matching_npz(filename, os.path.join(session_path, "good")) if npz_file is None: npz_file = find_matching_npz(filename, session_path) - logger.info(f"npz_file: {npz_file}") + # logger.info(f"npz_file: {npz_file}") if npz_file is None: logger.warning(f"npz file not found for {filename}") return None @@ -630,11 +610,12 @@ def process_satellite_image( min_beach_area = settings["min_beach_area"] land_mask = remove_small_objects_and_binarize(land_mask, min_beach_area) - if sum(land_mask[ref_shoreline_buffer]) < 50: - logger.warning( - f"{fn} Not enough sand pixels within the beach buffer to detect shoreline" - ) - return None + # I think we remove this since we assume our models are better + # if sum(land_mask[ref_shoreline_buffer]) < 50: + # logger.warning( + # f"{fn} Not enough sand pixels within the beach buffer to detect shoreline" + # ) + # return None # get the shoreline from the image shoreline = find_shoreline( @@ -671,7 +652,9 @@ def process_satellite_image( "shorelines": shoreline, "cloud_cover": cloud_cover, } - logger.info(f"output shorelines and cloud cover: {output}") + logger.info( + f"output shorelines {len(output['shorelines'])} and cloud cover: {output['cloud_cover']}" + ) return output @@ -685,9 +668,9 @@ def get_model_card_classes(model_card_path: str) -> dict: dict: dictionary of classes in model card and their corresponding index """ model_card_data = file_utilities.read_json_file(model_card_path, raise_error=True) - logger.info( - f"model_card_path: {model_card_path} \nmodel_card_data: {model_card_data}" - ) + # logger.info( + # f"model_card_path: 
{model_card_path} \nmodel_card_data: {model_card_data}" + # ) # read the classes the model was trained with from either the dictionary under key "DATASET" or "DATASET1" model_card_dataset = common.get_value_by_key_pattern( model_card_data, patterns=("DATASET", "DATASET1") @@ -1106,23 +1089,23 @@ def shoreline_detection_figures( filepath_data = settings["inputs"]["filepath"] filepath = os.path.join(filepath_data, sitename, "jpg_files", "detection") os.makedirs(filepath, exist_ok=True) - logger.info(f"shoreline_detection_figures filepath: {filepath}") + # logger.info(f"shoreline_detection_figures filepath: {filepath}") # increase the intensity of the image for visualization im_RGB = increase_image_intensity(im_ms, cloud_mask, prob_high=99.9) - logger.info( - f"im_RGB.shape: {im_RGB.shape}\n im_RGB.dtype: {im_RGB.dtype}\n im_RGB: {np.unique(im_RGB)[:5]}\n" - ) + # logger.info( + # f"im_RGB.shape: {im_RGB.shape}\n im_RGB.dtype: {im_RGB.dtype}\n im_RGB: {np.unique(im_RGB)[:5]}\n" + # ) im_merged = create_overlay(im_RGB, merged_labels, overlay_opacity=0.35) im_all = create_overlay(im_RGB, all_labels, overlay_opacity=0.35) - logger.info( - f"im_merged.shape: {im_merged.shape}\n im_merged.dtype: {im_merged.dtype}\n im_merged.max: {im_merged.max()}\n im_merged.min: {im_merged.min()}\n" - ) - logger.info( - f"im_all.shape: {im_all.shape}\n im_all.dtype: {im_all.dtype}\n im_all: {np.unique(im_all)[:5]}\n" - ) + # logger.info( + # f"im_merged.shape: {im_merged.shape}\n im_merged.dtype: {im_merged.dtype}\n im_merged.max: {im_merged.max()}\n im_merged.min: {im_merged.min()}\n" + # ) + # logger.info( + # f"im_all.shape: {im_all.shape}\n im_all.dtype: {im_all.dtype}\n im_all: {np.unique(im_all)[:5]}\n" + # ) # Mask clouds in the images im_RGB, im_merged, im_all = mask_clouds_in_images( @@ -1309,7 +1292,25 @@ def extract_shorelines_with_dask( filtered_files = get_filtered_files_dict(good_folder, "npz", sitename) # keep only the metadata for the files that were sorted as 'good' metadata = edit_metadata(metadata, filtered_files) - logger.info(f"edit_metadata metadata: {metadata}") + for key in metadata.keys(): + logger.info( + f"edit_metadata metadata['{key}'] length {len(metadata[key].get('epsg',[]))} of epsg: {np.unique(metadata.get('epsg',[]))}" + ) + logger.info( + f"edit_metadata metadata['{key}'] length {len(metadata[key].get('dates',[]))} of dates: {np.unique(metadata.get('dates',[]))}" + ) + logger.info( + f"edit_metadata metadata['{key}'] length {len(metadata[key].get('filenames',[]))} of filenames: {np.unique(metadata.get('filenames',[]))}" + ) + logger.info( + f"edit_metadata metadata['{key}'] length {len(metadata[key].get('im_dimensions',[]))} of im_dimensions: {np.unique(metadata.get('im_dimensions',[]))}" + ) + logger.info( + f"edit_metadata metadata['{key}'] length {len(metadata[key].get('acc_georef',[]))} of acc_georef: {np.unique(metadata.get('acc_georef',[]))}" + ) + logger.info( + f"edit_metadata metadata['{key}'] length {len(metadata[key].get('im_quality',[]))} of im_quality: {np.unique(metadata.get('im_quality',[]))}" + ) result_dict = {} for satname in metadata.keys(): @@ -1324,20 +1325,32 @@ def extract_shorelines_with_dask( batch_size=10, **kwargs, ) - - logger.info(f"satellite_dict : {satellite_dict}") - logger.info(f"before result_dict : {result_dict}") if not satellite_dict: result_dict[satname] = {} elif not satname in satellite_dict.keys(): result_dict[satname] = {} else: result_dict[satname] = satellite_dict[satname] - logger.info(f"result_dict.keys() : {result_dict.keys()}") - 
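Most of the logging changes in this patch follow one pattern: instead of dumping an entire dictionary into the log, each key is logged with its length and a short sample of its values. A hedged helper sketch of that idea (the function is illustrative, not part of the codebase):

import logging
from itertools import islice

logger = logging.getLogger(__name__)


def log_dict_summary(name: str, data: dict, sample_size: int = 3) -> None:
    # log the length and a short sample of each value instead of the full dictionary
    for key, value in data.items():
        try:
            sample = list(islice(iter(value), sample_size))
            logger.info("%s['%s'] length %d sample %s", name, key, len(value), sample)
        except TypeError:
            # value is not iterable or has no length; log it directly
            logger.info("%s['%s'] = %s", name, key, value)


log_dict_summary("metadata", {"dates": list(range(100)), "satname": ["L8"] * 100})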
logger.info(f" after result_dict : {result_dict}") - # combine the extracted shorelines for each satellite - logger.info(f"Combining extracted shorelines for each satellite : {result_dict}") + for key in result_dict.keys(): + logger.info( + f"result_dict['{key}'] length {len(result_dict[key].get('dates',[]))} of dates: {np.unique(result_dict[key].get('dates',[]))}" + ) + logger.info( + f"result_dict['{key}'] length {len(result_dict[key].get('geoaccuracy',[]))} of geoaccuracy: {np.unique(result_dict[key].get('geoaccuracy',[]))}" + ) + logger.info( + f"result_dict['{key}'] length {len(result_dict[key].get('cloud_cover',[]))} of cloud_cover: {np.unique(result_dict[key].get('cloud_cover',[]))}" + ) + logger.info( + f"result_dict['{key}'] length {len(result_dict[key].get('filename',[]))} of filename: {np.unique(result_dict[key].get('filename',[]))}" + ) + # Check and log 'reference shoreline' if it exists + ref_sl = result_dict[key].get("shorelines", np.array([])) + if isinstance(ref_sl, np.ndarray): + logger.info(f"shorelines.shape: {ref_sl.shape}") + logger.info(f"Number of 'shorelines': {len(ref_sl)}") + # combine the extracted shorelines for each satellite extracted_shorelines_data = combine_satellite_data(result_dict) return extracted_shorelines_data @@ -1366,7 +1379,7 @@ def get_sorted_model_outputs_directory( files = file_utilities.find_files_recursively( session_path, f".*{re.escape(satname)}.*\\.npz$", raise_error=False ) - logger.info(f"fetched files {files} for satellite {satname}") + # logger.info(f"fetched files {files} for satellite {satname}") except Exception as e: logger.error(f"Error finding files for satellite {satname}: {e}") continue @@ -1492,10 +1505,23 @@ def __init__( self.shoreline_settings = {} def __str__(self): - return f"Extracted Shoreline: ROI ID: {self.roi_id}\n geodataframe {self.gdf.head(5)}\nshoreline_settings{self.shoreline_settings}" + # Get column names and their data types + col_info = self.gdf.dtypes.apply(lambda x: x.name).to_string() + # Get first 5 rows as a string + first_rows = self.gdf.head().to_string() + # Get CRS information + crs_info = f"CRS: {self.gdf.crs}" if self.gdf.crs else "CRS: None" + + return f"Extracted Shoreline:\nROI ID: {self.roi_id}\ngdf: \nColumns and Data Types:\n{col_info}\n\nFirst 5 Rows:\n{first_rows}\n\n{crs_info}" def __repr__(self): - return f"Extracted Shoreline: ROI ID: {self.roi_id}\n geodataframe {self.gdf.head(5)}\nshoreline_settings{self.shoreline_settings}\ndictionary{self.dictionary}" + # Get column names and their data types + col_info = self.gdf.dtypes.apply(lambda x: x.name).to_string() + # Get first 5 rows as a string + first_rows = self.gdf.head().to_string() + # Get CRS information + crs_info = f"CRS: {self.gdf.crs}" if self.gdf.crs else "CRS: None" + return f"Extracted Shoreline:\nROI ID: {self.roi_id}\ngdf: \nColumns and Data Types:\n{col_info}\n\nFirst 5 Rows:\n{first_rows}\n\n{crs_info}" def get_roi_id(self) -> Optional[str]: """ @@ -1645,7 +1671,7 @@ def create_extracted_shorelines_from_session( # validate input parameters are not empty and are of the correct type self._validate_input_params(roi_id, shoreline, roi_settings, settings) - logger.info(f"Extracting shorelines for ROI id{roi_id}") + logger.info(f"Extracting shorelines for ROI id: {roi_id}") # read model settings from session path model_settings_path = os.path.join(session_path, "model_settings.json") @@ -1682,7 +1708,22 @@ def create_extracted_shorelines_from_session( self.shoreline_settings = self.create_shoreline_settings( settings, 
roi_settings, reference_shoreline ) - logger.info(f"self.shoreline_settings: {self.shoreline_settings}") + # Log all items except 'reference shoreline' and handle 'reference shoreline' separately + logger.info( + "self.shoreline_settings : " + + ", ".join( + f"{key}: {value}" + for key, value in settings.items() + if key != "reference shoreline" + ) + ) + # Check and log 'reference shoreline' if it exists + ref_sl = self.shoreline_settings.get("reference shoreline", np.array([])) + if isinstance(ref_sl, np.ndarray): + logger.info(f"reference shoreline.shape: {ref_sl.shape}") + logger.info( + f"Number of 'reference_shoreline': {len(settings.get('reference_shoreline', np.array([])))}" + ) # gets metadata used to extract shorelines metadata = get_metadata(self.shoreline_settings["inputs"]) sitename = self.shoreline_settings["inputs"]["sitename"] @@ -1708,8 +1749,29 @@ def create_extracted_shorelines_from_session( ) if extracted_shorelines_dict == {}: raise Exception(f"Failed to extract any shorelines.") + from itertools import islice + + logger.info( + f"extracted_shorelines_dict length {len(extracted_shorelines_dict.get('dates',[]))} of dates[:3]: {list(islice(extracted_shorelines_dict.get('dates',[]),3))}" + ) + logger.info( + f"extracted_shorelines_dict length {len(extracted_shorelines_dict.get('satname',[]))} of satname: {np.unique(extracted_shorelines_dict.get('satname',[]))}" + ) + logger.info( + f"extracted_shorelines_dict length {len(extracted_shorelines_dict.get('geoaccuracy',[]))} of geoaccuracy: {np.unique(extracted_shorelines_dict.get('geoaccuracy',[]))}" + ) + logger.info( + f"extracted_shorelines_dict length {len(extracted_shorelines_dict.get('cloud_cover',[]))} of cloud_cover: {np.unique(extracted_shorelines_dict.get('cloud_cover',[]))}" + ) + logger.info( + f"extracted_shorelines_dict length {len(extracted_shorelines_dict.get('filename',[]))} of filename[:3]: {list(islice(extracted_shorelines_dict.get('filename',[]),3))}" + ) + # Check and log 'reference shoreline' if it exists + ref_sl = extracted_shorelines_dict.get("shorelines", np.array([])) + if isinstance(ref_sl, np.ndarray): + logger.info(f"shorelines.shape: {ref_sl.shape}") + logger.info(f"Number of 'shorelines': {len(ref_sl)}") - logger.info(f"extracted_shoreline_dict: {extracted_shorelines_dict}") # postprocessing by removing duplicates and removing in inaccurate georeferencing (set threshold to 10 m) extracted_shorelines_dict = remove_duplicates( extracted_shorelines_dict @@ -1717,9 +1779,6 @@ def create_extracted_shorelines_from_session( extracted_shorelines_dict = remove_inaccurate_georef( extracted_shorelines_dict, 10 ) # remove inaccurate georeferencing (set threshold to 10 m) - logger.info( - f"after remove_inaccurate_georef : extracted_shoreline_dict: {extracted_shorelines_dict}" - ) self.dictionary = extracted_shorelines_dict if is_list_empty(self.dictionary["shorelines"]): @@ -1797,15 +1856,31 @@ def extract_shorelines( # filter out files that were removed from RGB directory try: - logger.info(f"metadata before filter : {metadata}") + # logger.info(f"metadata before filter : {metadata}") metadata = common.filter_metadata(metadata, sitename, filepath_data) - logger.info(f"metadata after filter : {metadata}") except FileNotFoundError as e: logger.warning(f"No RGB files existed so no metadata.") return {} - logger.info(f"new metadata: {metadata}") - logger.info(f"self.shoreline_settings: {self.shoreline_settings}") + for key in metadata.keys(): + logger.info( + f"edit_metadata metadata['{key}'] length 
{len(metadata[key].get('epsg',[]))} of epsg: {np.unique(metadata.get('epsg',[]))}" + ) + logger.info( + f"edit_metadata metadata['{key}'] length {len(metadata[key].get('dates',[]))} of dates: {np.unique(metadata.get('dates',[]))}" + ) + logger.info( + f"edit_metadata metadata['{key}'] length {len(metadata[key].get('filenames',[]))} of filenames: {np.unique(metadata.get('filenames',[]))}" + ) + logger.info( + f"edit_metadata metadata['{key}'] length {len(metadata[key].get('im_dimensions',[]))} of im_dimensions: {np.unique(metadata.get('im_dimensions',[]))}" + ) + logger.info( + f"edit_metadata metadata['{key}'] length {len(metadata[key].get('acc_georef',[]))} of acc_georef: {np.unique(metadata.get('acc_georef',[]))}" + ) + logger.info( + f"edit_metadata metadata['{key}'] length {len(metadata[key].get('im_quality',[]))} of im_quality: {np.unique(metadata.get('im_quality',[]))}" + ) # extract shorelines from ROI if session_path is None: # extract shorelines with coastsat's models @@ -1827,9 +1902,6 @@ def extract_shorelines( extracted_shorelines = remove_inaccurate_georef( extracted_shorelines, 10 ) # remove inaccurate georeferencing (set threshold to 10 m) - logger.info( - f"after remove_inaccurate_georef : extracted_shoreline_dict: {extracted_shorelines}" - ) return extracted_shorelines def create_shoreline_settings( @@ -1906,14 +1978,8 @@ def create_geodataframe( extract_shoreline_gdf = output_to_gdf(self.dictionary, geomtype) if not extract_shoreline_gdf.crs: extract_shoreline_gdf.set_crs(input_crs, inplace=True) - logger.info( - f"extract_shoreline_gdf inital crs {extract_shoreline_gdf.crs} extract_shoreline_gdf {extract_shoreline_gdf}" - ) if output_crs is not None: extract_shoreline_gdf = extract_shoreline_gdf.to_crs(output_crs) - logger.info( - f"extract_shoreline_gdf final crs {extract_shoreline_gdf.crs} extract_shoreline_gdf {extract_shoreline_gdf}" - ) return extract_shoreline_gdf def to_file( diff --git a/src/coastseg/file_utilities.py b/src/coastseg/file_utilities.py index b06e31fd..a58aba2d 100644 --- a/src/coastseg/file_utilities.py +++ b/src/coastseg/file_utilities.py @@ -334,13 +334,12 @@ def config_to_file(config: Union[dict, gpd.GeoDataFrame], filepath: str): filename = f"config.json" save_path = os.path.abspath(os.path.join(filepath, filename)) write_to_json(save_path, config) - logger.info(f"Saved config json: {filename} \nSaved to {save_path}") elif isinstance(config, gpd.GeoDataFrame): filename = f"config_gdf.geojson" save_path = os.path.abspath(os.path.join(filepath, filename)) - logger.info(f"Saving config gdf:{config} \nSaved to {save_path}") os.makedirs(os.path.dirname(filepath), exist_ok=True) config.to_file(save_path, driver="GeoJSON") + logger.info(f"Saved {filename} saved to {save_path}") def filter_files(files: List[str], avoid_patterns: List[str]) -> List[str]: diff --git a/src/coastseg/filters.py b/src/coastseg/filters.py index 0a19803f..f203c655 100644 --- a/src/coastseg/filters.py +++ b/src/coastseg/filters.py @@ -159,10 +159,8 @@ def handle_files_and_directories( """ os.makedirs(dest_folder_bad, exist_ok=True) os.makedirs(dest_folder_good, exist_ok=True) - logger.info(f"Copying {len(files_bad)} files to {dest_folder_bad} \n {files_bad}") - logger.info( - f"Copying {len(files_good)} files to {dest_folder_good} \n {files_good}" - ) + logger.info(f"Copying {len(files_bad)} files to {dest_folder_bad}") + logger.info(f"Copying {len(files_good)} files to {dest_folder_good}") copy_files(files_bad, dest_folder_bad) copy_files(files_good, dest_folder_good) diff 
--git a/src/coastseg/models_UI.py b/src/coastseg/models_UI.py index 117dbb57..bd74f838 100644 --- a/src/coastseg/models_UI.py +++ b/src/coastseg/models_UI.py @@ -1,4 +1,3 @@ -import glob import logging import os @@ -462,7 +461,6 @@ def handle_model_implementation(self, change): def handle_model_type(self, change): # 2 class model has not been selected disable otsu threhold self.model_dict["model_type"] = change["new"] - logger.info(f"self.model_dict['model_type']: {self.model_dict['model_type']}") if "2class" not in change["new"]: if self.otsu_radio.value == "Enabled": self.model_dict["otsu"] = False @@ -471,7 +469,6 @@ def handle_model_type(self, change): # 2 class model was selected enable otsu threhold radio button if "2class" in change["new"]: self.otsu_radio.disabled = False - logger.info(f"change: {change}") def handle_otsu(self, change): if change["new"] == "Enabled": @@ -514,6 +511,13 @@ def run_model_button_clicked(self, button): position=1, ) return + if self.model_dict["sample_direc"] == "": + self.launch_error_box( + "Cannot Run Model", + "Must click select images first and select a directory of jpgs.\n Example : C:/Users/username/CoastSeg/data/ID_lla12_datetime11-07-23__08_14_11/jpg_files/preprocessed/RGB/", + position=1, + ) + return print("Running the model. Please wait.") zoo_model_instance = self.get_model_instance() img_type = self.model_input_dropdown.value @@ -582,18 +586,29 @@ def extract_shorelines_button_clicked(self, button): @run_model_view.capture(clear_output=True) def select_RGB_callback(self, filechooser: FileChooser) -> None: + """Handle the selection of a directory and check for the presence of JPG files. + + Args: + filechooser: The file chooser widget used to select the directory. + """ + from pathlib import Path + if filechooser.selected: - sample_direc = os.path.abspath(filechooser.selected) + sample_direc = Path(filechooser.selected).resolve() print(f"The images in the folder will be segmented :\n{sample_direc} ") - jpgs = glob.glob1(sample_direc + os.sep, "*jpg") - if jpgs == []: + # Using itertools.chain to combine the results of two glob calls + has_jpgs = any(sample_direc.glob("*.jpg")) or any( + sample_direc.glob("*.jpeg") + ) + if not has_jpgs: self.launch_error_box( - "File Not Found", - "The directory contains no jpgs! Please select a directory with jpgs.", + "No jpgs found", + f"The directory {sample_direc} contains no jpgs! Please select a directory with jpgs. 
Make sure to select 'RGB' folder located in the 'preprocessed' folder.'", position=1, ) - elif jpgs != []: - self.model_dict["sample_direc"] = sample_direc + self.model_dict["sample_direc"] = "" + else: + self.model_dict["sample_direc"] = str(sample_direc) @run_model_view.capture(clear_output=True) def use_select_images_button_clicked(self, button): @@ -645,7 +660,7 @@ def select_extracted_shorelines_button_clicked(self, button): def launch_error_box(self, title: str = None, msg: str = None, position: int = 1): # Show user error message warning_box = common.create_warning_box( - title=title, msg=msg, instructions=None, msg_width="60%", box_width="30%" + title=title, msg=msg, instructions=None, msg_width="95%", box_width="30%" ) # clear row and close all widgets before adding new warning_box common.clear_row(self.get_warning_box(position)) diff --git a/src/coastseg/zoo_model.py b/src/coastseg/zoo_model.py index 8da7fb48..260dac41 100644 --- a/src/coastseg/zoo_model.py +++ b/src/coastseg/zoo_model.py @@ -1,4 +1,3 @@ -import copy import os from pathlib import Path import re @@ -7,7 +6,7 @@ import platform import json import logging -import shutil +from itertools import islice from typing import List, Set, Tuple from coastsat import SDS_tools @@ -76,9 +75,6 @@ def has_image_files(file_list, extensions): if file.endswith(".jpg") or file.endswith(".jpeg") or file.endswith(".png"): img = Image.open(file) percentage = percentage_of_black_pixels(img) - logger.info( - f"percentage black pixels in {os.path.basename(file)} is {percentage}" - ) if percentage <= percent_no_data: valid_images.append(file) @@ -164,8 +160,6 @@ def get_imagery_directory(img_type: str, RGB_path: str) -> str: Returns: str: The path to the output directory for the specified imagery type. 
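The get_imagery_directory hunk here only routes the model input to the RGB, NDWI, MNDWI, or 5-band folder; the water indices themselves are simple normalized band ratios. A hedged sketch of those ratios (array shapes and band sources are assumptions, not CoastSeg's exact I/O):

import numpy as np


def normalized_difference(band_a: np.ndarray, band_b: np.ndarray) -> np.ndarray:
    # (a - b) / (a + b) with basic divide-by-zero protection
    band_a = band_a.astype(float)
    band_b = band_b.astype(float)
    denominator = band_a + band_b
    denominator[denominator == 0] = 1e-10
    return (band_a - band_b) / denominator


green = np.random.rand(64, 64)
nir = np.random.rand(64, 64)
swir = np.random.rand(64, 64)
ndwi = normalized_difference(green, nir)    # NDWI: green vs near-infrared
mndwi = normalized_difference(green, swir)  # MNDWI: green vs short-wave infrared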
""" - logger.info(f"img_type: {img_type}") - logger.info(f"RGB_path: {RGB_path}") img_type = img_type.upper() output_path = os.path.dirname(RGB_path) if img_type == "RGB": @@ -190,7 +184,6 @@ def get_imagery_directory(img_type: str, RGB_path: str) -> str: raise ValueError( f"{img_type} not reconigzed as one of the valid types 'RGB', 'NDWI', 'MNDWI',or 'RGB+MNDWI+NDWI'" ) - logger.info(f"output_path: {output_path}") return output_path @@ -229,7 +222,6 @@ def get_five_band_imagery( ) np.savez_compressed(segfile, **datadict) del datadict, im - logger.info(f"segfile: {segfile}") return output_path @@ -377,7 +369,7 @@ def RGB_to_infrared( files = get_files(RGB_path, infrared_path) # output_path: directory to store MNDWI or NDWI outputs output_path = os.path.join(output_path, output_type.upper()) - logger.info(f"output_path {output_path}") + if not os.path.exists(output_path): os.mkdir(output_path) @@ -681,6 +673,7 @@ def preprocess_data( logger.info(f"img_type: {img_type}") # get full path to directory named 'RGB' containing RGBs RGB_path = file_utilities.find_directory_recursively(src_directory, name="RGB") + logger.info(f"RGB_path: {RGB_path}") # convert RGB to MNDWI, NDWI,or 5 band model_dict["sample_direc"] = get_imagery_directory(img_type, RGB_path) logger.info(f"model_dict: {model_dict}") @@ -719,9 +712,7 @@ def extract_shorelines_with_unet( logger.error(f"{roi_id} ROI settings did not exist: {e}") if roi_id is None: logger.error(f"roi_id was None config: {config}") - raise Exception( - f"This session is likely not a model sessuin because its config file did not contain an ROI ID \n config: {config}" - ) + raise Exception(f"The session loaded was \n config: {config}") else: logger.error( f"roi_id {roi_id} existed but not found in config: {config}" @@ -742,7 +733,9 @@ def extract_shorelines_with_unet( logger.error( f"{roi_id} ROI ID did not exist in geodataframe: {config_geojson_location}" ) - raise ValueError + raise ValueError( + f"{roi_id} ROI ID did not exist in geodataframe: {config_geojson_location}" + ) # get roi_id from source directory path in model settings model_settings = file_utilities.load_json_data_from_file( @@ -808,7 +801,14 @@ def extract_shorelines_with_unet( cross_distance_transects = extracted_shoreline.compute_transects_from_roi( extracted_shorelines.dictionary, transects_gdf, settings ) - logger.info(f"cross_distance_transects: {cross_distance_transects}") + + first_key = next(iter(cross_distance_transects)) + logger.info( + f"cross_distance_transects.keys(): {cross_distance_transects.keys()}" + ) + logger.info( + f"Sample of transect intersections for first key: {list(islice(cross_distance_transects[first_key], 3))}" + ) # save transect shoreline intersections to csv file if they exist if cross_distance_transects == 0: @@ -1014,11 +1014,7 @@ def get_files_for_seg( model_ready_files = file_utilities.filter_files( model_ready_files, avoid_patterns ) - logger.info(f"Filtered files for {avoid_patterns}: {model_ready_files}\n") model_ready_files = filter_no_data_pixels(model_ready_files, percent_no_data) - logger.info( - f"Files ready for segmentation with no data pixels below {percent_no_data}% : {model_ready_files}\n" - ) return model_ready_files def compute_segmentation( @@ -1215,10 +1211,6 @@ def get_model(self, weights_list: list): self.model_types.append(MODEL) self.model_list.append(model) config_files.append(config_file) - - logger.info(f"self.N_DATA_BANDS: {self.N_DATA_BANDS}") - logger.info(f"self.TARGET_SIZE: {self.TARGET_SIZE}") - 
logger.info(f"self.TARGET_SIZE: {self.TARGET_SIZE}") return model, self.model_list, config_files, self.model_types def get_metadatadict( @@ -1277,12 +1269,11 @@ def get_weights_list(self, model_choice: str = "ENSEMBLE") -> List[str]: print(best_weights_list) # Output: ['/path/to/weights/best_model.h5'] """ + logger.info(f"{model_choice}") if model_choice == "ENSEMBLE": weights_list = glob(os.path.join(self.weights_directory, "*.h5")) - logger.info(f"ENSEMBLE: weights_list: {weights_list}") - logger.info( - f"ENSEMBLE: {len(weights_list)} sets of model weights were found " - ) + logger.info(f"weights_list: {weights_list}") + logger.info(f"{len(weights_list)} sets of model weights were found ") return weights_list elif model_choice == "BEST": # read model name (fullmodel.h5) from BEST_MODEL.txt @@ -1292,9 +1283,13 @@ def get_weights_list(self, model_choice: str = "ENSEMBLE") -> List[str]: # remove any leading or trailing whitespace and newline characters model_name = model_name.strip() weights_list = [os.path.join(self.weights_directory, model_name)] - logger.info(f"BEST: weights_list: {weights_list}") - logger.info(f"BEST: {len(weights_list)} sets of model weights were found ") + logger.info(f"weights_list: {weights_list}") + logger.info(f"{len(weights_list)} sets of model weights were found ") return weights_list + else: + raise ValueError( + f"Invalid model_choice: {model_choice}. Valid choices are 'ENSEMBLE' or 'BEST'." + ) def download_best( self, available_files: List[dict], model_path: str, model_id: str @@ -1419,7 +1414,7 @@ def download_ensemble( ) download_dict = check_if_files_exist(download_dict) # download the files that don't exist - logger.info(f"URLs to download: {download_dict}") + logger.info(f"download_dict: {download_dict}") # if any files are not found locally download them asynchronous if download_dict != {}: download_status = downloads.download_url_dict(download_dict) From a5389ac95cc9f6b816184b6f7a8a8bc32b01483c Mon Sep 17 00:00:00 2001 From: Sharon Fitzpatrick Date: Tue, 12 Dec 2023 12:28:07 -0800 Subject: [PATCH 69/87] #207 improve logging for roi.py --- src/coastseg/roi.py | 73 ++++++++++++++++++++++++++++++++------------- 1 file changed, 52 insertions(+), 21 deletions(-) diff --git a/src/coastseg/roi.py b/src/coastseg/roi.py index af3da234..1a165d34 100644 --- a/src/coastseg/roi.py +++ b/src/coastseg/roi.py @@ -1,5 +1,4 @@ # Standard library imports -from typing import Collection import logging from typing import Union, List @@ -14,7 +13,7 @@ from shapely import geometry from ipyleaflet import GeoJSON -from coastseg.extracted_shoreline import Extracted_Shoreline +# from coastseg.extracted_shoreline import Extracted_Shoreline logger = logging.getLogger(__name__) @@ -57,10 +56,40 @@ def __init__( ) def __str__(self): - return f"ROI: geodataframe {self.gdf} \nextracted_shorelines {self.extracted_shorelines}" + # Get column names and their data types + col_info = self.gdf.dtypes.apply(lambda x: x.name).to_string() + # Get first 5 rows as a string + first_rows = self.gdf.head().to_string() + # Get CRS information + crs_info = f"CRS: {self.gdf.crs}" if self.gdf.crs else "CRS: None" + extracted_shoreline_info = "" + for key in self.extracted_shorelines.keys(): + if hasattr(self.extracted_shorelines[key], "gdf") and ( + isinstance(self.extracted_shorelines[key].gdf, gpd.GeoDataFrame) + ): + if not self.extracted_shorelines[key].gdf.empty: + extracted_shoreline_info.join( + f"ROI ID {key}:\n{len(self.extracted_shorelines[key].gdf)}\n" + ) + return f"ROI:\nROI IDs: 
{self.get_ids()}\nROI IDs with extracted shorelines: {extracted_shoreline_info}\nROI IDs with shoreline transect intersections: {list(self.cross_shore_distances.keys())}\n gdf:\n{crs_info}\nColumns and Data Types:\n{col_info}\n\nFirst 5 Rows:\n{first_rows}" def __repr__(self): - return f"ROI: geodataframe {self.gdf}\nextracted_shorelines {self.extracted_shorelines}" + # Get column names and their data types + col_info = self.gdf.dtypes.apply(lambda x: x.name).to_string() + # Get first 5 rows as a string + first_rows = self.gdf.head().to_string() + # Get CRS information + crs_info = f"CRS: {self.gdf.crs}" if self.gdf.crs else "CRS: None" + extracted_shoreline_info = "" + for key in self.extracted_shorelines.keys(): + if hasattr(self.extracted_shorelines[key], "gdf") and ( + isinstance(self.extracted_shorelines[key].gdf, gpd.GeoDataFrame) + ): + if not self.extracted_shorelines[key].gdf.empty: + extracted_shoreline_info.join( + f"ROI ID {key}:\n{len(self.extracted_shorelines[key].gdf)}\n" + ) + return f"ROI:\nROI IDs: {self.get_ids()}\nROI IDs with extracted shorelines: {extracted_shoreline_info}\nROI IDs with shoreline transect intersections: {list(self.cross_shore_distances.keys())}\n gdf:\n{crs_info}\nColumns and Data Types:\n{col_info}\n\nFirst 5 Rows:\n{first_rows}" def remove_by_id( self, ids_to_drop: list | set | tuple | str | int @@ -113,7 +142,6 @@ def _initialize_from_roi_gdf(self, rois_gdf: gpd.GeoDataFrame) -> None: rois_gdf, max_area=ROI.MAX_SIZE, min_area=ROI.MIN_SIZE ) if drop_ids: - print(f"Dropping IDs {drop_ids}") logger.info(f"Dropping ROIs that are an invalid size {drop_ids}") rois_gdf.drop(index=drop_ids, axis=0, inplace=True) if rois_gdf.empty: @@ -232,6 +260,8 @@ def get_all_extracted_shorelines(self) -> dict: Returns: dict: A dictionary containing all extracted shorelines, indexed by ROI ID. """ + if not hasattr(self, "extracted_shorelines"): + self.extracted_shorelines = {} return self.extracted_shorelines def remove_extracted_shorelines( @@ -246,10 +276,13 @@ def remove_extracted_shorelines( if roi_id in self.extracted_shorelines: del self.extracted_shorelines[roi_id] if remove_all: + del self.extracted_shorelines self.extracted_shorelines = {} def add_extracted_shoreline( - self, extracted_shoreline: Extracted_Shoreline, roi_id: str + self, + extracted_shoreline: "coastseg.extracted_shoreline.Extracted_Shoreline", + roi_id: str, ) -> None: """Adds an extracted shoreline dictionary to the collection, indexed by the specified ROI ID. @@ -258,7 +291,8 @@ def add_extracted_shoreline( roi_id (str): The ID of the ROI to associate the shoreline with. """ self.extracted_shorelines[roi_id] = extracted_shoreline - logger.info(f"New self.extracted_shorelines: {self.extracted_shorelines}") + logger.info(f"New extracted shoreline added for ROI {roi_id}") + # logger.info(f"New extracted shoreline added for ROI {roi_id}: {self.extracted_shorelines}") def get_cross_shore_distances(self, roi_id: str) -> Union[None, dict]: """Returns the cross shore distance for the specified ROI ID. @@ -270,9 +304,9 @@ def get_cross_shore_distances(self, roi_id: str) -> Union[None, dict]: Union[None, dict]: Thecross shore distance dictionary for the specified ROI ID, or None if it does not exist. 
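The new __str__/__repr__ implementations in this patch all build the same kind of compact summary (CRS, column dtypes, first rows) instead of dumping a whole GeoDataFrame. A small hedged helper showing that shared pattern; the function name is illustrative and not part of CoastSeg:

import geopandas as gpd


def summarize_gdf(gdf: gpd.GeoDataFrame, n_rows: int = 5) -> str:
    # compact, log-friendly description of a GeoDataFrame
    crs_info = f"CRS: {gdf.crs}" if gdf.crs else "CRS: None"
    col_info = gdf.dtypes.apply(lambda dtype: dtype.name).to_string()
    first_rows = gdf.head(n_rows).to_string()
    return f"{crs_info}\nColumns and Data Types:\n{col_info}\n\nFirst {n_rows} Rows:\n{first_rows}"


# e.g. inside a __str__ method: return f"ROI:\n{summarize_gdf(self.gdf)}"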
""" logger.info( - f"ROI: {roi_id} cross distance : {self.cross_shore_distances.get(roi_id)}" + f"ROI: {roi_id} cross distance with keys : {self.cross_shore_distances.get(roi_id,{}).keys()}" ) - return self.cross_shore_distances.get(roi_id) + return self.cross_shore_distances.get(roi_id, 0) def add_cross_shore_distances( self, cross_shore_distance: dict, roi_id: str @@ -284,7 +318,7 @@ def add_cross_shore_distances( roi_id (str): The ID of the ROI to associate the cross_shore_distance dictionary """ self.cross_shore_distances[roi_id] = cross_shore_distance - logger.info(f"Newly added cross_shore_distance: {cross_shore_distance}") + # logger.info(f"Newly added cross_shore_distance: {cross_shore_distance}") def get_all_cross_shore_distances( self, @@ -329,19 +363,19 @@ def create_geodataframe( large_length and small_length """ # Create a single set of fishnets with square size = small and/or large side lengths that overlap each other - logger.info(f"Small Length: {small_length} Large Length: {large_length}") + # logger.info(f"Small Length: {small_length} Large Length: {large_length}") if small_length == 0 or large_length == 0: - logger.info("Creating one fishnet") + # logger.info("Creating one fishnet") # create a fishnet geodataframe with square size of either large_length or small_length fishnet_size = large_length if large_length != 0 else small_length fishnet_intersect_gdf = self.get_fishnet_gdf(bbox, shoreline, fishnet_size) else: - logger.info("Creating two fishnets") + # logger.info("Creating two fishnets") # Create two fishnets, one big (2000m) and one small(1500m) so they overlap each other fishnet_gpd_large = self.get_fishnet_gdf(bbox, shoreline, large_length) fishnet_gpd_small = self.get_fishnet_gdf(bbox, shoreline, small_length) - logger.info(f"fishnet_gpd_large : {fishnet_gpd_large}") - logger.info(f"fishnet_gpd_small : {fishnet_gpd_small}") + # logger.info(f"fishnet_gpd_large : {fishnet_gpd_large}") + # logger.info(f"fishnet_gpd_small : {fishnet_gpd_small}") # Concat the fishnets together to create one overlapping set of rois fishnet_intersect_gdf = gpd.GeoDataFrame( pd.concat([fishnet_gpd_large, fishnet_gpd_small], ignore_index=True) @@ -356,17 +390,14 @@ def create_geodataframe( ], create_ids=True, ) - validate_geometry_types( - fishnet_intersect_gdf, set(["Polygon", "MultiPolygon"]), feature_type="ROI" - ) # make sure all the ids are unique fishnet_intersect_gdf = common.create_unique_ids( fishnet_intersect_gdf, prefix_length=3 ) - logger.info(f"Created fishnet_intersect_gdf: {fishnet_intersect_gdf}") + # logger.info(f"Created fishnet_intersect_gdf: {fishnet_intersect_gdf}") return fishnet_intersect_gdf - def style_layer(self, geojson: dict, layer_name: str) -> "ipyleaflet.GeoJSON": + def style_layer(self, geojson: dict, layer_name: str) -> GeoJSON: """Return styled GeoJson object with layer name Args: @@ -487,7 +518,7 @@ def create_fishnet( # create geodataframe to hold all the (rois)squares fishnet = gpd.GeoDataFrame(geom_array, columns=["geometry"]).set_crs(input_espg) logger.info( - f"\n ROIs area before conversion to {output_epsg}:\n {fishnet.area}" + f"\n ROIs area before conversion to {output_epsg}:\n {fishnet.area} for CRS: {input_espg}" ) fishnet = fishnet.to_crs(output_epsg) return fishnet From 1fdf9992ee22e260e3a4228bd4dc00a4f6cce3d9 Mon Sep 17 00:00:00 2001 From: Sharon Fitzpatrick Date: Tue, 12 Dec 2023 12:28:21 -0800 Subject: [PATCH 70/87] #207 improve logging for shoreline.py --- src/coastseg/shoreline.py | 40 +++++++++++++++++++++++++-------------- 1 file 
changed, 26 insertions(+), 14 deletions(-) diff --git a/src/coastseg/shoreline.py b/src/coastseg/shoreline.py index 47d05f3f..9bb86b95 100644 --- a/src/coastseg/shoreline.py +++ b/src/coastseg/shoreline.py @@ -80,10 +80,26 @@ def filename(self, value): self._filename = value def __str__(self): - return f"Shoreline: geodataframe {self.gdf}" + # Get column names and their data types + col_info = self.gdf.dtypes.apply(lambda x: x.name).to_string() + # Get first 5 rows as a string + first_rows = self.gdf.head().to_string() + # Get CRS information + crs_info = f"CRS: {self.gdf.crs}" if self.gdf.crs else "CRS: None" + if "id" in self.gdf.columns: + transect_ids = self.gdf["id"].astype(str) + return f"Shoreline:\nself.gdf:\n\n{crs_info}\n- Columns and Data Types:\n{col_info}\n\n- First 5 Rows:\n{first_rows}\nIDs:\n{transect_ids}" def __repr__(self): - return f"Shoreline: geodataframe {self.gdf}" + # Get column names and their data types + col_info = self.gdf.dtypes.apply(lambda x: x.name).to_string() + # Get first 5 rows as a string + first_rows = self.gdf.head().to_string() + # Get CRS information + crs_info = f"CRS: {self.gdf.crs}" if self.gdf.crs else "CRS: None" + if "id" in self.gdf.columns: + transect_ids = self.gdf["id"].astype(str) + return f"Shoreline:\nself.gdf:\n{crs_info}\n- Columns and Data Types:\n{col_info}\n\n- First 5 Rows:\n{first_rows}\nIDs:\n{transect_ids}" def initialize_shorelines( self, @@ -116,7 +132,11 @@ def initialize_shorelines_with_shorelines(self, shorelines: gpd.GeoDataFrame): """ Initalize shorelines with the provided shorelines in a geodataframe """ - if not shorelines.empty: + if not isinstance(shorelines, gpd.GeoDataFrame): + raise ValueError("Shorelines must be a geodataframe") + elif shorelines.empty: + raise logger.warning("Shorelines cannot be an empty geodataframe") + else: columns_to_keep = [ "id", "geometry", @@ -182,9 +202,6 @@ def get_intersecting_shoreline_files( intersecting_files = get_intersecting_files(bbox, bounding_boxes_location) if not intersecting_files: - logger.error( - f"No intersecting shorelines found within the bounding box: {bbox}" - ) raise ValueError( "No intersecting shorelines shorelines were available within the bounding box:. Try drawing a new bounding box elsewhere." ) @@ -194,9 +211,6 @@ def get_intersecting_shoreline_files( intersecting_files, self._download_location ) if not shoreline_files: - logger.error( - f"No shoreline files found.Intersecting files were {intersecting_files}" - ) raise FileNotFoundError( f"No shoreline files were found at {self._download_location}." ) @@ -210,6 +224,7 @@ def create_geodataframe( Downloads the shorelines from online. Args: bbox (gpd.GeoDataFrame): Bounding box being searched for shorelines. + shoreline_files (List[str]): List of filepaths for available shoreline files. crs (str, optional): Coordinate reference system string. Defaults to 'EPSG:4326'. 
Returns: @@ -332,10 +347,7 @@ def download_shoreline( url = construct_download_url(root_url, dataset_id, filename) # Download shorelines from Zenodo - logger.info(f"Retrieving: {url}") - logger.info(f"Retrieving file: {save_location}") - print(f"Retrieving: {url}") - print(f"Retrieving file: {save_location}") + logger.info(f"Retrieving file: {save_location} from {url}") self.download_service(url, save_location, filename=filename) @@ -375,7 +387,7 @@ def get_intersecting_files( filenames_and_ids = zip(filenames, [dataset_id] * len(filenames)) # Add the filenames and their dataset IDs to intersecting_files intersecting_files.update(dict(filenames_and_ids)) - logger.info( + logger.debug( f"Found {len(intersecting_files)} intersecting files\n {intersecting_files}" ) return intersecting_files From 88457b98e56e0a8bb6c70a09fcfd161d7a4a4bdb Mon Sep 17 00:00:00 2001 From: Sharon Fitzpatrick Date: Tue, 12 Dec 2023 12:28:44 -0800 Subject: [PATCH 71/87] #207 improve logging for transects.py --- src/coastseg/transects.py | 29 ++++++++++++++++++++++++----- 1 file changed, 24 insertions(+), 5 deletions(-) diff --git a/src/coastseg/transects.py b/src/coastseg/transects.py index ceb0fd20..4af5206d 100644 --- a/src/coastseg/transects.py +++ b/src/coastseg/transects.py @@ -114,7 +114,8 @@ class Transects: ] ) - # Define columns of interest and their descriptions: + # COLUMNS_TO_KEEP + # --------------- # id: unique identifier for each transect # geometry: the geometric shape, position, and configuration of the transect # slope: represents the beach face slope, used for tidal correction of transect-based data @@ -124,7 +125,6 @@ class Transects: # nearest_x: x-coordinate of the nearest slope location to the transect # nearest_y: y-coordinate of the nearest slope location to the transect - def __init__( self, bbox: gpd.GeoDataFrame = None, @@ -140,10 +140,28 @@ def __init__( self.initialize_transects(bbox, transects) def __str__(self): - return f"Transects: geodataframe {self.gdf}" + # Get column names and their data types + col_info = self.gdf.dtypes.apply(lambda x: x.name).to_string() + # Get first 5 rows as a string + first_rows = self.gdf.head().to_string() + # Get CRS information + crs_info = f"CRS: {self.gdf.crs}" if self.gdf.crs else "CRS: None" + ids = "" + if "id" in self.gdf.columns: + ids = self.gdf["id"].astype(str) + return f"Transects:\nself.gdf:\n{crs_info}\n- Columns and Data Types:\n{col_info}\n\n- First 5 Rows:\n{first_rows}\nIDs:\n{ids}" def __repr__(self): - return f"Transects: geodataframe {self.gdf}" + # Get column names and their data types + col_info = self.gdf.dtypes.apply(lambda x: x.name).to_string() + # Get first 5 rows as a string + first_rows = self.gdf.head().to_string() + # Get CRS information + crs_info = f"CRS: {self.gdf.crs}" if self.gdf.crs else "CRS: None" + ids = "" + if "id" in self.gdf.columns: + ids = self.gdf["id"].astype(str) + return f"Transects:\nself.gdf:\n{crs_info}\n- Columns and Data Types:\n{col_info}\n\n- First 5 Rows:\n{first_rows}\nIDs:\n{ids}" def initialize_transects( self, @@ -205,7 +223,7 @@ def create_geodataframe( """ # create a new dataframe that only contains the geometry column of the bbox bbox = bbox[["geometry"]] - # get transect geosjson files that intersect with bounding box + # get transect geojson files that intersect with bounding box intersecting_transect_files = self.get_intersecting_files(bbox) script_dir = os.path.dirname(os.path.abspath(__file__)) transect_dir = os.path.abspath(os.path.join(script_dir, "transects")) @@ -218,6 +236,7 @@ 
def create_geodataframe( ) if transects_in_bbox.empty: logger.warning("No transects found here.") + return transects_in_bbox # remove z-axis from transects transects_in_bbox = preprocess_geodataframe( transects_in_bbox, From d99b954b291fe3e724f2ed000f4c2a94cf833724 Mon Sep 17 00:00:00 2001 From: Sharon Fitzpatrick Date: Tue, 12 Dec 2023 12:28:58 -0800 Subject: [PATCH 72/87] #207 improve logging for extracted_shoreline.py --- src/coastseg/extracted_shoreline.py | 198 ++++++++++++++++------------ 1 file changed, 111 insertions(+), 87 deletions(-) diff --git a/src/coastseg/extracted_shoreline.py b/src/coastseg/extracted_shoreline.py index 577f0fca..1e1d23b2 100644 --- a/src/coastseg/extracted_shoreline.py +++ b/src/coastseg/extracted_shoreline.py @@ -12,6 +12,7 @@ from typing import Optional, Union, List, Dict from time import perf_counter from typing import Dict, List, Optional, Union +from itertools import islice # External dependencies imports import dask @@ -652,9 +653,6 @@ def process_satellite_image( "shorelines": shoreline, "cloud_cover": cloud_cover, } - logger.info( - f"output shorelines {len(output['shorelines'])} and cloud cover: {output['cloud_cover']}" - ) return output @@ -1292,27 +1290,31 @@ def extract_shorelines_with_dask( filtered_files = get_filtered_files_dict(good_folder, "npz", sitename) # keep only the metadata for the files that were sorted as 'good' metadata = edit_metadata(metadata, filtered_files) - for key in metadata.keys(): - logger.info( - f"edit_metadata metadata['{key}'] length {len(metadata[key].get('epsg',[]))} of epsg: {np.unique(metadata.get('epsg',[]))}" - ) - logger.info( - f"edit_metadata metadata['{key}'] length {len(metadata[key].get('dates',[]))} of dates: {np.unique(metadata.get('dates',[]))}" - ) - logger.info( - f"edit_metadata metadata['{key}'] length {len(metadata[key].get('filenames',[]))} of filenames: {np.unique(metadata.get('filenames',[]))}" - ) - logger.info( - f"edit_metadata metadata['{key}'] length {len(metadata[key].get('im_dimensions',[]))} of im_dimensions: {np.unique(metadata.get('im_dimensions',[]))}" - ) - logger.info( - f"edit_metadata metadata['{key}'] length {len(metadata[key].get('acc_georef',[]))} of acc_georef: {np.unique(metadata.get('acc_georef',[]))}" - ) - logger.info( - f"edit_metadata metadata['{key}'] length {len(metadata[key].get('im_quality',[]))} of im_quality: {np.unique(metadata.get('im_quality',[]))}" - ) - result_dict = {} + for satname in metadata.keys(): + if not metadata[satname]: + logger.warning(f"metadata['{satname}'] is empty") + else: + logger.info( + f"edit_metadata metadata['{satname}'] length {len(metadata[satname].get('epsg',[]))} of epsg: {np.unique(metadata[satname].get('epsg',[]))}" + ) + logger.info( + f"edit_metadata metadata['{satname}'] length {len(metadata[satname].get('dates',[]))} of dates Sample first five: {list(islice(metadata[satname].get('dates',[]),5))}" + ) + logger.info( + f"edit_metadata metadata['{satname}'] length {len(metadata[satname].get('filenames',[]))} of filenames Sample first five: {list(islice(metadata[satname].get('filenames',[]),5))}" + ) + logger.info( + f"edit_metadata metadata['{satname}'] length {len(metadata[satname].get('im_dimensions',[]))} of im_dimensions: {np.unique(metadata[satname].get('im_dimensions',[]))}" + ) + logger.info( + f"edit_metadata metadata['{satname}'] length {len(metadata[satname].get('acc_georef',[]))} of acc_georef: {np.unique(metadata[satname].get('acc_georef',[]))}" + ) + logger.info( + f"edit_metadata metadata['{satname}'] length 
{len(metadata[satname].get('im_quality',[]))} of im_quality: {np.unique(metadata[satname].get('im_quality',[]))}" + ) + + shoreline_dict = {} for satname in metadata.keys(): satellite_dict = process_satellite( satname, @@ -1326,34 +1328,35 @@ def extract_shorelines_with_dask( **kwargs, ) if not satellite_dict: - result_dict[satname] = {} + shoreline_dict[satname] = {} elif not satname in satellite_dict.keys(): - result_dict[satname] = {} + shoreline_dict[satname] = {} else: - result_dict[satname] = satellite_dict[satname] + shoreline_dict[satname] = satellite_dict[satname] - for key in result_dict.keys(): - logger.info( - f"result_dict['{key}'] length {len(result_dict[key].get('dates',[]))} of dates: {np.unique(result_dict[key].get('dates',[]))}" - ) - logger.info( - f"result_dict['{key}'] length {len(result_dict[key].get('geoaccuracy',[]))} of geoaccuracy: {np.unique(result_dict[key].get('geoaccuracy',[]))}" - ) - logger.info( - f"result_dict['{key}'] length {len(result_dict[key].get('cloud_cover',[]))} of cloud_cover: {np.unique(result_dict[key].get('cloud_cover',[]))}" - ) - logger.info( - f"result_dict['{key}'] length {len(result_dict[key].get('filename',[]))} of filename: {np.unique(result_dict[key].get('filename',[]))}" - ) + for satname in shoreline_dict.keys(): # Check and log 'reference shoreline' if it exists - ref_sl = result_dict[key].get("shorelines", np.array([])) + ref_sl = shoreline_dict[satname].get("shorelines", np.array([])) if isinstance(ref_sl, np.ndarray): logger.info(f"shorelines.shape: {ref_sl.shape}") - logger.info(f"Number of 'shorelines': {len(ref_sl)}") + logger.info(f"Number of 'shorelines' for {satname}: {len(ref_sl)}") + if shoreline_dict[satname] == {}: + logger.info(f"No shorelines found for {satname}") + else: + logger.info( + f"result_dict['{satname}'] length {len(shoreline_dict[satname].get('dates',[]))} of dates[:3] {list(islice(shoreline_dict[satname].get('dates',[]),3))}" + ) + logger.info( + f"result_dict['{satname}'] length {len(shoreline_dict[satname].get('geoaccuracy',[]))} of geoaccuracy: {np.unique(shoreline_dict[satname].get('geoaccuracy',[]))}" + ) + logger.info( + f"result_dict['{satname}'] length {len(shoreline_dict[satname].get('cloud_cover',[]))} of cloud_cover: {np.unique(shoreline_dict[satname].get('cloud_cover',[]))}" + ) + logger.info( + f"result_dict['{satname}'] length {len(shoreline_dict[satname].get('filename',[]))} of filename[:3]{list(islice(shoreline_dict[satname].get('filename',[]),3))}" + ) # combine the extracted shorelines for each satellite - extracted_shorelines_data = combine_satellite_data(result_dict) - - return extracted_shorelines_data + return combine_satellite_data(shoreline_dict) def get_sorted_model_outputs_directory( @@ -1383,7 +1386,7 @@ def get_sorted_model_outputs_directory( except Exception as e: logger.error(f"Error finding files for satellite {satname}: {e}") continue - logger.info(f"{session_path} contained {satname} files: {files} ") + logger.info(f"{session_path} contained {satname} files: {len(files)} ") if len(files) != 0: filter_model_outputs(satname, files, good_folder, bad_folder) return good_folder @@ -1511,8 +1514,7 @@ def __str__(self): first_rows = self.gdf.head().to_string() # Get CRS information crs_info = f"CRS: {self.gdf.crs}" if self.gdf.crs else "CRS: None" - - return f"Extracted Shoreline:\nROI ID: {self.roi_id}\ngdf: \nColumns and Data Types:\n{col_info}\n\nFirst 5 Rows:\n{first_rows}\n\n{crs_info}" + return f"Extracted Shoreline:\nROI ID: {self.roi_id}\ngdf:\n{crs_info}\nColumns and 
Data Types:\n{col_info}\n\nFirst 5 Rows:\n{first_rows}" def __repr__(self): # Get column names and their data types @@ -1521,7 +1523,7 @@ def __repr__(self): first_rows = self.gdf.head().to_string() # Get CRS information crs_info = f"CRS: {self.gdf.crs}" if self.gdf.crs else "CRS: None" - return f"Extracted Shoreline:\nROI ID: {self.roi_id}\ngdf: \nColumns and Data Types:\n{col_info}\n\nFirst 5 Rows:\n{first_rows}\n\n{crs_info}" + return f"Extracted Shoreline:\nROI ID: {self.roi_id}\ngdf:\n{crs_info}\nColumns and Data Types:\n{col_info}\n\nFirst 5 Rows:\n{first_rows}" def get_roi_id(self) -> Optional[str]: """ @@ -1737,7 +1739,29 @@ def create_extracted_shorelines_from_session( self.dictionary = {} return self else: - logger.info(f"metadata: {metadata}") + # Log portions of the metadata because is massive + for satname in metadata.keys(): + if not metadata[satname]: + logger.warning(f"metadata['{satname}'] is empty") + else: + logger.info( + f"edit_metadata metadata['{satname}'] length {len(metadata[satname].get('epsg',[]))} of epsg: {np.unique(metadata[satname].get('epsg',[]))}" + ) + logger.info( + f"edit_metadata metadata['{satname}'] length {len(metadata[satname].get('dates',[]))} of dates Sample first five: {list(islice(metadata[satname].get('dates',[]),5))}" + ) + logger.info( + f"edit_metadata metadata['{satname}'] length {len(metadata[satname].get('filenames',[]))} of filenames Sample first five: {list(islice(metadata[satname].get('filenames',[]),5))}" + ) + logger.info( + f"edit_metadata metadata['{satname}'] length {len(metadata[satname].get('im_dimensions',[]))} of im_dimensions: {np.unique(metadata[satname].get('im_dimensions',[]))}" + ) + logger.info( + f"edit_metadata metadata['{satname}'] length {len(metadata[satname].get('acc_georef',[]))} of acc_georef: {np.unique(metadata[satname].get('acc_georef',[]))}" + ) + logger.info( + f"edit_metadata metadata['{satname}'] length {len(metadata[satname].get('im_quality',[]))} of im_quality: {np.unique(metadata[satname].get('im_quality',[]))}" + ) extracted_shorelines_dict = extract_shorelines_with_dask( session_path, @@ -1749,10 +1773,23 @@ def create_extracted_shorelines_from_session( ) if extracted_shorelines_dict == {}: raise Exception(f"Failed to extract any shorelines.") - from itertools import islice + + # postprocessing by removing duplicates and removing in inaccurate georeferencing (set threshold to 10 m) + extracted_shorelines_dict = remove_duplicates( + extracted_shorelines_dict + ) # removes duplicates (images taken on the same date by the same satellite) + extracted_shorelines_dict = remove_inaccurate_georef( + extracted_shorelines_dict, 10 + ) # remove inaccurate georeferencing (set threshold to 10 m) + + # Check and log 'reference shoreline' if it exists + ref_sl = extracted_shorelines_dict.get("shorelines", np.array([])) + if isinstance(ref_sl, np.ndarray): + logger.info(f"shorelines.shape: {ref_sl.shape}") + logger.info(f"Number of 'shorelines': {len(ref_sl)}") logger.info( - f"extracted_shorelines_dict length {len(extracted_shorelines_dict.get('dates',[]))} of dates[:3]: {list(islice(extracted_shorelines_dict.get('dates',[]),3))}" + f"extracted_shorelines_dict length {len(extracted_shorelines_dict.get('dates',[]))} of dates: {list(islice(extracted_shorelines_dict.get('dates',[]),3))}" ) logger.info( f"extracted_shorelines_dict length {len(extracted_shorelines_dict.get('satname',[]))} of satname: {np.unique(extracted_shorelines_dict.get('satname',[]))}" @@ -1766,19 +1803,7 @@ def 
create_extracted_shorelines_from_session( logger.info( f"extracted_shorelines_dict length {len(extracted_shorelines_dict.get('filename',[]))} of filename[:3]: {list(islice(extracted_shorelines_dict.get('filename',[]),3))}" ) - # Check and log 'reference shoreline' if it exists - ref_sl = extracted_shorelines_dict.get("shorelines", np.array([])) - if isinstance(ref_sl, np.ndarray): - logger.info(f"shorelines.shape: {ref_sl.shape}") - logger.info(f"Number of 'shorelines': {len(ref_sl)}") - # postprocessing by removing duplicates and removing in inaccurate georeferencing (set threshold to 10 m) - extracted_shorelines_dict = remove_duplicates( - extracted_shorelines_dict - ) # removes duplicates (images taken on the same date by the same satellite) - extracted_shorelines_dict = remove_inaccurate_georef( - extracted_shorelines_dict, 10 - ) # remove inaccurate georeferencing (set threshold to 10 m) self.dictionary = extracted_shorelines_dict if is_list_empty(self.dictionary["shorelines"]): @@ -1862,25 +1887,29 @@ def extract_shorelines( logger.warning(f"No RGB files existed so no metadata.") return {} - for key in metadata.keys(): - logger.info( - f"edit_metadata metadata['{key}'] length {len(metadata[key].get('epsg',[]))} of epsg: {np.unique(metadata.get('epsg',[]))}" - ) - logger.info( - f"edit_metadata metadata['{key}'] length {len(metadata[key].get('dates',[]))} of dates: {np.unique(metadata.get('dates',[]))}" - ) - logger.info( - f"edit_metadata metadata['{key}'] length {len(metadata[key].get('filenames',[]))} of filenames: {np.unique(metadata.get('filenames',[]))}" - ) - logger.info( - f"edit_metadata metadata['{key}'] length {len(metadata[key].get('im_dimensions',[]))} of im_dimensions: {np.unique(metadata.get('im_dimensions',[]))}" - ) - logger.info( - f"edit_metadata metadata['{key}'] length {len(metadata[key].get('acc_georef',[]))} of acc_georef: {np.unique(metadata.get('acc_georef',[]))}" - ) - logger.info( - f"edit_metadata metadata['{key}'] length {len(metadata[key].get('im_quality',[]))} of im_quality: {np.unique(metadata.get('im_quality',[]))}" - ) + for satname in metadata.keys(): + if not metadata[satname]: + logger.warning(f"metadata['{satname}'] is empty") + else: + logger.info( + f"edit_metadata metadata['{satname}'] length {len(metadata[satname].get('epsg',[]))} of epsg: {np.unique(metadata[satname].get('epsg',[]))}" + ) + logger.info( + f"edit_metadata metadata['{satname}'] length {len(metadata[satname].get('dates',[]))} of dates Sample first five: {list(islice(metadata[satname].get('dates',[]),5))}" + ) + logger.info( + f"edit_metadata metadata['{satname}'] length {len(metadata[satname].get('filenames',[]))} of filenames Sample first five: {list(islice(metadata[satname].get('filenames',[]),5))}" + ) + logger.info( + f"edit_metadata metadata['{satname}'] length {len(metadata[satname].get('im_dimensions',[]))} of im_dimensions: {np.unique(metadata[satname].get('im_dimensions',[]))}" + ) + logger.info( + f"edit_metadata metadata['{satname}'] length {len(metadata[satname].get('acc_georef',[]))} of acc_georef: {np.unique(metadata[satname].get('acc_georef',[]))}" + ) + logger.info( + f"edit_metadata metadata['{satname}'] length {len(metadata[satname].get('im_quality',[]))} of im_quality: {np.unique(metadata[satname].get('im_quality',[]))}" + ) + # extract shorelines from ROI if session_path is None: # extract shorelines with coastsat's models @@ -1945,10 +1974,7 @@ def create_shoreline_settings( "model_session_path", # path to model session file "apply_cloud_mask", ] - 
logger.info(f"settings used to create shoreline settings: {settings}") shoreline_settings = {k: v for k, v in settings.items() if k in SHORELINE_KEYS} - logger.info(f"Loading shoreline_settings: {shoreline_settings}") - shoreline_settings.update( { "reference_shoreline": reference_shoreline, @@ -1958,8 +1984,6 @@ def create_shoreline_settings( "inputs": roi_settings, # copy settings for ROI shoreline will be extracted from } ) - - logger.info(f"shoreline_settings: {shoreline_settings}") return shoreline_settings def create_geodataframe( From 25aa469c22e01e5e71a7456c31fca01e7e477a69 Mon Sep 17 00:00:00 2001 From: Sharon Fitzpatrick Date: Tue, 12 Dec 2023 12:29:19 -0800 Subject: [PATCH 73/87] #207 improve logging for zoo_model.py --- src/coastseg/zoo_model.py | 3 --- 1 file changed, 3 deletions(-) diff --git a/src/coastseg/zoo_model.py b/src/coastseg/zoo_model.py index 260dac41..f6964385 100644 --- a/src/coastseg/zoo_model.py +++ b/src/coastseg/zoo_model.py @@ -670,13 +670,10 @@ def preprocess_data( raise FileNotFoundError( f"Config files config.json or config_gdf.geojson do not exist in roi directory { src_directory}\n This means that the download did not complete successfully." ) - logger.info(f"img_type: {img_type}") # get full path to directory named 'RGB' containing RGBs RGB_path = file_utilities.find_directory_recursively(src_directory, name="RGB") - logger.info(f"RGB_path: {RGB_path}") # convert RGB to MNDWI, NDWI,or 5 band model_dict["sample_direc"] = get_imagery_directory(img_type, RGB_path) - logger.info(f"model_dict: {model_dict}") return model_dict def extract_shorelines_with_unet( From d164ade7338bc7f9728bd4add3a50548ea78f65c Mon Sep 17 00:00:00 2001 From: Sharon Fitzpatrick Date: Tue, 12 Dec 2023 12:30:22 -0800 Subject: [PATCH 74/87] #207 v1.1.22 --- pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index 5bd7eac2..a834d124 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -5,7 +5,7 @@ build-backend = "setuptools.build_meta" [project] name = "coastseg" dynamic = ["readme"] -version = "1.1.21" +version = "1.1.22" authors = [ { name=" Sharon Fitzpatrick", email="sharon.fitzpatrick23@gmail.com" }, ] From 8d5827562608ad93d72cdd5ec08ef16e210c4a7d Mon Sep 17 00:00:00 2001 From: Sharon Fitzpatrick Date: Tue, 12 Dec 2023 16:55:06 -0800 Subject: [PATCH 75/87] #207 remove excess logging coastseg --- src/coastseg/coastseg_map.py | 71 ++++++++++-------------------------- 1 file changed, 19 insertions(+), 52 deletions(-) diff --git a/src/coastseg/coastseg_map.py b/src/coastseg/coastseg_map.py index 2b5bb753..0fe968f5 100644 --- a/src/coastseg/coastseg_map.py +++ b/src/coastseg/coastseg_map.py @@ -861,6 +861,7 @@ def set_settings(self, **kwargs): None """ logger.info(f"New Settings: {kwargs}") + # logger.info(f"OLD Settings: {self.settings}") # Check if any of the keys are missing # if any keys are missing set the default value self.default_settings = { @@ -909,7 +910,7 @@ def parse_date(date_str): for key, value in self.default_settings.items(): self.settings.setdefault(key, value) - logger.info(f"Settings: {self.settings}") + logger.info(f"Set Settings: {self.settings}") def get_settings(self): """ @@ -1104,7 +1105,7 @@ def load_extracted_shoreline_files(self) -> None: raise Exception("No ROIs found. 
Please load ROIs.") roi_ids = roi_ids[0] logger.info(f"roi_ids: {roi_ids}") - logger.info(f"self.rois.roi_settings: {self.rois.roi_settings}") + # logger.info(f"self.rois.roi_settings: {self.rois.roi_settings}") # set of roi ids that didn't have missing shorelines rois_no_extracted_shorelines = set() # for each ROI that has extracted shorelines load onto map @@ -1128,11 +1129,11 @@ def load_extracted_shoreline_files(self) -> None: file ) - logger.info(f"ROI {roi_id} extracted_sl_gdf: {extracted_sl_gdf}") - logger.info(f"ROI {roi_id} shoreline_settings: {shoreline_settings}") - logger.info( - f"ROI {roi_id} extracted_shoreline_dict: {extracted_shoreline_dict}" - ) + # logger.info(f"ROI {roi_id} extracted_sl_gdf: {extracted_sl_gdf}") + # logger.info(f"ROI {roi_id} shoreline_settings: {shoreline_settings}") + # logger.info( + # f"ROI {roi_id} extracted_shoreline_dict: {extracted_shoreline_dict}" + # ) # error handling for none if ( extracted_sl_gdf is None @@ -1445,7 +1446,8 @@ def save_timeseries_csv(self, session_path: str, roi_id: str, rois: ROI) -> None logger.info(f"No extracted shorelines for roi: {roi_id}") return cross_distance_transects = rois.get_cross_shore_distances(roi_id) - logger.info(f"ROI: {roi_id} extracted_shorelines : {extracted_shorelines}") + + # logger.info(f"ROI: {roi_id} extracted_shorelines : {extracted_shorelines}") # if no cross distance was 0 then skip if cross_distance_transects == 0: print( @@ -1630,15 +1632,11 @@ def save_csv_per_transect(self, roi_ids: list, rois: ROI) -> None: out_dict = dict([]) # copy shoreline intersects for each transect out_dict[key] = cross_distance_transects[key] - logger.info( - f"out dict roi_ids columns : {[roi_id for _ in range(len(extracted_shorelines_dict['dates']))]}" - ) out_dict["roi_id"] = [ roi_id for _ in range(len(extracted_shorelines_dict["dates"])) ] out_dict["dates"] = extracted_shorelines_dict["dates"] out_dict["satname"] = extracted_shorelines_dict["satname"] - logger.info(f"out_dict : {out_dict}") df = pd.DataFrame(out_dict) df.index = df["dates"] df.pop("dates") @@ -1650,7 +1648,6 @@ def save_csv_per_transect(self, roi_ids: list, rois: ROI) -> None: session_path = file_utilities.create_directory( session_path, ROI_directory ) - logger.info(f"session_path: {session_path}") # save source data self.save_config(session_path) # save to csv file session path @@ -1658,12 +1655,6 @@ def save_csv_per_transect(self, roi_ids: list, rois: ROI) -> None: if os.path.exists(fn): os.remove(fn) df.to_csv(fn, sep=",") - logger.info( - f"ROI: {roi_id} time-series of shoreline change along transects" - ) - logger.info( - f"Time-series of the shoreline change along the transects saved as:{fn}" - ) rois_computed_transects.add(roi_id) print(f"Computed transects for the following ROIs: {rois_computed_transects}") @@ -1700,7 +1691,7 @@ def save_cross_distance_to_file(self, roi_ids: list, rois: ROI) -> None: continue cross_distance_transects = rois.get_cross_shore_distances(roi_id) - logger.info(f"ROI: {roi_id} extracted_shorelines : {extracted_shorelines}") + # logger.info(f"ROI: {roi_id} extracted_shorelines : {extracted_shorelines}") # if no cross distance was 0 then skip if cross_distance_transects == 0: print( @@ -1712,20 +1703,18 @@ def save_cross_distance_to_file(self, roi_ids: list, rois: ROI) -> None: cross_distance_df = common.get_cross_distance_df( extracted_shorelines, cross_distance_transects ) - logger.info(f"ROI: {roi_id} cross_distance_df : {cross_distance_df}") + # logger.info(f"ROI: {roi_id} cross_distance_df : 
{cross_distance_df}") # Save extracted shoreline info to session directory session_name = self.get_session_name() ROI_directory = rois.roi_settings[roi_id]["sitename"] session_path = os.path.join(os.getcwd(), "sessions", session_name) session_path = file_utilities.create_directory(session_path, ROI_directory) - logger.info(f"session_path: {session_path}") # save source data self.save_config(session_path) filepath = os.path.join(session_path, "transect_time_series.csv") if os.path.exists(filepath): - print(f"Overwriting:{filepath}") os.remove(filepath) cross_distance_df.to_csv(filepath, sep=",") print(f"ROI: {roi_id} time-series of shoreline change along transects") @@ -1773,7 +1762,6 @@ def remove_layer_by_name(self, layer_name: str): existing_layer = self.map.find_layer(layer_name) if existing_layer is not None: self.map.remove(existing_layer) - logger.info(f"Removed layer {layer_name}") def remove_shoreline(self): del self.shoreline @@ -1800,9 +1788,6 @@ def replace_layer_by_name( """ if new_layer is None: return - logger.info( - f"layer_name {layer_name} \non_hover {on_hover}\n on_click {on_click}" - ) self.remove_layer_by_name(layer_name) # when feature is hovered over on_hover function is called if on_hover is not None: @@ -1811,11 +1796,9 @@ def replace_layer_by_name( # when feature is clicked on on_click function is called new_layer.on_click(on_click) self.map.add_layer(new_layer) - logger.info(f"Add layer to map: {layer_name}") def remove_all_rois(self) -> None: """Removes all the unselected rois from the map""" - logger.info("Removing all ROIs from map") # Remove the selected and unselected rois self.remove_layer_by_name(ROI.SELECTED_LAYER_NAME) self.remove_layer_by_name(ROI.LAYER_NAME) @@ -1826,7 +1809,6 @@ def remove_all_rois(self) -> None: def remove_selected_shorelines(self) -> None: """Removes all the unselected rois from the map""" - logger.info("Removing selected shorelines from map") # Remove the selected and unselected rois self.remove_layer_by_name(SELECTED_LAYER_NAME) self.remove_layer_by_name(Shoreline.LAYER_NAME) @@ -1843,7 +1825,6 @@ def remove_selected_shorelines(self) -> None: def remove_selected_rois(self) -> None: """Removes all the unselected rois from the map""" - logger.info("Removing selected ROIs from map") # Remove the selected and unselected rois self.remove_layer_by_name(ROI.SELECTED_LAYER_NAME) self.remove_layer_by_name(ROI.LAYER_NAME) @@ -1907,7 +1888,6 @@ def handle_draw( exception_handler.handle_bbox_error(bbox_too_small, self.warning_box) else: # if no exceptions occur create new bbox, remove old bbox, and load new bbox - logger.info(f"Made it with bbox area: {bbox_area}") self.load_feature_on_map("bbox") if self.draw_control.last_action == "deleted": @@ -1952,9 +1932,6 @@ def load_extracted_shorelines_to_map(self, row_number: int = 0) -> None: Returns: None: This function does not return anything, but rather loads the extracted shorelines onto the map. 
""" - - logger.info(f"row_number: {row_number}") - # Remove any existing extracted shorelines self.remove_extracted_shoreline_layers() @@ -1991,10 +1968,6 @@ def load_extracted_shorelines_to_map(self, row_number: int = 0) -> None: # Load extracted shorelines for the first ROI ID with extracted shorelines for selected_id in roi_ids_with_extracted_shorelines: extracted_shorelines = self.rois.get_extracted_shoreline(selected_id) - logger.info( - f"ROI ID {selected_id} extracted shorelines {extracted_shorelines}" - ) - if extracted_shorelines is not None: logger.info(f"Extracted shorelines found for ROI {selected_id}") self.load_extracted_shorelines_on_map(extracted_shorelines, row_number) @@ -2015,7 +1988,6 @@ def load_extracted_shorelines_on_map( if extracted_shorelines is None: return # Loads stylized extracted shorelines onto map for single roi - logger.info(f"row_number: {row_number}") # Convert the extracted shoreline's geometry to points points_gdf = extracted_shoreline.convert_linestrings_to_multipoints( extracted_shorelines.gdf @@ -2029,12 +2001,12 @@ def load_extracted_shorelines_on_map( "radius": 1, }, ) - layer_name = extracted_shorelines.get_layer_name() - logger.info( - f"Extracted shoreline layer: {new_layer}\n" - f"Layer name: {layer_name}\n" - f"Extracted shoreline layers: {new_layer}\n" - ) + # layer_name = extracted_shorelines.get_layer_name() + # logger.info( + # f"Extracted shoreline layer: {new_layer}\n" + # f"Layer name: {layer_name}\n" + # f"Extracted shoreline layers: {new_layer}\n" + # ) # new_layer.on_hover(self.update_extracted_shoreline_html) self.map.add_layer(new_layer) # update the extracted shoreline layer and number of shorelines available @@ -2068,8 +2040,6 @@ def load_feature_on_map( if new_feature is None: return - logger.info(f"new_feature: {new_feature} \ngdf: {gdf}") - # load the features onto the map self.add_feature_on_map( new_feature, @@ -2096,9 +2066,6 @@ def add_feature_on_map( Returns: - None """ - logger.info( - f"feature_name: {feature_name.lower()}\n layer_name: {layer_name}\n new_feature: {new_feature}" - ) # get on hover and on click handlers for feature on_hover = self.get_on_hover_handler(feature_name) on_click = self.get_on_click_handler(feature_name) @@ -2171,7 +2138,7 @@ def load_on_map( def create_layer(self, feature, layer_name: str): if feature.gdf.empty: logger.warning("Cannot add an empty geodataframe layer to the map.") - print("Cannot add an empty geodataframe layer to the map.") + print("Cannot add an empty layer to the map.") return None layer_geojson = json.loads(feature.gdf.to_json()) # convert layer to GeoJson and style it accordingly From adfc36650f7e33bd4d8685cf500c71de37d02938 Mon Sep 17 00:00:00 2001 From: Sharon Fitzpatrick Date: Wed, 13 Dec 2023 22:26:21 -0800 Subject: [PATCH 76/87] #207 drop transects whose shoreline intersections are NaN --- src/coastseg/common.py | 82 ++++++++++++++++++++++++++++-------------- 1 file changed, 56 insertions(+), 26 deletions(-) diff --git a/src/coastseg/common.py b/src/coastseg/common.py index d5286616..62295a78 100644 --- a/src/coastseg/common.py +++ b/src/coastseg/common.py @@ -1573,13 +1573,29 @@ def save_transect_intersections( cross_distance_transects: dict, filename: str = "transect_time_series.csv", ) -> str: + """ + Saves the saves the dates from the extracted shorelines to the dictionart containing the cross distance transect intersections to a CSV file. + + This function processes intersection data between shorelines and transects, removing columns with all NaN values. 
+ It then saves the processed data to a CSV file at the specified path. + + Args: + - save_path (str): The directory path where the CSV file will be saved. + - extracted_shorelines (dict): A dictionary containing shoreline data. + - cross_distance_transects (dict): A dictionary containing transect data with cross-distance measurements. + - filename (str, optional): The name of the CSV file to be saved. Default is "transect_time_series.csv". + + Returns: + - str: The full file path of the saved CSV file. + + The function first combines the shoreline and transect data into a DataFrame and then removes any columns + that contain only NaN values before saving to CSV. + """ cross_distance_df = get_cross_distance_df( extracted_shorelines, cross_distance_transects ) + cross_distance_df.dropna(axis="columns", how="all", inplace=True) filepath = os.path.join(save_path, filename) - if os.path.exists(filepath): - print(f"Overwriting:{filepath}") - os.remove(filepath) cross_distance_df.to_csv(filepath, sep=",") return filepath @@ -1630,29 +1646,43 @@ def create_csv_per_transect( save_path: str, cross_distance_transects: dict, extracted_shorelines_dict: dict, - filename: str = "_timeseries_raw.csv", -): - for key in cross_distance_transects.keys(): - df = pd.DataFrame() - out_dict = dict([]) - # copy shoreline intersects for each transect - out_dict[key] = cross_distance_transects[key] - out_dict["roi_id"] = [ - roi_id for _ in range(len(extracted_shorelines_dict["dates"])) - ] - out_dict["dates"] = extracted_shorelines_dict["dates"] - out_dict["satname"] = extracted_shorelines_dict["satname"] - # logger.info(f"out_dict : {out_dict}") - df = pd.DataFrame(out_dict) - df.index = df["dates"] - df.pop("dates") - # save to csv file session path - csv_filename = f"{key}{filename}" - fn = os.path.join(save_path, csv_filename) - if os.path.exists(fn): - logger.info(f"Overwriting:{fn}") - os.remove(fn) - df.to_csv(fn, sep=",") + file_extension: str = "_timeseries_raw.csv", +) -> None: + """ + Generates CSV files from transect and shoreline data. + + For each transect in cross_distance_transects, this function creates a CSV file if the transect contains + non-NaN values. The CSV includes dates, transect data, region of interest ID, and satellite name. + + Args: + - roi_id (str): ID for the region of interest. + - save_path (str): Path to save CSV files. + - cross_distance_transects (dict): Transect data with cross-distance measurements. + - extracted_shorelines_dict (dict): Contains 'dates' and 'satname'. + - file_extension (str, optional): File extension for CSV files. Default is "_timeseries_raw.csv". + + Notes: + - CSV files are named using transect keys and file_extension. + - Transects with only NaN values are skipped. 
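Both helpers above guard against empty transect intersections. A rough standalone illustration of that filtering, using plain pandas with hypothetical transect names and values rather than the helpers themselves:

import numpy as np
import pandas as pd

dates = pd.to_datetime(["2023-01-01", "2023-02-01", "2023-03-01"])
cross_distance_transects = {
    "transect_1": [12.3, np.nan, 14.1],       # kept: at least one real intersection
    "transect_2": [np.nan, np.nan, np.nan],   # dropped/skipped: all NaN
}
df = pd.DataFrame(cross_distance_transects, index=dates)
# mirrors cross_distance_df.dropna(axis="columns", how="all") above
df = df.dropna(axis="columns", how="all")
print(list(df.columns))  # ['transect_1']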
+ """ + for key, transect in cross_distance_transects.items(): + if pd.notna(transect).any(): # Check if there's any non-NaN value + # Create DataFrame directly + df = pd.DataFrame( + { + "dates": extracted_shorelines_dict["dates"], + key: transect, + "roi_id": [roi_id] * len(extracted_shorelines_dict["dates"]), + "satname": extracted_shorelines_dict["satname"], + }, + index=extracted_shorelines_dict["dates"], + ) + # Save to csv file + fn = f"{key}{file_extension}" + file_path = os.path.join(save_path, fn) + df.to_csv( + file_path, sep=",", index=False + ) # Set index=False if you don't want 'dates' as index in CSV def move_report_files( From fabd5709d1f06aa74b82b93c209aa1dcf5f2e878 Mon Sep 17 00:00:00 2001 From: Sharon Fitzpatrick Date: Thu, 14 Dec 2023 13:54:39 -0800 Subject: [PATCH 77/87] #207 no longer save blank transect shoreline intersections + refactor --- src/coastseg/coastseg_map.py | 62 +++++++++++------------------------- src/coastseg/common.py | 6 ++-- 2 files changed, 22 insertions(+), 46 deletions(-) diff --git a/src/coastseg/coastseg_map.py b/src/coastseg/coastseg_map.py index 0fe968f5..0470e054 100644 --- a/src/coastseg/coastseg_map.py +++ b/src/coastseg/coastseg_map.py @@ -1544,52 +1544,28 @@ def save_session(self, roi_ids: list[str], save_transects: bool = True): # save transects to session folder if save_transects: - # Saves the cross distances of the transects & extracted shorelines to csv file within each ROI's directory - self.save_timeseries_csv(session_path, roi_id, self.rois) - self.save_csv_per_transect_for_roi(session_path, roi_id, self.rois) - - save_path = os.path.join(session_path, "transects_cross_distances.json") + # get extracted_shorelines from extracted shoreline object in rois + extracted_shorelines_dict = extracted_shoreline.dictionary + # if no shorelines were extracted then skip + if extracted_shorelines_dict == {}: + logger.info(f"No extracted shorelines for roi: {roi_id}") + continue cross_shore_distance = self.rois.get_cross_shore_distances(roi_id) - file_utilities.to_file(cross_shore_distance, save_path) + # if no cross distance was 0 then skip + if cross_shore_distance == 0: + print( + f"ROI: {roi_id} had no time-series of shoreline change along transects" + ) + logger.info(f"ROI: {roi_id} cross distance is 0") + continue - # save transect settings to file - transect_settings = common.get_transect_settings(self.get_settings()) - transect_settings_path = os.path.join( - session_path, "transects_settings.json" + common.save_transects( + roi_id, + session_path, + cross_shore_distance, + extracted_shorelines_dict, + self.get_settings(), ) - file_utilities.to_file(transect_settings, transect_settings_path) - - def save_csv_per_transect_for_roi( - self, session_path: str, roi_id: list, rois: ROI - ) -> None: - """Saves cross distances of transects and - extracted shorelines in ROI to csv file within each ROI's directory. 
- If no shorelines were extracted for an ROI then nothing is saved - Args: - roi_ids (list): list of roi ids - rois (ROI): ROI instance containing keys: - 'extracted_shorelines': extracted shoreline from roi - 'cross_distance_transects': cross distance of transects and extracted shoreline from roi - """ - # get extracted shorelines for this roi id - roi_extracted_shorelines = rois.get_extracted_shoreline(roi_id) - # if roi does not have extracted shoreline skip it - if roi_extracted_shorelines is None: - return - # get extracted_shorelines from extracted shoreline object in rois - extracted_shorelines_dict = roi_extracted_shorelines.dictionary - cross_distance_transects = rois.get_cross_shore_distances(roi_id) - logger.info(f"ROI: {roi_id} extracted_shorelines : {extracted_shorelines_dict}") - # if no cross distance was 0 then skip - if cross_distance_transects == 0: - return - # if no shorelines were extracted then skip - if extracted_shorelines_dict == {}: - return - # for each transect id in cross_distance_transects make a new csv file - common.create_csv_per_transect( - roi_id, session_path, cross_distance_transects, extracted_shorelines_dict - ) def save_csv_per_transect(self, roi_ids: list, rois: ROI) -> None: """Saves cross distances of transects and diff --git a/src/coastseg/common.py b/src/coastseg/common.py index 62295a78..eff62107 100644 --- a/src/coastseg/common.py +++ b/src/coastseg/common.py @@ -761,7 +761,7 @@ def save_transects( save_location, cross_distance_transects, extracted_shorelines, - filename="_timeseries_raw.csv", + file_extension="_timeseries_raw.csv", ) save_transect_intersections( save_location, @@ -1576,7 +1576,7 @@ def save_transect_intersections( """ Saves the saves the dates from the extracted shorelines to the dictionart containing the cross distance transect intersections to a CSV file. - This function processes intersection data between shorelines and transects, removing columns with all NaN values. + This function processes intersection data between shorelines and transects, removing columns with all NaN values. It then saves the processed data to a CSV file at the specified path. Args: @@ -1588,7 +1588,7 @@ def save_transect_intersections( Returns: - str: The full file path of the saved CSV file. - The function first combines the shoreline and transect data into a DataFrame and then removes any columns + The function first combines the shoreline and transect data into a DataFrame and then removes any columns that contain only NaN values before saving to CSV. 
""" cross_distance_df = get_cross_distance_df( From 5ca7d3c70d2feca4a4d77cfb1a53787e24d15c20 Mon Sep 17 00:00:00 2001 From: Sharon Fitzpatrick Date: Thu, 14 Dec 2023 15:04:59 -0800 Subject: [PATCH 78/87] #207 preview imagery only shows tier 1 now --- src/coastseg/coastseg_map.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/coastseg/coastseg_map.py b/src/coastseg/coastseg_map.py index 0470e054..1af46ccd 100644 --- a/src/coastseg/coastseg_map.py +++ b/src/coastseg/coastseg_map.py @@ -566,11 +566,13 @@ def preview_available_images(self): for roi_id in tqdm(self.selected_set, desc="Processing", leave=False): polygon = common.get_roi_polygon(self.rois.gdf, roi_id) if polygon: + # only get the imagery in tier 1 images_count = count_images_in_ee_collection( polygon, start_date, end_date, satellites=set(self.settings["sat_list"]), + tiers=[1], ) satellite_messages = [f"\nROI ID: {roi_id}"] for sat in self.settings["sat_list"]: From 17f8ca9b8d2e18c9b1b93e57c302f3efccfcc240 Mon Sep 17 00:00:00 2001 From: Sharon Fitzpatrick Date: Thu, 14 Dec 2023 15:08:27 -0800 Subject: [PATCH 79/87] #207 count_images_in_ee_collection tiers =[ 1] --- src/coastseg/downloads.py | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/src/coastseg/downloads.py b/src/coastseg/downloads.py index 111a0015..9629ce77 100644 --- a/src/coastseg/downloads.py +++ b/src/coastseg/downloads.py @@ -28,7 +28,6 @@ logger = logging.getLogger(__name__) - def get_collection_by_tier( polygon: List[List[float]], start_date: Union[str, datetime], @@ -109,6 +108,7 @@ def count_images_in_ee_collection( end_date: Union[str, datetime], max_cloud_cover: float = 95, satellites: Collection[str] = ("L5", "L7", "L8", "L9", "S2"), + tiers: list[str] = None, ) -> dict: """ Count the number of images in specified satellite collections over a certain area and time period. @@ -119,7 +119,7 @@ def count_images_in_ee_collection( end_date (str or datetime): The end date of the time period. If a string, it should be in 'YYYY-MM-DD' format. max_cloud_cover (float, optional): The maximum cloud cover percentage. Images with a cloud cover percentage higher than this will be excluded. Defaults to 99. satellites (Collection[str], optional): A collection of satellite names. The function will return image counts for these satellites. Defaults to ("L5","L7","L8","L9","S2"). - + tiers (list[str], optional): A list of tiers. The function will return image counts for these tiers. Defaults to [1,2] Returns: dict: A dictionary where the keys are the satellite names and the values are the image counts. 
@@ -153,11 +153,14 @@ def count_images_in_ee_collection( except: ee.Initialize() + if tiers is None: + tiers = [1, 2] + image_counts = {} images_in_tier_count = 0 for satellite in satellites: images_in_tier_count = 0 - for tier in [1, 2]: + for tier in tiers: collection = get_collection_by_tier( polygon, start_date, end_date, satellite, tier, max_cloud_cover ) From 77ae905391e6bbf3f3e90a15a667687d9d96696f Mon Sep 17 00:00:00 2001 From: Sharon Fitzpatrick Date: Thu, 14 Dec 2023 15:42:14 -0800 Subject: [PATCH 80/87] #207 fix str and repr to shoreten geom for shoreline/extracted shoreline --- src/coastseg/extracted_shoreline.py | 22 +++++++++++++++++----- src/coastseg/shoreline.py | 28 ++++++++++++++++++++-------- 2 files changed, 37 insertions(+), 13 deletions(-) diff --git a/src/coastseg/extracted_shoreline.py b/src/coastseg/extracted_shoreline.py index 1e1d23b2..3b7f76d0 100644 --- a/src/coastseg/extracted_shoreline.py +++ b/src/coastseg/extracted_shoreline.py @@ -1510,20 +1510,32 @@ def __init__( def __str__(self): # Get column names and their data types col_info = self.gdf.dtypes.apply(lambda x: x.name).to_string() - # Get first 5 rows as a string - first_rows = self.gdf.head().to_string() + # Get first 3 rows as a string + first_rows = self.gdf + geom_str = "" + if isinstance(self.gdf, gpd.GeoDataFrame): + if "geometry" in self.gdf.columns: + first_rows = self.gdf.head(3).drop(columns="geometry").to_string() + if not self.gdf.empty: + geom_str = str(self.gdf.iloc[0]["geometry"])[:100] + "...)" # Get CRS information crs_info = f"CRS: {self.gdf.crs}" if self.gdf.crs else "CRS: None" - return f"Extracted Shoreline:\nROI ID: {self.roi_id}\ngdf:\n{crs_info}\nColumns and Data Types:\n{col_info}\n\nFirst 5 Rows:\n{first_rows}" + return f"Extracted Shoreline:\nROI ID: {self.roi_id}\ngdf:\n{crs_info}\nColumns and Data Types:\n{col_info}\n\nFirst 3 Rows:\n{first_rows}\n geometry: {geom_str}" def __repr__(self): # Get column names and their data types col_info = self.gdf.dtypes.apply(lambda x: x.name).to_string() # Get first 5 rows as a string - first_rows = self.gdf.head().to_string() + first_rows = self.gdf + geom_str = "" + if isinstance(self.gdf, gpd.GeoDataFrame): + if "geometry" in self.gdf.columns: + first_rows = self.gdf.head(3).drop(columns="geometry").to_string() + if not self.gdf.empty: + geom_str = str(self.gdf.iloc[0]["geometry"])[:100] + "...)" # Get CRS information crs_info = f"CRS: {self.gdf.crs}" if self.gdf.crs else "CRS: None" - return f"Extracted Shoreline:\nROI ID: {self.roi_id}\ngdf:\n{crs_info}\nColumns and Data Types:\n{col_info}\n\nFirst 5 Rows:\n{first_rows}" + return f"Extracted Shoreline:\nROI ID: {self.roi_id}\ngdf:\n{crs_info}\nColumns and Data Types:\n{col_info}\n\nFirst 3 Rows:\n{first_rows}\n geometry: {geom_str}" def get_roi_id(self) -> Optional[str]: """ diff --git a/src/coastseg/shoreline.py b/src/coastseg/shoreline.py index 9bb86b95..c6acafc3 100644 --- a/src/coastseg/shoreline.py +++ b/src/coastseg/shoreline.py @@ -82,24 +82,36 @@ def filename(self, value): def __str__(self): # Get column names and their data types col_info = self.gdf.dtypes.apply(lambda x: x.name).to_string() - # Get first 5 rows as a string - first_rows = self.gdf.head().to_string() + # Get first 3 rows as a string + first_rows = self.gdf + geom_str = "" + if isinstance(self.gdf, gpd.GeoDataFrame): + if "geometry" in self.gdf.columns: + first_rows = self.gdf.head(3).drop(columns="geometry").to_string() + if not self.gdf.empty: + geom_str = str(self.gdf.iloc[0]["geometry"])[:100] + "...)" # 
Get CRS information crs_info = f"CRS: {self.gdf.crs}" if self.gdf.crs else "CRS: None" if "id" in self.gdf.columns: - transect_ids = self.gdf["id"].astype(str) - return f"Shoreline:\nself.gdf:\n\n{crs_info}\n- Columns and Data Types:\n{col_info}\n\n- First 5 Rows:\n{first_rows}\nIDs:\n{transect_ids}" + ids = self.gdf["id"].astype(str) + return f"Shoreline:\nself.gdf:\n\n{crs_info}\n- Columns and Data Types:\n{col_info}\n\n- First 3 Rows:\n{first_rows}\n geometry: {geom_str}\nIDs:\n{ids}" def __repr__(self): # Get column names and their data types col_info = self.gdf.dtypes.apply(lambda x: x.name).to_string() - # Get first 5 rows as a string - first_rows = self.gdf.head().to_string() + # Get first 3 rows as a string + first_rows = self.gdf + geom_str = "" + if isinstance(self.gdf, gpd.GeoDataFrame): + if "geometry" in self.gdf.columns: + first_rows = self.gdf.head(3).drop(columns="geometry").to_string() + if not self.gdf.empty: + geom_str = str(self.gdf.iloc[0]["geometry"])[:100] + "...)" # Get CRS information crs_info = f"CRS: {self.gdf.crs}" if self.gdf.crs else "CRS: None" if "id" in self.gdf.columns: - transect_ids = self.gdf["id"].astype(str) - return f"Shoreline:\nself.gdf:\n{crs_info}\n- Columns and Data Types:\n{col_info}\n\n- First 5 Rows:\n{first_rows}\nIDs:\n{transect_ids}" + ids = self.gdf["id"].astype(str) + return f"Shoreline:\nself.gdf:\n\n{crs_info}\n- Columns and Data Types:\n{col_info}\n\n- First 3 Rows:\n{first_rows}\n geometry: {geom_str}\nIDs:\n{ids}" def initialize_shorelines( self, From 1ef7439cbda4f092f0a98ed9e225382ab3875191 Mon Sep 17 00:00:00 2001 From: Sharon Fitzpatrick Date: Thu, 14 Dec 2023 20:35:23 -0800 Subject: [PATCH 81/87] #207 remove logger from bbox --- src/coastseg/bbox.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/src/coastseg/bbox.py b/src/coastseg/bbox.py index 47b78746..851c6c20 100644 --- a/src/coastseg/bbox.py +++ b/src/coastseg/bbox.py @@ -11,8 +11,6 @@ from shapely.geometry import shape from ipyleaflet import GeoJSON -logger = logging.getLogger(__name__) - __all__ = ["Bounding_Box"] From 741ef2806a28cb143712a1dcc1b3acb432b3afaa Mon Sep 17 00:00:00 2001 From: Sharon Fitzpatrick Date: Thu, 14 Dec 2023 20:36:09 -0800 Subject: [PATCH 82/87] #207 include_T2 = false add to create_roi_settings --- src/coastseg/common.py | 19 ++++++++----------- 1 file changed, 8 insertions(+), 11 deletions(-) diff --git a/src/coastseg/common.py b/src/coastseg/common.py index eff62107..ac97478b 100644 --- a/src/coastseg/common.py +++ b/src/coastseg/common.py @@ -282,7 +282,7 @@ def get_satellite_name(filename: str): try: return filename.split("_")[2].split(".")[0] except IndexError: - # logger.error(f"Unable to extract satellite name from filename: {filename}") + logger.error(f"Unable to extract satellite name from filename: {filename}") return None @@ -1259,7 +1259,6 @@ def download_url(url: str, save_path: str, filename: str = None, chunk_size: int if content_length: total_length = int(content_length) else: - # Content-Length not available logger.warning("Content length not found in response headers") total_length = None @@ -1723,16 +1722,13 @@ def move_report_files( def save_extracted_shoreline_figures(settings: dict, save_path: str): """ - Save extracted shoreline figures to a specified save path. + Save extracted shoreline figures to the specified save path. - The function first constructs the path to the extracted shoreline figures - and checks if the path exists. If the path exists, it moves the files to a - new directory specified by save_path. 
- - :param extracted_shorelines:An Extracted_Shoreline object containing the extracted shorelines and shoreline settings. - :param save_path: The path where the output figures will be saved. + Args: + settings (dict): A dictionary containing the settings for the extraction process. + save_path (str): The path where the extracted shoreline figures will be saved. """ - # Attempt to get the data_path and sitename + # Get the data_path and sitename from the settings data_path = settings.get("filepath") or settings.get("inputs", {}).get("filepath") sitename = settings.get("sitename") or settings.get("inputs", {}).get("sitename") @@ -1748,7 +1744,7 @@ def save_extracted_shoreline_figures(settings: dict, save_path: str): if os.path.exists(extracted_shoreline_figure_path): dst_path = os.path.join(save_path, "jpg_files", "detection") - logger.info(f"dst_path : {dst_path }") + logger.info(f"Moving extracted shoreline figures to : {dst_path }") file_utilities.move_files( extracted_shoreline_figure_path, dst_path, delete_src=True ) @@ -2197,6 +2193,7 @@ def create_roi_settings( "landsat_collection": landsat_collection, "sitename": sitename, "filepath": filepath, + "include_T2": False, } roi_settings[roi_id] = inputs_dict return roi_settings From e027792260a3a2681d44ea0cd651825fa73a44e0 Mon Sep 17 00:00:00 2001 From: Sharon Fitzpatrick Date: Thu, 14 Dec 2023 21:06:24 -0800 Subject: [PATCH 83/87] #207 refactor saving transects to be same in both workflows + refactoring --- src/coastseg/coastseg_map.py | 292 ++++++----------------------------- src/coastseg/zoo_model.py | 1 - 2 files changed, 51 insertions(+), 242 deletions(-) diff --git a/src/coastseg/coastseg_map.py b/src/coastseg/coastseg_map.py index 1af46ccd..95079105 100644 --- a/src/coastseg/coastseg_map.py +++ b/src/coastseg/coastseg_map.py @@ -10,7 +10,6 @@ import traceback # Third-party imports -import pandas as pd import geopandas as gpd from ipyleaflet import DrawControl, LayersControl, WidgetControl, GeoJSON from leafmap import Map @@ -210,6 +209,7 @@ def compute_tidal_corrections( def load_metadata(self, settings: dict = {}, ids: Collection = set([])): """ Loads metadata either based on user-provided settings or a collection of ROI IDs. + This also creates a metadata file for each ROI in the data directory. 
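The ROI download settings assembled above now carry the include_T2 flag; one entry might look roughly like this (hypothetical ID, sitename, and filepath; the remaining download keys are omitted here):

inputs = {
    "roi_id": "cwm5",
    "landsat_collection": "C02",
    "sitename": "ID_cwm5_datetime12-15-23__07_48_28",
    "filepath": r"C:\CoastSeg\data",
    "include_T2": False,  # Tier 2 imagery is excluded by default
}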
This method either takes in a dictionary with site-specific settings to load metadata for a particular site, or iterates over a collection of ROI IDs to load their respective @@ -603,7 +603,6 @@ def download_imagery(self) -> None: # selected_layer contains the selected ROIs selected_layer = self.map.find_layer(ROI.SELECTED_LAYER_NAME) - logger.info(f"selected_layer: {selected_layer}") # Create a list of download settings for each ROI roi_settings = common.create_roi_settings( settings, selected_layer.data, file_path, date_str @@ -794,7 +793,7 @@ def save_config(self, filepath: str = None) -> None: self.rois.set_roi_settings(roi_settings) # create dictionary of settings for each ROI to be saved to config.json - roi_ids = self.get_selected_roi_ids() + roi_ids = self.get_roi_ids(is_selected=True) selected_roi_settings = { roi_id: self.rois.roi_settings[roi_id] for roi_id in roi_ids } @@ -806,7 +805,8 @@ def save_config(self, filepath: str = None) -> None: ) transects_gdf = getattr(self.transects, "gdf", None) if self.transects else None bbox_gdf = getattr(self.bbox, "gdf", None) if self.bbox else None - selected_rois = self.get_selected_rois(roi_ids) + # get the geodataframe containing all the selected rois + selected_rois = self.rois.gdf[self.rois.gdf["id"].isin(roi_ids)] logger.info(f"selected_rois: {selected_rois}") # save all selected rois, shorelines, transects and bbox to config geodataframe @@ -928,7 +928,6 @@ def get_settings(self): SETTINGS_NOT_FOUND = ( "No settings found. Click save settings or load a config file." ) - logger.info(f"self.settings: {self.settings}") if self.settings is None or self.settings == {}: raise Exception(SETTINGS_NOT_FOUND) return self.settings @@ -1094,20 +1093,25 @@ def get_all_roi_ids(self) -> List[str]: return [] if self.rois.gdf.empty: return [] + if not hasattr(self.rois, "gdf"): + return [] + if "id" not in self.rois.gdf.columns: + return [] return self.rois.gdf["id"].tolist() - def load_extracted_shoreline_files(self) -> None: - exception_handler.config_check_if_none(self.rois, "ROIs") - # if no rois are selected throw an error - # exception_handler.check_selected_set(self.selected_set) - roi_ids = self.get_selected_roi_ids() + def get_any_available_roi_id(self) -> List[str]: + roi_ids = self.get_roi_ids(is_selected=True) if roi_ids == []: roi_ids = self.get_all_roi_ids() if roi_ids == []: - raise Exception("No ROIs found. 
Please load ROIs.") + return roi_ids roi_ids = roi_ids[0] - logger.info(f"roi_ids: {roi_ids}") - # logger.info(f"self.rois.roi_settings: {self.rois.roi_settings}") + return roi_ids + + def load_extracted_shoreline_files(self) -> None: + exception_handler.config_check_if_none(self.rois, "ROIs") + # load extracted shorelines for either a selected ROI or the first ROI if no ROI is selected + roi_ids = self.get_any_available_roi_id() # set of roi ids that didn't have missing shorelines rois_no_extracted_shorelines = set() # for each ROI that has extracted shorelines load onto map @@ -1130,18 +1134,8 @@ def load_extracted_shoreline_files(self) -> None: extracted_shoreline_dict = file_utilities.load_data_from_json( file ) - - # logger.info(f"ROI {roi_id} extracted_sl_gdf: {extracted_sl_gdf}") - # logger.info(f"ROI {roi_id} shoreline_settings: {shoreline_settings}") - # logger.info( - # f"ROI {roi_id} extracted_shoreline_dict: {extracted_shoreline_dict}" - # ) - # error handling for none - if ( - extracted_sl_gdf is None - or extracted_sl_gdf is None - or extracted_sl_gdf is None - ): + # If any of the extracted shoreline files are missing, skip to next ROI + if extracted_sl_gdf is None: logger.info( f"ROI {roi_id} didn't have extracted shoreline files to load" ) @@ -1156,7 +1150,7 @@ def load_extracted_shoreline_files(self) -> None: ) self.rois.add_extracted_shoreline(extracted_shorelines, roi_id) logger.info( - f"ROI {roi_id} successfully loaded extracted shorelines: {self.rois.get_extracted_shoreline(roi_id).dictionary}" + f"ROI {roi_id} successfully loaded extracted shorelines: {self.rois.get_extracted_shoreline(roi_id)}" ) if len(rois_no_extracted_shorelines) > 0: @@ -1290,7 +1284,7 @@ def validate_extract_shoreline_inputs(self): ) # raise error if selected rois were not downloaded exception_handler.check_if_rois_downloaded( - self.rois.roi_settings, self.get_selected_roi_ids() + self.rois.roi_settings, self.get_roi_ids(is_selected=True) ) def validate_download_imagery_inputs(self): @@ -1306,13 +1300,24 @@ def validate_download_imagery_inputs(self): exception_handler.check_empty_layer(selected_layer, ROI.SELECTED_LAYER_NAME) exception_handler.check_empty_roi_layer(selected_layer) - def get_roi_ids_with_extracted_shorelines(self, is_selected: bool = True) -> list: - # ids of ROIs that have had their shorelines extracted - roi_ids = set(self.rois.get_ids_with_extracted_shorelines()) - logger.info(f"extracted_shoreline_ids:{roi_ids}") - # Get ROI ids that are selected on map and have had their shorelines extracted + def get_roi_ids( + self, is_selected: bool = True, has_shorelines: bool = False + ) -> list: + """ + Get the IDs of the regions of interest (ROIs) that meet the specified criteria. + + Args: + is_selected (bool, optional): Whether to consider only the selected ROIs on the map. Defaults to True. + has_shorelines (bool, optional): Whether to consider only the ROIs that have extracted shorelines. Defaults to False. + + Returns: + list: The IDs of the ROIs that meet the specified criteria. 
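A standalone sketch of the filtering that get_roi_ids performs, with hypothetical IDs; the real method reads them from self.rois and self.selected_set:

all_ids = ["1", "2", "3", "4"]
ids_with_shorelines = {"2", "3"}
selected_ids = {"3", "4"}

has_shorelines, is_selected = True, True
roi_ids = all_ids
if has_shorelines:
    roi_ids = set(ids_with_shorelines)
if is_selected:
    roi_ids = list(set(roi_ids) & selected_ids)
print(roi_ids)  # ['3']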
+ """ + roi_ids = self.get_all_roi_ids() + if has_shorelines: + roi_ids = set(self.rois.get_ids_with_extracted_shorelines()) if is_selected: - roi_ids = list(roi_ids & self.selected_set) + roi_ids = list(set(roi_ids) & self.selected_set) return roi_ids def extract_all_shorelines(self) -> None: @@ -1321,9 +1326,8 @@ def extract_all_shorelines(self) -> None: download_imagery() and extracts a shoreline for each of them """ self.validate_extract_shoreline_inputs() - roi_ids = self.get_selected_roi_ids() + roi_ids = self.get_roi_ids(is_selected=True) logger.info(f"roi_ids to extract shorelines from: {roi_ids}") - # update the settings with the most accurate epsg self.update_settings_with_accurate_epsg() # update configs with new output_epsg @@ -1338,8 +1342,8 @@ def extract_all_shorelines(self) -> None: self.rois.add_extracted_shoreline(extracted_shorelines, roi_id) # save the ROI IDs that had extracted shoreline to observable variable roi_ids_with_extracted_shorelines - ids_with_extracted_shorelines = self.get_roi_ids_with_extracted_shorelines( - is_selected=False + ids_with_extracted_shorelines = self.get_roi_ids( + is_selected=False, has_shorelines=True ) if ids_with_extracted_shorelines is None: self.id_container.ids = [] @@ -1352,24 +1356,12 @@ def extract_all_shorelines(self) -> None: self.save_session(roi_ids, save_transects=False) # Get ROI ids that are selected on map and have had their shorelines extracted - roi_ids = self.get_roi_ids_with_extracted_shorelines(is_selected=True) - self.compute_transects(self.transects.gdf, self.get_settings(), roi_ids) + roi_ids = self.get_roi_ids(is_selected=True, has_shorelines=True) + if hasattr(self.transects, "gdf"): + self.compute_transects(self.transects.gdf, self.get_settings(), roi_ids) # load extracted shorelines to map self.load_extracted_shorelines_to_map() - def get_selected_rois(self, roi_ids: list) -> gpd.GeoDataFrame: - """Returns a geodataframe of all rois whose ids are in given list - roi_ids. - - Args: - roi_ids (list[str]): ids of ROIs - - Returns: - gpd.GeoDataFrame: geodataframe of all rois selected by the roi_ids - """ - selected_rois_gdf = self.rois.gdf[self.rois.gdf["id"].isin(roi_ids)] - return selected_rois_gdf - def get_cross_distance( self, roi_id: str, @@ -1426,45 +1418,6 @@ def get_cross_distance( return cross_distance, failure_reason - def save_timeseries_csv(self, session_path: str, roi_id: str, rois: ROI) -> None: - """Saves cross distances of transects and - extracted shorelines in ROI to csv file within each ROI's directory. 
- If no shorelines were extracted for an ROI then nothing is saved - Args: - roi_ids (list): list of roi ids - rois (ROI): ROI instance containing keys: - 'extracted_shorelines': extracted shoreline from roi - 'cross_distance_transects': cross distance of transects and extracted shoreline from roi - """ - roi_extracted_shorelines = rois.get_extracted_shoreline(roi_id) - # if roi does not have extracted shoreline skip it - if roi_extracted_shorelines is None: - logger.info(f"No extracted shorelines for roi: {roi_id}") - return - # get extracted_shorelines from extracted shoreline object in rois - extracted_shorelines = roi_extracted_shorelines.dictionary - # if no shorelines were extracted then skip - if extracted_shorelines == {}: - logger.info(f"No extracted shorelines for roi: {roi_id}") - return - cross_distance_transects = rois.get_cross_shore_distances(roi_id) - - # logger.info(f"ROI: {roi_id} extracted_shorelines : {extracted_shorelines}") - # if no cross distance was 0 then skip - if cross_distance_transects == 0: - print( - f"ROI: {roi_id} cross distance is 0 will not have time-series of shoreline change along transects " - ) - logger.info(f"ROI: {roi_id} cross distance is 0") - return - # saves all transects in a single directory - filepath = common.save_transect_intersections( - session_path, extracted_shorelines, cross_distance_transects - ) - print( - f"ROI: {roi_id} Time-series of the shoreline change along the transects saved as:{filepath}" - ) - def compute_transects( self, transects_gdf: gpd.GeoDataFrame, settings: dict, roi_ids: list[str] ) -> dict: @@ -1569,137 +1522,6 @@ def save_session(self, roi_ids: list[str], save_transects: bool = True): self.get_settings(), ) - def save_csv_per_transect(self, roi_ids: list, rois: ROI) -> None: - """Saves cross distances of transects and - extracted shorelines in ROI to csv file within each ROI's directory. 
- If no shorelines were extracted for an ROI then nothing is saved - Args: - roi_ids (list): list of roi ids - rois (ROI): ROI instance containing keys: - 'extracted_shorelines': extracted shoreline from roi - 'roi_settings': must have keys 'filepath' and 'sitename' - 'cross_distance_transects': cross distance of transects and extracted shoreline from roi - """ - # set of roi ids that have add their transects successfully computed - rois_computed_transects = set() - for roi_id in tqdm(roi_ids, desc="Saving csv for each transect for ROIs"): - roi_extracted_shorelines = rois.get_extracted_shoreline(roi_id) - # if roi does not have extracted shoreline skip it - if roi_extracted_shorelines is None: - logger.info(f"ROI: {roi_id} had no extracted shorelines ") - continue - - # get extracted_shorelines from extracted shoreline object in rois - extracted_shorelines_dict = roi_extracted_shorelines.dictionary - cross_distance_transects = rois.get_cross_shore_distances(roi_id) - logger.info( - f"ROI: {roi_id} extracted_shorelines : {extracted_shorelines_dict}" - ) - # if no cross distance was 0 then skip - if cross_distance_transects == 0: - logger.info(f"ROI: {roi_id} cross distance is 0") - continue - # if no shorelines were extracted then skip - if extracted_shorelines_dict == {}: - logger.info(f"ROI: {roi_id} had no extracted shorelines ") - continue - - # for each transect id in cross_distance_transects make a new csv file - for key in cross_distance_transects.keys(): - df = pd.DataFrame() - out_dict = dict([]) - # copy shoreline intersects for each transect - out_dict[key] = cross_distance_transects[key] - out_dict["roi_id"] = [ - roi_id for _ in range(len(extracted_shorelines_dict["dates"])) - ] - out_dict["dates"] = extracted_shorelines_dict["dates"] - out_dict["satname"] = extracted_shorelines_dict["satname"] - df = pd.DataFrame(out_dict) - df.index = df["dates"] - df.pop("dates") - - # Save extracted shoreline info to session directory - session_name = self.get_session_name() - session_path = os.path.join(os.getcwd(), "sessions", session_name) - ROI_directory = rois.roi_settings[roi_id]["sitename"] - session_path = file_utilities.create_directory( - session_path, ROI_directory - ) - # save source data - self.save_config(session_path) - # save to csv file session path - fn = os.path.join(session_path, "%s_timeseries_raw.csv" % key) - if os.path.exists(fn): - os.remove(fn) - df.to_csv(fn, sep=",") - rois_computed_transects.add(roi_id) - print(f"Computed transects for the following ROIs: {rois_computed_transects}") - - def save_cross_distance_to_file(self, roi_ids: list, rois: ROI) -> None: - """Saves cross distances of transects and - extracted shorelines in ROI to csv file within each ROI's directory. 
- If no shorelines were extracted for an ROI then nothing is saved - Args: - roi_ids (list): list of roi ids - rois (ROI): ROI instance containing keys: - 'extracted_shorelines': extracted shoreline from roi - 'roi_settings': must have keys 'filepath' and 'sitename' - 'cross_distance_transects': cross distance of transects and extracted shoreline from roi - """ - for roi_id in tqdm(roi_ids, desc="Saving ROI cross distance transects"): - roi_extracted_shorelines = rois.get_extracted_shoreline(roi_id) - # if roi does not have extracted shoreline skip it - if roi_extracted_shorelines is None: - print( - f"ROI: {roi_id} had no extracted shorelines and therfore has no time-series of shoreline change along transects" - ) - logger.info( - f"ROI: {roi_id} had no extracted shorelines.ROI: {roi_id} will not have time-series of shoreline change along transects." - ) - continue - # get extracted_shorelines from extracted shoreline object in rois - extracted_shorelines = roi_extracted_shorelines.dictionary - # if no shorelines were extracted then skip - if extracted_shorelines == {}: - print( - f"ROI: {roi_id} had no extracted shorelines and will not have time-series of shoreline change along transects " - ) - logger.info(f"ROI: {roi_id} had no extracted shorelines ") - continue - - cross_distance_transects = rois.get_cross_shore_distances(roi_id) - # logger.info(f"ROI: {roi_id} extracted_shorelines : {extracted_shorelines}") - # if no cross distance was 0 then skip - if cross_distance_transects == 0: - print( - f"ROI: {roi_id} cross distance is 0 will not have time-series of shoreline change along transects " - ) - logger.info(f"ROI: {roi_id} cross distance is 0") - continue - - cross_distance_df = common.get_cross_distance_df( - extracted_shorelines, cross_distance_transects - ) - # logger.info(f"ROI: {roi_id} cross_distance_df : {cross_distance_df}") - - # Save extracted shoreline info to session directory - session_name = self.get_session_name() - ROI_directory = rois.roi_settings[roi_id]["sitename"] - session_path = os.path.join(os.getcwd(), "sessions", session_name) - session_path = file_utilities.create_directory(session_path, ROI_directory) - # save source data - self.save_config(session_path) - - filepath = os.path.join(session_path, "transect_time_series.csv") - if os.path.exists(filepath): - os.remove(filepath) - cross_distance_df.to_csv(filepath, sep=",") - print(f"ROI: {roi_id} time-series of shoreline change along transects") - print( - f"Time-series of the shoreline change along the transects saved as:{filepath}" - ) - def remove_all(self): """Remove the bbox, shoreline, all rois from the map""" self.remove_bbox() @@ -1871,14 +1693,6 @@ def handle_draw( if self.draw_control.last_action == "deleted": self.remove_bbox() - def get_selected_roi_ids(self) -> list: - """Gets the ids of the selected rois - - Returns: - list: list of ids of selected rois - """ - return list(self.selected_set) - def load_extracted_shoreline_by_id(self, selected_id: str, row_number: int = 0): """ Loads extracted shorelines onto a map for a single region of interest specified by its ID. 
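A minimal sketch of how an extracted-shoreline GeoDataFrame becomes a styled map layer, roughly as the loading code below does (hypothetical coordinates and layer name):

import json
import geopandas as gpd
from ipyleaflet import GeoJSON
from shapely.geometry import MultiPoint

gdf = gpd.GeoDataFrame(
    {"satname": ["L8"]},
    geometry=[MultiPoint([(-117.44, 33.26), (-117.43, 33.27)])],
    crs="EPSG:4326",
)
layer = GeoJSON(
    data=json.loads(gdf.to_json()),
    name="extracted shoreline 0",  # hypothetical layer name
    point_style={"radius": 1},
)
# a Map instance could then call map.add_layer(layer)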
@@ -1892,9 +1706,9 @@ def load_extracted_shoreline_by_id(self, selected_id: str, row_number: int = 0): # get the extracted shorelines for the selected roi if self.rois is not None: extracted_shorelines = self.rois.get_extracted_shoreline(selected_id) - logger.info( - f"ROI ID { selected_id} extracted shorelines {extracted_shorelines}" - ) + # logger.info( + # f"ROI ID { selected_id} extracted shorelines {extracted_shorelines}" + # ) # if extracted shorelines exist, load them onto map, if none exist nothing loads self.load_extracted_shorelines_on_map(extracted_shorelines, row_number) @@ -1979,13 +1793,6 @@ def load_extracted_shorelines_on_map( "radius": 1, }, ) - # layer_name = extracted_shorelines.get_layer_name() - # logger.info( - # f"Extracted shoreline layer: {new_layer}\n" - # f"Layer name: {layer_name}\n" - # f"Extracted shoreline layers: {new_layer}\n" - # ) - # new_layer.on_hover(self.update_extracted_shoreline_html) self.map.add_layer(new_layer) # update the extracted shoreline layer and number of shorelines available self.extract_shorelines_container.geo_data = new_layer @@ -2274,20 +2081,23 @@ def save_feature_to_file( Default value is an empty string. """ exception_handler.can_feature_save_to_file(feature, feature_type) + logger.info(f"Saving feature type({feature}) to file") if isinstance(feature, ROI): # raise exception if no rois were selected exception_handler.check_selected_set(self.selected_set) + # save only the selected ROIs to file feature.gdf[feature.gdf["id"].isin(self.selected_set)].to_file( feature.filename, driver="GeoJSON" ) print(f"Saved selected ROIs to {feature.filename}") + logger.info(f"Save {feature.LAYER_NAME} to {feature.filename}") else: - logger.info(f"Saving feature type( {feature}) to file") if hasattr(feature, "gdf"): feature.gdf.to_file(feature.filename, driver="GeoJSON") print(f"Save {feature.LAYER_NAME} to {feature.filename}") logger.info(f"Save {feature.LAYER_NAME} to {feature.filename}") else: + logger.warning(f"Empty {feature.LAYER_NAME} cannot be saved to file") print(f"Empty {feature.LAYER_NAME} cannot be saved to file") def convert_selected_set_to_geojson( diff --git a/src/coastseg/zoo_model.py b/src/coastseg/zoo_model.py index f6964385..80a09b7c 100644 --- a/src/coastseg/zoo_model.py +++ b/src/coastseg/zoo_model.py @@ -23,7 +23,6 @@ import tqdm from PIL import Image import numpy as np -import pandas as pd from glob import glob import tqdm.asyncio import nest_asyncio From 94e0ed17b85c5a58d5590012dadde5dbb6a3a680 Mon Sep 17 00:00:00 2001 From: Sharon Fitzpatrick Date: Fri, 15 Dec 2023 07:14:15 -0800 Subject: [PATCH 84/87] v1.1.23.dev1 comment out remove_inaccurate_georef --- pyproject.toml | 2 +- src/coastseg/extracted_shoreline.py | 7 ++++--- 2 files changed, 5 insertions(+), 4 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index a834d124..d2fdce02 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -5,7 +5,7 @@ build-backend = "setuptools.build_meta" [project] name = "coastseg" dynamic = ["readme"] -version = "1.1.22" +version = "1.1.23.dev1" authors = [ { name=" Sharon Fitzpatrick", email="sharon.fitzpatrick23@gmail.com" }, ] diff --git a/src/coastseg/extracted_shoreline.py b/src/coastseg/extracted_shoreline.py index 3b7f76d0..fa0ed65e 100644 --- a/src/coastseg/extracted_shoreline.py +++ b/src/coastseg/extracted_shoreline.py @@ -1790,9 +1790,10 @@ def create_extracted_shorelines_from_session( extracted_shorelines_dict = remove_duplicates( extracted_shorelines_dict ) # removes duplicates (images taken on the same date by the same 
satellite) - extracted_shorelines_dict = remove_inaccurate_georef( - extracted_shorelines_dict, 10 - ) # remove inaccurate georeferencing (set threshold to 10 m) + # @TODO: uncomment this for the standard release + # extracted_shorelines_dict = remove_inaccurate_georef( + # extracted_shorelines_dict, 10 + # ) # remove inaccurate georeferencing (set threshold to 10 m) # Check and log 'reference shoreline' if it exists ref_sl = extracted_shorelines_dict.get("shorelines", np.array([])) From 7d4a60e2fb41c318c5818df0de7445a0d702038d Mon Sep 17 00:00:00 2001 From: Sharon Fitzpatrick Date: Fri, 15 Dec 2023 07:30:23 -0800 Subject: [PATCH 85/87] v1.1.23 close #207 uncomment remove_inaccurate_georef --- pyproject.toml | 2 +- src/coastseg/extracted_shoreline.py | 9 ++++----- 2 files changed, 5 insertions(+), 6 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index d2fdce02..723ea62b 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -5,7 +5,7 @@ build-backend = "setuptools.build_meta" [project] name = "coastseg" dynamic = ["readme"] -version = "1.1.23.dev1" +version = "1.1.23" authors = [ { name=" Sharon Fitzpatrick", email="sharon.fitzpatrick23@gmail.com" }, ] diff --git a/src/coastseg/extracted_shoreline.py b/src/coastseg/extracted_shoreline.py index fa0ed65e..33c7ee0e 100644 --- a/src/coastseg/extracted_shoreline.py +++ b/src/coastseg/extracted_shoreline.py @@ -1789,11 +1789,10 @@ def create_extracted_shorelines_from_session( # postprocessing by removing duplicates and removing in inaccurate georeferencing (set threshold to 10 m) extracted_shorelines_dict = remove_duplicates( extracted_shorelines_dict - ) # removes duplicates (images taken on the same date by the same satellite) - # @TODO: uncomment this for the standard release - # extracted_shorelines_dict = remove_inaccurate_georef( - # extracted_shorelines_dict, 10 - # ) # remove inaccurate georeferencing (set threshold to 10 m) + ) # removes duplicates (images taken on the same date by the same satellite + extracted_shorelines_dict = remove_inaccurate_georef( + extracted_shorelines_dict, 10 + ) # remove inaccurate georeferencing (set threshold to 10 m) # Check and log 'reference shoreline' if it exists ref_sl = extracted_shorelines_dict.get("shorelines", np.array([])) From 8275beb242c56e32016b7e297563b23d8f7090a3 Mon Sep 17 00:00:00 2001 From: Sharon Fitzpatrick Date: Fri, 15 Dec 2023 07:48:28 -0800 Subject: [PATCH 86/87] v1.1.23dev2 comment out both remove_inaccurate_georef --- pyproject.toml | 2 +- src/coastseg/extracted_shoreline.py | 12 ++++++------ 2 files changed, 7 insertions(+), 7 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index 723ea62b..2aa312ab 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -5,7 +5,7 @@ build-backend = "setuptools.build_meta" [project] name = "coastseg" dynamic = ["readme"] -version = "1.1.23" +version = "1.1.23dev2" authors = [ { name=" Sharon Fitzpatrick", email="sharon.fitzpatrick23@gmail.com" }, ] diff --git a/src/coastseg/extracted_shoreline.py b/src/coastseg/extracted_shoreline.py index 33c7ee0e..9ec3480f 100644 --- a/src/coastseg/extracted_shoreline.py +++ b/src/coastseg/extracted_shoreline.py @@ -1790,9 +1790,9 @@ def create_extracted_shorelines_from_session( extracted_shorelines_dict = remove_duplicates( extracted_shorelines_dict ) # removes duplicates (images taken on the same date by the same satellite - extracted_shorelines_dict = remove_inaccurate_georef( - extracted_shorelines_dict, 10 - ) # remove inaccurate georeferencing (set threshold to 10 m) + # 
extracted_shorelines_dict = remove_inaccurate_georef( + # extracted_shorelines_dict, 10 + # ) # remove inaccurate georeferencing (set threshold to 10 m) # Check and log 'reference shoreline' if it exists ref_sl = extracted_shorelines_dict.get("shorelines", np.array([])) @@ -1940,9 +1940,9 @@ def extract_shorelines( extracted_shorelines = remove_duplicates( extracted_shorelines ) # removes duplicates (images taken on the same date by the same satellite) - extracted_shorelines = remove_inaccurate_georef( - extracted_shorelines, 10 - ) # remove inaccurate georeferencing (set threshold to 10 m) + # extracted_shorelines = remove_inaccurate_georef( + # extracted_shorelines, 10 + # ) # remove inaccurate georeferencing (set threshold to 10 m) return extracted_shorelines def create_shoreline_settings( From 827356b8d5a795e5c7eeca6c0b96f3b3bafd6724 Mon Sep 17 00:00:00 2001 From: Sharon Fitzpatrick Date: Fri, 15 Dec 2023 08:01:54 -0800 Subject: [PATCH 87/87] only print settings keys --- src/coastseg/common.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/src/coastseg/common.py b/src/coastseg/common.py index ac97478b..ebf10188 100644 --- a/src/coastseg/common.py +++ b/src/coastseg/common.py @@ -105,7 +105,9 @@ def load_settings( if isinstance(keys, set): keys = list(keys) new_settings = file_utilities.read_json_file(filepath, raise_error=False) - logger.info(f"all of new settings read from file : {filepath} \n {new_settings}") + logger.info( + f"all of new settings read from file : {filepath} \n {new_settings.keys()}" + ) # if no keys are passed then use all of the keys in the settings file if not keys: keys = new_settings.keys() @@ -115,7 +117,7 @@ def load_settings( nested_settings = new_settings.get("settings", {}) nested_settings = {k: nested_settings[k] for k in keys if k in nested_settings} logger.info( - f"all of new nested settings read from file : {filepath} \n {nested_settings }" + f"all of new nested settings read from file : {filepath} \n {new_settings.keys()}" ) # combine the settings into one dictionary WARNING this could overwrite items in both settings filtered_settings.update(**nested_settings)
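
For context on the remove_inaccurate_georef call that patches 84-86 toggle on and off: it drops extracted-shoreline records whose reported georeferencing error exceeds a threshold (10 m in these patches), and commenting it out keeps every detected shoreline regardless of georeferencing quality. The sketch below is a simplified illustration of that filtering step, not the CoastSat implementation; the "geoaccuracy" key name and the parallel-list layout of the dictionary are assumptions made for the example.

import numpy as np

def filter_by_georef_accuracy(output: dict, max_error_m: float = 10.0) -> dict:
    """Keep only entries whose georeferencing error is within max_error_m.

    Simplified sketch: assumes `output` stores parallel lists (one entry per
    image) under keys such as 'dates', 'shorelines', and 'geoaccuracy'.
    """
    accuracy = np.asarray(output.get("geoaccuracy", []), dtype=float)
    keep = accuracy <= max_error_m  # mask of images with acceptable georeferencing
    filtered = {}
    for key, values in output.items():
        # apply the same boolean mask to every parallel list
        if isinstance(values, (list, np.ndarray)) and len(values) == len(keep):
            filtered[key] = [v for v, ok in zip(values, keep) if ok]
        else:
            filtered[key] = values
    return filtered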
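
The final patch narrows the load_settings logging so that only the top-level keys of the loaded settings are written to the log instead of the full dictionary. A minimal standalone sketch of that pattern follows; the read_json_file helper, the file path, and the example keys are illustrative stand-ins rather than the project's file_utilities API.

import json
import logging
from pathlib import Path
from typing import Optional

logger = logging.getLogger(__name__)


def read_json_file(filepath: str) -> dict:
    # stand-in for the project's JSON reader: return {} instead of raising if missing
    path = Path(filepath)
    if not path.is_file():
        return {}
    with path.open("r", encoding="utf-8") as f:
        return json.load(f)


def load_settings(filepath: str, keys: Optional[list] = None) -> dict:
    new_settings = read_json_file(filepath)
    # log only the keys so large settings values stay out of the log
    logger.info("settings read from %s: %s", filepath, list(new_settings.keys()))
    if not keys:
        keys = list(new_settings.keys())
    filtered_settings = {k: new_settings[k] for k in keys if k in new_settings}
    # a nested 'settings' block is merged the same way in the real function
    nested = new_settings.get("settings", {})
    filtered_settings.update({k: nested[k] for k in keys if k in nested})
    return filtered_settings


# usage sketch (assumes a local settings.json exists):
# settings = load_settings("settings.json", keys=["dates", "sat_list"])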