diff --git a/scripts/wet_bulb/generate_wet_bulb_events.py b/scripts/wet_bulb/generate_wet_bulb_events.py new file mode 100644 index 000000000..688b13da2 --- /dev/null +++ b/scripts/wet_bulb/generate_wet_bulb_events.py @@ -0,0 +1,88 @@ +# Copyright 2023 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the 'License'); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an 'AS IS' BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +"""Script to generate WetBulb events using the events pipeline. + +The events pipeline is set up with the following stages: + 1. earthengine: + It uses the NASA/GSFC/MERRA/slv/2 data set from EarthEngine to get regions + with high wet bulb temperature as a geoTif per day. + + 2. raster_csv + The geoTif is converted into a CSV in which each pixel with a high + wet bulb temperature is converted into an S2 cell of level 10 + using the raster_to_csv.py utilities. + + 3. events + The resulting CSV with s2 cells is aggregated into events using + process_events.py. Neighbouring s2 cells with high wet bulb temperatures + are collated into a single event. + s2 cells with high wet bulb temperature over successive days are also + added into the same event. + + Once the regions with high wet bulb temperature are collated into events, + the process_events.py utilities also generate a CSV with + StatVar Observations for Area_WetBulbTemperatureEvent and + Count_WetBulbTemperatureEvent for each event's s2 cells at level 10, and the places they are + contained in, such as AdministrativeArea, Country, Continent and Earth. 
+ + The generated files including the geoTif, CSVs with S2 cells and events are saved + in GCS. + + The script runs once a month to update wetbulb events for the current year. +""" + +import os +import re +import sys +import time + +from absl import app +from absl import flags +from absl import logging + +_SCRIPTS_DIR = os.path.dirname(__file__) +sys.path.append(_SCRIPTS_DIR) +sys.path.append(os.path.dirname(_SCRIPTS_DIR)) +sys.path.append(os.path.dirname(os.path.dirname(_SCRIPTS_DIR))) +sys.path.append(os.path.join(os.path.dirname(_SCRIPTS_DIR), 'earthengine')) +sys.path.append( + os.path.join(os.path.dirname(os.path.dirname(_SCRIPTS_DIR)), 'util')) + +flags.DEFINE_string( + 'wet_bulb_pipeline_config', + os.path.join(_SCRIPTS_DIR, 'wet_bulb_events_pipeline_config.py'), + 'Config for the pipeline as a py dictionary of json') + +flags.DEFINE_list( + 'wet_bulb_pipeline_stages', [], + 'List of stages in the wetbulb events pipeline config to be run.') + +_FLAGS = flags.FLAGS + +import file_util + +from config_map import ConfigMap +from events_pipeline import EventPipeline + + +def main(_): + """Runs the wet bulb events pipeline. + + Loads the config file named by --wet_bulb_pipeline_config, copies a + non-empty start_date flag value into the config's 'defaults' section and + runs the stages listed in --wet_bulb_pipeline_stages. + + NOTE(review): the start_date flag is not defined in this file; presumably + it is registered by one of the imported pipeline modules. Confirm. + NOTE(review): the override is written into config.get('defaults', {}), so + it looks like it would be dropped for a config without a 'defaults' + entry. Confirm against ConfigMap.get. + """ + pipeline_config = ConfigMap(filename=_FLAGS.wet_bulb_pipeline_config) + start_date_override = _FLAGS.start_date + if start_date_override: + pipeline_config.get('defaults', {})['start_date'] = start_date_override + EventPipeline(config=pipeline_config).run( + run_stages=_FLAGS.wet_bulb_pipeline_stages) + + +if __name__ == '__main__': + app.run(main) diff --git a/scripts/wet_bulb/wet_bulb_events.tmcf b/scripts/wet_bulb/wet_bulb_events.tmcf new file mode 100644 index 000000000..be7392d7a --- /dev/null +++ b/scripts/wet_bulb/wet_bulb_events.tmcf @@ -0,0 +1,13 @@ +Node: E:Events->E0 +dcid: C:Events->dcid +typeOf: C:Events->typeOf +name: C:Events->name +startDate: C:Events->startDate +endDate: C:Events->endDate +observationPeriod: C:Events->observationPeriod +startLocation: C:Events->startLocation +affectedPlace: C:Events->affectedPlace +area: C:Events->area +observationDate: C:Events->observationDate +geoJsonCoordinates: 
C:Events->geoJsonCoordinates +wetBulbTemperature: C:Events->wetBulbTemperature diff --git a/scripts/wet_bulb/wet_bulb_events_pipeline_config.py b/scripts/wet_bulb/wet_bulb_events_pipeline_config.py new file mode 100644 index 000000000..e7c281ed9 --- /dev/null +++ b/scripts/wet_bulb/wet_bulb_events_pipeline_config.py @@ -0,0 +1,188 @@ +# Config to generate WetBulbTemperatureEvent through the script: events_pipeline.py +{ + 'defaults': { + 'import_name': 'NASAWetBulb', + # Set start_date to start of year to be processed. + # Defaults to Jan 1 of current year if left empty. + 'start_date': '', + 'end_date': '', + 'time_period': 'P1D', + + # GCS settings + 'gcs_project': 'datcom', + 'gcs_bucket': 'datcom-prod-imports', + 'gcs_folder': 'scripts/wet_bulb', + }, + # State of previous run of the pipeline with input/output for each stage. + 'pipeline_state_file': + 'gs://datcom-prod-imports/scripts/wet_bulb/wet_bulb_event_pipeline_state_{year}.py', + + # Pipeline stages to generate wet_bulb events. + 'stages': [ + # Generate geoTiff from the EarthEngine NASA/GSFC/MERRA/slv/2 data set. + { + 'stage': 'earthengine', + + # EE dataset from NASA/GSFC/MERRA + # https://developers.google.com/earth-engine/datasets/catalog/NASA_GSFC_MERRA_slv_2#description + 'ee_image_collection': 'NASA/GSFC/MERRA/slv/2', + # Image processing settings. + 'ee_reducer': 'max', + + # Filter by min wet bulb temperature of 303 K (about 30 deg C) + 'band': 'T2MWET', + 'band_min': 303, # 273.15(K) + 30, + # preserve the original temp after filtering by min threshold. + 'ee_band_bool': False, + 'ee_mask': 'land', + + # Output image settings + 'ee_output_data_type': 'float', + 'scale': 10000, + 'gcs_folder': 'scripts/wet_bulb/{stage}/{year}', + 'ee_export_image': True, + # Generate daily images for a year at a time. + # Events are processed annually from Jan-Dec. + 'ee_image_count': 365, + # 'ee_image_count': 31, + 'skip_existing_output': True, + }, + + # Convert geoTiff to CSV with S2 cells. 
+ { + 'stage': + 'raster_csv', + # debug + #'debug': True, + #'limit_points': 10, + 's2_level': + 10, + 'aggregate': + 'max', + 'rename_columns': { + 'band:1': 'T2MWET', + }, + 'input_data_filter': { + # Per-column filter and aggregation settings for the input data. + 'area': { + # pick max area for s2 cell. + 'aggregate': 'max' + }, + 'T2MWET': { + # convert value from Kelvin to Celsius + 'eval': '{T2MWET}-273.15', + # Pick s2Cells with a min wetBulbTemperature + # (presumably in deg C, applied after the eval above; confirm). + 'min': 28, + 'aggregate': 'max', + } + }, + + # Use output from the earthengine stage as input. + 'input_files': + 'gs://{gcs_bucket}/{gcs_folder}/earthengine/*{year}*.tif', + 'output_dir': + 'gs://{gcs_bucket}/{gcs_folder}/{stage}/{year}', + 'skip_existing_output': + True, + }, + + # Generate events from the CSV with wet_bulbed S2 cells per month. + { + 'stage': + 'events', + + # Process all data files for the whole year. + # NOTE(review): this reads a local relative path instead of the + # raster_csv stage output on GCS (commented out below); confirm this + # is intended before the pipeline is run unattended. + 'input_files': + 'tmp/fixed-temp/*.csv', + #'input_files': + # 'gs://{gcs_bucket}/{gcs_folder}/raster_csv/{year}/*{year}*raster_csv.csv', + 'output_dir': + 'gs://{gcs_bucket}/{gcs_folder}/{stage}/{import_name}-{stage}-{year}-', + 'event_type': + 'WetBulbTemperatureEvent', + + # Input settings. + # Columns of input_csv that are added as event properties + 'data_columns': ['area', 'T2MWET'], + 'input_rename_columns': { + 'date': 'observationDate', + 'T2MWET': 'wetBulbTemperature', + }, + # Input column for date. + 'date_column': + 'observationDate', + # Column of input_csv that contains the s2 cell id. + 'place_column': + 's2CellId', + 'input_filter_config': { + 'wetBulbTemperature': { + 'min': 30, + }, + }, + + # Processing settings + # Maximum distance within which 2 events are merged. + 'max_overlap_distance_km': + 0, + # Maximum number of cells of same level in between 2 events to be merged. + 'max_overlap_place_hop': + 2, + # S2 level to which data is aggregated. + 's2_level': + 10, # Events are at resolution of level-10 S2 cells. 
+ 'aggregate': + 'max', # default aggregation for all properties + # Per property settings + 'property_config': { + 'area': { + 'aggregate': 'sum', + 'unit': 'SquareKilometer', + }, + 'wetBulbTemperature': { + 'aggregate': 'max', + 'unit': 'Celsius', + }, + 'affectedPlace': { + 'aggregate': 'list', + }, + }, + # Treat events at the same location more than 7 days apart as separate events. + 'max_event_interval_days': + 7, + + # DC API lookup for place properties (disabled in this config). + 'dc_api_enabled': + False, + 'dc_api_batch_size': + 200, + # Cache file for place properties like name, location, typeOf + # Cache is updated with new places looked up. + 'place_property_cache_file': + 'gs://datcom-prod-imports/place_cache/place_properties_cache_with_s2_10.pkl', + + # Output settings. + #'output_delimiter': ';', + 'output_delimiter': + ',', + 'output_svobs': + True, + 'output_affected_place_polygon': + 'geoJsonCoordinates', + 'polygon_simplification_factor': + None, + # NOTE(review): the key below is spelled 'geojon'; confirm it matches the + # key read by the events stage before renaming it. + 'output_geojon_string': + True, + + # Output svobs per place + 'output_place_svobs': + True, + 'output_place_svobs_properties': ['area', 'count'], + 'output_place_svobs_dates': ['YYYY-MM-DD', 'YYYY-MM', 'YYYY'], + # Generate stats for all containedInPlaces for the event. + # Uses the containedInPlace property in the + # place_property_cache_file. 
+ 'aggregate_by_contained_in_place': + True, + }, + ], +} diff --git a/scripts/wet_bulb/wet_bulb_place_svobs.tmcf b/scripts/wet_bulb/wet_bulb_place_svobs.tmcf new file mode 100644 index 000000000..7699bbcff --- /dev/null +++ b/scripts/wet_bulb/wet_bulb_place_svobs.tmcf @@ -0,0 +1,16 @@ +Node: E:EventPlaces->E0 +typeOf: dcs:StatVarObservation +variableMeasured: dcs:Area_WetBulbTemperatureEvent +observationAbout: C:EventPlaces->observationAbout +observationDate: C:EventPlaces->observationDate +observationPeriod: C:EventPlaces->observationPeriod +value: C:EventPlaces->area +unit: SquareKilometer + +Node: E:EventPlaces->E1 +typeOf: dcs:StatVarObservation +variableMeasured: dcs:Count_WetBulbTemperatureEvent +observationAbout: C:EventPlaces->observationAbout +observationDate: C:EventPlaces->observationDate +observationPeriod: C:EventPlaces->observationPeriod +value: C:EventPlaces->count \ No newline at end of file