Skip to content

Commit

Permalink
scripts to import wetbulb from earthengine
Browse files Browse the repository at this point in the history
  • Loading branch information
ajaits committed Dec 17, 2024
1 parent 80f6d01 commit f411cc4
Show file tree
Hide file tree
Showing 4 changed files with 305 additions and 0 deletions.
89 changes: 89 additions & 0 deletions scripts/wet_bulb/generate_wet_bulb_events.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,89 @@
# Copyright 2023 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the 'License');
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an 'AS IS' BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Script to generate WetBulb events using the events pipeline.
The events pipeline is set up with the following stages:
1. earthengine:
It uses the NASA/GSFC/MERRA/slv/2 data set from EarthEngine to get regions
with high wetbulb temperature as a geoTif per day.
2. raster_csv
The geoTif is converted into a CSV with each pixel with regions with high
wet bulb temperature converted into an S2 cell of level 10
using the raster_to_csv.py utilities.
3. events
The resulting CSV with s2 cells is aggregated into events using
the process_events.py. Neighbouring s2 cells with high wet-bulb temperatures
are collated into a single event.
s2 cells with high wet bulb temperature over successive days are also
added into the same event.
Once the regions with wet bulb temperature are collated into events,
the process_events.py utilities also generate a CSV with
StatVar Observations for Area_WetBulbTemperatureEvent and
Count_WetBulbTemperatureEvent for each event's s2 cells at level 10, and the
places they are contained in, such as AdministrativeArea, Country, Continent and Earth.
The generated files including the geoTif, CSVs with S2 cells and events are saved
in GCS.
The script runs once a month to update wetbulb events for the current year.
"""

import os
import re
import sys
import time

from absl import app
from absl import flags
from absl import logging

# Resolve this script's folder to an absolute path. When __file__ is relative
# (e.g. the script is launched from its own folder on older interpreters),
# os.path.dirname(__file__) is '' and every further dirname() of it is also
# '', so the parent-folder entries below would resolve against the current
# working directory instead of the repository layout.
_SCRIPTS_DIR = os.path.dirname(os.path.abspath(__file__))

# Extend the module search path so the project modules imported further down
# can be found: this folder, its parent, its grandparent, the parent's
# 'earthengine' folder and the grandparent's 'util' folder.
sys.path.append(_SCRIPTS_DIR)
sys.path.append(os.path.dirname(_SCRIPTS_DIR))
sys.path.append(os.path.dirname(os.path.dirname(_SCRIPTS_DIR)))
sys.path.append(os.path.join(os.path.dirname(_SCRIPTS_DIR), 'earthengine'))
sys.path.append(
    os.path.join(os.path.dirname(os.path.dirname(_SCRIPTS_DIR)), 'util'))

# Path of the pipeline config file; defaults to the config that sits next to
# this script.
flags.DEFINE_string(
    name='wet_bulb_pipeline_config',
    default=os.path.join(_SCRIPTS_DIR, 'wet_bulb_events_pipeline_config.py'),
    help='Config for the pipeline as a py dictionary of json',
)

# Names of the config stages to run; handed to EventPipeline.run() as
# run_stages.
flags.DEFINE_list(
    name='wet_bulb_pipeline_stages',
    default=[],
    help='List of stages in the wetbulb events pipeline config to be run.',
)

# Shared handle to the parsed command-line flags.
_FLAGS = flags.FLAGS

import file_util

from config_map import ConfigMap
from events_pipeline import EventPipeline


def main(_):
    """Loads the wet bulb pipeline config and runs the selected stages.

    The config is read from the file given by --wet_bulb_pipeline_config.
    A non-empty start_date flag replaces 'start_date' in the config's
    'defaults' section before the pipeline is built.

    Args:
      _: Unused positional arguments passed in by app.run().
    """
    pipeline_config = ConfigMap(filename=_FLAGS.wet_bulb_pipeline_config)

    # NOTE(review): the start_date flag is not defined in this file;
    # presumably one of the imported pipeline modules registers it - confirm.
    start_date_override = _FLAGS.start_date
    if start_date_override:
        # NOTE(review): if the config has no 'defaults' section, the override
        # is written to the fallback {} and dropped - confirm that is intended.
        defaults = pipeline_config.get('defaults', {})
        defaults['start_date'] = start_date_override

    event_pipeline = EventPipeline(config=pipeline_config)
    event_pipeline.run(run_stages=_FLAGS.wet_bulb_pipeline_stages)


# Script entry point: hand main() to absl's app.run() only when this file is
# executed directly, not when it is imported.
if __name__ == '__main__':
    app.run(main)
13 changes: 13 additions & 0 deletions scripts/wet_bulb/wet_bulb_events.tmcf
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
Node: E:Events->E0
dcid: C:Events->dcid
typeOf: C:Events->typeOf
name: C:Events->name
startDate: C:Events->startDate
endDate: C:Events->endDate
observationPeriod: C:Events->observationPeriod
startLocation: C:Events->startLocation
affectedPlace: C:Events->affectedPlace
area: C:Events->area
observationDate: C:Events->observationDate
geoJsonCoordinates: C:Events->geoJsonCoordinates
wetBulbTemperature: C:Events->wetBulbTemperature
187 changes: 187 additions & 0 deletions scripts/wet_bulb/wet_bulb_events_pipeline_config.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,187 @@
# Config to generate WetBulbTemperatureEvent through the script: events_pipeline.py
{
'defaults': {
'import_name': 'NASAWetBulb',
# Set start_date to start of year to be processed.
# Defaults to Jan 1 of current year if left empty.
'start_date': '',
'end_date': '',
'time_period': 'P1D',

# GCS settings
'gcs_project': 'datcom',
'gcs_bucket': 'datcom-prod-imports',
'gcs_folder': 'scripts/wet_bulb',
},
# State of previous run of the pipeline with input/output for each stage.
'pipeline_state_file':
'gs://datcom-prod-imports/scripts/wet_bulb/wet_bulb_event_pipeline_state_{year}.py',

# Pipeline stages to generate wet_bulb events.
'stages': [
# Generate geoTiff from the EarthEngine NASA/GSFC/MERRA/slv/2 data set.
{
'stage': 'earthengine',

# EE dataset from NASA/GSFC/MERRA
# https://developers.google.com/earth-engine/datasets/catalog/NASA_GSFC_MERRA_slv_2#description
'ee_image_collection': 'NASA/GSFC/MERRA/slv/2',
# Image processing settings.
'ee_reducer': 'max',

# Filter by min wet bulb temperature of ~30 deg C (band_min is in Kelvin)
'band': 'T2MWET',
'band_min': 303, # 273.15(K) + 30,
# preserve the original temp after filtering by min threshold.
'ee_band_bool': False,
'ee_mask': 'land',

# Output image settings
'ee_output_data_type': 'float',
'scale': 10000,
'gcs_folder': 'scripts/wet_bulb/{stage}/{year}',
'ee_export_image': True,
# Generate daily images for a year at a time.
# Events are processed annually from Jan-Dec.
'ee_image_count': 365,
# 'ee_image_count': 31,
'skip_existing_output': True,
},

# Convert geoTiff to CSV with S2 cells.
{
'stage':
'raster_csv',
# debug
#'debug': True,
#'limit_points': 10,
's2_level':
10,
'aggregate':
'max',
'rename_columns': {
'band:1': 'T2MWET',
},
'input_data_filter': {
# Per-column filter settings; T2MWET is converted to Celsius below.
'area': {
# pick max area for s2 cell.
'aggregate': 'max'
},
'T2MWET': {
# convert value from Kelvin to Celsius
'eval': '{T2MWET}-273.15',
# Pick s2Cells with a min wetBulbTemperature
'min': 28,
'aggregate': 'max',
}
},

# use output from download stage as input
'input_files':
'gs://{gcs_bucket}/{gcs_folder}/earthengine/*{year}*.tif',
'output_dir':
'gs://{gcs_bucket}/{gcs_folder}/{stage}/{year}',
'skip_existing_output':
True,
},

# Generate events from the CSV with wet_bulbed S2 cells per month.
{
'stage':
'events',

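# Process all data files for the whole year.
# NOTE(review): 'input_files' below points at a local path
# ('tmp/fixed-temp/*.csv') while the GCS output of the raster_csv stage is
# commented out - confirm this is intended and not a debugging leftover.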
'input_files': 'tmp/fixed-temp/*.csv',
#'input_files':
# 'gs://{gcs_bucket}/{gcs_folder}/raster_csv/{year}/*{year}*raster_csv.csv',
'output_dir':
'gs://{gcs_bucket}/{gcs_folder}/{stage}/{import_name}-{stage}-{year}-',
'event_type':
'WetBulbTemperatureEvent',

# Input settings.
# Columns of input_csv that are added as event properties
'data_columns': ['area', 'T2MWET'],
'input_rename_columns': {
'date': 'observationDate',
'T2MWET': 'wetBulbTemperature',
},
# Input column for date.
'date_column':
'observationDate',
# Column of input_csv that contains the s2 cell id.
'place_column':
's2CellId',
'input_filter_config': {
'wetBulbTemperature': {
'min': 30,
},
},

# Processing settings
# Maximum distance within which 2 events are merged.
'max_overlap_distance_km':
0,
# Maximum number of cells of same level in between 2 events to be merged.
'max_overlap_place_hop':
2,
# S2 level to which data is aggregated.
's2_level':
10, # Events are at resolution of level-10 S2 cells.
'aggregate':
'max', # default aggregation for all properties
# Per property settings
'property_config': {
'area': {
'aggregate': 'sum',
'unit': 'SquareKilometer',
},
'wetBulbTemperature': {
'aggregate': 'max',
'unit': 'Celsius',
},
'affectedPlace': {
'aggregate': 'list',
},
},
# Treat events at the same location more than 7 days apart as separate events.
'max_event_interval_days':
7,

# Enable DC API lookup for place properties
'dc_api_enabled':
False,
'dc_api_batch_size':
200,
# Cache file for place properties like name, location, typeOf
# Cache is updated with new places looked up.
'place_property_cache_file':
'gs://datcom-prod-imports/place_cache/place_properties_cache_with_s2_10.pkl',

# Output settings.
#'output_delimiter': ';',
'output_delimiter':
',',
'output_svobs':
True,
'output_affected_place_polygon':
'geoJsonCoordinates',
'polygon_simplification_factor':
None,
'output_geojon_string':
True,

# Output svobs per place
'output_place_svobs':
True,
'output_place_svobs_properties': ['area', 'count'],
'output_place_svobs_dates': ['YYYY-MM-DD', 'YYYY-MM', 'YYYY'],
# Generate stats for all containedInPlaces for the event.
# Uses the containedInPlace property in the
# place_property_cache_file.
'aggregate_by_contained_in_place':
True,
},
],
}
16 changes: 16 additions & 0 deletions scripts/wet_bulb/wet_bulb_place_svobs.tmcf
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
Node: E:EventPlaces->E0
typeOf: dcs:StatVarObservation
variableMeasured: dcs:Area_WetBulbTemperatureEvent
observationAbout: C:EventPlaces->observationAbout
observationDate: C:EventPlaces->observationDate
observationPeriod: C:EventPlaces->observationPeriod
value: C:EventPlaces->area
unit: SquareKilometer

Node: E:EventPlaces->E1
typeOf: dcs:StatVarObservation
variableMeasured: dcs:Count_WetBulbTemperatureEvent
observationAbout: C:EventPlaces->observationAbout
observationDate: C:EventPlaces->observationDate
observationPeriod: C:EventPlaces->observationPeriod
value: C:EventPlaces->count

0 comments on commit f411cc4

Please sign in to comment.