-
Notifications
You must be signed in to change notification settings - Fork 113
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Code modification and created readme
- Loading branch information
Showing
23 changed files
with
81,951 additions
and
0 deletions.
There are no files selected for viewing
Empty file.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,29 @@ | ||
# GPCC SPI Import | ||
|
||
## Overview | ||
These scripts cover two imports:  | ||
1. NOAA_GPCC_StandardardizedPrecipitationIndex | ||
2. NOAA_GPCC_StandardardizedPrecipitationIndex_AggPlace | ||
|
||
|
||
## Scripts | ||
``` | ||
download.py | ||
``` | ||
The above script creates the necessary input files with the .nc extension required for processing, and stores them in a tmp directory. (/tmp/gpcc_spi/)  | ||
|
||
|
||
``` | ||
preprocess_gpcc_spi.py | ||
``` | ||
Reads the downloaded files from the tmp directory and creates output csv files inside an 'out' folder in the same tmp directory. (/tmp/gpcc_spi/out/)  | ||
This script generates the output for the 'NOAA_GPCC_StandardardizedPrecipitationIndex' import.  | ||
The tmcf for this can be taken from 'scripts/noaa/gpcc_spi/gpcc_spi.tmcf'  | ||
|
||
|
||
``` | ||
gpcc_spi_aggregation.py | ||
``` | ||
Reads the files generated by the 'preprocess_gpcc_spi.py' script from the tmp directory, aggregates the data by place, and creates output csv files inside an 'agg' folder in the same tmp directory. (/tmp/gpcc_spi/agg/)  | ||
This script generates the output for the 'NOAA_GPCC_StandardardizedPrecipitationIndex_AggPlace' import.  | ||
The tmcf for this can be taken from 'scripts/noaa/gpcc_spi/gpcc_spi_aggregation.tmcf' |
Empty file.
168 changes: 168 additions & 0 deletions
168
scripts/noaa/gpcc_spi/create_place_to_grid_area_mapping.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,168 @@ | ||
# Copyright 2022 Google LLC | ||
# | ||
# Licensed under the Apache License, Version 2.0 (the "License"); | ||
# you may not use this file except in compliance with the License. | ||
# You may obtain a copy of the License at | ||
# | ||
# http://www.apache.org/licenses/LICENSE-2.0 | ||
# | ||
# Unless required by applicable law or agreed to in writing, software | ||
# distributed under the License is distributed on an "AS IS" BASIS, | ||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
# See the License for the specific language governing permissions and | ||
# limitations under the License. | ||
"""Processes a list of places into %area of place within 1 degree grids. | ||
Input: | ||
csv with a single column that represents the places where geojson exists in dc. | ||
Output: | ||
A single json file that maps | ||
place id -> list of ({"grid": grid key , "ratio": %area within grid}) | ||
""" | ||
|
||
from shapely import geometry | ||
import datacommons as dc | ||
import concurrent.futures | ||
from typing import List, Optional | ||
import json | ||
import csv | ||
|
||
from absl import flags | ||
from absl import app | ||
|
||
FLAGS = flags.FLAGS | ||
|
||
flags.DEFINE_string('gpcc_spi_places_with_csv', | ||
'geojson_data/places_with_geojson.csv', | ||
'Path to places with geojson.') | ||
flags.DEFINE_string('gpcc_spi_place_grid_ratio_output_dir', | ||
'geojson_data/place_to_one_degree_grid_mapping.json', | ||
'Path where the result is written to.') | ||
|
||
LAT_MIN = -90 | ||
LAT_MAX = 90 | ||
LNG_MIN = -180 | ||
LNG_MAX = 180 | ||
|
||
|
||
def construct_one_degree_grid_polygons() -> dict:
    """Builds axis-aligned 1x1 degree boxes covering the whole globe.

    Bug fix: the original annotation (`List[geometry.box]`) and docstring
    claimed a list was returned, but the function builds and returns a dict.

    Returns:
        Dict mapping a "{lat}^{lng}" key (the cell's south-west corner) to a
        shapely box spanning [lng, lng + 1] x [lat, lat + 1].
    """
    grids = dict()
    for lat in range(LAT_MIN, LAT_MAX):
        for lng in range(LNG_MIN, LNG_MAX):
            key = f"{lat}^{lng}"
            grids[key] = geometry.box(lng, lat, lng + 1, lat + 1)
    return grids
|
||
|
||
def get_place_by_type(parent_places, places_types: List[str]) -> List[str]:
    """Return the place ids of all places contained in a set of parent places.

    Args:
        parent_places: Parent place dcids to query.
        places_types: Place types (e.g. 'County') to fetch within the parents.

    Returns:
        Flat list of child place dcids across all parents and types.
    """
    all_types_of_places = []
    for place_type in places_types:
        parent_place_to_places = dc.get_places_in(parent_places, place_type)
        for places in parent_place_to_places.values():
            # Bug fix: the original looped over `places` and called
            # `extend(place)` on each dcid string, which appended the dcid
            # one *character* at a time. Extend with the whole list instead.
            all_types_of_places.extend(places)
    return all_types_of_places
|
||
|
||
def places_to_geo_jsons(places: List[str]):
    """Fetches and parses the geoJsonCoordinates for each place.

    Places that have no geoJsonCoordinates property are omitted from the
    returned mapping.
    """
    response = dc.get_property_values(places, 'geoJsonCoordinates')
    return {
        place: geometry.shape(json.loads(values[0]))
        for place, values in response.items()
        if values
    }
|
||
|
||
def fit_shape_in_grids(shape, grids):
    """Computes what fraction of `shape`'s area falls in each grid cell.

    Args:
        shape: Geometry of a place (supports covers/intersects/intersection).
        grids: Dict of grid key -> grid cell geometry.

    Returns:
        List of {'grid': key, 'ratio': fraction of the shape's area inside
        that cell}. If a single cell covers the whole shape, a one-element
        list with ratio 1 is returned immediately.
    """
    results = []
    for key, cell in grids.items():
        if cell.covers(shape):
            # The place fits entirely inside this one cell; no other cell
            # can contribute, so stop scanning.
            return [{'grid': key, 'ratio': 1}]
        if shape.covers(cell):
            # The cell lies completely inside the place.
            results.append({'grid': key, 'ratio': cell.area / shape.area})
        elif shape.intersects(cell):
            # Partial overlap: attribute the intersecting fraction.
            overlap = shape.intersection(cell)
            results.append({'grid': key, 'ratio': overlap.area / shape.area})
    return results
|
||
|
||
def read_input(places_with_csv: str):
    """Reads the input csv and returns the values of its 'id' column."""
    with open(places_with_csv, newline='') as csvfile:
        return [row['id'] for row in csv.DictReader(csvfile)]
|
||
|
||
def get_geojsons(places_with_geojson: List[str]):
    """Batch call DC api to get geojsons.

    Splits the places into batches of 100, fetches each batch on a thread
    pool, and merges the per-batch results into one dict.
    """
    batch_size = 100
    batches = [
        places_with_geojson[start:start + batch_size]
        for start in range(0, len(places_with_geojson), batch_size)
    ]

    geojsons = dict()
    with concurrent.futures.ThreadPoolExecutor() as executor:
        futures = [
            executor.submit(places_to_geo_jsons, batch) for batch in batches
        ]
        for future in concurrent.futures.as_completed(futures):
            geojsons.update(future.result())

    return geojsons
|
||
|
||
def create_place_to_grid_mapping(places_with_geojson: List[str],
                                 output: Optional[str] = None,
                                 write_results: bool = True):
    """Fits every place into 1-degree grid cells.

    Args:
        places_with_geojson: Place dcids whose geojson exists in DC.
        output: Path of the json file written when write_results is True.
        write_results: When True, dump the mapping to `output` and return
            None; otherwise return the mapping.

    Returns:
        Dict of place dcid -> list of {'grid', 'ratio'} entries, or None
        when the results are written to disk instead.
    """
    geojsons = get_geojsons(places_with_geojson)

    grids = construct_one_degree_grid_polygons()

    place_to_grid_ratios = {
        place: fit_shape_in_grids(shape, grids)
        for place, shape in geojsons.items()
    }

    if write_results:
        with open(output, 'w') as f:
            json.dump(place_to_grid_ratios, f)
        return

    return place_to_grid_ratios
|
||
|
||
def main(_):
    """Reads the input places and writes the place-to-grid mapping."""
    places = read_input(FLAGS.gpcc_spi_places_with_csv)
    create_place_to_grid_mapping(
        places, FLAGS.gpcc_spi_place_grid_ratio_output_dir)
|
||
|
||
if __name__ == "__main__": | ||
app.run(main) |
83 changes: 83 additions & 0 deletions
83
scripts/noaa/gpcc_spi/create_place_to_grid_area_mapping_test.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,83 @@ | ||
# Copyright 2022 Google LLC | ||
# | ||
# Licensed under the Apache License, Version 2.0 (the "License"); | ||
# you may not use this file except in compliance with the License. | ||
# You may obtain a copy of the License at | ||
# | ||
# https://www.apache.org/licenses/LICENSE-2.0 | ||
# | ||
# Unless required by applicable law or agreed to in writing, software | ||
# distributed under the License is distributed on an "AS IS" BASIS, | ||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
# See the License for the specific language governing permissions and | ||
# limitations under the License. | ||
''' | ||
Unit tests for create_place_to_grid_area_mapping.py | ||
Usage: python -m unittest discover -v -s ../ -p "create_place_to_grid_area_mapping_test.py" | ||
''' | ||
import unittest | ||
from .create_place_to_grid_area_mapping import create_place_to_grid_mapping | ||
|
||
|
||
class PlaceToGridMappingTest(unittest.TestCase):
    """End-to-end test of create_place_to_grid_mapping for three counties.

    NOTE(review): the function under test resolves geojsons via the
    datacommons API, so this presumably needs network access — confirm
    before relying on it in offline CI.
    """

    def test_create_place_to_grid_mapping(self):
        # Expected output: place dcid -> [{'grid': key, 'ratio': fraction of
        # the place's area inside that 1-degree cell}].
        # NOTE(review): ratios are compared exactly by assertDictEqual; an
        # upstream geojson or geometry-library change could perturb these
        # floats and fail the test.
        want = {
            # San Bernardino County
            # maps to many grids.
            'geoId/06071': [{
                'grid': '33^-118',
                'ratio': 0.0033239325139923634
            }, {
                'grid': '34^-118',
                'ratio': 0.1276123022731056
            }, {
                'grid': '34^-117',
                'ratio': 0.18870566451400175
            }, {
                'grid': '34^-116',
                'ratio': 0.18532960437637513
            }, {
                'grid': '34^-115',
                'ratio': 0.10690315005766632
            }, {
                'grid': '35^-118',
                'ratio': 0.09802330851363907
            }, {
                'grid': '35^-117',
                'ratio': 0.1547896428427359
            }, {
                'grid': '35^-116',
                'ratio': 0.12463595784741413
            }, {
                'grid': '35^-115',
                'ratio': 0.010676437061070328
            }],
            # San Francisco County
            # maps to two grids (one is an island).
            'geoId/06075': [{
                'grid': '37^-124',
                'ratio': 0.006601930938213778
            }, {
                'grid': '37^-123',
                'ratio': 0.9933980690617857
            }],
            # Arlington County
            # Fully contained in a grid.
            'geoId/51013': [{
                'grid': '38^-78',
                'ratio': 1
            }]
        }
        places_with_geojson = [
            'geoId/06071',  # San Bernardino County
            'geoId/06075',  # San Francisco County
            'geoId/51013'  # Arlington County
        ]
        # write_results=False makes the function return the mapping instead
        # of writing it to disk.
        got = create_place_to_grid_mapping(places_with_geojson,
                                           write_results=False)
        self.assertDictEqual(got, want)
|
||
|
||
if __name__ == '__main__': | ||
unittest.main() |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,89 @@ | ||
# Copyright 2022 Google LLC | ||
# | ||
# Licensed under the Apache License, Version 2.0 (the "License"); | ||
# you may not use this file except in compliance with the License. | ||
# You may obtain a copy of the License at | ||
# | ||
# https://www.apache.org/licenses/LICENSE-2.0 | ||
# | ||
# Unless required by applicable law or agreed to in writing, software | ||
# distributed under the License is distributed on an "AS IS" BASIS, | ||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
# See the License for the specific language governing permissions and | ||
# limitations under the License. | ||
""""Downloads GRCC Global Standarized Precipitation Index (SPI) data. | ||
Please see the data readme for release schedule. | ||
https://www.ncei.noaa.gov/pub/data/nidis/gpcc/nidis_gpcc_readme_c20220520.txt | ||
""" | ||
|
||
from absl import app | ||
from absl import flags | ||
import concurrent.futures | ||
import logging | ||
import urllib | ||
import urllib.request | ||
import shutil | ||
from typing import List | ||
import os | ||
|
||
FLAGS = flags.FLAGS | ||
|
||
flags.DEFINE_string('download_dir', '/tmp/gpcc_spi', 'Output directory.') | ||
|
||
flags.DEFINE_list('periods', None, ( | ||
'Comma separated list of periods. ' | ||
'A period is the duration in months for a particular SPI value.' | ||
'Possible values are 1, 2, 3, 6, 9, 12, 24, 36, 48, 60, and 72, defaults to\ | ||
all.')) | ||
DEFAULT_PERIODS = ['1', '3', '6', '9', '36', '72'] | ||
|
||
flags.DEFINE_string('distribution', 'pearson', | ||
('Distribution fucntion used to compute the SPI.' | ||
'One of (pearson, gamma). Defaults to pearson.')) | ||
DEFAULT_DISTRIBUTION = 'pearson' | ||
|
||
|
||
def download_one(url, path: str):
    """Download a single spi nc file."""
    logging.info('Starting to download %s to %s', url, path)
    # Stream the response straight to disk; both handles are closed on exit.
    with urllib.request.urlopen(url) as source, open(path, 'wb') as dest:
        shutil.copyfileobj(source, dest)
    logging.info('Finished downloading: %s', path)
|
||
|
||
def download_all(download_dir: str, distribution: str, periods: List[str]):
    """Download spi nc files for all periods.

    Args:
        download_dir: Directory the .nc files are written to (created if
            missing).
        distribution: Distribution used to compute the SPI, 'pearson' or
            'gamma'; falls back to DEFAULT_DISTRIBUTION when unset/invalid.
        periods: SPI durations in months as strings (e.g. ['1', '3']); falls
            back to DEFAULT_PERIODS when unset/invalid.

    Raises:
        Exception: if any individual download failed.
    """
    os.makedirs(download_dir, exist_ok=True)

    # Bug fix: the original unconditionally overwrote both arguments with the
    # defaults, silently ignoring the caller's flags. Fall back to defaults
    # only when a value is missing or malformed (main() used to pass the two
    # arguments swapped, so stay defensive about nonsense values).
    if distribution not in ('pearson', 'gamma'):
        distribution = DEFAULT_DISTRIBUTION
    if not periods or not all(str(p).strip().isdigit() for p in periods):
        periods = DEFAULT_PERIODS

    with concurrent.futures.ThreadPoolExecutor() as executor:
        futures = []
        for period in periods:
            # url format is '01' instead of '1'.
            p = f'{int(period):02d}'
            # Bug fix: the original built this url with a backslash line
            # continuation, which embedded the next line's indentation into
            # the url string; it also hardcoded the 'spi-pearson' directory
            # for every distribution.
            # NOTE(review): only the spi-pearson directory layout is
            # confirmed; verify the spi-gamma path against the server.
            url = (f'https://www.ncei.noaa.gov/pub/data/nidis/gpcc/'
                   f'spi-{distribution}/gpcc-spi-{distribution}-{p}.nc')

            dest = os.path.join(download_dir, f'gpcc_spi_{distribution}_{p}.nc')
            futures.append(executor.submit(download_one, url, dest))

        for future in concurrent.futures.as_completed(futures):
            if future.exception():
                # Bug fix: the original message also contained embedded
                # indentation from a backslash continuation.
                raise Exception('Part of the download failed. '
                                'Please re-run or fix the script.')
|
||
|
||
def main(_):
    """Download all nc files."""
    # Bug fix: the original positional call passed FLAGS.periods and
    # FLAGS.distribution in the wrong order for download_all's signature
    # (download_dir, distribution, periods). Keyword arguments make the
    # call order-proof.
    download_all(FLAGS.download_dir,
                 distribution=FLAGS.distribution,
                 periods=FLAGS.periods)
|
||
|
||
if __name__ == "__main__": | ||
app.run(main) |
Oops, something went wrong.