-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge pull request #15 from JannisHoch/dev
aligned runner.py with notebook
- Loading branch information
Showing
10 changed files
with
435 additions
and
720 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,4 +1,4 @@ | ||
{ | ||
"python.pythonPath": "C:\\Users\\hoch0001\\AppData\\Local\\Continuum\\anaconda3\\python.exe", | ||
"python.pythonPath": "C:\\Users\\hoch0001\\AppData\\Local\\Continuum\\anaconda3\\envs\\conflict_model\\python.exe", | ||
"restructuredtext.confPath": "${workspaceFolder}\\docs" | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -2,9 +2,9 @@ | |
|
||
from . import selection | ||
from . import utils | ||
from . import analysis | ||
from . import env_vars_nc | ||
from . import get_boolean_conflict | ||
from . import get_var_from_nc | ||
|
||
__author__ = """Jannis M. Hoch""" | ||
__email__ = '[email protected]' | ||
__version__ = '0.0.1-beta' | ||
__version__ = '0.0.1' |
This file was deleted.
Oops, something went wrong.
This file was deleted.
Oops, something went wrong.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,49 @@ | ||
import geopandas as gpd | ||
import pandas as pd | ||
import numpy as np | ||
import matplotlib.pyplot as plt | ||
import os | ||
|
||
def conflict_in_year_bool(conflict_gdf, extent_gdf, config, sim_year):
    """Flag, per polygon, whether at least one conflict occurred in a given year.

    The conflict entries of ``sim_year`` are spatially joined with the polygons
    in ``extent_gdf``. Every polygon whose ``watprovID`` shows up in the joined
    result is marked with 1, all other polygons with 0.

    Args:
        conflict_gdf (geodataframe): geo-dataframe containing georeferenced information of conflict (tested with PRIO/UCDP data). Must provide a ``year`` column.
        extent_gdf (geodataframe): geo-dataframe containing one or more polygons with geometry information for which values are extracted. Must provide a ``watprovID`` column.
        config (config): parsed configuration settings of run (not used here, kept for a uniform call signature).
        sim_year (int): year for which data is extracted.

    Returns:
        list: list containing 0/1 per polygon depending on conflict occurrence, in the row order of extent_gdf and with the same length.
    """
    print('determining whether a conflict took place or not')

    # select the entries which occurred in this year
    conflicts_in_year = conflict_gdf.loc[conflict_gdf.year == sim_year]

    # merge the dataframes with polygons and conflict information, creating a sub-set of polygons/regions
    data_merged = gpd.sjoin(conflicts_in_year, extent_gdf)

    # only the IDs of regions hit by a conflict matter for the boolean outcome,
    # so no aggregation of fatalities is needed; missing IDs are dropped so
    # they can never count as a match
    ids_with_conflict = set(data_merged['watprovID'].dropna())

    # vectorised membership test; yields exactly one 0/1 entry per polygon,
    # hence the output length always equals len(extent_gdf)
    list_out = extent_gdf['watprovID'].isin(ids_with_conflict).astype(int).tolist()

    print('...DONE' + os.linesep)

    return list_out
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,130 @@ | ||
import xarray as xr | ||
import rasterio as rio | ||
import pandas as pd | ||
import geopandas as gpd | ||
import rasterstats as rstats | ||
import numpy as np | ||
import matplotlib.pyplot as plt | ||
import os, sys | ||
|
||
def nc_with_integer_timestamp(extent_gdf, config, var_name, sim_year, stat_func='mean'):
    """Extract one statistical value per polygon from a netCDF-file for a given year.

    The netCDF-file is looked up in the config-file. By default, the mean value
    of all cells within a polygon is computed. The resulting list does not
    contain additional meta-information about the files or polygons and is
    mostly intended for data-driven approaches such as machine learning.

    NOTE:
        The var_name must be identical to the key in the config-file.

    NOTE:
        This function is specifically written for netCDF-files where the time variable contains integer (year-)values, e.g. 1995, 1996, ...

    NOTE:
        Works only with nc-files with annual data.

    Args:
        extent_gdf (geodataframe): geo-dataframe containing one or more polygons with geometry information for which values are extracted
        config (config): parsed configuration settings of run
        var_name (str): name of variable in nc-file, must also be the same under which path to nc-file is specified in cfg-file
        sim_year (int): year for which data is extracted
        stat_func (str, optional): Statistical function to be applied, choose from available options in rasterstats package. Defaults to 'mean'.

    Raises:
        ValueError: raised if the extracted variable at a time step does not contain data

    Returns:
        list: list containing statistical value per polygon, i.e. with same length as extent_gdf
    """
    # get path to netCDF-file.
    nc_fo = os.path.join(config.get('general', 'input_dir'),
                         config.get('env_vars', var_name))

    print('calculating {0} {1} per aggregation unit from file {2} for year {3}'.format(stat_func, var_name, nc_fo, sim_year))

    # open nc-file with xarray; the context manager closes the file handle
    # again once the values of the requested year are loaded into memory
    with xr.open_dataset(nc_fo) as nc_ds:
        nc_arr_vals = nc_ds[var_name].sel(time=sim_year).values

    if nc_arr_vals.size == 0:
        raise ValueError('no data was found for this year in the nc-file {}, check if all is correct'.format(nc_fo))

    # open nc-file with rasterio only to read the affine transformation
    with rio.open(nc_fo) as nc_raster:
        affine = nc_raster.transform

    # compute the statistic for each polygon in the geo-dataframe;
    # zonal_stats returns a one-element list for a single geometry
    list_out = [
        rstats.zonal_stats(geom, nc_arr_vals, affine=affine, stats=stat_func)[0][stat_func]
        for geom in extent_gdf['geometry']
    ]

    print('...DONE' + os.linesep)

    return list_out
|
||
def nc_with_continous_regular_timestamp(extent_gdf, config, var_name, sim_year, stat_func='mean'):
    """Extract one statistical value per polygon from a netCDF-file for a given year.

    The netCDF-file is looked up in the config-file. Its time variable is
    converted to calendar years to find the time step matching ``sim_year``.
    By default, the mean value of all cells within a polygon is computed.
    The resulting list does not contain additional meta-information about the
    files or polygons and is mostly intended for data-driven approaches such
    as machine learning.

    NOTE:
        The var_name must be identical to the key in the config-file.

    NOTE:
        Works only with nc-files with annual data.

    Args:
        extent_gdf (geodataframe): geo-dataframe containing one or more polygons with geometry information for which values are extracted
        config (config): parsed configuration settings of run
        var_name (str): name of variable in nc-file, must also be the same under which path to nc-file is specified in cfg-file
        sim_year (int): year for which data is extracted
        stat_func (str, optional): Statistical function to be applied, choose from available options in rasterstats package. Defaults to 'mean'.

    Raises:
        ValueError: raised if specified year cannot be found in years in nc-file
        ValueError: raised if more than one time step of the nc-file falls into the specified year
        ValueError: raised if the extracted variable at a time step does not contain data

    Returns:
        list: list containing statistical value per polygon, i.e. with same length as extent_gdf
    """
    # get path to netCDF-file.
    nc_fo = os.path.join(config.get('general', 'input_dir'),
                         config.get('env_vars', var_name))

    print('calculating {0} {1} per aggregation unit from file {2} for year {3}'.format(stat_func, var_name, nc_fo, sim_year))

    # open nc-file with xarray; the context manager closes the file handle
    # again once the values of the requested year are loaded into memory
    with xr.open_dataset(nc_fo) as nc_ds:
        # get xarray data-array for specified variable
        nc_var = nc_ds[var_name]

        # get years contained in nc-file as integer array to be compatible with sim_year
        years = pd.to_datetime(nc_ds.time.values).year.to_numpy()

        # positions of all time steps that fall into sim_year
        sim_year_idx = np.flatnonzero(years == sim_year)
        if sim_year_idx.size == 0:
            raise ValueError('the simulation year {0} can not be found in file {1}'.format(sim_year, nc_fo))
        if sim_year_idx.size > 1:
            # annual data is required; fail with a clear message instead of
            # an opaque array-to-scalar conversion error
            raise ValueError('the simulation year {0} matches {1} time steps in file {2}, but annual data is required'.format(sim_year, sim_year_idx.size, nc_fo))

        # get values from data-array for specified year based on index
        nc_arr_vals = nc_var.isel(time=int(sim_year_idx[0])).values

    if nc_arr_vals.size == 0:
        raise ValueError('no data was found for this year in the nc-file {}, check if all is correct'.format(nc_fo))

    # open nc-file with rasterio only to read the affine transformation
    with rio.open(nc_fo) as nc_raster:
        affine = nc_raster.transform

    # compute the statistic for each polygon in the geo-dataframe;
    # zonal_stats returns a one-element list for a single geometry
    list_out = [
        rstats.zonal_stats(geom, nc_arr_vals, affine=affine, stats=stat_func)[0][stat_func]
        for geom in extent_gdf['geometry']
    ]

    print('...DONE' + os.linesep)

    return list_out
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Oops, something went wrong.