Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Leave one out #31

Merged
merged 4 commits into from
Jul 16, 2020
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 0 additions & 3 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -128,8 +128,5 @@ dmypy.json
# Pyre type checker
.pyre/

# run settings
*/run_setting.cfg

#output folders
OUT*/
15 changes: 9 additions & 6 deletions README.rst
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@ To install the conflict model, first clone the code from GitHub. It is advised t

$ git clone https://github.com/JannisHoch/conflict_model.git
$ cd path/to/conflict_model
$ conda-env create -f=environment.yml
$ conda env create -f environment.yml
$ conda activate conflict_model
$ python setup.py develop

Expand All @@ -38,6 +38,8 @@ To run the example jupyter notebook, follow these instructions

This automatically executes the notebook and converts it to a html-file, also stored in the example folder.

.. note:: It is of course also possible to execute the notebook cell by cell using jupyter notebook

with runner script
^^^^^^^^^^^^^^^^^^

Expand All @@ -48,14 +50,15 @@ To run the model from command line, a command line script is provided. In the mo
$ cd path/to/conflict_model/scripts
$ python runner.py path/to/conflict_model/data/run_setting.cfg

.. note:: by default, no output is stored in the current version of the model!

If output is to be stored in an output map, this currently needs to be specified in the runner scipt explictely (-s option).
By default, output is stored to the output directory specified in the settings-file. Alternatively, this can be provided via command line too (-o option)
If output is to be stored in an output map, this currently needs to be specified in the runner script explicitly (-so option).
By default, output is stored to the output directory specified in the settings-file. Alternatively, this can be provided via command line too (-o option).
For some minimal verbose output, please specify this using the -v option.

.. code-block:: console

$ python runner.py -s True -o path/to/output/folder path/to/conflict_model/data/run_setting.cfg
$ python runner.py -o path/to/output/folder path/to/conflict_model/data/run_setting.cfg -so -v

.. note:: for convenience, there is a 'run_script.sh' file executing just this command.

For help, try this if you are in the scripts folder:

Expand Down
1 change: 1 addition & 0 deletions conflict_model/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
from . import utils
from . import get_boolean_conflict
from . import get_var_from_nc
from . import machine_learning

__author__ = """Jannis M. Hoch"""
__email__ = '[email protected]'
Expand Down
2 changes: 0 additions & 2 deletions conflict_model/get_boolean_conflict.py
Original file line number Diff line number Diff line change
Expand Up @@ -43,7 +43,5 @@ def conflict_in_year_bool(conflict_gdf, extent_gdf, config, sim_year):

if not len(extent_gdf) == len(list_out):
raise AssertionError('the dataframe with polygons has a lenght {0} while the lenght of the resulting list is {1}'.format(len(extent_gdf), len(list_out)))

print('...DONE' + os.linesep)

return list_out
6 changes: 1 addition & 5 deletions conflict_model/get_var_from_nc.py
Original file line number Diff line number Diff line change
Expand Up @@ -62,8 +62,6 @@ def nc_with_float_timestamp(extent_gdf, config, var_name, sim_year, stat_func='m
zonal_stats = rstats.zonal_stats(prov.geometry, nc_arr_vals, affine=affine, stats=stat_func)
list_out.append(zonal_stats[0][stat_func])

print('...DONE' + os.linesep)

return list_out

def nc_with_continous_datetime_timestamp(extent_gdf, config, var_name, sim_year, stat_func='mean'):
Expand Down Expand Up @@ -92,7 +90,7 @@ def nc_with_continous_datetime_timestamp(extent_gdf, config, var_name, sim_year,
list: list containing statistical value per polygon, i.e. with same length as extent_gdf
"""
# get path to netCDF-file.
nc_fo = os.path.join(config.get('general', 'input_dir'),
nc_fo = os.path.join(os.path.abspath(config.get('general', 'input_dir')),
config.get('env_vars', var_name))

print('calculating mean {0} per aggregation unit from file {1} for year {2}'.format(var_name, nc_fo, sim_year))
Expand Down Expand Up @@ -125,6 +123,4 @@ def nc_with_continous_datetime_timestamp(extent_gdf, config, var_name, sim_year,
zonal_stats = rstats.zonal_stats(prov.geometry, nc_arr_vals, affine=affine, stats=stat_func)
list_out.append(zonal_stats[0][stat_func])

print('...DONE' + os.linesep)

return list_out
66 changes: 66 additions & 0 deletions conflict_model/machine_learning.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,66 @@
import os
from sklearn import svm, neighbors, preprocessing

def define_scaling(config):
    """Build the list of feature scalers to be used in a model run.

    If the boolean option ``sensitivity_analysis`` in the ``general`` section
    is True, one instance of every supported scaler is returned. Otherwise
    only the scaler named by the option ``scaler`` in the ``machine_learning``
    section is returned, wrapped in a one-element list, and its choice is
    printed.

    Args:
        config: parsed settings object exposing ``get(section, option)`` and
            ``getboolean(section, option)`` (e.g. a configparser.ConfigParser).

    Raises:
        ValueError: if no sensitivity analysis is requested and the selected
            scaler is not one of MinMaxScaler, StandardScaler, RobustScaler
            or QuantileTransformer.

    Returns:
        list: scaler instances from sklearn.preprocessing; four entries for a
        sensitivity analysis, one entry otherwise.
    """

    # (name, zero-argument factory) pairs; the order of this tuple is the
    # order in which the scalers are returned for a sensitivity analysis.
    supported = (
        ('MinMaxScaler', preprocessing.MinMaxScaler),
        ('StandardScaler', preprocessing.StandardScaler),
        ('RobustScaler', preprocessing.RobustScaler),
        # random_state is pinned so repeated runs use the same random stream
        ('QuantileTransformer', lambda: preprocessing.QuantileTransformer(random_state=42)),
    )

    if config.getboolean('general', 'sensitivity_analysis'):
        return [make_scaler() for _, make_scaler in supported]

    # single-scaler run: read the option once and look it up
    selected = config.get('machine_learning', 'scaler')
    factories = dict(supported)
    if selected not in factories:
        raise ValueError('no supported scaling-algorithm selected - choose between MinMaxScaler, StandardScaler, RobustScaler or QuantileTransformer')

    scalers = [factories[selected]()]

    print('chosen scaling method is {}'.format(scalers[0]))

    return scalers

def define_model(config):
    """Build the list of machine-learning classifiers to be used in a model run.

    If the boolean option ``sensitivity_analysis`` in the ``general`` section
    is True, one instance of every supported classifier is returned. Otherwise
    only the classifier named by the option ``model`` in the
    ``machine_learning`` section is returned, wrapped in a one-element list,
    and its choice is printed.

    Args:
        config: parsed settings object exposing ``get(section, option)`` and
            ``getboolean(section, option)`` (e.g. a configparser.ConfigParser).

    Raises:
        ValueError: if no sensitivity analysis is requested and the selected
            model is neither NuSVC nor KNeighborsClassifier.

    Returns:
        list: classifier instances (sklearn.svm.NuSVC and/or
        sklearn.neighbors.KNeighborsClassifier); two entries for a
        sensitivity analysis, one entry otherwise.
    """

    # (name, zero-argument factory) pairs. Each set of hyperparameters is
    # defined exactly once so that the sensitivity-analysis run and the
    # single-model run can never drift apart. The order of this tuple is the
    # order in which the classifiers are returned for a sensitivity analysis.
    supported = (
        # NOTE(review): per the scikit-learn docs, `degree` only affects the
        # 'poly' kernel, so with kernel='rbf' it presumably has no effect;
        # kept unchanged to preserve the original configuration.
        ('NuSVC', lambda: svm.NuSVC(nu=0.1, kernel='rbf', class_weight={1: 100}, random_state=42, probability=True, degree=10, gamma=10)),
        ('KNeighborsClassifier', lambda: neighbors.KNeighborsClassifier(n_neighbors=10, weights='distance')),
    )

    if config.getboolean('general', 'sensitivity_analysis'):
        return [make_clf() for _, make_clf in supported]

    # single-model run: read the option once and look it up
    selected = config.get('machine_learning', 'model')
    factories = dict(supported)
    if selected not in factories:
        raise ValueError('no supported ML model selected - choose between NuSVC or KNeighborsClassifier')

    clfs = [factories[selected]()]

    print('chosen ML model is {}'.format(clfs[0]))

    return clfs
9 changes: 8 additions & 1 deletion data/run_setting.cfg
Original file line number Diff line number Diff line change
@@ -1,6 +1,9 @@
[general]
input_dir=../data
output_dir=../data\OUT
# if True, all possible scaler and model combinations (see 'machine_learning' section) are run
# if False, only the selected scaler and model combination is run
sensitivity_analysis=False

[settings]
y_start=2000
Expand All @@ -21,7 +24,7 @@ zones=BWh,BSh
code2class=KoeppenGeiger/classification_codes.txt

[env_vars]
#variable name here needs to be identical with variable name in nc-file
# variable name here needs to be identical with variable name in nc-file
GDP_per_capita_PPP=GDP_HDI/GDP_per_capita_PPP_1990_2015_Africa.nc
total_evaporation=PCRGLOBWB/totalEvap/totalEvaporation_monthTot_output_2000_2015_Africa_yearmean.nc
precipitation=PCRGLOBWB/precip/precipitation_monthTot_output_2000-01-31_to_2015-12-31_Africa_yearmean.nc
Expand All @@ -33,4 +36,8 @@ int_grazing=IMAGE/intensityGrazing.nc
ext_grazing=IMAGE/extensiveGrazing.nc

[machine_learning]
# choose from: MinMaxScaler, StandardScaler, RobustScaler, QuantileTransformer
scaler=QuantileTransformer
# choose from: NuSVC, KNeighborsClassifier
model= KNeighborsClassifier
train_fraction=0.9
Loading