Skip to content

Commit

Permalink
Merge pull request #31 from JannisHoch/leave_one_out
Browse files Browse the repository at this point in the history
v0.0.2
  • Loading branch information
JannisHoch authored Jul 16, 2020
2 parents 56ce0b6 + 6089f38 commit e6dda7e
Show file tree
Hide file tree
Showing 11 changed files with 2,486 additions and 236 deletions.
3 changes: 0 additions & 3 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -128,8 +128,5 @@ dmypy.json
# Pyre type checker
.pyre/

# run settings
*/run_setting.cfg

#output folders
OUT*/
15 changes: 9 additions & 6 deletions README.rst
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@ To install the conflict model, first clone the code from GitHub. It is advised t
$ git clone https://github.com/JannisHoch/conflict_model.git
$ cd path/to/conflict_model
$ conda-env create -f=environment.yml
$ conda env create -f environment.yml
$ conda activate conflict_model
$ python setup.py develop
Expand All @@ -38,6 +38,8 @@ To run the example jupyter notebook, follow these instructions
This automatically executes the notebook and converts it to a html-file, also stored in the example folder.

.. note:: It is of course also possible to execute the notebook cell by cell using jupyter notebook

with runner script
^^^^^^^^^^^^^^^^^^

Expand All @@ -48,14 +50,15 @@ To run the model from command line, a command line script is provided. In the mo
$ cd path/to/conflict_model/scripts
$ python runner.py path/to/conflict_model/data/run_setting.cfg
.. note:: by default, no output is stored in the current version of the model!

If output is to be stored in an output map, this currently needs to be specified in the runner scipt explictely (-s option).
By default, output is stored to the output directory specified in the settings-file. Alternatively, this can be provided via command line too (-o option)
If output is to be stored in an output folder, this currently needs to be specified explicitly when calling the runner script (-so option).
By default, output is stored to the output directory specified in the settings-file. Alternatively, this can be provided via command line too (-o option).
For some minimal verbose output, please specify this using the -v option.

.. code-block:: console
$ python runner.py -s True -o path/to/output/folder path/to/conflict_model/data/run_setting.cfg
$ python runner.py -o path/to/output/folder path/to/conflict_model/data/run_setting.cfg -so -v
.. note:: for convenience, there is a 'run_script.sh' file executing just this command.

For help, try this if you are in the scripts folder:

Expand Down
1 change: 1 addition & 0 deletions conflict_model/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
from . import utils
from . import get_boolean_conflict
from . import get_var_from_nc
from . import machine_learning

__author__ = """Jannis M. Hoch"""
__email__ = '[email protected]'
Expand Down
2 changes: 0 additions & 2 deletions conflict_model/get_boolean_conflict.py
Original file line number Diff line number Diff line change
Expand Up @@ -43,7 +43,5 @@ def conflict_in_year_bool(conflict_gdf, extent_gdf, config, sim_year):

if not len(extent_gdf) == len(list_out):
raise AssertionError('the dataframe with polygons has a lenght {0} while the lenght of the resulting list is {1}'.format(len(extent_gdf), len(list_out)))

print('...DONE' + os.linesep)

return list_out
6 changes: 1 addition & 5 deletions conflict_model/get_var_from_nc.py
Original file line number Diff line number Diff line change
Expand Up @@ -62,8 +62,6 @@ def nc_with_float_timestamp(extent_gdf, config, var_name, sim_year, stat_func='m
zonal_stats = rstats.zonal_stats(prov.geometry, nc_arr_vals, affine=affine, stats=stat_func)
list_out.append(zonal_stats[0][stat_func])

print('...DONE' + os.linesep)

return list_out

def nc_with_continous_datetime_timestamp(extent_gdf, config, var_name, sim_year, stat_func='mean'):
Expand Down Expand Up @@ -92,7 +90,7 @@ def nc_with_continous_datetime_timestamp(extent_gdf, config, var_name, sim_year,
list: list containing statistical value per polygon, i.e. with same length as extent_gdf
"""
# get path to netCDF-file.
nc_fo = os.path.join(config.get('general', 'input_dir'),
nc_fo = os.path.join(os.path.abspath(config.get('general', 'input_dir')),
config.get('env_vars', var_name))

print('calculating mean {0} per aggregation unit from file {1} for year {2}'.format(var_name, nc_fo, sim_year))
Expand Down Expand Up @@ -125,6 +123,4 @@ def nc_with_continous_datetime_timestamp(extent_gdf, config, var_name, sim_year,
zonal_stats = rstats.zonal_stats(prov.geometry, nc_arr_vals, affine=affine, stats=stat_func)
list_out.append(zonal_stats[0][stat_func])

print('...DONE' + os.linesep)

return list_out
66 changes: 66 additions & 0 deletions conflict_model/machine_learning.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,66 @@
import os
from sklearn import svm, neighbors, preprocessing

def define_scaling(config):
    """Select the feature-scaling algorithm(s) requested in the configuration.

    If option 'sensitivity_analysis' in section 'general' is True, one instance
    of every supported scaler is returned. Otherwise only the scaler named by
    option 'scaler' in section 'machine_learning' is returned.

    Args:
        config (ConfigParser-object): configuration object providing get() and getboolean() for (section, option) pairs.

    Raises:
        ValueError: raised if no sensitivity analysis is requested and the configured scaler name is not supported.

    Returns:
        list: list containing one or more scaler instances from sklearn.preprocessing.
    """

    # Single source of truth: scaler name -> factory, listed in the order in
    # which the scalers are returned for a sensitivity analysis.
    # Factories defer instantiation until a scaler is actually requested.
    factories = {
        'MinMaxScaler': lambda: preprocessing.MinMaxScaler(),
        'StandardScaler': lambda: preprocessing.StandardScaler(),
        'RobustScaler': lambda: preprocessing.RobustScaler(),
        'QuantileTransformer': lambda: preprocessing.QuantileTransformer(random_state=42),
    }

    # the flag is read once; a plain else replaces the redundant 'elif not'
    if config.getboolean('general', 'sensitivity_analysis'):
        scalers = [make_scaler() for make_scaler in factories.values()]
    else:
        scaler_name = config.get('machine_learning', 'scaler')
        if scaler_name not in factories:
            raise ValueError('no supported scaling-algorithm selected - choose between MinMaxScaler, StandardScaler, RobustScaler or QuantileTransformer')
        scalers = [factories[scaler_name]()]

    # only the first entry is reported, also when several scalers are returned
    print('chosen scaling method is {}'.format(scalers[0]))

    return scalers

def define_model(config):
    """Select the machine-learning classifier(s) requested in the configuration.

    If option 'sensitivity_analysis' in section 'general' is True, one instance
    of every supported classifier is returned. Otherwise only the classifier
    named by option 'model' in section 'machine_learning' is returned.

    Args:
        config (ConfigParser-object): configuration object providing get() and getboolean() for (section, option) pairs.

    Raises:
        ValueError: raised if no sensitivity analysis is requested and the configured model name is not supported.

    Returns:
        list: list containing one or more classifier instances (sklearn.svm.NuSVC and/or sklearn.neighbors.KNeighborsClassifier).
    """

    # Single source of truth: model name -> factory, so the hyper-parameters
    # used for a sensitivity analysis and for a single run cannot diverge.
    # Factories defer instantiation until a model is actually requested.
    # NOTE(review): 'degree' is documented by scikit-learn as relevant for the
    # 'poly' kernel only; it is kept here unchanged - confirm whether intended.
    factories = {
        'NuSVC': lambda: svm.NuSVC(nu=0.1, kernel='rbf', class_weight={1: 100}, random_state=42, probability=True, degree=10, gamma=10),
        'KNeighborsClassifier': lambda: neighbors.KNeighborsClassifier(n_neighbors=10, weights='distance'),
    }

    # the flag is read once; a plain else replaces the redundant 'elif not'
    if config.getboolean('general', 'sensitivity_analysis'):
        clfs = [make_clf() for make_clf in factories.values()]
    else:
        model_name = config.get('machine_learning', 'model')
        if model_name not in factories:
            raise ValueError('no supported ML model selected - choose between NuSVC or KNeighborsClassifier')
        clfs = [factories[model_name]()]

    # only the first entry is reported, also when several models are returned
    print('chosen ML model is {}'.format(clfs[0]))

    return clfs
9 changes: 8 additions & 1 deletion data/run_setting.cfg
Original file line number Diff line number Diff line change
@@ -1,6 +1,9 @@
[general]
input_dir=../data
output_dir=../data\OUT
# if True, all possible scaler and model combinations (see 'machine_learning' section) are run
# if False, only the selected scaler and model combination is run
sensitivity_analysis=False

[settings]
y_start=2000
Expand All @@ -21,7 +24,7 @@ zones=BWh,BSh
code2class=KoeppenGeiger/classification_codes.txt

[env_vars]
#variable name here needs to be identical with variable name in nc-file
# variable name here needs to be identical with variable name in nc-file
GDP_per_capita_PPP=GDP_HDI/GDP_per_capita_PPP_1990_2015_Africa.nc
total_evaporation=PCRGLOBWB/totalEvap/totalEvaporation_monthTot_output_2000_2015_Africa_yearmean.nc
precipitation=PCRGLOBWB/precip/precipitation_monthTot_output_2000-01-31_to_2015-12-31_Africa_yearmean.nc
Expand All @@ -33,4 +36,8 @@ int_grazing=IMAGE/intensityGrazing.nc
ext_grazing=IMAGE/extensiveGrazing.nc

[machine_learning]
# choose from: MinMaxScaler, StandardScaler, RobustScaler, QuantileTransformer
scaler=QuantileTransformer
# choose from: NuSVC, KNeighborsClassifier
model= KNeighborsClassifier
train_fraction=0.9
Loading

0 comments on commit e6dda7e

Please sign in to comment.