Skip to content

Commit

Permalink
debugged for model types 2 and 3
Browse files Browse the repository at this point in the history
  • Loading branch information
JannisHoch committed Sep 30, 2020
1 parent 7ba250b commit fa57058
Show file tree
Hide file tree
Showing 8 changed files with 25 additions and 22 deletions.
8 changes: 3 additions & 5 deletions conflict_model/evaluation.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
import os, sys
import warnings
from sklearn import metrics
import pandas as pd
import geopandas as gpd
Expand Down Expand Up @@ -290,18 +291,15 @@ def get_feature_importance(clf, out_dir, config):
out_dir (str): path to output folder. If None, output is not saved.
config (ConfigParser-object): object containing the parsed configuration-settings of the model.
Raises:
Warning: raised if unsupported classifier is used.
Returns:
dataframe: dataframe containing feature importance.
"""

if config.get('machine_learning', 'model') == 'RFClassifier':
arr = clf.feature_importances_
else:
arr = np.empty()
raise Warning('feature importance not supported for this kind of ML model')
arr = np.zeros(len(config.items('env_vars')))
warnings.warn('WARNING: feature importance not supported for this kind of ML model', UserWarning)

dict_out = dict()
for key, x in zip(config.items('env_vars'), range(len(arr))):
Expand Down
12 changes: 4 additions & 8 deletions conflict_model/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -86,15 +86,13 @@ def leave_one_out(X, Y, config, scaler, clf, out_dir):

eval_dict = evaluation.evaluate_prediction(y_test, y_pred, y_prob, X_test_loo, clf, config)

y_df = conflict.get_pred_conflict_geometry(X_test_ID, X_test_geom, y_test, y_pred)

X_df = pd.DataFrame(X_test)
utils.save_to_csv(eval_dict, sub_out_dir, 'evaluation_metrics')

if not config.getboolean('general', 'verbose'):
sys.stdout = orig_stdout
f.close()

return X_df, y_df, eval_dict
sys.exit('With LEAVE-ONE-OUT model, execution stops here.')

def single_variables(X, Y, config, scaler, clf, out_dir):
"""Model workflow when the model is based on only one single variable.
Expand Down Expand Up @@ -138,15 +136,13 @@ def single_variables(X, Y, config, scaler, clf, out_dir):

eval_dict = evaluation.evaluate_prediction(y_test, y_pred, y_prob, X_test_svmod, clf, config)

y_df = conflict.get_pred_conflict_geometry(X_test_ID, X_test_geom, y_test, y_pred)

X_df = pd.DataFrame(X_test)
utils.save_to_csv(eval_dict, sub_out_dir, 'evaluation_metrics')

if not config.getboolean('general', 'verbose'):
sys.stdout = orig_stdout
f.close()

return X_df, y_df, eval_dict
sys.exit('With SINGLE VARIABLE model, execution stops here.')

def dubbelsteen(X, Y, config, scaler, clf, out_dir):
"""Model workflow when the relation between variables and conflict is based on randomness.
Expand Down
6 changes: 3 additions & 3 deletions conflict_model/pipeline.py
Original file line number Diff line number Diff line change
Expand Up @@ -79,11 +79,11 @@ def run(X, Y, config, scaler, clf, out_dir):
if config.getint('general', 'model') == 1:
X_df, y_df, eval_dict = models.all_data(X, Y, config, scaler, clf, out_dir)
elif config.getint('general', 'model') == 2:
y_df, y_gdf, eval_dict = models.leave_one_out(X, Y, config, scaler, clf, out_dir)
X_df, y_df, eval_dict = models.leave_one_out(X, Y, config, scaler, clf, out_dir)
elif config.getint('general', 'model') == 3:
y_df, y_gdf, eval_dict = models.single_variables(X, Y, config, scaler, clf, out_dir)
X_df, y_df, eval_dict = models.single_variables(X, Y, config, scaler, clf, out_dir)
elif config.getint('general', 'model') == 4:
y_df, y_gdf, eval_dict = models.dubbelsteen(X, Y, config, scaler, clf, out_dir)
X_df, y_df, eval_dict = models.dubbelsteen(X, Y, config, scaler, clf, out_dir)
else:
raise ValueError('the specified model type in the cfg-file is invalid - specify either 1, 2, 3 or 4.')

Expand Down
1 change: 1 addition & 0 deletions conflict_model/plots.py
Original file line number Diff line number Diff line change
Expand Up @@ -153,6 +153,7 @@ def plot_ROC_curve_n_times(ax, clf, X_test, y_test, tprs, aucs, mean_fpr, **kwar
list: lists with true positive rates and area-under-curve values per plot.
"""

print(len(X_test), len(y_test))
viz = metrics.plot_roc_curve(clf, X_test, y_test, ax=ax,
alpha=0.15, color='b', lw=1, label=None, **kwargs)

Expand Down
9 changes: 8 additions & 1 deletion conflict_model/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -152,6 +152,10 @@ def initiate_setup(settings_file):
if config['conflict']['conflict_file'] == 'download':
download_PRIO(config)

if (config.getint('general', 'model') == 2) or (config.getint('general', 'model') == 3):
config.set('settings', 'n_runs', str(1))
print('changed nr of runs to {}'.format(config.getint('settings', 'n_runs')))

return config, out_dir

def create_artificial_Y(Y):
Expand Down Expand Up @@ -225,7 +229,10 @@ def save_to_csv(arg, out_dir, fname):
"""

if isinstance(arg, dict):
arg = pd.DataFrame().from_dict(arg)
try:
arg = pd.DataFrame().from_dict(arg)
except:
arg = pd.DataFrame().from_dict(arg, orient='index')
arg.to_csv(os.path.join(out_dir, fname + '.csv'))

return
Expand Down
7 changes: 4 additions & 3 deletions example/example_settings.cfg
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@ input_dir=../example/example_data
output_dir=../example/OUT
# 1: all data; 2: leave-one-out model; 3: single variable model; 4: dubbelsteenmodel
# Note that only model type 1 supports sensitivity_analysis
model=1
model=3
verbose=True

[settings]
Expand All @@ -17,7 +17,8 @@ n_runs=50
[pre_calc]
# if nothing is specified, the XY array will be stored in input_dir
# if XY is already pre-calculated, then provide the path to the npy-file
XY=
# XY=
XY=XY.npy

[extent]
shp=waterProvinces/waterProvinces_Africa.shp
Expand Down Expand Up @@ -46,5 +47,5 @@ irr_water_demand=irrWaterDemand.nc
# choose from: MinMaxScaler, StandardScaler, RobustScaler, QuantileTransformer
scaler=QuantileTransformer
# choose from: NuSVC, KNeighborsClassifier, RFClassifier
model=RFClassifier
model=KNeighborsClassifier
train_fraction=0.7
2 changes: 1 addition & 1 deletion scripts/run_script.sh
Original file line number Diff line number Diff line change
@@ -1 +1 @@
python runner.py ../example/example_settings.cfg
python runner.py ../example/example_settings.cfg
2 changes: 1 addition & 1 deletion scripts/runner.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@
import matplotlib.pyplot as plt

import warnings
warnings.filterwarnings("ignore")
warnings.filterwarnings("module")


@click.group()
Expand Down

0 comments on commit fa57058

Please sign in to comment.