debugged for model types 2 and 3

JannisHoch · Sep 30, 2020 · fa57058 · fa57058
1 parent 7ba250b
commit fa57058
Show file tree

Hide file tree

Showing 8 changed files with 25 additions and 22 deletions.
diff --git a/conflict_model/evaluation.py b/conflict_model/evaluation.py
@@ -1,4 +1,5 @@
 import os, sys
+import warnings
 from sklearn import metrics
 import pandas as pd
 import geopandas as gpd
@@ -290,18 +291,15 @@ def get_feature_importance(clf, out_dir, config):
         out_dir (str): path to output folder. If None, output is not saved.
         config (ConfigParser-object): object containing the parsed configuration-settings of the model.
 
-    Raises:
-        Warning: raised if unsupported classifier is used.
-
     Returns:
         dataframe: dataframe containing feature importance.
     """ 
 
     if config.get('machine_learning', 'model') == 'RFClassifier':
         arr = clf.feature_importances_
     else:
-        arr = np.empty()
-        raise Warning('feature importance not supported for this kind of ML model')
+        arr = np.zeros(len(config.items('env_vars')))
+        warnings.warn('WARNING: feature importance not supported for this kind of ML model', UserWarning)
 
     dict_out = dict()
     for key, x in zip(config.items('env_vars'), range(len(arr))):

diff --git a/conflict_model/models.py b/conflict_model/models.py
@@ -86,15 +86,13 @@ def leave_one_out(X, Y, config, scaler, clf, out_dir):
 
         eval_dict = evaluation.evaluate_prediction(y_test, y_pred, y_prob, X_test_loo, clf, config)
 
-        y_df = conflict.get_pred_conflict_geometry(X_test_ID, X_test_geom, y_test, y_pred)
-
-        X_df = pd.DataFrame(X_test)
+        utils.save_to_csv(eval_dict, sub_out_dir, 'evaluation_metrics')
 
     if not config.getboolean('general', 'verbose'):
         sys.stdout = orig_stdout
         f.close()
 
-    return X_df, y_df, eval_dict
+    sys.exit('With LEAVE-ONE-OUT model, execution stops here.')
 
 def single_variables(X, Y, config, scaler, clf, out_dir):
     """Model workflow when the model is based on only one single variable.
@@ -138,15 +136,13 @@ def single_variables(X, Y, config, scaler, clf, out_dir):
 
         eval_dict = evaluation.evaluate_prediction(y_test, y_pred, y_prob, X_test_svmod, clf, config)
 
-        y_df = conflict.get_pred_conflict_geometry(X_test_ID, X_test_geom, y_test, y_pred)
-
-        X_df = pd.DataFrame(X_test)
+        utils.save_to_csv(eval_dict, sub_out_dir, 'evaluation_metrics')
 
     if not config.getboolean('general', 'verbose'):
         sys.stdout = orig_stdout
         f.close()
 
-    return X_df, y_df, eval_dict
+    sys.exit('With SINGLE VARIABLE model, execution stops here.')
 
 def dubbelsteen(X, Y, config, scaler, clf, out_dir):
     """Model workflow when the relation between variables and conflict is based on randomness.

diff --git a/conflict_model/pipeline.py b/conflict_model/pipeline.py
@@ -79,11 +79,11 @@ def run(X, Y, config, scaler, clf, out_dir):
     if config.getint('general', 'model') == 1:
         X_df, y_df, eval_dict = models.all_data(X, Y, config, scaler, clf, out_dir)
     elif config.getint('general', 'model') == 2:
-        y_df, y_gdf, eval_dict = models.leave_one_out(X, Y, config, scaler, clf, out_dir)
+        X_df, y_df, eval_dict = models.leave_one_out(X, Y, config, scaler, clf, out_dir)
     elif config.getint('general', 'model') == 3:
-        y_df, y_gdf, eval_dict = models.single_variables(X, Y, config, scaler, clf, out_dir)
+        X_df, y_df, eval_dict = models.single_variables(X, Y, config, scaler, clf, out_dir)
     elif config.getint('general', 'model') == 4:
-        y_df, y_gdf, eval_dict = models.dubbelsteen(X, Y, config, scaler, clf, out_dir)
+        X_df, y_df, eval_dict = models.dubbelsteen(X, Y, config, scaler, clf, out_dir)
     else:
         raise ValueError('the specified model type in the cfg-file is invalid - specify either 1, 2, 3 or 4.')
 

diff --git a/conflict_model/plots.py b/conflict_model/plots.py
@@ -153,6 +153,7 @@ def plot_ROC_curve_n_times(ax, clf, X_test, y_test, tprs, aucs, mean_fpr, **kwar
         list: lists with true positive rates and area-under-curve values per plot.
     """    
 
+    print(len(X_test), len(y_test))
     viz = metrics.plot_roc_curve(clf, X_test, y_test, ax=ax,
                             	 alpha=0.15, color='b', lw=1, label=None, **kwargs)
 

diff --git a/conflict_model/utils.py b/conflict_model/utils.py
@@ -152,6 +152,10 @@ def initiate_setup(settings_file):
     if config['conflict']['conflict_file'] == 'download':
         download_PRIO(config)
 
+    if (config.getint('general', 'model') == 2) or (config.getint('general', 'model') == 3):
+        config.set('settings', 'n_runs', str(1))
+        print('changed nr of runs to {}'.format(config.getint('settings', 'n_runs')))
+
     return config, out_dir
 
 def create_artificial_Y(Y):
@@ -225,7 +229,10 @@ def save_to_csv(arg, out_dir, fname):
     """    
 
     if isinstance(arg, dict):
-        arg = pd.DataFrame().from_dict(arg)
+        try:
+            arg = pd.DataFrame().from_dict(arg)
+        except:
+            arg = pd.DataFrame().from_dict(arg, orient='index')
     arg.to_csv(os.path.join(out_dir, fname + '.csv'))
 
     return

diff --git a/example/example_settings.cfg b/example/example_settings.cfg
@@ -3,7 +3,7 @@ input_dir=../example/example_data
 output_dir=../example/OUT
 # 1: all data; 2: leave-one-out model; 3: single variable model; 4: dubbelsteenmodel
 # Note that only model 1 supports sensitivity_analysis
-model=1
+model=3
 verbose=True
 
 [settings]
@@ -17,7 +17,8 @@ n_runs=50
 [pre_calc]
 # if nothing is specified, the XY array will be stored in input_dir
 # if XY is already pre-calculated, then provide the path to the npy-file
-XY=
+# XY=
+XY=XY.npy
 
 [extent]
 shp=waterProvinces/waterProvinces_Africa.shp
@@ -46,5 +47,5 @@ irr_water_demand=irrWaterDemand.nc
 # choose from: MinMaxScaler, StandardScaler, RobustScaler, QuantileTransformer
 scaler=QuantileTransformer
 # choose from: NuSVC, KNeighborsClassifier, RFClassifier
-model=RFClassifier
+model=KNeighborsClassifier
 train_fraction=0.7
diff --git a/scripts/run_script.sh b/scripts/run_script.sh
@@ -1 +1 @@
-python runner.py ../example/example_settings.cfg
+python runner.py ../example/example_settings.cfg
diff --git a/scripts/runner.py b/scripts/runner.py
@@ -7,7 +7,7 @@
 import matplotlib.pyplot as plt
 
 import warnings
-warnings.filterwarnings("ignore")
+warnings.filterwarnings("module")
 
 
 @click.group()
Original file line number	Diff line number	Diff line change
		@@ -1 +1 @@
		python runner.py ../example/example_settings.cfg
		python runner.py ../example/example_settings.cfg