diff --git a/conflict_model/evaluation.py b/conflict_model/evaluation.py
index 111cb54..5fe3a78 100644
--- a/conflict_model/evaluation.py
+++ b/conflict_model/evaluation.py
@@ -1,4 +1,5 @@
 import os, sys
+import warnings
 from sklearn import metrics
 import pandas as pd
 import geopandas as gpd
@@ -290,9 +291,6 @@ def get_feature_importance(clf, out_dir, config):
         out_dir (str): path to output folder. If None, output is not saved.
         config (ConfigParser-object): object containing the parsed configuration-settings of the model.
 
-    Raises:
-        Warning: raised if unsupported classifier is used.
-
     Returns:
         dataframe: dataframe containing feature importance.
     """
@@ -300,8 +298,8 @@ def get_feature_importance(clf, out_dir, config):
     if config.get('machine_learning', 'model') == 'RFClassifier':
         arr = clf.feature_importances_
     else:
-        arr = np.empty()
-        raise Warning('feature importance not supported for this kind of ML model')
+        arr = np.zeros(len(config.items('env_vars')))
+        warnings.warn('WARNING: feature importance not supported for this kind of ML model', UserWarning)
 
     dict_out = dict()
     for key, x in zip(config.items('env_vars'), range(len(arr))):
diff --git a/conflict_model/models.py b/conflict_model/models.py
index ca0f186..82d05b7 100644
--- a/conflict_model/models.py
+++ b/conflict_model/models.py
@@ -86,15 +86,13 @@ def leave_one_out(X, Y, config, scaler, clf, out_dir):
 
         eval_dict = evaluation.evaluate_prediction(y_test, y_pred, y_prob, X_test_loo, clf, config)
 
-        y_df = conflict.get_pred_conflict_geometry(X_test_ID, X_test_geom, y_test, y_pred)
-
-        X_df = pd.DataFrame(X_test)
+        utils.save_to_csv(eval_dict, sub_out_dir, 'evaluation_metrics')
 
     if not config.getboolean('general', 'verbose'):
         sys.stdout = orig_stdout
         f.close()
 
-    return X_df, y_df, eval_dict
+    sys.exit('With LEAVE-ONE-OUT model, execution stops here.')
 
 def single_variables(X, Y, config, scaler, clf, out_dir):
     """Model workflow when the model is based on only one single variable.
@@ -138,15 +136,13 @@ def single_variables(X, Y, config, scaler, clf, out_dir):
 
         eval_dict = evaluation.evaluate_prediction(y_test, y_pred, y_prob, X_test_svmod, clf, config)
 
-        y_df = conflict.get_pred_conflict_geometry(X_test_ID, X_test_geom, y_test, y_pred)
-
-        X_df = pd.DataFrame(X_test)
+        utils.save_to_csv(eval_dict, sub_out_dir, 'evaluation_metrics')
 
     if not config.getboolean('general', 'verbose'):
         sys.stdout = orig_stdout
         f.close()
 
-    return X_df, y_df, eval_dict
+    sys.exit('With SINGLE VARIABLE model, execution stops here.')
 
 def dubbelsteen(X, Y, config, scaler, clf, out_dir):
     """Model workflow when the relation between variables and conflict is based on randomness.
diff --git a/conflict_model/pipeline.py b/conflict_model/pipeline.py
index 7c908ba..c458471 100644
--- a/conflict_model/pipeline.py
+++ b/conflict_model/pipeline.py
@@ -79,11 +79,11 @@ def run(X, Y, config, scaler, clf, out_dir):
     if config.getint('general', 'model') == 1:
         X_df, y_df, eval_dict = models.all_data(X, Y, config, scaler, clf, out_dir)
     elif config.getint('general', 'model') == 2:
-        y_df, y_gdf, eval_dict = models.leave_one_out(X, Y, config, scaler, clf, out_dir)
+        X_df, y_df, eval_dict = models.leave_one_out(X, Y, config, scaler, clf, out_dir)
     elif config.getint('general', 'model') == 3:
-        y_df, y_gdf, eval_dict = models.single_variables(X, Y, config, scaler, clf, out_dir)
+        X_df, y_df, eval_dict = models.single_variables(X, Y, config, scaler, clf, out_dir)
    elif config.getint('general', 'model') == 4:
-        y_df, y_gdf, eval_dict = models.dubbelsteen(X, Y, config, scaler, clf, out_dir)
+        X_df, y_df, eval_dict = models.dubbelsteen(X, Y, config, scaler, clf, out_dir)
     else:
         raise ValueError('the specified model type in the cfg-file is invalid - specify either 1, 2, 3 or 4.')
 
diff --git a/conflict_model/plots.py b/conflict_model/plots.py
index 9adf312..445e88f 100644
--- a/conflict_model/plots.py
+++ b/conflict_model/plots.py
@@ -153,6 +153,7 @@ def plot_ROC_curve_n_times(ax, clf, X_test, y_test, tprs, aucs, mean_fpr, **kwar
         list: lists with true positive rates and area-under-curve values per plot.
     """
 
+    print(len(X_test), len(y_test))
     viz = metrics.plot_roc_curve(clf, X_test, y_test,
                          ax=ax, alpha=0.15, color='b', lw=1, label=None, **kwargs)
 
diff --git a/conflict_model/utils.py b/conflict_model/utils.py
index d71a7e3..e687557 100644
--- a/conflict_model/utils.py
+++ b/conflict_model/utils.py
@@ -152,6 +152,10 @@ def initiate_setup(settings_file):
     if config['conflict']['conflict_file'] == 'download':
         download_PRIO(config)
 
+    if (config.getint('general', 'model') == 2) or (config.getint('general', 'model') == 3):
+        config.set('settings', 'n_runs', str(1))
+        print('changed nr of runs to {}'.format(config.getint('settings', 'n_runs')))
+
     return config, out_dir
 
 def create_artificial_Y(Y):
@@ -225,7 +229,10 @@ def save_to_csv(arg, out_dir, fname):
     """
 
     if isinstance(arg, dict):
-        arg = pd.DataFrame().from_dict(arg)
+        try:
+            arg = pd.DataFrame().from_dict(arg)
+        except ValueError:
+            arg = pd.DataFrame().from_dict(arg, orient='index')
 
     arg.to_csv(os.path.join(out_dir, fname + '.csv'))
     return
diff --git a/example/example_settings.cfg b/example/example_settings.cfg
index fa1e771..1c5936b 100644
--- a/example/example_settings.cfg
+++ b/example/example_settings.cfg
@@ -3,7 +3,7 @@ input_dir=../example/example_data
 output_dir=../example/OUT
 # 1: all data; 2: leave-one-out model; 3: single variable model; 4: dubbelsteenmodel
 # Note that only 1 supports sensitivity_analysis
-model=1
+model=3
 verbose=True
 
 [settings]
@@ -17,7 +17,8 @@ n_runs=50
 [pre_calc]
 # if nothing is specified, the XY array will be stored in input_dir
 # if XY already pre-calculated, then provide path to npy-file
-XY=
+# XY=
+XY=XY.npy
 
 [extent]
 shp=waterProvinces/waterProvinces_Africa.shp
@@ -46,5 +47,5 @@ irr_water_demand=irrWaterDemand.nc
 # choose from: MinMaxScaler, StandardScaler, RobustScaler, QuantileTransformer
 scaler=QuantileTransformer
 # choose from: NuSVC, KNeighborsClassifier, RFClassifier
-model=RFClassifier
+model=KNeighborsClassifier
 train_fraction=0.7
\ No newline at end of file
diff --git a/scripts/run_script.sh b/scripts/run_script.sh
index df9a72d..62d9d84 100644
--- a/scripts/run_script.sh
+++ b/scripts/run_script.sh
@@ -1 +1 @@
-python runner.py ../example/example_settings.cfg
\ No newline at end of file
+python runner.py ../example/example_settings.cfg
diff --git a/scripts/runner.py b/scripts/runner.py
index 5deab4d..00ab22d 100644
--- a/scripts/runner.py
+++ b/scripts/runner.py
@@ -7,7 +7,7 @@
 import matplotlib.pyplot as plt
 
 import warnings
-warnings.filterwarnings("ignore")
+warnings.filterwarnings("module")
 
 
 @click.group()