From c3b4c21b505866c413ecec5f96c5f0f1b3809e2d Mon Sep 17 00:00:00 2001
From: Ravin Kohli <kohliravin7@gmail.com>
Date: Wed, 5 May 2021 11:55:42 +0200
Subject: [PATCH 01/15] initial configurations added

---
 autoPyTorch/optimizer/greedy_portfolio.json | 509 ++++++++++++++++++++
 autoPyTorch/optimizer/smbo.py               |  27 +-
 2 files changed, 533 insertions(+), 3 deletions(-)
 create mode 100644 autoPyTorch/optimizer/greedy_portfolio.json

diff --git a/autoPyTorch/optimizer/greedy_portfolio.json b/autoPyTorch/optimizer/greedy_portfolio.json
new file mode 100644
index 000000000..a8e640a4e
--- /dev/null
+++ b/autoPyTorch/optimizer/greedy_portfolio.json
@@ -0,0 +1,509 @@
+[{"data_loader:batch_size": 60,
+ "encoder:__choice__": "OneHotEncoder",
+ "feature_preprocessor:__choice__": "NoFeaturePreprocessor",
+ "imputer:categorical_strategy": "most_frequent",
+ "imputer:numerical_strategy": "mean",
+ "lr_scheduler:__choice__": "CosineAnnealingLR",
+ "network_backbone:__choice__": "ShapedMLPBackbone",
+ "network_embedding:__choice__": "NoEmbedding",
+ "network_head:__choice__": "fully_connected",
+ "network_init:__choice__": "XavierInit",
+ "optimizer:__choice__": "AdamOptimizer",
+ "scaler:__choice__": "StandardScaler",
+ "trainer:__choice__": "MixUpTrainer",
+ "lr_scheduler:CosineAnnealingLR:T_max": 50,
+ "network_backbone:ShapedMLPBackbone:activation": "relu",
+ "network_backbone:ShapedMLPBackbone:max_units": 290,
+ "network_backbone:ShapedMLPBackbone:mlp_shape": "funnel",
+ "network_backbone:ShapedMLPBackbone:num_groups": 6,
+ "network_backbone:ShapedMLPBackbone:output_dim": 200,
+ "network_backbone:ShapedMLPBackbone:use_dropout": true,
+ "network_head:fully_connected:num_layers": 2,
+ "network_init:XavierInit:bias_strategy": "Zero",
+ "optimizer:AdamOptimizer:beta1": 0.9,
+ "optimizer:AdamOptimizer:beta2": 0.9,
+ "optimizer:AdamOptimizer:lr": 0.00020060142055000713,
+ "optimizer:AdamOptimizer:weight_decay": 0.0018320003468984575,
+ "trainer:MixUpTrainer:alpha": 0.8448753109694546,
+ "trainer:MixUpTrainer:weighted_loss": true,
+ "network_head:fully_connected:activation": "relu",
+ "network_head:fully_connected:units_layer_1": 128,
+ "network_backbone:ShapedMLPBackbone:max_dropout": 0.023271935735825866},
+ {"data_loader:batch_size": 255,
+ "encoder:__choice__": "OneHotEncoder",
+ "feature_preprocessor:__choice__": "NoFeaturePreprocessor",
+ "imputer:categorical_strategy": "most_frequent",
+ "imputer:numerical_strategy": "mean",
+ "lr_scheduler:__choice__": "CosineAnnealingLR",
+ "network_backbone:__choice__": "ShapedResNetBackbone",
+ "network_embedding:__choice__": "NoEmbedding",
+ "network_head:__choice__": "fully_connected",
+ "network_init:__choice__": "XavierInit",
+ "optimizer:__choice__": "AdamOptimizer",
+ "scaler:__choice__": "StandardScaler",
+ "trainer:__choice__": "StandardTrainer",
+ "lr_scheduler:CosineAnnealingLR:T_max": 50,
+ "network_backbone:ShapedResNetBackbone:activation": "relu",
+ "network_backbone:ShapedResNetBackbone:blocks_per_group": 2,
+ "network_backbone:ShapedResNetBackbone:max_units": 41,
+ "network_backbone:ShapedResNetBackbone:num_groups": 4,
+ "network_backbone:ShapedResNetBackbone:output_dim": 200,
+ "network_backbone:ShapedResNetBackbone:resnet_shape": "funnel",
+ "network_backbone:ShapedResNetBackbone:use_dropout": true,
+ "network_backbone:ShapedResNetBackbone:use_shake_drop": true,
+ "network_backbone:ShapedResNetBackbone:use_shake_shake": false,
+ "network_head:fully_connected:num_layers": 2,
+ "network_init:XavierInit:bias_strategy": "Zero",
+ "optimizer:AdamOptimizer:beta1": 0.9,
+ "optimizer:AdamOptimizer:beta2": 0.9,
+ "optimizer:AdamOptimizer:lr": 0.011526647986073339,
+ "optimizer:AdamOptimizer:weight_decay": 0.031290291410446765,
+ "trainer:StandardTrainer:weighted_loss": true,
+ "network_backbone:ShapedResNetBackbone:max_shake_drop_probability": 0.30409463597128383,
+ "network_head:fully_connected:activation": "relu",
+ "network_head:fully_connected:units_layer_1": 128,
+ "network_backbone:ShapedResNetBackbone:max_dropout": 0.7662454727603789},
+ {"data_loader:batch_size": 165,
+ "encoder:__choice__": "OneHotEncoder",
+ "feature_preprocessor:__choice__": "NoFeaturePreprocessor",
+ "imputer:categorical_strategy": "most_frequent",
+ "imputer:numerical_strategy": "mean",
+ "lr_scheduler:__choice__": "CosineAnnealingLR",
+ "network_backbone:__choice__": "ShapedResNetBackbone",
+ "network_embedding:__choice__": "NoEmbedding",
+ "network_head:__choice__": "fully_connected",
+ "network_init:__choice__": "XavierInit",
+ "optimizer:__choice__": "SGDOptimizer",
+ "scaler:__choice__": "StandardScaler",
+ "trainer:__choice__": "StandardTrainer",
+ "lr_scheduler:CosineAnnealingLR:T_max": 50,
+ "network_backbone:ShapedResNetBackbone:activation": "relu",
+ "network_backbone:ShapedResNetBackbone:blocks_per_group": 3,
+ "network_backbone:ShapedResNetBackbone:max_units": 438,
+ "network_backbone:ShapedResNetBackbone:num_groups": 1,
+ "network_backbone:ShapedResNetBackbone:output_dim": 200,
+ "network_backbone:ShapedResNetBackbone:resnet_shape": "funnel",
+ "network_backbone:ShapedResNetBackbone:use_dropout": false,
+ "network_backbone:ShapedResNetBackbone:use_shake_drop": false,
+ "network_backbone:ShapedResNetBackbone:use_shake_shake": false,
+ "network_head:fully_connected:num_layers": 2,
+ "network_init:XavierInit:bias_strategy": "Zero",
+ "optimizer:SGDOptimizer:lr": 0.07331878780908542,
+ "optimizer:SGDOptimizer:momentum": 0.44665514022476815,
+ "optimizer:SGDOptimizer:weight_decay": 0.006911333726469374,
+ "trainer:StandardTrainer:weighted_loss": true,
+ "network_head:fully_connected:activation": "relu",
+ "network_head:fully_connected:units_layer_1": 128},
+ {"data_loader:batch_size": 299,
+ "encoder:__choice__": "OneHotEncoder",
+ "feature_preprocessor:__choice__": "NoFeaturePreprocessor",
+ "imputer:categorical_strategy": "most_frequent",
+ "imputer:numerical_strategy": "mean",
+ "lr_scheduler:__choice__": "CosineAnnealingLR",
+ "network_backbone:__choice__": "ShapedResNetBackbone",
+ "network_embedding:__choice__": "NoEmbedding",
+ "network_head:__choice__": "fully_connected",
+ "network_init:__choice__": "XavierInit",
+ "optimizer:__choice__": "AdamOptimizer",
+ "scaler:__choice__": "StandardScaler",
+ "trainer:__choice__": "StandardTrainer",
+ "lr_scheduler:CosineAnnealingLR:T_max": 50,
+ "network_backbone:ShapedResNetBackbone:activation": "relu",
+ "network_backbone:ShapedResNetBackbone:blocks_per_group": 2,
+ "network_backbone:ShapedResNetBackbone:max_units": 279,
+ "network_backbone:ShapedResNetBackbone:num_groups": 2,
+ "network_backbone:ShapedResNetBackbone:output_dim": 200,
+ "network_backbone:ShapedResNetBackbone:resnet_shape": "funnel",
+ "network_backbone:ShapedResNetBackbone:use_dropout": false,
+ "network_backbone:ShapedResNetBackbone:use_shake_drop": false,
+ "network_backbone:ShapedResNetBackbone:use_shake_shake": false,
+ "network_head:fully_connected:num_layers": 2,
+ "network_init:XavierInit:bias_strategy": "Zero",
+ "optimizer:AdamOptimizer:beta1": 0.9,
+ "optimizer:AdamOptimizer:beta2": 0.9,
+ "optimizer:AdamOptimizer:lr": 0.0007471732018616978,
+ "optimizer:AdamOptimizer:weight_decay": 0.0005438753720314742,
+ "trainer:StandardTrainer:weighted_loss": true,
+ "network_head:fully_connected:activation": "relu",
+ "network_head:fully_connected:units_layer_1": 128},
+ {"data_loader:batch_size": 183,
+ "encoder:__choice__": "OneHotEncoder",
+ "feature_preprocessor:__choice__": "NoFeaturePreprocessor",
+ "imputer:categorical_strategy": "most_frequent",
+ "imputer:numerical_strategy": "mean",
+ "lr_scheduler:__choice__": "CosineAnnealingLR",
+ "network_backbone:__choice__": "ShapedResNetBackbone",
+ "network_embedding:__choice__": "NoEmbedding",
+ "network_head:__choice__": "fully_connected",
+ "network_init:__choice__": "XavierInit",
+ "optimizer:__choice__": "AdamOptimizer",
+ "scaler:__choice__": "StandardScaler",
+ "trainer:__choice__": "StandardTrainer",
+ "lr_scheduler:CosineAnnealingLR:T_max": 50,
+ "network_backbone:ShapedResNetBackbone:activation": "relu",
+ "network_backbone:ShapedResNetBackbone:blocks_per_group": 2,
+ "network_backbone:ShapedResNetBackbone:max_units": 354,
+ "network_backbone:ShapedResNetBackbone:num_groups": 1,
+ "network_backbone:ShapedResNetBackbone:output_dim": 200,
+ "network_backbone:ShapedResNetBackbone:resnet_shape": "funnel",
+ "network_backbone:ShapedResNetBackbone:use_dropout": true,
+ "network_backbone:ShapedResNetBackbone:use_shake_drop": true,
+ "network_backbone:ShapedResNetBackbone:use_shake_shake": true,
+ "network_head:fully_connected:num_layers": 2,
+ "network_init:XavierInit:bias_strategy": "Zero",
+ "optimizer:AdamOptimizer:beta1": 0.9,
+ "optimizer:AdamOptimizer:beta2": 0.9,
+ "optimizer:AdamOptimizer:lr": 0.001780112494729604,
+ "optimizer:AdamOptimizer:weight_decay": 0.004224029178574147,
+ "trainer:StandardTrainer:weighted_loss": true,
+ "network_backbone:ShapedResNetBackbone:max_shake_drop_probability": 0.4412292309825137,
+ "network_head:fully_connected:activation": "relu",
+ "network_head:fully_connected:units_layer_1": 128,
+ "network_backbone:ShapedResNetBackbone:max_dropout": 0.27204101593048097},
+ {"data_loader:batch_size": 21,
+ "encoder:__choice__": "OneHotEncoder",
+ "feature_preprocessor:__choice__": "NoFeaturePreprocessor",
+ "imputer:categorical_strategy": "most_frequent",
+ "imputer:numerical_strategy": "mean",
+ "lr_scheduler:__choice__": "CosineAnnealingLR",
+ "network_backbone:__choice__": "ShapedMLPBackbone",
+ "network_embedding:__choice__": "NoEmbedding",
+ "network_head:__choice__": "fully_connected",
+ "network_init:__choice__": "XavierInit",
+ "optimizer:__choice__": "AdamOptimizer",
+ "scaler:__choice__": "StandardScaler",
+ "trainer:__choice__": "StandardTrainer",
+ "lr_scheduler:CosineAnnealingLR:T_max": 50,
+ "network_backbone:ShapedMLPBackbone:activation": "relu",
+ "network_backbone:ShapedMLPBackbone:max_units": 201,
+ "network_backbone:ShapedMLPBackbone:mlp_shape": "funnel",
+ "network_backbone:ShapedMLPBackbone:num_groups": 3,
+ "network_backbone:ShapedMLPBackbone:output_dim": 200,
+ "network_backbone:ShapedMLPBackbone:use_dropout": false,
+ "network_head:fully_connected:num_layers": 2,
+ "network_init:XavierInit:bias_strategy": "Zero",
+ "optimizer:AdamOptimizer:beta1": 0.9,
+ "optimizer:AdamOptimizer:beta2": 0.9,
+ "optimizer:AdamOptimizer:lr": 0.015232504956212976,
+ "optimizer:AdamOptimizer:weight_decay": 9.906036909600088e-05,
+ "trainer:StandardTrainer:weighted_loss": true,
+ "network_head:fully_connected:activation": "relu",
+ "network_head:fully_connected:units_layer_1": 128},
+ {"data_loader:batch_size": 159,
+ "encoder:__choice__": "OneHotEncoder",
+ "feature_preprocessor:__choice__": "TruncatedSVD",
+ "imputer:categorical_strategy": "most_frequent",
+ "imputer:numerical_strategy": "mean",
+ "lr_scheduler:__choice__": "CosineAnnealingLR",
+ "network_backbone:__choice__": "ShapedMLPBackbone",
+ "network_embedding:__choice__": "NoEmbedding",
+ "network_head:__choice__": "fully_connected",
+ "network_init:__choice__": "XavierInit",
+ "optimizer:__choice__": "AdamOptimizer",
+ "scaler:__choice__": "StandardScaler",
+ "trainer:__choice__": "StandardTrainer",
+ "feature_preprocessor:TruncatedSVD:target_dim": 151,
+ "lr_scheduler:CosineAnnealingLR:T_max": 50,
+ "network_backbone:ShapedMLPBackbone:activation": "relu",
+ "network_backbone:ShapedMLPBackbone:max_units": 966,
+ "network_backbone:ShapedMLPBackbone:mlp_shape": "funnel",
+ "network_backbone:ShapedMLPBackbone:num_groups": 5,
+ "network_backbone:ShapedMLPBackbone:output_dim": 200,
+ "network_backbone:ShapedMLPBackbone:use_dropout": false,
+ "network_head:fully_connected:num_layers": 2,
+ "network_init:XavierInit:bias_strategy": "Zero",
+ "optimizer:AdamOptimizer:beta1": 0.9,
+ "optimizer:AdamOptimizer:beta2": 0.9,
+ "optimizer:AdamOptimizer:lr": 0.0007790465032701478,
+ "optimizer:AdamOptimizer:weight_decay": 0.0016722444122252624,
+ "trainer:StandardTrainer:weighted_loss": true,
+ "network_head:fully_connected:activation": "relu",
+ "network_head:fully_connected:units_layer_1": 128},
+ {"data_loader:batch_size": 442,
+ "encoder:__choice__": "OneHotEncoder",
+ "feature_preprocessor:__choice__": "TruncatedSVD",
+ "imputer:categorical_strategy": "most_frequent",
+ "imputer:numerical_strategy": "mean",
+ "lr_scheduler:__choice__": "CosineAnnealingLR",
+ "network_backbone:__choice__": "ShapedResNetBackbone",
+ "network_embedding:__choice__": "NoEmbedding",
+ "network_head:__choice__": "fully_connected",
+ "network_init:__choice__": "XavierInit",
+ "optimizer:__choice__": "AdamOptimizer",
+ "scaler:__choice__": "StandardScaler",
+ "trainer:__choice__": "StandardTrainer",
+ "feature_preprocessor:TruncatedSVD:target_dim": 115,
+ "lr_scheduler:CosineAnnealingLR:T_max": 50,
+ "network_backbone:ShapedResNetBackbone:activation": "relu",
+ "network_backbone:ShapedResNetBackbone:blocks_per_group": 3,
+ "network_backbone:ShapedResNetBackbone:max_units": 467,
+ "network_backbone:ShapedResNetBackbone:num_groups": 1,
+ "network_backbone:ShapedResNetBackbone:output_dim": 200,
+ "network_backbone:ShapedResNetBackbone:resnet_shape": "funnel",
+ "network_backbone:ShapedResNetBackbone:use_dropout": false,
+ "network_backbone:ShapedResNetBackbone:use_shake_drop": false,
+ "network_backbone:ShapedResNetBackbone:use_shake_shake": false,
+ "network_head:fully_connected:num_layers": 2,
+ "network_init:XavierInit:bias_strategy": "Zero",
+ "optimizer:AdamOptimizer:beta1": 0.9,
+ "optimizer:AdamOptimizer:beta2": 0.9,
+ "optimizer:AdamOptimizer:lr": 0.0008298747674239372,
+ "optimizer:AdamOptimizer:weight_decay": 0.0067071038164946365,
+ "trainer:StandardTrainer:weighted_loss": true,
+ "network_head:fully_connected:activation": "relu",
+ "network_head:fully_connected:units_layer_1": 128},
+ {"data_loader:batch_size": 140,
+ "encoder:__choice__": "OneHotEncoder",
+ "feature_preprocessor:__choice__": "TruncatedSVD",
+ "imputer:categorical_strategy": "most_frequent",
+ "imputer:numerical_strategy": "mean",
+ "lr_scheduler:__choice__": "CosineAnnealingLR",
+ "network_backbone:__choice__": "ShapedResNetBackbone",
+ "network_embedding:__choice__": "NoEmbedding",
+ "network_head:__choice__": "fully_connected",
+ "network_init:__choice__": "XavierInit",
+ "optimizer:__choice__": "AdamOptimizer",
+ "scaler:__choice__": "StandardScaler",
+ "trainer:__choice__": "StandardTrainer",
+ "feature_preprocessor:TruncatedSVD:target_dim": 240,
+ "lr_scheduler:CosineAnnealingLR:T_max": 50,
+ "network_backbone:ShapedResNetBackbone:activation": "relu",
+ "network_backbone:ShapedResNetBackbone:blocks_per_group": 1,
+ "network_backbone:ShapedResNetBackbone:max_units": 423,
+ "network_backbone:ShapedResNetBackbone:num_groups": 3,
+ "network_backbone:ShapedResNetBackbone:output_dim": 200,
+ "network_backbone:ShapedResNetBackbone:resnet_shape": "funnel",
+ "network_backbone:ShapedResNetBackbone:use_dropout": false,
+ "network_backbone:ShapedResNetBackbone:use_shake_drop": false,
+ "network_backbone:ShapedResNetBackbone:use_shake_shake": true,
+ "network_head:fully_connected:num_layers": 2,
+ "network_init:XavierInit:bias_strategy": "Zero",
+ "optimizer:AdamOptimizer:beta1": 0.9,
+ "optimizer:AdamOptimizer:beta2": 0.9,
+ "optimizer:AdamOptimizer:lr": 0.0006164392898567234,
+ "optimizer:AdamOptimizer:weight_decay": 0.006605449457495538,
+ "trainer:StandardTrainer:weighted_loss": true,
+ "network_head:fully_connected:activation": "relu",
+ "network_head:fully_connected:units_layer_1": 128},
+ {"data_loader:batch_size": 48,
+ "encoder:__choice__": "OneHotEncoder",
+ "feature_preprocessor:__choice__": "NoFeaturePreprocessor",
+ "imputer:categorical_strategy": "most_frequent",
+ "imputer:numerical_strategy": "mean",
+ "lr_scheduler:__choice__": "CosineAnnealingLR",
+ "network_backbone:__choice__": "ShapedMLPBackbone",
+ "network_embedding:__choice__": "NoEmbedding",
+ "network_head:__choice__": "fully_connected",
+ "network_init:__choice__": "XavierInit",
+ "optimizer:__choice__": "SGDOptimizer",
+ "scaler:__choice__": "StandardScaler",
+ "trainer:__choice__": "StandardTrainer",
+ "lr_scheduler:CosineAnnealingLR:T_max": 50,
+ "network_backbone:ShapedMLPBackbone:activation": "relu",
+ "network_backbone:ShapedMLPBackbone:max_units": 529,
+ "network_backbone:ShapedMLPBackbone:mlp_shape": "funnel",
+ "network_backbone:ShapedMLPBackbone:num_groups": 3,
+ "network_backbone:ShapedMLPBackbone:output_dim": 200,
+ "network_backbone:ShapedMLPBackbone:use_dropout": false,
+ "network_head:fully_connected:num_layers": 2,
+ "network_init:XavierInit:bias_strategy": "Zero",
+ "optimizer:SGDOptimizer:lr": 0.020107910011636462,
+ "optimizer:SGDOptimizer:momentum": 0.5818716367708677,
+ "optimizer:SGDOptimizer:weight_decay": 0.003995594064278902,
+ "trainer:StandardTrainer:weighted_loss": true,
+ "network_head:fully_connected:activation": "relu",
+ "network_head:fully_connected:units_layer_1": 128},
+ {"data_loader:batch_size": 168,
+ "encoder:__choice__": "OneHotEncoder",
+ "feature_preprocessor:__choice__": "NoFeaturePreprocessor",
+ "imputer:categorical_strategy": "most_frequent",
+ "imputer:numerical_strategy": "mean",
+ "lr_scheduler:__choice__": "CosineAnnealingLR",
+ "network_backbone:__choice__": "ShapedResNetBackbone",
+ "network_embedding:__choice__": "NoEmbedding",
+ "network_head:__choice__": "fully_connected",
+ "network_init:__choice__": "XavierInit",
+ "optimizer:__choice__": "AdamOptimizer",
+ "scaler:__choice__": "StandardScaler",
+ "trainer:__choice__": "StandardTrainer",
+ "lr_scheduler:CosineAnnealingLR:T_max": 50,
+ "network_backbone:ShapedResNetBackbone:activation": "relu",
+ "network_backbone:ShapedResNetBackbone:blocks_per_group": 1,
+ "network_backbone:ShapedResNetBackbone:max_units": 349,
+ "network_backbone:ShapedResNetBackbone:num_groups": 3,
+ "network_backbone:ShapedResNetBackbone:output_dim": 200,
+ "network_backbone:ShapedResNetBackbone:resnet_shape": "funnel",
+ "network_backbone:ShapedResNetBackbone:use_dropout": true,
+ "network_backbone:ShapedResNetBackbone:use_shake_drop": false,
+ "network_backbone:ShapedResNetBackbone:use_shake_shake": false,
+ "network_head:fully_connected:num_layers": 2,
+ "network_init:XavierInit:bias_strategy": "Zero",
+ "optimizer:AdamOptimizer:beta1": 0.9,
+ "optimizer:AdamOptimizer:beta2": 0.9,
+ "optimizer:AdamOptimizer:lr": 0.0019867054473724295,
+ "optimizer:AdamOptimizer:weight_decay": 0.0067889732830148704,
+ "trainer:StandardTrainer:weighted_loss": true,
+ "network_head:fully_connected:activation": "relu",
+ "network_head:fully_connected:units_layer_1": 128,
+ "network_backbone:ShapedResNetBackbone:max_dropout": 0.8992826006547855},
+ {"data_loader:batch_size": 21,
+ "encoder:__choice__": "OneHotEncoder",
+ "feature_preprocessor:__choice__": "NoFeaturePreprocessor",
+ "imputer:categorical_strategy": "most_frequent",
+ "imputer:numerical_strategy": "mean",
+ "lr_scheduler:__choice__": "CosineAnnealingLR",
+ "network_backbone:__choice__": "ShapedMLPBackbone",
+ "network_embedding:__choice__": "NoEmbedding",
+ "network_head:__choice__": "fully_connected",
+ "network_init:__choice__": "XavierInit",
+ "optimizer:__choice__": "AdamOptimizer",
+ "scaler:__choice__": "StandardScaler",
+ "trainer:__choice__": "StandardTrainer",
+ "lr_scheduler:CosineAnnealingLR:T_max": 50,
+ "network_backbone:ShapedMLPBackbone:activation": "relu",
+ "network_backbone:ShapedMLPBackbone:max_units": 278,
+ "network_backbone:ShapedMLPBackbone:mlp_shape": "funnel",
+ "network_backbone:ShapedMLPBackbone:num_groups": 3,
+ "network_backbone:ShapedMLPBackbone:output_dim": 200,
+ "network_backbone:ShapedMLPBackbone:use_dropout": false,
+ "network_head:fully_connected:num_layers": 2,
+ "network_init:XavierInit:bias_strategy": "Zero",
+ "optimizer:AdamOptimizer:beta1": 0.9,
+ "optimizer:AdamOptimizer:beta2": 0.9,
+ "optimizer:AdamOptimizer:lr": 0.001178107244651597,
+ "optimizer:AdamOptimizer:weight_decay": 0.010815452216436712,
+ "trainer:StandardTrainer:weighted_loss": true,
+ "network_head:fully_connected:activation": "relu",
+ "network_head:fully_connected:units_layer_1": 128},
+ {"data_loader:batch_size": 163,
+ "encoder:__choice__": "OneHotEncoder",
+ "feature_preprocessor:__choice__": "NoFeaturePreprocessor",
+ "imputer:categorical_strategy": "most_frequent",
+ "imputer:numerical_strategy": "mean",
+ "lr_scheduler:__choice__": "CosineAnnealingLR",
+ "network_backbone:__choice__": "ShapedResNetBackbone",
+ "network_embedding:__choice__": "NoEmbedding",
+ "network_head:__choice__": "fully_connected",
+ "network_init:__choice__": "XavierInit",
+ "optimizer:__choice__": "AdamOptimizer",
+ "scaler:__choice__": "StandardScaler",
+ "trainer:__choice__": "StandardTrainer",
+ "lr_scheduler:CosineAnnealingLR:T_max": 50,
+ "network_backbone:ShapedResNetBackbone:activation": "relu",
+ "network_backbone:ShapedResNetBackbone:blocks_per_group": 1,
+ "network_backbone:ShapedResNetBackbone:max_units": 171,
+ "network_backbone:ShapedResNetBackbone:num_groups": 3,
+ "network_backbone:ShapedResNetBackbone:output_dim": 200,
+ "network_backbone:ShapedResNetBackbone:resnet_shape": "funnel",
+ "network_backbone:ShapedResNetBackbone:use_dropout": true,
+ "network_backbone:ShapedResNetBackbone:use_shake_drop": false,
+ "network_backbone:ShapedResNetBackbone:use_shake_shake": false,
+ "network_head:fully_connected:num_layers": 2,
+ "network_init:XavierInit:bias_strategy": "Zero",
+ "optimizer:AdamOptimizer:beta1": 0.9,
+ "optimizer:AdamOptimizer:beta2": 0.9,
+ "optimizer:AdamOptimizer:lr": 0.002654293880282279,
+ "optimizer:AdamOptimizer:weight_decay": 0.010374059713414468,
+ "trainer:StandardTrainer:weighted_loss": true,
+ "network_head:fully_connected:activation": "relu",
+ "network_head:fully_connected:units_layer_1": 128,
+ "network_backbone:ShapedResNetBackbone:max_dropout": 0.6341848343636569},
+ {"data_loader:batch_size": 150,
+ "encoder:__choice__": "OneHotEncoder",
+ "feature_preprocessor:__choice__": "NoFeaturePreprocessor",
+ "imputer:categorical_strategy": "most_frequent",
+ "imputer:numerical_strategy": "mean",
+ "lr_scheduler:__choice__": "CosineAnnealingLR",
+ "network_backbone:__choice__": "ShapedResNetBackbone",
+ "network_embedding:__choice__": "NoEmbedding",
+ "network_head:__choice__": "fully_connected",
+ "network_init:__choice__": "XavierInit",
+ "optimizer:__choice__": "AdamOptimizer",
+ "scaler:__choice__": "StandardScaler",
+ "trainer:__choice__": "StandardTrainer",
+ "lr_scheduler:CosineAnnealingLR:T_max": 50,
+ "network_backbone:ShapedResNetBackbone:activation": "relu",
+ "network_backbone:ShapedResNetBackbone:blocks_per_group": 1,
+ "network_backbone:ShapedResNetBackbone:max_units": 314,
+ "network_backbone:ShapedResNetBackbone:num_groups": 1,
+ "network_backbone:ShapedResNetBackbone:output_dim": 200,
+ "network_backbone:ShapedResNetBackbone:resnet_shape": "funnel",
+ "network_backbone:ShapedResNetBackbone:use_dropout": true,
+ "network_backbone:ShapedResNetBackbone:use_shake_drop": true,
+ "network_backbone:ShapedResNetBackbone:use_shake_shake": false,
+ "network_head:fully_connected:num_layers": 2,
+ "network_init:XavierInit:bias_strategy": "Zero",
+ "optimizer:AdamOptimizer:beta1": 0.9,
+ "optimizer:AdamOptimizer:beta2": 0.9,
+ "optimizer:AdamOptimizer:lr": 0.003106362796390374,
+ "optimizer:AdamOptimizer:weight_decay": 0.010492136888557045,
+ "trainer:StandardTrainer:weighted_loss": true,
+ "network_backbone:ShapedResNetBackbone:max_shake_drop_probability": 0.2808341606307928,
+ "network_head:fully_connected:activation": "relu",
+ "network_head:fully_connected:units_layer_1": 128,
+ "network_backbone:ShapedResNetBackbone:max_dropout": 0.7133813761319248},
+ {"data_loader:batch_size": 151,
+ "encoder:__choice__": "OneHotEncoder",
+ "feature_preprocessor:__choice__": "TruncatedSVD",
+ "imputer:categorical_strategy": "most_frequent",
+ "imputer:numerical_strategy": "mean",
+ "lr_scheduler:__choice__": "CosineAnnealingLR",
+ "network_backbone:__choice__": "ShapedMLPBackbone",
+ "network_embedding:__choice__": "NoEmbedding",
+ "network_head:__choice__": "fully_connected",
+ "network_init:__choice__": "XavierInit",
+ "optimizer:__choice__": "AdamOptimizer",
+ "scaler:__choice__": "StandardScaler",
+ "trainer:__choice__": "StandardTrainer",
+ "feature_preprocessor:TruncatedSVD:target_dim": 147,
+ "lr_scheduler:CosineAnnealingLR:T_max": 50,
+ "network_backbone:ShapedMLPBackbone:activation": "relu",
+ "network_backbone:ShapedMLPBackbone:max_units": 313,
+ "network_backbone:ShapedMLPBackbone:mlp_shape": "funnel",
+ "network_backbone:ShapedMLPBackbone:num_groups": 3,
+ "network_backbone:ShapedMLPBackbone:output_dim": 200,
+ "network_backbone:ShapedMLPBackbone:use_dropout": false,
+ "network_head:fully_connected:num_layers": 2,
+ "network_init:XavierInit:bias_strategy": "Zero",
+ "optimizer:AdamOptimizer:beta1": 0.9,
+ "optimizer:AdamOptimizer:beta2": 0.9,
+ "optimizer:AdamOptimizer:lr": 0.0013791902792817097,
+ "optimizer:AdamOptimizer:weight_decay": 0.0016536079820230513,
+ "trainer:StandardTrainer:weighted_loss": true,
+ "network_head:fully_connected:activation": "relu",
+ "network_head:fully_connected:units_layer_1": 128},
+ {"data_loader:batch_size": 42,
+ "encoder:__choice__": "OneHotEncoder",
+ "feature_preprocessor:__choice__": "TruncatedSVD",
+ "imputer:categorical_strategy": "most_frequent",
+ "imputer:numerical_strategy": "mean",
+ "lr_scheduler:__choice__": "CosineAnnealingLR",
+ "network_backbone:__choice__": "ShapedResNetBackbone",
+ "network_embedding:__choice__": "NoEmbedding",
+ "network_head:__choice__": "fully_connected",
+ "network_init:__choice__": "XavierInit",
+ "optimizer:__choice__": "AdamOptimizer",
+ "scaler:__choice__": "StandardScaler",
+ "trainer:__choice__": "StandardTrainer",
+ "feature_preprocessor:TruncatedSVD:target_dim": 151,
+ "lr_scheduler:CosineAnnealingLR:T_max": 50,
+ "network_backbone:ShapedResNetBackbone:activation": "relu",
+ "network_backbone:ShapedResNetBackbone:blocks_per_group": 2,
+ "network_backbone:ShapedResNetBackbone:max_units": 86,
+ "network_backbone:ShapedResNetBackbone:num_groups": 3,
+ "network_backbone:ShapedResNetBackbone:output_dim": 200,
+ "network_backbone:ShapedResNetBackbone:resnet_shape": "funnel",
+ "network_backbone:ShapedResNetBackbone:use_dropout": true,
+ "network_backbone:ShapedResNetBackbone:use_shake_drop": true,
+ "network_backbone:ShapedResNetBackbone:use_shake_shake": false,
+ "network_head:fully_connected:num_layers": 2,
+ "network_init:XavierInit:bias_strategy": "Zero",
+ "optimizer:AdamOptimizer:beta1": 0.9,
+ "optimizer:AdamOptimizer:beta2": 0.9,
+ "optimizer:AdamOptimizer:lr": 0.0021530021937535334,
+ "optimizer:AdamOptimizer:weight_decay": 0.008386657635007597,
+ "trainer:StandardTrainer:weighted_loss": true,
+ "network_backbone:ShapedResNetBackbone:max_shake_drop_probability": 0.034431265307095615,
+ "network_head:fully_connected:activation": "relu",
+ "network_head:fully_connected:units_layer_1": 128,
+ "network_backbone:ShapedResNetBackbone:max_dropout": 0.6296079567189131}]
\ No newline at end of file
diff --git a/autoPyTorch/optimizer/smbo.py b/autoPyTorch/optimizer/smbo.py
index ddd6e95a1..092550175 100644
--- a/autoPyTorch/optimizer/smbo.py
+++ b/autoPyTorch/optimizer/smbo.py
@@ -4,6 +4,7 @@
 import typing
 
 import ConfigSpace
+from ConfigSpace.configuration_space import Configuration
 
 import dask.distributed
 
@@ -40,6 +41,7 @@ def get_smac_object(
     initial_budget: int,
     max_budget: int,
     dask_client: typing.Optional[dask.distributed.Client],
+    initial_configurations: typing.Optional[typing.List[Configuration]] = None,
 ) -> SMAC4AC:
     """
     This function returns an SMAC object that is gonna be used as
@@ -53,6 +55,8 @@ def get_smac_object(
         ta_kwargs (typing.Dict[str, typing.Any]): Arguments to the above ta
         n_jobs (int): Amount of cores to use for this task
         dask_client (dask.distributed.Client): User provided scheduler
+        initial_configurations (typing.List[Configuration]): List of initial
+            configurations which smac will run before starting the search process
 
     Returns:
         (SMAC4AC): sequential model algorithm configuration object
@@ -67,7 +71,7 @@ def get_smac_object(
         runhistory2epm=rh2EPM,
         tae_runner=ta,
         tae_runner_kwargs=ta_kwargs,
-        initial_configurations=None,
+        initial_configurations=initial_configurations,
         run_id=seed,
         intensifier=intensifier,
         intensifier_kwargs={'initial_budget': initial_budget, 'max_budget': max_budget,
@@ -205,6 +209,21 @@ def __init__(self,
                                               port=self.logger_port)
         self.logger.info("initialised {}".format(self.__class__.__name__))
 
+        # read and validate initial configurations
+        with open('greedy_portfolio.json', 'r') as fp:
+            initial_configurations = json.load(fp)
+
+        self.initial_configurations: typing.List[Configuration] = list()
+        for configuration_dict in initial_configurations:
+            try:
+                configuration = Configuration(self.config_space, configuration_dict)
+                self.initial_configurations.append(configuration)
+            except Exception as e:
+                self.logger.warning(f"Failed to convert {configuration_dict} into"
+                                    f" a Configuration with error {e.msg[0]}. "
+                                    f"Therefore, it can't be used as an initial "
+                                    f"configuration as it does not match the current config space. ")
+
     def reset_data_manager(self) -> None:
         if self.datamanager is not None:
             del self.datamanager
@@ -314,7 +333,8 @@ def run_smbo(self, func: typing.Optional[typing.Callable] = None
                                                  n_jobs=self.n_jobs,
                                                  initial_budget=initial_budget,
                                                  max_budget=max_budget,
-                                                 dask_client=self.dask_client)
+                                                 dask_client=self.dask_client,
+                                                 initial_configurations=self.initial_configurations)
         else:
             smac = get_smac_object(scenario_dict=scenario_dict,
                                    seed=seed,
@@ -323,7 +343,8 @@ def run_smbo(self, func: typing.Optional[typing.Callable] = None
                                    n_jobs=self.n_jobs,
                                    initial_budget=initial_budget,
                                    max_budget=max_budget,
-                                   dask_client=self.dask_client)
+                                   dask_client=self.dask_client,
+                                   initial_configurations=self.initial_configurations)
 
         if self.ensemble_callback is not None:
             smac.register_callback(self.ensemble_callback)

From 59e4536d576fbedf7445caebb00e194b349ad8b8 Mon Sep 17 00:00:00 2001
From: Ravin Kohli <kohliravin7@gmail.com>
Date: Thu, 6 May 2021 12:04:01 +0200
Subject: [PATCH 02/15] In progress, adding flag in search function

---
 autoPyTorch/api/tabular_classification.py | 3 +++
 autoPyTorch/optimizer/smbo.py             | 6 +++---
 2 files changed, 6 insertions(+), 3 deletions(-)

diff --git a/autoPyTorch/api/tabular_classification.py b/autoPyTorch/api/tabular_classification.py
index c7b77c4d0..84c88b19a 100644
--- a/autoPyTorch/api/tabular_classification.py
+++ b/autoPyTorch/api/tabular_classification.py
@@ -131,6 +131,7 @@ def search(
         precision: int = 32,
         disable_file_output: List = [],
         load_models: bool = True,
+        run_greedy_portfolio: bool = False
     ) -> 'BaseTask':
         """
         Search for the best pipeline configuration for the given dataset.
@@ -199,6 +200,8 @@ def search(
             disable_file_output (Union[bool, List]):
             load_models (bool), (default=True): Whether to load the
                 models after fitting AutoPyTorch.
+            run_greedy_portfolio (bool), (default=False): If True,
+                runs initial
 
         Returns:
             self
diff --git a/autoPyTorch/optimizer/smbo.py b/autoPyTorch/optimizer/smbo.py
index 092550175..c40453050 100644
--- a/autoPyTorch/optimizer/smbo.py
+++ b/autoPyTorch/optimizer/smbo.py
@@ -1,6 +1,7 @@
 import copy
 import json
 import logging.handlers
+import os
 import typing
 
 import ConfigSpace
@@ -210,8 +211,7 @@ def __init__(self,
         self.logger.info("initialised {}".format(self.__class__.__name__))
 
         # read and validate initial configurations
-        with open('greedy_portfolio.json', 'r') as fp:
-            initial_configurations = json.load(fp)
+        initial_configurations = json.load(open(os.path.join(os.path.dirname(__file__), 'greedy_portfolio.json')))
 
         self.initial_configurations: typing.List[Configuration] = list()
         for configuration_dict in initial_configurations:
@@ -220,7 +220,7 @@ def __init__(self,
                 self.initial_configurations.append(configuration)
             except Exception as e:
                 self.logger.warning(f"Failed to convert {configuration_dict} into"
-                                    f" a Configuration with error {e.msg[0]}. "
+                                    f" a Configuration with error {e}. "
                                     f"Therefore, it can't be used as an initial "
                                     f"configuration as it does not match the current config space. ")
 

From af8fda673f62cabbe34f517d87437e192abdef1f Mon Sep 17 00:00:00 2001
From: Ravin Kohli <kohliravin7@gmail.com>
Date: Fri, 7 May 2021 14:11:58 +0200
Subject: [PATCH 03/15] Adds documentation, example and fixes setup.py

---
 autoPyTorch/api/base_task.py                  | 11 ++-
 autoPyTorch/api/tabular_classification.py     |  7 +-
 autoPyTorch/api/tabular_regression.py         |  9 ++-
 autoPyTorch/optimizer/smbo.py                 | 29 ++++----
 .../40_advanced/example_run_with_portfolio.py | 71 +++++++++++++++++++
 setup.py                                      |  7 +-
 6 files changed, 115 insertions(+), 19 deletions(-)
 create mode 100644 examples/tabular/40_advanced/example_run_with_portfolio.py

diff --git a/autoPyTorch/api/base_task.py b/autoPyTorch/api/base_task.py
index 56d849205..a772f2223 100644
--- a/autoPyTorch/api/base_task.py
+++ b/autoPyTorch/api/base_task.py
@@ -702,6 +702,7 @@ def _search(
         precision: int = 32,
         disable_file_output: List = [],
         load_models: bool = True,
+        run_greedy_portfolio: bool = False
     ) -> 'BaseTask':
         """
         Search for the best pipeline configuration for the given dataset.
@@ -772,7 +773,12 @@ def _search(
             disable_file_output (Union[bool, List]):
             load_models (bool), (default=True): Whether to load the
                 models after fitting AutoPyTorch.
-
+            run_greedy_portfolio (bool), (default=False): If True,
+                runs initial configurations present in
+                'autoPyTorch/optimizer/greedy_portfolio.json'.
+                These configurations are the best performing configurations
+                when search was performed on meta training datasets.
+                For more info refer to `AutoPyTorch Tabular <https://arxiv.org/abs/2006.13799>
         Returns:
             self
 
@@ -957,7 +963,8 @@ def _search(
                 # We do not increase the num_run here, this is something
                 # smac does internally
                 start_num_run=self._backend.get_next_num_run(peek=True),
-                search_space_updates=self.search_space_updates
+                search_space_updates=self.search_space_updates,
+                run_greedy_portfolio=run_greedy_portfolio
             )
             try:
                 run_history, self.trajectory, budget_type = \
diff --git a/autoPyTorch/api/tabular_classification.py b/autoPyTorch/api/tabular_classification.py
index 84c88b19a..84ccbb312 100644
--- a/autoPyTorch/api/tabular_classification.py
+++ b/autoPyTorch/api/tabular_classification.py
@@ -201,7 +201,11 @@ def search(
             load_models (bool), (default=True): Whether to load the
                 models after fitting AutoPyTorch.
             run_greedy_portfolio (bool), (default=False): If True,
-                runs initial
+                runs initial configurations present in
+                'autoPyTorch/optimizer/greedy_portfolio.json'.
+                These configurations are the best performing configurations
+                when search was performed on meta training datasets.
+                For more info refer to `AutoPyTorch Tabular <https://arxiv.org/abs/2006.13799>
 
         Returns:
             self
@@ -248,6 +252,7 @@ def search(
             precision=precision,
             disable_file_output=disable_file_output,
             load_models=load_models,
+            run_greedy_portfolio=run_greedy_portfolio
         )
 
     def predict(
diff --git a/autoPyTorch/api/tabular_regression.py b/autoPyTorch/api/tabular_regression.py
index 098f2c506..dc6391105 100644
--- a/autoPyTorch/api/tabular_regression.py
+++ b/autoPyTorch/api/tabular_regression.py
@@ -123,6 +123,7 @@ def search(
         precision: int = 32,
         disable_file_output: List = [],
         load_models: bool = True,
+        run_greedy_portfolio: bool = False
     ) -> 'BaseTask':
         """
         Search for the best pipeline configuration for the given dataset.
@@ -187,7 +188,12 @@ def search(
             disable_file_output (Union[bool, List]):
             load_models (bool), (default=True): Whether to load the
                 models after fitting AutoPyTorch.
-
+            run_greedy_portfolio (bool), (default=False): If True,
+                            runs initial configurations present in
+                            'autoPyTorch/optimizer/greedy_portfolio.json'.
+                            These configurations are the best performing configurations
+                            when search was performed on meta training datasets.
+                            For more info refer to `AutoPyTorch Tabular <https://arxiv.org/abs/2006.13799>
         Returns:
             self
 
@@ -233,6 +239,7 @@ def search(
             precision=precision,
             disable_file_output=disable_file_output,
             load_models=load_models,
+            run_greedy_portfolio=run_greedy_portfolio
         )
 
     def predict(
diff --git a/autoPyTorch/optimizer/smbo.py b/autoPyTorch/optimizer/smbo.py
index c40453050..548726ad9 100644
--- a/autoPyTorch/optimizer/smbo.py
+++ b/autoPyTorch/optimizer/smbo.py
@@ -108,7 +108,8 @@ def __init__(self,
                  all_supported_metrics: bool = True,
                  ensemble_callback: typing.Optional[EnsembleBuilderManager] = None,
                  logger_port: typing.Optional[int] = None,
-                 search_space_updates: typing.Optional[HyperparameterSearchSpaceUpdates] = None
+                 search_space_updates: typing.Optional[HyperparameterSearchSpaceUpdates] = None,
+                 run_greedy_portfolio: bool = False
                  ):
         """
         Interface to SMAC. This method calls the SMAC optimize method, and allows
@@ -157,7 +158,9 @@ def __init__(self,
                 Allows to create a user specified SMAC object
             ensemble_callback (typing.Optional[EnsembleBuilderManager]):
                 A callback used in this scenario to start ensemble building subtasks
-
+            run_greedy_portfolio (bool), (default=False): If True,
+                runs initial configurations present in
+                'autoPyTorch/optimizer/greedy_portfolio.json'.
         """
         super(AutoMLSMBO, self).__init__()
         # data related
@@ -213,16 +216,18 @@ def __init__(self,
         # read and validate initial configurations
         initial_configurations = json.load(open(os.path.join(os.path.dirname(__file__), 'greedy_portfolio.json')))
 
-        self.initial_configurations: typing.List[Configuration] = list()
-        for configuration_dict in initial_configurations:
-            try:
-                configuration = Configuration(self.config_space, configuration_dict)
-                self.initial_configurations.append(configuration)
-            except Exception as e:
-                self.logger.warning(f"Failed to convert {configuration_dict} into"
-                                    f" a Configuration with error {e}. "
-                                    f"Therefore, it can't be used as an initial "
-                                    f"configuration as it does not match the current config space. ")
+        self.initial_configurations: typing.Optional[typing.List[Configuration]] = None
+        if run_greedy_portfolio:
+            self.initial_configurations = list()
+            for configuration_dict in initial_configurations:
+                try:
+                    configuration = Configuration(self.config_space, configuration_dict)
+                    self.initial_configurations.append(configuration)
+                except Exception as e:
+                    self.logger.warning(f"Failed to convert {configuration_dict} into"
+                                        f" a Configuration with error {e}. "
+                                        f"Therefore, it can't be used as an initial "
+                                        f"configuration as it does not match the current config space. ")
 
     def reset_data_manager(self) -> None:
         if self.datamanager is not None:
diff --git a/examples/tabular/40_advanced/example_run_with_portfolio.py b/examples/tabular/40_advanced/example_run_with_portfolio.py
new file mode 100644
index 000000000..66312a708
--- /dev/null
+++ b/examples/tabular/40_advanced/example_run_with_portfolio.py
@@ -0,0 +1,71 @@
+"""
+============================================
+Tabular Classification with Greedy Portfolio
+============================================
+
+The following example shows how to fit a sample classification model
+with AutoPyTorch using the greedy portfolio
+"""
+import os
+import tempfile as tmp
+import warnings
+
+os.environ['JOBLIB_TEMP_FOLDER'] = tmp.gettempdir()
+os.environ['OMP_NUM_THREADS'] = '1'
+os.environ['OPENBLAS_NUM_THREADS'] = '1'
+os.environ['MKL_NUM_THREADS'] = '1'
+
+warnings.simplefilter(action='ignore', category=UserWarning)
+warnings.simplefilter(action='ignore', category=FutureWarning)
+
+import sklearn.datasets
+import sklearn.model_selection
+
+from autoPyTorch.api.tabular_classification import TabularClassificationTask
+
+
+if __name__ == '__main__':
+
+    ############################################################################
+    # Data Loading
+    # ============
+    X, y = sklearn.datasets.fetch_openml(data_id=40981, return_X_y=True, as_frame=True)
+    X_train, X_test, y_train, y_test = sklearn.model_selection.train_test_split(
+        X,
+        y,
+        random_state=42,
+    )
+
+    ############################################################################
+    # Build and fit a classifier
+    # ==========================
+    api = TabularClassificationTask(
+        seed=42,
+    )
+
+    ############################################################################
+    # Search for an ensemble of machine learning algorithms
+    # =====================================================
+    api.search(
+        X_train=X_train,
+        y_train=y_train,
+        X_test=X_test.copy(),
+        y_test=y_test.copy(),
+        optimize_metric='accuracy',
+        total_walltime_limit=300,
+        func_eval_time_limit_secs=50,
+        # Setting this option to True
+        # will make smac run the configurations
+        # present in 'autoPyTorch/optimizer/greedy_portfolio.json'
+        run_greedy_portfolio=True
+    )
+
+    ############################################################################
+    # Print the final ensemble performance
+    # ====================================
+    print(api.run_history, api.trajectory)
+    y_pred = api.predict(X_test)
+    score = api.score(y_pred, y_test)
+    print(score)
+    # Print the final ensemble built by AutoPyTorch
+    print(api.show_models())
diff --git a/setup.py b/setup.py
index 4fd732fdd..74c730b07 100755
--- a/setup.py
+++ b/setup.py
@@ -11,10 +11,10 @@
 # noinspection PyInterpreter
 setuptools.setup(
     name="autoPyTorch",
-    version="0.0.3",
+    version="0.1.0",
     author="AutoML Freiburg",
     author_email="zimmerl@informatik.uni-freiburg.de",
-    description=("Auto-PyTorch searches neural architectures using BO-HB"),
+    description=("Auto-PyTorch searches neural architectures using smac"),
     long_description=long_description,
     url="https://github.com/automl/Auto-PyTorch",
     long_description_content_type="text/markdown",
@@ -59,5 +59,6 @@
         "docs": ["sphinx", "sphinx-gallery", "sphinx_bootstrap_theme", "numpydoc"],
     },
     test_suite="pytest",
-    data_files=[('configs', ['autoPyTorch/configs/default_pipeline_options.json'])]
+    data_files=[('configs', ['autoPyTorch/configs/default_pipeline_options.json']),
+                ('portfolio', ['autoPyTorch/optimizer/greedy_portfolio.json'])]
 )

From 9929fb0da541e6feab6e95e552c0167f86cccb33 Mon Sep 17 00:00:00 2001
From: Ravin Kohli <kohliravin7@gmail.com>
Date: Mon, 10 May 2021 14:31:43 +0200
Subject: [PATCH 04/15] Address comments from shuhei, change run_greedy to
 portfolio_selection

---
 autoPyTorch/api/base_task.py                  | 11 ++++++----
 autoPyTorch/api/tabular_classification.py     | 11 ++++++----
 autoPyTorch/api/tabular_regression.py         | 19 ++++++++++--------
 autoPyTorch/optimizer/smbo.py                 |  7 ++++---
 autoPyTorch/pipeline/base_pipeline.py         |  3 +++
 autoPyTorch/pipeline/image_classification.py  | 13 +++++++++++-
 .../pipeline/tabular_classification.py        | 13 +++++++++++-
 autoPyTorch/pipeline/tabular_regression.py    | 13 +++++++++++-
 .../traditional_tabular_classification.py     | 20 ++++++++++++++++---
 .../40_advanced/example_run_with_portfolio.py |  4 ++--
 10 files changed, 87 insertions(+), 27 deletions(-)
 rename examples/{tabular => }/40_advanced/example_run_with_portfolio.py (96%)

diff --git a/autoPyTorch/api/base_task.py b/autoPyTorch/api/base_task.py
index a772f2223..9185b204f 100644
--- a/autoPyTorch/api/base_task.py
+++ b/autoPyTorch/api/base_task.py
@@ -121,6 +121,9 @@ class BaseTask:
         exclude_components (Optional[Dict]): If None, all possible components are used.
             Otherwise specifies set of components not to use. Incompatible with include
             components
+        search_space_updates (Optional[HyperparameterSearchSpaceUpdates]):
+            search space updates that can be used to modify the search
+            space of particular components or choice modules of the pipeline
     """
 
     def __init__(
@@ -702,7 +705,7 @@ def _search(
         precision: int = 32,
         disable_file_output: List = [],
         load_models: bool = True,
-        run_greedy_portfolio: bool = False
+        portfolio_selection: str = "none"
     ) -> 'BaseTask':
         """
         Search for the best pipeline configuration for the given dataset.
@@ -773,12 +776,12 @@ def _search(
             disable_file_output (Union[bool, List]):
             load_models (bool), (default=True): Whether to load the
                 models after fitting AutoPyTorch.
-            run_greedy_portfolio (bool), (default=False): If True,
+            portfolio_selection (str), (default="none"): If "greedy",
                 runs initial configurations present in
                 'autoPyTorch/optimizer/greedy_portfolio.json'.
                 These configurations are the best performing configurations
                 when search was performed on meta training datasets.
-                For more info refer to `AutoPyTorch Tabular <https://arxiv.org/abs/2006.13799>
+                For more info refer to `AutoPyTorch Tabular <https://arxiv.org/abs/2006.13799>`
         Returns:
             self
 
@@ -964,7 +967,7 @@ def _search(
                 # smac does internally
                 start_num_run=self._backend.get_next_num_run(peek=True),
                 search_space_updates=self.search_space_updates,
-                run_greedy_portfolio=run_greedy_portfolio
+                portfolio_selection=portfolio_selection
             )
             try:
                 run_history, self.trajectory, budget_type = \
diff --git a/autoPyTorch/api/tabular_classification.py b/autoPyTorch/api/tabular_classification.py
index 84ccbb312..22eb39c4a 100644
--- a/autoPyTorch/api/tabular_classification.py
+++ b/autoPyTorch/api/tabular_classification.py
@@ -57,6 +57,9 @@ class TabularClassificationTask(BaseTask):
             If None, all possible components are used. Otherwise
             specifies set of components not to use. Incompatible
             with include components
+        search_space_updates (Optional[HyperparameterSearchSpaceUpdates]):
+            search space updates that can be used to modify the search
+            space of particular components or choice modules of the pipeline
     """
     def __init__(
         self,
@@ -131,7 +134,7 @@ def search(
         precision: int = 32,
         disable_file_output: List = [],
         load_models: bool = True,
-        run_greedy_portfolio: bool = False
+        portfolio_selection: str = "none"
     ) -> 'BaseTask':
         """
         Search for the best pipeline configuration for the given dataset.
@@ -200,12 +203,12 @@ def search(
             disable_file_output (Union[bool, List]):
             load_models (bool), (default=True): Whether to load the
                 models after fitting AutoPyTorch.
-            run_greedy_portfolio (bool), (default=False): If True,
+            portfolio_selection (str), (default="none"): If "greedy",
                 runs initial configurations present in
                 'autoPyTorch/optimizer/greedy_portfolio.json'.
                 These configurations are the best performing configurations
                 when search was performed on meta training datasets.
-                For more info refer to `AutoPyTorch Tabular <https://arxiv.org/abs/2006.13799>
+                For more info refer to `AutoPyTorch Tabular <https://arxiv.org/abs/2006.13799>`
 
         Returns:
             self
@@ -252,7 +255,7 @@ def search(
             precision=precision,
             disable_file_output=disable_file_output,
             load_models=load_models,
-            run_greedy_portfolio=run_greedy_portfolio
+            portfolio_selection=portfolio_selection
         )
 
     def predict(
diff --git a/autoPyTorch/api/tabular_regression.py b/autoPyTorch/api/tabular_regression.py
index dc6391105..2f1fd0619 100644
--- a/autoPyTorch/api/tabular_regression.py
+++ b/autoPyTorch/api/tabular_regression.py
@@ -48,6 +48,9 @@ class TabularRegressionTask(BaseTask):
         exclude_components (Optional[Dict]): If None, all possible components are used.
             Otherwise specifies set of components not to use. Incompatible with include
             components
+        search_space_updates (Optional[HyperparameterSearchSpaceUpdates]):
+            search space updates that can be used to modify the search
+            space of particular components or choice modules of the pipeline
     """
 
     def __init__(
@@ -123,7 +126,7 @@ def search(
         precision: int = 32,
         disable_file_output: List = [],
         load_models: bool = True,
-        run_greedy_portfolio: bool = False
+        portfolio_selection: str = "none"
     ) -> 'BaseTask':
         """
         Search for the best pipeline configuration for the given dataset.
@@ -188,12 +191,12 @@ def search(
             disable_file_output (Union[bool, List]):
             load_models (bool), (default=True): Whether to load the
                 models after fitting AutoPyTorch.
-            run_greedy_portfolio (bool), (default=False): If True,
-                            runs initial configurations present in
-                            'autoPyTorch/optimizer/greedy_portfolio.json'.
-                            These configurations are the best performing configurations
-                            when search was performed on meta training datasets.
-                            For more info refer to `AutoPyTorch Tabular <https://arxiv.org/abs/2006.13799>
+            portfolio_selection (str), (default="none"): If "greedy",
+                runs initial configurations present in
+                'autoPyTorch/optimizer/greedy_portfolio.json'.
+                These configurations are the best performing configurations
+                when search was performed on meta training datasets.
+                For more info refer to `AutoPyTorch Tabular <https://arxiv.org/abs/2006.13799>`
         Returns:
             self
 
@@ -239,7 +242,7 @@ def search(
             precision=precision,
             disable_file_output=disable_file_output,
             load_models=load_models,
-            run_greedy_portfolio=run_greedy_portfolio
+            portfolio_selection=portfolio_selection
         )
 
     def predict(
diff --git a/autoPyTorch/optimizer/smbo.py b/autoPyTorch/optimizer/smbo.py
index 548726ad9..9b85fd0b7 100644
--- a/autoPyTorch/optimizer/smbo.py
+++ b/autoPyTorch/optimizer/smbo.py
@@ -109,7 +109,7 @@ def __init__(self,
                  ensemble_callback: typing.Optional[EnsembleBuilderManager] = None,
                  logger_port: typing.Optional[int] = None,
                  search_space_updates: typing.Optional[HyperparameterSearchSpaceUpdates] = None,
-                 run_greedy_portfolio: bool = False
+                 portfolio_selection: str = "none"
                  ):
         """
         Interface to SMAC. This method calls the SMAC optimize method, and allows
@@ -158,7 +158,7 @@ def __init__(self,
                 Allows to create a user specified SMAC object
             ensemble_callback (typing.Optional[EnsembleBuilderManager]):
                 A callback used in this scenario to start ensemble building subtasks
-            run_greedy_portfolio (bool), (default=False): If True,
+            portfolio_selection (str), (default="none"): If "greedy",
                 runs initial configurations present in
                 'autoPyTorch/optimizer/greedy_portfolio.json'.
         """
@@ -217,7 +217,8 @@ def __init__(self,
         initial_configurations = json.load(open(os.path.join(os.path.dirname(__file__), 'greedy_portfolio.json')))
 
         self.initial_configurations: typing.Optional[typing.List[Configuration]] = None
-        if run_greedy_portfolio:
+        assert portfolio_selection in ['none', 'greedy']
+        if portfolio_selection == "greedy":
             self.initial_configurations = list()
             for configuration_dict in initial_configurations:
                 try:
diff --git a/autoPyTorch/pipeline/base_pipeline.py b/autoPyTorch/pipeline/base_pipeline.py
index fc086c902..4eb6a5213 100644
--- a/autoPyTorch/pipeline/base_pipeline.py
+++ b/autoPyTorch/pipeline/base_pipeline.py
@@ -41,6 +41,9 @@ class BasePipeline(Pipeline):
         random_state (np.random.RandomState): allows to produce reproducible results by
             setting a seed for randomized settings
         init_params (Optional[Dict[str, Any]])
+        search_space_updates (Optional[HyperparameterSearchSpaceUpdates]):
+            search space updates that can be used to modify the search
+            space of particular components or choice modules of the pipeline
 
 
     Attributes:
diff --git a/autoPyTorch/pipeline/image_classification.py b/autoPyTorch/pipeline/image_classification.py
index b31c8dbf2..bf15d738a 100644
--- a/autoPyTorch/pipeline/image_classification.py
+++ b/autoPyTorch/pipeline/image_classification.py
@@ -40,7 +40,18 @@ class ImageClassificationPipeline(ClassifierMixin, BasePipeline):
     Args:
         config (Configuration)
             The configuration to evaluate.
-        random_state (Optional[RandomState): random_state is the random number generator
+        steps (Optional[List[Tuple[str, autoPyTorchChoice]]]): the list of steps that
+            build the pipeline. If provided, they won't be dynamically produced.
+        include (Optional[Dict[str, Any]]): Allows the caller to specify which configurations
+            to honor during the creation of the configuration space.
+        exclude (Optional[Dict[str, Any]]): Allows the caller to specify which configurations
+            to avoid during the creation of the configuration space.
+        random_state (np.random.RandomState): allows to produce reproducible results by
+            setting a seed for randomized settings
+        init_params (Optional[Dict[str, Any]])
+        search_space_updates (Optional[HyperparameterSearchSpaceUpdates]):
+            search space updates that can be used to modify the search
+            space of particular components or choice modules of the pipeline
 
     Attributes:
     Examples
diff --git a/autoPyTorch/pipeline/tabular_classification.py b/autoPyTorch/pipeline/tabular_classification.py
index bb4cb10ac..ef57a8569 100644
--- a/autoPyTorch/pipeline/tabular_classification.py
+++ b/autoPyTorch/pipeline/tabular_classification.py
@@ -60,7 +60,18 @@ class TabularClassificationPipeline(ClassifierMixin, BasePipeline):
     Args:
         config (Configuration)
             The configuration to evaluate.
-        random_state (Optional[RandomState): random_state is the random number generator
+        steps (Optional[List[Tuple[str, autoPyTorchChoice]]]): the list of steps that
+            build the pipeline. If provided, they won't be dynamically produced.
+        include (Optional[Dict[str, Any]]): Allows the caller to specify which configurations
+            to honor during the creation of the configuration space.
+        exclude (Optional[Dict[str, Any]]): Allows the caller to specify which configurations
+            to avoid during the creation of the configuration space.
+        random_state (np.random.RandomState): allows to produce reproducible results by
+            setting a seed for randomized settings
+        init_params (Optional[Dict[str, Any]])
+        search_space_updates (Optional[HyperparameterSearchSpaceUpdates]):
+            search space updates that can be used to modify the search
+            space of particular components or choice modules of the pipeline
 
     Attributes:
     Examples
diff --git a/autoPyTorch/pipeline/tabular_regression.py b/autoPyTorch/pipeline/tabular_regression.py
index af8702695..2650868b6 100644
--- a/autoPyTorch/pipeline/tabular_regression.py
+++ b/autoPyTorch/pipeline/tabular_regression.py
@@ -58,7 +58,18 @@ class TabularRegressionPipeline(RegressorMixin, BasePipeline):
     Args:
         config (Configuration)
             The configuration to evaluate.
-        random_state (Optional[RandomState): random_state is the random number generator
+        steps (Optional[List[Tuple[str, autoPyTorchChoice]]]): the list of steps that
+            build the pipeline. If provided, they won't be dynamically produced.
+        include (Optional[Dict[str, Any]]): Allows the caller to specify which configurations
+            to honor during the creation of the configuration space.
+        exclude (Optional[Dict[str, Any]]): Allows the caller to specify which configurations
+            to avoid during the creation of the configuration space.
+        random_state (np.random.RandomState): allows to produce reproducible results by
+            setting a seed for randomized settings
+        init_params (Optional[Dict[str, Any]])
+        search_space_updates (Optional[HyperparameterSearchSpaceUpdates]):
+            search space updates that can be used to modify the search
+            space of particular components or choice modules of the pipeline
 
     Attributes:
     Examples
diff --git a/autoPyTorch/pipeline/traditional_tabular_classification.py b/autoPyTorch/pipeline/traditional_tabular_classification.py
index 51d8e6616..49be2a1fa 100644
--- a/autoPyTorch/pipeline/traditional_tabular_classification.py
+++ b/autoPyTorch/pipeline/traditional_tabular_classification.py
@@ -10,6 +10,7 @@
 from autoPyTorch.pipeline.base_pipeline import BasePipeline
 from autoPyTorch.pipeline.components.base_choice import autoPyTorchChoice
 from autoPyTorch.pipeline.components.setup.traditional_ml.base_model_choice import ModelChoice
+from autoPyTorch.utils.hyperparameter_search_space_update import HyperparameterSearchSpaceUpdates
 
 
 class TraditionalTabularClassificationPipeline(ClassifierMixin, BasePipeline):
@@ -19,7 +20,19 @@ class TraditionalTabularClassificationPipeline(ClassifierMixin, BasePipeline):
     Args:
         config (Configuration)
             The configuration to evaluate.
-        random_state (Optional[RandomState): random_state is the random number generator
+        steps (Optional[List[Tuple[str, autoPyTorchChoice]]]): the list of steps that
+            build the pipeline. If provided, they won't be dynamically produced.
+        include (Optional[Dict[str, Any]]): Allows the caller to specify which configurations
+            to honor during the creation of the configuration space.
+        exclude (Optional[Dict[str, Any]]): Allows the caller to specify which configurations
+            to avoid during the creation of the configuration space.
+        random_state (np.random.RandomState): allows to produce reproducible results by
+            setting a seed for randomized settings
+        init_params (Optional[Dict[str, Any]])
+        search_space_updates (Optional[HyperparameterSearchSpaceUpdates]):
+            search space updates that can be used to modify the search
+            space of particular components or choice modules of the pipeline
+
 
     Attributes:
     """
@@ -32,11 +45,12 @@ def __init__(
         include: Optional[Dict[str, Any]] = None,
         exclude: Optional[Dict[str, Any]] = None,
         random_state: Optional[np.random.RandomState] = None,
-        init_params: Optional[Dict[str, Any]] = None
+        init_params: Optional[Dict[str, Any]] = None,
+        search_space_updates: Optional[HyperparameterSearchSpaceUpdates] = None
     ):
         super().__init__(
             config, steps, dataset_properties, include, exclude,
-            random_state, init_params)
+            random_state, init_params, search_space_updates)
 
     def predict(self, X: np.ndarray, batch_size: Optional[int] = None
                 ) -> np.ndarray:
diff --git a/examples/tabular/40_advanced/example_run_with_portfolio.py b/examples/40_advanced/example_run_with_portfolio.py
similarity index 96%
rename from examples/tabular/40_advanced/example_run_with_portfolio.py
rename to examples/40_advanced/example_run_with_portfolio.py
index 66312a708..f314788ba 100644
--- a/examples/tabular/40_advanced/example_run_with_portfolio.py
+++ b/examples/40_advanced/example_run_with_portfolio.py
@@ -54,10 +54,10 @@
         optimize_metric='accuracy',
         total_walltime_limit=300,
         func_eval_time_limit_secs=50,
-        # Setting this option to True
+        # Setting this option to "greedy"
         # will make smac run the configurations
         # present in 'autoPyTorch/optimizer/greedy_portfolio.json'
-        run_greedy_portfolio=True
+        portfolio_selection="greedy"
     )
 
     ############################################################################

From c14e537b16f8925a363abb67a8f88f9b6f0a47a0 Mon Sep 17 00:00:00 2001
From: Ravin Kohli <kohliravin7@gmail.com>
Date: Mon, 17 May 2021 23:49:35 +0200
Subject: [PATCH 05/15] Address comments from Francisco, move portfolio to
 configs

---
 autoPyTorch/api/base_task.py                             | 2 +-
 autoPyTorch/api/tabular_classification.py                | 2 +-
 autoPyTorch/api/tabular_regression.py                    | 2 +-
 autoPyTorch/{optimizer => configs}/greedy_portfolio.json | 0
 autoPyTorch/optimizer/smbo.py                            | 5 +++--
 examples/40_advanced/example_run_with_portfolio.py       | 2 +-
 6 files changed, 7 insertions(+), 6 deletions(-)
 rename autoPyTorch/{optimizer => configs}/greedy_portfolio.json (100%)

diff --git a/autoPyTorch/api/base_task.py b/autoPyTorch/api/base_task.py
index 9185b204f..1b3adff56 100644
--- a/autoPyTorch/api/base_task.py
+++ b/autoPyTorch/api/base_task.py
@@ -778,7 +778,7 @@ def _search(
                 models after fitting AutoPyTorch.
             portfolio_selection (str), (default="none"): If "greedy",
                 runs initial configurations present in
-                'autoPyTorch/optimizer/greedy_portfolio.json'.
+                'autoPyTorch/configs/greedy_portfolio.json'.
                 These configurations are the best performing configurations
                 when search was performed on meta training datasets.
                 For more info refer to `AutoPyTorch Tabular <https://arxiv.org/abs/2006.13799>`
diff --git a/autoPyTorch/api/tabular_classification.py b/autoPyTorch/api/tabular_classification.py
index 22eb39c4a..f016f8ee1 100644
--- a/autoPyTorch/api/tabular_classification.py
+++ b/autoPyTorch/api/tabular_classification.py
@@ -205,7 +205,7 @@ def search(
                 models after fitting AutoPyTorch.
             portfolio_selection (str), (default="none"): If "greedy",
                 runs initial configurations present in
-                'autoPyTorch/optimizer/greedy_portfolio.json'.
+                'autoPyTorch/configs/greedy_portfolio.json'.
                 These configurations are the best performing configurations
                 when search was performed on meta training datasets.
                 For more info refer to `AutoPyTorch Tabular <https://arxiv.org/abs/2006.13799>`
diff --git a/autoPyTorch/api/tabular_regression.py b/autoPyTorch/api/tabular_regression.py
index 2f1fd0619..e6f67c0b5 100644
--- a/autoPyTorch/api/tabular_regression.py
+++ b/autoPyTorch/api/tabular_regression.py
@@ -193,7 +193,7 @@ def search(
                 models after fitting AutoPyTorch.
             portfolio_selection (str), (default="none"): If "greedy",
                 runs initial configurations present in
-                'autoPyTorch/optimizer/greedy_portfolio.json'.
+                'autoPyTorch/configs/greedy_portfolio.json'.
                 These configurations are the best performing configurations
                 when search was performed on meta training datasets.
                 For more info refer to `AutoPyTorch Tabular <https://arxiv.org/abs/2006.13799>`
diff --git a/autoPyTorch/optimizer/greedy_portfolio.json b/autoPyTorch/configs/greedy_portfolio.json
similarity index 100%
rename from autoPyTorch/optimizer/greedy_portfolio.json
rename to autoPyTorch/configs/greedy_portfolio.json
diff --git a/autoPyTorch/optimizer/smbo.py b/autoPyTorch/optimizer/smbo.py
index 9b85fd0b7..8c00120a6 100644
--- a/autoPyTorch/optimizer/smbo.py
+++ b/autoPyTorch/optimizer/smbo.py
@@ -160,7 +160,7 @@ def __init__(self,
                 A callback used in this scenario to start ensemble building subtasks
             portfolio_selection (str), (default="none"): If "greedy",
                 runs initial configurations present in
-                'autoPyTorch/optimizer/greedy_portfolio.json'.
+                'autoPyTorch/configs/greedy_portfolio.json'.
         """
         super(AutoMLSMBO, self).__init__()
         # data related
@@ -214,7 +214,8 @@ def __init__(self,
         self.logger.info("initialised {}".format(self.__class__.__name__))
 
         # read and validate initial configurations
-        initial_configurations = json.load(open(os.path.join(os.path.dirname(__file__), 'greedy_portfolio.json')))
+        initial_configurations = json.load(open(os.path.join(os.path.dirname(__file__),
+                                                             '../configs/greedy_portfolio.json')))
 
         self.initial_configurations: typing.Optional[typing.List[Configuration]] = None
         assert portfolio_selection in ['none', 'greedy']
diff --git a/examples/40_advanced/example_run_with_portfolio.py b/examples/40_advanced/example_run_with_portfolio.py
index f314788ba..4109a2378 100644
--- a/examples/40_advanced/example_run_with_portfolio.py
+++ b/examples/40_advanced/example_run_with_portfolio.py
@@ -56,7 +56,7 @@
         func_eval_time_limit_secs=50,
         # Setting this option to "greedy"
         # will make smac run the configurations
-        # present in 'autoPyTorch/optimizer/greedy_portfolio.json'
+        # present in 'autoPyTorch/configs/greedy_portfolio.json'
         portfolio_selection="greedy"
     )
 

From 21284fc5da3a4921c8d3c0ae85f0487c9a35787e Mon Sep 17 00:00:00 2001
From: Ravin Kohli <kohliravin7@gmail.com>
Date: Wed, 19 May 2021 16:13:07 +0200
Subject: [PATCH 06/15] Address comments from Francisco, add tests for greedy
 portfolio and tests

---
 autoPyTorch/api/base_task.py              | 11 +++++-
 autoPyTorch/api/tabular_classification.py | 46 ++++++++++++----------
 autoPyTorch/api/tabular_regression.py     |  9 ++++-
 autoPyTorch/optimizer/smbo.py             | 14 ++++---
 test/test_api/test_api.py                 | 48 +++++++++++++++++++++++
 5 files changed, 97 insertions(+), 31 deletions(-)

diff --git a/autoPyTorch/api/base_task.py b/autoPyTorch/api/base_task.py
index 1b3adff56..afad47ca3 100644
--- a/autoPyTorch/api/base_task.py
+++ b/autoPyTorch/api/base_task.py
@@ -705,7 +705,8 @@ def _search(
         precision: int = 32,
         disable_file_output: List = [],
         load_models: bool = True,
-        portfolio_selection: str = "none"
+        portfolio_selection: str = "none",
+        portfolio_path: Optional[str] = None
     ) -> 'BaseTask':
         """
         Search for the best pipeline configuration for the given dataset.
@@ -782,6 +783,8 @@ def _search(
                 These configurations are the best performing configurations
                 when search was performed on meta training datasets.
                 For more info refer to `AutoPyTorch Tabular <https://arxiv.org/abs/2006.13799>`
+            portfolio_path (Optional[str]):
+                Optional argument to specify path to a portfolio file.
         Returns:
             self
 
@@ -790,6 +793,9 @@ def _search(
             raise ValueError("Incompatible dataset entered for current task,"
                              "expected dataset to have task type :{} got "
                              ":{}".format(self.task_type, dataset.task_type))
+        if portfolio_selection not in ["none", "greedy"]:
+            raise ValueError("Expected portfolio_selection to be in ['none', 'greedy']"
+                             "got {}".format(portfolio_selection))
 
         # Initialise information needed for the experiment
         experiment_task_name: str = 'runSearch'
@@ -967,7 +973,8 @@ def _search(
                 # smac does internally
                 start_num_run=self._backend.get_next_num_run(peek=True),
                 search_space_updates=self.search_space_updates,
-                portfolio_selection=portfolio_selection
+                portfolio_selection=portfolio_selection,
+                portfolio_path=portfolio_path
             )
             try:
                 run_history, self.trajectory, budget_type = \
diff --git a/autoPyTorch/api/tabular_classification.py b/autoPyTorch/api/tabular_classification.py
index f016f8ee1..68e347b55 100644
--- a/autoPyTorch/api/tabular_classification.py
+++ b/autoPyTorch/api/tabular_classification.py
@@ -134,7 +134,8 @@ def search(
         precision: int = 32,
         disable_file_output: List = [],
         load_models: bool = True,
-        portfolio_selection: str = "none"
+        portfolio_selection: str = "none",
+        portfolio_path: Optional[str] = None
     ) -> 'BaseTask':
         """
         Search for the best pipeline configuration for the given dataset.
@@ -147,8 +148,8 @@ def search(
                 A pair of features (X_train) and targets (y_train) used to fit a
                 pipeline. Additionally, a holdout of this pairs (X_test, y_test) can
                 be provided to track the generalization performance of each stage.
-            optimize_metric (str): name of the metric that is used to
-                evaluate a pipeline.
+            optimize_metric (str):
+                name of the metric that is used to evaluate a pipeline.
             budget_type (Optional[str]):
                 Type of budget to be used when fitting the pipeline.
                 Either 'epochs' or 'runtime'. If not provided, uses
@@ -156,12 +157,12 @@ def search(
             budget (Optional[float]):
                 Budget to fit a single run of the pipeline. If not
                 provided, uses the default in the pipeline config
-            total_walltime_limit (int), (default=100): Time limit
-                in seconds for the search of appropriate models.
+            total_walltime_limit (int), (default=100):
+                Time limit in seconds for the search of appropriate models.
                 By increasing this value, autopytorch has a higher
                 chance of finding better models.
-            func_eval_time_limit_secs (int), (default=None): Time limit
-                for a single call to the machine learning model.
+            func_eval_time_limit_secs (int), (default=None):
+                Time limit for a single call to the machine learning model.
                 Model fitting will be terminated if the machine
                 learning algorithm runs over the time limit. Set
                 this value high enough so that typical machine
@@ -178,38 +179,40 @@ def search(
                 feature by turning this flag to False. All machine learning
                 algorithms that are fitted during search() are considered for
                 ensemble building.
-            memory_limit (Optional[int]), (default=4096): Memory
-                limit in MB for the machine learning algorithm. autopytorch
+            memory_limit (Optional[int]), (default=4096):
+                Memory limit in MB for the machine learning algorithm. autopytorch
                 will stop fitting the machine learning algorithm if it tries
                 to allocate more than memory_limit MB. If None is provided,
                 no memory limit is set. In case of multi-processing, memory_limit
                 will be per job. This memory limit also applies to the ensemble
                 creation process.
-            smac_scenario_args (Optional[Dict]): Additional arguments inserted
-                into the scenario of SMAC. See the
+            smac_scenario_args (Optional[Dict]):
+                Additional arguments inserted into the scenario of SMAC. See the
                 [SMAC documentation] (https://automl.github.io/SMAC3/master/options.html?highlight=scenario#scenario)
-            get_smac_object_callback (Optional[Callable]): Callback function
-                to create an object of class
+            get_smac_object_callback (Optional[Callable]):
+                Callback function to create an object of class
                 [smac.optimizer.smbo.SMBO](https://automl.github.io/SMAC3/master/apidoc/smac.optimizer.smbo.html).
                 The function must accept the arguments scenario_dict,
                 instances, num_params, runhistory, seed and ta. This is
                 an advanced feature. Use only if you are familiar with
                 [SMAC](https://automl.github.io/SMAC3/master/index.html).
-            all_supported_metrics (bool), (default=True): if True, all
-                metrics supporting current task will be calculated
+            all_supported_metrics (bool), (default=True):
+                if True, all metrics supporting current task will be calculated
                 for each pipeline and results will be available via cv_results
             precision (int), (default=32): Numeric precision used when loading
                 ensemble data. Can be either '16', '32' or '64'.
             disable_file_output (Union[bool, List]):
-            load_models (bool), (default=True): Whether to load the
-                models after fitting AutoPyTorch.
-            portfolio_selection (str), (default="none"): If "greedy",
-                runs initial configurations present in
+            load_models (bool), (default=True):
+                Whether to load the models after fitting AutoPyTorch.
+            portfolio_selection (str), (default="none"):
+                If "greedy", runs initial configurations present in
+                portfolio_path, if specified else, those in
                 'autoPyTorch/configs/greedy_portfolio.json'.
                 These configurations are the best performing configurations
                 when search was performed on meta training datasets.
                 For more info refer to `AutoPyTorch Tabular <https://arxiv.org/abs/2006.13799>`
-
+            portfolio_path (Optional[str]):
+                Optional argument to specify path to a portfolio file.
         Returns:
             self
 
@@ -255,7 +258,8 @@ def search(
             precision=precision,
             disable_file_output=disable_file_output,
             load_models=load_models,
-            portfolio_selection=portfolio_selection
+            portfolio_selection=portfolio_selection,
+            portfolio_path=portfolio_path
         )
 
     def predict(
diff --git a/autoPyTorch/api/tabular_regression.py b/autoPyTorch/api/tabular_regression.py
index e6f67c0b5..ff55f702f 100644
--- a/autoPyTorch/api/tabular_regression.py
+++ b/autoPyTorch/api/tabular_regression.py
@@ -126,7 +126,8 @@ def search(
         precision: int = 32,
         disable_file_output: List = [],
         load_models: bool = True,
-        portfolio_selection: str = "none"
+        portfolio_selection: str = "none",
+        portfolio_path: Optional[str] = None
     ) -> 'BaseTask':
         """
         Search for the best pipeline configuration for the given dataset.
@@ -197,6 +198,9 @@ def search(
                 These configurations are the best performing configurations
                 when search was performed on meta training datasets.
                 For more info refer to `AutoPyTorch Tabular <https://arxiv.org/abs/2006.13799>`
+            portfolio_path (Optional[str]):
+                Optional argument to specify path to a portfolio file.
+
         Returns:
             self
 
@@ -242,7 +246,8 @@ def search(
             precision=precision,
             disable_file_output=disable_file_output,
             load_models=load_models,
-            portfolio_selection=portfolio_selection
+            portfolio_selection=portfolio_selection,
+            portfolio_path=portfolio_path
         )
 
     def predict(
diff --git a/autoPyTorch/optimizer/smbo.py b/autoPyTorch/optimizer/smbo.py
index 8c00120a6..56d99f411 100644
--- a/autoPyTorch/optimizer/smbo.py
+++ b/autoPyTorch/optimizer/smbo.py
@@ -109,7 +109,8 @@ def __init__(self,
                  ensemble_callback: typing.Optional[EnsembleBuilderManager] = None,
                  logger_port: typing.Optional[int] = None,
                  search_space_updates: typing.Optional[HyperparameterSearchSpaceUpdates] = None,
-                 portfolio_selection: str = "none"
+                 portfolio_selection: str = "none",
+                 portfolio_path: typing.Optional[str] = None
                  ):
         """
         Interface to SMAC. This method calls the SMAC optimize method, and allows
@@ -161,6 +162,8 @@ def __init__(self,
             portfolio_selection (str), (default="none"): If "greedy",
                 runs initial configurations present in
                 'autoPyTorch/configs/greedy_portfolio.json'.
+            portfolio_path (Optional[str]):
+                Optional argument to specify path to a portfolio file.
         """
         super(AutoMLSMBO, self).__init__()
         # data related
@@ -213,13 +216,12 @@ def __init__(self,
                                               port=self.logger_port)
         self.logger.info("initialised {}".format(self.__class__.__name__))
 
-        # read and validate initial configurations
-        initial_configurations = json.load(open(os.path.join(os.path.dirname(__file__),
-                                                             '../configs/greedy_portfolio.json')))
-
         self.initial_configurations: typing.Optional[typing.List[Configuration]] = None
-        assert portfolio_selection in ['none', 'greedy']
         if portfolio_selection == "greedy":
+            # read and validate initial configurations
+            portfolio_path = portfolio_path if portfolio_path is not None else \
+                os.path.join(os.path.dirname(__file__), '../configs/greedy_portfolio.json')
+            initial_configurations = json.load(open(portfolio_path))
             self.initial_configurations = list()
             for configuration_dict in initial_configurations:
                 try:
diff --git a/test/test_api/test_api.py b/test/test_api/test_api.py
index 280617306..32f67048a 100644
--- a/test/test_api/test_api.py
+++ b/test/test_api/test_api.py
@@ -4,6 +4,7 @@
 import sys
 import unittest
 from test.test_api.utils import dummy_do_dummy_prediction, dummy_eval_function
+import json
 
 import numpy as np
 
@@ -464,3 +465,50 @@ def test_do_dummy_prediction(dask_client, fit_dictionary_tabular):
     estimator._clean_logger()
 
     del estimator
+
+
+@unittest.mock.patch('autoPyTorch.evaluation.train_evaluator.eval_function',
+                     new=dummy_eval_function)
+@pytest.mark.parametrize('openml_id', (40981, ))
+def test_greedy(openml_id, backend, n_samples):
+
+    # Get the data and check that contents of data-manager make sense
+    X, y = sklearn.datasets.fetch_openml(
+        data_id=int(openml_id),
+        return_X_y=True, as_frame=True
+    )
+    X, y = X.iloc[:n_samples], y.iloc[:n_samples]
+
+    X_train, X_test, y_train, y_test = sklearn.model_selection.train_test_split(
+        X, y, random_state=1)
+
+    include = None
+    # for python less than 3.7, learned entity embedding
+    # is not able to be stored on disk (only on CI)
+    if sys.version_info < (3, 7):
+        include = {'network_embedding': ['NoEmbedding']}
+    # Search for a good configuration
+    estimator = TabularClassificationTask(
+        backend=backend,
+        resampling_strategy=HoldoutValTypes.holdout_validation,
+        include_components=include
+    )
+
+    with unittest.mock.patch.object(estimator, '_do_dummy_prediction', new=dummy_do_dummy_prediction):
+        estimator.search(
+            X_train=X_train, y_train=y_train,
+            X_test=X_test, y_test=y_test,
+            optimize_metric='accuracy',
+            total_walltime_limit=30,
+            func_eval_time_limit_secs=5,
+            enable_traditional_pipeline=False,
+            portfolio_selection="greedy"
+        )
+
+    successful_config_ids = [run_key.config_id for run_key, run_value in estimator.run_history.data.items(
+    ) if 'SUCCESS' in str(run_value.status)]
+    successful_configs = [estimator.run_history.ids_config[id].get_dictionary() for id in successful_config_ids]
+    portfolio_configs = json.load(open(os.path.join(os.path.dirname(__file__),
+                                                    "../../autoPyTorch/configs/greedy_portfolio.json")))
+    # check if any configs from greedy portfolio were compatible with australian
+    assert any(successful_config in portfolio_configs for successful_config in successful_configs)

From 2b8d25cc9c7a0da2b01d60b09e336bb516c5f440 Mon Sep 17 00:00:00 2001
From: Ravin Kohli <kohliravin7@gmail.com>
Date: Wed, 19 May 2021 16:19:09 +0200
Subject: [PATCH 07/15] fix flake8 tests

---
 test/test_api/test_api.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/test/test_api/test_api.py b/test/test_api/test_api.py
index 32f67048a..085904ff2 100644
--- a/test/test_api/test_api.py
+++ b/test/test_api/test_api.py
@@ -1,10 +1,10 @@
+import json
 import os
 import pathlib
 import pickle
 import sys
 import unittest
 from test.test_api.utils import dummy_do_dummy_prediction, dummy_eval_function
-import json
 
 import numpy as np
 

From 7bbe60310d1e6bc09201656e3bf4f81ada94bc18 Mon Sep 17 00:00:00 2001
From: Ravin Kohli <kohliravin7@gmail.com>
Date: Tue, 25 May 2021 11:22:08 +0200
Subject: [PATCH 08/15] Simplify portfolio selection

---
 autoPyTorch/api/base_task.py              | 24 ++++++++++-------------
 autoPyTorch/api/tabular_classification.py | 22 ++++++++++-----------
 autoPyTorch/api/tabular_regression.py     | 21 ++++++++++----------
 autoPyTorch/optimizer/smbo.py             | 21 +++++++++++---------
 test/test_api/test_api.py                 |  5 +++--
 5 files changed, 45 insertions(+), 48 deletions(-)

diff --git a/autoPyTorch/api/base_task.py b/autoPyTorch/api/base_task.py
index afad47ca3..4be24ebb7 100644
--- a/autoPyTorch/api/base_task.py
+++ b/autoPyTorch/api/base_task.py
@@ -705,8 +705,7 @@ def _search(
         precision: int = 32,
         disable_file_output: List = [],
         load_models: bool = True,
-        portfolio_selection: str = "none",
-        portfolio_path: Optional[str] = None
+        portfolio_selection: Optional[str] = None
     ) -> 'BaseTask':
         """
         Search for the best pipeline configuration for the given dataset.
@@ -777,14 +776,15 @@ def _search(
             disable_file_output (Union[bool, List]):
             load_models (bool), (default=True): Whether to load the
                 models after fitting AutoPyTorch.
-            portfolio_selection (str), (default="none"): If "greedy",
-                runs initial configurations present in
-                'autoPyTorch/configs/greedy_portfolio.json'.
-                These configurations are the best performing configurations
-                when search was performed on meta training datasets.
-                For more info refer to `AutoPyTorch Tabular <https://arxiv.org/abs/2006.13799>`
-            portfolio_path (Optional[str]):
-                Optional argument to specify path to a portfolio file.
+            portfolio_selection (str), (default=None):
+                This argument controls the initial configurations that
+                AutoPyTorch uses to warm start SMAC for hyperparameter
+                optimization. By default, no warm-starting happens.
+                The user can provide a path to a json file containing
+                configurations, similar to (...herepathtogreedy...).
+                Additionally, the keyword 'greedy' is supported,
+                which would use the default portfolio from
+                `AutoPyTorch Tabular <https://arxiv.org/abs/2006.13799>`
         Returns:
             self
 
@@ -793,9 +793,6 @@ def _search(
             raise ValueError("Incompatible dataset entered for current task,"
                              "expected dataset to have task type :{} got "
                              ":{}".format(self.task_type, dataset.task_type))
-        if portfolio_selection not in ["none", "greedy"]:
-            raise ValueError("Expected portfolio_selection to be in ['none', 'greedy']"
-                             "got {}".format(portfolio_selection))
 
         # Initialise information needed for the experiment
         experiment_task_name: str = 'runSearch'
@@ -974,7 +971,6 @@ def _search(
                 start_num_run=self._backend.get_next_num_run(peek=True),
                 search_space_updates=self.search_space_updates,
                 portfolio_selection=portfolio_selection,
-                portfolio_path=portfolio_path
             )
             try:
                 run_history, self.trajectory, budget_type = \
diff --git a/autoPyTorch/api/tabular_classification.py b/autoPyTorch/api/tabular_classification.py
index 68e347b55..2eb7ca47f 100644
--- a/autoPyTorch/api/tabular_classification.py
+++ b/autoPyTorch/api/tabular_classification.py
@@ -134,8 +134,7 @@ def search(
         precision: int = 32,
         disable_file_output: List = [],
         load_models: bool = True,
-        portfolio_selection: str = "none",
-        portfolio_path: Optional[str] = None
+        portfolio_selection: Optional[str] = None,
     ) -> 'BaseTask':
         """
         Search for the best pipeline configuration for the given dataset.
@@ -204,15 +203,15 @@ def search(
             disable_file_output (Union[bool, List]):
             load_models (bool), (default=True):
                 Whether to load the models after fitting AutoPyTorch.
-            portfolio_selection (str), (default="none"):
-                If "greedy", runs initial configurations present in
-                portfolio_path, if specified else, those in
-                'autoPyTorch/configs/greedy_portfolio.json'.
-                These configurations are the best performing configurations
-                when search was performed on meta training datasets.
-                For more info refer to `AutoPyTorch Tabular <https://arxiv.org/abs/2006.13799>`
-            portfolio_path (Optional[str]):
-                Optional argument to specify path to a portfolio file.
+            portfolio_selection (str), (default=None):
+                This argument controls the initial configurations that
+                AutoPyTorch uses to warm start SMAC for hyperparameter
+                optimization. By default, no warm-starting happens.
+                The user can provide a path to a json file containing
+                configurations, similar to (...herepathtogreedy...).
+                Additionally, the keyword 'greedy' is supported,
+                which would use the default portfolio from
+                `AutoPyTorch Tabular <https://arxiv.org/abs/2006.13799>`
         Returns:
             self
 
@@ -259,7 +258,6 @@ def search(
             disable_file_output=disable_file_output,
             load_models=load_models,
             portfolio_selection=portfolio_selection,
-            portfolio_path=portfolio_path
         )
 
     def predict(
diff --git a/autoPyTorch/api/tabular_regression.py b/autoPyTorch/api/tabular_regression.py
index ff55f702f..e530db5b9 100644
--- a/autoPyTorch/api/tabular_regression.py
+++ b/autoPyTorch/api/tabular_regression.py
@@ -126,8 +126,7 @@ def search(
         precision: int = 32,
         disable_file_output: List = [],
         load_models: bool = True,
-        portfolio_selection: str = "none",
-        portfolio_path: Optional[str] = None
+        portfolio_selection: Optional[str] = None,
     ) -> 'BaseTask':
         """
         Search for the best pipeline configuration for the given dataset.
@@ -192,14 +191,15 @@ def search(
             disable_file_output (Union[bool, List]):
             load_models (bool), (default=True): Whether to load the
                 models after fitting AutoPyTorch.
-            portfolio_selection (str), (default="none"): If "greedy",
-                runs initial configurations present in
-                'autoPyTorch/configs/greedy_portfolio.json'.
-                These configurations are the best performing configurations
-                when search was performed on meta training datasets.
-                For more info refer to `AutoPyTorch Tabular <https://arxiv.org/abs/2006.13799>`
-            portfolio_path (Optional[str]):
-                Optional argument to specify path to a portfolio file.
+            portfolio_selection (str), (default=None):
+                This argument controls the initial configurations that
+                AutoPyTorch uses to warm start SMAC for hyperparameter
+                optimization. By default, no warm-starting happens.
+                The user can provide a path to a json file containing
+                configurations, similar to (...herepathtogreedy...).
+                Additionally, the keyword 'greedy' is supported,
+                which would use the default portfolio from
+                `AutoPyTorch Tabular <https://arxiv.org/abs/2006.13799>`
 
         Returns:
             self
@@ -247,7 +247,6 @@ def search(
             disable_file_output=disable_file_output,
             load_models=load_models,
             portfolio_selection=portfolio_selection,
-            portfolio_path=portfolio_path
         )
 
     def predict(
diff --git a/autoPyTorch/optimizer/smbo.py b/autoPyTorch/optimizer/smbo.py
index 56d99f411..b30ccd455 100644
--- a/autoPyTorch/optimizer/smbo.py
+++ b/autoPyTorch/optimizer/smbo.py
@@ -109,8 +109,7 @@ def __init__(self,
                  ensemble_callback: typing.Optional[EnsembleBuilderManager] = None,
                  logger_port: typing.Optional[int] = None,
                  search_space_updates: typing.Optional[HyperparameterSearchSpaceUpdates] = None,
-                 portfolio_selection: str = "none",
-                 portfolio_path: typing.Optional[str] = None
+                 portfolio_selection: typing.Optional[str] = None
                  ):
         """
         Interface to SMAC. This method calls the SMAC optimize method, and allows
@@ -159,11 +158,15 @@ def __init__(self,
                 Allows to create a user specified SMAC object
             ensemble_callback (typing.Optional[EnsembleBuilderManager]):
                 A callback used in this scenario to start ensemble building subtasks
-            portfolio_selection (str), (default="none"): If "greedy",
-                runs initial configurations present in
-                'autoPyTorch/configs/greedy_portfolio.json'.
-            portfolio_path (Optional[str]):
-                Optional argument to specify path to a portfolio file.
+            portfolio_selection (str), (default=None):
+                This argument controls the initial configurations that
+                AutoPyTorch uses to warm start SMAC for hyperparameter
+                optimization. By default, no warm-starting happens.
+                The user can provide a path to a json file containing
+                configurations, similar to (...herepathtogreedy...).
+                Additionally, the keyword 'greedy' is supported,
+                which would use the default portfolio from
+                `AutoPyTorch Tabular <https://arxiv.org/abs/2006.13799>`
         """
         super(AutoMLSMBO, self).__init__()
         # data related
@@ -217,9 +220,9 @@ def __init__(self,
         self.logger.info("initialised {}".format(self.__class__.__name__))
 
         self.initial_configurations: typing.Optional[typing.List[Configuration]] = None
-        if portfolio_selection == "greedy":
+        if portfolio_selection is not None:
             # read and validate initial configurations
-            portfolio_path = portfolio_path if portfolio_path is not None else \
+            portfolio_path = portfolio_selection if portfolio_selection != "greedy" else \
                 os.path.join(os.path.dirname(__file__), '../configs/greedy_portfolio.json')
             initial_configurations = json.load(open(portfolio_path))
             self.initial_configurations = list()
diff --git a/test/test_api/test_api.py b/test/test_api/test_api.py
index 085904ff2..c272ef24b 100644
--- a/test/test_api/test_api.py
+++ b/test/test_api/test_api.py
@@ -470,7 +470,7 @@ def test_do_dummy_prediction(dask_client, fit_dictionary_tabular):
 @unittest.mock.patch('autoPyTorch.evaluation.train_evaluator.eval_function',
                      new=dummy_eval_function)
 @pytest.mark.parametrize('openml_id', (40981, ))
-def test_greedy(openml_id, backend, n_samples):
+def test_portfolio_selection(openml_id, backend, n_samples):
 
     # Get the data and check that contents of data-manager make sense
     X, y = sklearn.datasets.fetch_openml(
@@ -502,7 +502,8 @@ def test_greedy(openml_id, backend, n_samples):
             total_walltime_limit=30,
             func_eval_time_limit_secs=5,
             enable_traditional_pipeline=False,
-            portfolio_selection="greedy"
+            portfolio_selection=os.path.join(os.path.dirname(__file__),
+                                             "../../autoPyTorch/configs/greedy_portfolio.json")
         )
 
     successful_config_ids = [run_key.config_id for run_key, run_value in estimator.run_history.data.items(

From 219b178121e0f08d1dcaa5115fad39c146ab852a Mon Sep 17 00:00:00 2001
From: Ravin Kohli <13005107+ravinkohli@users.noreply.github.com>
Date: Wed, 26 May 2021 13:08:07 +0200
Subject: [PATCH 09/15] Update autoPyTorch/optimizer/smbo.py

Co-authored-by: Francisco Rivera Valverde <44504424+franchuterivera@users.noreply.github.com>
---
 autoPyTorch/optimizer/smbo.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/autoPyTorch/optimizer/smbo.py b/autoPyTorch/optimizer/smbo.py
index b30ccd455..fa79f7197 100644
--- a/autoPyTorch/optimizer/smbo.py
+++ b/autoPyTorch/optimizer/smbo.py
@@ -163,7 +163,7 @@ def __init__(self,
                 AutoPyTorch uses to warm start SMAC for hyperparameter
                 optimization. By default, no warm-starting happens.
                 The user can provide a path to a json file containing
-                configurations, similar to (...herepathtogreedy...).
+                configurations, similar to (autoPyTorch/configs/greedy_portfolio.json).
                 Additionally, the keyword 'greedy' is supported,
                 which would use the default portfolio from
                 `AutoPyTorch Tabular <https://arxiv.org/abs/2006.13799>`

From 4e8b76e8b4b9ae6f01e812303706f44de385639d Mon Sep 17 00:00:00 2001
From: Ravin Kohli <kohliravin7@gmail.com>
Date: Wed, 26 May 2021 14:45:08 +0200
Subject: [PATCH 10/15] Address comments from Francisco, path exception
 handling and test

---
 autoPyTorch/optimizer/smbo.py |  7 +++++-
 test/test_api/test_api.py     | 41 +++++++++++++++++++++++++++++++++++
 2 files changed, 47 insertions(+), 1 deletion(-)

diff --git a/autoPyTorch/optimizer/smbo.py b/autoPyTorch/optimizer/smbo.py
index fa79f7197..ced47182a 100644
--- a/autoPyTorch/optimizer/smbo.py
+++ b/autoPyTorch/optimizer/smbo.py
@@ -224,7 +224,12 @@ def __init__(self,
             # read and validate initial configurations
             portfolio_path = portfolio_selection if portfolio_selection != "greedy" else \
                 os.path.join(os.path.dirname(__file__), '../configs/greedy_portfolio.json')
-            initial_configurations = json.load(open(portfolio_path))
+            try:
+                initial_configurations = json.load(open(portfolio_path))
+            except FileNotFoundError as e:
+                raise FileNotFoundError("The path: {} provided for 'portfolio_selection' for "
+                                        "the file containing the portfolio configurations "
+                                        "does not exist. Please provide a valid path".format(portfolio_path))
             self.initial_configurations = list()
             for configuration_dict in initial_configurations:
                 try:
diff --git a/test/test_api/test_api.py b/test/test_api/test_api.py
index c272ef24b..9f9d9f765 100644
--- a/test/test_api/test_api.py
+++ b/test/test_api/test_api.py
@@ -513,3 +513,44 @@ def test_portfolio_selection(openml_id, backend, n_samples):
                                                     "../../autoPyTorch/configs/greedy_portfolio.json")))
     # check if any configs from greedy portfolio were compatible with australian
     assert any(successful_config in portfolio_configs for successful_config in successful_configs)
+
+
+@unittest.mock.patch('autoPyTorch.evaluation.train_evaluator.eval_function',
+                     new=dummy_eval_function)
+@pytest.mark.parametrize('openml_id', (40981, ))
+def test_portfolio_selection_failure(openml_id, backend, n_samples):
+    """Expect search() to raise FileNotFoundError for a portfolio_selection path that does not exist."""
+    # Fetch the OpenML dataset and keep only the first n_samples rows
+    X, y = sklearn.datasets.fetch_openml(
+        data_id=int(openml_id),
+        return_X_y=True, as_frame=True
+    )
+    X, y = X.iloc[:n_samples], y.iloc[:n_samples]
+
+    X_train, X_test, y_train, y_test = sklearn.model_selection.train_test_split(
+        X, y, random_state=1)
+
+    include = None
+    # for python less than 3.7, learned entity embedding
+    # is not able to be stored on disk (only on CI)
+    if sys.version_info < (3, 7):
+        include = {'network_embedding': ['NoEmbedding']}
+    # Build the estimator whose search() call is expected to fail
+    estimator = TabularClassificationTask(
+        backend=backend,
+        resampling_strategy=HoldoutValTypes.holdout_validation,
+        include_components=include
+    )
+
+    with pytest.raises(FileNotFoundError, match=r"The path: .+? provided for 'portfolio_selection' "
+                                                r"for the file containing the portfolio configurations "
+                                                r"does not exist\. Please provide a valid path"):
+        estimator.search(
+            X_train=X_train, y_train=y_train,
+            X_test=X_test, y_test=y_test,
+            optimize_metric='accuracy',
+            total_walltime_limit=30,
+            func_eval_time_limit_secs=5,
+            enable_traditional_pipeline=False,
+            portfolio_selection="random_path_to_test.json"
+        )

From a1001f3206974f2db3d5a925e2b2494e9a1246b4 Mon Sep 17 00:00:00 2001
From: Ravin Kohli <kohliravin7@gmail.com>
Date: Wed, 26 May 2021 14:45:45 +0200
Subject: [PATCH 11/15] fix flake

---
 autoPyTorch/optimizer/smbo.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/autoPyTorch/optimizer/smbo.py b/autoPyTorch/optimizer/smbo.py
index ced47182a..6ff34f995 100644
--- a/autoPyTorch/optimizer/smbo.py
+++ b/autoPyTorch/optimizer/smbo.py
@@ -226,7 +226,7 @@ def __init__(self,
                 os.path.join(os.path.dirname(__file__), '../configs/greedy_portfolio.json')
             try:
                 initial_configurations = json.load(open(portfolio_path))
-            except FileNotFoundError as e:
+            except FileNotFoundError:
                 raise FileNotFoundError("The path: {} provided for 'portfolio_selection' for "
                                         "the file containing the portfolio configurations "
                                         "does not exist. Please provide a valid path".format(portfolio_path))

From af7e48aa9d6b3f49489431210dc6d356b37d750b Mon Sep 17 00:00:00 2001
From: Ravin Kohli <kohliravin7@gmail.com>
Date: Fri, 28 May 2021 11:38:07 +0200
Subject: [PATCH 12/15] Address comments from shuhei

---
 autoPyTorch/optimizer/smbo.py  | 23 +++--------------------
 autoPyTorch/optimizer/utils.py | 33 +++++++++++++++++++++++++++++++++
 2 files changed, 36 insertions(+), 20 deletions(-)
 create mode 100644 autoPyTorch/optimizer/utils.py

diff --git a/autoPyTorch/optimizer/smbo.py b/autoPyTorch/optimizer/smbo.py
index 6ff34f995..2ae894e8b 100644
--- a/autoPyTorch/optimizer/smbo.py
+++ b/autoPyTorch/optimizer/smbo.py
@@ -1,7 +1,6 @@
 import copy
 import json
 import logging.handlers
-import os
 import typing
 
 import ConfigSpace
@@ -27,6 +26,7 @@
 )
 from autoPyTorch.ensemble.ensemble_builder import EnsembleBuilderManager
 from autoPyTorch.evaluation.tae import ExecuteTaFuncWithQueue, get_cost_of_crash
+from autoPyTorch.optimizer.utils import read_return_initial_configurations
 from autoPyTorch.pipeline.components.training.metrics.base import autoPyTorchMetric
 from autoPyTorch.utils.hyperparameter_search_space_update import HyperparameterSearchSpaceUpdates
 from autoPyTorch.utils.logging_ import get_named_client_logger
@@ -221,25 +221,8 @@ def __init__(self,
 
         self.initial_configurations: typing.Optional[typing.List[Configuration]] = None
         if portfolio_selection is not None:
-            # read and validate initial configurations
-            portfolio_path = portfolio_selection if portfolio_selection != "greedy" else \
-                os.path.join(os.path.dirname(__file__), '../configs/greedy_portfolio.json')
-            try:
-                initial_configurations = json.load(open(portfolio_path))
-            except FileNotFoundError:
-                raise FileNotFoundError("The path: {} provided for 'portfolio_selection' for "
-                                        "the file containing the portfolio configurations "
-                                        "does not exist. Please provide a valid path".format(portfolio_path))
-            self.initial_configurations = list()
-            for configuration_dict in initial_configurations:
-                try:
-                    configuration = Configuration(self.config_space, configuration_dict)
-                    self.initial_configurations.append(configuration)
-                except Exception as e:
-                    self.logger.warning(f"Failed to convert {configuration_dict} into"
-                                        f" a Configuration with error {e}. "
-                                        f"Therefore, it can't be used as an initial "
-                                        f"configuration as it does not match the current config space. ")
+            self.initial_configurations = read_return_initial_configurations(config_space=config_space,
+                                                                             portfolio_selection=portfolio_selection)
 
     def reset_data_manager(self) -> None:
         if self.datamanager is not None:
diff --git a/autoPyTorch/optimizer/utils.py b/autoPyTorch/optimizer/utils.py
new file mode 100644
index 000000000..6fb9d5024
--- /dev/null
+++ b/autoPyTorch/optimizer/utils.py
@@ -0,0 +1,33 @@
+import json
+import os
+import warnings
+from typing import Any, Dict, List
+
+from ConfigSpace.configuration_space import Configuration, ConfigurationSpace
+
+
+def read_return_initial_configurations(
+    config_space: ConfigurationSpace,
+    portfolio_selection: str
+) -> List[Configuration]:
+    """Load the portfolio ('greedy' or a json file path) and return its entries valid for config_space."""
+    portfolio_path = portfolio_selection if portfolio_selection != "greedy" else \
+        os.path.join(os.path.dirname(__file__), '../configs/greedy_portfolio.json')
+    try:
+        with open(portfolio_path) as portfolio_file:  # closed even if json parsing fails
+            initial_configurations_dict: List[Dict[str, Any]] = json.load(portfolio_file)
+    except FileNotFoundError:
+        raise FileNotFoundError("The path: {} provided for 'portfolio_selection' for "
+                                "the file containing the portfolio configurations "
+                                "does not exist. Please provide a valid path".format(portfolio_path))
+    initial_configurations: List[Configuration] = list()
+    for configuration_dict in initial_configurations_dict:
+        try:
+            initial_configurations.append(Configuration(config_space, configuration_dict))
+        except Exception as e:
+            # entries that do not fit the current search space are skipped, not fatal
+            warnings.warn(f"Failed to convert {configuration_dict} into"
+                          f" a Configuration with error {e}. "
+                          f"Therefore, it can't be used as an initial "
+                          f"configuration as it does not match the current config space. ")
+    return initial_configurations

From 90a4a2845a5bfaee90527557a1ffe55275e5f70d Mon Sep 17 00:00:00 2001
From: Ravin Kohli <kohliravin7@gmail.com>
Date: Fri, 28 May 2021 11:44:49 +0200
Subject: [PATCH 13/15] fix bug in setup.py

---
 setup.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/setup.py b/setup.py
index 74c730b07..a8522a8dc 100755
--- a/setup.py
+++ b/setup.py
@@ -60,5 +60,5 @@
     },
     test_suite="pytest",
     data_files=[('configs', ['autoPyTorch/configs/default_pipeline_options.json']),
-                ('portfolio', ['autoPyTorch/optimizer/greedy_portfolio.json'])]
+                ('portfolio', ['autoPyTorch/configs/greedy_portfolio.json'])]
 )

From 23ed56c29c7981cceda7a0c52790cceb3248c68a Mon Sep 17 00:00:00 2001
From: Ravin Kohli <kohliravin7@gmail.com>
Date: Fri, 28 May 2021 12:35:21 +0200
Subject: [PATCH 14/15] fix tests in base trainer evaluate, increase n samples
 and add seed

---
 test/test_pipeline/components/training/base.py          | 2 ++
 test/test_pipeline/components/training/test_training.py | 5 +++--
 2 files changed, 5 insertions(+), 2 deletions(-)

diff --git a/test/test_pipeline/components/training/base.py b/test/test_pipeline/components/training/base.py
index d7cb2ebd1..38b6b5007 100644
--- a/test/test_pipeline/components/training/base.py
+++ b/test/test_pipeline/components/training/base.py
@@ -23,6 +23,8 @@ def prepare_trainer(self,
                         trainer: BaseTrainerComponent,
                         task_type: int,
                         epochs=50):
+        torch.manual_seed(1)
+
         if task_type in CLASSIFICATION_TASKS:
             X, y = make_classification(
                 n_samples=n_samples,
diff --git a/test/test_pipeline/components/training/test_training.py b/test/test_pipeline/components/training/test_training.py
index 98ea47716..0310a256a 100644
--- a/test/test_pipeline/components/training/test_training.py
+++ b/test/test_pipeline/components/training/test_training.py
@@ -33,6 +33,7 @@
 
 
 OVERFIT_EPOCHS = 1000
+N_SAMPLES = 500
 
 
 class BaseDataLoaderTest(unittest.TestCase):
@@ -162,14 +163,14 @@ def test_evaluate(self, n_samples):
 
 
 class StandardTrainerTest(BaseTraining):
-    def test_regression_epoch_training(self, n_samples):
+    def test_regression_epoch_training(self):
         (trainer,
          _,
          _,
          loader,
          _,
          epochs,
-         logger) = self.prepare_trainer(n_samples,
+         logger) = self.prepare_trainer(N_SAMPLES,
                                         StandardTrainer(),
                                         constants.TABULAR_REGRESSION,
                                         OVERFIT_EPOCHS)

From 6f3951b77e0875b4344368e0174127fd9d50d94b Mon Sep 17 00:00:00 2001
From: Ravin Kohli <kohliravin7@gmail.com>
Date: Fri, 28 May 2021 15:19:01 +0200
Subject: [PATCH 15/15] fix tests in base trainer evaluate, increase n samples
 (fix)

---
 test/test_pipeline/components/training/test_training.py | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/test/test_pipeline/components/training/test_training.py b/test/test_pipeline/components/training/test_training.py
index 0310a256a..36670e325 100644
--- a/test/test_pipeline/components/training/test_training.py
+++ b/test/test_pipeline/components/training/test_training.py
@@ -128,7 +128,7 @@ def test_fit_transform(self):
 
 
 class TestBaseTrainerComponent(BaseTraining):
-    def test_evaluate(self, n_samples):
+    def test_evaluate(self):
         """
         Makes sure we properly evaluate data, returning a proper loss
         and metric
@@ -140,7 +140,7 @@ def test_evaluate(self, n_samples):
          loader,
          criterion,
          epochs,
-         logger) = self.prepare_trainer(n_samples,
+         logger) = self.prepare_trainer(N_SAMPLES,
                                         BaseTrainerComponent(),
                                         constants.TABULAR_CLASSIFICATION)
 
@@ -163,14 +163,14 @@ def test_evaluate(self, n_samples):
 
 
 class StandardTrainerTest(BaseTraining):
-    def test_regression_epoch_training(self):
+    def test_regression_epoch_training(self, n_samples):
         (trainer,
          _,
          _,
          loader,
          _,
          epochs,
-         logger) = self.prepare_trainer(N_SAMPLES,
+         logger) = self.prepare_trainer(n_samples,
                                         StandardTrainer(),
                                         constants.TABULAR_REGRESSION,
                                         OVERFIT_EPOCHS)