diff --git a/.github/workflows/examples.yml b/.github/workflows/examples.yml deleted file mode 100644 index 538b44edd..000000000 --- a/.github/workflows/examples.yml +++ /dev/null @@ -1,39 +0,0 @@ -name: Examples - -on: [push, pull_request] - -jobs: - ubuntu: - - runs-on: ubuntu-latest - strategy: - matrix: - python-version: [3.8] - fail-fast: false - max-parallel: 2 - - steps: - - uses: actions/checkout@v2 - - name: Setup Python ${{ matrix.python-version }} - uses: actions/setup-python@v2 - with: - python-version: ${{ matrix.python-version }} - - name: Install test dependencies - run: | - git submodule update --init --recursive - python -m pip install --upgrade pip - pip install -e .[examples] - which python - pip freeze - - name: Store repository status - id: status-before - run: | - echo "::set-output name=BEFORE::$(git status --porcelain -b)" - - name: Run tests - run: | - python examples/20_basics/example_image_classification.py - python examples/20_basics/example_tabular_classification.py - python examples/20_basics/example_tabular_regression.py - python examples/40_advanced/example_custom_configuration_space.py - python examples/40_advanced/example_resampling_strategy.py - python examples/40_advanced/example_visualization.py diff --git a/autoPyTorch/pipeline/components/preprocessing/tabular_preprocessing/feature_preprocessing/KernelPCA.py b/autoPyTorch/pipeline/components/preprocessing/tabular_preprocessing/feature_preprocessing/KernelPCA.py index 2a4737c4d..a03a35331 100644 --- a/autoPyTorch/pipeline/components/preprocessing/tabular_preprocessing/feature_preprocessing/KernelPCA.py +++ b/autoPyTorch/pipeline/components/preprocessing/tabular_preprocessing/feature_preprocessing/KernelPCA.py @@ -1,5 +1,5 @@ from math import ceil, floor -from typing import Any, Dict, Optional, Union +from typing import Any, Dict, Optional from ConfigSpace.conditions import EqualsCondition, InCondition from ConfigSpace.configuration_space import ConfigurationSpace @@ -23,15 +23,14 @@ class KernelPCA(autoPyTorchFeaturePreprocessingComponent): def __init__(self, n_components: int = 10, kernel: str = 'rbf', degree: int = 3, gamma: float = 0.01, coef0: float = 0.0, - random_state: Optional[Union[int, np.random.RandomState]] = None - ) -> None: + random_state: Optional[np.random.RandomState] = None + ): self.n_components = n_components self.kernel = kernel self.degree = degree self.gamma = gamma self.coef0 = coef0 - self.random_state = random_state - super().__init__() + super().__init__(random_state=random_state) self.add_fit_requirements([ FitRequirement('issparse', (bool,), user_defined=True, dataset_property=True)]) diff --git a/autoPyTorch/pipeline/components/preprocessing/tabular_preprocessing/feature_preprocessing/Nystroem.py b/autoPyTorch/pipeline/components/preprocessing/tabular_preprocessing/feature_preprocessing/Nystroem.py index 0a8f6c63d..d00697c21 100644 --- a/autoPyTorch/pipeline/components/preprocessing/tabular_preprocessing/feature_preprocessing/Nystroem.py +++ b/autoPyTorch/pipeline/components/preprocessing/tabular_preprocessing/feature_preprocessing/Nystroem.py @@ -1,5 +1,5 @@ from math import ceil, floor -from typing import Any, Dict, Optional, Union +from typing import Any, Dict, Optional from ConfigSpace.conditions import EqualsCondition, InCondition from ConfigSpace.configuration_space import ConfigurationSpace @@ -23,15 +23,14 @@ class Nystroem(autoPyTorchFeaturePreprocessingComponent): def __init__(self, n_components: int = 10, kernel: str = 'rbf', degree: int = 3, gamma: float = 
0.01, coef0: float = 0.0, - random_state: Optional[Union[int, np.random.RandomState]] = None - ) -> None: + random_state: Optional[np.random.RandomState] = None + ): self.n_components = n_components self.kernel = kernel self.degree = degree self.gamma = gamma self.coef0 = coef0 - self.random_state = random_state - super().__init__() + super().__init__(random_state=random_state) def fit(self, X: Dict[str, Any], y: Any = None) -> BaseEstimator: diff --git a/autoPyTorch/pipeline/components/preprocessing/tabular_preprocessing/feature_preprocessing/PolynomialFeatures.py b/autoPyTorch/pipeline/components/preprocessing/tabular_preprocessing/feature_preprocessing/PolynomialFeatures.py index a41c0a26d..b64b32eb6 100644 --- a/autoPyTorch/pipeline/components/preprocessing/tabular_preprocessing/feature_preprocessing/PolynomialFeatures.py +++ b/autoPyTorch/pipeline/components/preprocessing/tabular_preprocessing/feature_preprocessing/PolynomialFeatures.py @@ -1,4 +1,4 @@ -from typing import Any, Dict, Optional, Union +from typing import Any, Dict, Optional from ConfigSpace.configuration_space import ConfigurationSpace from ConfigSpace.hyperparameters import ( @@ -19,13 +19,12 @@ class PolynomialFeatures(autoPyTorchFeaturePreprocessingComponent): def __init__(self, degree: int = 2, interaction_only: bool = False, include_bias: bool = False, - random_state: Optional[Union[int, np.random.RandomState]] = None): + random_state: Optional[np.random.RandomState] = None): self.degree = degree self.interaction_only = interaction_only self.include_bias = include_bias - self.random_state = random_state - super().__init__() + super().__init__(random_state=random_state) def fit(self, X: Dict[str, Any], y: Any = None) -> BaseEstimator: self.preprocessor['numerical'] = sklearn.preprocessing.PolynomialFeatures( diff --git a/autoPyTorch/pipeline/components/preprocessing/tabular_preprocessing/feature_preprocessing/PowerTransformer.py b/autoPyTorch/pipeline/components/preprocessing/tabular_preprocessing/feature_preprocessing/PowerTransformer.py index 767a0f6c1..af187c50d 100644 --- a/autoPyTorch/pipeline/components/preprocessing/tabular_preprocessing/feature_preprocessing/PowerTransformer.py +++ b/autoPyTorch/pipeline/components/preprocessing/tabular_preprocessing/feature_preprocessing/PowerTransformer.py @@ -1,4 +1,4 @@ -from typing import Any, Dict, Optional, Union +from typing import Any, Dict, Optional from ConfigSpace.configuration_space import ConfigurationSpace from ConfigSpace.hyperparameters import ( @@ -17,11 +17,10 @@ class PowerTransformer(autoPyTorchFeaturePreprocessingComponent): def __init__(self, standardize: bool = True, - random_state: Optional[Union[int, np.random.RandomState]] = None): + random_state: Optional[np.random.RandomState] = None): self.standardize = standardize - self.random_state = random_state - super().__init__() + super().__init__(random_state=random_state) def fit(self, X: Dict[str, Any], y: Any = None) -> BaseEstimator: self.preprocessor['numerical'] = sklearn.preprocessing.PowerTransformer(method="yeo-johnson", diff --git a/autoPyTorch/pipeline/components/preprocessing/tabular_preprocessing/feature_preprocessing/RandomKitchenSinks.py b/autoPyTorch/pipeline/components/preprocessing/tabular_preprocessing/feature_preprocessing/RandomKitchenSinks.py index 9dbf26cbc..a3267391a 100644 --- a/autoPyTorch/pipeline/components/preprocessing/tabular_preprocessing/feature_preprocessing/RandomKitchenSinks.py +++ 
b/autoPyTorch/pipeline/components/preprocessing/tabular_preprocessing/feature_preprocessing/RandomKitchenSinks.py @@ -1,5 +1,5 @@ from math import ceil, floor -from typing import Any, Dict, Optional, Union +from typing import Any, Dict, Optional from ConfigSpace.configuration_space import ConfigurationSpace from ConfigSpace.hyperparameters import ( @@ -20,12 +20,11 @@ class RandomKitchenSinks(autoPyTorchFeaturePreprocessingComponent): def __init__(self, n_components: int = 100, gamma: float = 1.0, - random_state: Optional[Union[int, np.random.RandomState]] = None - ) -> None: + random_state: Optional[np.random.RandomState] = None + ): self.n_components = n_components self.gamma = gamma - self.random_state = random_state - super().__init__() + super().__init__(random_state=random_state) def fit(self, X: Dict[str, Any], y: Any = None) -> BaseEstimator: diff --git a/autoPyTorch/pipeline/components/preprocessing/tabular_preprocessing/feature_preprocessing/TruncatedSVD.py b/autoPyTorch/pipeline/components/preprocessing/tabular_preprocessing/feature_preprocessing/TruncatedSVD.py index bfe4568b3..69410d32f 100644 --- a/autoPyTorch/pipeline/components/preprocessing/tabular_preprocessing/feature_preprocessing/TruncatedSVD.py +++ b/autoPyTorch/pipeline/components/preprocessing/tabular_preprocessing/feature_preprocessing/TruncatedSVD.py @@ -1,5 +1,5 @@ from math import floor -from typing import Any, Dict, Optional, Union +from typing import Any, Dict, Optional from ConfigSpace.configuration_space import ConfigurationSpace from ConfigSpace.hyperparameters import ( @@ -18,11 +18,10 @@ class TruncatedSVD(autoPyTorchFeaturePreprocessingComponent): def __init__(self, target_dim: int = 128, - random_state: Optional[Union[int, np.random.RandomState]] = None): + random_state: Optional[np.random.RandomState] = None): self.target_dim = target_dim - self.random_state = random_state - super().__init__() + super().__init__(random_state=random_state) def fit(self, X: Dict[str, Any], y: Any = None) -> BaseEstimator: diff --git a/autoPyTorch/pipeline/components/preprocessing/tabular_preprocessing/feature_preprocessing/base_feature_preprocessor.py b/autoPyTorch/pipeline/components/preprocessing/tabular_preprocessing/feature_preprocessing/base_feature_preprocessor.py index 8c85bbf30..d11f69b90 100644 --- a/autoPyTorch/pipeline/components/preprocessing/tabular_preprocessing/feature_preprocessing/base_feature_preprocessor.py +++ b/autoPyTorch/pipeline/components/preprocessing/tabular_preprocessing/feature_preprocessing/base_feature_preprocessor.py @@ -1,4 +1,8 @@ -from typing import Any, Dict, List +from typing import Any, Dict, List, Optional + +import numpy as np + +from sklearn.utils import check_random_state from autoPyTorch.pipeline.components.preprocessing.tabular_preprocessing.base_tabular_preprocessing import ( autoPyTorchTabularPreprocessingComponent @@ -8,7 +12,13 @@ class autoPyTorchFeaturePreprocessingComponent(autoPyTorchTabularPreprocessingComponent): _required_properties: List[str] = ['handles_sparse'] - def __init__(self) -> None: + def __init__(self, random_state: Optional[np.random.RandomState] = None): + if random_state is None: + # Feature preprocessing components need a random state for + # sampling -- for example in RandomKitchenSinks or Nystroem + self.random_state = check_random_state(1) + else: + self.random_state = random_state super().__init__() def transform(self, X: Dict[str, Any]) -> Dict[str, Any]: diff --git a/test/conftest.py b/test/conftest.py index 592d41165..cdaf53703 100644 --- a/test/conftest.py +++
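# Illustrative sketch (not from the patch): the base-class change above centralises
# random_state handling, so every feature preprocessor forwards its random_state to
# autoPyTorchFeaturePreprocessingComponent, which falls back to
# sklearn.utils.check_random_state(1) when none is given. A minimal, self-contained
# model of that behaviour -- the Demo* class names are hypothetical stand-ins:
from typing import Optional

import numpy as np
from sklearn.utils import check_random_state


class DemoBasePreprocessor:
    def __init__(self, random_state: Optional[np.random.RandomState] = None):
        # Fall back to a fixed seed so repeated fits stay reproducible.
        self.random_state = check_random_state(1) if random_state is None else random_state


class DemoKernelPCA(DemoBasePreprocessor):
    def __init__(self, n_components: int = 10,
                 random_state: Optional[np.random.RandomState] = None):
        self.n_components = n_components
        # Subclasses no longer store random_state themselves; they forward it.
        super().__init__(random_state=random_state)


assert isinstance(DemoKernelPCA().random_state, np.random.RandomState)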
b/test/conftest.py @@ -25,6 +25,9 @@ from autoPyTorch.utils.pipeline import get_dataset_requirements +N_SAMPLES = 200 + + @pytest.fixture(scope="session") def callattr_ahead_of_alltests(request): """ @@ -191,7 +194,7 @@ def session_run_at_end(): def get_tabular_data(task): if task == "classification_numerical_only": X, y = make_classification( - n_samples=200, + n_samples=N_SAMPLES, n_features=4, n_informative=3, n_redundant=1, @@ -207,18 +210,18 @@ def get_tabular_data(task): X, y = fetch_openml(data_id=40981, return_X_y=True, as_frame=True) categorical_columns = [column for column in X.columns if X[column].dtype.name == 'category'] X = X[categorical_columns] - X = X.iloc[0:200] - y = y.iloc[0:200] + X = X.iloc[0:N_SAMPLES] + y = y.iloc[0:N_SAMPLES] validator = TabularInputValidator(is_classification=True).fit(X.copy(), y.copy()) elif task == "classification_numerical_and_categorical": X, y = fetch_openml(data_id=40981, return_X_y=True, as_frame=True) - X = X.iloc[0:200] - y = y.iloc[0:200] + X = X.iloc[0:N_SAMPLES] + y = y.iloc[0:N_SAMPLES] validator = TabularInputValidator(is_classification=True).fit(X.copy(), y.copy()) elif task == "regression_numerical_only": - X, y = make_regression(n_samples=200, + X, y = make_regression(n_samples=N_SAMPLES, n_features=4, n_informative=3, n_targets=1, @@ -240,8 +243,8 @@ def get_tabular_data(task): else: X[column] = X[column].fillna(0) - X = X.iloc[0:200] - y = y.iloc[0:200] + X = X.iloc[0:N_SAMPLES] + y = y.iloc[0:N_SAMPLES] y = (y - y.mean()) / y.std() validator = TabularInputValidator(is_classification=False).fit(X.copy(), y.copy()) @@ -256,8 +259,8 @@ def get_tabular_data(task): else: X[column] = X[column].fillna(0) - X = X.iloc[0:200] - y = y.iloc[0:200] + X = X.iloc[0:N_SAMPLES] + y = y.iloc[0:N_SAMPLES] y = (y - y.mean()) / y.std() validator = TabularInputValidator(is_classification=False).fit(X.copy(), y.copy()) elif task == 'iris': @@ -288,7 +291,7 @@ def get_fit_dictionary(X, y, validator, backend): 'num_run': np.random.randint(50), 'device': 'cpu', 'budget_type': 'epochs', - 'epochs': 100, + 'epochs': 5, 'torch_num_threads': 1, 'early_stopping': 10, 'working_dir': '/tmp', @@ -326,7 +329,7 @@ def dataset(request): @pytest.fixture def dataset_traditional_classifier_num_only(): X, y = make_classification( - n_samples=200, + n_samples=N_SAMPLES, n_features=4, n_informative=3, n_redundant=1, @@ -344,7 +347,7 @@ def dataset_traditional_classifier_categorical_only(): X, y = fetch_openml(data_id=40981, return_X_y=True, as_frame=True) categorical_columns = [column for column in X.columns if X[column].dtype.name == 'category'] X = X[categorical_columns] - X, y = X[:200].to_numpy(), y[:200].to_numpy().astype(np.int) + X, y = X[:N_SAMPLES].to_numpy(), y[:N_SAMPLES].to_numpy().astype(np.int) return X, y @@ -352,7 +355,7 @@ def dataset_traditional_classifier_categorical_only(): def dataset_traditional_classifier_num_categorical(): X, y = fetch_openml(data_id=40981, return_X_y=True, as_frame=True) y = y.astype(np.int) - X, y = X[:200].to_numpy(), y[:200].to_numpy().astype(np.int) + X, y = X[:N_SAMPLES].to_numpy(), y[:N_SAMPLES].to_numpy().astype(np.int) return X, y @@ -456,3 +459,8 @@ def loss_mse(): @pytest.fixture def loss_details(request): return request.getfixturevalue(request.param) + + +@pytest.fixture +def n_samples(): + return N_SAMPLES diff --git a/test/test_api/test_api.py b/test/test_api/test_api.py index 62d220bfd..280617306 100644 --- a/test/test_api/test_api.py +++ b/test/test_api/test_api.py @@ -3,6 +3,7 @@ import pickle import sys import 
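# Illustrative sketch (not from the patch): conftest.py now keeps the test dataset size in
# a single N_SAMPLES constant and exposes it through an `n_samples` fixture, so individual
# tests request the shared size as an argument. A hypothetical, self-contained
# conftest/test pair showing the same pattern:
import pytest
from sklearn.datasets import make_classification

N_SAMPLES = 200


@pytest.fixture
def n_samples():
    return N_SAMPLES


def test_dataset_size(n_samples):
    X, y = make_classification(n_samples=n_samples, n_features=4, random_state=0)
    assert X.shape[0] == n_samples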
unittest +from test.test_api.utils import dummy_do_dummy_prediction, dummy_eval_function import numpy as np @@ -18,8 +19,6 @@ from smac.runhistory.runhistory import RunHistory -import torch - from autoPyTorch.api.tabular_classification import TabularClassificationTask from autoPyTorch.api.tabular_regression import TabularRegressionTask from autoPyTorch.datasets.resampling_strategy import ( @@ -30,23 +29,29 @@ from autoPyTorch.pipeline.components.training.metrics.metrics import accuracy -# Fixtures -# ======== +CV_NUM_SPLITS = 2 +HOLDOUT_NUM_SPLITS = 1 +# ==== # Test -# ======== +# ==== +@unittest.mock.patch('autoPyTorch.evaluation.train_evaluator.eval_function', + new=dummy_eval_function) @pytest.mark.parametrize('openml_id', (40981, )) -@pytest.mark.parametrize('resampling_strategy', (HoldoutValTypes.holdout_validation, - CrossValTypes.k_fold_cross_validation, - )) -def test_tabular_classification(openml_id, resampling_strategy, backend): +@pytest.mark.parametrize('resampling_strategy,resampling_strategy_args', + ((HoldoutValTypes.holdout_validation, None), + (CrossValTypes.k_fold_cross_validation, {'num_splits': CV_NUM_SPLITS}) + )) +def test_tabular_classification(openml_id, resampling_strategy, backend, resampling_strategy_args, n_samples): # Get the data and check that contents of data-manager make sense X, y = sklearn.datasets.fetch_openml( data_id=int(openml_id), return_X_y=True, as_frame=True ) + X, y = X.iloc[:n_samples], y.iloc[:n_samples] + X_train, X_test, y_train, y_test = sklearn.model_selection.train_test_split( X, y, random_state=1) @@ -59,21 +64,24 @@ def test_tabular_classification(openml_id, resampling_strategy, backend): estimator = TabularClassificationTask( backend=backend, resampling_strategy=resampling_strategy, + resampling_strategy_args=resampling_strategy_args, include_components=include ) - estimator.search( - X_train=X_train, y_train=y_train, - X_test=X_test, y_test=y_test, - optimize_metric='accuracy', - total_walltime_limit=150, - func_eval_time_limit_secs=50, - enable_traditional_pipeline=False, - ) + with unittest.mock.patch.object(estimator, '_do_dummy_prediction', new=dummy_do_dummy_prediction): + estimator.search( + X_train=X_train, y_train=y_train, + X_test=X_test, y_test=y_test, + optimize_metric='accuracy', + total_walltime_limit=30, + func_eval_time_limit_secs=5, + enable_traditional_pipeline=False, + ) # Internal dataset has expected settings assert estimator.dataset.task_type == 'tabular_classification' - expected_num_splits = 1 if resampling_strategy == HoldoutValTypes.holdout_validation else 5 + expected_num_splits = HOLDOUT_NUM_SPLITS if resampling_strategy == HoldoutValTypes.holdout_validation \ + else CV_NUM_SPLITS assert estimator.resampling_strategy == resampling_strategy assert estimator.dataset.resampling_strategy == resampling_strategy assert len(estimator.dataset.splits) == expected_num_splits @@ -134,7 +142,6 @@ def test_tabular_classification(openml_id, resampling_strategy, backend): assert os.path.exists(model_file), model_file model = estimator._backend.load_model_by_seed_and_id_and_budget( estimator.seed, successful_num_run, run_key.budget) - assert isinstance(model.named_steps['network'].get_network(), torch.nn.Module) elif resampling_strategy == CrossValTypes.k_fold_cross_validation: model_file = os.path.join( run_key_model_run_dir, @@ -145,9 +152,7 @@ def test_tabular_classification(openml_id, resampling_strategy, backend): model = estimator._backend.load_cv_model_by_seed_and_id_and_budget( estimator.seed, successful_num_run, 
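# Illustrative sketch (not from the patch): the API tests above cut runtime by patching the
# expensive pieces -- the training evaluation function is replaced at its import path and
# the estimator's _do_dummy_prediction is swapped on the instance with
# unittest.mock.patch.object(..., new=...). A hypothetical stand-alone demo of that
# pattern (Estimator and cheap_dummy_prediction are invented names):
import unittest.mock


class Estimator:
    def _do_dummy_prediction(self):
        raise RuntimeError("expensive call we never want in a unit test")

    def search(self):
        self._do_dummy_prediction()
        return "done"


def cheap_dummy_prediction():
    return None


estimator = Estimator()
with unittest.mock.patch.object(estimator, '_do_dummy_prediction', new=cheap_dummy_prediction):
    assert estimator.search() == "done"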
run_key.budget) assert isinstance(model, VotingClassifier) - assert len(model.estimators_) == 5 - assert isinstance(model.estimators_[0].named_steps['network'].get_network(), - torch.nn.Module) + assert len(model.estimators_) == CV_NUM_SPLITS else: pytest.fail(resampling_strategy) @@ -200,10 +205,13 @@ def test_tabular_classification(openml_id, resampling_strategy, backend): @pytest.mark.parametrize('openml_name', ("boston", )) -@pytest.mark.parametrize('resampling_strategy', (HoldoutValTypes.holdout_validation, - CrossValTypes.k_fold_cross_validation, - )) -def test_tabular_regression(openml_name, resampling_strategy, backend): +@unittest.mock.patch('autoPyTorch.evaluation.train_evaluator.eval_function', + new=dummy_eval_function) +@pytest.mark.parametrize('resampling_strategy,resampling_strategy_args', + ((HoldoutValTypes.holdout_validation, None), + (CrossValTypes.k_fold_cross_validation, {'num_splits': CV_NUM_SPLITS}) + )) +def test_tabular_regression(openml_name, resampling_strategy, backend, resampling_strategy_args, n_samples): # Get the data and check that contents of data-manager make sense X, y = sklearn.datasets.fetch_openml( @@ -211,6 +219,8 @@ def test_tabular_regression(openml_name, resampling_strategy, backend): return_X_y=True, as_frame=True ) + X, y = X.iloc[:n_samples], y.iloc[:n_samples] + # normalize values y = (y - y.mean()) / y.std() @@ -234,21 +244,24 @@ def test_tabular_regression(openml_name, resampling_strategy, backend): estimator = TabularRegressionTask( backend=backend, resampling_strategy=resampling_strategy, + resampling_strategy_args=resampling_strategy_args, include_components=include ) - estimator.search( - X_train=X_train, y_train=y_train, - X_test=X_test, y_test=y_test, - optimize_metric='r2', - total_walltime_limit=100, - func_eval_time_limit_secs=10, - enable_traditional_pipeline=False, - ) + with unittest.mock.patch.object(estimator, '_do_dummy_prediction', new=dummy_do_dummy_prediction): + estimator.search( + X_train=X_train, y_train=y_train, + X_test=X_test, y_test=y_test, + optimize_metric='r2', + total_walltime_limit=30, + func_eval_time_limit_secs=5, + enable_traditional_pipeline=False, + ) # Internal dataset has expected settings assert estimator.dataset.task_type == 'tabular_regression' - expected_num_splits = 1 if resampling_strategy == HoldoutValTypes.holdout_validation else 5 + expected_num_splits = HOLDOUT_NUM_SPLITS if resampling_strategy == HoldoutValTypes.holdout_validation\ + else CV_NUM_SPLITS assert estimator.resampling_strategy == resampling_strategy assert estimator.dataset.resampling_strategy == resampling_strategy assert len(estimator.dataset.splits) == expected_num_splits @@ -305,7 +318,6 @@ def test_tabular_regression(openml_name, resampling_strategy, backend): assert os.path.exists(model_file), model_file model = estimator._backend.load_model_by_seed_and_id_and_budget( estimator.seed, successful_num_run, run_key.budget) - assert isinstance(model.named_steps['network'].get_network(), torch.nn.Module) elif resampling_strategy == CrossValTypes.k_fold_cross_validation: model_file = os.path.join( run_key_model_run_dir, @@ -315,9 +327,7 @@ def test_tabular_regression(openml_name, resampling_strategy, backend): model = estimator._backend.load_cv_model_by_seed_and_id_and_budget( estimator.seed, successful_num_run, run_key.budget) assert isinstance(model, VotingRegressor) - assert len(model.estimators_) == 5 - assert isinstance(model.estimators_[0].named_steps['network'].get_network(), - torch.nn.Module) + assert len(model.estimators_) 
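# Illustrative sketch (not from the patch): passing
# resampling_strategy_args={'num_splits': CV_NUM_SPLITS} shrinks k-fold cross-validation
# from the previous 5 folds to 2, which is why the cross-validated voting model is now
# expected to hold exactly CV_NUM_SPLITS fitted pipelines. The same relation shown with
# plain scikit-learn splitting (the data here is arbitrary):
import numpy as np
from sklearn.model_selection import KFold

CV_NUM_SPLITS = 2
X = np.arange(20).reshape(10, 2)

splits = list(KFold(n_splits=CV_NUM_SPLITS).split(X))
assert len(splits) == CV_NUM_SPLITS  # one fitted pipeline per (train, validation) split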
== CV_NUM_SPLITS else: pytest.fail(resampling_strategy) diff --git a/test/test_api/utils.py b/test/test_api/utils.py new file mode 100644 index 000000000..0fa84b55b --- /dev/null +++ b/test/test_api/utils.py @@ -0,0 +1,98 @@ +from autoPyTorch.constants import REGRESSION_TASKS +from autoPyTorch.evaluation.abstract_evaluator import ( + DummyClassificationPipeline, + DummyRegressionPipeline, + fit_and_suppress_warnings +) +from autoPyTorch.evaluation.train_evaluator import TrainEvaluator + + +# ======== +# Fixtures +# ======== +class DummyTrainEvaluator(TrainEvaluator): + + def _fit_and_predict(self, pipeline, fold: int, train_indices, + test_indices, + add_pipeline_to_self + ): + + if self.task_type in REGRESSION_TASKS: + pipeline = DummyRegressionPipeline(config=1) + else: + pipeline = DummyClassificationPipeline(config=1) + + self.indices[fold] = ((train_indices, test_indices)) + + X = {'train_indices': train_indices, + 'val_indices': test_indices, + 'split_id': fold, + 'num_run': self.num_run, + **self.fit_dictionary} # fit dictionary + y = None + fit_and_suppress_warnings(self.logger, pipeline, X, y) + self.logger.info("Model fitted, now predicting") + ( + Y_train_pred, + Y_opt_pred, + Y_valid_pred, + Y_test_pred + ) = self._predict( + pipeline, + train_indices=train_indices, + test_indices=test_indices, + ) + + if add_pipeline_to_self: + self.pipeline = pipeline + else: + self.pipelines[fold] = pipeline + + return Y_train_pred, Y_opt_pred, Y_valid_pred, Y_test_pred + + +# create closure for evaluating an algorithm +def dummy_eval_function( + backend, + queue, + metric, + budget: float, + config, + seed: int, + output_y_hat_optimization: bool, + num_run: int, + include, + exclude, + disable_file_output, + pipeline_config=None, + budget_type=None, + init_params=None, + logger_port=None, + all_supported_metrics=True, + search_space_updates=None, + instance: str = None, +) -> None: + evaluator = DummyTrainEvaluator( + backend=backend, + queue=queue, + metric=metric, + configuration=config, + seed=seed, + num_run=num_run, + output_y_hat_optimization=output_y_hat_optimization, + include=include, + exclude=exclude, + disable_file_output=disable_file_output, + init_params=init_params, + budget=budget, + budget_type=budget_type, + logger_port=logger_port, + all_supported_metrics=all_supported_metrics, + pipeline_config=pipeline_config, + search_space_updates=search_space_updates + ) + evaluator.fit_predict_and_loss() + + +def dummy_do_dummy_prediction(): + return diff --git a/test/test_ensemble/test_ensemble.py b/test/test_ensemble/test_ensemble.py index e0e4c3bb1..cd0f02e72 100644 --- a/test/test_ensemble/test_ensemble.py +++ b/test/test_ensemble/test_ensemble.py @@ -690,6 +690,8 @@ def test_ensemble_builder_process_realrun(dask_client, ensemble_backend): @flaky(max_runs=3) @unittest.mock.patch('autoPyTorch.ensemble.ensemble_builder.EnsembleBuilder.fit_ensemble') +@pytest.mark.skipif(sys.version_info >= (3, 7), + reason="Causes out-of-memory Errors in CI") def test_ensemble_builder_nbest_remembered(fit_ensemble, ensemble_backend, dask_client): """ Makes sure ensemble builder returns the size of the ensemble that pynisher allowed diff --git a/test/test_pipeline/components/preprocessing/test_feature_preprocessor.py b/test/test_pipeline/components/preprocessing/test_feature_preprocessor.py index a5c342804..822112fca 100644 --- a/test/test_pipeline/components/preprocessing/test_feature_preprocessor.py +++ b/test/test_pipeline/components/preprocessing/test_feature_preprocessor.py @@ -14,6 +14,11 @@ 
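# Illustrative sketch (not from the patch): DummyTrainEvaluator above keeps the evaluator's
# protocol (fit on the train indices, predict on the validation indices) but substitutes a
# trivial model for the real deep-learning pipeline, so the functional API tests exercise
# bookkeeping rather than training. A hypothetical reduction of that idea using
# scikit-learn's DummyClassifier (fit_and_predict_fold is an invented helper):
import numpy as np
from sklearn.dummy import DummyClassifier


def fit_and_predict_fold(X, y, train_indices, val_indices):
    # Stand-in for the real pipeline: constant predictions are enough to drive the
    # surrounding loss and ensemble machinery in a unit test.
    model = DummyClassifier(strategy='most_frequent')
    model.fit(X[train_indices], y[train_indices])
    return model.predict_proba(X[train_indices]), model.predict_proba(X[val_indices])


X = np.random.rand(10, 3)
y = np.array([0, 1] * 5)
train_pred, val_pred = fit_and_predict_fold(X, y, np.arange(8), np.arange(8, 10))
assert val_pred.shape == (2, 2)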
from autoPyTorch.pipeline.tabular_classification import TabularClassificationPipeline +@pytest.fixture +def random_state(): + return 11 + + @pytest.fixture(params=['TruncatedSVD', 'PolynomialFeatures', 'PowerTransformer', 'Nystroem', 'KernelPCA', 'RandomKitchenSinks']) def preprocessor(request): @@ -24,10 +29,10 @@ def preprocessor(request): 'classification_numerical_and_categorical'], indirect=True) class TestFeaturePreprocessors: - def test_feature_preprocessor(self, fit_dictionary_tabular, preprocessor): + def test_feature_preprocessor(self, fit_dictionary_tabular, preprocessor, random_state): preprocessor = FeatureProprocessorChoice( dataset_properties=fit_dictionary_tabular['dataset_properties'] - ).get_components()[preprocessor]() + ).get_components()[preprocessor](random_state=random_state) configuration = preprocessor. \ get_hyperparameter_search_space(dataset_properties=fit_dictionary_tabular["dataset_properties"]) \ .get_default_configuration().get_dictionary() diff --git a/test/test_pipeline/components/setup/test_setup_networks.py b/test/test_pipeline/components/setup/test_setup_networks.py index 6826d7ef2..93ded0102 100644 --- a/test/test_pipeline/components/setup/test_setup_networks.py +++ b/test/test_pipeline/components/setup/test_setup_networks.py @@ -31,6 +31,9 @@ def test_pipeline_fit(self, fit_dictionary_tabular, embedding, backbone, head): """This test makes sure that the pipeline is able to fit every combination of network embedding, backbone, head""" + # increase number of epochs to test for performance + fit_dictionary_tabular['epochs'] = 50 + include = {'network_backbone': [backbone], 'network_head': [head], 'network_embedding': [embedding]} if len(fit_dictionary_tabular['dataset_properties'] diff --git a/test/test_pipeline/components/training/base.py b/test/test_pipeline/components/training/base.py index 10d9ea416..d7cb2ebd1 100644 --- a/test/test_pipeline/components/training/base.py +++ b/test/test_pipeline/components/training/base.py @@ -1,5 +1,4 @@ import logging -import unittest from sklearn.datasets import make_classification, make_regression @@ -17,14 +16,16 @@ from autoPyTorch.pipeline.components.training.trainer.base_trainer import BaseTrainerComponent, BudgetTracker -class BaseTraining(unittest.TestCase): +class BaseTraining: def prepare_trainer(self, + n_samples: int, trainer: BaseTrainerComponent, - task_type: int): + task_type: int, + epochs=50): if task_type in CLASSIFICATION_TASKS: X, y = make_classification( - n_samples=5000, + n_samples=n_samples, n_features=4, n_informative=3, n_redundant=1, @@ -42,7 +43,7 @@ def prepare_trainer(self, elif task_type in REGRESSION_TASKS: X, y = make_regression( - n_samples=5000, + n_samples=n_samples, n_features=4, n_informative=3, n_targets=1, @@ -78,7 +79,7 @@ def prepare_trainer(self, device = torch.device('cpu') logger = logging.getLogger('StandardTrainer - test') metrics = get_metrics(dataset_properties) - epochs = 1000 + epochs = epochs budget_tracker = BudgetTracker( budget_type='epochs', max_epochs=epochs, diff --git a/test/test_pipeline/components/training/test_training.py b/test/test_pipeline/components/training/test_training.py index d6964fa14..98ea47716 100644 --- a/test/test_pipeline/components/training/test_training.py +++ b/test/test_pipeline/components/training/test_training.py @@ -6,6 +6,8 @@ import numpy as np +import pytest + from sklearn.base import clone import torch @@ -30,6 +32,9 @@ from test.test_pipeline.components.training.base import BaseTraining # noqa (E402: module level import not at top 
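# Illustrative sketch (not from the patch): prepare_trainer now takes the sample count and
# the epoch budget as arguments, so most trainer tests run on the shared n_samples fixture
# with a small budget, while the overfitting tests pass a much larger one. A hypothetical
# reduction of the data-preparation branch (prepare_data is an invented name):
from sklearn.datasets import make_classification, make_regression


def prepare_data(task: str, n_samples: int = 200):
    if task == 'classification':
        return make_classification(n_samples=n_samples, n_features=4,
                                    n_informative=3, n_redundant=1, random_state=0)
    return make_regression(n_samples=n_samples, n_features=4,
                           n_informative=3, n_targets=1, random_state=0)


X, y = prepare_data('classification')
assert X.shape == (200, 4)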
of file) +OVERFIT_EPOCHS = 1000 + + class BaseDataLoaderTest(unittest.TestCase): def test_get_set_config_space(self): """ @@ -121,9 +126,8 @@ def test_fit_transform(self): loader.val_data_loader) -class BaseTrainerComponentTest(BaseTraining, unittest.TestCase): - - def test_evaluate(self): +class TestBaseTrainerComponent(BaseTraining): + def test_evaluate(self, n_samples): """ Makes sure we properly evaluate data, returning a proper loss and metric @@ -135,11 +139,12 @@ def test_evaluate(self): loader, criterion, epochs, - logger) = self.prepare_trainer(BaseTrainerComponent(), + logger) = self.prepare_trainer(n_samples, + BaseTrainerComponent(), constants.TABULAR_CLASSIFICATION) prev_loss, prev_metrics = trainer.evaluate(loader, epoch=1, writer=None) - self.assertIn('accuracy', prev_metrics) + assert 'accuracy' in prev_metrics # Fit the model self.train_model(model, @@ -151,21 +156,23 @@ def test_evaluate(self): # Loss and metrics should have improved after fit # And the prediction should be better than random loss, metrics = trainer.evaluate(loader, epoch=1, writer=None) - self.assertGreater(prev_loss, loss) - self.assertGreater(metrics['accuracy'], prev_metrics['accuracy']) - self.assertGreater(metrics['accuracy'], 0.5) + assert prev_loss > loss + assert metrics['accuracy'] > prev_metrics['accuracy'] + assert metrics['accuracy'] > 0.5 -class StandardTrainerTest(BaseTraining, unittest.TestCase): - def test_regression_epoch_training(self): +class StandardTrainerTest(BaseTraining): + def test_regression_epoch_training(self, n_samples): (trainer, _, _, loader, _, epochs, - logger) = self.prepare_trainer(StandardTrainer(), - constants.TABULAR_REGRESSION) + logger) = self.prepare_trainer(n_samples, + StandardTrainer(), + constants.TABULAR_REGRESSION, + OVERFIT_EPOCHS) # Train the model counter = 0 @@ -176,17 +183,19 @@ def test_regression_epoch_training(self): r2 = metrics['r2'] if counter > epochs: - self.fail(f"Could not overfit a dummy regression under {epochs} epochs") + pytest.fail(f"Could not overfit a dummy regression under {epochs} epochs") - def test_classification_epoch_training(self): + def test_classification_epoch_training(self, n_samples): (trainer, _, _, loader, _, epochs, - logger) = self.prepare_trainer(StandardTrainer(), - constants.TABULAR_CLASSIFICATION) + logger) = self.prepare_trainer(n_samples, + StandardTrainer(), + constants.TABULAR_CLASSIFICATION, + OVERFIT_EPOCHS) # Train the model counter = 0 @@ -197,19 +206,21 @@ def test_classification_epoch_training(self): accuracy = metrics['accuracy'] if counter > epochs: - self.fail(f"Could not overfit a dummy classification under {epochs} epochs") + pytest.fail(f"Could not overfit a dummy classification under {epochs} epochs") -class MixUpTrainerTest(BaseTraining, unittest.TestCase): - def test_classification_epoch_training(self): +class MixUpTrainerTest(BaseTraining): + def test_classification_epoch_training(self, n_samples): (trainer, _, _, loader, _, epochs, - logger) = self.prepare_trainer(MixUpTrainer(alpha=0.5), - constants.TABULAR_CLASSIFICATION) + logger) = self.prepare_trainer(n_samples, + MixUpTrainer(alpha=0.5), + constants.TABULAR_CLASSIFICATION, + OVERFIT_EPOCHS) # Train the model counter = 0 @@ -220,7 +231,7 @@ def test_classification_epoch_training(self): accuracy = metrics['accuracy'] if counter > epochs: - self.fail(f"Could not overfit a dummy classification under {epochs} epochs") + pytest.fail(f"Could not overfit a dummy classification under {epochs} epochs") class TrainerTest(unittest.TestCase): diff --git 
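# Illustrative sketch (not from the patch): the trainer tests above move from
# unittest.TestCase to plain pytest classes so that fixtures such as `n_samples` can be
# injected as arguments; self.assert* calls become bare asserts and self.fail becomes
# pytest.fail. A hypothetical minimal test in the new style (values are placeholders):
import pytest


@pytest.fixture
def n_samples():
    return 200


class TestTrainerPytestStyle:                    # no unittest.TestCase base class
    def test_metric_improves(self, n_samples):   # fixture injected as a plain argument
        prev_accuracy, accuracy = 0.5, 0.8       # placeholder numbers, only for the pattern
        assert accuracy > prev_accuracy          # replaces self.assertGreater(...)
        if accuracy <= prev_accuracy:
            pytest.fail(f"could not improve accuracy on {n_samples} samples")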
a/test/test_pipeline/test_tabular_classification.py b/test/test_pipeline/test_tabular_classification.py index 9497c8457..c90eb2a04 100644 --- a/test/test_pipeline/test_tabular_classification.py +++ b/test/test_pipeline/test_tabular_classification.py @@ -1,5 +1,6 @@ import os import re +import unittest from ConfigSpace.hyperparameters import ( CategoricalHyperparameter, @@ -7,8 +8,6 @@ UniformIntegerHyperparameter, ) -import flaky - import numpy as np import pytest @@ -53,7 +52,6 @@ def _assert_pipeline_search_space(self, pipeline, search_space_updates): elif isinstance(hyperparameter, CategoricalHyperparameter): assert update.value_range == hyperparameter.choices - @flaky.flaky(max_runs=2) def test_pipeline_fit(self, fit_dictionary_tabular): """This test makes sure that the pipeline is able to fit given random combinations of hyperparameters across the pipeline""" @@ -81,7 +79,6 @@ def test_pipeline_fit(self, fit_dictionary_tabular): # Make sure a network was fit assert isinstance(pipeline.named_steps['network'].get_network(), torch.nn.Module) - @flaky.flaky(max_runs=3) def test_pipeline_predict(self, fit_dictionary_tabular): """This test makes sure that the pipeline is able to predict given a random configuration""" @@ -93,7 +90,10 @@ def test_pipeline_predict(self, fit_dictionary_tabular): config = cs.sample_configuration() pipeline.set_hyperparameters(config) - pipeline.fit(fit_dictionary_tabular) + with unittest.mock.patch.object(pipeline.named_steps['trainer'].choice, 'train_epoch') \ + as patch_train: + patch_train.return_value = 1, {} + pipeline.fit(fit_dictionary_tabular) # we expect the output to have the same batch size as the test input, # and number of outputs per batch sample equal to the number of outputs @@ -117,7 +117,10 @@ def test_pipeline_predict_proba(self, fit_dictionary_tabular): pipeline.set_hyperparameters(config) try: - pipeline.fit(fit_dictionary_tabular) + with unittest.mock.patch.object(pipeline.named_steps['trainer'].choice, 'train_epoch') \ + as patch_train: + patch_train.return_value = 1, {} + pipeline.fit(fit_dictionary_tabular) except Exception as e: pytest.fail(f"Failed on config={config} with {e}") @@ -129,7 +132,6 @@ def test_pipeline_predict_proba(self, fit_dictionary_tabular): assert isinstance(prediction, np.ndarray) assert prediction.shape == expected_output_shape - @flaky.flaky(max_runs=2) def test_pipeline_transform(self, fit_dictionary_tabular): """ In the context of autopytorch, transform expands a fit dictionary with @@ -144,8 +146,11 @@ def test_pipeline_transform(self, fit_dictionary_tabular): config = cs.sample_configuration() pipeline.set_hyperparameters(config) - # We do not want to make the same early preprocessing operation to the fit dictionary - pipeline.fit(fit_dictionary_tabular.copy()) + with unittest.mock.patch.object(pipeline.named_steps['trainer'].choice, 'train_epoch') \ + as patch_train: + patch_train.return_value = 1, {} + # We do not want to make the same early preprocessing operation to the fit dictionary + pipeline.fit(fit_dictionary_tabular.copy()) transformed_fit_dictionary_tabular = pipeline.transform(fit_dictionary_tabular) @@ -173,8 +178,10 @@ def test_default_configuration(self, fit_dictionary_tabular, is_small_preprocess pipeline = TabularClassificationPipeline( dataset_properties=fit_dictionary_tabular['dataset_properties']) - - pipeline.fit(fit_dictionary_tabular) + with unittest.mock.patch.object(pipeline.named_steps['trainer'].choice, 'train_epoch') \ + as patch_train: + patch_train.return_value = 1, {} + 
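# Illustrative sketch (not from the patch): the pipeline tests above stub out the trainer's
# train_epoch with a MagicMock whose return_value is a (loss, metrics)-style pair, so
# pipeline.fit() walks through the whole fit protocol without running real epochs. A
# hypothetical stand-alone demo of that patching pattern (Trainer and Pipeline are
# invented names):
import unittest.mock


class Trainer:
    def train_epoch(self):
        raise RuntimeError("real training -- too slow for a unit test")


class Pipeline:
    def __init__(self):
        self.trainer = Trainer()

    def fit(self):
        loss, metrics = self.trainer.train_epoch()   # the call being patched
        return loss, metrics


pipeline = Pipeline()
with unittest.mock.patch.object(pipeline.trainer, 'train_epoch') as patch_train:
    patch_train.return_value = 1, {}                 # mimic "one epoch, no metrics"
    assert pipeline.fit() == (1, {})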
pipeline.fit(fit_dictionary_tabular) def test_remove_key_check_requirements(self, fit_dictionary_tabular): """Makes sure that when a key is removed from X, correct error is outputted""" @@ -377,6 +384,8 @@ def test_constant_pipeline_iris(fit_dictionary_tabular): search_space_updates=search_space_updates) fit_dictionary_tabular['additional_metrics'] = ['balanced_accuracy'] + # increase number of epochs to test for performance + fit_dictionary_tabular['epochs'] = 50 try: pipeline.fit(fit_dictionary_tabular) @@ -422,6 +431,10 @@ def test_pipeline_score(fit_dictionary_tabular_dummy): given the default configuration""" X = fit_dictionary_tabular_dummy['X_train'].copy() y = fit_dictionary_tabular_dummy['y_train'].copy() + + # increase number of epochs to test for performance + fit_dictionary_tabular_dummy['epochs'] = 50 + pipeline = TabularClassificationPipeline( dataset_properties=fit_dictionary_tabular_dummy['dataset_properties']) @@ -431,6 +444,9 @@ def test_pipeline_score(fit_dictionary_tabular_dummy): pipeline.fit(fit_dictionary_tabular_dummy) + # Ensure that the network is an instance of torch Module + assert isinstance(pipeline.named_steps['network'].get_network(), torch.nn.Module) + # we expect the output to have the same batch size as the test input, # and number of outputs per batch sample equal to the number of classes ("num_classes" in dataset_properties) expected_output_shape = (X.shape[0], diff --git a/test/test_pipeline/test_tabular_regression.py b/test/test_pipeline/test_tabular_regression.py index 0215f996f..3df3c6c41 100644 --- a/test/test_pipeline/test_tabular_regression.py +++ b/test/test_pipeline/test_tabular_regression.py @@ -1,5 +1,6 @@ import os import re +import unittest from ConfigSpace.hyperparameters import ( CategoricalHyperparameter, @@ -18,7 +19,6 @@ from autoPyTorch.pipeline.tabular_regression import TabularRegressionPipeline from autoPyTorch.utils.common import FitRequirement from autoPyTorch.utils.hyperparameter_search_space_update import ( - HyperparameterSearchSpaceUpdate, HyperparameterSearchSpaceUpdates, parse_hyperparameter_search_space_updates ) @@ -90,7 +90,10 @@ def test_pipeline_predict(self, fit_dictionary_tabular): config = cs.sample_configuration() pipeline.set_hyperparameters(config) - pipeline.fit(fit_dictionary_tabular) + with unittest.mock.patch.object(pipeline.named_steps['trainer'].choice, 'train_epoch') \ + as patch_train: + patch_train.return_value = 1, {} + pipeline.fit(fit_dictionary_tabular) # we expect the output to have the same batch size as the test input, # and number of outputs per batch sample equal to the number of targets ("output_shape" in dataset_properties) @@ -114,8 +117,11 @@ def test_pipeline_transform(self, fit_dictionary_tabular): config = cs.sample_configuration() pipeline.set_hyperparameters(config) - # We do not want to make the same early preprocessing operation to the fit dictionary - pipeline.fit(fit_dictionary_tabular.copy()) + with unittest.mock.patch.object(pipeline.named_steps['trainer'].choice, 'train_epoch') \ + as patch_train: + patch_train.return_value = 1, {} + # We do not want to make the same early preprocessing operation to the fit dictionary + pipeline.fit(fit_dictionary_tabular.copy()) transformed_fit_dictionary_tabular = pipeline.transform(fit_dictionary_tabular) @@ -144,7 +150,10 @@ def test_default_configuration(self, fit_dictionary_tabular, is_small_preprocess pipeline = TabularRegressionPipeline( dataset_properties=fit_dictionary_tabular['dataset_properties']) - pipeline.fit(fit_dictionary_tabular) 
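# Illustrative sketch (not from the patch): the check that the fitted network is a
# torch.nn.Module now lives in the pipeline score tests, accessed through
# pipeline.named_steps['network'].get_network(). The assertion itself reduces to the
# following (the Sequential model is only a stand-in for the fitted network):
import torch.nn as nn

network = nn.Sequential(nn.Linear(4, 8), nn.ReLU(), nn.Linear(8, 2))
assert isinstance(network, nn.Module)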
+ with unittest.mock.patch.object(pipeline.named_steps['trainer'].choice, 'train_epoch') \ + as patch_train: + patch_train.return_value = 1, {} + pipeline.fit(fit_dictionary_tabular) def test_remove_key_check_requirements(self, fit_dictionary_tabular): """Makes sure that when a key is removed from X, correct error is outputted""" @@ -279,23 +288,18 @@ def test_set_range_search_space_updates(self, fit_dictionary_tabular): assert 'fully_connected:units_layer' in e.args[0] -@pytest.mark.parametrize("fit_dictionary_tabular_dummy", ["regression"], indirect=True) +@pytest.mark.parametrize("fit_dictionary_tabular_dummy", ['regression'], indirect=True) def test_pipeline_score(fit_dictionary_tabular_dummy): """This test makes sure that the pipeline is able to achieve a decent score on dummy data given the default configuration""" + # increase number of epochs to test for performance + fit_dictionary_tabular_dummy['epochs'] = 50 + X = fit_dictionary_tabular_dummy['X_train'].copy() y = fit_dictionary_tabular_dummy['y_train'].copy() - # lower the learning rate of the optimizer until seeding properly works - # with the default learning rate of 0.01 regression sometimes does not converge pipeline = TabularRegressionPipeline( dataset_properties=fit_dictionary_tabular_dummy['dataset_properties'], - search_space_updates=HyperparameterSearchSpaceUpdates([ - HyperparameterSearchSpaceUpdate("optimizer", - "AdamOptimizer:lr", - value_range=[0.0001, 0.001], - default_value=0.001) - ]) ) cs = pipeline.get_hyperparameter_search_space() @@ -304,6 +308,9 @@ def test_pipeline_score(fit_dictionary_tabular_dummy): pipeline.fit(fit_dictionary_tabular_dummy) + # Ensure that the network is an instance of torch Module + assert isinstance(pipeline.named_steps['network'].get_network(), torch.nn.Module) + # we expect the output to have the same batch size as the test input, # and number of outputs per batch sample equal to the number of targets ("output_shape" in dataset_properties) expected_output_shape = (X.shape[0],