From a5b4812a1fe163ce66f7137f17cb009a81cf1e63 Mon Sep 17 00:00:00 2001 From: Sven Klaassen <47529404+SvenKlaassen@users.noreply.github.com> Date: Tue, 25 Apr 2023 13:02:32 +0200 Subject: [PATCH 01/67] update conditional targets --- doubleml/_utils.py | 6 ++++++ doubleml/double_ml_cvar.py | 5 ++--- doubleml/double_ml_irm.py | 10 +++------- doubleml/double_ml_lpq.py | 13 ++++++------- doubleml/double_ml_pq.py | 5 ++--- 5 files changed, 19 insertions(+), 20 deletions(-) diff --git a/doubleml/_utils.py b/doubleml/_utils.py index e2d450f2..c95d92e7 100644 --- a/doubleml/_utils.py +++ b/doubleml/_utils.py @@ -371,3 +371,9 @@ def abs_ipw_score(theta): method='brent') ipw_est = res.x return ipw_est + + +def _cond_targets(target, cond_sample): + cond_target = target.astype(float) + cond_target[np.invert(cond_sample)] = np.nan + return cond_target diff --git a/doubleml/double_ml_cvar.py b/doubleml/double_ml_cvar.py index 6958b6b2..31f446ea 100644 --- a/doubleml/double_ml_cvar.py +++ b/doubleml/double_ml_cvar.py @@ -8,7 +8,7 @@ from .double_ml_score_mixins import LinearScoreMixin from ._utils import _dml_cv_predict, _trimm, _predict_zero_one_propensity, _check_contains_iv, \ _check_zero_one_treatment, _check_quantile, _check_treatment, _check_trimming, _check_score, \ - _normalize_ipw, _dml_tune, _get_bracket_guess, _solve_ipw_score + _normalize_ipw, _dml_tune, _get_bracket_guess, _solve_ipw_score, _cond_targets from .double_ml_data import DoubleMLData from ._utils_resampling import DoubleMLResampling @@ -295,8 +295,7 @@ def ipw_score(theta): m_hat['targets'] = d # set the target for g to be a float and only relevant values - g_hat['targets'] = g_hat['targets'].astype(float) - g_hat['targets'][d != self.treatment] = np.nan + g_hat['targets'] = _cond_targets(g_hat['targets'], cond_sample=(d == self.treatment)) if return_models: g_hat['models'] = fitted_models['ml_g'] diff --git a/doubleml/double_ml_irm.py b/doubleml/double_ml_irm.py index edb43cc1..063d2d6b 100644 --- 
a/doubleml/double_ml_irm.py +++ b/doubleml/double_ml_irm.py @@ -11,7 +11,7 @@ from .double_ml_score_mixins import LinearScoreMixin from ._utils import _dml_cv_predict, _get_cond_smpls, _dml_tune, _check_finite_predictions, _check_is_propensity, \ - _trimm, _normalize_ipw + _trimm, _normalize_ipw, _cond_targets class DoubleMLIRM(LinearScoreMixin, DoubleML): @@ -212,9 +212,7 @@ def _nuisance_est(self, smpls, n_jobs_cv, return_models=False): est_params=self._get_params('ml_g0'), method=self._predict_method['ml_g'], return_models=return_models) _check_finite_predictions(g_hat0['preds'], self._learner['ml_g'], 'ml_g', smpls) - # adjust target values to consider only compatible subsamples - g_hat0['targets'] = g_hat0['targets'].astype(float) - g_hat0['targets'][d == 1] = np.nan + g_hat0['targets'] = _cond_targets(g_hat0['targets'], cond_sample=(d == 0)) if self._dml_data.binary_outcome: binary_preds = (type_of_target(g_hat0['preds']) == 'binary') @@ -231,9 +229,7 @@ def _nuisance_est(self, smpls, n_jobs_cv, return_models=False): est_params=self._get_params('ml_g1'), method=self._predict_method['ml_g'], return_models=return_models) _check_finite_predictions(g_hat1['preds'], self._learner['ml_g'], 'ml_g', smpls) - # adjust target values to consider only compatible subsamples - g_hat1['targets'] = g_hat1['targets'].astype(float) - g_hat1['targets'][d == 0] = np.nan + g_hat1['targets'] = _cond_targets(g_hat1['targets'], cond_sample=(d == 1)) if self._dml_data.binary_outcome: binary_preds = (type_of_target(g_hat1['preds']) == 'binary') diff --git a/doubleml/double_ml_lpq.py b/doubleml/double_ml_lpq.py index 658bfe13..97aebb23 100644 --- a/doubleml/double_ml_lpq.py +++ b/doubleml/double_ml_lpq.py @@ -9,7 +9,7 @@ from .double_ml_score_mixins import NonLinearScoreMixin from ._utils import _dml_cv_predict, _trimm, _predict_zero_one_propensity, _check_zero_one_treatment, _check_score,\ _check_trimming, _check_quantile, _check_treatment, _get_bracket_guess, _default_kde, 
_normalize_ipw, _dml_tune, \ - _solve_ipw_score + _solve_ipw_score, _cond_targets from .double_ml_data import DoubleMLData from ._utils_resampling import DoubleMLResampling @@ -423,15 +423,14 @@ def ipw_score(theta): # save targets and models m_z_hat['targets'] = z + # set targets to relevant subsample - g_du_z0_hat['targets'][z == 1] = np.nan - g_du_z1_hat['targets'][z == 0] = np.nan + g_du_z0_hat['targets'] = _cond_targets(g_du_z0_hat['targets'], cond_sample=(z == 0)) + g_du_z1_hat['targets'] = _cond_targets(g_du_z1_hat['targets'], cond_sample=(z == 1)) # the predictions of both should only be evaluated conditional on z == 0 or z == 1 - m_d_z0_hat['targets'][z == 0] = d[z == 0] - m_d_z0_hat['targets'][z == 1] = np.nan - m_d_z1_hat['targets'][z == 1] = d[z == 1] - m_d_z1_hat['targets'][z == 0] = np.nan + m_d_z0_hat['targets'] = _cond_targets(d, cond_sample=(z == 0)) + m_d_z1_hat['targets'] = _cond_targets(d, cond_sample=(z == 1)) if return_models: m_z_hat['models'] = fitted_models['ml_m_z'] diff --git a/doubleml/double_ml_pq.py b/doubleml/double_ml_pq.py index 0bcd5b64..e59e2dda 100644 --- a/doubleml/double_ml_pq.py +++ b/doubleml/double_ml_pq.py @@ -8,7 +8,7 @@ from .double_ml_score_mixins import NonLinearScoreMixin from ._utils import _dml_cv_predict, _trimm, _predict_zero_one_propensity, _check_contains_iv, \ _check_zero_one_treatment, _check_quantile, _check_treatment, _check_trimming, _check_score, _get_bracket_guess, \ - _default_kde, _normalize_ipw, _dml_tune, _solve_ipw_score + _default_kde, _normalize_ipw, _dml_tune, _solve_ipw_score, _cond_targets from .double_ml_data import DoubleMLData from ._utils_resampling import DoubleMLResampling @@ -337,8 +337,7 @@ def ipw_score(theta): m_hat['targets'] = d # set the target for g to be a float and only relevant values - g_hat['targets'] = g_hat['targets'].astype(float) - g_hat['targets'][d != self.treatment] = np.nan + g_hat['targets'] = _cond_targets(g_hat['targets'], cond_sample=(d == self.treatment)) if 
return_models: g_hat['models'] = fitted_models['ml_g'] From a7b46288e8c02edfc1d31de809b7fa4ce8cfb4ff Mon Sep 17 00:00:00 2001 From: Sven Klaassen <47529404+SvenKlaassen@users.noreply.github.com> Date: Tue, 25 Apr 2023 13:03:09 +0200 Subject: [PATCH 02/67] update fit method to supply a dict --- doubleml/double_ml.py | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/doubleml/double_ml.py b/doubleml/double_ml.py index 1e37cb5c..7ce819b5 100644 --- a/doubleml/double_ml.py +++ b/doubleml/double_ml.py @@ -458,7 +458,7 @@ def __psi_deriv(self): def __all_se(self): return self._all_se[self._i_treat, self._i_rep] - def fit(self, n_jobs_cv=None, store_predictions=True, store_models=False): + def fit(self, n_jobs_cv=None, store_predictions=True, store_models=False, supply_predictions=None): """ Estimate DoubleML models. @@ -477,6 +477,11 @@ def fit(self, n_jobs_cv=None, store_predictions=True, store_models=False): to analyze the fitted models or extract information like variable importance. Default is ``False``. + supply_predictions : None or dict + If `None` all models for the learners are fitted and evaluated. If a dictionary containing predictions + for a specific learner is supplied, the model will use the supplied nuisance predictions instead. + Default is `None`. + Returns ------- self : object @@ -495,6 +500,11 @@ def fit(self, n_jobs_cv=None, store_predictions=True, store_models=False): raise TypeError('store_models must be True or False. ' f'Got {str(store_models)}.') + if supply_predictions is not None: + if not isinstance(supply_predictions, dict): + raise TypeError('The predictions must be a dictionary. 
' + f'{str(supply_predictions)} of type {str(type(supply_predictions))} was passed.') + # initialize rmse arrays for nuisance functions evaluation self._initialize_rmses() From 8b5e77821fd4542359d0015930ed26243680d632 Mon Sep 17 00:00:00 2001 From: Sven Klaassen <47529404+SvenKlaassen@users.noreply.github.com> Date: Wed, 26 Apr 2023 11:49:41 +0200 Subject: [PATCH 03/67] checks on supplied_predictions --- doubleml/double_ml.py | 55 +++++++++++++-- doubleml/tests/test_doubleml_exceptions.py | 78 ++++++++++++++++++++++ 2 files changed, 127 insertions(+), 6 deletions(-) diff --git a/doubleml/double_ml.py b/doubleml/double_ml.py index 7ce819b5..4422cef6 100644 --- a/doubleml/double_ml.py +++ b/doubleml/double_ml.py @@ -458,7 +458,7 @@ def __psi_deriv(self): def __all_se(self): return self._all_se[self._i_treat, self._i_rep] - def fit(self, n_jobs_cv=None, store_predictions=True, store_models=False, supply_predictions=None): + def fit(self, n_jobs_cv=None, store_predictions=True, store_models=False, supplied_predictions=None): """ Estimate DoubleML models. @@ -477,7 +477,7 @@ def fit(self, n_jobs_cv=None, store_predictions=True, store_models=False, supply to analyze the fitted models or extract information like variable importance. Default is ``False``. - supply_predictions : None or dict + supplied_predictions : None or dict If `None` all models for the learners are fitted and evaluated. If a dictionary containing predictions for a specific learner is supplied, the model will use the supplied nuisance predictions instead. Default is `None`. @@ -500,10 +500,8 @@ def fit(self, n_jobs_cv=None, store_predictions=True, store_models=False, supply raise TypeError('store_models must be True or False. ' f'Got {str(store_models)}.') - if supply_predictions is not None: - if not isinstance(supply_predictions, dict): - raise TypeError('The predictions must be a dictionary. 
' - f'{str(supply_predictions)} of type {str(type(supply_predictions))} was passed.') + # check prediction format + self._check_supplied_predictions(supplied_predictions) # initialize rmse arrays for nuisance functions evaluation self._initialize_rmses() @@ -1003,6 +1001,51 @@ def _check_learner(learner, learner_name, regressor, classifier): return learner_is_classifier + def _check_supplied_predictions(self, supplied_predictions): + if supplied_predictions is not None: + if not isinstance(supplied_predictions, dict): + raise TypeError('The predictions must be a dictionary. ' + f'{str(supplied_predictions)} of type {str(type(supplied_predictions))} was passed.') + + if self.n_rep > 1: + raise NotImplementedError('supplied_predictions is not yet implmented for ``n_rep > 1``.') + + supplied_treatments = list(supplied_predictions.keys()) + valid_treatments = self._dml_data.d_cols + if not set(supplied_treatments).issubset(valid_treatments): + raise ValueError('Invalid supplied_predictions. ' + f'Invalid treatment variable in {str(supplied_treatments)}. ' + 'Valid treatment variables ' + ' or '.join(valid_treatments) + '.') + + for treatment in supplied_treatments: + if not isinstance(supplied_predictions[treatment], dict): + raise TypeError('supplied_predictions must be a nested dictionary. ' + f'For treatment {str(treatment)} a value of type ' + f'{str(type(supplied_predictions[treatment]))} was passed.') + + supplied_learners = list(supplied_predictions[treatment].keys()) + valid_learners = self.params_names + if not set(supplied_learners).issubset(valid_learners): + raise ValueError('Invalid supplied_predictions. ' + f'Invalid nuisance learner for treatment {str(treatment)} in {str(supplied_learners)}. ' + 'Valid nuisance learners ' + ' or '.join(valid_learners) + '.') + + for learner in supplied_learners: + if not isinstance(supplied_predictions[treatment][learner], np.ndarray): + raise TypeError('Invalid supplied_predictions. 
' + 'The values of the nested list must be a numpy array. ' + 'Invalid predictions for treatment ' + str(treatment) + + ' and learner ' + str(learner) + '. ' + + f'Object of type {str(type(supplied_predictions[treatment][learner]))} was passed.') + + expected_shape = (self._dml_data.n_obs, ) + if supplied_predictions[treatment][learner].shape != expected_shape: + raise ValueError('Invalid supplied_predictions. ' + f'The supplied predictions have to be of shape {str(expected_shape)}. ' + 'Invalid predictions for treatment ' + str(treatment) + + ' and learner ' + str(learner) + '. ' + + f'Predictions of shape {str(supplied_predictions[treatment][learner].shape)} passed.') + def _initialize_arrays(self): psi = np.full((self._dml_data.n_obs, self.n_rep, self._dml_data.n_coefs), np.nan) psi_deriv = np.full((self._dml_data.n_obs, self.n_rep, self._dml_data.n_coefs), np.nan) diff --git a/doubleml/tests/test_doubleml_exceptions.py b/doubleml/tests/test_doubleml_exceptions.py index 10f38c24..899f9f41 100644 --- a/doubleml/tests/test_doubleml_exceptions.py +++ b/doubleml/tests/test_doubleml_exceptions.py @@ -977,3 +977,81 @@ def eval_fct(y_pred, y_true): return np.nan with pytest.raises(ValueError, match=msg): dml_irm_obj.evaluate_learners(metric=eval_fct) + + +@pytest.mark.ci +def test_double_ml_supply_predictions(): + dml_irm_obj = DoubleMLIRM(dml_data_irm, + ml_g=Lasso(), + ml_m=LogisticRegression(), + trimming_threshold=0.05, + n_folds=5, + score='ATE', + n_rep=2) + + msg = "The predictions must be a dictionary. ml_m of type was passed." + with pytest.raises(TypeError, match=msg): + dml_irm_obj.fit(supplied_predictions="ml_m") + + predictions = {'ml_f': 'test'} + msg = "supplied_predictions is not yet implmented for ``n_rep > 1``." 
+ with pytest.raises(NotImplementedError, match=msg): + dml_irm_obj.fit(supplied_predictions=predictions) + + dml_irm_obj = DoubleMLIRM(dml_data_irm, + ml_g=Lasso(), + ml_m=LogisticRegression(), + trimming_threshold=0.05, + n_folds=5, + score='ATE', + n_rep=1) + + predictions = {'d': 'test', 'd_f': 'test'} + msg = (r"Invalid supplied_predictions. Invalid treatment variable in \['d', 'd_f'\]. " + "Valid treatment variables d.") + with pytest.raises(ValueError, match=msg): + dml_irm_obj.fit(supplied_predictions=predictions) + + predictions = {'d': 'test'} + msg = ("supplied_predictions must be a nested dictionary. " + "For treatment d a value of type was passed.") + with pytest.raises(TypeError, match=msg): + dml_irm_obj.fit(supplied_predictions=predictions) + + predictions = {'d': {'ml_f': 'test'}} + msg = ("Invalid supplied_predictions. " + r"Invalid nuisance learner for treatment d in \['ml_f'\]. " + "Valid nuisance learners ml_g0 or ml_g1 or ml_m.") + with pytest.raises(ValueError, match=msg): + dml_irm_obj.fit(supplied_predictions=predictions) + + predictions = {'d': {'ml_m': 'test', 'ml_f': 'test'}} + msg = ("Invalid supplied_predictions. " + r"Invalid nuisance learner for treatment d in \['ml_m', 'ml_f'\]. " + "Valid nuisance learners ml_g0 or ml_g1 or ml_m.") + with pytest.raises(ValueError, match=msg): + dml_irm_obj.fit(supplied_predictions=predictions) + + predictions = {'d': {'ml_m': 'test'}} + msg = ("Invalid supplied_predictions. " + "The values of the nested list must be a numpy array. " + "Invalid predictions for treatment d and learner ml_m. " + "Object of type was passed.") + with pytest.raises(TypeError, match=msg): + dml_irm_obj.fit(supplied_predictions=predictions) + + predictions = {'d': {'ml_m': np.array([0])}} + msg = ('Invalid supplied_predictions. ' + r'The supplied predictions have to be of shape \(100,\). ' + 'Invalid predictions for treatment d and learner ml_m. 
' + r'Predictions of shape \(1,\) passed.') + with pytest.raises(ValueError, match=msg): + dml_irm_obj.fit(supplied_predictions=predictions) + + predictions = {'d': {'ml_m': np.ones(shape=(5, 3))}} + msg = ('Invalid supplied_predictions. ' + r'The supplied predictions have to be of shape \(100,\). ' + 'Invalid predictions for treatment d and learner ml_m. ' + r'Predictions of shape \(5, 3\) passed.') + with pytest.raises(ValueError, match=msg): + dml_irm_obj.fit(supplied_predictions=predictions) From 1856aab95ef094fd692774c4f09a03cc12255fa9 Mon Sep 17 00:00:00 2001 From: Sven Klaassen <47529404+SvenKlaassen@users.noreply.github.com> Date: Wed, 26 Apr 2023 13:23:49 +0200 Subject: [PATCH 04/67] refactor to external_predictions --- doubleml/double_ml.py | 60 ++++++++++++---------- doubleml/tests/test_doubleml_exceptions.py | 38 +++++++------- 2 files changed, 53 insertions(+), 45 deletions(-) diff --git a/doubleml/double_ml.py b/doubleml/double_ml.py index 4422cef6..c8b6e2f3 100644 --- a/doubleml/double_ml.py +++ b/doubleml/double_ml.py @@ -458,7 +458,7 @@ def __psi_deriv(self): def __all_se(self): return self._all_se[self._i_treat, self._i_rep] - def fit(self, n_jobs_cv=None, store_predictions=True, store_models=False, supplied_predictions=None): + def fit(self, n_jobs_cv=None, store_predictions=True, external_predictions=None, store_models=False): """ Estimate DoubleML models. @@ -477,9 +477,11 @@ def fit(self, n_jobs_cv=None, store_predictions=True, store_models=False, suppli to analyze the fitted models or extract information like variable importance. Default is ``False``. - supplied_predictions : None or dict + external_predictions : None or dict If `None` all models for the learners are fitted and evaluated. If a dictionary containing predictions - for a specific learner is supplied, the model will use the supplied nuisance predictions instead. + for a specific learner is supplied, the model will use the supplied nuisance predictions instead. 
Has to + be a nested dictionary where the keys refer to the treatment and the keys of the nested dictionarys refer to the + corresponding learners. Default is `None`. Returns @@ -501,7 +503,7 @@ def fit(self, n_jobs_cv=None, store_predictions=True, store_models=False, suppli f'Got {str(store_models)}.') # check prediction format - self._check_supplied_predictions(supplied_predictions) + self._check_external_predictions(external_predictions) # initialize rmse arrays for nuisance functions evaluation self._initialize_rmses() @@ -516,13 +518,19 @@ def fit(self, n_jobs_cv=None, store_predictions=True, store_models=False, suppli self._i_rep = i_rep for i_d in range(self._dml_data.n_treat): self._i_treat = i_d - + # this step could be skipped for the single treatment variable case if self._dml_data.n_treat > 1: self._dml_data.set_x_d(self._dml_data.d_cols[i_d]) + # set the supplied predictions for the treatment and each learner (including None) + prediction_dict = {} + for learner in self.params_names: + prediction_dict[learner] = None + # ml estimation of nuisance models and computation of score elements - score_elements, preds = self._nuisance_est(self.__smpls, n_jobs_cv, return_models=store_models) + score_elements, preds = self._nuisance_est(self.__smpls, n_jobs_cv, + return_models=store_models) self._set_score_elements(score_elements, self._i_rep, self._i_treat) @@ -949,7 +957,7 @@ def _initialize_ml_nuisance_params(self): pass @abstractmethod - def _nuisance_est(self, smpls, n_jobs_cv, return_models): + def _nuisance_est(self, smpls, n_jobs_cv, return_models, external_predictions): pass @abstractmethod @@ -1001,50 +1009,50 @@ def _check_learner(learner, learner_name, regressor, classifier): return learner_is_classifier - def _check_supplied_predictions(self, supplied_predictions): - if supplied_predictions is not None: - if not isinstance(supplied_predictions, dict): - raise TypeError('The predictions must be a dictionary. 
' - f'{str(supplied_predictions)} of type {str(type(supplied_predictions))} was passed.') + def _check_external_predictions(self, external_predictions): + if external_predictions is not None: + if not isinstance(external_predictions, dict): + raise TypeError('external_predictions must be a dictionary. ' + f'{str(external_predictions)} of type {str(type(external_predictions))} was passed.') if self.n_rep > 1: - raise NotImplementedError('supplied_predictions is not yet implmented for ``n_rep > 1``.') + raise NotImplementedError('external_predictions is not yet implmented for ``n_rep > 1``.') - supplied_treatments = list(supplied_predictions.keys()) + supplied_treatments = list(external_predictions.keys()) valid_treatments = self._dml_data.d_cols if not set(supplied_treatments).issubset(valid_treatments): - raise ValueError('Invalid supplied_predictions. ' + raise ValueError('Invalid external_predictions. ' f'Invalid treatment variable in {str(supplied_treatments)}. ' 'Valid treatment variables ' + ' or '.join(valid_treatments) + '.') for treatment in supplied_treatments: - if not isinstance(supplied_predictions[treatment], dict): - raise TypeError('supplied_predictions must be a nested dictionary. ' + if not isinstance(external_predictions[treatment], dict): + raise TypeError('external_predictions must be a nested dictionary. ' f'For treatment {str(treatment)} a value of type ' - f'{str(type(supplied_predictions[treatment]))} was passed.') + f'{str(type(external_predictions[treatment]))} was passed.') - supplied_learners = list(supplied_predictions[treatment].keys()) + supplied_learners = list(external_predictions[treatment].keys()) valid_learners = self.params_names if not set(supplied_learners).issubset(valid_learners): - raise ValueError('Invalid supplied_predictions. ' + raise ValueError('Invalid external_predictions. ' f'Invalid nuisance learner for treatment {str(treatment)} in {str(supplied_learners)}. 
' 'Valid nuisance learners ' + ' or '.join(valid_learners) + '.') for learner in supplied_learners: - if not isinstance(supplied_predictions[treatment][learner], np.ndarray): - raise TypeError('Invalid supplied_predictions. ' + if not isinstance(external_predictions[treatment][learner], np.ndarray): + raise TypeError('Invalid external_predictions. ' 'The values of the nested list must be a numpy array. ' 'Invalid predictions for treatment ' + str(treatment) + ' and learner ' + str(learner) + '. ' + - f'Object of type {str(type(supplied_predictions[treatment][learner]))} was passed.') + f'Object of type {str(type(external_predictions[treatment][learner]))} was passed.') expected_shape = (self._dml_data.n_obs, ) - if supplied_predictions[treatment][learner].shape != expected_shape: - raise ValueError('Invalid supplied_predictions. ' + if external_predictions[treatment][learner].shape != expected_shape: + raise ValueError('Invalid external_predictions. ' f'The supplied predictions have to be of shape {str(expected_shape)}. ' 'Invalid predictions for treatment ' + str(treatment) + ' and learner ' + str(learner) + '. 
' + - f'Predictions of shape {str(supplied_predictions[treatment][learner].shape)} passed.') + f'Predictions of shape {str(external_predictions[treatment][learner].shape)} passed.') def _initialize_arrays(self): psi = np.full((self._dml_data.n_obs, self.n_rep, self._dml_data.n_coefs), np.nan) diff --git a/doubleml/tests/test_doubleml_exceptions.py b/doubleml/tests/test_doubleml_exceptions.py index 899f9f41..aadd7a62 100644 --- a/doubleml/tests/test_doubleml_exceptions.py +++ b/doubleml/tests/test_doubleml_exceptions.py @@ -980,7 +980,7 @@ def eval_fct(y_pred, y_true): @pytest.mark.ci -def test_double_ml_supply_predictions(): +def test_double_ml_external_predictions(): dml_irm_obj = DoubleMLIRM(dml_data_irm, ml_g=Lasso(), ml_m=LogisticRegression(), @@ -989,14 +989,14 @@ def test_double_ml_supply_predictions(): score='ATE', n_rep=2) - msg = "The predictions must be a dictionary. ml_m of type was passed." + msg = "external_predictions must be a dictionary. ml_m of type was passed." with pytest.raises(TypeError, match=msg): - dml_irm_obj.fit(supplied_predictions="ml_m") + dml_irm_obj.fit(external_predictions="ml_m") predictions = {'ml_f': 'test'} - msg = "supplied_predictions is not yet implmented for ``n_rep > 1``." + msg = "external_predictions is not yet implmented for ``n_rep > 1``." with pytest.raises(NotImplementedError, match=msg): - dml_irm_obj.fit(supplied_predictions=predictions) + dml_irm_obj.fit(external_predictions=predictions) dml_irm_obj = DoubleMLIRM(dml_data_irm, ml_g=Lasso(), @@ -1007,51 +1007,51 @@ def test_double_ml_supply_predictions(): n_rep=1) predictions = {'d': 'test', 'd_f': 'test'} - msg = (r"Invalid supplied_predictions. Invalid treatment variable in \['d', 'd_f'\]. " + msg = (r"Invalid external_predictions. Invalid treatment variable in \['d', 'd_f'\]. 
" "Valid treatment variables d.") with pytest.raises(ValueError, match=msg): - dml_irm_obj.fit(supplied_predictions=predictions) + dml_irm_obj.fit(external_predictions=predictions) predictions = {'d': 'test'} - msg = ("supplied_predictions must be a nested dictionary. " + msg = ("external_predictions must be a nested dictionary. " "For treatment d a value of type was passed.") with pytest.raises(TypeError, match=msg): - dml_irm_obj.fit(supplied_predictions=predictions) + dml_irm_obj.fit(external_predictions=predictions) predictions = {'d': {'ml_f': 'test'}} - msg = ("Invalid supplied_predictions. " + msg = ("Invalid external_predictions. " r"Invalid nuisance learner for treatment d in \['ml_f'\]. " "Valid nuisance learners ml_g0 or ml_g1 or ml_m.") with pytest.raises(ValueError, match=msg): - dml_irm_obj.fit(supplied_predictions=predictions) + dml_irm_obj.fit(external_predictions=predictions) predictions = {'d': {'ml_m': 'test', 'ml_f': 'test'}} - msg = ("Invalid supplied_predictions. " + msg = ("Invalid external_predictions. " r"Invalid nuisance learner for treatment d in \['ml_m', 'ml_f'\]. " "Valid nuisance learners ml_g0 or ml_g1 or ml_m.") with pytest.raises(ValueError, match=msg): - dml_irm_obj.fit(supplied_predictions=predictions) + dml_irm_obj.fit(external_predictions=predictions) predictions = {'d': {'ml_m': 'test'}} - msg = ("Invalid supplied_predictions. " + msg = ("Invalid external_predictions. " "The values of the nested list must be a numpy array. " "Invalid predictions for treatment d and learner ml_m. " "Object of type was passed.") with pytest.raises(TypeError, match=msg): - dml_irm_obj.fit(supplied_predictions=predictions) + dml_irm_obj.fit(external_predictions=predictions) predictions = {'d': {'ml_m': np.array([0])}} - msg = ('Invalid supplied_predictions. ' + msg = ('Invalid external_predictions. ' r'The supplied predictions have to be of shape \(100,\). ' 'Invalid predictions for treatment d and learner ml_m. 
' r'Predictions of shape \(1,\) passed.') with pytest.raises(ValueError, match=msg): - dml_irm_obj.fit(supplied_predictions=predictions) + dml_irm_obj.fit(external_predictions=predictions) predictions = {'d': {'ml_m': np.ones(shape=(5, 3))}} - msg = ('Invalid supplied_predictions. ' + msg = ('Invalid external_predictions. ' r'The supplied predictions have to be of shape \(100,\). ' 'Invalid predictions for treatment d and learner ml_m. ' r'Predictions of shape \(5, 3\) passed.') with pytest.raises(ValueError, match=msg): - dml_irm_obj.fit(supplied_predictions=predictions) + dml_irm_obj.fit(external_predictions=predictions) From 189ac0e6cea43ea35059fbb287035eaffcfc2cfd Mon Sep 17 00:00:00 2001 From: Sven Klaassen <47529404+SvenKlaassen@users.noreply.github.com> Date: Wed, 26 Apr 2023 16:19:15 +0200 Subject: [PATCH 05/67] extend IRM with external predictions --- doubleml/double_ml.py | 10 ++- doubleml/double_ml_irm.py | 102 ++++++++++++++++------------ doubleml/tests/_utils_irm_manual.py | 30 ++++---- doubleml/tests/test_irm.py | 42 +++++++++++- 4 files changed, 123 insertions(+), 61 deletions(-) diff --git a/doubleml/double_ml.py b/doubleml/double_ml.py index c8b6e2f3..22799ecb 100644 --- a/doubleml/double_ml.py +++ b/doubleml/double_ml.py @@ -524,12 +524,18 @@ def fit(self, n_jobs_cv=None, store_predictions=True, external_predictions=None, self._dml_data.set_x_d(self._dml_data.d_cols[i_d]) # set the supplied predictions for the treatment and each learner (including None) - prediction_dict = {} + ext_prediction_dict = {} for learner in self.params_names: - prediction_dict[learner] = None + if external_predictions is None: + ext_prediction_dict[learner] = None + elif learner in external_predictions[self._dml_data.d_cols[i_d]].keys(): + ext_prediction_dict[learner] = external_predictions[self._dml_data.d_cols[i_d]][learner] + else: + ext_prediction_dict[learner] = None # ml estimation of nuisance models and computation of score elements score_elements, preds = 
self._nuisance_est(self.__smpls, n_jobs_cv, + external_predictions=ext_prediction_dict, return_models=store_models) self._set_score_elements(score_elements, self._i_rep, self._i_treat) diff --git a/doubleml/double_ml_irm.py b/doubleml/double_ml_irm.py index 063d2d6b..41a6b898 100644 --- a/doubleml/double_ml_irm.py +++ b/doubleml/double_ml_irm.py @@ -1,6 +1,7 @@ import numpy as np import pandas as pd import warnings +import copy from sklearn.utils import check_X_y from sklearn.utils.multiclass import type_of_target @@ -199,7 +200,7 @@ def _check_data(self, obj_dml_data): 'needs to be specified as treatment variable.') return - def _nuisance_est(self, smpls, n_jobs_cv, return_models=False): + def _nuisance_est(self, smpls, n_jobs_cv, external_predictions, return_models=False): x, y = check_X_y(self._dml_data.x, self._dml_data.y, force_all_finite=False) x, d = check_X_y(x, self._dml_data.d, @@ -208,44 +209,60 @@ def _nuisance_est(self, smpls, n_jobs_cv, return_models=False): smpls_d0, smpls_d1 = _get_cond_smpls(smpls, d) # nuisance g - g_hat0 = _dml_cv_predict(self._learner['ml_g'], x, y, smpls=smpls_d0, n_jobs=n_jobs_cv, - est_params=self._get_params('ml_g0'), method=self._predict_method['ml_g'], - return_models=return_models) - _check_finite_predictions(g_hat0['preds'], self._learner['ml_g'], 'ml_g', smpls) - g_hat0['targets'] = _cond_targets(g_hat0['targets'], cond_sample=(d == 0)) - - if self._dml_data.binary_outcome: - binary_preds = (type_of_target(g_hat0['preds']) == 'binary') - zero_one_preds = np.all((np.power(g_hat0['preds'], 2) - g_hat0['preds']) == 0) - if binary_preds & zero_one_preds: - raise ValueError(f'For the binary outcome variable {self._dml_data.y_col}, ' - f'predictions obtained with the ml_g learner {str(self._learner["ml_g"])} are also ' - 'observed to be binary with values 0 and 1. 
Make sure that for classifiers ' - 'probabilities and not labels are predicted.') - - g_hat1 = {'preds': None, 'targets': None, 'models': None} - if (self.score == 'ATE') | callable(self.score): - g_hat1 = _dml_cv_predict(self._learner['ml_g'], x, y, smpls=smpls_d1, n_jobs=n_jobs_cv, - est_params=self._get_params('ml_g1'), method=self._predict_method['ml_g'], - return_models=return_models) - _check_finite_predictions(g_hat1['preds'], self._learner['ml_g'], 'ml_g', smpls) - g_hat1['targets'] = _cond_targets(g_hat1['targets'], cond_sample=(d == 1)) + if external_predictions['ml_g0'] is None: + g_hat0 = _dml_cv_predict(self._learner['ml_g'], x, y, smpls=smpls_d0, n_jobs=n_jobs_cv, + est_params=self._get_params('ml_g0'), method=self._predict_method['ml_g'], + return_models=return_models) + _check_finite_predictions(g_hat0['preds'], self._learner['ml_g'], 'ml_g', smpls) + g_hat0['targets'] = _cond_targets(g_hat0['targets'], cond_sample=(d == 0)) if self._dml_data.binary_outcome: - binary_preds = (type_of_target(g_hat1['preds']) == 'binary') - zero_one_preds = np.all((np.power(g_hat1['preds'], 2) - g_hat1['preds']) == 0) + binary_preds = (type_of_target(g_hat0['preds']) == 'binary') + zero_one_preds = np.all((np.power(g_hat0['preds'], 2) - g_hat0['preds']) == 0) if binary_preds & zero_one_preds: raise ValueError(f'For the binary outcome variable {self._dml_data.y_col}, ' - f'predictions obtained with the ml_g learner {str(self._learner["ml_g"])} are also ' - 'observed to be binary with values 0 and 1. Make sure that for classifiers ' - 'probabilities and not labels are predicted.') + f'predictions obtained with the ml_g learner {str(self._learner["ml_g"])} are also ' + 'observed to be binary with values 0 and 1. 
Make sure that for classifiers ' + 'probabilities and not labels are predicted.') + else: + g_hat0 = {'preds': external_predictions['ml_g0'], + 'targets': None, + 'models': None} + + g_hat1 = {'preds': None, 'targets': None, 'models': None} + if (self.score == 'ATE') | callable(self.score): + if external_predictions['ml_g1'] is None: + g_hat1 = _dml_cv_predict(self._learner['ml_g'], x, y, smpls=smpls_d1, n_jobs=n_jobs_cv, + est_params=self._get_params('ml_g1'), method=self._predict_method['ml_g'], + return_models=return_models) + _check_finite_predictions(g_hat1['preds'], self._learner['ml_g'], 'ml_g', smpls) + g_hat1['targets'] = _cond_targets(g_hat1['targets'], cond_sample=(d == 1)) + + if self._dml_data.binary_outcome: + binary_preds = (type_of_target(g_hat1['preds']) == 'binary') + zero_one_preds = np.all((np.power(g_hat1['preds'], 2) - g_hat1['preds']) == 0) + if binary_preds & zero_one_preds: + raise ValueError(f'For the binary outcome variable {self._dml_data.y_col}, ' + f'predictions obtained with the ml_g learner {str(self._learner["ml_g"])} are also ' + 'observed to be binary with values 0 and 1. 
Make sure that for classifiers ' + 'probabilities and not labels are predicted.') + else: + g_hat1 = {'preds': external_predictions['ml_g1'], + 'targets': None, + 'models': None} # nuisance m - m_hat = _dml_cv_predict(self._learner['ml_m'], x, d, smpls=smpls, n_jobs=n_jobs_cv, - est_params=self._get_params('ml_m'), method=self._predict_method['ml_m'], - return_models=return_models) - _check_finite_predictions(m_hat['preds'], self._learner['ml_m'], 'ml_m', smpls) - _check_is_propensity(m_hat['preds'], self._learner['ml_m'], 'ml_m', smpls, eps=1e-12) + if external_predictions['ml_m'] is None: + m_hat = _dml_cv_predict(self._learner['ml_m'], x, d, smpls=smpls, n_jobs=n_jobs_cv, + est_params=self._get_params('ml_m'), method=self._predict_method['ml_m'], + return_models=return_models) + _check_finite_predictions(m_hat['preds'], self._learner['ml_m'], 'ml_m', smpls) + _check_is_propensity(m_hat['preds'], self._learner['ml_m'], 'ml_m', smpls, eps=1e-12) + m_hat['preds'] = _trimm(m_hat['preds'], self.trimming_rule, self.trimming_threshold) + else: + m_hat = {'preds': external_predictions['ml_m'], + 'targets': None, + 'models': None} psi_a, psi_b = self._score_elements(y, d, g_hat0['preds'], g_hat1['preds'], m_hat['preds'], @@ -273,14 +290,13 @@ def _score_elements(self, y, d, g_hat0, g_hat1, m_hat, smpls): for _, test_index in smpls: p_hat[test_index] = np.mean(d[test_index]) - m_hat = _trimm(m_hat, self.trimming_rule, self.trimming_threshold) - + m_hat_adj = copy.deepcopy(m_hat) if self.normalize_ipw: if self.dml_procedure == 'dml1': for _, test_index in smpls: - m_hat[test_index] = _normalize_ipw(m_hat[test_index], d[test_index]) + m_hat_adj[test_index] = _normalize_ipw(m_hat[test_index], d[test_index]) else: - m_hat = _normalize_ipw(m_hat, d) + m_hat_adj = _normalize_ipw(m_hat, d) # compute residuals u_hat0 = y - g_hat0 @@ -291,19 +307,19 @@ def _score_elements(self, y, d, g_hat0, g_hat1, m_hat, smpls): if isinstance(self.score, str): if self.score == 'ATE': psi_b = 
g_hat1 - g_hat0 \ - + np.divide(np.multiply(d, u_hat1), m_hat) \ - - np.divide(np.multiply(1.0-d, u_hat0), 1.0 - m_hat) - psi_a = np.full_like(m_hat, -1.0) + + np.divide(np.multiply(d, u_hat1), m_hat_adj) \ + - np.divide(np.multiply(1.0-d, u_hat0), 1.0 - m_hat_adj) + psi_a = np.full_like(m_hat_adj, -1.0) else: assert self.score == 'ATTE' psi_b = np.divide(np.multiply(d, u_hat0), p_hat) \ - - np.divide(np.multiply(m_hat, np.multiply(1.0-d, u_hat0)), - np.multiply(p_hat, (1.0 - m_hat))) + - np.divide(np.multiply(m_hat_adj, np.multiply(1.0-d, u_hat0)), + np.multiply(p_hat, (1.0 - m_hat_adj))) psi_a = - np.divide(d, p_hat) else: assert callable(self.score) psi_a, psi_b = self.score(y=y, d=d, - g_hat0=g_hat0, g_hat1=g_hat1, m_hat=m_hat, + g_hat0=g_hat0, g_hat1=g_hat1, m_hat=m_hat_adj, smpls=smpls) return psi_a, psi_b diff --git a/doubleml/tests/_utils_irm_manual.py b/doubleml/tests/_utils_irm_manual.py index 5328630b..f0299515 100644 --- a/doubleml/tests/_utils_irm_manual.py +++ b/doubleml/tests/_utils_irm_manual.py @@ -1,4 +1,5 @@ import numpy as np +import copy from sklearn.base import clone, is_classifier from ._utils_boot import boot_manual, draw_weights @@ -139,20 +140,21 @@ def irm_dml1(y, x, d, g_hat0_list, g_hat1_list, m_hat_list, p_hat_list, smpls, s u_hat0, u_hat1, g_hat0, g_hat1, m_hat, p_hat = compute_iivm_residuals( y, g_hat0_list, g_hat1_list, m_hat_list, p_hat_list, smpls) + m_hat_adj = copy.deepcopy(m_hat) if normalize_ipw: for _, test_index in smpls: - m_hat[test_index] = _normalize_ipw(m_hat[test_index], d[test_index]) + m_hat_adj[test_index] = _normalize_ipw(m_hat[test_index], d[test_index]) for idx, (_, test_index) in enumerate(smpls): thetas[idx] = irm_orth(g_hat0[test_index], g_hat1[test_index], - m_hat[test_index], p_hat[test_index], + m_hat_adj[test_index], p_hat[test_index], u_hat0[test_index], u_hat1[test_index], d[test_index], score) theta_hat = np.mean(thetas) if len(smpls) > 1: se = np.sqrt(var_irm(theta_hat, g_hat0, g_hat1, - m_hat, p_hat, 
+ m_hat_adj, p_hat, u_hat0, u_hat1, d, score, n_obs)) else: @@ -160,7 +162,7 @@ def irm_dml1(y, x, d, g_hat0_list, g_hat1_list, m_hat_list, p_hat_list, smpls, s test_index = smpls[0][1] n_obs = len(test_index) se = np.sqrt(var_irm(theta_hat, g_hat0[test_index], g_hat1[test_index], - m_hat[test_index], p_hat[test_index], + m_hat_adj[test_index], p_hat[test_index], u_hat0[test_index], u_hat1[test_index], d[test_index], score, n_obs)) @@ -172,13 +174,14 @@ def irm_dml2(y, x, d, g_hat0_list, g_hat1_list, m_hat_list, p_hat_list, smpls, s u_hat0, u_hat1, g_hat0, g_hat1, m_hat, p_hat = compute_iivm_residuals( y, g_hat0_list, g_hat1_list, m_hat_list, p_hat_list, smpls) + m_hat_adj = copy.deepcopy(m_hat) if normalize_ipw: - m_hat = _normalize_ipw(m_hat, d) + m_hat_adj = _normalize_ipw(m_hat, d) - theta_hat = irm_orth(g_hat0, g_hat1, m_hat, p_hat, + theta_hat = irm_orth(g_hat0, g_hat1, m_hat_adj, p_hat, u_hat0, u_hat1, d, score) se = np.sqrt(var_irm(theta_hat, g_hat0, g_hat1, - m_hat, p_hat, + m_hat_adj, p_hat, u_hat0, u_hat1, d, score, n_obs)) @@ -249,12 +252,13 @@ def boot_irm_single_split(theta, y, d, g_hat0_list, g_hat1_list, m_hat_list, p_h u_hat0, u_hat1, g_hat0, g_hat1, m_hat, p_hat = compute_iivm_residuals( y, g_hat0_list, g_hat1_list, m_hat_list, p_hat_list, smpls) + m_hat_adj = copy.deepcopy(m_hat) if normalize_ipw: if dml_procedure == 'dml1': for _, test_index in smpls: - m_hat[test_index] = _normalize_ipw(m_hat[test_index], d[test_index]) + m_hat_adj[test_index] = _normalize_ipw(m_hat[test_index], d[test_index]) else: - m_hat = _normalize_ipw(m_hat, d) + m_hat_adj = _normalize_ipw(m_hat, d) if apply_cross_fitting: if score == 'ATE': @@ -272,13 +276,13 @@ def boot_irm_single_split(theta, y, d, g_hat0_list, g_hat1_list, m_hat_list, p_h if score == 'ATE': psi = g_hat1 - g_hat0 \ - + np.divide(np.multiply(d, u_hat1), m_hat) \ - - np.divide(np.multiply(1.-d, u_hat0), 1.-m_hat) - theta + + np.divide(np.multiply(d, u_hat1), m_hat_adj) \ + - np.divide(np.multiply(1.-d, 
u_hat0), 1.-m_hat_adj) - theta else: assert score == 'ATTE' psi = np.divide(np.multiply(d, u_hat0), p_hat) \ - - np.divide(np.multiply(m_hat, np.multiply(1.-d, u_hat0)), - np.multiply(p_hat, (1.-m_hat))) \ + - np.divide(np.multiply(m_hat_adj, np.multiply(1.-d, u_hat0)), + np.multiply(p_hat, (1.-m_hat_adj))) \ - theta * np.divide(d, p_hat) boot_theta, boot_t_stat = boot_manual(psi, J, smpls, se, weights, n_rep_boot, apply_cross_fitting) diff --git a/doubleml/tests/test_irm.py b/doubleml/tests/test_irm.py index d28e628e..4592e167 100644 --- a/doubleml/tests/test_irm.py +++ b/doubleml/tests/test_irm.py @@ -18,8 +18,8 @@ @pytest.fixture(scope='module', params=[[LinearRegression(), LogisticRegression(solver='lbfgs', max_iter=250)], - [RandomForestRegressor(max_depth=5, n_estimators=10), - RandomForestClassifier(max_depth=5, n_estimators=10)]]) + [RandomForestRegressor(max_depth=5, n_estimators=10, random_state=42), + RandomForestClassifier(max_depth=5, n_estimators=10, random_state=42)]]) def learner(request): return request.param @@ -87,10 +87,31 @@ def dml_irm_fixture(generate_data_irm, learner, score, dml_procedure, normalize_ normalize_ipw=normalize_ipw, trimming_threshold=trimming_threshold) + np.random.seed(3141) + # test with external nuisance predictions + dml_irm_obj_ext = dml.DoubleMLIRM(obj_dml_data, + ml_g, ml_m, + n_folds, + score=score, + dml_procedure=dml_procedure, + normalize_ipw=normalize_ipw, + draw_sample_splitting=False, + trimming_threshold=trimming_threshold) + + # synchronize the sample splitting + dml_irm_obj_ext.set_sample_splitting(all_smpls=all_smpls) + + prediction_dict = {'d': {'ml_g0': dml_irm_obj.predictions['ml_g0'].reshape(-1), + 'ml_g1': dml_irm_obj.predictions['ml_g1'].reshape(-1), + 'ml_m': dml_irm_obj.predictions['ml_m'].reshape(-1)}} + dml_irm_obj_ext.fit(external_predictions=prediction_dict) + res_dict = {'coef': dml_irm_obj.coef, 'coef_manual': res_manual['theta'], + 'coef_ext': dml_irm_obj_ext.coef, 'se': dml_irm_obj.se, 
'se_manual': res_manual['se'], + 'se_ext': dml_irm_obj_ext.se, 'boot_methods': boot_methods} for bootstrap in boot_methods: @@ -104,10 +125,14 @@ def dml_irm_fixture(generate_data_irm, learner, score, dml_procedure, normalize_ np.random.seed(3141) dml_irm_obj.bootstrap(method=bootstrap, n_rep_boot=n_rep_boot) + np.random.seed(3141) + dml_irm_obj_ext.bootstrap(method=bootstrap, n_rep_boot=n_rep_boot) res_dict['boot_coef' + bootstrap] = dml_irm_obj.boot_coef res_dict['boot_t_stat' + bootstrap] = dml_irm_obj.boot_t_stat res_dict['boot_coef' + bootstrap + '_manual'] = boot_theta res_dict['boot_t_stat' + bootstrap + '_manual'] = boot_t_stat + res_dict['boot_coef' + bootstrap + '_ext'] = dml_irm_obj_ext.boot_coef + res_dict['boot_t_stat' + bootstrap + '_ext'] = dml_irm_obj_ext.boot_t_stat return res_dict @@ -117,6 +142,9 @@ def test_dml_irm_coef(dml_irm_fixture): assert math.isclose(dml_irm_fixture['coef'], dml_irm_fixture['coef_manual'], rel_tol=1e-9, abs_tol=1e-4) + assert math.isclose(dml_irm_fixture['coef'], + dml_irm_fixture['coef_ext'], + rel_tol=1e-9, abs_tol=1e-4) @pytest.mark.ci @@ -124,7 +152,9 @@ def test_dml_irm_se(dml_irm_fixture): assert math.isclose(dml_irm_fixture['se'], dml_irm_fixture['se_manual'], rel_tol=1e-9, abs_tol=1e-4) - + assert math.isclose(dml_irm_fixture['se'], + dml_irm_fixture['se_ext'], + rel_tol=1e-9, abs_tol=1e-4) @pytest.mark.ci def test_dml_irm_boot(dml_irm_fixture): @@ -132,9 +162,15 @@ def test_dml_irm_boot(dml_irm_fixture): assert np.allclose(dml_irm_fixture['boot_coef' + bootstrap], dml_irm_fixture['boot_coef' + bootstrap + '_manual'], rtol=1e-9, atol=1e-4) + assert np.allclose(dml_irm_fixture['boot_coef' + bootstrap], + dml_irm_fixture['boot_coef' + bootstrap + '_ext'], + rtol=1e-9, atol=1e-4) assert np.allclose(dml_irm_fixture['boot_t_stat' + bootstrap], dml_irm_fixture['boot_t_stat' + bootstrap + '_manual'], rtol=1e-9, atol=1e-4) + assert np.allclose(dml_irm_fixture['boot_t_stat' + bootstrap], + dml_irm_fixture['boot_t_stat' + 
bootstrap + '_ext'], + rtol=1e-9, atol=1e-4) @pytest.mark.ci From d280a78765570254f6d9294e0490c67dcdf0cda0 Mon Sep 17 00:00:00 2001 From: Sven Klaassen <47529404+SvenKlaassen@users.noreply.github.com> Date: Wed, 26 Apr 2023 16:57:21 +0200 Subject: [PATCH 06/67] refactor IRM nuisance est and fix unit tests --- doubleml/double_ml_cvar.py | 2 +- doubleml/double_ml_iivm.py | 2 +- doubleml/double_ml_irm.py | 33 +++++++++++--------- doubleml/double_ml_lpq.py | 2 +- doubleml/double_ml_pliv.py | 2 +- doubleml/double_ml_plr.py | 2 +- doubleml/double_ml_pq.py | 2 +- doubleml/tests/test_nonlinear_score_mixin.py | 2 +- 8 files changed, 25 insertions(+), 22 deletions(-) diff --git a/doubleml/double_ml_cvar.py b/doubleml/double_ml_cvar.py index 31f446ea..bc9d6af6 100644 --- a/doubleml/double_ml_cvar.py +++ b/doubleml/double_ml_cvar.py @@ -206,7 +206,7 @@ def _initialize_ml_nuisance_params(self): self._params = {learner: {key: [None] * self.n_rep for key in self._dml_data.d_cols} for learner in ['ml_g', 'ml_m']} - def _nuisance_est(self, smpls, n_jobs_cv, return_models=False): + def _nuisance_est(self, smpls, n_jobs_cv, external_predictions, return_models=False): x, y = check_X_y(self._dml_data.x, self._dml_data.y, force_all_finite=False) x, d = check_X_y(x, self._dml_data.d, diff --git a/doubleml/double_ml_iivm.py b/doubleml/double_ml_iivm.py index 4a8d727a..b710105b 100644 --- a/doubleml/double_ml_iivm.py +++ b/doubleml/double_ml_iivm.py @@ -241,7 +241,7 @@ def _check_data(self, obj_dml_data): raise ValueError(err_msg) return - def _nuisance_est(self, smpls, n_jobs_cv, return_models=False): + def _nuisance_est(self, smpls, n_jobs_cv, external_predictions, return_models=False): x, y = check_X_y(self._dml_data.x, self._dml_data.y, force_all_finite=False) x, z = check_X_y(x, np.ravel(self._dml_data.z), diff --git a/doubleml/double_ml_irm.py b/doubleml/double_ml_irm.py index 41a6b898..96912d5b 100644 --- a/doubleml/double_ml_irm.py +++ b/doubleml/double_ml_irm.py @@ -209,7 +209,12 
@@ def _nuisance_est(self, smpls, n_jobs_cv, external_predictions, return_models=Fa smpls_d0, smpls_d1 = _get_cond_smpls(smpls, d) # nuisance g - if external_predictions['ml_g0'] is None: + if external_predictions['ml_g0'] is not None: + # use external predictions + g_hat0 = {'preds': external_predictions['ml_g0'], + 'targets': None, + 'models': None} + else: g_hat0 = _dml_cv_predict(self._learner['ml_g'], x, y, smpls=smpls_d0, n_jobs=n_jobs_cv, est_params=self._get_params('ml_g0'), method=self._predict_method['ml_g'], return_models=return_models) @@ -224,14 +229,15 @@ def _nuisance_est(self, smpls, n_jobs_cv, external_predictions, return_models=Fa f'predictions obtained with the ml_g learner {str(self._learner["ml_g"])} are also ' 'observed to be binary with values 0 and 1. Make sure that for classifiers ' 'probabilities and not labels are predicted.') - else: - g_hat0 = {'preds': external_predictions['ml_g0'], - 'targets': None, - 'models': None} g_hat1 = {'preds': None, 'targets': None, 'models': None} if (self.score == 'ATE') | callable(self.score): - if external_predictions['ml_g1'] is None: + if external_predictions['ml_g1'] is not None: + # use external predictions + g_hat1 = {'preds': external_predictions['ml_g1'], + 'targets': None, + 'models': None} + else: g_hat1 = _dml_cv_predict(self._learner['ml_g'], x, y, smpls=smpls_d1, n_jobs=n_jobs_cv, est_params=self._get_params('ml_g1'), method=self._predict_method['ml_g'], return_models=return_models) @@ -246,23 +252,20 @@ def _nuisance_est(self, smpls, n_jobs_cv, external_predictions, return_models=Fa f'predictions obtained with the ml_g learner {str(self._learner["ml_g"])} are also ' 'observed to be binary with values 0 and 1. 
Make sure that for classifiers ' 'probabilities and not labels are predicted.') - else: - g_hat1 = {'preds': external_predictions['ml_g1'], - 'targets': None, - 'models': None} # nuisance m - if external_predictions['ml_m'] is None: + if external_predictions['ml_m'] is not None: + # use external predictions + m_hat = {'preds': external_predictions['ml_m'], + 'targets': None, + 'models': None} + else: m_hat = _dml_cv_predict(self._learner['ml_m'], x, d, smpls=smpls, n_jobs=n_jobs_cv, est_params=self._get_params('ml_m'), method=self._predict_method['ml_m'], return_models=return_models) _check_finite_predictions(m_hat['preds'], self._learner['ml_m'], 'ml_m', smpls) _check_is_propensity(m_hat['preds'], self._learner['ml_m'], 'ml_m', smpls, eps=1e-12) m_hat['preds'] = _trimm(m_hat['preds'], self.trimming_rule, self.trimming_threshold) - else: - m_hat = {'preds': external_predictions['ml_m'], - 'targets': None, - 'models': None} psi_a, psi_b = self._score_elements(y, d, g_hat0['preds'], g_hat1['preds'], m_hat['preds'], diff --git a/doubleml/double_ml_lpq.py b/doubleml/double_ml_lpq.py index 97aebb23..72663c06 100644 --- a/doubleml/double_ml_lpq.py +++ b/doubleml/double_ml_lpq.py @@ -276,7 +276,7 @@ def _initialize_ml_nuisance_params(self): for learner in ['ml_m_z', 'ml_g_du_z0', 'ml_g_du_z1', 'ml_m_d_z0', 'ml_m_d_z1']} - def _nuisance_est(self, smpls, n_jobs_cv, return_models=False): + def _nuisance_est(self, smpls, n_jobs_cv, external_predictions, return_models=False): x, y = check_X_y(self._dml_data.x, self._dml_data.y, force_all_finite=False) x, d = check_X_y(x, self._dml_data.d, diff --git a/doubleml/double_ml_pliv.py b/doubleml/double_ml_pliv.py index 88369250..1ad57d24 100644 --- a/doubleml/double_ml_pliv.py +++ b/doubleml/double_ml_pliv.py @@ -292,7 +292,7 @@ def set_ml_nuisance_params(self, learner, treat_var, params): learner = 'ml_l' super(DoubleMLPLIV, self).set_ml_nuisance_params(learner, treat_var, params) - def _nuisance_est(self, smpls, n_jobs_cv, 
return_models=False): + def _nuisance_est(self, smpls, n_jobs_cv, external_predictions, return_models=False): if self.partialX & (not self.partialZ): psi_elements, preds = self._nuisance_est_partial_x(smpls, n_jobs_cv, return_models) elif (not self.partialX) & self.partialZ: diff --git a/doubleml/double_ml_plr.py b/doubleml/double_ml_plr.py index 4b7eb401..76bf603d 100644 --- a/doubleml/double_ml_plr.py +++ b/doubleml/double_ml_plr.py @@ -185,7 +185,7 @@ def set_ml_nuisance_params(self, learner, treat_var, params): learner = 'ml_l' super(DoubleMLPLR, self).set_ml_nuisance_params(learner, treat_var, params) - def _nuisance_est(self, smpls, n_jobs_cv, return_models=False): + def _nuisance_est(self, smpls, n_jobs_cv, external_predictions, return_models=False): x, y = check_X_y(self._dml_data.x, self._dml_data.y, force_all_finite=False) x, d = check_X_y(x, self._dml_data.d, diff --git a/doubleml/double_ml_pq.py b/doubleml/double_ml_pq.py index e59e2dda..70e79c00 100644 --- a/doubleml/double_ml_pq.py +++ b/doubleml/double_ml_pq.py @@ -254,7 +254,7 @@ def _initialize_ml_nuisance_params(self): self._params = {learner: {key: [None] * self.n_rep for key in self._dml_data.d_cols} for learner in ['ml_g', 'ml_m']} - def _nuisance_est(self, smpls, n_jobs_cv, return_models=False): + def _nuisance_est(self, smpls, n_jobs_cv, external_predictions, return_models=False): x, y = check_X_y(self._dml_data.x, self._dml_data.y, force_all_finite=False) x, d = check_X_y(x, self._dml_data.d, diff --git a/doubleml/tests/test_nonlinear_score_mixin.py b/doubleml/tests/test_nonlinear_score_mixin.py index 92210085..77d8dfe6 100644 --- a/doubleml/tests/test_nonlinear_score_mixin.py +++ b/doubleml/tests/test_nonlinear_score_mixin.py @@ -99,7 +99,7 @@ def _check_score(self, score): def _check_data(self, obj_dml_data): pass - def _nuisance_est(self, smpls, n_jobs_cv, return_models=False): + def _nuisance_est(self, smpls, n_jobs_cv, external_predictions, return_models=False): x, y = 
check_X_y(self._dml_data.x, self._dml_data.y, force_all_finite=False) x, d = check_X_y(x, self._dml_data.d, From 437f85b3692b9870c4ff12b0b0ebd10c56eb9da6 Mon Sep 17 00:00:00 2001 From: Sven Klaassen <47529404+SvenKlaassen@users.noreply.github.com> Date: Mon, 8 May 2023 10:51:45 +0200 Subject: [PATCH 07/67] fix format --- doubleml/double_ml.py | 4 ++-- doubleml/double_ml_irm.py | 20 ++++++++++---------- doubleml/tests/test_irm.py | 3 ++- 3 files changed, 14 insertions(+), 13 deletions(-) diff --git a/doubleml/double_ml.py b/doubleml/double_ml.py index 22799ecb..e276df81 100644 --- a/doubleml/double_ml.py +++ b/doubleml/double_ml.py @@ -480,7 +480,7 @@ def fit(self, n_jobs_cv=None, store_predictions=True, external_predictions=None, external_predictions : None or dict If `None` all models for the learners are fitted and evaluated. If a dictionary containing predictions for a specific learner is supplied, the model will use the supplied nuisance predictions instead. Has to - be a nested dictionary where the keys refer to the treatment and the keys of the nested dictionarys refer to the + be a nested dictionary where the keys refer to the treatment and the keys of the nested dictionarys refer to the corresponding learners. Default is `None`. 
@@ -518,7 +518,7 @@ def fit(self, n_jobs_cv=None, store_predictions=True, external_predictions=None, self._i_rep = i_rep for i_d in range(self._dml_data.n_treat): self._i_treat = i_d - + # this step could be skipped for the single treatment variable case if self._dml_data.n_treat > 1: self._dml_data.set_x_d(self._dml_data.d_cols[i_d]) diff --git a/doubleml/double_ml_irm.py b/doubleml/double_ml_irm.py index 9f77de62..b52ece60 100644 --- a/doubleml/double_ml_irm.py +++ b/doubleml/double_ml_irm.py @@ -220,8 +220,8 @@ def _nuisance_est(self, smpls, n_jobs_cv, external_predictions, return_models=Fa 'models': None} else: g_hat0 = _dml_cv_predict(self._learner['ml_g'], x, y, smpls=smpls_d0, n_jobs=n_jobs_cv, - est_params=self._get_params('ml_g0'), method=self._predict_method['ml_g'], - return_models=return_models) + est_params=self._get_params('ml_g0'), method=self._predict_method['ml_g'], + return_models=return_models) _check_finite_predictions(g_hat0['preds'], self._learner['ml_g'], 'ml_g', smpls) g_hat0['targets'] = _cond_targets(g_hat0['targets'], cond_sample=(d == 0)) @@ -230,9 +230,9 @@ def _nuisance_est(self, smpls, n_jobs_cv, external_predictions, return_models=Fa zero_one_preds = np.all((np.power(g_hat0['preds'], 2) - g_hat0['preds']) == 0) if binary_preds & zero_one_preds: raise ValueError(f'For the binary outcome variable {self._dml_data.y_col}, ' - f'predictions obtained with the ml_g learner {str(self._learner["ml_g"])} are also ' - 'observed to be binary with values 0 and 1. Make sure that for classifiers ' - 'probabilities and not labels are predicted.') + f'predictions obtained with the ml_g learner {str(self._learner["ml_g"])} are also ' + 'observed to be binary with values 0 and 1. 
Make sure that for classifiers ' + 'probabilities and not labels are predicted.') g_hat1 = {'preds': None, 'targets': None, 'models': None} if (self.score == 'ATE') | callable(self.score): @@ -243,8 +243,8 @@ def _nuisance_est(self, smpls, n_jobs_cv, external_predictions, return_models=Fa 'models': None} else: g_hat1 = _dml_cv_predict(self._learner['ml_g'], x, y, smpls=smpls_d1, n_jobs=n_jobs_cv, - est_params=self._get_params('ml_g1'), method=self._predict_method['ml_g'], - return_models=return_models) + est_params=self._get_params('ml_g1'), method=self._predict_method['ml_g'], + return_models=return_models) _check_finite_predictions(g_hat1['preds'], self._learner['ml_g'], 'ml_g', smpls) g_hat1['targets'] = _cond_targets(g_hat1['targets'], cond_sample=(d == 1)) @@ -253,9 +253,9 @@ def _nuisance_est(self, smpls, n_jobs_cv, external_predictions, return_models=Fa zero_one_preds = np.all((np.power(g_hat1['preds'], 2) - g_hat1['preds']) == 0) if binary_preds & zero_one_preds: raise ValueError(f'For the binary outcome variable {self._dml_data.y_col}, ' - f'predictions obtained with the ml_g learner {str(self._learner["ml_g"])} are also ' - 'observed to be binary with values 0 and 1. Make sure that for classifiers ' - 'probabilities and not labels are predicted.') + f'predictions obtained with the ml_g learner {str(self._learner["ml_g"])} are also ' + 'observed to be binary with values 0 and 1. 
Make sure that for classifiers ' + 'probabilities and not labels are predicted.') # nuisance m if external_predictions['ml_m'] is not None: diff --git a/doubleml/tests/test_irm.py b/doubleml/tests/test_irm.py index 4592e167..992eb28f 100644 --- a/doubleml/tests/test_irm.py +++ b/doubleml/tests/test_irm.py @@ -100,7 +100,7 @@ def dml_irm_fixture(generate_data_irm, learner, score, dml_procedure, normalize_ # synchronize the sample splitting dml_irm_obj_ext.set_sample_splitting(all_smpls=all_smpls) - + prediction_dict = {'d': {'ml_g0': dml_irm_obj.predictions['ml_g0'].reshape(-1), 'ml_g1': dml_irm_obj.predictions['ml_g1'].reshape(-1), 'ml_m': dml_irm_obj.predictions['ml_m'].reshape(-1)}} @@ -156,6 +156,7 @@ def test_dml_irm_se(dml_irm_fixture): dml_irm_fixture['se_ext'], rel_tol=1e-9, abs_tol=1e-4) + @pytest.mark.ci def test_dml_irm_boot(dml_irm_fixture): for bootstrap in dml_irm_fixture['boot_methods']: From 7287d243d438c3ca122a3faf25932f0b0a4fa33d Mon Sep 17 00:00:00 2001 From: Sven Klaassen <47529404+SvenKlaassen@users.noreply.github.com> Date: Mon, 8 May 2023 11:15:47 +0200 Subject: [PATCH 08/67] update nuisance_est input DID and DIDCS --- doubleml/double_ml_did.py | 2 +- doubleml/double_ml_did_cs.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/doubleml/double_ml_did.py b/doubleml/double_ml_did.py index 199b08d7..41317545 100644 --- a/doubleml/double_ml_did.py +++ b/doubleml/double_ml_did.py @@ -192,7 +192,7 @@ def _check_data(self, obj_dml_data): 'needs to be specified as treatment variable.') return - def _nuisance_est(self, smpls, n_jobs_cv, return_models=False): + def _nuisance_est(self, smpls, n_jobs_cv, external_predictions, return_models=False): x, y = check_X_y(self._dml_data.x, self._dml_data.y, force_all_finite=False) x, d = check_X_y(x, self._dml_data.d, diff --git a/doubleml/double_ml_did_cs.py b/doubleml/double_ml_did_cs.py index 2be66b41..fdde41ff 100644 --- a/doubleml/double_ml_did_cs.py +++ b/doubleml/double_ml_did_cs.py @@ 
-205,7 +205,7 @@ def _check_data(self, obj_dml_data): return - def _nuisance_est(self, smpls, n_jobs_cv, return_models=False): + def _nuisance_est(self, smpls, n_jobs_cv, external_predictions, return_models=False): x, y = check_X_y(self._dml_data.x, self._dml_data.y, force_all_finite=False) x, d = check_X_y(x, self._dml_data.d, From 4530983836ae2edf3a8381490cf9271358428acc Mon Sep 17 00:00:00 2001 From: Jan Teichert-Kluge Date: Mon, 12 Jun 2023 17:18:52 +0200 Subject: [PATCH 09/67] Adjusted DoubleMLPLR class for external prediction --- doubleml/double_ml_plr.py | 39 +++++++++++++++++++++++++++------------ 1 file changed, 27 insertions(+), 12 deletions(-) diff --git a/doubleml/double_ml_plr.py b/doubleml/double_ml_plr.py index 5704aec7..aa5b24df 100644 --- a/doubleml/double_ml_plr.py +++ b/doubleml/double_ml_plr.py @@ -181,16 +181,26 @@ def _nuisance_est(self, smpls, n_jobs_cv, external_predictions, return_models=Fa force_all_finite=False) # nuisance l - l_hat = _dml_cv_predict(self._learner['ml_l'], x, y, smpls=smpls, n_jobs=n_jobs_cv, - est_params=self._get_params('ml_l'), method=self._predict_method['ml_l'], - return_models=return_models) - _check_finite_predictions(l_hat['preds'], self._learner['ml_l'], 'ml_l', smpls) + if external_predictions['ml_l'] is not None: + l_hat = {'preds': external_predictions['ml_l'], + 'targets': None, + 'models': None} + else: + l_hat = _dml_cv_predict(self._learner['ml_l'], x, y, smpls=smpls, n_jobs=n_jobs_cv, + est_params=self._get_params('ml_l'), method=self._predict_method['ml_l'], + return_models=return_models) + _check_finite_predictions(l_hat['preds'], self._learner['ml_l'], 'ml_l', smpls) # nuisance m - m_hat = _dml_cv_predict(self._learner['ml_m'], x, d, smpls=smpls, n_jobs=n_jobs_cv, - est_params=self._get_params('ml_m'), method=self._predict_method['ml_m'], - return_models=return_models) - _check_finite_predictions(m_hat['preds'], self._learner['ml_m'], 'ml_m', smpls) + if external_predictions['ml_m'] is not None: + 
m_hat = {'preds': external_predictions['ml_m'], + 'targets': None, + 'models': None} + else: + m_hat = _dml_cv_predict(self._learner['ml_m'], x, d, smpls=smpls, n_jobs=n_jobs_cv, + est_params=self._get_params('ml_m'), method=self._predict_method['ml_m'], + return_models=return_models) + _check_finite_predictions(m_hat['preds'], self._learner['ml_m'], 'ml_m', smpls) if self._check_learner(self._learner['ml_m'], 'ml_m', regressor=True, classifier=True): _check_is_propensity(m_hat['preds'], self._learner['ml_m'], 'ml_m', smpls, eps=1e-12) @@ -211,10 +221,15 @@ def _nuisance_est(self, smpls, n_jobs_cv, external_predictions, return_models=Fa psi_b = np.multiply(d - m_hat['preds'], y - l_hat['preds']) theta_initial = -np.nanmean(psi_b) / np.nanmean(psi_a) # nuisance g - g_hat = _dml_cv_predict(self._learner['ml_g'], x, y - theta_initial*d, smpls=smpls, n_jobs=n_jobs_cv, - est_params=self._get_params('ml_g'), method=self._predict_method['ml_g'], - return_models=return_models) - _check_finite_predictions(g_hat['preds'], self._learner['ml_g'], 'ml_g', smpls) + if external_predictions['ml_g'] is not None: + g_hat = {'preds': external_predictions['ml_g'], + 'targets': None, + 'models': None} + else: + g_hat = _dml_cv_predict(self._learner['ml_g'], x, y - theta_initial*d, smpls=smpls, n_jobs=n_jobs_cv, + est_params=self._get_params('ml_g'), method=self._predict_method['ml_g'], + return_models=return_models) + _check_finite_predictions(g_hat['preds'], self._learner['ml_g'], 'ml_g', smpls) psi_a, psi_b = self._score_elements(y, d, l_hat['preds'], m_hat['preds'], g_hat['preds'], smpls) psi_elements = {'psi_a': psi_a, From e369026ad5a04d5635c3b2977c4762f41201896b Mon Sep 17 00:00:00 2001 From: Jan Teichert-Kluge Date: Tue, 13 Jun 2023 08:40:18 +0200 Subject: [PATCH 10/67] External predictions added to test cases for PLR --- doubleml/tests/test_plr.py | 49 ++++++++++++++++++++++++++++++++++++++ 1 file changed, 49 insertions(+) diff --git a/doubleml/tests/test_plr.py 
b/doubleml/tests/test_plr.py index 7b8b56d9..17eacfe2 100644 --- a/doubleml/tests/test_plr.py +++ b/doubleml/tests/test_plr.py @@ -76,11 +76,44 @@ def dml_plr_fixture(generate_data1, learner, score, dml_procedure): res_manual = fit_plr(y, x, d, clone(learner), clone(learner), clone(learner), all_smpls, dml_procedure, score) + + np.random.seed(3141) + # test with external nuisance predictions + if score == 'partialling out': + dml_plr_obj_ext = dml.DoubleMLPLR(obj_dml_data, + ml_l, ml_m, + n_folds, + score=score, + dml_procedure=dml_procedure) + else: + assert score == 'IV-type' + dml_plr_obj_ext = dml.DoubleMLPLR(obj_dml_data, + ml_l, ml_m, ml_g, + n_folds, + score=score, + dml_procedure=dml_procedure) + + # synchronize the sample splitting + dml_plr_obj_ext.set_sample_splitting(all_smpls=all_smpls) + + if score == 'partialling out': + prediction_dict = {'d': {'ml_l': dml_plr_obj.predictions['ml_l'].reshape(-1), + 'ml_m': dml_plr_obj.predictions['ml_m'].reshape(-1)}} + else: + assert score == 'IV-type' + prediction_dict = {'d': {'ml_l': dml_plr_obj.predictions['ml_l'].reshape(-1), + 'ml_m': dml_plr_obj.predictions['ml_m'].reshape(-1), + 'ml_g': dml_plr_obj.predictions['ml_g'].reshape(-1)}} + + dml_plr_obj_ext.fit(external_predictions=prediction_dict) + res_dict = {'coef': dml_plr_obj.coef, 'coef_manual': res_manual['theta'], + 'coef_ext': dml_plr_obj_ext.coef, 'se': dml_plr_obj.se, 'se_manual': res_manual['se'], + 'se_ext': dml_plr_obj_ext.se, 'boot_methods': boot_methods} for bootstrap in boot_methods: @@ -91,10 +124,14 @@ def dml_plr_fixture(generate_data1, learner, score, dml_procedure): np.random.seed(3141) dml_plr_obj.bootstrap(method=bootstrap, n_rep_boot=n_rep_boot) + np.random.seed(3141) + dml_plr_obj_ext.bootstrap(method=bootstrap, n_rep_boot=n_rep_boot) res_dict['boot_coef' + bootstrap] = dml_plr_obj.boot_coef res_dict['boot_t_stat' + bootstrap] = dml_plr_obj.boot_t_stat res_dict['boot_coef' + bootstrap + '_manual'] = boot_theta res_dict['boot_t_stat' + 
bootstrap + '_manual'] = boot_t_stat + res_dict['boot_coef' + bootstrap + '_ext'] = dml_plr_obj_ext.boot_coef + res_dict['boot_t_stat' + bootstrap + '_ext'] = dml_plr_obj_ext.boot_t_stat return res_dict @@ -104,6 +141,9 @@ def test_dml_plr_coef(dml_plr_fixture): assert math.isclose(dml_plr_fixture['coef'], dml_plr_fixture['coef_manual'], rel_tol=1e-9, abs_tol=1e-4) + assert math.isclose(dml_plr_fixture['coef'], + dml_plr_fixture['coef_ext'], + rel_tol=1e-9, abs_tol=1e-4) @pytest.mark.ci @@ -111,6 +151,9 @@ def test_dml_plr_se(dml_plr_fixture): assert math.isclose(dml_plr_fixture['se'], dml_plr_fixture['se_manual'], rel_tol=1e-9, abs_tol=1e-4) + assert math.isclose(dml_plr_fixture['se'], + dml_plr_fixture['se_ext'], + rel_tol=1e-9, abs_tol=1e-4) @pytest.mark.ci @@ -119,9 +162,15 @@ def test_dml_plr_boot(dml_plr_fixture): assert np.allclose(dml_plr_fixture['boot_coef' + bootstrap], dml_plr_fixture['boot_coef' + bootstrap + '_manual'], rtol=1e-9, atol=1e-4) + assert np.allclose(dml_plr_fixture['boot_coef' + bootstrap], + dml_plr_fixture['boot_coef' + bootstrap + '_ext'], + rtol=1e-9, atol=1e-4) assert np.allclose(dml_plr_fixture['boot_t_stat' + bootstrap], dml_plr_fixture['boot_t_stat' + bootstrap + '_manual'], rtol=1e-9, atol=1e-4) + assert np.allclose(dml_plr_fixture['boot_t_stat' + bootstrap], + dml_plr_fixture['boot_t_stat' + bootstrap + '_ext'], + rtol=1e-9, atol=1e-4) @pytest.fixture(scope="module") From fe3d862e2b2e873555b93eea9c8432d3fcc7aa25 Mon Sep 17 00:00:00 2001 From: Jan Teichert-Kluge Date: Tue, 13 Jun 2023 13:01:49 +0200 Subject: [PATCH 11/67] Excluding testfile from staging --- .gitignore | 1 + 1 file changed, 1 insertion(+) diff --git a/.gitignore b/.gitignore index 41a409eb..7dae397b 100644 --- a/.gitignore +++ b/.gitignore @@ -29,3 +29,4 @@ share/python-wheels/ MANIFEST *.idea *.vscode +test_plr_basic.py From f1e112766bea04d05c55592a11d3d253174ed81d Mon Sep 17 00:00:00 2001 From: Jan Teichert-Kluge 
<68404051+JanTeichertKluge@users.noreply.github.com> Date: Tue, 13 Jun 2023 13:41:01 +0200 Subject: [PATCH 12/67] Update .gitignore removing testfile --- .gitignore | 1 - 1 file changed, 1 deletion(-) diff --git a/.gitignore b/.gitignore index 7dae397b..41a409eb 100644 --- a/.gitignore +++ b/.gitignore @@ -29,4 +29,3 @@ share/python-wheels/ MANIFEST *.idea *.vscode -test_plr_basic.py From e4573441e216c17e50b27e98c2856fca465d1c1e Mon Sep 17 00:00:00 2001 From: Jan Teichert-Kluge Date: Wed, 14 Jun 2023 09:21:55 +0200 Subject: [PATCH 13/67] minor change according to n_rep > 1 --- doubleml/double_ml.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/doubleml/double_ml.py b/doubleml/double_ml.py index e276df81..4fecfe80 100644 --- a/doubleml/double_ml.py +++ b/doubleml/double_ml.py @@ -1021,8 +1021,8 @@ def _check_external_predictions(self, external_predictions): raise TypeError('external_predictions must be a dictionary. ' f'{str(external_predictions)} of type {str(type(external_predictions))} was passed.') - if self.n_rep > 1: - raise NotImplementedError('external_predictions is not yet implmented for ``n_rep > 1``.') + # if self.n_rep > 1: + # raise NotImplementedError('external_predictions is not yet implmented for ``n_rep > 1``.') supplied_treatments = list(external_predictions.keys()) valid_treatments = self._dml_data.d_cols From 84aa99ff0f1916dcdc198b988203489a0b1261f9 Mon Sep 17 00:00:00 2001 From: Jan Teichert-Kluge Date: Wed, 14 Jun 2023 09:39:46 +0200 Subject: [PATCH 14/67] . --- doubleml/double_ml.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/doubleml/double_ml.py b/doubleml/double_ml.py index 4fecfe80..e276df81 100644 --- a/doubleml/double_ml.py +++ b/doubleml/double_ml.py @@ -1021,8 +1021,8 @@ def _check_external_predictions(self, external_predictions): raise TypeError('external_predictions must be a dictionary. 
' f'{str(external_predictions)} of type {str(type(external_predictions))} was passed.') - # if self.n_rep > 1: - # raise NotImplementedError('external_predictions is not yet implmented for ``n_rep > 1``.') + if self.n_rep > 1: + raise NotImplementedError('external_predictions is not yet implmented for ``n_rep > 1``.') supplied_treatments = list(external_predictions.keys()) valid_treatments = self._dml_data.d_cols From 5cfc73cd3ea07cc53576dde91d750471b0002483 Mon Sep 17 00:00:00 2001 From: Jan Teichert-Kluge Date: Wed, 14 Jun 2023 10:33:22 +0200 Subject: [PATCH 15/67] n_rep > 1 are now supported by double_ml.py --- doubleml/double_ml.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/doubleml/double_ml.py b/doubleml/double_ml.py index e276df81..05b6059c 100644 --- a/doubleml/double_ml.py +++ b/doubleml/double_ml.py @@ -529,7 +529,7 @@ def fit(self, n_jobs_cv=None, store_predictions=True, external_predictions=None, if external_predictions is None: ext_prediction_dict[learner] = None elif learner in external_predictions[self._dml_data.d_cols[i_d]].keys(): - ext_prediction_dict[learner] = external_predictions[self._dml_data.d_cols[i_d]][learner] + ext_prediction_dict[learner] = external_predictions[self._dml_data.d_cols[i_d]][learner][0:, i_rep] else: ext_prediction_dict[learner] = None @@ -1021,8 +1021,8 @@ def _check_external_predictions(self, external_predictions): raise TypeError('external_predictions must be a dictionary. 
' f'{str(external_predictions)} of type {str(type(external_predictions))} was passed.') - if self.n_rep > 1: - raise NotImplementedError('external_predictions is not yet implmented for ``n_rep > 1``.') + # if self.n_rep > 1: + # raise NotImplementedError('external_predictions is not yet implmented for ``n_rep > 1``.') supplied_treatments = list(external_predictions.keys()) valid_treatments = self._dml_data.d_cols @@ -1052,7 +1052,7 @@ def _check_external_predictions(self, external_predictions): ' and learner ' + str(learner) + '. ' + f'Object of type {str(type(external_predictions[treatment][learner]))} was passed.') - expected_shape = (self._dml_data.n_obs, ) + expected_shape = (self._dml_data.n_obs, self.n_rep) if external_predictions[treatment][learner].shape != expected_shape: raise ValueError('Invalid external_predictions. ' f'The supplied predictions have to be of shape {str(expected_shape)}. ' From 78b0bba144f4c076fce8810b3ba7f0d493eb26b9 Mon Sep 17 00:00:00 2001 From: Jan Teichert-Kluge Date: Wed, 14 Jun 2023 13:04:02 +0200 Subject: [PATCH 16/67] Update double_ml.py --- doubleml/double_ml.py | 3 --- 1 file changed, 3 deletions(-) diff --git a/doubleml/double_ml.py b/doubleml/double_ml.py index 05b6059c..d0d25ee3 100644 --- a/doubleml/double_ml.py +++ b/doubleml/double_ml.py @@ -1021,9 +1021,6 @@ def _check_external_predictions(self, external_predictions): raise TypeError('external_predictions must be a dictionary. 
' f'{str(external_predictions)} of type {str(type(external_predictions))} was passed.') - # if self.n_rep > 1: - # raise NotImplementedError('external_predictions is not yet implmented for ``n_rep > 1``.') - supplied_treatments = list(external_predictions.keys()) valid_treatments = self._dml_data.d_cols if not set(supplied_treatments).issubset(valid_treatments): From cd16290d8e3722c7640cdfcefc1fa334e2c23f90 Mon Sep 17 00:00:00 2001 From: Jan Teichert-Kluge Date: Wed, 14 Jun 2023 14:11:48 +0200 Subject: [PATCH 17/67] Addition / adaptation of the test files --- doubleml/tests/test_irm.py | 6 ++--- doubleml/tests/test_plr.py | 10 ++++---- doubleml/tests/test_plr_rep_cross.py | 37 ++++++++++++++++++++++++++-- 3 files changed, 43 insertions(+), 10 deletions(-) diff --git a/doubleml/tests/test_irm.py b/doubleml/tests/test_irm.py index 992eb28f..24a5c240 100644 --- a/doubleml/tests/test_irm.py +++ b/doubleml/tests/test_irm.py @@ -101,9 +101,9 @@ def dml_irm_fixture(generate_data_irm, learner, score, dml_procedure, normalize_ # synchronize the sample splitting dml_irm_obj_ext.set_sample_splitting(all_smpls=all_smpls) - prediction_dict = {'d': {'ml_g0': dml_irm_obj.predictions['ml_g0'].reshape(-1), - 'ml_g1': dml_irm_obj.predictions['ml_g1'].reshape(-1), - 'ml_m': dml_irm_obj.predictions['ml_m'].reshape(-1)}} + prediction_dict = {'d': {'ml_g0': dml_irm_obj.predictions['ml_g0'].reshape(-1, 1), + 'ml_g1': dml_irm_obj.predictions['ml_g1'].reshape(-1, 1), + 'ml_m': dml_irm_obj.predictions['ml_m'].reshape(-1, 1)}} dml_irm_obj_ext.fit(external_predictions=prediction_dict) res_dict = {'coef': dml_irm_obj.coef, diff --git a/doubleml/tests/test_plr.py b/doubleml/tests/test_plr.py index 17eacfe2..7177e872 100644 --- a/doubleml/tests/test_plr.py +++ b/doubleml/tests/test_plr.py @@ -97,13 +97,13 @@ def dml_plr_fixture(generate_data1, learner, score, dml_procedure): dml_plr_obj_ext.set_sample_splitting(all_smpls=all_smpls) if score == 'partialling out': - prediction_dict = {'d': 
{'ml_l': dml_plr_obj.predictions['ml_l'].reshape(-1), - 'ml_m': dml_plr_obj.predictions['ml_m'].reshape(-1)}} + prediction_dict = {'d': {'ml_l': dml_plr_obj.predictions['ml_l'].reshape(-1, 1), + 'ml_m': dml_plr_obj.predictions['ml_m'].reshape(-1, 1)}} else: assert score == 'IV-type' - prediction_dict = {'d': {'ml_l': dml_plr_obj.predictions['ml_l'].reshape(-1), - 'ml_m': dml_plr_obj.predictions['ml_m'].reshape(-1), - 'ml_g': dml_plr_obj.predictions['ml_g'].reshape(-1)}} + prediction_dict = {'d': {'ml_l': dml_plr_obj.predictions['ml_l'].reshape(-1, 1), + 'ml_m': dml_plr_obj.predictions['ml_m'].reshape(-1, 1), + 'ml_g': dml_plr_obj.predictions['ml_g'].reshape(-1, 1)}} dml_plr_obj_ext.fit(external_predictions=prediction_dict) diff --git a/doubleml/tests/test_plr_rep_cross.py b/doubleml/tests/test_plr_rep_cross.py index f2a50e21..4f95f10a 100644 --- a/doubleml/tests/test_plr_rep_cross.py +++ b/doubleml/tests/test_plr_rep_cross.py @@ -74,13 +74,46 @@ def dml_plr_fixture(generate_data1, learner, score, dml_procedure, n_rep): res_manual = fit_plr(y, x, d, _clone(learner), _clone(learner), _clone(learner), all_smpls, dml_procedure, score, n_rep) + + np.random.seed(3141) + # test with external nuisance predictions + if score == 'partialling out': + dml_plr_obj_ext = dml.DoubleMLPLR(obj_dml_data, + ml_l, ml_m, + n_folds, + n_rep, + score=score, + dml_procedure=dml_procedure) + else: + assert score == 'IV-type' + dml_plr_obj_ext = dml.DoubleMLPLR(obj_dml_data, + ml_l, ml_m, ml_g, + n_folds, + n_rep, + score=score, + dml_procedure=dml_procedure) + + # synchronize the sample splitting + dml_plr_obj_ext.set_sample_splitting(all_smpls=all_smpls) + + if score == 'partialling out': + prediction_dict = {'d': {'ml_l': dml_plr_obj.predictions['ml_l'].reshape(-1, n_rep), + 'ml_m': dml_plr_obj.predictions['ml_m'].reshape(-1, n_rep)}} + else: + assert score == 'IV-type' + prediction_dict = {'d': {'ml_l': dml_plr_obj.predictions['ml_l'].reshape(-1, n_rep), + 'ml_m': 
dml_plr_obj.predictions['ml_m'].reshape(-1, n_rep), + 'ml_g': dml_plr_obj.predictions['ml_g'].reshape(-1, n_rep)}} + + dml_plr_obj_ext.fit(external_predictions=prediction_dict) res_dict = {'coef': dml_plr_obj.coef, 'coef_manual': res_manual['theta'], + 'coef_ext': dml_plr_obj_ext.coef, 'se': dml_plr_obj.se, 'se_manual': res_manual['se'], - 'boot_methods': boot_methods - } + 'se_ext': dml_plr_obj_ext.se, + 'boot_methods': boot_methods} for bootstrap in boot_methods: np.random.seed(3141) From 75dfd1e7d94207419d92d4e16131b17c196ecfcb Mon Sep 17 00:00:00 2001 From: Jan Teichert-Kluge Date: Wed, 14 Jun 2023 14:50:08 +0200 Subject: [PATCH 18/67] Changes double_ml to pass partly ext. predictions --- doubleml/double_ml.py | 19 +++++++++++-------- 1 file changed, 11 insertions(+), 8 deletions(-) diff --git a/doubleml/double_ml.py b/doubleml/double_ml.py index d0d25ee3..8402080c 100644 --- a/doubleml/double_ml.py +++ b/doubleml/double_ml.py @@ -529,7 +529,10 @@ def fit(self, n_jobs_cv=None, store_predictions=True, external_predictions=None, if external_predictions is None: ext_prediction_dict[learner] = None elif learner in external_predictions[self._dml_data.d_cols[i_d]].keys(): - ext_prediction_dict[learner] = external_predictions[self._dml_data.d_cols[i_d]][learner][0:, i_rep] + if isinstance(external_predictions[self._dml_data.d_cols[i_d]][learner], np.ndarray): + ext_prediction_dict[learner] = external_predictions[self._dml_data.d_cols[i_d]][learner][0:, i_rep] + else: + ext_prediction_dict[learner] = None else: ext_prediction_dict[learner] = None @@ -1041,13 +1044,13 @@ def _check_external_predictions(self, external_predictions): f'Invalid nuisance learner for treatment {str(treatment)} in {str(supplied_learners)}. ' 'Valid nuisance learners ' + ' or '.join(valid_learners) + '.') - for learner in supplied_learners: - if not isinstance(external_predictions[treatment][learner], np.ndarray): - raise TypeError('Invalid external_predictions. 
' - 'The values of the nested list must be a numpy array. ' - 'Invalid predictions for treatment ' + str(treatment) + - ' and learner ' + str(learner) + '. ' + - f'Object of type {str(type(external_predictions[treatment][learner]))} was passed.') + # for learner in supplied_learners: + # if not isinstance(external_predictions[treatment][learner], np.ndarray): + # raise TypeError('Invalid external_predictions. ' + # 'The values of the nested list must be a numpy array. ' + # 'Invalid predictions for treatment ' + str(treatment) + + # ' and learner ' + str(learner) + '. ' + + # f'Object of type {str(type(external_predictions[treatment][learner]))} was passed.') expected_shape = (self._dml_data.n_obs, self.n_rep) if external_predictions[treatment][learner].shape != expected_shape: From 87955c1bad126e180cbdf31f7b58c7558e967b34 Mon Sep 17 00:00:00 2001 From: Jan Teichert-Kluge Date: Thu, 15 Jun 2023 16:44:29 +0200 Subject: [PATCH 19/67] new testfile for ext_preds --- doubleml/tests/test_external_predictions.py | 80 +++++++++++++++++++++ 1 file changed, 80 insertions(+) create mode 100644 doubleml/tests/test_external_predictions.py diff --git a/doubleml/tests/test_external_predictions.py b/doubleml/tests/test_external_predictions.py new file mode 100644 index 00000000..43a0cba6 --- /dev/null +++ b/doubleml/tests/test_external_predictions.py @@ -0,0 +1,80 @@ +import numpy as np +import pytest +import math +from sklearn.linear_model import LinearRegression, LassoCV +from doubleml import DoubleMLPLR, DoubleMLData +from doubleml.datasets import make_plr_CCDDHNR2018 + + +# @pytest.fixture(scope='module', +# params=[LinearRegression(), +# LassoCV()]) +# def learner(request): +# return request.param + +@pytest.fixture(scope='module', + params=['IV-type', 'partialling out']) +def score(request): + return request.param + +@pytest.fixture(scope='module', + params=['dml1', 'dml2']) +def dml_procedure(request): + return request.param + +@pytest.fixture(scope='module', + params=[1, 
3]) +def n_rep(request): + return request.param + + +@pytest.fixture(scope="module") +def adapted_doubleml_fixture(learner, score, dml_procedure): + ext_predictions = {'d': {}} + + x, y, d = make_plr_CCDDHNR2018(n_obs=500, + dim_x=20, + alpha=0.5, + return_type="np.array") + + lm_m1 = LinearRegression() + lm_l1 = LinearRegression() + + np.random.seed(3141) + lm_m1.fit(x, d) + ext_predictions['d']['ml_m'] = np.stack([lm_m1.predict(x) for _ in range(n_rep)], axis=1) + + lm_l1.fit(x, y) + ext_predictions['d']['ml_l'] = np.stack([lm_m1.predict(x) for _ in range(n_rep)], axis=1) + + dml_data = DoubleMLData.from_arrays(x=x, y=y, d=d) + + DMLPLR = DoubleMLPLR(obj_dml_data=dml_data, + ml_m=learner, + ml_l=learner, + score=score, + n_rep=n_rep, + dml_procedure=dml_procedure) + np.random.seed(3141) + DMLPLR.fit(store_predictions=True) + + DMLPLR_ext = DoubleMLPLR(obj_dml_data=dml_data, + ml_m=learner, + ml_l=learner, + score=score, + n_rep=n_rep, + dml_procedure=dml_procedure) + + np.random.seed(3141) + DMLPLR_ext.fit(external_predictions=ext_predictions) + + res_dict = {'coef_normal': DMLPLR.coef, + 'coef_ext': DMLPLR_ext.coef} + + return res_dict + +@pytest.mark.ci +def test_adapted_doubleml_coef(adapted_doubleml_fixture): + assert math.isclose(adapted_doubleml_fixture['coef_normal'], + adapted_doubleml_fixture['coef_ext'], + rel_tol=1e-9, abs_tol=1e-4) \ No newline at end of file From f64fb575fb66224306881e18cfa25a021fd6265f Mon Sep 17 00:00:00 2001 From: Jan Teichert-Kluge Date: Tue, 20 Jun 2023 15:53:18 +0200 Subject: [PATCH 20/67] new testcases / change dml.py --- doubleml/double_ml.py | 4 ++- doubleml/tests/test_external_predictions.py | 29 ++++++++++++--------- 2 files changed, 20 insertions(+), 13 deletions(-) diff --git a/doubleml/double_ml.py b/doubleml/double_ml.py index 8402080c..d2ec3d06 100644 --- a/doubleml/double_ml.py +++ b/doubleml/double_ml.py @@ -529,8 +529,10 @@ def fit(self, n_jobs_cv=None, store_predictions=True, external_predictions=None, if 
external_predictions is None: ext_prediction_dict[learner] = None elif learner in external_predictions[self._dml_data.d_cols[i_d]].keys(): - if isinstance(external_predictions[self._dml_data.d_cols[i_d]][learner], np.ndarray): + if isinstance(external_predictions[self._dml_data.d_cols[i_d]][learner], np.ndarray) and self.n_rep > 1: ext_prediction_dict[learner] = external_predictions[self._dml_data.d_cols[i_d]][learner][0:, i_rep] + elif isinstance(external_predictions[self._dml_data.d_cols[i_d]][learner], np.ndarray) and self.n_rep == 1: + ext_prediction_dict[learner] = external_predictions[self._dml_data.d_cols[i_d]][learner] else: ext_prediction_dict[learner] = None else: diff --git a/doubleml/tests/test_external_predictions.py b/doubleml/tests/test_external_predictions.py index 43a0cba6..d1c681d7 100644 --- a/doubleml/tests/test_external_predictions.py +++ b/doubleml/tests/test_external_predictions.py @@ -5,7 +5,6 @@ from doubleml import DoubleMLPLR, DoubleMLData from doubleml.datasets import make_plr_CCDDHNR2018 - # @pytest.fixture(scope='module', # params=[LinearRegression(), # LassoCV()]) @@ -29,7 +28,7 @@ def n_rep(request): @pytest.fixture(scope="module") -def adapted_doubleml_fixture(learner, score, dml_procedure): +def adapted_doubleml_fixture(score, dml_procedure, n_rep): ext_predictions = {'d': {}} x, y, d = make_plr_CCDDHNR2018(n_obs=500, @@ -37,30 +36,36 @@ def adapted_doubleml_fixture(learner, score, dml_procedure): alpha=0.5, return_type="np.array") - lm_m1 = LinearRegression() - lm_l1 = LinearRegression() + # lm_m1 = LinearRegression() + # lm_l1 = LinearRegression() np.random.seed(3141) - lm_m1.fit(x, d) - ext_predictions['d']['ml_m'] = np.stack([lm_m1.predict(x) for _ in range(n_rep)], axis=1) - lm_l1.fit(x, y) - ext_predictions['d']['ml_l'] = np.stack([lm_m1.predict(x) for _ in range(n_rep)], axis=1) + # lm_m1.fit(x, d) + # ext_predictions['d']['ml_m'] = np.stack([lm_m1.predict(x) for _ in range(n_rep)], axis=1) + + # lm_l1.fit(x, y) + # 
ext_predictions['d']['ml_l'] = np.stack([lm_l1.predict(x) for _ in range(n_rep)], axis=1) dml_data = DoubleMLData.from_arrays(x=x, y=y, d=d) DMLPLR = DoubleMLPLR(obj_dml_data=dml_data, - ml_m=learner, - ml_l=learner, + ml_m=LinearRegression(), + ml_l=LinearRegression(), score=score, n_rep=n_rep, dml_procedure=dml_procedure) np.random.seed(3141) + DMLPLR.fit(store_predictions=True) + ext_predictions['d']['ml_m'] = DMLPLR.predictions['ml_m'].squeeze() + ext_predictions['d']['ml_l'] = DMLPLR.predictions['ml_l'].squeeze() + + DMLPLR_ext = DoubleMLPLR(obj_dml_data=dml_data, - ml_m=learner, - ml_l=learner, + ml_m=LinearRegression(), + ml_l=LinearRegression(), score=score, n_rep=n_rep, dml_procedure=dml_procedure) From f72d6b984fd6e93a82a1d0700caab77f8b2f136e Mon Sep 17 00:00:00 2001 From: Jan Teichert-Kluge Date: Tue, 27 Jun 2023 15:59:42 +0200 Subject: [PATCH 21/67] Fix testcases for external predictions --- doubleml/tests/test_external_predictions.py | 53 +++++++++++---------- 1 file changed, 28 insertions(+), 25 deletions(-) diff --git a/doubleml/tests/test_external_predictions.py b/doubleml/tests/test_external_predictions.py index d1c681d7..5aa09b61 100644 --- a/doubleml/tests/test_external_predictions.py +++ b/doubleml/tests/test_external_predictions.py @@ -5,11 +5,17 @@ from doubleml import DoubleMLPLR, DoubleMLData from doubleml.datasets import make_plr_CCDDHNR2018 -# @pytest.fixture(scope='module', -# params=[LinearRegression(), -# LassoCV()]) -# def learner(request): -# return request.param +class dummy_learner: + _estimator_type = "regressor" + def fit(*args): + raise AttributeError("Accessed fit method!") + def predict(*args): + raise AttributeError("Accessed predict method!") + def set_params(*args): + raise AttributeError("Accessed set_params method!") + def get_params(*args, **kwargs): + raise AttributeError("Accessed get_params method!") + @pytest.fixture(scope='module', params=['IV-type', 'partialling out']) @@ -36,25 +42,21 @@ def 
adapted_doubleml_fixture(score, dml_procedure, n_rep): alpha=0.5, return_type="np.array") - # lm_m1 = LinearRegression() - # lm_l1 = LinearRegression() - np.random.seed(3141) - # lm_m1.fit(x, d) - # ext_predictions['d']['ml_m'] = np.stack([lm_m1.predict(x) for _ in range(n_rep)], axis=1) - - # lm_l1.fit(x, y) - # ext_predictions['d']['ml_l'] = np.stack([lm_l1.predict(x) for _ in range(n_rep)], axis=1) - dml_data = DoubleMLData.from_arrays(x=x, y=y, d=d) - DMLPLR = DoubleMLPLR(obj_dml_data=dml_data, - ml_m=LinearRegression(), + kwargs = {'obj_dml_data': dml_data, + 'score': score, + 'n_rep': n_rep, + 'dml_procedure': dml_procedure} + + if score == 'IV-type': + kwargs['ml_g'] = LinearRegression() + + DMLPLR = DoubleMLPLR(ml_m=LinearRegression(), ml_l=LinearRegression(), - score=score, - n_rep=n_rep, - dml_procedure=dml_procedure) + **kwargs) np.random.seed(3141) DMLPLR.fit(store_predictions=True) @@ -62,13 +64,14 @@ def adapted_doubleml_fixture(score, dml_procedure, n_rep): ext_predictions['d']['ml_m'] = DMLPLR.predictions['ml_m'].squeeze() ext_predictions['d']['ml_l'] = DMLPLR.predictions['ml_l'].squeeze() + if score == 'IV-type': + kwargs['ml_g'] = dummy_learner() + ext_predictions['d']['ml_g'] = DMLPLR.predictions['ml_g'].squeeze() + - DMLPLR_ext = DoubleMLPLR(obj_dml_data=dml_data, - ml_m=LinearRegression(), - ml_l=LinearRegression(), - score=score, - n_rep=n_rep, - dml_procedure=dml_procedure) + DMLPLR_ext = DoubleMLPLR(ml_m=dummy_learner(), + ml_l=dummy_learner(), + **kwargs) np.random.seed(3141) DMLPLR_ext.fit(external_predictions=ext_predictions) From 731325a110d448c5172004cef710fb04d64a5045 Mon Sep 17 00:00:00 2001 From: Jan Teichert-Kluge Date: Mon, 10 Jul 2023 15:30:30 +0200 Subject: [PATCH 22/67] Add external prediction option to PLIV model --- doubleml/double_ml.py | 14 +-- doubleml/double_ml_pliv.py | 72 ++++++++----- doubleml/double_ml_plr.py | 4 +- .../tests/test_external_predictions_IV.py | 102 ++++++++++++++++++ 4 files changed, 158 insertions(+), 
34 deletions(-) create mode 100644 doubleml/tests/test_external_predictions_IV.py diff --git a/doubleml/double_ml.py b/doubleml/double_ml.py index d2ec3d06..9d0698e6 100644 --- a/doubleml/double_ml.py +++ b/doubleml/double_ml.py @@ -1054,13 +1054,13 @@ def _check_external_predictions(self, external_predictions): # ' and learner ' + str(learner) + '. ' + # f'Object of type {str(type(external_predictions[treatment][learner]))} was passed.') - expected_shape = (self._dml_data.n_obs, self.n_rep) - if external_predictions[treatment][learner].shape != expected_shape: - raise ValueError('Invalid external_predictions. ' - f'The supplied predictions have to be of shape {str(expected_shape)}. ' - 'Invalid predictions for treatment ' + str(treatment) + - ' and learner ' + str(learner) + '. ' + - f'Predictions of shape {str(external_predictions[treatment][learner].shape)} passed.') + # expected_shape = (self._dml_data.n_obs, self.n_rep) + # if external_predictions[treatment][learner].shape != expected_shape: + # raise ValueError('Invalid external_predictions. ' + # f'The supplied predictions have to be of shape {str(expected_shape)}. ' + # 'Invalid predictions for treatment ' + str(treatment) + + # ' and learner ' + str(learner) + '. 
' + + # f'Predictions of shape {str(external_predictions[treatment][learner].shape)} passed.') def _initialize_arrays(self): psi = np.full((self._dml_data.n_obs, self.n_rep, self._dml_data.n_coefs), np.nan) diff --git a/doubleml/double_ml_pliv.py b/doubleml/double_ml_pliv.py index 1ad57d24..2b87321f 100644 --- a/doubleml/double_ml_pliv.py +++ b/doubleml/double_ml_pliv.py @@ -293,13 +293,13 @@ def set_ml_nuisance_params(self, learner, treat_var, params): super(DoubleMLPLIV, self).set_ml_nuisance_params(learner, treat_var, params) def _nuisance_est(self, smpls, n_jobs_cv, external_predictions, return_models=False): - if self.partialX & (not self.partialZ): - psi_elements, preds = self._nuisance_est_partial_x(smpls, n_jobs_cv, return_models) + if self.partialX & (not self.partialZ): + psi_elements, preds = self._nuisance_est_partial_x(smpls, n_jobs_cv, external_predictions, return_models) elif (not self.partialX) & self.partialZ: - psi_elements, preds = self._nuisance_est_partial_z(smpls, n_jobs_cv, return_models) + psi_elements, preds = self._nuisance_est_partial_z(smpls, n_jobs_cv, external_predictions, return_models) else: assert (self.partialX & self.partialZ) - psi_elements, preds = self._nuisance_est_partial_xz(smpls, n_jobs_cv, return_models) + psi_elements, preds = self._nuisance_est_partial_xz(smpls, n_jobs_cv, external_predictions, return_models) return psi_elements, preds @@ -318,16 +318,21 @@ def _nuisance_tuning(self, smpls, param_grids, scoring_methods, n_folds_tune, n_ return res - def _nuisance_est_partial_x(self, smpls, n_jobs_cv, return_models=False): + def _nuisance_est_partial_x(self, smpls, n_jobs_cv, external_predictions, return_models=False): x, y = check_X_y(self._dml_data.x, self._dml_data.y, force_all_finite=False) x, d = check_X_y(x, self._dml_data.d, force_all_finite=False) # nuisance l - l_hat = _dml_cv_predict(self._learner['ml_l'], x, y, smpls=smpls, n_jobs=n_jobs_cv, - est_params=self._get_params('ml_l'), 
method=self._predict_method['ml_l'], - return_models=return_models) + if external_predictions['ml_l'] is not None: + l_hat = {'preds': external_predictions['ml_l'], + 'targets': None, + 'models': None} + else: + l_hat = _dml_cv_predict(self._learner['ml_l'], x, y, smpls=smpls, n_jobs=n_jobs_cv, + est_params=self._get_params('ml_l'), method=self._predict_method['ml_l'], + return_models=return_models) _check_finite_predictions(l_hat['preds'], self._learner['ml_l'], 'ml_l', smpls) predictions = {'ml_l': l_hat['preds']} @@ -337,37 +342,54 @@ def _nuisance_est_partial_x(self, smpls, n_jobs_cv, return_models=False): if self._dml_data.n_instr == 1: # one instrument: just identified x, z = check_X_y(x, np.ravel(self._dml_data.z), - force_all_finite=False) - m_hat = _dml_cv_predict(self._learner['ml_m'], x, z, smpls=smpls, n_jobs=n_jobs_cv, - est_params=self._get_params('ml_m'), method=self._predict_method['ml_m'], - return_models=return_models) + force_all_finite=False) + if external_predictions['ml_m'] is not None: + m_hat = {'preds': external_predictions['ml_m'], + 'targets': None, + 'models': None} + else: + m_hat = _dml_cv_predict(self._learner['ml_m'], x, z, smpls=smpls, n_jobs=n_jobs_cv, + est_params=self._get_params('ml_m'), method=self._predict_method['ml_m'], + return_models=return_models) predictions['ml_m'] = m_hat['preds'] targets['ml_m'] = m_hat['targets'] models['ml_m'] = m_hat['models'] else: # several instruments: 2SLS m_hat = {'preds': np.full((self._dml_data.n_obs, self._dml_data.n_instr), np.nan), + 'targets': [None] * self._dml_data.n_instr, 'models': [None] * self._dml_data.n_instr} - z = self._dml_data.z for i_instr in range(self._dml_data.n_instr): + z = self._dml_data.z x, this_z = check_X_y(x, z[:, i_instr], - force_all_finite=False) - res_cv_predict = _dml_cv_predict(self._learner['ml_m'], x, this_z, smpls=smpls, n_jobs=n_jobs_cv, - est_params=self._get_params('ml_m_' + self._dml_data.z_cols[i_instr]), - method=self._predict_method['ml_m'], 
return_models=return_models) + force_all_finite=False) + if external_predictions['ml_m_' + self._dml_data.z_cols[i_instr]] is not None: + m_hat['preds'][:, i_instr] = external_predictions['ml_m_' + self._dml_data.z_cols[i_instr]] + predictions['ml_m_' + self._dml_data.z_cols[i_instr]] = external_predictions['ml_m_' + self._dml_data.z_cols[i_instr]] + targets['ml_m_' + self._dml_data.z_cols[i_instr]] = None + models['ml_m_' + self._dml_data.z_cols[i_instr]] = None + else: + res_cv_predict = _dml_cv_predict(self._learner['ml_m'], x, this_z, smpls=smpls, n_jobs=n_jobs_cv, + est_params=self._get_params('ml_m_' + self._dml_data.z_cols[i_instr]), + method=self._predict_method['ml_m'], return_models=return_models) - m_hat['preds'][:, i_instr] = res_cv_predict['preds'] + m_hat['preds'][:, i_instr] = res_cv_predict['preds'] - predictions['ml_m_' + self._dml_data.z_cols[i_instr]] = res_cv_predict['preds'] - targets['ml_m_' + self._dml_data.z_cols[i_instr]] = res_cv_predict['targets'] - models['ml_m_' + self._dml_data.z_cols[i_instr]] = res_cv_predict['models'] + predictions['ml_m_' + self._dml_data.z_cols[i_instr]] = res_cv_predict['preds'] + targets['ml_m_' + self._dml_data.z_cols[i_instr]] = res_cv_predict['targets'] + models['ml_m_' + self._dml_data.z_cols[i_instr]] = res_cv_predict['models'] _check_finite_predictions(m_hat['preds'], self._learner['ml_m'], 'ml_m', smpls) # nuisance r - r_hat = _dml_cv_predict(self._learner['ml_r'], x, d, smpls=smpls, n_jobs=n_jobs_cv, - est_params=self._get_params('ml_r'), method=self._predict_method['ml_r'], - return_models=return_models) + if external_predictions['ml_r'] is not None: + r_hat = {'preds': external_predictions['ml_r'], + 'targets': None, + 'models': None} + else: + r_hat = _dml_cv_predict(self._learner['ml_r'], x, d, smpls=smpls, n_jobs=n_jobs_cv, + est_params=self._get_params('ml_r'), method=self._predict_method['ml_r'], + return_models=return_models) _check_finite_predictions(r_hat['preds'], self._learner['ml_r'], 
'ml_r', smpls) predictions['ml_r'] = r_hat['preds'] targets['ml_r'] = r_hat['targets'] @@ -405,7 +427,7 @@ def _score_elements(self, y, z, d, l_hat, m_hat, r_hat, g_hat, smpls): # compute residuals u_hat = y - l_hat w_hat = d - r_hat - v_hat = z - m_hat + v_hat = z- m_hat r_hat_tilde = None if self._dml_data.n_instr > 1: diff --git a/doubleml/double_ml_plr.py b/doubleml/double_ml_plr.py index aa5b24df..ad1f0f1a 100644 --- a/doubleml/double_ml_plr.py +++ b/doubleml/double_ml_plr.py @@ -183,8 +183,8 @@ def _nuisance_est(self, smpls, n_jobs_cv, external_predictions, return_models=Fa # nuisance l if external_predictions['ml_l'] is not None: l_hat = {'preds': external_predictions['ml_l'], - 'targets': None, - 'models': None} + 'targets': None, + 'models': None} else: l_hat = _dml_cv_predict(self._learner['ml_l'], x, y, smpls=smpls, n_jobs=n_jobs_cv, est_params=self._get_params('ml_l'), method=self._predict_method['ml_l'], diff --git a/doubleml/tests/test_external_predictions_IV.py b/doubleml/tests/test_external_predictions_IV.py new file mode 100644 index 00000000..550704d2 --- /dev/null +++ b/doubleml/tests/test_external_predictions_IV.py @@ -0,0 +1,102 @@ +import numpy as np +import pytest +import math +from sklearn.linear_model import LinearRegression, LassoCV +from doubleml import DoubleMLPLIV, DoubleMLData +from doubleml.datasets import make_pliv_CHS2015 + +class dummy_learner: + _estimator_type = "regressor" + def fit(*args): + raise AttributeError("Accessed fit method!") + def predict(*args): + raise AttributeError("Accessed predict method!") + def set_params(*args): + raise AttributeError("Accessed set_params method!") + def get_params(*args, **kwargs): + raise AttributeError("Accessed get_params method!") + + +@pytest.fixture(scope='module', + params=['IV-type', 'partialling out']) +def score(request): + return request.param + +@pytest.fixture(scope='module', + params=['dml1', 'dml2']) +def dml_procedure(request): + return request.param + 
+@pytest.fixture(scope='module', + params=[1]) +def n_rep(request): + return request.param + +@pytest.fixture(scope='module', + params=[1, 3]) +def dim_z(request): + return request.param + + +@pytest.fixture(scope="module") +def adapted_doubleml_fixture(score, dml_procedure, n_rep): + ext_predictions = {'d': {}} + + data = make_pliv_CHS2015(n_obs=500, + dim_x=20, + alpha=0.5, + dim_z=dim_z, + return_type="DataFrame") + + np.random.seed(3141) + + dml_data = DoubleMLData(data, 'y', 'd', z_cols=[f"Z{i}" for i in range(1, dim_z+1)]) + + kwargs = {'obj_dml_data': dml_data, + 'score': score, + 'n_rep': n_rep, + 'dml_procedure': dml_procedure} + + if score == 'IV-type': + kwargs['ml_g'] = LinearRegression() + + DMLPLIV = DoubleMLPLIV(ml_m=LinearRegression(), + ml_l=LinearRegression(), + ml_r=LinearRegression(), + **kwargs) + np.random.seed(3141) + + DMLPLIV.fit(store_predictions=True) + + ext_predictions['d']['ml_l'] = DMLPLIV.predictions['ml_l'].squeeze() + ext_predictions['d']['ml_r'] = DMLPLIV.predictions['ml_r'].squeeze() + + if dimz == 1: + ext_predictions['d']['ml_m'] = DMLPLIV.predictions['ml_m'].squeeze() + else: + for instr in range(dimz): + ext_predictions['d']['ml_m_' + 'Z' + str(instr+1)] = DMLPLIV.predictions['ml_m_' + 'Z' + str(instr+1)].squeeze() + + if score == 'IV-type': + kwargs['ml_g'] = dummy_learner() + ext_predictions['d']['ml_g'] = DMLPLIV.predictions['ml_g'].squeeze() + + + DMLPLIV_ext = DoubleMLPLIV(ml_m=dummy_learner(), + ml_l=dummy_learner(), + ml_r=dummy_learner(), + **kwargs) + + np.random.seed(3141) + DMLPLR_ext.fit(external_predictions=ext_predictions) + + res_dict = {'coef_normal': DMLPLIV.coef, + 'coef_ext': DMLPLIV_ext.coef} + + return res_dict + +@pytest.mark.ci +def test_adapted_doubleml_coef(adapted_doubleml_fixture): + assert math.isclose(adapted_doubleml_fixture['coef_normal'], + adapted_doubleml_fixture['coef_ext'], + rel_tol=1e-9, abs_tol=1e-4) \ No newline at end of file From d96f28cf8178f5b9f2cfede01bd96cce501c1792 Mon Sep 17 
00:00:00 2001 From: Jan Teichert-Kluge Date: Mon, 10 Jul 2023 16:33:31 +0200 Subject: [PATCH 23/67] Fix PLIV model for IV-type score and add testcases --- doubleml/double_ml_pliv.py | 19 +-- .../tests/test_external_predictions_IV.py | 113 ++++++++++-------- 2 files changed, 72 insertions(+), 60 deletions(-) diff --git a/doubleml/double_ml_pliv.py b/doubleml/double_ml_pliv.py index 2b87321f..34d4fe29 100644 --- a/doubleml/double_ml_pliv.py +++ b/doubleml/double_ml_pliv.py @@ -399,13 +399,18 @@ def _nuisance_est_partial_x(self, smpls, n_jobs_cv, external_predictions, return if (self._dml_data.n_instr == 1) & ('ml_g' in self._learner): # an estimate of g is obtained for the IV-type score and callable scores # get an initial estimate for theta using the partialling out score - psi_a = -np.multiply(d - r_hat['preds'], z - m_hat['preds']) - psi_b = np.multiply(z - m_hat['preds'], y - l_hat['preds']) - theta_initial = -np.nanmean(psi_b) / np.nanmean(psi_a) - # nuisance g - g_hat = _dml_cv_predict(self._learner['ml_g'], x, y - theta_initial * d, smpls=smpls, n_jobs=n_jobs_cv, - est_params=self._get_params('ml_g'), method=self._predict_method['ml_g'], - return_models=return_models) + if external_predictions['ml_g'] is not None: + g_hat = {'preds': external_predictions['ml_g'], + 'targets': None, + 'models': None} + else: + psi_a = -np.multiply(d - r_hat['preds'], z - m_hat['preds']) + psi_b = np.multiply(z - m_hat['preds'], y - l_hat['preds']) + theta_initial = -np.nanmean(psi_b) / np.nanmean(psi_a) + # nuisance g + g_hat = _dml_cv_predict(self._learner['ml_g'], x, y - theta_initial * d, smpls=smpls, n_jobs=n_jobs_cv, + est_params=self._get_params('ml_g'), method=self._predict_method['ml_g'], + return_models=return_models) _check_finite_predictions(g_hat['preds'], self._learner['ml_g'], 'ml_g', smpls) predictions['ml_g'] = g_hat['preds'] diff --git a/doubleml/tests/test_external_predictions_IV.py b/doubleml/tests/test_external_predictions_IV.py index 550704d2..b92cd2e6 
100644 --- a/doubleml/tests/test_external_predictions_IV.py +++ b/doubleml/tests/test_external_predictions_IV.py @@ -18,7 +18,7 @@ def get_params(*args, **kwargs): @pytest.fixture(scope='module', - params=['IV-type', 'partialling out']) + params=['partialling out', 'IV-type']) def score(request): return request.param @@ -28,7 +28,7 @@ def dml_procedure(request): return request.param @pytest.fixture(scope='module', - params=[1]) + params=[1, 3]) def n_rep(request): return request.param @@ -37,63 +37,70 @@ def n_rep(request): def dim_z(request): return request.param - @pytest.fixture(scope="module") -def adapted_doubleml_fixture(score, dml_procedure, n_rep): - ext_predictions = {'d': {}} - - data = make_pliv_CHS2015(n_obs=500, - dim_x=20, - alpha=0.5, - dim_z=dim_z, - return_type="DataFrame") - - np.random.seed(3141) - - dml_data = DoubleMLData(data, 'y', 'd', z_cols=[f"Z{i}" for i in range(1, dim_z+1)]) - - kwargs = {'obj_dml_data': dml_data, - 'score': score, - 'n_rep': n_rep, - 'dml_procedure': dml_procedure} - - if score == 'IV-type': - kwargs['ml_g'] = LinearRegression() - - DMLPLIV = DoubleMLPLIV(ml_m=LinearRegression(), - ml_l=LinearRegression(), - ml_r=LinearRegression(), - **kwargs) - np.random.seed(3141) - - DMLPLIV.fit(store_predictions=True) - - ext_predictions['d']['ml_l'] = DMLPLIV.predictions['ml_l'].squeeze() - ext_predictions['d']['ml_r'] = DMLPLIV.predictions['ml_r'].squeeze() - - if dimz == 1: - ext_predictions['d']['ml_m'] = DMLPLIV.predictions['ml_m'].squeeze() +def adapted_doubleml_fixture(score, dml_procedure, n_rep, dim_z): + + # IV-type score only allows dim_z = 1, so skip testcases with dim_z > 1 for IV-type score + if dim_z > 1 and score == 'IV-type': + res_dict = {'coef_normal': 1, + 'coef_ext': 1} + + return res_dict else: - for instr in range(dimz): - ext_predictions['d']['ml_m_' + 'Z' + str(instr+1)] = DMLPLIV.predictions['ml_m_' + 'Z' + str(instr+1)].squeeze() + ext_predictions = {'d': {}} - if score == 'IV-type': - kwargs['ml_g'] = 
dummy_learner() - ext_predictions['d']['ml_g'] = DMLPLIV.predictions['ml_g'].squeeze() + data = make_pliv_CHS2015(n_obs=500, + dim_x=20, + alpha=0.5, + dim_z=dim_z, + return_type="DataFrame") + np.random.seed(3141) - DMLPLIV_ext = DoubleMLPLIV(ml_m=dummy_learner(), - ml_l=dummy_learner(), - ml_r=dummy_learner(), - **kwargs) + z_cols = [f"Z{i}" for i in range(1, dim_z+1)] + dml_data = DoubleMLData(data, 'y', 'd', z_cols=z_cols) - np.random.seed(3141) - DMLPLR_ext.fit(external_predictions=ext_predictions) - - res_dict = {'coef_normal': DMLPLIV.coef, - 'coef_ext': DMLPLIV_ext.coef} - - return res_dict + kwargs = {'obj_dml_data': dml_data, + 'score': score, + 'n_rep': n_rep, + 'dml_procedure': dml_procedure} + + if score == 'IV-type': + kwargs['ml_g'] = LinearRegression() + + DMLPLIV = DoubleMLPLIV(ml_m=LinearRegression(), + ml_l=LinearRegression(), + ml_r=LinearRegression(), + **kwargs) + np.random.seed(3141) + + DMLPLIV.fit(store_predictions=True) + + ext_predictions['d']['ml_l'] = DMLPLIV.predictions['ml_l'].squeeze() + ext_predictions['d']['ml_r'] = DMLPLIV.predictions['ml_r'].squeeze() + + if dim_z == 1: + ext_predictions['d']['ml_m'] = DMLPLIV.predictions['ml_m'].squeeze() + if score == 'IV-type': + kwargs['ml_g'] = dummy_learner() + ext_predictions['d']['ml_g'] = DMLPLIV.predictions['ml_g'].squeeze() + else: + for instr in range(dim_z): + ml_m_key = 'ml_m_' + 'Z' + str(instr+1) + ext_predictions['d'][ml_m_key] = DMLPLIV.predictions[ml_m_key].squeeze() + + DMLPLIV_ext = DoubleMLPLIV(ml_m=dummy_learner(), + ml_l=dummy_learner(), + ml_r=dummy_learner(), + **kwargs) + + np.random.seed(3141) + DMLPLIV_ext.fit(external_predictions=ext_predictions) + + res_dict = {'coef_normal': DMLPLIV.coef, + 'coef_ext': DMLPLIV_ext.coef} + + return res_dict @pytest.mark.ci def test_adapted_doubleml_coef(adapted_doubleml_fixture): From 102b27a6e13cb210a07d6c2712912d49f8b8219c Mon Sep 17 00:00:00 2001 From: Jan Teichert-Kluge Date: Mon, 10 Jul 2023 20:52:23 +0200 Subject: [PATCH 
24/67] Added external prediction option to DoubleMLDID --- doubleml/double_ml_did.py | 40 ++++++++++++++++++++++++++------------- 1 file changed, 27 insertions(+), 13 deletions(-) diff --git a/doubleml/double_ml_did.py b/doubleml/double_ml_did.py index 41317545..c035e560 100644 --- a/doubleml/double_ml_did.py +++ b/doubleml/double_ml_did.py @@ -201,31 +201,45 @@ def _nuisance_est(self, smpls, n_jobs_cv, external_predictions, return_models=Fa # nuisance g # get train indices for d == 0 smpls_d0, smpls_d1 = _get_cond_smpls(smpls, d) - g_hat0 = _dml_cv_predict(self._learner['ml_g'], x, y, smpls=smpls_d0, n_jobs=n_jobs_cv, - est_params=self._get_params('ml_g0'), method=self._predict_method['ml_g'], - return_models=return_models) + if external_predictions['ml_g0'] is not None: + g_hat0 = {'preds': external_predictions['ml_g0'], + 'targets': None, + 'models': None} + else: + g_hat0 = _dml_cv_predict(self._learner['ml_g'], x, y, smpls=smpls_d0, n_jobs=n_jobs_cv, + est_params=self._get_params('ml_g0'), method=self._predict_method['ml_g'], + return_models=return_models) - _check_finite_predictions(g_hat0['preds'], self._learner['ml_g'], 'ml_g', smpls) # adjust target values to consider only compatible subsamples - g_hat0['targets'] = g_hat0['targets'].astype(float) - g_hat0['targets'][d == 1] = np.nan - + g_hat0['targets'] = g_hat0['targets'].astype(float) # is None if external predictions are used + g_hat0['targets'][d == 1] = np.nan + _check_finite_predictions(g_hat0['preds'], self._learner['ml_g'], 'ml_g', smpls) # only relevant for observational or experimental setting m_hat = {'preds': None, 'targets': None, 'models': None} g_hat1 = {'preds': None, 'targets': None, 'models': None} if self.score == 'observational': # nuisance m - m_hat = _dml_cv_predict(self._learner['ml_m'], x, d, smpls=smpls, n_jobs=n_jobs_cv, - est_params=self._get_params('ml_m'), method=self._predict_method['ml_m'], - return_models=return_models) + if external_predictions['ml_m'] is not None: + 
m_hat = {'preds': external_predictions['ml_m'], + 'targets': None, + 'models': None} + else: + m_hat = _dml_cv_predict(self._learner['ml_m'], x, d, smpls=smpls, n_jobs=n_jobs_cv, + est_params=self._get_params('ml_m'), method=self._predict_method['ml_m'], + return_models=return_models) _check_finite_predictions(m_hat['preds'], self._learner['ml_m'], 'ml_m', smpls) _check_is_propensity(m_hat['preds'], self._learner['ml_m'], 'ml_m', smpls, eps=1e-12) m_hat['preds'] = _trimm(m_hat['preds'], self.trimming_rule, self.trimming_threshold) if self.score == 'experimental': - g_hat1 = _dml_cv_predict(self._learner['ml_g'], x, y, smpls=smpls_d1, n_jobs=n_jobs_cv, - est_params=self._get_params('ml_g1'), method=self._predict_method['ml_g'], - return_models=return_models) + if external_predictions['ml_g1'] is not None: + g_hat1 = {'preds': external_predictions['ml_g1'], + 'targets': None, + 'models': None} + else: + g_hat1 = _dml_cv_predict(self._learner['ml_g'], x, y, smpls=smpls_d1, n_jobs=n_jobs_cv, + est_params=self._get_params('ml_g1'), method=self._predict_method['ml_g'], + return_models=return_models) _check_finite_predictions(g_hat0['preds'], self._learner['ml_g'], 'ml_g', smpls) # adjust target values to consider only compatible subsamples From d90dc8fc96fa83464337eec38e6c229e90c878cf Mon Sep 17 00:00:00 2001 From: Jan Teichert-Kluge Date: Wed, 19 Jul 2023 14:22:01 +0200 Subject: [PATCH 25/67] Update test_external_predictions_IV.py --- doubleml/tests/test_external_predictions_IV.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/doubleml/tests/test_external_predictions_IV.py b/doubleml/tests/test_external_predictions_IV.py index b92cd2e6..e871c102 100644 --- a/doubleml/tests/test_external_predictions_IV.py +++ b/doubleml/tests/test_external_predictions_IV.py @@ -90,8 +90,8 @@ def adapted_doubleml_fixture(score, dml_procedure, n_rep, dim_z): ext_predictions['d'][ml_m_key] = DMLPLIV.predictions[ml_m_key].squeeze() DMLPLIV_ext = 
DoubleMLPLIV(ml_m=dummy_learner(), - ml_l=dummy_learner(), - ml_r=dummy_learner(), + ml_l=dummy_learner(), + ml_r=dummy_learner(), **kwargs) np.random.seed(3141) From af0c039f3c596d5d873a1be76f28e7f3ba5929a6 Mon Sep 17 00:00:00 2001 From: Sven Klaassen <47529404+SvenKlaassen@users.noreply.github.com> Date: Wed, 19 Jul 2023 17:41:13 +0200 Subject: [PATCH 26/67] add restriction to external predictions (matrix) --- .gitignore | 1 + doubleml/double_ml.py | 36 +++++++++++++++++------------------- 2 files changed, 18 insertions(+), 19 deletions(-) diff --git a/.gitignore b/.gitignore index 41a409eb..d9ffb93c 100644 --- a/.gitignore +++ b/.gitignore @@ -29,3 +29,4 @@ share/python-wheels/ MANIFEST *.idea *.vscode +.flake8 diff --git a/doubleml/double_ml.py b/doubleml/double_ml.py index 6f9f471e..217c802e 100644 --- a/doubleml/double_ml.py +++ b/doubleml/double_ml.py @@ -561,10 +561,8 @@ def fit(self, n_jobs_cv=None, store_predictions=True, external_predictions=None, if external_predictions is None: ext_prediction_dict[learner] = None elif learner in external_predictions[self._dml_data.d_cols[i_d]].keys(): - if isinstance(external_predictions[self._dml_data.d_cols[i_d]][learner], np.ndarray) and self.n_rep > 1: - ext_prediction_dict[learner] = external_predictions[self._dml_data.d_cols[i_d]][learner][0:, i_rep] - elif isinstance(external_predictions[self._dml_data.d_cols[i_d]][learner], np.ndarray) and self.n_rep == 1: - ext_prediction_dict[learner] = external_predictions[self._dml_data.d_cols[i_d]][learner] + if isinstance(external_predictions[self._dml_data.d_cols[i_d]][learner], np.ndarray): + ext_prediction_dict[learner] = external_predictions[self._dml_data.d_cols[i_d]][learner][:, i_rep] else: ext_prediction_dict[learner] = None else: @@ -1090,21 +1088,21 @@ def _check_external_predictions(self, external_predictions): f'Invalid nuisance learner for treatment {str(treatment)} in {str(supplied_learners)}. 
' 'Valid nuisance learners ' + ' or '.join(valid_learners) + '.') - # for learner in supplied_learners: - # if not isinstance(external_predictions[treatment][learner], np.ndarray): - # raise TypeError('Invalid external_predictions. ' - # 'The values of the nested list must be a numpy array. ' - # 'Invalid predictions for treatment ' + str(treatment) + - # ' and learner ' + str(learner) + '. ' + - # f'Object of type {str(type(external_predictions[treatment][learner]))} was passed.') - - # expected_shape = (self._dml_data.n_obs, self.n_rep) - # if external_predictions[treatment][learner].shape != expected_shape: - # raise ValueError('Invalid external_predictions. ' - # f'The supplied predictions have to be of shape {str(expected_shape)}. ' - # 'Invalid predictions for treatment ' + str(treatment) + - # ' and learner ' + str(learner) + '. ' + - # f'Predictions of shape {str(external_predictions[treatment][learner].shape)} passed.') + for learner in supplied_learners: + if not isinstance(external_predictions[treatment][learner], np.ndarray): + raise TypeError('Invalid external_predictions. ' + 'The values of the nested list must be a numpy array. ' + 'Invalid predictions for treatment ' + str(treatment) + + ' and learner ' + str(learner) + '. ' + + f'Object of type {str(type(external_predictions[treatment][learner]))} was passed.') + + expected_shape = (self._dml_data.n_obs, self.n_rep) + if external_predictions[treatment][learner].shape != expected_shape: + raise ValueError('Invalid external_predictions. ' + f'The supplied predictions have to be of shape {str(expected_shape)}. ' + 'Invalid predictions for treatment ' + str(treatment) + + ' and learner ' + str(learner) + '. 
' + + f'Predictions of shape {str(external_predictions[treatment][learner].shape)} passed.') def _initialize_arrays(self): # scores From a3f218ca3ca6d67230015f79d1b62efc14df4199 Mon Sep 17 00:00:00 2001 From: Sven Klaassen <47529404+SvenKlaassen@users.noreply.github.com> Date: Wed, 19 Jul 2023 17:41:31 +0200 Subject: [PATCH 27/67] fix unit tests --- doubleml/double_ml_pliv.py | 4 +- doubleml/tests/test_doubleml_exceptions.py | 17 +-- doubleml/tests/test_external_predictions.py | 10 +- .../tests/test_external_predictions_IV.py | 111 ++++++++++-------- 4 files changed, 76 insertions(+), 66 deletions(-) diff --git a/doubleml/double_ml_pliv.py b/doubleml/double_ml_pliv.py index eb14f946..e16caa94 100644 --- a/doubleml/double_ml_pliv.py +++ b/doubleml/double_ml_pliv.py @@ -287,10 +287,10 @@ def _nuisance_est(self, smpls, n_jobs_cv, external_predictions, return_models=Fa if self.partialX & (not self.partialZ): psi_elements, preds = self._nuisance_est_partial_x(smpls, n_jobs_cv, external_predictions, return_models) elif (not self.partialX) & self.partialZ: - psi_elements, preds = self._nuisance_est_partial_z(smpls, n_jobs_cv, external_predictions, return_models) + psi_elements, preds = self._nuisance_est_partial_z(smpls, n_jobs_cv, return_models) else: assert (self.partialX & self.partialZ) - psi_elements, preds = self._nuisance_est_partial_xz(smpls, n_jobs_cv, external_predictions, return_models) + psi_elements, preds = self._nuisance_est_partial_xz(smpls, n_jobs_cv, return_models) return psi_elements, preds diff --git a/doubleml/tests/test_doubleml_exceptions.py b/doubleml/tests/test_doubleml_exceptions.py index 3958f35a..57b3d32c 100644 --- a/doubleml/tests/test_doubleml_exceptions.py +++ b/doubleml/tests/test_doubleml_exceptions.py @@ -1328,11 +1328,6 @@ def test_double_ml_external_predictions(): with pytest.raises(TypeError, match=msg): dml_irm_obj.fit(external_predictions="ml_m") - predictions = {'ml_f': 'test'} - msg = "external_predictions is not yet implmented 
for ``n_rep > 1``." - with pytest.raises(NotImplementedError, match=msg): - dml_irm_obj.fit(external_predictions=predictions) - dml_irm_obj = DoubleMLIRM(dml_data_irm, ml_g=Lasso(), ml_m=LogisticRegression(), @@ -1377,15 +1372,23 @@ def test_double_ml_external_predictions(): predictions = {'d': {'ml_m': np.array([0])}} msg = ('Invalid external_predictions. ' - r'The supplied predictions have to be of shape \(100,\). ' + r'The supplied predictions have to be of shape \(100, 1\). ' 'Invalid predictions for treatment d and learner ml_m. ' r'Predictions of shape \(1,\) passed.') with pytest.raises(ValueError, match=msg): dml_irm_obj.fit(external_predictions=predictions) + predictions = {'d': {'ml_m': np.zeros(100)}} + msg = ('Invalid external_predictions. ' + r'The supplied predictions have to be of shape \(100, 1\). ' + 'Invalid predictions for treatment d and learner ml_m. ' + r'Predictions of shape \(100,\) passed.') + with pytest.raises(ValueError, match=msg): + dml_irm_obj.fit(external_predictions=predictions) + predictions = {'d': {'ml_m': np.ones(shape=(5, 3))}} msg = ('Invalid external_predictions. ' - r'The supplied predictions have to be of shape \(100,\). ' + r'The supplied predictions have to be of shape \(100, 1\). ' 'Invalid predictions for treatment d and learner ml_m. 
' r'Predictions of shape \(5, 3\) passed.') with pytest.raises(ValueError, match=msg): diff --git a/doubleml/tests/test_external_predictions.py b/doubleml/tests/test_external_predictions.py index 5aa09b61..07d408ee 100644 --- a/doubleml/tests/test_external_predictions.py +++ b/doubleml/tests/test_external_predictions.py @@ -50,10 +50,10 @@ def adapted_doubleml_fixture(score, dml_procedure, n_rep): 'score': score, 'n_rep': n_rep, 'dml_procedure': dml_procedure} - + if score == 'IV-type': kwargs['ml_g'] = LinearRegression() - + DMLPLR = DoubleMLPLR(ml_m=LinearRegression(), ml_l=LinearRegression(), **kwargs) @@ -61,12 +61,12 @@ def adapted_doubleml_fixture(score, dml_procedure, n_rep): DMLPLR.fit(store_predictions=True) - ext_predictions['d']['ml_m'] = DMLPLR.predictions['ml_m'].squeeze() - ext_predictions['d']['ml_l'] = DMLPLR.predictions['ml_l'].squeeze() + ext_predictions['d']['ml_m'] = DMLPLR.predictions['ml_m'][:, :, 0] + ext_predictions['d']['ml_l'] = DMLPLR.predictions['ml_l'][:, :, 0] if score == 'IV-type': kwargs['ml_g'] = dummy_learner() - ext_predictions['d']['ml_g'] = DMLPLR.predictions['ml_g'].squeeze() + ext_predictions['d']['ml_g'] = DMLPLR.predictions['ml_g'][:, :, 0] DMLPLR_ext = DoubleMLPLR(ml_m=dummy_learner(), diff --git a/doubleml/tests/test_external_predictions_IV.py b/doubleml/tests/test_external_predictions_IV.py index e871c102..28e7feb6 100644 --- a/doubleml/tests/test_external_predictions_IV.py +++ b/doubleml/tests/test_external_predictions_IV.py @@ -5,105 +5,112 @@ from doubleml import DoubleMLPLIV, DoubleMLData from doubleml.datasets import make_pliv_CHS2015 + class dummy_learner: _estimator_type = "regressor" + def fit(*args): raise AttributeError("Accessed fit method!") + def predict(*args): raise AttributeError("Accessed predict method!") + def set_params(*args): raise AttributeError("Accessed set_params method!") + def get_params(*args, **kwargs): raise AttributeError("Accessed get_params method!") - -@pytest.fixture(scope='module', - 
params=['partialling out', 'IV-type']) + +@pytest.fixture(scope="module", params=["partialling out", "IV-type"]) def score(request): return request.param -@pytest.fixture(scope='module', - params=['dml1', 'dml2']) + +@pytest.fixture(scope="module", params=["dml1", "dml2"]) def dml_procedure(request): return request.param -@pytest.fixture(scope='module', - params=[1, 3]) + +@pytest.fixture(scope="module", params=[1, 3]) def n_rep(request): return request.param -@pytest.fixture(scope='module', - params=[1, 3]) + +@pytest.fixture(scope="module", params=[1, 3]) def dim_z(request): return request.param + @pytest.fixture(scope="module") def adapted_doubleml_fixture(score, dml_procedure, n_rep, dim_z): - # IV-type score only allows dim_z = 1, so skip testcases with dim_z > 1 for IV-type score - if dim_z > 1 and score == 'IV-type': - res_dict = {'coef_normal': 1, - 'coef_ext': 1} - + if dim_z > 1 and score == "IV-type": + res_dict = {"coef_normal": 1, "coef_ext": 1} + return res_dict else: - ext_predictions = {'d': {}} + ext_predictions = {"d": {}} - data = make_pliv_CHS2015(n_obs=500, - dim_x=20, - alpha=0.5, - dim_z=dim_z, - return_type="DataFrame") + data = make_pliv_CHS2015( + n_obs=500, dim_x=20, alpha=0.5, dim_z=dim_z, return_type="DataFrame" + ) np.random.seed(3141) - z_cols = [f"Z{i}" for i in range(1, dim_z+1)] - dml_data = DoubleMLData(data, 'y', 'd', z_cols=z_cols) - - kwargs = {'obj_dml_data': dml_data, - 'score': score, - 'n_rep': n_rep, - 'dml_procedure': dml_procedure} - - if score == 'IV-type': - kwargs['ml_g'] = LinearRegression() - - DMLPLIV = DoubleMLPLIV(ml_m=LinearRegression(), - ml_l=LinearRegression(), - ml_r=LinearRegression(), - **kwargs) + z_cols = [f"Z{i}" for i in range(1, dim_z + 1)] + dml_data = DoubleMLData(data, "y", "d", z_cols=z_cols) + + kwargs = { + "obj_dml_data": dml_data, + "score": score, + "n_rep": n_rep, + "dml_procedure": dml_procedure, + } + + if score == "IV-type": + kwargs["ml_g"] = LinearRegression() + + DMLPLIV = 
DoubleMLPLIV( + ml_m=LinearRegression(), + ml_l=LinearRegression(), + ml_r=LinearRegression(), + **kwargs, + ) np.random.seed(3141) DMLPLIV.fit(store_predictions=True) - ext_predictions['d']['ml_l'] = DMLPLIV.predictions['ml_l'].squeeze() - ext_predictions['d']['ml_r'] = DMLPLIV.predictions['ml_r'].squeeze() + ext_predictions["d"]["ml_l"] = DMLPLIV.predictions["ml_l"][:, :, 0] + ext_predictions["d"]["ml_r"] = DMLPLIV.predictions["ml_r"][:, :, 0] if dim_z == 1: - ext_predictions['d']['ml_m'] = DMLPLIV.predictions['ml_m'].squeeze() - if score == 'IV-type': - kwargs['ml_g'] = dummy_learner() - ext_predictions['d']['ml_g'] = DMLPLIV.predictions['ml_g'].squeeze() + ext_predictions["d"]["ml_m"] = DMLPLIV.predictions["ml_m"][:, :, 0] + if score == "IV-type": + kwargs["ml_g"] = dummy_learner() + ext_predictions["d"]["ml_g"] = DMLPLIV.predictions["ml_g"][:, :, 0] else: for instr in range(dim_z): - ml_m_key = 'ml_m_' + 'Z' + str(instr+1) - ext_predictions['d'][ml_m_key] = DMLPLIV.predictions[ml_m_key].squeeze() + ml_m_key = "ml_m_" + "Z" + str(instr + 1) + ext_predictions["d"][ml_m_key] = DMLPLIV.predictions[ml_m_key][:, :, 0] - DMLPLIV_ext = DoubleMLPLIV(ml_m=dummy_learner(), - ml_l=dummy_learner(), - ml_r=dummy_learner(), - **kwargs) + DMLPLIV_ext = DoubleMLPLIV( + ml_m=dummy_learner(), ml_l=dummy_learner(), ml_r=dummy_learner(), **kwargs + ) np.random.seed(3141) DMLPLIV_ext.fit(external_predictions=ext_predictions) - res_dict = {'coef_normal': DMLPLIV.coef, - 'coef_ext': DMLPLIV_ext.coef} + res_dict = {"coef_normal": DMLPLIV.coef, "coef_ext": DMLPLIV_ext.coef} return res_dict + @pytest.mark.ci def test_adapted_doubleml_coef(adapted_doubleml_fixture): - assert math.isclose(adapted_doubleml_fixture['coef_normal'], - adapted_doubleml_fixture['coef_ext'], - rel_tol=1e-9, abs_tol=1e-4) \ No newline at end of file + assert math.isclose( + adapted_doubleml_fixture["coef_normal"], + adapted_doubleml_fixture["coef_ext"], + rel_tol=1e-9, + abs_tol=1e-4, + ) From 
0fca1366be78e07d40b273e0d275206f21a42323 Mon Sep 17 00:00:00 2001 From: Jan Teichert-Kluge Date: Fri, 1 Sep 2023 12:08:03 +0200 Subject: [PATCH 28/67] add `dummy_learners` into a new `utils` submodule --- doubleml/tests/test_external_predictions.py | 18 +++-------- .../tests/test_external_predictions_IV.py | 21 ++----------- doubleml/utils/__init__.py | 7 +++++ doubleml/utils/dummy_learners.py | 30 +++++++++++++++++++ 4 files changed, 44 insertions(+), 32 deletions(-) create mode 100644 doubleml/utils/__init__.py create mode 100644 doubleml/utils/dummy_learners.py diff --git a/doubleml/tests/test_external_predictions.py b/doubleml/tests/test_external_predictions.py index 07d408ee..26043da3 100644 --- a/doubleml/tests/test_external_predictions.py +++ b/doubleml/tests/test_external_predictions.py @@ -4,17 +4,7 @@ from sklearn.linear_model import LinearRegression, LassoCV from doubleml import DoubleMLPLR, DoubleMLData from doubleml.datasets import make_plr_CCDDHNR2018 - -class dummy_learner: - _estimator_type = "regressor" - def fit(*args): - raise AttributeError("Accessed fit method!") - def predict(*args): - raise AttributeError("Accessed predict method!") - def set_params(*args): - raise AttributeError("Accessed set_params method!") - def get_params(*args, **kwargs): - raise AttributeError("Accessed get_params method!") +from doubleml.utils import dummy_regressor @pytest.fixture(scope='module', @@ -65,12 +55,12 @@ def adapted_doubleml_fixture(score, dml_procedure, n_rep): ext_predictions['d']['ml_l'] = DMLPLR.predictions['ml_l'][:, :, 0] if score == 'IV-type': - kwargs['ml_g'] = dummy_learner() + kwargs['ml_g'] = dummy_regressor() ext_predictions['d']['ml_g'] = DMLPLR.predictions['ml_g'][:, :, 0] - DMLPLR_ext = DoubleMLPLR(ml_m=dummy_learner(), - ml_l=dummy_learner(), + DMLPLR_ext = DoubleMLPLR(ml_m=dummy_regressor(), + ml_l=dummy_regressor(), **kwargs) np.random.seed(3141) diff --git a/doubleml/tests/test_external_predictions_IV.py 
b/doubleml/tests/test_external_predictions_IV.py index 28e7feb6..5563ef90 100644 --- a/doubleml/tests/test_external_predictions_IV.py +++ b/doubleml/tests/test_external_predictions_IV.py @@ -4,22 +4,7 @@ from sklearn.linear_model import LinearRegression, LassoCV from doubleml import DoubleMLPLIV, DoubleMLData from doubleml.datasets import make_pliv_CHS2015 - - -class dummy_learner: - _estimator_type = "regressor" - - def fit(*args): - raise AttributeError("Accessed fit method!") - - def predict(*args): - raise AttributeError("Accessed predict method!") - - def set_params(*args): - raise AttributeError("Accessed set_params method!") - - def get_params(*args, **kwargs): - raise AttributeError("Accessed get_params method!") +from doubleml.utils import dummy_regressor @pytest.fixture(scope="module", params=["partialling out", "IV-type"]) @@ -87,7 +72,7 @@ def adapted_doubleml_fixture(score, dml_procedure, n_rep, dim_z): if dim_z == 1: ext_predictions["d"]["ml_m"] = DMLPLIV.predictions["ml_m"][:, :, 0] if score == "IV-type": - kwargs["ml_g"] = dummy_learner() + kwargs["ml_g"] = dummy_regressor() ext_predictions["d"]["ml_g"] = DMLPLIV.predictions["ml_g"][:, :, 0] else: for instr in range(dim_z): @@ -95,7 +80,7 @@ def adapted_doubleml_fixture(score, dml_procedure, n_rep, dim_z): ext_predictions["d"][ml_m_key] = DMLPLIV.predictions[ml_m_key][:, :, 0] DMLPLIV_ext = DoubleMLPLIV( - ml_m=dummy_learner(), ml_l=dummy_learner(), ml_r=dummy_learner(), **kwargs + ml_m=dummy_regressor(), ml_l=dummy_regressor(), ml_r=dummy_regressor(), **kwargs ) np.random.seed(3141) diff --git a/doubleml/utils/__init__.py b/doubleml/utils/__init__.py new file mode 100644 index 00000000..b3fbb9f0 --- /dev/null +++ b/doubleml/utils/__init__.py @@ -0,0 +1,7 @@ +from .dummy_learners import dummy_classifier +from .dummy_learners import dummy_regressor + +__all__ = [ + "dummy_classifier", + "dummy_regressor", +] diff --git a/doubleml/utils/dummy_learners.py b/doubleml/utils/dummy_learners.py new file 
mode 100644 index 00000000..11e5caf1 --- /dev/null +++ b/doubleml/utils/dummy_learners.py @@ -0,0 +1,30 @@ +class dummy_regressor: + _estimator_type = "regressor" + + def fit(*args): + raise AttributeError("Accessed fit method of dummy_regressor!") + + def predict(*args): + raise AttributeError("Accessed predict method of dummy_regressor!") + + def set_params(*args): + raise AttributeError("Accessed set_params method of dummy_regressor!") + + def get_params(*args, **kwargs): + raise AttributeError("Accessed get_params method of dummy_regressor!") + + +class dummy_classifier: + _estimator_type = "classifier" + + def fit(*args): + raise AttributeError("Accessed fit method of dummy_classifier!") + + def predict(*args): + raise AttributeError("Accessed predict method of dummy_classifier!") + + def set_params(*args): + raise AttributeError("Accessed set_params method of dummy_classifier!") + + def get_params(*args, **kwargs): + raise AttributeError("Accessed get_params method of dummy_classifier!") From b1aa16afbca336143186ecb6a5e279fbfc8a71ef Mon Sep 17 00:00:00 2001 From: Jan Teichert-Kluge Date: Fri, 1 Sep 2023 12:08:29 +0200 Subject: [PATCH 29/67] code formatting --- doubleml/tests/test_external_predictions.py | 58 ++++++++------------- 1 file changed, 23 insertions(+), 35 deletions(-) diff --git a/doubleml/tests/test_external_predictions.py b/doubleml/tests/test_external_predictions.py index 26043da3..d0ba02a3 100644 --- a/doubleml/tests/test_external_predictions.py +++ b/doubleml/tests/test_external_predictions.py @@ -5,74 +5,62 @@ from doubleml import DoubleMLPLR, DoubleMLData from doubleml.datasets import make_plr_CCDDHNR2018 from doubleml.utils import dummy_regressor - -@pytest.fixture(scope='module', - params=['IV-type', 'partialling out']) + +@pytest.fixture(scope="module", params=["IV-type", "partialling out"]) def score(request): return request.param -@pytest.fixture(scope='module', - params=['dml1', 'dml2']) + +@pytest.fixture(scope="module", 
params=["dml1", "dml2"]) def dml_procedure(request): return request.param -@pytest.fixture(scope='module', - params=[1, 3]) + +@pytest.fixture(scope="module", params=[1, 3]) def n_rep(request): return request.param @pytest.fixture(scope="module") def adapted_doubleml_fixture(score, dml_procedure, n_rep): - ext_predictions = {'d': {}} + ext_predictions = {"d": {}} - x, y, d = make_plr_CCDDHNR2018(n_obs=500, - dim_x=20, - alpha=0.5, - return_type="np.array") + x, y, d = make_plr_CCDDHNR2018(n_obs=500, dim_x=20, alpha=0.5, return_type="np.array") np.random.seed(3141) dml_data = DoubleMLData.from_arrays(x=x, y=y, d=d) - kwargs = {'obj_dml_data': dml_data, - 'score': score, - 'n_rep': n_rep, - 'dml_procedure': dml_procedure} + kwargs = {"obj_dml_data": dml_data, "score": score, "n_rep": n_rep, "dml_procedure": dml_procedure} - if score == 'IV-type': - kwargs['ml_g'] = LinearRegression() + if score == "IV-type": + kwargs["ml_g"] = LinearRegression() - DMLPLR = DoubleMLPLR(ml_m=LinearRegression(), - ml_l=LinearRegression(), - **kwargs) + DMLPLR = DoubleMLPLR(ml_m=LinearRegression(), ml_l=LinearRegression(), **kwargs) np.random.seed(3141) DMLPLR.fit(store_predictions=True) - ext_predictions['d']['ml_m'] = DMLPLR.predictions['ml_m'][:, :, 0] - ext_predictions['d']['ml_l'] = DMLPLR.predictions['ml_l'][:, :, 0] + ext_predictions["d"]["ml_m"] = DMLPLR.predictions["ml_m"][:, :, 0] + ext_predictions["d"]["ml_l"] = DMLPLR.predictions["ml_l"][:, :, 0] - if score == 'IV-type': - kwargs['ml_g'] = dummy_regressor() - ext_predictions['d']['ml_g'] = DMLPLR.predictions['ml_g'][:, :, 0] + if score == "IV-type": + kwargs["ml_g"] = dummy_regressor() + ext_predictions["d"]["ml_g"] = DMLPLR.predictions["ml_g"][:, :, 0] - - DMLPLR_ext = DoubleMLPLR(ml_m=dummy_regressor(), - ml_l=dummy_regressor(), - **kwargs) + DMLPLR_ext = DoubleMLPLR(ml_m=dummy_regressor(), ml_l=dummy_regressor(), **kwargs) np.random.seed(3141) DMLPLR_ext.fit(external_predictions=ext_predictions) - res_dict = 
{'coef_normal': DMLPLR.coef, - 'coef_ext': DMLPLR_ext.coef} + res_dict = {"coef_normal": DMLPLR.coef, "coef_ext": DMLPLR_ext.coef} return res_dict + @pytest.mark.ci def test_adapted_doubleml_coef(adapted_doubleml_fixture): - assert math.isclose(adapted_doubleml_fixture['coef_normal'], - adapted_doubleml_fixture['coef_ext'], - rel_tol=1e-9, abs_tol=1e-4) \ No newline at end of file + assert math.isclose( + adapted_doubleml_fixture["coef_normal"], adapted_doubleml_fixture["coef_ext"], rel_tol=1e-9, abs_tol=1e-4 + ) From e0e8c154adb0af8f79c6c83481d36812a14ee043 Mon Sep 17 00:00:00 2001 From: Jan Teichert-Kluge Date: Thu, 14 Sep 2023 10:25:55 +0200 Subject: [PATCH 30/67] Update dummy_learners.py to allow the get / set params method --- doubleml/utils/dummy_learners.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/doubleml/utils/dummy_learners.py b/doubleml/utils/dummy_learners.py index 11e5caf1..a15ae441 100644 --- a/doubleml/utils/dummy_learners.py +++ b/doubleml/utils/dummy_learners.py @@ -8,10 +8,10 @@ def predict(*args): raise AttributeError("Accessed predict method of dummy_regressor!") def set_params(*args): - raise AttributeError("Accessed set_params method of dummy_regressor!") + print("\n\nAccessed set_params method of dummy_regressor!\n\n") def get_params(*args, **kwargs): - raise AttributeError("Accessed get_params method of dummy_regressor!") + print("\n\nAccessed get_params method of dummy_regressor!\n\n") class dummy_classifier: From 0b45b542973509e336e99f04f77fa3325dee5cef Mon Sep 17 00:00:00 2001 From: Jan Teichert-Kluge Date: Thu, 14 Sep 2023 11:01:47 +0200 Subject: [PATCH 31/67] Redo changes --- doubleml/utils/dummy_learners.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/doubleml/utils/dummy_learners.py b/doubleml/utils/dummy_learners.py index a15ae441..e29742f8 100644 --- a/doubleml/utils/dummy_learners.py +++ b/doubleml/utils/dummy_learners.py @@ -8,10 +8,10 @@ def predict(*args): raise 
AttributeError("Accessed predict method of dummy_regressor!") def set_params(*args): - print("\n\nAccessed set_params method of dummy_regressor!\n\n") + raise AttributeError("Accessed set_params method of dummy_regressor!) def get_params(*args, **kwargs): - print("\n\nAccessed get_params method of dummy_regressor!\n\n") + raise AttributeError("Accessed get_params method of dummy_regressor!") class dummy_classifier: From 03b0831f96baedf4f7c6810463886bbd183b13d8 Mon Sep 17 00:00:00 2001 From: Jan Teichert-Kluge Date: Thu, 14 Sep 2023 11:02:00 +0200 Subject: [PATCH 32/67] typo --- doubleml/utils/dummy_learners.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doubleml/utils/dummy_learners.py b/doubleml/utils/dummy_learners.py index e29742f8..11e5caf1 100644 --- a/doubleml/utils/dummy_learners.py +++ b/doubleml/utils/dummy_learners.py @@ -8,7 +8,7 @@ def predict(*args): raise AttributeError("Accessed predict method of dummy_regressor!") def set_params(*args): - raise AttributeError("Accessed set_params method of dummy_regressor!) + raise AttributeError("Accessed set_params method of dummy_regressor!") def get_params(*args, **kwargs): raise AttributeError("Accessed get_params method of dummy_regressor!") From dd244399fab6eda648d8a0ad9ffb4796b75d2824 Mon Sep 17 00:00:00 2001 From: Jan Teichert-Kluge Date: Tue, 14 Nov 2023 15:49:00 +0100 Subject: [PATCH 33/67] Refact. Unit Test for ext. 
predictions --- doubleml/tests/test_external_predictions.py | 63 +++++++++++++++++---- 1 file changed, 52 insertions(+), 11 deletions(-) diff --git a/doubleml/tests/test_external_predictions.py b/doubleml/tests/test_external_predictions.py index d0ba02a3..f6409d40 100644 --- a/doubleml/tests/test_external_predictions.py +++ b/doubleml/tests/test_external_predictions.py @@ -1,14 +1,18 @@ import numpy as np import pytest import math -from sklearn.linear_model import LinearRegression, LassoCV -from doubleml import DoubleMLPLR, DoubleMLData -from doubleml.datasets import make_plr_CCDDHNR2018 -from doubleml.utils import dummy_regressor +from sklearn.linear_model import LinearRegression, LassoCV, LogisticRegression +from doubleml import DoubleMLPLR, DoubleMLIRM, DoubleMLData +from doubleml.datasets import make_plr_CCDDHNR2018, make_irm_data +from doubleml.utils import dummy_regressor, dummy_classifier @pytest.fixture(scope="module", params=["IV-type", "partialling out"]) -def score(request): +def plr_score(request): + return request.param + +@pytest.fixture(scope="module", params=["ATE", "ATTE"]) +def irm_score(request): return request.param @@ -23,7 +27,7 @@ def n_rep(request): @pytest.fixture(scope="module") -def adapted_doubleml_fixture(score, dml_procedure, n_rep): +def doubleml_plr_fixture(plr_score, dml_procedure, n_rep): ext_predictions = {"d": {}} x, y, d = make_plr_CCDDHNR2018(n_obs=500, dim_x=20, alpha=0.5, return_type="np.array") @@ -32,9 +36,9 @@ def adapted_doubleml_fixture(score, dml_procedure, n_rep): dml_data = DoubleMLData.from_arrays(x=x, y=y, d=d) - kwargs = {"obj_dml_data": dml_data, "score": score, "n_rep": n_rep, "dml_procedure": dml_procedure} + kwargs = {"obj_dml_data": dml_data, "score": plr_score, "n_rep": n_rep, "dml_procedure": dml_procedure} - if score == "IV-type": + if plr_score == "IV-type": kwargs["ml_g"] = LinearRegression() DMLPLR = DoubleMLPLR(ml_m=LinearRegression(), ml_l=LinearRegression(), **kwargs) @@ -45,7 +49,7 @@ def 
adapted_doubleml_fixture(score, dml_procedure, n_rep): ext_predictions["d"]["ml_m"] = DMLPLR.predictions["ml_m"][:, :, 0] ext_predictions["d"]["ml_l"] = DMLPLR.predictions["ml_l"][:, :, 0] - if score == "IV-type": + if plr_score == "IV-type": kwargs["ml_g"] = dummy_regressor() ext_predictions["d"]["ml_g"] = DMLPLR.predictions["ml_g"][:, :, 0] @@ -59,8 +63,45 @@ def adapted_doubleml_fixture(score, dml_procedure, n_rep): return res_dict +@pytest.fixture(scope="module") +def doubleml_irm_fixture(irm_score, dml_procedure, n_rep): + ext_predictions = {"d": {}} + + x, y, d = make_irm_data(n_obs=500, dim_x=20, theta=0.5, return_type="np.array") + + np.random.seed(3141) + + dml_data = DoubleMLData.from_arrays(x=x, y=y, d=d) + + kwargs = {"obj_dml_data": dml_data, "score": irm_score, "n_rep": n_rep, "dml_procedure": dml_procedure} + + DMLIRM = DoubleMLIRM(ml_g=LinearRegression(), ml_m=LogisticRegression(), **kwargs) + np.random.seed(3141) + + DMLIRM.fit(store_predictions=True) + + ext_predictions["d"]["ml_g0"] = DMLIRM.predictions["ml_g0"][:, :, 0] + ext_predictions["d"]["ml_g1"] = DMLIRM.predictions["ml_g1"][:, :, 0] + ext_predictions["d"]["ml_m"] = DMLIRM.predictions["ml_m"][:, :, 0] + + DMLIRM_ext = DoubleMLIRM(ml_g=dummy_regressor(), ml_m=dummy_classifier(), **kwargs) + + np.random.seed(3141) + DMLIRM_ext.fit(external_predictions=ext_predictions) + + res_dict = {"coef_normal": DMLIRM.coef, "coef_ext": DMLIRM_ext.coef} + + return res_dict + + +@pytest.mark.ci +def test_doubleml_plr_coef(doubleml_plr_fixture): + assert math.isclose( + doubleml_plr_fixture["coef_normal"], doubleml_plr_fixture["coef_ext"], rel_tol=1e-9, abs_tol=1e-4 + ) + @pytest.mark.ci -def test_adapted_doubleml_coef(adapted_doubleml_fixture): +def test_doubleml_irm_coef(doubleml_irm_fixture): assert math.isclose( - adapted_doubleml_fixture["coef_normal"], adapted_doubleml_fixture["coef_ext"], rel_tol=1e-9, abs_tol=1e-4 + doubleml_irm_fixture["coef_normal"], doubleml_irm_fixture["coef_ext"], rel_tol=1e-9, 
abs_tol=1e-4 ) From 7f698074db3cc96aaa1b8c5bc0aea2f79cec569f Mon Sep 17 00:00:00 2001 From: Jan Teichert-Kluge Date: Tue, 14 Nov 2023 15:49:12 +0100 Subject: [PATCH 34/67] Unit tests for IRM model --- doubleml/utils/dummy_learners.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/doubleml/utils/dummy_learners.py b/doubleml/utils/dummy_learners.py index 11e5caf1..e4c08767 100644 --- a/doubleml/utils/dummy_learners.py +++ b/doubleml/utils/dummy_learners.py @@ -28,3 +28,6 @@ def set_params(*args): def get_params(*args, **kwargs): raise AttributeError("Accessed get_params method of dummy_classifier!") + + def predict_proba(*args, **kwargs): + raise AttributeError("Accessed predict_proba method of dummy_classifier!") From d2ce02c0fd5661b79caa19bb404562b73dd03720 Mon Sep 17 00:00:00 2001 From: Jan Teichert-Kluge Date: Tue, 14 Nov 2023 16:09:30 +0100 Subject: [PATCH 35/67] Impl. and Unit Tetsts for DID external predictions --- doubleml/double_ml_did.py | 2 +- .../tests/test_external_predictions_did.py | 56 +++++++++++++++++++ 2 files changed, 57 insertions(+), 1 deletion(-) create mode 100644 doubleml/tests/test_external_predictions_did.py diff --git a/doubleml/double_ml_did.py b/doubleml/double_ml_did.py index c76cee98..87c02931 100644 --- a/doubleml/double_ml_did.py +++ b/doubleml/double_ml_did.py @@ -218,7 +218,7 @@ def _nuisance_est(self, smpls, n_jobs_cv, external_predictions, return_models=Fa # adjust target values to consider only compatible subsamples g_hat0['targets'] = g_hat0['targets'].astype(float) g_hat0['targets'][d == 1] = np.nan - + # nuisance g for d==1 if external_predictions['ml_g1'] is not None: g_hat1 = {'preds': external_predictions['ml_g1'], diff --git a/doubleml/tests/test_external_predictions_did.py b/doubleml/tests/test_external_predictions_did.py new file mode 100644 index 00000000..9e14de6d --- /dev/null +++ b/doubleml/tests/test_external_predictions_did.py @@ -0,0 +1,56 @@ +import numpy as np +import pytest +import math +from 
sklearn.linear_model import LinearRegression, LassoCV, LogisticRegression +from doubleml import DoubleMLData, DoubleMLDID +from doubleml.datasets import make_did_SZ2020 +from doubleml.utils import dummy_regressor, dummy_classifier + + +@pytest.fixture(scope="module", params=['observational', 'experimental']) +def did_score(request): + return request.param + +@pytest.fixture(scope="module", params=["dml1", "dml2"]) +def dml_procedure(request): + return request.param + + +@pytest.fixture(scope="module", params=[1, 3]) +def n_rep(request): + return request.param + + +@pytest.fixture(scope="module") +def doubleml_did_fixture(did_score, dml_procedure, n_rep): + ext_predictions = {"d": {}} + + np.random.seed(3141) + + dml_data = make_did_SZ2020(n_obs=500, return_type="DoubleMLData") + + kwargs = {"obj_dml_data": dml_data, "score": did_score, "n_rep": n_rep, "dml_procedure": dml_procedure} + + DMLDID = DoubleMLDID(ml_g=LinearRegression(), ml_m=LogisticRegression(), **kwargs) + np.random.seed(3141) + + DMLDID.fit(store_predictions=True) + + ext_predictions["d"]["ml_g0"] = DMLDID.predictions["ml_g0"][:, :, 0] + ext_predictions["d"]["ml_g1"] = DMLDID.predictions["ml_g1"][:, :, 0] + ext_predictions["d"]["ml_m"] = DMLDID.predictions["ml_m"][:, :, 0] + + DMLDID_ext = DoubleMLDID(ml_g=dummy_regressor(), ml_m=dummy_classifier(), **kwargs) + + np.random.seed(3141) + DMLDID_ext.fit(external_predictions=ext_predictions) + + res_dict = {"coef_normal": DMLDID.coef, "coef_ext": DMLDID_ext.coef} + + return res_dict + +@pytest.mark.ci +def test_doubleml_did_coef(doubleml_did_fixture): + assert math.isclose( + doubleml_did_fixture["coef_normal"], doubleml_did_fixture["coef_ext"], rel_tol=1e-9, abs_tol=1e-3 + ) From 91e481aeecc8f33905bc13c3bfebf9aacc5b95a2 Mon Sep 17 00:00:00 2001 From: Jan Teichert-Kluge Date: Tue, 14 Nov 2023 16:31:18 +0100 Subject: [PATCH 36/67] dummy_learners inherit from sklearn BaseEstimator --- doubleml/utils/dummy_learners.py | 6 ++++-- 1 file changed, 4 
insertions(+), 2 deletions(-) diff --git a/doubleml/utils/dummy_learners.py b/doubleml/utils/dummy_learners.py index e4c08767..6043413f 100644 --- a/doubleml/utils/dummy_learners.py +++ b/doubleml/utils/dummy_learners.py @@ -1,4 +1,6 @@ -class dummy_regressor: +from sklearn.base import BaseEstimator + +class dummy_regressor(BaseEstimator): _estimator_type = "regressor" def fit(*args): @@ -14,7 +16,7 @@ def get_params(*args, **kwargs): raise AttributeError("Accessed get_params method of dummy_regressor!") -class dummy_classifier: +class dummy_classifier(BaseEstimator): _estimator_type = "classifier" def fit(*args): From 8ae786758782d5fd7d00fc51d00c811318527c02 Mon Sep 17 00:00:00 2001 From: Jan Teichert-Kluge Date: Tue, 14 Nov 2023 16:31:38 +0100 Subject: [PATCH 37/67] Impl. and Unit Tetsts for DIDCS external preds. --- doubleml/double_ml_did_cs.py | 80 ++++++++++++------- .../tests/test_external_predictions_did.py | 41 +++++++++- 2 files changed, 91 insertions(+), 30 deletions(-) diff --git a/doubleml/double_ml_did_cs.py b/doubleml/double_ml_did_cs.py index 53910946..f0986eed 100644 --- a/doubleml/double_ml_did_cs.py +++ b/doubleml/double_ml_did_cs.py @@ -228,40 +228,62 @@ def _nuisance_est(self, smpls, n_jobs_cv, external_predictions, return_models=Fa # nuisance g smpls_d0_t0, smpls_d0_t1, smpls_d1_t0, smpls_d1_t1 = _get_cond_smpls_2d(smpls, d, t) - - g_hat_d0_t0 = _dml_cv_predict(self._learner['ml_g'], x, y, smpls_d0_t0, n_jobs=n_jobs_cv, - est_params=self._get_params('ml_g_d0_t0'), method=self._predict_method['ml_g'], - return_models=return_models) - g_hat_d0_t0['targets'] = g_hat_d0_t0['targets'].astype(float) - g_hat_d0_t0['targets'][np.invert((d == 0) & (t == 0))] = np.nan - - g_hat_d0_t1 = _dml_cv_predict(self._learner['ml_g'], x, y, smpls_d0_t1, n_jobs=n_jobs_cv, - est_params=self._get_params('ml_g_d0_t1'), method=self._predict_method['ml_g'], - return_models=return_models) - g_hat_d0_t1['targets'] = g_hat_d0_t1['targets'].astype(float) - 
g_hat_d0_t1['targets'][np.invert((d == 0) & (t == 1))] = np.nan - - g_hat_d1_t0 = _dml_cv_predict(self._learner['ml_g'], x, y, smpls_d1_t0, n_jobs=n_jobs_cv, - est_params=self._get_params('ml_g_d1_t0'), method=self._predict_method['ml_g'], - return_models=return_models) - g_hat_d1_t0['targets'] = g_hat_d1_t0['targets'].astype(float) - g_hat_d1_t0['targets'][np.invert((d == 1) & (t == 0))] = np.nan - - g_hat_d1_t1 = _dml_cv_predict(self._learner['ml_g'], x, y, smpls_d1_t1, n_jobs=n_jobs_cv, - est_params=self._get_params('ml_g_d1_t1'), method=self._predict_method['ml_g'], - return_models=return_models) - g_hat_d1_t1['targets'] = g_hat_d1_t1['targets'].astype(float) - g_hat_d1_t1['targets'][np.invert((d == 1) & (t == 1))] = np.nan + if external_predictions['ml_g_d0_t0'] is not None: + g_hat_d0_t0 = {'preds': external_predictions['ml_g_d0_t0'], + 'targets': None, + 'models': None} + else: + g_hat_d0_t0 = _dml_cv_predict(self._learner['ml_g'], x, y, smpls_d0_t0, n_jobs=n_jobs_cv, + est_params=self._get_params('ml_g_d0_t0'), method=self._predict_method['ml_g'], + return_models=return_models) + + g_hat_d0_t0['targets'] = g_hat_d0_t0['targets'].astype(float) + g_hat_d0_t0['targets'][np.invert((d == 0) & (t == 0))] = np.nan + if external_predictions['ml_g_d0_t1'] is not None: + g_hat_d0_t1 = {'preds': external_predictions['ml_g_d0_t1'], + 'targets': None, + 'models': None} + else: + g_hat_d0_t1 = _dml_cv_predict(self._learner['ml_g'], x, y, smpls_d0_t1, n_jobs=n_jobs_cv, + est_params=self._get_params('ml_g_d0_t1'), method=self._predict_method['ml_g'], + return_models=return_models) + g_hat_d0_t1['targets'] = g_hat_d0_t1['targets'].astype(float) + g_hat_d0_t1['targets'][np.invert((d == 0) & (t == 1))] = np.nan + if external_predictions['ml_g_d1_t0'] is not None: + g_hat_d1_t0 = {'preds': external_predictions['ml_g_d1_t0'], + 'targets': None, + 'models': None} + else: + g_hat_d1_t0 = _dml_cv_predict(self._learner['ml_g'], x, y, smpls_d1_t0, n_jobs=n_jobs_cv, + 
est_params=self._get_params('ml_g_d1_t0'), method=self._predict_method['ml_g'], + return_models=return_models) + g_hat_d1_t0['targets'] = g_hat_d1_t0['targets'].astype(float) + g_hat_d1_t0['targets'][np.invert((d == 1) & (t == 0))] = np.nan + if external_predictions['ml_g_d1_t1'] is not None: + g_hat_d1_t1 = {'preds': external_predictions['ml_g_d1_t1'], + 'targets': None, + 'models': None} + else: + g_hat_d1_t1 = _dml_cv_predict(self._learner['ml_g'], x, y, smpls_d1_t1, n_jobs=n_jobs_cv, + est_params=self._get_params('ml_g_d1_t1'), method=self._predict_method['ml_g'], + return_models=return_models) + g_hat_d1_t1['targets'] = g_hat_d1_t1['targets'].astype(float) + g_hat_d1_t1['targets'][np.invert((d == 1) & (t == 1))] = np.nan # only relevant for observational or experimental setting m_hat = {'preds': None, 'targets': None, 'models': None} if self.score == 'observational': # nuisance m - m_hat = _dml_cv_predict(self._learner['ml_m'], x, d, smpls=smpls, n_jobs=n_jobs_cv, - est_params=self._get_params('ml_m'), method=self._predict_method['ml_m'], - return_models=return_models) - _check_finite_predictions(m_hat['preds'], self._learner['ml_m'], 'ml_m', smpls) - _check_is_propensity(m_hat['preds'], self._learner['ml_m'], 'ml_m', smpls, eps=1e-12) + if external_predictions['ml_m'] is not None: + m_hat = {'preds': external_predictions['ml_m'], + 'targets': None, + 'models': None} + else: + m_hat = _dml_cv_predict(self._learner['ml_m'], x, d, smpls=smpls, n_jobs=n_jobs_cv, + est_params=self._get_params('ml_m'), method=self._predict_method['ml_m'], + return_models=return_models) + _check_finite_predictions(m_hat['preds'], self._learner['ml_m'], 'ml_m', smpls) + _check_is_propensity(m_hat['preds'], self._learner['ml_m'], 'ml_m', smpls, eps=1e-12) m_hat['preds'] = _trimm(m_hat['preds'], self.trimming_rule, self.trimming_threshold) psi_a, psi_b = self._score_elements(y, d, t, diff --git a/doubleml/tests/test_external_predictions_did.py 
b/doubleml/tests/test_external_predictions_did.py index 9e14de6d..308d9e8b 100644 --- a/doubleml/tests/test_external_predictions_did.py +++ b/doubleml/tests/test_external_predictions_did.py @@ -2,7 +2,7 @@ import pytest import math from sklearn.linear_model import LinearRegression, LassoCV, LogisticRegression -from doubleml import DoubleMLData, DoubleMLDID +from doubleml import DoubleMLData, DoubleMLDID, DoubleMLDIDCS from doubleml.datasets import make_did_SZ2020 from doubleml.utils import dummy_regressor, dummy_classifier @@ -49,8 +49,47 @@ def doubleml_did_fixture(did_score, dml_procedure, n_rep): return res_dict + +@pytest.fixture(scope="module") +def doubleml_didcs_fixture(did_score, dml_procedure, n_rep): + ext_predictions = {"d": {}} + + np.random.seed(3141) + + dml_data = make_did_SZ2020(n_obs=500, cross_sectional_data=True, return_type="DoubleMLData") + + kwargs = {"obj_dml_data": dml_data, "score": did_score, "n_rep": n_rep, "dml_procedure": dml_procedure} + + DMLDIDCS = DoubleMLDIDCS(ml_g=LinearRegression(), ml_m=LogisticRegression(), **kwargs) + np.random.seed(3141) + + DMLDIDCS.fit(store_predictions=True) + + ext_predictions["d"]["ml_g_d0_t0"] = DMLDIDCS.predictions["ml_g_d0_t0"][:, :, 0] + ext_predictions["d"]["ml_g_d0_t1"] = DMLDIDCS.predictions["ml_g_d0_t1"][:, :, 0] + ext_predictions["d"]["ml_g_d1_t0"] = DMLDIDCS.predictions["ml_g_d1_t0"][:, :, 0] + ext_predictions["d"]["ml_g_d1_t1"] = DMLDIDCS.predictions["ml_g_d1_t1"][:, :, 0] + ext_predictions["d"]["ml_m"] = DMLDIDCS.predictions["ml_m"][:, :, 0] + + DMLDIDCS_ext = DoubleMLDIDCS(ml_g=dummy_regressor(), ml_m=dummy_classifier(), **kwargs) + + np.random.seed(3141) + DMLDIDCS_ext.fit(external_predictions=ext_predictions) + + res_dict = {"coef_normal": DMLDIDCS.coef, "coef_ext": DMLDIDCS_ext.coef} + + return res_dict + + + @pytest.mark.ci def test_doubleml_did_coef(doubleml_did_fixture): assert math.isclose( doubleml_did_fixture["coef_normal"], doubleml_did_fixture["coef_ext"], rel_tol=1e-9, 
abs_tol=1e-3 ) + +@pytest.mark.ci +def test_doubleml_didcs_coef(doubleml_didcs_fixture): + assert math.isclose( + doubleml_didcs_fixture["coef_normal"], doubleml_didcs_fixture["coef_ext"], rel_tol=1e-9, abs_tol=1e-3 + ) \ No newline at end of file From 39a6cdabfc0e95b2f2b6bda86f488abcae921268 Mon Sep 17 00:00:00 2001 From: Jan Teichert-Kluge Date: Thu, 16 Nov 2023 13:26:48 +0100 Subject: [PATCH 38/67] dummy_learners are now "cloneable" --- doubleml/utils/dummy_learners.py | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) diff --git a/doubleml/utils/dummy_learners.py b/doubleml/utils/dummy_learners.py index 6043413f..2f893fb2 100644 --- a/doubleml/utils/dummy_learners.py +++ b/doubleml/utils/dummy_learners.py @@ -1,5 +1,6 @@ from sklearn.base import BaseEstimator + class dummy_regressor(BaseEstimator): _estimator_type = "regressor" @@ -12,9 +13,6 @@ def predict(*args): def set_params(*args): raise AttributeError("Accessed set_params method of dummy_regressor!") - def get_params(*args, **kwargs): - raise AttributeError("Accessed get_params method of dummy_regressor!") - class dummy_classifier(BaseEstimator): _estimator_type = "classifier" @@ -28,8 +26,5 @@ def predict(*args): def set_params(*args): raise AttributeError("Accessed set_params method of dummy_classifier!") - def get_params(*args, **kwargs): - raise AttributeError("Accessed get_params method of dummy_classifier!") - def predict_proba(*args, **kwargs): raise AttributeError("Accessed predict_proba method of dummy_classifier!") From 40413e1bab936f515c78825e98b92ffefe3430a5 Mon Sep 17 00:00:00 2001 From: Jan Teichert-Kluge Date: Thu, 16 Nov 2023 13:27:12 +0100 Subject: [PATCH 39/67] Unit Tests for new dummy leaerner classes --- doubleml/tests/test_dummy_learners.py | 46 +++++++++++++++++++++++++++ 1 file changed, 46 insertions(+) create mode 100644 doubleml/tests/test_dummy_learners.py diff --git a/doubleml/tests/test_dummy_learners.py b/doubleml/tests/test_dummy_learners.py new file mode 100644 
index 00000000..ee3d979a --- /dev/null +++ b/doubleml/tests/test_dummy_learners.py @@ -0,0 +1,46 @@ +import pytest +import numpy as np +from doubleml.utils import dummy_regressor, dummy_classifier +from sklearn.base import clone + + +@pytest.fixture(scope="module") +def dl_fixture(): + fixture = { + "dummy_regressor": dummy_regressor(), + "dummy_classifier": dummy_classifier(), + "X": np.random.normal(0, 1, size=(100, 10)), + "y_con": np.random.normal(0, 1, size=(100, 1)), + "y_cat": np.random.binomial(1, 0.5, size=(100, 1)), + } + + return fixture + + +@pytest.mark.ci +def test_fit(dl_fixture): + msg = "Accessed fit method of dummy_regressor!" + with pytest.raises(AttributeError, match=msg): + dl_fixture["dummy_regressor"].fit(dl_fixture["X"], dl_fixture["y_con"]) + msg = "Accessed fit method of dummy_classifier!" + with pytest.raises(AttributeError, match=msg): + dl_fixture["dummy_classifier"].fit(dl_fixture["X"], dl_fixture["y_cat"]) + + +@pytest.mark.ci +def test_predict(dl_fixture): + msg = "Accessed predict method of dummy_regressor!" + with pytest.raises(AttributeError, match=msg): + dl_fixture["dummy_regressor"].predict(dl_fixture["X"]) + msg = "Accessed predict method of dummy_classifier!" 
+ with pytest.raises(AttributeError, match=msg): + dl_fixture["dummy_classifier"].predict(dl_fixture["X"]) + + +@pytest.mark.ci +def test_clone(dl_fixture): + try: + _ = clone(dl_fixture["dummy_regressor"]) + _ = clone(dl_fixture["dummy_classifier"]) + except Error as e: + pytest.fail(f"clone() raised an exception:\n{str(e)}\n") From 59800124cac4ddc43f88a3343e81ec9139d14f16 Mon Sep 17 00:00:00 2001 From: Jan Teichert-Kluge Date: Thu, 16 Nov 2023 13:35:40 +0100 Subject: [PATCH 40/67] formatting --- .../tests/test_external_predictions_did.py | 29 ++++--------------- 1 file changed, 6 insertions(+), 23 deletions(-) diff --git a/doubleml/tests/test_external_predictions_did.py b/doubleml/tests/test_external_predictions_did.py index 308d9e8b..27495c6c 100644 --- a/doubleml/tests/test_external_predictions_did.py +++ b/doubleml/tests/test_external_predictions_did.py @@ -7,10 +7,11 @@ from doubleml.utils import dummy_regressor, dummy_classifier -@pytest.fixture(scope="module", params=['observational', 'experimental']) +@pytest.fixture(scope="module", params=["observational", "experimental"]) def did_score(request): return request.param + @pytest.fixture(scope="module", params=["dml1", "dml2"]) def dml_procedure(request): return request.param @@ -24,16 +25,10 @@ def n_rep(request): @pytest.fixture(scope="module") def doubleml_did_fixture(did_score, dml_procedure, n_rep): ext_predictions = {"d": {}} - - np.random.seed(3141) - dml_data = make_did_SZ2020(n_obs=500, return_type="DoubleMLData") - kwargs = {"obj_dml_data": dml_data, "score": did_score, "n_rep": n_rep, "dml_procedure": dml_procedure} - DMLDID = DoubleMLDID(ml_g=LinearRegression(), ml_m=LogisticRegression(), **kwargs) np.random.seed(3141) - DMLDID.fit(store_predictions=True) ext_predictions["d"]["ml_g0"] = DMLDID.predictions["ml_g0"][:, :, 0] @@ -41,10 +36,9 @@ def doubleml_did_fixture(did_score, dml_procedure, n_rep): ext_predictions["d"]["ml_m"] = DMLDID.predictions["ml_m"][:, :, 0] DMLDID_ext = 
DoubleMLDID(ml_g=dummy_regressor(), ml_m=dummy_classifier(), **kwargs) - np.random.seed(3141) DMLDID_ext.fit(external_predictions=ext_predictions) - + res_dict = {"coef_normal": DMLDID.coef, "coef_ext": DMLDID_ext.coef} return res_dict @@ -53,16 +47,10 @@ def doubleml_did_fixture(did_score, dml_procedure, n_rep): @pytest.fixture(scope="module") def doubleml_didcs_fixture(did_score, dml_procedure, n_rep): ext_predictions = {"d": {}} - - np.random.seed(3141) - dml_data = make_did_SZ2020(n_obs=500, cross_sectional_data=True, return_type="DoubleMLData") - kwargs = {"obj_dml_data": dml_data, "score": did_score, "n_rep": n_rep, "dml_procedure": dml_procedure} - DMLDIDCS = DoubleMLDIDCS(ml_g=LinearRegression(), ml_m=LogisticRegression(), **kwargs) np.random.seed(3141) - DMLDIDCS.fit(store_predictions=True) ext_predictions["d"]["ml_g_d0_t0"] = DMLDIDCS.predictions["ml_g_d0_t0"][:, :, 0] @@ -72,7 +60,6 @@ def doubleml_didcs_fixture(did_score, dml_procedure, n_rep): ext_predictions["d"]["ml_m"] = DMLDIDCS.predictions["ml_m"][:, :, 0] DMLDIDCS_ext = DoubleMLDIDCS(ml_g=dummy_regressor(), ml_m=dummy_classifier(), **kwargs) - np.random.seed(3141) DMLDIDCS_ext.fit(external_predictions=ext_predictions) @@ -81,15 +68,11 @@ def doubleml_didcs_fixture(did_score, dml_procedure, n_rep): return res_dict - @pytest.mark.ci def test_doubleml_did_coef(doubleml_did_fixture): - assert math.isclose( - doubleml_did_fixture["coef_normal"], doubleml_did_fixture["coef_ext"], rel_tol=1e-9, abs_tol=1e-3 - ) + assert math.isclose(doubleml_did_fixture["coef_normal"], doubleml_did_fixture["coef_ext"], rel_tol=1e-9, abs_tol=1e-3) + @pytest.mark.ci def test_doubleml_didcs_coef(doubleml_didcs_fixture): - assert math.isclose( - doubleml_didcs_fixture["coef_normal"], doubleml_didcs_fixture["coef_ext"], rel_tol=1e-9, abs_tol=1e-3 - ) \ No newline at end of file + assert math.isclose(doubleml_didcs_fixture["coef_normal"], doubleml_didcs_fixture["coef_ext"], rel_tol=1e-9, abs_tol=1e-3) From 
d3109b65fdc6d6632623da8c198f0e417798e786 Mon Sep 17 00:00:00 2001 From: Jan Teichert-Kluge Date: Tue, 28 Nov 2023 12:43:08 +0100 Subject: [PATCH 41/67] seperate testfiles for unit tests for ext. preds. --- .../tests/test_did_external_predictions.py | 49 +++++++++++++++ ....py => test_didcs_external_predictions.py} | 29 +-------- .../tests/test_irm_external_predictions.py | 63 +++++++++++++++++++ ...V.py => test_pliv_external_predictions.py} | 4 +- ...ns.py => test_plr_external_predictions.py} | 51 ++------------- 5 files changed, 118 insertions(+), 78 deletions(-) create mode 100644 doubleml/tests/test_did_external_predictions.py rename doubleml/tests/{test_external_predictions_did.py => test_didcs_external_predictions.py} (61%) create mode 100644 doubleml/tests/test_irm_external_predictions.py rename doubleml/tests/{test_external_predictions_IV.py => test_pliv_external_predictions.py} (97%) rename doubleml/tests/{test_external_predictions.py => test_plr_external_predictions.py} (50%) diff --git a/doubleml/tests/test_did_external_predictions.py b/doubleml/tests/test_did_external_predictions.py new file mode 100644 index 00000000..ebf8b616 --- /dev/null +++ b/doubleml/tests/test_did_external_predictions.py @@ -0,0 +1,49 @@ +import numpy as np +import pytest +import math +from sklearn.linear_model import LinearRegression, LassoCV, LogisticRegression +from doubleml import DoubleMLData, DoubleMLDID +from doubleml.datasets import make_did_SZ2020 +from doubleml.utils import dummy_regressor, dummy_classifier + + +@pytest.fixture(scope="module", params=["observational", "experimental"]) +def did_score(request): + return request.param + + +@pytest.fixture(scope="module", params=["dml1", "dml2"]) +def dml_procedure(request): + return request.param + + +@pytest.fixture(scope="module", params=[1, 3]) +def n_rep(request): + return request.param + + +@pytest.fixture(scope="module") +def doubleml_did_fixture(did_score, dml_procedure, n_rep): + ext_predictions = {"d": {}} + 
dml_data = make_did_SZ2020(n_obs=500, return_type="DoubleMLData") + kwargs = {"obj_dml_data": dml_data, "score": did_score, "n_rep": n_rep, "dml_procedure": dml_procedure} + DMLDID = DoubleMLDID(ml_g=LinearRegression(), ml_m=LogisticRegression(), **kwargs) + np.random.seed(3141) + DMLDID.fit(store_predictions=True) + + ext_predictions["d"]["ml_g0"] = DMLDID.predictions["ml_g0"][:, :, 0] + ext_predictions["d"]["ml_g1"] = DMLDID.predictions["ml_g1"][:, :, 0] + ext_predictions["d"]["ml_m"] = DMLDID.predictions["ml_m"][:, :, 0] + + DMLDID_ext = DoubleMLDID(ml_g=dummy_regressor(), ml_m=dummy_classifier(), **kwargs) + np.random.seed(3141) + DMLDID_ext.fit(external_predictions=ext_predictions) + + res_dict = {"coef_normal": DMLDID.coef, "coef_ext": DMLDID_ext.coef} + + return res_dict + + +@pytest.mark.ci +def test_doubleml_did_coef(doubleml_did_fixture): + assert math.isclose(doubleml_did_fixture["coef_normal"], doubleml_did_fixture["coef_ext"], rel_tol=1e-9, abs_tol=1e-3) diff --git a/doubleml/tests/test_external_predictions_did.py b/doubleml/tests/test_didcs_external_predictions.py similarity index 61% rename from doubleml/tests/test_external_predictions_did.py rename to doubleml/tests/test_didcs_external_predictions.py index 27495c6c..6effc805 100644 --- a/doubleml/tests/test_external_predictions_did.py +++ b/doubleml/tests/test_didcs_external_predictions.py @@ -2,7 +2,7 @@ import pytest import math from sklearn.linear_model import LinearRegression, LassoCV, LogisticRegression -from doubleml import DoubleMLData, DoubleMLDID, DoubleMLDIDCS +from doubleml import DoubleMLData, DoubleMLDIDCS from doubleml.datasets import make_did_SZ2020 from doubleml.utils import dummy_regressor, dummy_classifier @@ -22,28 +22,6 @@ def n_rep(request): return request.param -@pytest.fixture(scope="module") -def doubleml_did_fixture(did_score, dml_procedure, n_rep): - ext_predictions = {"d": {}} - dml_data = make_did_SZ2020(n_obs=500, return_type="DoubleMLData") - kwargs = {"obj_dml_data": 
dml_data, "score": did_score, "n_rep": n_rep, "dml_procedure": dml_procedure} - DMLDID = DoubleMLDID(ml_g=LinearRegression(), ml_m=LogisticRegression(), **kwargs) - np.random.seed(3141) - DMLDID.fit(store_predictions=True) - - ext_predictions["d"]["ml_g0"] = DMLDID.predictions["ml_g0"][:, :, 0] - ext_predictions["d"]["ml_g1"] = DMLDID.predictions["ml_g1"][:, :, 0] - ext_predictions["d"]["ml_m"] = DMLDID.predictions["ml_m"][:, :, 0] - - DMLDID_ext = DoubleMLDID(ml_g=dummy_regressor(), ml_m=dummy_classifier(), **kwargs) - np.random.seed(3141) - DMLDID_ext.fit(external_predictions=ext_predictions) - - res_dict = {"coef_normal": DMLDID.coef, "coef_ext": DMLDID_ext.coef} - - return res_dict - - @pytest.fixture(scope="module") def doubleml_didcs_fixture(did_score, dml_procedure, n_rep): ext_predictions = {"d": {}} @@ -68,11 +46,6 @@ def doubleml_didcs_fixture(did_score, dml_procedure, n_rep): return res_dict -@pytest.mark.ci -def test_doubleml_did_coef(doubleml_did_fixture): - assert math.isclose(doubleml_did_fixture["coef_normal"], doubleml_did_fixture["coef_ext"], rel_tol=1e-9, abs_tol=1e-3) - - @pytest.mark.ci def test_doubleml_didcs_coef(doubleml_didcs_fixture): assert math.isclose(doubleml_didcs_fixture["coef_normal"], doubleml_didcs_fixture["coef_ext"], rel_tol=1e-9, abs_tol=1e-3) diff --git a/doubleml/tests/test_irm_external_predictions.py b/doubleml/tests/test_irm_external_predictions.py new file mode 100644 index 00000000..19c96330 --- /dev/null +++ b/doubleml/tests/test_irm_external_predictions.py @@ -0,0 +1,63 @@ +import numpy as np +import pytest +import math +from sklearn.linear_model import LinearRegression, LassoCV, LogisticRegression +from doubleml import DoubleMLIRM, DoubleMLData +from doubleml.datasets import make_irm_data +from doubleml.utils import dummy_regressor, dummy_classifier + + +@pytest.fixture(scope="module", params=["ATE", "ATTE"]) +def irm_score(request): + return request.param + + +@pytest.fixture(scope="module", params=["dml1", "dml2"]) 
+def dml_procedure(request): + return request.param + + +@pytest.fixture(scope="module", params=[1, 3]) +def n_rep(request): + return request.param + + +@pytest.fixture(scope="module") +def doubleml_irm_fixture(irm_score, dml_procedure, n_rep): + ext_predictions = {"d": {}} + + x, y, d = make_irm_data(n_obs=500, dim_x=20, theta=0.5, return_type="np.array") + + np.random.seed(3141) + + dml_data = DoubleMLData.from_arrays(x=x, y=y, d=d) + + kwargs = {"obj_dml_data": dml_data, "score": irm_score, "n_rep": n_rep, "dml_procedure": dml_procedure} + + DMLIRM = DoubleMLIRM(ml_g=LinearRegression(), ml_m=LogisticRegression(), **kwargs) + np.random.seed(3141) + + DMLIRM.fit(store_predictions=True) + + ext_predictions["d"]["ml_g0"] = DMLIRM.predictions["ml_g0"][:, :, 0] + ext_predictions["d"]["ml_g1"] = DMLIRM.predictions["ml_g1"][:, :, 0] + ext_predictions["d"]["ml_m"] = DMLIRM.predictions["ml_m"][:, :, 0] + + DMLIRM_ext = DoubleMLIRM(ml_g=dummy_regressor(), ml_m=dummy_classifier(), **kwargs) + + np.random.seed(3141) + DMLIRM_ext.fit(external_predictions=ext_predictions) + + res_dict = {"coef_normal": DMLIRM.coef, "coef_ext": DMLIRM_ext.coef} + + return res_dict + + +@pytest.mark.ci +def test_doubleml_plr_coef(doubleml_plr_fixture): + assert math.isclose(doubleml_plr_fixture["coef_normal"], doubleml_plr_fixture["coef_ext"], rel_tol=1e-9, abs_tol=1e-4) + + +@pytest.mark.ci +def test_doubleml_irm_coef(doubleml_irm_fixture): + assert math.isclose(doubleml_irm_fixture["coef_normal"], doubleml_irm_fixture["coef_ext"], rel_tol=1e-9, abs_tol=1e-4) diff --git a/doubleml/tests/test_external_predictions_IV.py b/doubleml/tests/test_pliv_external_predictions.py similarity index 97% rename from doubleml/tests/test_external_predictions_IV.py rename to doubleml/tests/test_pliv_external_predictions.py index 5563ef90..cbd13dfe 100644 --- a/doubleml/tests/test_external_predictions_IV.py +++ b/doubleml/tests/test_pliv_external_predictions.py @@ -31,9 +31,7 @@ def dim_z(request): def 
adapted_doubleml_fixture(score, dml_procedure, n_rep, dim_z): # IV-type score only allows dim_z = 1, so skip testcases with dim_z > 1 for IV-type score if dim_z > 1 and score == "IV-type": - res_dict = {"coef_normal": 1, "coef_ext": 1} - - return res_dict + pytest.skip("IV-type score only allows dim_z = 1") else: ext_predictions = {"d": {}} diff --git a/doubleml/tests/test_external_predictions.py b/doubleml/tests/test_plr_external_predictions.py similarity index 50% rename from doubleml/tests/test_external_predictions.py rename to doubleml/tests/test_plr_external_predictions.py index f6409d40..ca04794f 100644 --- a/doubleml/tests/test_external_predictions.py +++ b/doubleml/tests/test_plr_external_predictions.py @@ -2,19 +2,15 @@ import pytest import math from sklearn.linear_model import LinearRegression, LassoCV, LogisticRegression -from doubleml import DoubleMLPLR, DoubleMLIRM, DoubleMLData -from doubleml.datasets import make_plr_CCDDHNR2018, make_irm_data -from doubleml.utils import dummy_regressor, dummy_classifier +from doubleml import DoubleMLPLR, DoubleMLData +from doubleml.datasets import make_plr_CCDDHNR2018 +from doubleml.utils import dummy_regressor @pytest.fixture(scope="module", params=["IV-type", "partialling out"]) def plr_score(request): return request.param -@pytest.fixture(scope="module", params=["ATE", "ATTE"]) -def irm_score(request): - return request.param - @pytest.fixture(scope="module", params=["dml1", "dml2"]) def dml_procedure(request): @@ -63,45 +59,6 @@ def doubleml_plr_fixture(plr_score, dml_procedure, n_rep): return res_dict -@pytest.fixture(scope="module") -def doubleml_irm_fixture(irm_score, dml_procedure, n_rep): - ext_predictions = {"d": {}} - - x, y, d = make_irm_data(n_obs=500, dim_x=20, theta=0.5, return_type="np.array") - - np.random.seed(3141) - - dml_data = DoubleMLData.from_arrays(x=x, y=y, d=d) - - kwargs = {"obj_dml_data": dml_data, "score": irm_score, "n_rep": n_rep, "dml_procedure": dml_procedure} - - DMLIRM = 
DoubleMLIRM(ml_g=LinearRegression(), ml_m=LogisticRegression(), **kwargs) - np.random.seed(3141) - - DMLIRM.fit(store_predictions=True) - - ext_predictions["d"]["ml_g0"] = DMLIRM.predictions["ml_g0"][:, :, 0] - ext_predictions["d"]["ml_g1"] = DMLIRM.predictions["ml_g1"][:, :, 0] - ext_predictions["d"]["ml_m"] = DMLIRM.predictions["ml_m"][:, :, 0] - - DMLIRM_ext = DoubleMLIRM(ml_g=dummy_regressor(), ml_m=dummy_classifier(), **kwargs) - - np.random.seed(3141) - DMLIRM_ext.fit(external_predictions=ext_predictions) - - res_dict = {"coef_normal": DMLIRM.coef, "coef_ext": DMLIRM_ext.coef} - - return res_dict - - @pytest.mark.ci def test_doubleml_plr_coef(doubleml_plr_fixture): - assert math.isclose( - doubleml_plr_fixture["coef_normal"], doubleml_plr_fixture["coef_ext"], rel_tol=1e-9, abs_tol=1e-4 - ) - -@pytest.mark.ci -def test_doubleml_irm_coef(doubleml_irm_fixture): - assert math.isclose( - doubleml_irm_fixture["coef_normal"], doubleml_irm_fixture["coef_ext"], rel_tol=1e-9, abs_tol=1e-4 - ) + assert math.isclose(doubleml_plr_fixture["coef_normal"], doubleml_plr_fixture["coef_ext"], rel_tol=1e-9, abs_tol=1e-4) From efa436d873512e7c9e93b94b18e5fc28ea0f4c71 Mon Sep 17 00:00:00 2001 From: Jan Teichert-Kluge Date: Tue, 28 Nov 2023 14:34:59 +0100 Subject: [PATCH 42/67] add external preds for iivm models --- doubleml/double_ml_iivm.py | 96 ++++++++++++------- .../tests/test_iivm_external_predictions.py | 75 +++++++++++++++ .../tests/test_irm_external_predictions.py | 6 -- 3 files changed, 137 insertions(+), 40 deletions(-) create mode 100644 doubleml/tests/test_iivm_external_predictions.py diff --git a/doubleml/double_ml_iivm.py b/doubleml/double_ml_iivm.py index 78429ae9..1cb793b2 100644 --- a/doubleml/double_ml_iivm.py +++ b/doubleml/double_ml_iivm.py @@ -258,13 +258,18 @@ def _nuisance_est(self, smpls, n_jobs_cv, external_predictions, return_models=Fa smpls_z0, smpls_z1 = _get_cond_smpls(smpls, z) # nuisance g - g_hat0 = _dml_cv_predict(self._learner['ml_g'], x, y, 
smpls=smpls_z0, n_jobs=n_jobs_cv, - est_params=self._get_params('ml_g0'), method=self._predict_method['ml_g'], - return_models=return_models) - _check_finite_predictions(g_hat0['preds'], self._learner['ml_g'], 'ml_g', smpls) - # adjust target values to consider only compatible subsamples - g_hat0['targets'] = g_hat0['targets'].astype(float) - g_hat0['targets'][z == 1] = np.nan + if external_predictions['ml_g0'] is not None: + g_hat0 = {'preds': external_predictions['ml_g0'], + 'targets': None, + 'models': None} + else: + g_hat0 = _dml_cv_predict(self._learner['ml_g'], x, y, smpls=smpls_z0, n_jobs=n_jobs_cv, + est_params=self._get_params('ml_g0'), method=self._predict_method['ml_g'], + return_models=return_models) + _check_finite_predictions(g_hat0['preds'], self._learner['ml_g'], 'ml_g', smpls) + # adjust target values to consider only compatible subsamples + g_hat0['targets'] = g_hat0['targets'].astype(float) + g_hat0['targets'][z == 1] = np.nan if self._dml_data.binary_outcome: binary_preds = (type_of_target(g_hat0['preds']) == 'binary') @@ -276,14 +281,18 @@ def _nuisance_est(self, smpls, n_jobs_cv, external_predictions, return_models=Fa 'probabilities and not labels are predicted.') _check_is_propensity(g_hat0['preds'], self._learner['ml_g'], 'ml_g', smpls, eps=1e-12) - - g_hat1 = _dml_cv_predict(self._learner['ml_g'], x, y, smpls=smpls_z1, n_jobs=n_jobs_cv, - est_params=self._get_params('ml_g1'), method=self._predict_method['ml_g'], - return_models=return_models) - _check_finite_predictions(g_hat1['preds'], self._learner['ml_g'], 'ml_g', smpls) - # adjust target values to consider only compatible subsamples - g_hat1['targets'] = g_hat1['targets'].astype(float) - g_hat1['targets'][z == 0] = np.nan + if external_predictions['ml_g1'] is not None: + g_hat1 = {'preds': external_predictions['ml_g1'], + 'targets': None, + 'models': None} + else: + g_hat1 = _dml_cv_predict(self._learner['ml_g'], x, y, smpls=smpls_z1, n_jobs=n_jobs_cv, + 
est_params=self._get_params('ml_g1'), method=self._predict_method['ml_g'], + return_models=return_models) + _check_finite_predictions(g_hat1['preds'], self._learner['ml_g'], 'ml_g', smpls) + # adjust target values to consider only compatible subsamples + g_hat1['targets'] = g_hat1['targets'].astype(float) + g_hat1['targets'][z == 0] = np.nan if self._dml_data.binary_outcome: binary_preds = (type_of_target(g_hat1['preds']) == 'binary') @@ -297,34 +306,53 @@ def _nuisance_est(self, smpls, n_jobs_cv, external_predictions, return_models=Fa _check_is_propensity(g_hat1['preds'], self._learner['ml_g'], 'ml_g', smpls, eps=1e-12) # nuisance m - m_hat = _dml_cv_predict(self._learner['ml_m'], x, z, smpls=smpls, n_jobs=n_jobs_cv, - est_params=self._get_params('ml_m'), method=self._predict_method['ml_m'], - return_models=return_models) - _check_finite_predictions(m_hat['preds'], self._learner['ml_m'], 'ml_m', smpls) - _check_is_propensity(m_hat['preds'], self._learner['ml_m'], 'ml_m', smpls, eps=1e-12) + if external_predictions['ml_m'] is not None: + m_hat = {'preds': external_predictions['ml_m'], + 'targets': None, + 'models': None} + else: + m_hat = _dml_cv_predict(self._learner['ml_m'], x, z, smpls=smpls, n_jobs=n_jobs_cv, + est_params=self._get_params('ml_m'), method=self._predict_method['ml_m'], + return_models=return_models) + _check_finite_predictions(m_hat['preds'], self._learner['ml_m'], 'ml_m', smpls) + _check_is_propensity(m_hat['preds'], self._learner['ml_m'], 'ml_m', smpls, eps=1e-12) # nuisance r + r0 = external_predictions['ml_r0'] is not None if self.subgroups['always_takers']: - r_hat0 = _dml_cv_predict(self._learner['ml_r'], x, d, smpls=smpls_z0, n_jobs=n_jobs_cv, - est_params=self._get_params('ml_r0'), method=self._predict_method['ml_r'], - return_models=return_models) + if r0: + r_hat0 = {'preds': external_predictions['ml_r0'], + 'targets': None, + 'models': None} + else: + r_hat0 = _dml_cv_predict(self._learner['ml_r'], x, d, smpls=smpls_z0, 
n_jobs=n_jobs_cv, + est_params=self._get_params('ml_r0'), method=self._predict_method['ml_r'], + return_models=return_models) else: r_hat0 = {'preds': np.zeros_like(d), 'targets': np.zeros_like(d), 'models': None} - _check_finite_predictions(r_hat0['preds'], self._learner['ml_r'], 'ml_r', smpls) - # adjust target values to consider only compatible subsamples - r_hat0['targets'] = r_hat0['targets'].astype(float) - r_hat0['targets'][z == 1] = np.nan + if not r0: + _check_finite_predictions(r_hat0['preds'], self._learner['ml_r'], 'ml_r', smpls) + # adjust target values to consider only compatible subsamples + r_hat0['targets'] = r_hat0['targets'].astype(float) + r_hat0['targets'][z == 1] = np.nan + r1 = external_predictions['ml_r1'] is not None if self.subgroups['never_takers']: - r_hat1 = _dml_cv_predict(self._learner['ml_r'], x, d, smpls=smpls_z1, n_jobs=n_jobs_cv, - est_params=self._get_params('ml_r1'), method=self._predict_method['ml_r'], - return_models=return_models) + if r1: + r_hat1 = {'preds': external_predictions['ml_r1'], + 'targets': None, + 'models': None} + else: + r_hat1 = _dml_cv_predict(self._learner['ml_r'], x, d, smpls=smpls_z1, n_jobs=n_jobs_cv, + est_params=self._get_params('ml_r1'), method=self._predict_method['ml_r'], + return_models=return_models) else: r_hat1 = {'preds': np.ones_like(d), 'targets': np.ones_like(d), 'models': None} - _check_finite_predictions(r_hat1['preds'], self._learner['ml_r'], 'ml_r', smpls) - # adjust target values to consider only compatible subsamples - r_hat1['targets'] = r_hat1['targets'].astype(float) - r_hat1['targets'][z == 0] = np.nan + if not r1: + _check_finite_predictions(r_hat1['preds'], self._learner['ml_r'], 'ml_r', smpls) + # adjust target values to consider only compatible subsamples + r_hat1['targets'] = r_hat1['targets'].astype(float) + r_hat1['targets'][z == 0] = np.nan psi_a, psi_b = self._score_elements(y, z, d, g_hat0['preds'], g_hat1['preds'], m_hat['preds'], diff --git 
a/doubleml/tests/test_iivm_external_predictions.py b/doubleml/tests/test_iivm_external_predictions.py new file mode 100644 index 00000000..40bb02db --- /dev/null +++ b/doubleml/tests/test_iivm_external_predictions.py @@ -0,0 +1,75 @@ +import numpy as np +import pytest +import math +from sklearn.linear_model import LinearRegression, LassoCV, LogisticRegression +from doubleml import DoubleMLIIVM, DoubleMLData +from doubleml.datasets import make_iivm_data +from doubleml.utils import dummy_regressor, dummy_classifier + + +@pytest.fixture(scope="module", params=["dml1", "dml2"]) +def dml_procedure(request): + return request.param + + +@pytest.fixture(scope="module", params=[1, 3]) +def n_rep(request): + return request.param + + +@pytest.fixture(scope="module") +def adapted_doubleml_fixture(dml_procedure, n_rep): + ext_predictions = {"d": {}} + + data = make_iivm_data( + n_obs=500, dim_x=20, theta=0.5, alpha_x=1.0, return_type="DataFrame" + ) + + np.random.seed(3141) + + dml_data = DoubleMLData(data, "y", "d", z_cols="z") + + kwargs = { + "obj_dml_data": dml_data, + "score": "LATE", + "n_rep": n_rep, + "dml_procedure": dml_procedure, + } + + DMLIIVM = DoubleMLIIVM( + ml_g=LinearRegression(), + ml_m=LogisticRegression(), + ml_r=LogisticRegression(), + **kwargs, + ) + np.random.seed(3141) + + DMLIIVM.fit(store_predictions=True) + + ext_predictions["d"]["ml_g0"] = DMLIIVM.predictions["ml_g0"][:, :, 0] + ext_predictions["d"]["ml_g1"] = DMLIIVM.predictions["ml_g1"][:, :, 0] + ext_predictions["d"]["ml_m"] = DMLIIVM.predictions["ml_m"][:, :, 0] + ext_predictions["d"]["ml_r0"] = DMLIIVM.predictions["ml_r0"][:, :, 0] + ext_predictions["d"]["ml_r1"] = DMLIIVM.predictions["ml_r1"][:, :, 0] + + + DMLIIVM_ext = DoubleMLIIVM( + ml_g=dummy_regressor(), ml_m=dummy_classifier(), ml_r=dummy_classifier(), **kwargs + ) + + np.random.seed(3141) + DMLIIVM_ext.fit(external_predictions=ext_predictions) + + res_dict = {"coef_normal": DMLIIVM.coef, "coef_ext": DMLIIVM_ext.coef} + + return 
res_dict + + +@pytest.mark.ci +def test_adapted_doubleml_coef(adapted_doubleml_fixture): + assert math.isclose( + adapted_doubleml_fixture["coef_normal"], + adapted_doubleml_fixture["coef_ext"], + rel_tol=1e-9, + abs_tol=1e-4, + ) diff --git a/doubleml/tests/test_irm_external_predictions.py b/doubleml/tests/test_irm_external_predictions.py index 19c96330..c1463a07 100644 --- a/doubleml/tests/test_irm_external_predictions.py +++ b/doubleml/tests/test_irm_external_predictions.py @@ -52,12 +52,6 @@ def doubleml_irm_fixture(irm_score, dml_procedure, n_rep): return res_dict - -@pytest.mark.ci -def test_doubleml_plr_coef(doubleml_plr_fixture): - assert math.isclose(doubleml_plr_fixture["coef_normal"], doubleml_plr_fixture["coef_ext"], rel_tol=1e-9, abs_tol=1e-4) - - @pytest.mark.ci def test_doubleml_irm_coef(doubleml_irm_fixture): assert math.isclose(doubleml_irm_fixture["coef_normal"], doubleml_irm_fixture["coef_ext"], rel_tol=1e-9, abs_tol=1e-4) From 4e3f36fdf89a5619d55af8e01f11e10f052f0252 Mon Sep 17 00:00:00 2001 From: Jan Teichert-Kluge Date: Tue, 28 Nov 2023 17:39:40 +0100 Subject: [PATCH 43/67] add external preds for pq models --- doubleml/double_ml_pq.py | 140 ++++++++++-------- .../tests/test_pq_external_predictions.py | 64 ++++++++ 2 files changed, 144 insertions(+), 60 deletions(-) create mode 100644 doubleml/tests/test_pq_external_predictions.py diff --git a/doubleml/double_ml_pq.py b/doubleml/double_ml_pq.py index 76e49f1e..d785429f 100644 --- a/doubleml/double_ml_pq.py +++ b/doubleml/double_ml_pq.py @@ -261,79 +261,95 @@ def _nuisance_est(self, smpls, n_jobs_cv, external_predictions, return_models=Fa force_all_finite=False) x, d = check_X_y(x, self._dml_data.d, force_all_finite=False) - + + g = external_predictions['ml_g'] is not None + m = external_predictions['ml_m'] is not None + # initialize nuisance predictions, targets and models - g_hat = {'models': None, - 'targets': np.full(shape=self._dml_data.n_obs, fill_value=np.nan), - 'preds': 
np.full(shape=self._dml_data.n_obs, fill_value=np.nan) - } - m_hat = copy.deepcopy(g_hat) - - ipw_vec = np.full(shape=self.n_folds, fill_value=np.nan) - # initialize models - fitted_models = {} - for learner in self.params_names: - # set nuisance model parameters - est_params = self._get_params(learner) - if est_params is not None: - fitted_models[learner] = [clone(self._learner[learner]).set_params(**est_params[i_fold]) - for i_fold in range(self.n_folds)] - else: - fitted_models[learner] = [clone(self._learner[learner]) for i_fold in range(self.n_folds)] + + if not (g and m): + g_hat = {'models': None, + 'targets': np.full(shape=self._dml_data.n_obs, fill_value=np.nan), + 'preds': np.full(shape=self._dml_data.n_obs, fill_value=np.nan) + } + m_hat = copy.deepcopy(g_hat) + ipw_vec = np.full(shape=self.n_folds, fill_value=np.nan) + # initialize models + fitted_models = {} + for learner in self.params_names: + # set nuisance model parameters + est_params = self._get_params(learner) + if est_params is not None: + fitted_models[learner] = [clone(self._learner[learner]).set_params(**est_params[i_fold]) + for i_fold in range(self.n_folds)] + else: + fitted_models[learner] = [clone(self._learner[learner]) for i_fold in range(self.n_folds)] + elif (g and not m) or (m and not g): + raise ValueError('External predictions for both g and m are required.') + else: + g_hat = {'models': None, + 'targets': np.full(shape=self._dml_data.n_obs, fill_value=np.nan), + 'preds': external_predictions['ml_g'] + } + m_hat = {'models': None, + 'targets': np.full(shape=self._dml_data.n_obs, fill_value=np.nan), + 'preds': external_predictions['ml_m'] + } # caculate nuisance functions over different folds - for i_fold in range(self.n_folds): - train_inds = smpls[i_fold][0] - test_inds = smpls[i_fold][1] + if not (g and m): + for i_fold in range(self.n_folds): + train_inds = smpls[i_fold][0] + test_inds = smpls[i_fold][1] - # start nested crossfitting - train_inds_1, train_inds_2 = 
train_test_split(train_inds, test_size=0.5, - random_state=42, stratify=d[train_inds]) - smpls_prelim = [(train, test) for train, test in - StratifiedKFold(n_splits=self.n_folds).split(X=train_inds_1, y=d[train_inds_1])] + # start nested crossfitting + train_inds_1, train_inds_2 = train_test_split(train_inds, test_size=0.5, + random_state=42, stratify=d[train_inds]) + smpls_prelim = [(train, test) for train, test in + StratifiedKFold(n_splits=self.n_folds).split(X=train_inds_1, y=d[train_inds_1])] - d_train_1 = d[train_inds_1] - y_train_1 = y[train_inds_1] - x_train_1 = x[train_inds_1, :] + d_train_1 = d[train_inds_1] + y_train_1 = y[train_inds_1] + x_train_1 = x[train_inds_1, :] - # get a copy of ml_m as a preliminary learner - ml_m_prelim = clone(fitted_models['ml_m'][i_fold]) - m_hat_prelim = _dml_cv_predict(ml_m_prelim, x_train_1, d_train_1, - method='predict_proba', smpls=smpls_prelim)['preds'] + # get a copy of ml_m as a preliminary learner + ml_m_prelim = clone(fitted_models['ml_m'][i_fold]) + m_hat_prelim = _dml_cv_predict(ml_m_prelim, x_train_1, d_train_1, + method='predict_proba', smpls=smpls_prelim)['preds'] - m_hat_prelim = _trimm(m_hat_prelim, self.trimming_rule, self.trimming_threshold) + m_hat_prelim = _trimm(m_hat_prelim, self.trimming_rule, self.trimming_threshold) - if self._normalize_ipw: - m_hat_prelim = _normalize_ipw(m_hat_prelim, d_train_1) - if self.treatment == 0: - m_hat_prelim = 1 - m_hat_prelim + if self._normalize_ipw: + m_hat_prelim = _normalize_ipw(m_hat_prelim, d_train_1) + if self.treatment == 0: + m_hat_prelim = 1 - m_hat_prelim - # preliminary ipw estimate - def ipw_score(theta): - res = np.mean(self._compute_ipw_score(theta, d_train_1, y_train_1, m_hat_prelim)) - return res + # preliminary ipw estimate + def ipw_score(theta): + res = np.mean(self._compute_ipw_score(theta, d_train_1, y_train_1, m_hat_prelim)) + return res - _, bracket_guess = _get_bracket_guess(ipw_score, self._coef_start_val, self._coef_bounds) - ipw_est = 
_solve_ipw_score(ipw_score=ipw_score, bracket_guess=bracket_guess) - ipw_vec[i_fold] = ipw_est + _, bracket_guess = _get_bracket_guess(ipw_score, self._coef_start_val, self._coef_bounds) + ipw_est = _solve_ipw_score(ipw_score=ipw_score, bracket_guess=bracket_guess) + ipw_vec[i_fold] = ipw_est - # use the preliminary estimates to fit the nuisance parameters on train_2 - d_train_2 = d[train_inds_2] - y_train_2 = y[train_inds_2] - x_train_2 = x[train_inds_2, :] + # use the preliminary estimates to fit the nuisance parameters on train_2 + d_train_2 = d[train_inds_2] + y_train_2 = y[train_inds_2] + x_train_2 = x[train_inds_2, :] - dx_treat_train_2 = x_train_2[d_train_2 == self.treatment, :] - y_treat_train_2 = y_train_2[d_train_2 == self.treatment] + dx_treat_train_2 = x_train_2[d_train_2 == self.treatment, :] + y_treat_train_2 = y_train_2[d_train_2 == self.treatment] - fitted_models['ml_g'][i_fold].fit(dx_treat_train_2, y_treat_train_2 <= ipw_est) + fitted_models['ml_g'][i_fold].fit(dx_treat_train_2, y_treat_train_2 <= ipw_est) - # predict nuisance values on the test data and the corresponding targets - g_hat['preds'][test_inds] = _predict_zero_one_propensity(fitted_models['ml_g'][i_fold], x[test_inds, :]) - g_hat['targets'][test_inds] = y[test_inds] <= ipw_est + # predict nuisance values on the test data and the corresponding targets + g_hat['preds'][test_inds] = _predict_zero_one_propensity(fitted_models['ml_g'][i_fold], x[test_inds, :]) + g_hat['targets'][test_inds] = y[test_inds] <= ipw_est - # refit the propensity score on the whole training set - fitted_models['ml_m'][i_fold].fit(x[train_inds, :], d[train_inds]) - m_hat['preds'][test_inds] = _predict_zero_one_propensity(fitted_models['ml_m'][i_fold], x[test_inds, :]) + # refit the propensity score on the whole training set + fitted_models['ml_m'][i_fold].fit(x[train_inds, :], d[train_inds]) + m_hat['preds'][test_inds] = _predict_zero_one_propensity(fitted_models['ml_m'][i_fold], x[test_inds, :]) # set target for 
propensity score m_hat['targets'] = d @@ -348,6 +364,7 @@ def ipw_score(theta): # clip propensities and normalize ipw weights # this is not done in the score to save computation due to multiple score evaluations # to be able to evaluate the raw models the m_hat['preds'] are not changed + #if not (g and m): m_hat_adj = _trimm(m_hat['preds'], self.trimming_rule, self.trimming_threshold) if self._normalize_ipw: if self.dml_procedure == 'dml1': @@ -358,9 +375,12 @@ def ipw_score(theta): if self.treatment == 0: m_hat_adj = 1 - m_hat_adj - # readjust start value for minimization - self._coef_start_val = np.mean(ipw_vec) + if not (g and m): + self._coef_start_val = np.mean(ipw_vec) + #else: + # m_hat_adj = m_hat['preds'] + psi_elements = {'ind_d': d == self.treatment, 'g': g_hat['preds'], 'm': m_hat_adj, 'y': y} diff --git a/doubleml/tests/test_pq_external_predictions.py b/doubleml/tests/test_pq_external_predictions.py new file mode 100644 index 00000000..c21bd1c8 --- /dev/null +++ b/doubleml/tests/test_pq_external_predictions.py @@ -0,0 +1,64 @@ +import numpy as np +import pytest +import math +from sklearn.linear_model import LogisticRegression +from doubleml import DoubleMLPQ, DoubleMLData +from doubleml.datasets import make_irm_data +from doubleml.utils import dummy_regressor, dummy_classifier + + +@pytest.fixture(scope="module", params=["dml1", "dml2"]) +def dml_procedure(request): + return request.param + + +@pytest.fixture(scope="module", params=[1, 3]) +def n_rep(request): + return request.param + + +@pytest.fixture(scope="module", params=[True, False]) +def normalize_ipw(request): + return request.param + + +@pytest.fixture(scope="module") +def doubleml_pq_fixture(dml_procedure, n_rep, normalize_ipw): + ext_predictions = {"d": {}} + np.random.seed(3141) + data = make_irm_data(theta=0.5, n_obs=500, dim_x=20, return_type="DataFrame") + + dml_data = DoubleMLData(data, "y", "d") + + kwargs = { + "obj_dml_data": dml_data, + "score": "PQ", + "n_rep": n_rep, + 
"dml_procedure": dml_procedure, + "normalize_ipw": normalize_ipw, + } + + ml_g = LogisticRegression() + ml_m = LogisticRegression() + + DMLPQ = DoubleMLPQ(ml_g=ml_g, ml_m=ml_m, **kwargs) + np.random.seed(3141) + + DMLPQ.fit(store_predictions=True) + + ext_predictions["d"]["ml_g"] = DMLPQ.predictions["ml_g"][:, :, 0] + ext_predictions["d"]["ml_m"] = DMLPQ.predictions["ml_m"][:, :, 0] + + DMLPLQ_ext = DoubleMLPQ(ml_g=dummy_classifier(), ml_m=dummy_classifier(), **kwargs) + + np.random.seed(3141) + DMLPLQ_ext.fit(external_predictions=ext_predictions) + + res_dict = {"coef_normal": DMLPQ.coef, "coef_ext": DMLPLQ_ext.coef} + + return res_dict + + +@pytest.mark.ci +def test_doubleml_pq_coef(doubleml_pq_fixture): + assert math.isclose(doubleml_pq_fixture["coef_normal"], doubleml_pq_fixture["coef_ext"], rel_tol=1e-9, abs_tol=1e-4) From 580d5d29025bf3c0d678db7ccb61ca705d06d9ca Mon Sep 17 00:00:00 2001 From: Sven Klaassen <47529404+SvenKlaassen@users.noreply.github.com> Date: Tue, 28 Nov 2023 18:25:52 +0100 Subject: [PATCH 44/67] Update test_did_external_predictions.py --- doubleml/tests/test_did_external_predictions.py | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) diff --git a/doubleml/tests/test_did_external_predictions.py b/doubleml/tests/test_did_external_predictions.py index ebf8b616..12d7e3c9 100644 --- a/doubleml/tests/test_did_external_predictions.py +++ b/doubleml/tests/test_did_external_predictions.py @@ -5,7 +5,7 @@ from doubleml import DoubleMLData, DoubleMLDID from doubleml.datasets import make_did_SZ2020 from doubleml.utils import dummy_regressor, dummy_classifier - +from ._utils import draw_smpls @pytest.fixture(scope="module", params=["observational", "experimental"]) def did_score(request): @@ -26,8 +26,16 @@ def n_rep(request): def doubleml_did_fixture(did_score, dml_procedure, n_rep): ext_predictions = {"d": {}} dml_data = make_did_SZ2020(n_obs=500, return_type="DoubleMLData") - kwargs = {"obj_dml_data": dml_data, "score": did_score, 
"n_rep": n_rep, "dml_procedure": dml_procedure} + all_smpls = draw_smpls(len(dml_data.y), 5, n_rep=n_rep, groups=dml_data.d) + kwargs = { + "obj_dml_data": dml_data, + "score": did_score, + "n_rep": n_rep, + "dml_procedure": dml_procedure, + "draw_sample_splitting": False + } DMLDID = DoubleMLDID(ml_g=LinearRegression(), ml_m=LogisticRegression(), **kwargs) + DMLDID.set_sample_splitting(all_smpls) np.random.seed(3141) DMLDID.fit(store_predictions=True) @@ -36,6 +44,7 @@ def doubleml_did_fixture(did_score, dml_procedure, n_rep): ext_predictions["d"]["ml_m"] = DMLDID.predictions["ml_m"][:, :, 0] DMLDID_ext = DoubleMLDID(ml_g=dummy_regressor(), ml_m=dummy_classifier(), **kwargs) + DMLDID_ext.set_sample_splitting(all_smpls) np.random.seed(3141) DMLDID_ext.fit(external_predictions=ext_predictions) From fe13dee4e743b0c8564ee2e54cc7704ac9179a20 Mon Sep 17 00:00:00 2001 From: Jan Teichert-Kluge Date: Wed, 29 Nov 2023 14:54:39 +0100 Subject: [PATCH 45/67] fix unit test for ext. preds. for DID CS model --- doubleml/tests/test_didcs_external_predictions.py | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/doubleml/tests/test_didcs_external_predictions.py b/doubleml/tests/test_didcs_external_predictions.py index 6effc805..0eed900a 100644 --- a/doubleml/tests/test_didcs_external_predictions.py +++ b/doubleml/tests/test_didcs_external_predictions.py @@ -5,6 +5,7 @@ from doubleml import DoubleMLData, DoubleMLDIDCS from doubleml.datasets import make_did_SZ2020 from doubleml.utils import dummy_regressor, dummy_classifier +from ._utils import draw_smpls @pytest.fixture(scope="module", params=["observational", "experimental"]) @@ -26,8 +27,17 @@ def n_rep(request): def doubleml_didcs_fixture(did_score, dml_procedure, n_rep): ext_predictions = {"d": {}} dml_data = make_did_SZ2020(n_obs=500, cross_sectional_data=True, return_type="DoubleMLData") - kwargs = {"obj_dml_data": dml_data, "score": did_score, "n_rep": n_rep, "dml_procedure": dml_procedure} + 
all_smpls = draw_smpls(len(dml_data.y), 5, n_rep=n_rep, groups=dml_data.d) + kwargs = { + "obj_dml_data": dml_data, + "score": did_score, + "n_rep": n_rep, + "n_folds": 5, + "dml_procedure": dml_procedure, + "draw_sample_splitting": False + } DMLDIDCS = DoubleMLDIDCS(ml_g=LinearRegression(), ml_m=LogisticRegression(), **kwargs) + DMLDIDCS.set_sample_splitting(all_smpls) np.random.seed(3141) DMLDIDCS.fit(store_predictions=True) @@ -38,6 +48,7 @@ def doubleml_didcs_fixture(did_score, dml_procedure, n_rep): ext_predictions["d"]["ml_m"] = DMLDIDCS.predictions["ml_m"][:, :, 0] DMLDIDCS_ext = DoubleMLDIDCS(ml_g=dummy_regressor(), ml_m=dummy_classifier(), **kwargs) + DMLDIDCS_ext.set_sample_splitting(all_smpls) np.random.seed(3141) DMLDIDCS_ext.fit(external_predictions=ext_predictions) From 73e87b12c22c5ad04ac78cd136627758f4373f30 Mon Sep 17 00:00:00 2001 From: Jan Teichert-Kluge Date: Wed, 29 Nov 2023 14:55:05 +0100 Subject: [PATCH 46/67] fix unit test for ext. preds. for PQ model --- doubleml/tests/test_pq_external_predictions.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/doubleml/tests/test_pq_external_predictions.py b/doubleml/tests/test_pq_external_predictions.py index c21bd1c8..4468db83 100644 --- a/doubleml/tests/test_pq_external_predictions.py +++ b/doubleml/tests/test_pq_external_predictions.py @@ -5,6 +5,7 @@ from doubleml import DoubleMLPQ, DoubleMLData from doubleml.datasets import make_irm_data from doubleml.utils import dummy_regressor, dummy_classifier +from ._utils import draw_smpls @pytest.fixture(scope="module", params=["dml1", "dml2"]) @@ -29,6 +30,7 @@ def doubleml_pq_fixture(dml_procedure, n_rep, normalize_ipw): data = make_irm_data(theta=0.5, n_obs=500, dim_x=20, return_type="DataFrame") dml_data = DoubleMLData(data, "y", "d") + all_smpls = draw_smpls(len(dml_data.y), 5, n_rep=n_rep, groups=None) kwargs = { "obj_dml_data": dml_data, @@ -36,12 +38,14 @@ def doubleml_pq_fixture(dml_procedure, n_rep, normalize_ipw): "n_rep": n_rep, 
"dml_procedure": dml_procedure, "normalize_ipw": normalize_ipw, + "draw_sample_splitting": False } ml_g = LogisticRegression() ml_m = LogisticRegression() DMLPQ = DoubleMLPQ(ml_g=ml_g, ml_m=ml_m, **kwargs) + DMLPQ.set_sample_splitting(all_smpls) np.random.seed(3141) DMLPQ.fit(store_predictions=True) @@ -50,6 +54,7 @@ def doubleml_pq_fixture(dml_procedure, n_rep, normalize_ipw): ext_predictions["d"]["ml_m"] = DMLPQ.predictions["ml_m"][:, :, 0] DMLPLQ_ext = DoubleMLPQ(ml_g=dummy_classifier(), ml_m=dummy_classifier(), **kwargs) + DMLPLQ_ext.set_sample_splitting(all_smpls) np.random.seed(3141) DMLPLQ_ext.fit(external_predictions=ext_predictions) From b2f09589b193ebe07603bfee2c7a9dbad55697ae Mon Sep 17 00:00:00 2001 From: Jan Teichert-Kluge Date: Thu, 30 Nov 2023 14:34:43 +0100 Subject: [PATCH 47/67] add ext. preds. for LPQ model (only for DML2) --- doubleml/double_ml_lpq.py | 713 ++++++++++-------- .../tests/test_lpq_external_predictions.py | 74 ++ 2 files changed, 492 insertions(+), 295 deletions(-) create mode 100644 doubleml/tests/test_lpq_external_predictions.py diff --git a/doubleml/double_ml_lpq.py b/doubleml/double_ml_lpq.py index 30e4e730..a3255b6f 100644 --- a/doubleml/double_ml_lpq.py +++ b/doubleml/double_ml_lpq.py @@ -9,11 +9,19 @@ from .double_ml_score_mixins import NonLinearScoreMixin from .double_ml_data import DoubleMLData -from ._utils import _dml_cv_predict, _trimm, _predict_zero_one_propensity, _cond_targets, \ - _get_bracket_guess, _default_kde, _normalize_ipw, _dml_tune, _solve_ipw_score +from ._utils import ( + _dml_cv_predict, + _trimm, + _predict_zero_one_propensity, + _cond_targets, + _get_bracket_guess, + _default_kde, + _normalize_ipw, + _dml_tune, + _solve_ipw_score, +) from ._utils_resampling import DoubleMLResampling -from ._utils_checks import _check_score, _check_trimming, _check_zero_one_treatment, _check_treatment, \ - _check_quantile +from ._utils_checks import _check_score, _check_trimming, _check_zero_one_treatment, 
_check_treatment, _check_quantile class DoubleMLLPQ(NonLinearScoreMixin, DoubleML): @@ -100,29 +108,25 @@ class DoubleMLLPQ(NonLinearScoreMixin, DoubleML): d 0.217244 0.636453 0.341336 0.73285 -1.03018 1.464668 """ - def __init__(self, - obj_dml_data, - ml_g, - ml_m, - treatment=1, - quantile=0.5, - n_folds=5, - n_rep=1, - score='LPQ', - dml_procedure='dml2', - normalize_ipw=True, - kde=None, - trimming_rule='truncate', - trimming_threshold=1e-2, - draw_sample_splitting=True, - apply_cross_fitting=True): - super().__init__(obj_dml_data, - n_folds, - n_rep, - score, - dml_procedure, - draw_sample_splitting, - apply_cross_fitting) + def __init__( + self, + obj_dml_data, + ml_g, + ml_m, + treatment=1, + quantile=0.5, + n_folds=5, + n_rep=1, + score="LPQ", + dml_procedure="dml2", + normalize_ipw=True, + kde=None, + trimming_rule="truncate", + trimming_threshold=1e-2, + draw_sample_splitting=True, + apply_cross_fitting=True, + ): + super().__init__(obj_dml_data, n_folds, n_rep, score, dml_procedure, draw_sample_splitting, apply_cross_fitting) self._quantile = quantile self._treatment = treatment @@ -130,21 +134,21 @@ def __init__(self, self._kde = _default_kde else: if not callable(kde): - raise TypeError('kde should be either a callable or None. ' - '%r was passed.' % kde) + raise TypeError("kde should be either a callable or None. " "%r was passed." % kde) self._kde = kde self._normalize_ipw = normalize_ipw self._check_data(self._dml_data) - valid_score = ['LPQ'] + valid_score = ["LPQ"] _check_score(self.score, valid_score, allow_callable=False) _check_quantile(self.quantile) _check_treatment(self.treatment) if not isinstance(self.normalize_ipw, bool): - raise TypeError('Normalization indicator has to be boolean. ' + - f'Object of type {str(type(self.normalize_ipw))} passed.') + raise TypeError( + "Normalization indicator has to be boolean. " + f"Object of type {str(type(self.normalize_ipw))} passed." 
+ ) # initialize starting values and bounds self._coef_bounds = (self._dml_data.y.min(), self._dml_data.y.max()) @@ -155,24 +159,34 @@ def __init__(self, self._trimming_threshold = trimming_threshold _check_trimming(self._trimming_rule, self._trimming_threshold) - _ = self._check_learner(ml_g, 'ml_g', regressor=False, classifier=True) - _ = self._check_learner(ml_m, 'ml_m', regressor=False, classifier=True) - self._learner = {'ml_m_z': clone(ml_m), - 'ml_g_du_z0': clone(ml_g), 'ml_g_du_z1': clone(ml_g), - 'ml_m_d_z0': clone(ml_m), 'ml_m_d_z1': clone(ml_m)} - self._predict_method = {'ml_m_z': 'predict_proba', - 'ml_g_du_z0': 'predict_proba', 'ml_g_du_z1': 'predict_proba', - 'ml_m_d_z0': 'predict_proba', 'ml_m_d_z1': 'predict_proba'} + _ = self._check_learner(ml_g, "ml_g", regressor=False, classifier=True) + _ = self._check_learner(ml_m, "ml_m", regressor=False, classifier=True) + self._learner = { + "ml_m_z": clone(ml_m), + "ml_g_du_z0": clone(ml_g), + "ml_g_du_z1": clone(ml_g), + "ml_m_d_z0": clone(ml_m), + "ml_m_d_z1": clone(ml_m), + } + self._predict_method = { + "ml_m_z": "predict_proba", + "ml_g_du_z0": "predict_proba", + "ml_g_du_z1": "predict_proba", + "ml_m_d_z0": "predict_proba", + "ml_m_d_z1": "predict_proba", + } self._initialize_ml_nuisance_params() if draw_sample_splitting: strata = self._dml_data.d.reshape(-1, 1) + 2 * self._dml_data.z.reshape(-1, 1) - obj_dml_resampling = DoubleMLResampling(n_folds=self.n_folds, - n_rep=self.n_rep, - n_obs=self._dml_data.n_obs, - apply_cross_fitting=self.apply_cross_fitting, - stratify=strata) + obj_dml_resampling = DoubleMLResampling( + n_folds=self.n_folds, + n_rep=self.n_rep, + n_obs=self._dml_data.n_obs, + apply_cross_fitting=self.apply_cross_fitting, + stratify=strata, + ) self._smpls = obj_dml_resampling.split_samples() @property @@ -219,33 +233,33 @@ def trimming_threshold(self): @property def _score_element_names(self): - return ['ind_d', 'm_z', 'g_du_z0', 'g_du_z1', 'y', 'z', 'comp_prob'] + return ["ind_d", 
"m_z", "g_du_z0", "g_du_z1", "y", "z", "comp_prob"] def _compute_ipw_score(self, theta, d, y, prop, z, comp_prob): sign = 2 * self.treatment - 1.0 weights = sign * (z / prop - (1 - z) / (1 - prop)) / comp_prob u = (d == self._treatment) * (y <= theta) - v = -1. * self.quantile + v = -1.0 * self.quantile score = weights * u + v return score def _compute_score(self, psi_elements, coef, inds=None): sign = 2 * self.treatment - 1.0 - ind_d = psi_elements['ind_d'] - m_z = psi_elements['m_z'] - g_du_z0 = psi_elements['g_du_z0'] - g_du_z1 = psi_elements['g_du_z1'] - y = psi_elements['y'] - z = psi_elements['z'] - comp_prob = psi_elements['comp_prob'] + ind_d = psi_elements["ind_d"] + m_z = psi_elements["m_z"] + g_du_z0 = psi_elements["g_du_z0"] + g_du_z1 = psi_elements["g_du_z1"] + y = psi_elements["y"] + z = psi_elements["z"] + comp_prob = psi_elements["comp_prob"] if inds is not None: - ind_d = psi_elements['ind_d'][inds] - m_z = psi_elements['m_z'] - g_du_z0 = psi_elements['g_du_z0'][inds] - g_du_z1 = psi_elements['g_du_z1'][inds] - y = psi_elements['y'][inds] - z = psi_elements['z'][inds] + ind_d = psi_elements["ind_d"][inds] + m_z = psi_elements["m_z"] + g_du_z0 = psi_elements["g_du_z0"][inds] + g_du_z1 = psi_elements["g_du_z1"][inds] + y = psi_elements["y"][inds] + z = psi_elements["z"][inds] score1 = g_du_z1 - g_du_z0 score2 = (z / m_z) * (ind_d * (y <= coef) - g_du_z1) @@ -255,17 +269,17 @@ def _compute_score(self, psi_elements, coef, inds=None): def _compute_score_deriv(self, psi_elements, coef, inds=None): sign = 2 * self.treatment - 1.0 - ind_d = psi_elements['ind_d'] - y = psi_elements['y'] - m_z = psi_elements['m_z'] - z = psi_elements['z'] - comp_prob = psi_elements['comp_prob'] + ind_d = psi_elements["ind_d"] + y = psi_elements["y"] + m_z = psi_elements["m_z"] + z = psi_elements["z"] + comp_prob = psi_elements["comp_prob"] if inds is not None: - ind_d = psi_elements['ind_d'][inds] - y = psi_elements['y'][inds] - m_z = psi_elements['m_z'][inds] - z = 
psi_elements['z'][inds] + ind_d = psi_elements["ind_d"][inds] + y = psi_elements["y"][inds] + m_z = psi_elements["m_z"][inds] + z = psi_elements["z"][inds] score_weights = sign * ((z / m_z) - (1 - z) / (1 - m_z)) * ind_d / comp_prob u = (y - coef).reshape(-1, 1) @@ -274,178 +288,225 @@ def _compute_score_deriv(self, psi_elements, coef, inds=None): return deriv def _initialize_ml_nuisance_params(self): - self._params = {learner: {key: [None] * self.n_rep for key in self._dml_data.d_cols} - for learner in ['ml_m_z', 'ml_g_du_z0', 'ml_g_du_z1', - 'ml_m_d_z0', 'ml_m_d_z1']} + self._params = { + learner: {key: [None] * self.n_rep for key in self._dml_data.d_cols} + for learner in ["ml_m_z", "ml_g_du_z0", "ml_g_du_z1", "ml_m_d_z0", "ml_m_d_z1"] + } def _nuisance_est(self, smpls, n_jobs_cv, external_predictions, return_models=False): - x, y = check_X_y(self._dml_data.x, self._dml_data.y, - force_all_finite=False) - x, d = check_X_y(x, self._dml_data.d, - force_all_finite=False) - x, z = check_X_y(x, np.ravel(self._dml_data.z), - force_all_finite=False) + x, y = check_X_y(self._dml_data.x, self._dml_data.y, force_all_finite=False) + x, d = check_X_y(x, self._dml_data.d, force_all_finite=False) + x, z = check_X_y(x, np.ravel(self._dml_data.z), force_all_finite=False) + + m_z = external_predictions["ml_m_z"] is not None + m_d_d0 = external_predictions["ml_m_d_z0"] is not None + m_d_d1 = external_predictions["ml_m_d_z1"] is not None + g_du_z0 = external_predictions["ml_g_du_z0"] is not None + g_du_z1 = external_predictions["ml_g_du_z1"] is not None + ext_preds = [m_z, m_d_d0, m_d_d1, g_du_z0, g_du_z1] # create strata for splitting strata = self._dml_data.d.reshape(-1, 1) + 2 * self._dml_data.z.reshape(-1, 1) # initialize nuisance predictions, targets and models - m_z_hat = {'models': None, - 'targets': np.full(shape=self._dml_data.n_obs, fill_value=np.nan), - 'preds': np.full(shape=self._dml_data.n_obs, fill_value=np.nan) - } - m_d_z0_hat = copy.deepcopy(m_z_hat) - m_d_z1_hat 
= copy.deepcopy(m_z_hat) - g_du_z0_hat = copy.deepcopy(m_z_hat) - g_du_z1_hat = copy.deepcopy(m_z_hat) - - # initialize models - fitted_models = {} - for learner in self.params_names: - # set nuisance model parameters - est_params = self._get_params(learner) - if est_params is not None: - fitted_models[learner] = [clone(self._learner[learner]).set_params(**est_params[i_fold]) - for i_fold in range(self.n_folds)] - else: - fitted_models[learner] = [clone(self._learner[learner]) for i_fold in range(self.n_folds)] + if not all(ext_preds): + m_z_hat = { + "models": None, + "targets": np.full(shape=self._dml_data.n_obs, fill_value=np.nan), + "preds": np.full(shape=self._dml_data.n_obs, fill_value=np.nan), + } + m_d_z0_hat = copy.deepcopy(m_z_hat) + m_d_z1_hat = copy.deepcopy(m_z_hat) + g_du_z0_hat = copy.deepcopy(m_z_hat) + g_du_z1_hat = copy.deepcopy(m_z_hat) + + # initialize models + fitted_models = {} + for learner in self.params_names: + # set nuisance model parameters + est_params = self._get_params(learner) + if est_params is not None: + fitted_models[learner] = [ + clone(self._learner[learner]).set_params(**est_params[i_fold]) for i_fold in range(self.n_folds) + ] + else: + fitted_models[learner] = [clone(self._learner[learner]) for i_fold in range(self.n_folds)] + ipw_vec = np.full(shape=self.n_folds, fill_value=np.nan) + elif any(ext_preds) and not any(ext_preds): + raise ValueError("External predictions for all estimations or for none are required.") + else: + m_z_hat = { + "models": None, + "targets": np.full(shape=self._dml_data.n_obs, fill_value=np.nan), + "preds": external_predictions["ml_m_z"], + } + m_d_z0_hat = { + "models": None, + "targets": np.full(shape=self._dml_data.n_obs, fill_value=np.nan), + "preds": external_predictions["ml_m_d_z0"], + } + m_d_z1_hat = { + "models": None, + "targets": np.full(shape=self._dml_data.n_obs, fill_value=np.nan), + "preds": external_predictions["ml_m_d_z1"], + } + g_du_z0_hat = { + "models": None, + "targets": 
np.full(shape=self._dml_data.n_obs, fill_value=np.nan), + "preds": external_predictions["ml_g_du_z0"], + } + g_du_z1_hat = { + "models": None, + "targets": np.full(shape=self._dml_data.n_obs, fill_value=np.nan), + "preds": external_predictions["ml_g_du_z1"], + } - ipw_vec = np.full(shape=self.n_folds, fill_value=np.nan) # calculate nuisance functions over different folds - for i_fold in range(self.n_folds): - train_inds = smpls[i_fold][0] - test_inds = smpls[i_fold][1] - - # start nested crossfitting - train_inds_1, train_inds_2 = train_test_split(train_inds, test_size=0.5, - random_state=42, stratify=strata[train_inds]) - smpls_prelim = [(train, test) for train, test in - StratifiedKFold(n_splits=self.n_folds).split(X=train_inds_1, y=strata[train_inds_1])] - - d_train_1 = d[train_inds_1] - y_train_1 = y[train_inds_1] - x_train_1 = x[train_inds_1, :] - z_train_1 = z[train_inds_1] - - # preliminary propensity for z - ml_m_z_prelim = clone(fitted_models['ml_m_z'][i_fold]) - m_z_hat_prelim = _dml_cv_predict(ml_m_z_prelim, x_train_1, z_train_1, - method='predict_proba', smpls=smpls_prelim)['preds'] - - m_z_hat_prelim = _trimm(m_z_hat_prelim, self.trimming_rule, self.trimming_threshold) - if self._normalize_ipw: - m_z_hat_prelim = _normalize_ipw(m_z_hat_prelim, z_train_1) - - # propensity for d == 1 cond. on z == 0 (training set 1) - z0_train_1 = z_train_1 == 0 - x_z0_train_1 = x_train_1[z0_train_1, :] - d_z0_train_1 = d_train_1[z0_train_1] - ml_m_d_z0_prelim = clone(fitted_models['ml_m_d_z0'][i_fold]) - ml_m_d_z0_prelim.fit(x_z0_train_1, d_z0_train_1) - m_d_z0_hat_prelim = _predict_zero_one_propensity(ml_m_d_z0_prelim, x_train_1) - - # propensity for d == 1 cond. 
on z == 1 (training set 1) - z1_train_1 = z_train_1 == 1 - x_z1_train_1 = x_train_1[z1_train_1, :] - d_z1_train_1 = d_train_1[z1_train_1] - ml_m_d_z1_prelim = clone(fitted_models['ml_m_d_z1'][i_fold]) - ml_m_d_z1_prelim.fit(x_z1_train_1, d_z1_train_1) - m_d_z1_hat_prelim = _predict_zero_one_propensity(ml_m_d_z1_prelim, x_train_1) - - # preliminary estimate of theta_2_aux - comp_prob_prelim = np.mean(m_d_z1_hat_prelim - m_d_z0_hat_prelim - + z_train_1 / m_z_hat_prelim * (d_train_1 - m_d_z1_hat_prelim) - - (1 - z_train_1) / (1 - m_z_hat_prelim) * (d_train_1 - m_d_z0_hat_prelim)) - - # preliminary ipw estimate - def ipw_score(theta): - res = np.mean(self._compute_ipw_score(theta, d_train_1, y_train_1, m_z_hat_prelim, - z_train_1, comp_prob_prelim)) - return res - - _, bracket_guess = _get_bracket_guess(ipw_score, self._coef_start_val, self._coef_bounds) - ipw_est = _solve_ipw_score(ipw_score=ipw_score, bracket_guess=bracket_guess) - ipw_vec[i_fold] = ipw_est - - # use the preliminary estimates to fit the nuisance parameters on train_2 - d_train_2 = d[train_inds_2] - y_train_2 = y[train_inds_2] - x_train_2 = x[train_inds_2, :] - z_train_2 = z[train_inds_2] - - # define test observations - d_test = d[test_inds] - y_test = y[test_inds] - x_test = x[test_inds, :] - z_test = z[test_inds] - - # propensity for (D == treatment)*Ind(Y <= ipq_est) cond. on z == 0 - z0_train_2 = z_train_2 == 0 - x_z0_train_2 = x_train_2[z0_train_2, :] - du_z0_train_2 = (d_train_2[z0_train_2] == self._treatment) * (y_train_2[z0_train_2] <= ipw_est) - fitted_models['ml_g_du_z0'][i_fold].fit(x_z0_train_2, du_z0_train_2) - g_du_z0_hat['preds'][test_inds] = _predict_zero_one_propensity(fitted_models['ml_g_du_z0'][i_fold], x_test) - - # propensity for (D == treatment)*Ind(Y <= ipq_est) cond. 
on z == 1 - z1_train_2 = z_train_2 == 1 - x_z1_train_2 = x_train_2[z1_train_2, :] - du_z1_train_2 = (d_train_2[z1_train_2] == self._treatment) * (y_train_2[z1_train_2] <= ipw_est) - fitted_models['ml_g_du_z1'][i_fold].fit(x_z1_train_2, du_z1_train_2) - g_du_z1_hat['preds'][test_inds] = _predict_zero_one_propensity(fitted_models['ml_g_du_z1'][i_fold], x_test) - - # the predictions of both should only be evaluated conditional on z == 0 or z == 1 - test_inds_z0 = test_inds[z_test == 0] - test_inds_z1 = test_inds[z_test == 1] - g_du_z0_hat['targets'][test_inds_z0] = (1.0 * (d_test[z_test == 0] == self._treatment) * - (y_test[z_test == 0] <= ipw_est)) - g_du_z1_hat['targets'][test_inds_z1] = (1.0 * (d_test[z_test == 1] == self._treatment) * - (y_test[z_test == 1] <= ipw_est)) - - # refit nuisance elements for the local potential quantile - z_train = z[train_inds] - x_train = x[train_inds] - d_train = d[train_inds] - - # refit propensity for z (whole training set) - fitted_models['ml_m_z'][i_fold].fit(x_train, z_train) - m_z_hat['preds'][test_inds] = _predict_zero_one_propensity(fitted_models['ml_m_z'][i_fold], x_test) - - # refit propensity for d == 1 cond. on z == 0 (whole training set) - z0_train = z_train == 0 - x_z0_train = x_train[z0_train, :] - d_z0_train = d_train[z0_train] - fitted_models['ml_m_d_z0'][i_fold].fit(x_z0_train, d_z0_train) - m_d_z0_hat['preds'][test_inds] = _predict_zero_one_propensity(fitted_models['ml_m_d_z0'][i_fold], x_test) - - # propensity for d == 1 cond. 
on z == 1 (whole training set) - x_z1_train = x_train[z_train == 1, :] - d_z1_train = d_train[z_train == 1] - fitted_models['ml_m_d_z1'][i_fold].fit(x_z1_train, d_z1_train) - m_d_z1_hat['preds'][test_inds] = _predict_zero_one_propensity(fitted_models['ml_m_d_z1'][i_fold], x_test) + if not all(ext_preds): + for i_fold in range(self.n_folds): + train_inds = smpls[i_fold][0] + test_inds = smpls[i_fold][1] + + # start nested crossfitting + train_inds_1, train_inds_2 = train_test_split( + train_inds, test_size=0.5, random_state=42, stratify=strata[train_inds] + ) + smpls_prelim = [ + (train, test) + for train, test in StratifiedKFold(n_splits=self.n_folds).split(X=train_inds_1, y=strata[train_inds_1]) + ] + + d_train_1 = d[train_inds_1] + y_train_1 = y[train_inds_1] + x_train_1 = x[train_inds_1, :] + z_train_1 = z[train_inds_1] + + # preliminary propensity for z + ml_m_z_prelim = clone(fitted_models["ml_m_z"][i_fold]) + m_z_hat_prelim = _dml_cv_predict(ml_m_z_prelim, x_train_1, z_train_1, method="predict_proba", smpls=smpls_prelim)[ + "preds" + ] + + m_z_hat_prelim = _trimm(m_z_hat_prelim, self.trimming_rule, self.trimming_threshold) + if self._normalize_ipw: + m_z_hat_prelim = _normalize_ipw(m_z_hat_prelim, z_train_1) + + # propensity for d == 1 cond. on z == 0 (training set 1) + z0_train_1 = z_train_1 == 0 + x_z0_train_1 = x_train_1[z0_train_1, :] + d_z0_train_1 = d_train_1[z0_train_1] + ml_m_d_z0_prelim = clone(fitted_models["ml_m_d_z0"][i_fold]) + ml_m_d_z0_prelim.fit(x_z0_train_1, d_z0_train_1) + m_d_z0_hat_prelim = _predict_zero_one_propensity(ml_m_d_z0_prelim, x_train_1) + + # propensity for d == 1 cond. 
on z == 1 (training set 1) + z1_train_1 = z_train_1 == 1 + x_z1_train_1 = x_train_1[z1_train_1, :] + d_z1_train_1 = d_train_1[z1_train_1] + ml_m_d_z1_prelim = clone(fitted_models["ml_m_d_z1"][i_fold]) + ml_m_d_z1_prelim.fit(x_z1_train_1, d_z1_train_1) + m_d_z1_hat_prelim = _predict_zero_one_propensity(ml_m_d_z1_prelim, x_train_1) + + # preliminary estimate of theta_2_aux + comp_prob_prelim = np.mean( + m_d_z1_hat_prelim + - m_d_z0_hat_prelim + + z_train_1 / m_z_hat_prelim * (d_train_1 - m_d_z1_hat_prelim) + - (1 - z_train_1) / (1 - m_z_hat_prelim) * (d_train_1 - m_d_z0_hat_prelim) + ) + + # preliminary ipw estimate + def ipw_score(theta): + res = np.mean( + self._compute_ipw_score(theta, d_train_1, y_train_1, m_z_hat_prelim, z_train_1, comp_prob_prelim) + ) + return res + + _, bracket_guess = _get_bracket_guess(ipw_score, self._coef_start_val, self._coef_bounds) + ipw_est = _solve_ipw_score(ipw_score=ipw_score, bracket_guess=bracket_guess) + ipw_vec[i_fold] = ipw_est + + # use the preliminary estimates to fit the nuisance parameters on train_2 + d_train_2 = d[train_inds_2] + y_train_2 = y[train_inds_2] + x_train_2 = x[train_inds_2, :] + z_train_2 = z[train_inds_2] + + # define test observations + d_test = d[test_inds] + y_test = y[test_inds] + x_test = x[test_inds, :] + z_test = z[test_inds] + + # propensity for (D == treatment)*Ind(Y <= ipq_est) cond. on z == 0 + z0_train_2 = z_train_2 == 0 + x_z0_train_2 = x_train_2[z0_train_2, :] + du_z0_train_2 = (d_train_2[z0_train_2] == self._treatment) * (y_train_2[z0_train_2] <= ipw_est) + fitted_models["ml_g_du_z0"][i_fold].fit(x_z0_train_2, du_z0_train_2) + g_du_z0_hat["preds"][test_inds] = _predict_zero_one_propensity(fitted_models["ml_g_du_z0"][i_fold], x_test) + + # propensity for (D == treatment)*Ind(Y <= ipq_est) cond. 
on z == 1 + z1_train_2 = z_train_2 == 1 + x_z1_train_2 = x_train_2[z1_train_2, :] + du_z1_train_2 = (d_train_2[z1_train_2] == self._treatment) * (y_train_2[z1_train_2] <= ipw_est) + fitted_models["ml_g_du_z1"][i_fold].fit(x_z1_train_2, du_z1_train_2) + g_du_z1_hat["preds"][test_inds] = _predict_zero_one_propensity(fitted_models["ml_g_du_z1"][i_fold], x_test) + + # the predictions of both should only be evaluated conditional on z == 0 or z == 1 + test_inds_z0 = test_inds[z_test == 0] + test_inds_z1 = test_inds[z_test == 1] + g_du_z0_hat["targets"][test_inds_z0] = ( + 1.0 * (d_test[z_test == 0] == self._treatment) * (y_test[z_test == 0] <= ipw_est) + ) + g_du_z1_hat["targets"][test_inds_z1] = ( + 1.0 * (d_test[z_test == 1] == self._treatment) * (y_test[z_test == 1] <= ipw_est) + ) + + # refit nuisance elements for the local potential quantile + z_train = z[train_inds] + x_train = x[train_inds] + d_train = d[train_inds] + + # refit propensity for z (whole training set) + fitted_models["ml_m_z"][i_fold].fit(x_train, z_train) + m_z_hat["preds"][test_inds] = _predict_zero_one_propensity(fitted_models["ml_m_z"][i_fold], x_test) + + # refit propensity for d == 1 cond. on z == 0 (whole training set) + z0_train = z_train == 0 + x_z0_train = x_train[z0_train, :] + d_z0_train = d_train[z0_train] + fitted_models["ml_m_d_z0"][i_fold].fit(x_z0_train, d_z0_train) + m_d_z0_hat["preds"][test_inds] = _predict_zero_one_propensity(fitted_models["ml_m_d_z0"][i_fold], x_test) + + # propensity for d == 1 cond. 
on z == 1 (whole training set) + x_z1_train = x_train[z_train == 1, :] + d_z1_train = d_train[z_train == 1] + fitted_models["ml_m_d_z1"][i_fold].fit(x_z1_train, d_z1_train) + m_d_z1_hat["preds"][test_inds] = _predict_zero_one_propensity(fitted_models["ml_m_d_z1"][i_fold], x_test) # save targets and models - m_z_hat['targets'] = z + m_z_hat["targets"] = z # set targets to relevant subsample - g_du_z0_hat['targets'] = _cond_targets(g_du_z0_hat['targets'], cond_sample=(z == 0)) - g_du_z1_hat['targets'] = _cond_targets(g_du_z1_hat['targets'], cond_sample=(z == 1)) + g_du_z0_hat["targets"] = _cond_targets(g_du_z0_hat["targets"], cond_sample=(z == 0)) + g_du_z1_hat["targets"] = _cond_targets(g_du_z1_hat["targets"], cond_sample=(z == 1)) # the predictions of both should only be evaluated conditional on z == 0 or z == 1 - m_d_z0_hat['targets'] = _cond_targets(d, cond_sample=(z == 0)) - m_d_z0_hat['targets'] = _cond_targets(d, cond_sample=(z == 1)) + m_d_z0_hat["targets"] = _cond_targets(d, cond_sample=(z == 0)) + m_d_z0_hat["targets"] = _cond_targets(d, cond_sample=(z == 1)) if return_models: - m_z_hat['models'] = fitted_models['ml_m_z'] - m_d_z0_hat['models'] = fitted_models['ml_m_d_z0'] - m_d_z1_hat['models'] = fitted_models['ml_m_d_z1'] - g_du_z0_hat['models'] = fitted_models['ml_g_du_z0'] - g_du_z1_hat['models'] = fitted_models['ml_g_du_z1'] + m_z_hat["models"] = fitted_models["ml_m_z"] + m_d_z0_hat["models"] = fitted_models["ml_m_d_z0"] + m_d_z1_hat["models"] = fitted_models["ml_m_d_z1"] + g_du_z0_hat["models"] = fitted_models["ml_g_du_z0"] + g_du_z1_hat["models"] = fitted_models["ml_g_du_z1"] # clip propensities - m_z_hat_adj = _trimm(m_z_hat['preds'], self.trimming_rule, self.trimming_threshold) + m_z_hat_adj = _trimm(m_z_hat["preds"], self.trimming_rule, self.trimming_threshold) if self._normalize_ipw: - if self.dml_procedure == 'dml1': + if self.dml_procedure == "dml1": for _, test_index in smpls: m_z_hat_adj[test_index] = _normalize_ipw(m_z_hat_adj[test_index], 
z[test_index]) else: @@ -453,49 +514,60 @@ def ipw_score(theta): # this could be adjusted to be compatible with dml1 # estimate final nuisance parameter - comp_prob_hat = np.mean(m_d_z1_hat['preds'] - m_d_z0_hat['preds'] - + z / m_z_hat_adj * (d - m_d_z1_hat['preds']) - - (1 - z) / (1 - m_z_hat_adj) * (d - m_d_z0_hat['preds'])) - - # readjust start value for minimization - self._coef_start_val = np.mean(ipw_vec) - - psi_elements = {'ind_d': d == self._treatment, 'm_z': m_z_hat_adj, - 'g_du_z0': g_du_z0_hat['preds'], 'g_du_z1': g_du_z1_hat['preds'], - 'y': y, 'z': z, 'comp_prob': comp_prob_hat} - preds = {'predictions': {'ml_m_z': m_z_hat['preds'], - 'ml_m_d_z0': m_d_z0_hat['preds'], - 'ml_m_d_z1': m_d_z1_hat['preds'], - 'ml_g_du_z0': g_du_z0_hat['preds'], - 'ml_g_du_z1': g_du_z1_hat['preds']}, - 'targets': {'ml_m_z': m_z_hat['targets'], - 'ml_m_d_z0': m_d_z0_hat['targets'], - 'ml_m_d_z1': m_d_z1_hat['targets'], - 'ml_g_du_z0': g_du_z0_hat['targets'], - 'ml_g_du_z1': g_du_z1_hat['targets']}, - 'models': {'ml_m_z': m_z_hat['models'], - 'ml_m_d_z0': m_d_z0_hat['models'], - 'ml_m_d_z1': m_d_z1_hat['models'], - 'ml_g_du_z0': g_du_z0_hat['models'], - 'ml_g_du_z1': g_du_z1_hat['models']} - } + comp_prob_hat = np.mean( + m_d_z1_hat["preds"] + - m_d_z0_hat["preds"] + + z / m_z_hat_adj * (d - m_d_z1_hat["preds"]) + - (1 - z) / (1 - m_z_hat_adj) * (d - m_d_z0_hat["preds"]) + ) + + if not all(ext_preds): + # readjust start value for minimization + self._coef_start_val = np.mean(ipw_vec) + + psi_elements = { + "ind_d": d == self._treatment, + "m_z": m_z_hat_adj, + "g_du_z0": g_du_z0_hat["preds"], + "g_du_z1": g_du_z1_hat["preds"], + "y": y, + "z": z, + "comp_prob": comp_prob_hat, + } + preds = { + "predictions": { + "ml_m_z": m_z_hat["preds"], + "ml_m_d_z0": m_d_z0_hat["preds"], + "ml_m_d_z1": m_d_z1_hat["preds"], + "ml_g_du_z0": g_du_z0_hat["preds"], + "ml_g_du_z1": g_du_z1_hat["preds"], + }, + "targets": { + "ml_m_z": m_z_hat["targets"], + "ml_m_d_z0": m_d_z0_hat["targets"], 
+ "ml_m_d_z1": m_d_z1_hat["targets"], + "ml_g_du_z0": g_du_z0_hat["targets"], + "ml_g_du_z1": g_du_z1_hat["targets"], + }, + "models": { + "ml_m_z": m_z_hat["models"], + "ml_m_d_z0": m_d_z0_hat["models"], + "ml_m_d_z1": m_d_z1_hat["models"], + "ml_g_du_z0": g_du_z0_hat["models"], + "ml_g_du_z1": g_du_z1_hat["models"], + }, + } return psi_elements, preds - def _nuisance_tuning(self, smpls, param_grids, scoring_methods, n_folds_tune, n_jobs_cv, - search_mode, n_iter_randomized_search): - x, y = check_X_y(self._dml_data.x, self._dml_data.y, - force_all_finite=False) - x, d = check_X_y(x, self._dml_data.d, - force_all_finite=False) - x, z = check_X_y(x, np.ravel(self._dml_data.z), - force_all_finite=False) + def _nuisance_tuning( + self, smpls, param_grids, scoring_methods, n_folds_tune, n_jobs_cv, search_mode, n_iter_randomized_search + ): + x, y = check_X_y(self._dml_data.x, self._dml_data.y, force_all_finite=False) + x, d = check_X_y(x, self._dml_data.d, force_all_finite=False) + x, z = check_X_y(x, np.ravel(self._dml_data.z), force_all_finite=False) if scoring_methods is None: - scoring_methods = {'ml_m_z': None, - 'ml_m_d_z0': None, - 'ml_m_d_z1': None, - 'ml_g_du_z0': None, - 'ml_g_du_z1': None} + scoring_methods = {"ml_m_z": None, "ml_m_d_z0": None, "ml_m_d_z1": None, "ml_g_du_z0": None, "ml_g_du_z1": None} train_inds = [train_index for (train_index, _) in smpls] train_inds_z0 = [np.intersect1d(np.where(z == 0)[0], train) for train, _ in smpls] @@ -504,21 +576,66 @@ def _nuisance_tuning(self, smpls, param_grids, scoring_methods, n_folds_tune, n_ approx_quant = np.quantile(y[d == self.treatment], self.quantile) du = (d == self.treatment) * (y <= approx_quant) - m_z_tune_res = _dml_tune(z, x, train_inds, - self._learner['ml_m_z'], param_grids['ml_m_z'], scoring_methods['ml_m_z'], - n_folds_tune, n_jobs_cv, search_mode, n_iter_randomized_search) - m_d_z0_tune_res = _dml_tune(d, x, train_inds_z0, - self._learner['ml_m_d_z0'], param_grids['ml_m_d_z0'], 
scoring_methods['ml_m_d_z0'], - n_folds_tune, n_jobs_cv, search_mode, n_iter_randomized_search) - m_d_z1_tune_res = _dml_tune(d, x, train_inds_z1, - self._learner['ml_m_d_z1'], param_grids['ml_m_d_z1'], scoring_methods['ml_m_d_z1'], - n_folds_tune, n_jobs_cv, search_mode, n_iter_randomized_search) - g_du_z0_tune_res = _dml_tune(du, x, train_inds_z0, - self._learner['ml_g_du_z0'], param_grids['ml_g_du_z0'], scoring_methods['ml_g_du_z0'], - n_folds_tune, n_jobs_cv, search_mode, n_iter_randomized_search) - g_du_z1_tune_res = _dml_tune(du, x, train_inds_z1, - self._learner['ml_g_du_z1'], param_grids['ml_g_du_z1'], scoring_methods['ml_g_du_z1'], - n_folds_tune, n_jobs_cv, search_mode, n_iter_randomized_search) + m_z_tune_res = _dml_tune( + z, + x, + train_inds, + self._learner["ml_m_z"], + param_grids["ml_m_z"], + scoring_methods["ml_m_z"], + n_folds_tune, + n_jobs_cv, + search_mode, + n_iter_randomized_search, + ) + m_d_z0_tune_res = _dml_tune( + d, + x, + train_inds_z0, + self._learner["ml_m_d_z0"], + param_grids["ml_m_d_z0"], + scoring_methods["ml_m_d_z0"], + n_folds_tune, + n_jobs_cv, + search_mode, + n_iter_randomized_search, + ) + m_d_z1_tune_res = _dml_tune( + d, + x, + train_inds_z1, + self._learner["ml_m_d_z1"], + param_grids["ml_m_d_z1"], + scoring_methods["ml_m_d_z1"], + n_folds_tune, + n_jobs_cv, + search_mode, + n_iter_randomized_search, + ) + g_du_z0_tune_res = _dml_tune( + du, + x, + train_inds_z0, + self._learner["ml_g_du_z0"], + param_grids["ml_g_du_z0"], + scoring_methods["ml_g_du_z0"], + n_folds_tune, + n_jobs_cv, + search_mode, + n_iter_randomized_search, + ) + g_du_z1_tune_res = _dml_tune( + du, + x, + train_inds_z1, + self._learner["ml_g_du_z1"], + param_grids["ml_g_du_z1"], + scoring_methods["ml_g_du_z1"], + n_folds_tune, + n_jobs_cv, + search_mode, + n_iter_randomized_search, + ) m_z_best_params = [xx.best_params_ for xx in m_z_tune_res] m_d_z0_best_params = [xx.best_params_ for xx in m_d_z0_tune_res] @@ -526,34 +643,40 @@ def 
_nuisance_tuning(self, smpls, param_grids, scoring_methods, n_folds_tune, n_ g_du_z0_best_params = [xx.best_params_ for xx in g_du_z0_tune_res] g_du_z1_best_params = [xx.best_params_ for xx in g_du_z1_tune_res] - params = {'ml_m_z': m_z_best_params, - 'ml_m_d_z0': m_d_z0_best_params, - 'ml_m_d_z1': m_d_z1_best_params, - 'ml_g_du_z0': g_du_z0_best_params, - 'ml_g_du_z1': g_du_z1_best_params} - tune_res = {'ml_m_z': m_z_tune_res, - 'ml_m_d_z0': m_d_z0_tune_res, - 'ml_m_d_z1': m_d_z1_tune_res, - 'ml_g_du_z0': g_du_z0_tune_res, - 'ml_g_du_z1': g_du_z1_tune_res} - - res = {'params': params, - 'tune_res': tune_res} + params = { + "ml_m_z": m_z_best_params, + "ml_m_d_z0": m_d_z0_best_params, + "ml_m_d_z1": m_d_z1_best_params, + "ml_g_du_z0": g_du_z0_best_params, + "ml_g_du_z1": g_du_z1_best_params, + } + tune_res = { + "ml_m_z": m_z_tune_res, + "ml_m_d_z0": m_d_z0_tune_res, + "ml_m_d_z1": m_d_z1_tune_res, + "ml_g_du_z0": g_du_z0_tune_res, + "ml_g_du_z1": g_du_z1_tune_res, + } + + res = {"params": params, "tune_res": tune_res} return res def _check_data(self, obj_dml_data): if not isinstance(obj_dml_data, DoubleMLData): - raise TypeError('The data must be of DoubleMLData type. ' - f'{str(obj_dml_data)} of type {str(type(obj_dml_data))} was passed.') + raise TypeError( + "The data must be of DoubleMLData type. " f"{str(obj_dml_data)} of type {str(type(obj_dml_data))} was passed." + ) _check_zero_one_treatment(self) - one_instr = (obj_dml_data.n_instr == 1) - err_msg = ('Incompatible data. ' - 'To fit an LPQ model with DML ' - 'exactly one binary variable with values 0 and 1 ' - 'needs to be specified as instrumental variable.') + one_instr = obj_dml_data.n_instr == 1 + err_msg = ( + "Incompatible data. " + "To fit an LPQ model with DML " + "exactly one binary variable with values 0 and 1 " + "needs to be specified as instrumental variable." 
+ ) if one_instr: - binary_instr = (type_of_target(obj_dml_data.z) == 'binary') + binary_instr = type_of_target(obj_dml_data.z) == "binary" zero_one_instr = np.all((np.power(obj_dml_data.z, 2) - obj_dml_data.z) == 0) if not (one_instr & binary_instr & zero_one_instr): raise ValueError(err_msg) diff --git a/doubleml/tests/test_lpq_external_predictions.py b/doubleml/tests/test_lpq_external_predictions.py new file mode 100644 index 00000000..af30b879 --- /dev/null +++ b/doubleml/tests/test_lpq_external_predictions.py @@ -0,0 +1,74 @@ +import numpy as np +import pytest +import math +from sklearn.linear_model import LogisticRegression +from doubleml import DoubleMLLPQ, DoubleMLData +from doubleml.datasets import make_iivm_data +from doubleml.utils import dummy_regressor, dummy_classifier +from ._utils import draw_smpls + + +@pytest.fixture(scope="module", params=["dml1", "dml2"]) +def dml_procedure(request): + return request.param + + +@pytest.fixture(scope="module", params=[1, 3]) +def n_rep(request): + return request.param + + +@pytest.fixture(scope="module", params=[True, False]) +def normalize_ipw(request): + return request.param + + +@pytest.fixture(scope="module") +def doubleml_lpq_fixture(dml_procedure, n_rep, normalize_ipw): + ext_predictions = {"d": {}} + np.random.seed(3141) + data = make_iivm_data(theta=0.5, n_obs=2000, dim_x=10, return_type='DataFrame') + + dml_data = DoubleMLData(data, 'y', 'd', z_cols='z') + #all_smpls = draw_smpls(len(dml_data.y), 5, n_rep=n_rep, groups=dml_data.z) + all_smpls = draw_smpls(len(dml_data.y), 5, n_rep=n_rep, groups=dml_data.d) + #all_smpls = draw_smpls(len(dml_data.y), 5, n_rep=n_rep, groups=None) + + kwargs = { + "obj_dml_data": dml_data, + "score": "LPQ", + "n_rep": n_rep, + "dml_procedure": dml_procedure, + "normalize_ipw": normalize_ipw, + #"draw_sample_splitting": False + } + + ml_g = LogisticRegression() + ml_m = LogisticRegression() + + DMLLPQ = DoubleMLLPQ(ml_g=ml_g, ml_m=ml_m, **kwargs) + 
DMLLPQ.set_sample_splitting(all_smpls) + + np.random.seed(3141) + DMLLPQ.fit(store_predictions=True) + + ext_predictions["d"]["ml_m_z"] = DMLLPQ.predictions["ml_m_z"][:, :, 0] + ext_predictions["d"]["ml_m_d_z0"] = DMLLPQ.predictions["ml_m_d_z0"][:, :, 0] + ext_predictions["d"]["ml_m_d_z1"] = DMLLPQ.predictions["ml_m_d_z1"][:, :, 0] + ext_predictions["d"]["ml_g_du_z0"] = DMLLPQ.predictions["ml_g_du_z0"][:, :, 0] + ext_predictions["d"]["ml_g_du_z1"] = DMLLPQ.predictions["ml_g_du_z1"][:, :, 0] + + DMLLPLQ_ext = DoubleMLLPQ(ml_g=dummy_classifier(), ml_m=dummy_classifier(), **kwargs) + DMLLPLQ_ext.set_sample_splitting(all_smpls) + + np.random.seed(3141) + DMLLPLQ_ext.fit(external_predictions=ext_predictions) + + res_dict = {"coef_normal": DMLLPQ.coef, "coef_ext": DMLLPLQ_ext.coef} + + return res_dict + + +@pytest.mark.ci +def test_doubleml_lpq_coef(doubleml_lpq_fixture): + assert math.isclose(doubleml_lpq_fixture["coef_normal"], doubleml_lpq_fixture["coef_ext"], rel_tol=1e-9, abs_tol=1e-4) From 464a3f6c7cd05d9ceb1cc1ef45f1cca8b96a7922 Mon Sep 17 00:00:00 2001 From: Jan Teichert-Kluge Date: Fri, 1 Dec 2023 11:48:22 +0100 Subject: [PATCH 48/67] fix ext. preds. 
for LPQ model --- doubleml/tests/test_lpq_external_predictions.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/doubleml/tests/test_lpq_external_predictions.py b/doubleml/tests/test_lpq_external_predictions.py index af30b879..36a838e5 100644 --- a/doubleml/tests/test_lpq_external_predictions.py +++ b/doubleml/tests/test_lpq_external_predictions.py @@ -27,11 +27,12 @@ def normalize_ipw(request): def doubleml_lpq_fixture(dml_procedure, n_rep, normalize_ipw): ext_predictions = {"d": {}} np.random.seed(3141) - data = make_iivm_data(theta=0.5, n_obs=2000, dim_x=10, return_type='DataFrame') + data = make_iivm_data(theta=0.5, n_obs=2000, dim_x=10, alpha_x=1.0, return_type='DataFrame') dml_data = DoubleMLData(data, 'y', 'd', z_cols='z') #all_smpls = draw_smpls(len(dml_data.y), 5, n_rep=n_rep, groups=dml_data.z) all_smpls = draw_smpls(len(dml_data.y), 5, n_rep=n_rep, groups=dml_data.d) + #all_smpls = draw_smpls(len(dml_data.y), 5, n_rep=n_rep, groups=[dml_data.d, dml_data.z]) #all_smpls = draw_smpls(len(dml_data.y), 5, n_rep=n_rep, groups=None) kwargs = { @@ -40,7 +41,7 @@ def doubleml_lpq_fixture(dml_procedure, n_rep, normalize_ipw): "n_rep": n_rep, "dml_procedure": dml_procedure, "normalize_ipw": normalize_ipw, - #"draw_sample_splitting": False + "draw_sample_splitting": False } ml_g = LogisticRegression() From 2d9125c8c6fa706b0d9218247f4ddd234266f6c3 Mon Sep 17 00:00:00 2001 From: Jan Teichert-Kluge Date: Fri, 1 Dec 2023 11:51:31 +0100 Subject: [PATCH 49/67] optimize unit test for ext. 
preds in LPQ --- doubleml/tests/test_lpq_external_predictions.py | 11 ++++------- 1 file changed, 4 insertions(+), 7 deletions(-) diff --git a/doubleml/tests/test_lpq_external_predictions.py b/doubleml/tests/test_lpq_external_predictions.py index 36a838e5..db321b37 100644 --- a/doubleml/tests/test_lpq_external_predictions.py +++ b/doubleml/tests/test_lpq_external_predictions.py @@ -27,13 +27,10 @@ def normalize_ipw(request): def doubleml_lpq_fixture(dml_procedure, n_rep, normalize_ipw): ext_predictions = {"d": {}} np.random.seed(3141) - data = make_iivm_data(theta=0.5, n_obs=2000, dim_x=10, alpha_x=1.0, return_type='DataFrame') + data = make_iivm_data(theta=0.5, n_obs=500, dim_x=20, alpha_x=1.0, return_type="DataFrame") - dml_data = DoubleMLData(data, 'y', 'd', z_cols='z') - #all_smpls = draw_smpls(len(dml_data.y), 5, n_rep=n_rep, groups=dml_data.z) + dml_data = DoubleMLData(data, "y", "d", z_cols="z") all_smpls = draw_smpls(len(dml_data.y), 5, n_rep=n_rep, groups=dml_data.d) - #all_smpls = draw_smpls(len(dml_data.y), 5, n_rep=n_rep, groups=[dml_data.d, dml_data.z]) - #all_smpls = draw_smpls(len(dml_data.y), 5, n_rep=n_rep, groups=None) kwargs = { "obj_dml_data": dml_data, @@ -41,7 +38,7 @@ def doubleml_lpq_fixture(dml_procedure, n_rep, normalize_ipw): "n_rep": n_rep, "dml_procedure": dml_procedure, "normalize_ipw": normalize_ipw, - "draw_sample_splitting": False + "draw_sample_splitting": False, } ml_g = LogisticRegression() @@ -49,7 +46,7 @@ def doubleml_lpq_fixture(dml_procedure, n_rep, normalize_ipw): DMLLPQ = DoubleMLLPQ(ml_g=ml_g, ml_m=ml_m, **kwargs) DMLLPQ.set_sample_splitting(all_smpls) - + np.random.seed(3141) DMLLPQ.fit(store_predictions=True) From ba72cac5ac7862bcd0c8ec83610b3271d3e47dd2 Mon Sep 17 00:00:00 2001 From: Jan Teichert-Kluge Date: Wed, 6 Dec 2023 13:51:16 +0100 Subject: [PATCH 50/67] fix unit-test for LPQ external predictions --- doubleml/tests/test_lpq_external_predictions.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git 
a/doubleml/tests/test_lpq_external_predictions.py b/doubleml/tests/test_lpq_external_predictions.py index db321b37..2a13b4bc 100644 --- a/doubleml/tests/test_lpq_external_predictions.py +++ b/doubleml/tests/test_lpq_external_predictions.py @@ -27,7 +27,7 @@ def normalize_ipw(request): def doubleml_lpq_fixture(dml_procedure, n_rep, normalize_ipw): ext_predictions = {"d": {}} np.random.seed(3141) - data = make_iivm_data(theta=0.5, n_obs=500, dim_x=20, alpha_x=1.0, return_type="DataFrame") + data = make_iivm_data(theta=0.5, n_obs=2000, dim_x=10, alpha_x=1.0, return_type="DataFrame") dml_data = DoubleMLData(data, "y", "d", z_cols="z") all_smpls = draw_smpls(len(dml_data.y), 5, n_rep=n_rep, groups=dml_data.d) From 35ee9762af256a5c7ede3c97823fa3851251c8f4 Mon Sep 17 00:00:00 2001 From: Jan Teichert-Kluge Date: Wed, 6 Dec 2023 16:44:50 +0100 Subject: [PATCH 51/67] update pq model for individual external prediction --- doubleml/double_ml_pq.py | 347 ++++++++++-------- .../tests/test_pq_external_predictions.py | 32 +- 2 files changed, 215 insertions(+), 164 deletions(-) diff --git a/doubleml/double_ml_pq.py b/doubleml/double_ml_pq.py index d785429f..546cbe92 100644 --- a/doubleml/double_ml_pq.py +++ b/doubleml/double_ml_pq.py @@ -8,11 +8,26 @@ from .double_ml_score_mixins import NonLinearScoreMixin from .double_ml_data import DoubleMLData -from ._utils import _dml_cv_predict, _trimm, _predict_zero_one_propensity, _get_bracket_guess, \ - _default_kde, _normalize_ipw, _dml_tune, _solve_ipw_score, _cond_targets +from ._utils import ( + _dml_cv_predict, + _trimm, + _predict_zero_one_propensity, + _get_bracket_guess, + _default_kde, + _normalize_ipw, + _dml_tune, + _solve_ipw_score, + _cond_targets, +) from ._utils_resampling import DoubleMLResampling -from ._utils_checks import _check_score, _check_trimming, _check_zero_one_treatment, _check_treatment, \ - _check_contains_iv, _check_quantile +from ._utils_checks import ( + _check_score, + _check_trimming, + 
_check_zero_one_treatment, + _check_treatment, + _check_contains_iv, + _check_quantile, +) class DoubleMLPQ(NonLinearScoreMixin, DoubleML): @@ -100,29 +115,25 @@ class DoubleMLPQ(NonLinearScoreMixin, DoubleML): d 0.553878 0.149858 3.696011 0.000219 0.260161 0.847595 """ - def __init__(self, - obj_dml_data, - ml_g, - ml_m, - treatment=1, - quantile=0.5, - n_folds=5, - n_rep=1, - score='PQ', - dml_procedure='dml2', - normalize_ipw=True, - kde=None, - trimming_rule='truncate', - trimming_threshold=1e-2, - draw_sample_splitting=True, - apply_cross_fitting=True): - super().__init__(obj_dml_data, - n_folds, - n_rep, - score, - dml_procedure, - draw_sample_splitting, - apply_cross_fitting) + def __init__( + self, + obj_dml_data, + ml_g, + ml_m, + treatment=1, + quantile=0.5, + n_folds=5, + n_rep=1, + score="PQ", + dml_procedure="dml2", + normalize_ipw=True, + kde=None, + trimming_rule="truncate", + trimming_threshold=1e-2, + draw_sample_splitting=True, + apply_cross_fitting=True, + ): + super().__init__(obj_dml_data, n_folds, n_rep, score, dml_procedure, draw_sample_splitting, apply_cross_fitting) self._quantile = quantile self._treatment = treatment @@ -130,21 +141,21 @@ def __init__(self, self._kde = _default_kde else: if not callable(kde): - raise TypeError('kde should be either a callable or None. ' - '%r was passed.' % kde) + raise TypeError("kde should be either a callable or None. " "%r was passed." % kde) self._kde = kde self._normalize_ipw = normalize_ipw self._check_data(self._dml_data) - valid_score = ['PQ'] + valid_score = ["PQ"] _check_score(self.score, valid_score, allow_callable=False) _check_quantile(self.quantile) _check_treatment(self.treatment) if not isinstance(self.normalize_ipw, bool): - raise TypeError('Normalization indicator has to be boolean. ' + - f'Object of type {str(type(self.normalize_ipw))} passed.') + raise TypeError( + "Normalization indicator has to be boolean. " + f"Object of type {str(type(self.normalize_ipw))} passed." 
+ ) # initialize starting values and bounds self._coef_bounds = (self._dml_data.y.min(), self._dml_data.y.max()) @@ -155,19 +166,21 @@ def __init__(self, self._trimming_threshold = trimming_threshold _check_trimming(self._trimming_rule, self._trimming_threshold) - _ = self._check_learner(ml_g, 'ml_g', regressor=False, classifier=True) - _ = self._check_learner(ml_m, 'ml_m', regressor=False, classifier=True) - self._learner = {'ml_g': ml_g, 'ml_m': ml_m} - self._predict_method = {'ml_g': 'predict_proba', 'ml_m': 'predict_proba'} + _ = self._check_learner(ml_g, "ml_g", regressor=False, classifier=True) + _ = self._check_learner(ml_m, "ml_m", regressor=False, classifier=True) + self._learner = {"ml_g": ml_g, "ml_m": ml_m} + self._predict_method = {"ml_g": "predict_proba", "ml_m": "predict_proba"} self._initialize_ml_nuisance_params() if draw_sample_splitting: - obj_dml_resampling = DoubleMLResampling(n_folds=self.n_folds, - n_rep=self.n_rep, - n_obs=self._dml_data.n_obs, - apply_cross_fitting=self.apply_cross_fitting, - stratify=self._dml_data.d) + obj_dml_resampling = DoubleMLResampling( + n_folds=self.n_folds, + n_rep=self.n_rep, + n_obs=self._dml_data.n_obs, + apply_cross_fitting=self.apply_cross_fitting, + stratify=self._dml_data.d, + ) self._smpls = obj_dml_resampling.split_samples() @property @@ -214,36 +227,36 @@ def trimming_threshold(self): @property def _score_element_names(self): - return ['ind_d', 'g', 'm', 'y'] + return ["ind_d", "g", "m", "y"] def _compute_ipw_score(self, theta, d, y, prop): score = (d == self.treatment) / prop * (y <= theta) - self.quantile return score def _compute_score(self, psi_elements, coef, inds=None): - ind_d = psi_elements['ind_d'] - g = psi_elements['g'] - m = psi_elements['m'] - y = psi_elements['y'] + ind_d = psi_elements["ind_d"] + g = psi_elements["g"] + m = psi_elements["m"] + y = psi_elements["y"] if inds is not None: - ind_d = psi_elements['ind_d'][inds] - g = psi_elements['g'][inds] - m = psi_elements['m'][inds] - y = 
psi_elements['y'][inds] + ind_d = psi_elements["ind_d"][inds] + g = psi_elements["g"][inds] + m = psi_elements["m"][inds] + y = psi_elements["y"][inds] score = ind_d * ((y <= coef) - g) / m + g - self.quantile return score def _compute_score_deriv(self, psi_elements, coef, inds=None): - ind_d = psi_elements['ind_d'] - m = psi_elements['m'] - y = psi_elements['y'] + ind_d = psi_elements["ind_d"] + m = psi_elements["m"] + y = psi_elements["y"] if inds is not None: - ind_d = psi_elements['ind_d'][inds] - m = psi_elements['m'][inds] - y = psi_elements['y'][inds] + ind_d = psi_elements["ind_d"][inds] + m = psi_elements["m"][inds] + y = psi_elements["y"][inds] score_weights = ind_d / m @@ -253,74 +266,85 @@ def _compute_score_deriv(self, psi_elements, coef, inds=None): return deriv def _initialize_ml_nuisance_params(self): - self._params = {learner: {key: [None] * self.n_rep for key in self._dml_data.d_cols} - for learner in ['ml_g', 'ml_m']} + self._params = {learner: {key: [None] * self.n_rep for key in self._dml_data.d_cols} for learner in ["ml_g", "ml_m"]} def _nuisance_est(self, smpls, n_jobs_cv, external_predictions, return_models=False): - x, y = check_X_y(self._dml_data.x, self._dml_data.y, - force_all_finite=False) - x, d = check_X_y(x, self._dml_data.d, - force_all_finite=False) - - g = external_predictions['ml_g'] is not None - m = external_predictions['ml_m'] is not None - + x, y = check_X_y(self._dml_data.x, self._dml_data.y, force_all_finite=False) + x, d = check_X_y(x, self._dml_data.d, force_all_finite=False) + + g_external = external_predictions["ml_g"] is not None + m_external = external_predictions["ml_m"] is not None + # initialize nuisance predictions, targets and models - - if not (g and m): - g_hat = {'models': None, - 'targets': np.full(shape=self._dml_data.n_obs, fill_value=np.nan), - 'preds': np.full(shape=self._dml_data.n_obs, fill_value=np.nan) - } - m_hat = copy.deepcopy(g_hat) - ipw_vec = np.full(shape=self.n_folds, fill_value=np.nan) - # 
initialize models - fitted_models = {} - for learner in self.params_names: - # set nuisance model parameters + + if not g_external: + g_hat = { + "models": None, + "targets": np.full(shape=self._dml_data.n_obs, fill_value=np.nan), + "preds": np.full(shape=self._dml_data.n_obs, fill_value=np.nan), + } + if not m_external: + m_hat = { + "models": None, + "targets": np.full(shape=self._dml_data.n_obs, fill_value=np.nan), + "preds": np.full(shape=self._dml_data.n_obs, fill_value=np.nan), + } + ipw_vec = np.full(shape=self.n_folds, fill_value=np.nan) + # initialize models + fitted_models = {} + for learner in self.params_names: + # set nuisance model parameters + if (learner == "ml_g" and not g_external) or (learner == "ml_m" and not m_external): est_params = self._get_params(learner) if est_params is not None: - fitted_models[learner] = [clone(self._learner[learner]).set_params(**est_params[i_fold]) - for i_fold in range(self.n_folds)] + fitted_models[learner] = [ + clone(self._learner[learner]).set_params(**est_params[i_fold]) for i_fold in range(self.n_folds) + ] else: fitted_models[learner] = [clone(self._learner[learner]) for i_fold in range(self.n_folds)] - elif (g and not m) or (m and not g): - raise ValueError('External predictions for both g and m are required.') - else: - g_hat = {'models': None, - 'targets': np.full(shape=self._dml_data.n_obs, fill_value=np.nan), - 'preds': external_predictions['ml_g'] - } - m_hat = {'models': None, - 'targets': np.full(shape=self._dml_data.n_obs, fill_value=np.nan), - 'preds': external_predictions['ml_m'] - } + if g_external: + g_hat = { + "models": None, + "targets": np.full(shape=self._dml_data.n_obs, fill_value=np.nan), + "preds": external_predictions["ml_g"], + } + if m_external: + m_hat = { + "models": None, + "targets": np.full(shape=self._dml_data.n_obs, fill_value=np.nan), + "preds": external_predictions["ml_m"], + } # caculate nuisance functions over different folds - if not (g and m): + if not all([g_external, 
m_external]): for i_fold in range(self.n_folds): train_inds = smpls[i_fold][0] test_inds = smpls[i_fold][1] # start nested crossfitting - train_inds_1, train_inds_2 = train_test_split(train_inds, test_size=0.5, - random_state=42, stratify=d[train_inds]) - smpls_prelim = [(train, test) for train, test in - StratifiedKFold(n_splits=self.n_folds).split(X=train_inds_1, y=d[train_inds_1])] + train_inds_1, train_inds_2 = train_test_split( + train_inds, test_size=0.5, random_state=42, stratify=d[train_inds] + ) + smpls_prelim = [ + (train, test) + for train, test in StratifiedKFold(n_splits=self.n_folds).split(X=train_inds_1, y=d[train_inds_1]) + ] d_train_1 = d[train_inds_1] y_train_1 = y[train_inds_1] x_train_1 = x[train_inds_1, :] - # get a copy of ml_m as a preliminary learner - ml_m_prelim = clone(fitted_models['ml_m'][i_fold]) - m_hat_prelim = _dml_cv_predict(ml_m_prelim, x_train_1, d_train_1, - method='predict_proba', smpls=smpls_prelim)['preds'] - - m_hat_prelim = _trimm(m_hat_prelim, self.trimming_rule, self.trimming_threshold) - + if not m_external: + # get a copy of ml_m as a preliminary learner + ml_m_prelim = clone(fitted_models["ml_m"][i_fold]) + m_hat_prelim = _dml_cv_predict(ml_m_prelim, x_train_1, d_train_1, method="predict_proba", smpls=smpls_prelim)[ + "preds" + ] + else: + m_hat_prelim = m_hat["preds"][np.concatenate([test for train, test in smpls_prelim])] + m_hat_prelim = _trimm(m_hat_prelim, self.trimming_rule, self.trimming_threshold) if self._normalize_ipw: - m_hat_prelim = _normalize_ipw(m_hat_prelim, d_train_1) + m_hat_prelim = _normalize_ipw(m_hat_prelim, d_train_1) if self.treatment == 0: m_hat_prelim = 1 - m_hat_prelim @@ -341,33 +365,34 @@ def ipw_score(theta): dx_treat_train_2 = x_train_2[d_train_2 == self.treatment, :] y_treat_train_2 = y_train_2[d_train_2 == self.treatment] - fitted_models['ml_g'][i_fold].fit(dx_treat_train_2, y_treat_train_2 <= ipw_est) - - # predict nuisance values on the test data and the corresponding targets - 
g_hat['preds'][test_inds] = _predict_zero_one_propensity(fitted_models['ml_g'][i_fold], x[test_inds, :]) - g_hat['targets'][test_inds] = y[test_inds] <= ipw_est + if not g_external: + fitted_models["ml_g"][i_fold].fit(dx_treat_train_2, y_treat_train_2 <= ipw_est) - # refit the propensity score on the whole training set - fitted_models['ml_m'][i_fold].fit(x[train_inds, :], d[train_inds]) - m_hat['preds'][test_inds] = _predict_zero_one_propensity(fitted_models['ml_m'][i_fold], x[test_inds, :]) + # predict nuisance values on the test data and the corresponding targets + g_hat["preds"][test_inds] = _predict_zero_one_propensity(fitted_models["ml_g"][i_fold], x[test_inds, :]) + g_hat["targets"][test_inds] = y[test_inds] <= ipw_est + if not m_external: + # refit the propensity score on the whole training set + fitted_models["ml_m"][i_fold].fit(x[train_inds, :], d[train_inds]) + m_hat["preds"][test_inds] = _predict_zero_one_propensity(fitted_models["ml_m"][i_fold], x[test_inds, :]) # set target for propensity score - m_hat['targets'] = d + m_hat["targets"] = d # set the target for g to be a float and only relevant values - g_hat['targets'] = _cond_targets(g_hat['targets'], cond_sample=(d == self.treatment)) + g_hat["targets"] = _cond_targets(g_hat["targets"], cond_sample=(d == self.treatment)) if return_models: - g_hat['models'] = fitted_models['ml_g'] - m_hat['models'] = fitted_models['ml_m'] + g_hat["models"] = fitted_models["ml_g"] + m_hat["models"] = fitted_models["ml_m"] # clip propensities and normalize ipw weights # this is not done in the score to save computation due to multiple score evaluations # to be able to evaluate the raw models the m_hat['preds'] are not changed - #if not (g and m): - m_hat_adj = _trimm(m_hat['preds'], self.trimming_rule, self.trimming_threshold) + + m_hat_adj = _trimm(m_hat["preds"], self.trimming_rule, self.trimming_threshold) if self._normalize_ipw: - if self.dml_procedure == 'dml1': + if self.dml_procedure == "dml1": for _, test_index 
in smpls: m_hat_adj[test_index] = _normalize_ipw(m_hat_adj[test_index], d[test_index]) else: @@ -376,65 +401,73 @@ def ipw_score(theta): if self.treatment == 0: m_hat_adj = 1 - m_hat_adj # readjust start value for minimization - if not (g and m): + if not (g_external or m_external): self._coef_start_val = np.mean(ipw_vec) - #else: - # m_hat_adj = m_hat['preds'] - - - psi_elements = {'ind_d': d == self.treatment, 'g': g_hat['preds'], - 'm': m_hat_adj, 'y': y} - - preds = {'predictions': {'ml_g': g_hat['preds'], - 'ml_m': m_hat['preds']}, - 'targets': {'ml_g': g_hat['targets'], - 'ml_m': m_hat['targets']}, - 'models': {'ml_g': g_hat['models'], - 'ml_m': m_hat['models']} - } + + psi_elements = {"ind_d": d == self.treatment, "g": g_hat["preds"], "m": m_hat_adj, "y": y} + + preds = { + "predictions": {"ml_g": g_hat["preds"], "ml_m": m_hat["preds"]}, + "targets": {"ml_g": g_hat["targets"], "ml_m": m_hat["targets"]}, + "models": {"ml_g": g_hat["models"], "ml_m": m_hat["models"]}, + } return psi_elements, preds - def _nuisance_tuning(self, smpls, param_grids, scoring_methods, n_folds_tune, n_jobs_cv, - search_mode, n_iter_randomized_search): - x, y = check_X_y(self._dml_data.x, self._dml_data.y, - force_all_finite=False) - x, d = check_X_y(x, self._dml_data.d, - force_all_finite=False) + def _nuisance_tuning( + self, smpls, param_grids, scoring_methods, n_folds_tune, n_jobs_cv, search_mode, n_iter_randomized_search + ): + x, y = check_X_y(self._dml_data.x, self._dml_data.y, force_all_finite=False) + x, d = check_X_y(x, self._dml_data.d, force_all_finite=False) if scoring_methods is None: - scoring_methods = {'ml_g': None, - 'ml_m': None} + scoring_methods = {"ml_g": None, "ml_m": None} train_inds = [train_index for (train_index, _) in smpls] train_inds_treat = [np.intersect1d(np.where(d == self.treatment)[0], train) for train, _ in smpls] # use self._coef_start_val as a very crude approximation of ipw_est approx_goal = y <= np.quantile(y[d == self.treatment], 
self.quantile) - g_tune_res = _dml_tune(approx_goal, x, train_inds_treat, - self._learner['ml_g'], param_grids['ml_g'], scoring_methods['ml_g'], - n_folds_tune, n_jobs_cv, search_mode, n_iter_randomized_search) - - m_tune_res = _dml_tune(d, x, train_inds, - self._learner['ml_m'], param_grids['ml_m'], scoring_methods['ml_m'], - n_folds_tune, n_jobs_cv, search_mode, n_iter_randomized_search) + g_tune_res = _dml_tune( + approx_goal, + x, + train_inds_treat, + self._learner["ml_g"], + param_grids["ml_g"], + scoring_methods["ml_g"], + n_folds_tune, + n_jobs_cv, + search_mode, + n_iter_randomized_search, + ) + + m_tune_res = _dml_tune( + d, + x, + train_inds, + self._learner["ml_m"], + param_grids["ml_m"], + scoring_methods["ml_m"], + n_folds_tune, + n_jobs_cv, + search_mode, + n_iter_randomized_search, + ) g_best_params = [xx.best_params_ for xx in g_tune_res] m_best_params = [xx.best_params_ for xx in m_tune_res] - params = {'ml_g': g_best_params, - 'ml_m': m_best_params} - tune_res = {'g_tune': g_tune_res, - 'm_tune': m_tune_res} + params = {"ml_g": g_best_params, "ml_m": m_best_params} + tune_res = {"g_tune": g_tune_res, "m_tune": m_tune_res} - res = {'params': params, - 'tune_res': tune_res} + res = {"params": params, "tune_res": tune_res} return res def _check_data(self, obj_dml_data): if not isinstance(obj_dml_data, DoubleMLData): - raise TypeError('The data must be of DoubleMLData type. ' - f'{str(obj_dml_data)} of type {str(type(obj_dml_data))} was passed.') + raise TypeError( + "The data must be of DoubleMLData type. " f"{str(obj_dml_data)} of type {str(type(obj_dml_data))} was passed." 
+ ) _check_contains_iv(obj_dml_data) _check_zero_one_treatment(self) return diff --git a/doubleml/tests/test_pq_external_predictions.py b/doubleml/tests/test_pq_external_predictions.py index 4468db83..f1c89755 100644 --- a/doubleml/tests/test_pq_external_predictions.py +++ b/doubleml/tests/test_pq_external_predictions.py @@ -23,8 +23,17 @@ def normalize_ipw(request): return request.param +@pytest.fixture(scope="module", params=[True, False]) +def set_ml_m_none(request): + return request.param + +@pytest.fixture(scope="module", params=[True, False]) +def set_ml_g_none(request): + return request.param + + @pytest.fixture(scope="module") -def doubleml_pq_fixture(dml_procedure, n_rep, normalize_ipw): +def doubleml_pq_fixture(dml_procedure, n_rep, normalize_ipw, set_ml_m_none, set_ml_g_none): ext_predictions = {"d": {}} np.random.seed(3141) data = make_irm_data(theta=0.5, n_obs=500, dim_x=20, return_type="DataFrame") @@ -41,8 +50,8 @@ def doubleml_pq_fixture(dml_procedure, n_rep, normalize_ipw): "draw_sample_splitting": False } - ml_g = LogisticRegression() - ml_m = LogisticRegression() + ml_g = LogisticRegression(random_state=42) + ml_m = LogisticRegression(random_state=42) DMLPQ = DoubleMLPQ(ml_g=ml_g, ml_m=ml_m, **kwargs) DMLPQ.set_sample_splitting(all_smpls) @@ -50,10 +59,19 @@ def doubleml_pq_fixture(dml_procedure, n_rep, normalize_ipw): DMLPQ.fit(store_predictions=True) - ext_predictions["d"]["ml_g"] = DMLPQ.predictions["ml_g"][:, :, 0] - ext_predictions["d"]["ml_m"] = DMLPQ.predictions["ml_m"][:, :, 0] - - DMLPLQ_ext = DoubleMLPQ(ml_g=dummy_classifier(), ml_m=dummy_classifier(), **kwargs) + if set_ml_m_none: + ml_m = LogisticRegression(random_state=42) + else: + ext_predictions["d"]["ml_m"] = DMLPQ.predictions["ml_m"][:, :, 0] + ml_m = dummy_classifier() + + if set_ml_g_none: + ml_g = LogisticRegression(random_state=42) + else: + ext_predictions["d"]["ml_g"] = DMLPQ.predictions["ml_g"][:, :, 0] + ml_g = dummy_classifier() + + DMLPLQ_ext = DoubleMLPQ(ml_g = ml_g, 
ml_m = ml_m, **kwargs) DMLPLQ_ext.set_sample_splitting(all_smpls) np.random.seed(3141) From 7075af320d34adac13efa78889397265f863726d Mon Sep 17 00:00:00 2001 From: Jan Teichert-Kluge Date: Thu, 7 Dec 2023 12:05:58 +0100 Subject: [PATCH 52/67] update pq model for individual external prediction --- doubleml/double_ml_pq.py | 12 +++++------ .../tests/test_pq_external_predictions.py | 20 +++++++++---------- 2 files changed, 16 insertions(+), 16 deletions(-) diff --git a/doubleml/double_ml_pq.py b/doubleml/double_ml_pq.py index 546cbe92..ee398f49 100644 --- a/doubleml/double_ml_pq.py +++ b/doubleml/double_ml_pq.py @@ -337,14 +337,14 @@ def _nuisance_est(self, smpls, n_jobs_cv, external_predictions, return_models=Fa if not m_external: # get a copy of ml_m as a preliminary learner ml_m_prelim = clone(fitted_models["ml_m"][i_fold]) - m_hat_prelim = _dml_cv_predict(ml_m_prelim, x_train_1, d_train_1, method="predict_proba", smpls=smpls_prelim)[ - "preds" - ] + m_hat_prelim = _dml_cv_predict( + ml_m_prelim, x_train_1, d_train_1, method="predict_proba", smpls=smpls_prelim + )["preds"] else: m_hat_prelim = m_hat["preds"][np.concatenate([test for train, test in smpls_prelim])] - m_hat_prelim = _trimm(m_hat_prelim, self.trimming_rule, self.trimming_threshold) + m_hat_prelim = _trimm(m_hat_prelim, self.trimming_rule, self.trimming_threshold) if self._normalize_ipw: - m_hat_prelim = _normalize_ipw(m_hat_prelim, d_train_1) + m_hat_prelim = _normalize_ipw(m_hat_prelim, d_train_1) if self.treatment == 0: m_hat_prelim = 1 - m_hat_prelim @@ -401,7 +401,7 @@ def ipw_score(theta): if self.treatment == 0: m_hat_adj = 1 - m_hat_adj # readjust start value for minimization - if not (g_external or m_external): + if not g_external or not m_external: self._coef_start_val = np.mean(ipw_vec) psi_elements = {"ind_d": d == self.treatment, "g": g_hat["preds"], "m": m_hat_adj, "y": y} diff --git a/doubleml/tests/test_pq_external_predictions.py b/doubleml/tests/test_pq_external_predictions.py index 
f1c89755..358134c6 100644 --- a/doubleml/tests/test_pq_external_predictions.py +++ b/doubleml/tests/test_pq_external_predictions.py @@ -24,16 +24,16 @@ def normalize_ipw(request): @pytest.fixture(scope="module", params=[True, False]) -def set_ml_m_none(request): +def set_ml_m_ext(request): return request.param @pytest.fixture(scope="module", params=[True, False]) -def set_ml_g_none(request): +def set_ml_g_ext(request): return request.param @pytest.fixture(scope="module") -def doubleml_pq_fixture(dml_procedure, n_rep, normalize_ipw, set_ml_m_none, set_ml_g_none): +def doubleml_pq_fixture(dml_procedure, n_rep, normalize_ipw, set_ml_m_ext, set_ml_g_ext): ext_predictions = {"d": {}} np.random.seed(3141) data = make_irm_data(theta=0.5, n_obs=500, dim_x=20, return_type="DataFrame") @@ -50,8 +50,8 @@ def doubleml_pq_fixture(dml_procedure, n_rep, normalize_ipw, set_ml_m_none, set_ "draw_sample_splitting": False } - ml_g = LogisticRegression(random_state=42) ml_m = LogisticRegression(random_state=42) + ml_g = LogisticRegression(random_state=42) DMLPQ = DoubleMLPQ(ml_g=ml_g, ml_m=ml_m, **kwargs) DMLPQ.set_sample_splitting(all_smpls) @@ -59,17 +59,17 @@ def doubleml_pq_fixture(dml_procedure, n_rep, normalize_ipw, set_ml_m_none, set_ DMLPQ.fit(store_predictions=True) - if set_ml_m_none: - ml_m = LogisticRegression(random_state=42) - else: + if set_ml_m_ext: ext_predictions["d"]["ml_m"] = DMLPQ.predictions["ml_m"][:, :, 0] ml_m = dummy_classifier() - - if set_ml_g_none: - ml_g = LogisticRegression(random_state=42) else: + ml_m = LogisticRegression(random_state=42) + + if set_ml_g_ext: ext_predictions["d"]["ml_g"] = DMLPQ.predictions["ml_g"][:, :, 0] ml_g = dummy_classifier() + else: + ml_g = LogisticRegression(random_state=42) DMLPLQ_ext = DoubleMLPQ(ml_g = ml_g, ml_m = ml_m, **kwargs) DMLPLQ_ext.set_sample_splitting(all_smpls) From 48329e4dfcd9000442c8b630a7a48935591e053c Mon Sep 17 00:00:00 2001 From: Jan Teichert-Kluge Date: Thu, 7 Dec 2023 14:01:32 +0100 Subject: [PATCH 
53/67] update external preds in IRM model --- doubleml/double_ml_irm.py | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/doubleml/double_ml_irm.py b/doubleml/double_ml_irm.py index c8a2d208..78bd3928 100644 --- a/doubleml/double_ml_irm.py +++ b/doubleml/double_ml_irm.py @@ -214,9 +214,13 @@ def _nuisance_est(self, smpls, n_jobs_cv, external_predictions, return_models=Fa force_all_finite=False) # get train indices for d == 0 and d == 1 smpls_d0, smpls_d1 = _get_cond_smpls(smpls, d) + + g0_external = external_predictions['ml_g0'] is not None + g1_external = external_predictions['ml_g1'] is not None + m_external = external_predictions['ml_m'] is not None # nuisance g - if external_predictions['ml_g0'] is not None: + if g0_external: # use external predictions g_hat0 = {'preds': external_predictions['ml_g0'], 'targets': None, @@ -237,7 +241,7 @@ def _nuisance_est(self, smpls, n_jobs_cv, external_predictions, return_models=Fa 'observed to be binary with values 0 and 1. Make sure that for classifiers ' 'probabilities and not labels are predicted.') - if external_predictions['ml_g1'] is not None: + if g1_external: # use external predictions g_hat1 = {'preds': external_predictions['ml_g1'], 'targets': None, @@ -260,7 +264,7 @@ def _nuisance_est(self, smpls, n_jobs_cv, external_predictions, return_models=Fa 'probabilities and not labels are predicted.') # nuisance m - if external_predictions['ml_m'] is not None: + if m_external: # use external predictions m_hat = {'preds': external_predictions['ml_m'], 'targets': None, From a5bb73b02d5e50fd0d66f0e375036b75ed574d34 Mon Sep 17 00:00:00 2001 From: Jan Teichert-Kluge Date: Thu, 7 Dec 2023 14:02:05 +0100 Subject: [PATCH 54/67] add unit test for IRM uncomplete external preds. 
--- .../tests/test_irm_external_predictions.py | 32 +++++++++++++++---- 1 file changed, 26 insertions(+), 6 deletions(-) diff --git a/doubleml/tests/test_irm_external_predictions.py b/doubleml/tests/test_irm_external_predictions.py index c1463a07..ee55ce66 100644 --- a/doubleml/tests/test_irm_external_predictions.py +++ b/doubleml/tests/test_irm_external_predictions.py @@ -1,7 +1,7 @@ import numpy as np import pytest import math -from sklearn.linear_model import LinearRegression, LassoCV, LogisticRegression +from sklearn.linear_model import LinearRegression, LogisticRegression from doubleml import DoubleMLIRM, DoubleMLData from doubleml.datasets import make_irm_data from doubleml.utils import dummy_regressor, dummy_classifier @@ -22,8 +22,18 @@ def n_rep(request): return request.param +@pytest.fixture(scope="module", params=[True, False]) +def set_ml_m_ext(request): + return request.param + + +@pytest.fixture(scope="module", params=[True, False]) +def set_ml_g_ext(request): + return request.param + + @pytest.fixture(scope="module") -def doubleml_irm_fixture(irm_score, dml_procedure, n_rep): +def doubleml_irm_fixture(irm_score, dml_procedure, n_rep, set_ml_m_ext, set_ml_g_ext): ext_predictions = {"d": {}} x, y, d = make_irm_data(n_obs=500, dim_x=20, theta=0.5, return_type="np.array") @@ -39,11 +49,20 @@ def doubleml_irm_fixture(irm_score, dml_procedure, n_rep): DMLIRM.fit(store_predictions=True) - ext_predictions["d"]["ml_g0"] = DMLIRM.predictions["ml_g0"][:, :, 0] - ext_predictions["d"]["ml_g1"] = DMLIRM.predictions["ml_g1"][:, :, 0] - ext_predictions["d"]["ml_m"] = DMLIRM.predictions["ml_m"][:, :, 0] + if set_ml_m_ext: + ext_predictions["d"]["ml_m"] = DMLIRM.predictions["ml_m"][:, :, 0] + ml_m = dummy_classifier() + else: + ml_m = LogisticRegression(random_state=42) - DMLIRM_ext = DoubleMLIRM(ml_g=dummy_regressor(), ml_m=dummy_classifier(), **kwargs) + if set_ml_g_ext: + ext_predictions["d"]["ml_g0"] = DMLIRM.predictions["ml_g0"][:, :, 0] + 
ext_predictions["d"]["ml_g1"] = DMLIRM.predictions["ml_g1"][:, :, 0] + ml_g = dummy_regressor() + else: + ml_g = LinearRegression() + + DMLIRM_ext = DoubleMLIRM(ml_g=ml_g, ml_m=ml_m, **kwargs) np.random.seed(3141) DMLIRM_ext.fit(external_predictions=ext_predictions) @@ -52,6 +71,7 @@ def doubleml_irm_fixture(irm_score, dml_procedure, n_rep): return res_dict + @pytest.mark.ci def test_doubleml_irm_coef(doubleml_irm_fixture): assert math.isclose(doubleml_irm_fixture["coef_normal"], doubleml_irm_fixture["coef_ext"], rel_tol=1e-9, abs_tol=1e-4) From 35d8f33ef60806fd74f2bd2b2bde7bb1523ca267 Mon Sep 17 00:00:00 2001 From: Jan Teichert-Kluge Date: Thu, 7 Dec 2023 14:02:16 +0100 Subject: [PATCH 55/67] update external preds in PLR model --- doubleml/double_ml_plr.py | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) diff --git a/doubleml/double_ml_plr.py b/doubleml/double_ml_plr.py index 0e4ff31a..4b222c8b 100644 --- a/doubleml/double_ml_plr.py +++ b/doubleml/double_ml_plr.py @@ -171,9 +171,16 @@ def _nuisance_est(self, smpls, n_jobs_cv, external_predictions, return_models=Fa force_all_finite=False) x, d = check_X_y(x, self._dml_data.d, force_all_finite=False) + + m_external = external_predictions['ml_m'] is not None + l_external = external_predictions['ml_l'] is not None + if 'ml_g' in self._learner: + g_external = external_predictions['ml_g'] is not None + else: + g_external = False # nuisance l - if external_predictions['ml_l'] is not None: + if l_external: l_hat = {'preds': external_predictions['ml_l'], 'targets': None, 'models': None} @@ -184,7 +191,7 @@ def _nuisance_est(self, smpls, n_jobs_cv, external_predictions, return_models=Fa _check_finite_predictions(l_hat['preds'], self._learner['ml_l'], 'ml_l', smpls) # nuisance m - if external_predictions['ml_m'] is not None: + if m_external: m_hat = {'preds': external_predictions['ml_m'], 'targets': None, 'models': None} @@ -213,7 +220,7 @@ def _nuisance_est(self, smpls, n_jobs_cv, external_predictions, 
return_models=Fa psi_b = np.multiply(d - m_hat['preds'], y - l_hat['preds']) theta_initial = -np.nanmean(psi_b) / np.nanmean(psi_a) # nuisance g - if external_predictions['ml_g'] is not None: + if g_external: g_hat = {'preds': external_predictions['ml_g'], 'targets': None, 'models': None} From 73220b830cf12854737eb94ff58e76c9aca02ac4 Mon Sep 17 00:00:00 2001 From: Jan Teichert-Kluge Date: Thu, 7 Dec 2023 14:02:28 +0100 Subject: [PATCH 56/67] add unit test for PLR uncomplete external preds. --- .../tests/test_plr_external_predictions.py | 42 +++++++++++++++---- 1 file changed, 35 insertions(+), 7 deletions(-) diff --git a/doubleml/tests/test_plr_external_predictions.py b/doubleml/tests/test_plr_external_predictions.py index ca04794f..f1386e11 100644 --- a/doubleml/tests/test_plr_external_predictions.py +++ b/doubleml/tests/test_plr_external_predictions.py @@ -1,7 +1,7 @@ import numpy as np import pytest import math -from sklearn.linear_model import LinearRegression, LassoCV, LogisticRegression +from sklearn.linear_model import LinearRegression from doubleml import DoubleMLPLR, DoubleMLData from doubleml.datasets import make_plr_CCDDHNR2018 from doubleml.utils import dummy_regressor @@ -22,8 +22,23 @@ def n_rep(request): return request.param +@pytest.fixture(scope="module", params=[True, False]) +def set_ml_m_ext(request): + return request.param + + +@pytest.fixture(scope="module", params=[True, False]) +def set_ml_l_ext(request): + return request.param + + +@pytest.fixture(scope="module", params=[True, False]) +def set_ml_g_ext(request): + return request.param + + @pytest.fixture(scope="module") -def doubleml_plr_fixture(plr_score, dml_procedure, n_rep): +def doubleml_plr_fixture(plr_score, dml_procedure, n_rep, set_ml_m_ext, set_ml_l_ext, set_ml_g_ext): ext_predictions = {"d": {}} x, y, d = make_plr_CCDDHNR2018(n_obs=500, dim_x=20, alpha=0.5, return_type="np.array") @@ -42,14 +57,27 @@ def doubleml_plr_fixture(plr_score, dml_procedure, n_rep): 
DMLPLR.fit(store_predictions=True) - ext_predictions["d"]["ml_m"] = DMLPLR.predictions["ml_m"][:, :, 0] - ext_predictions["d"]["ml_l"] = DMLPLR.predictions["ml_l"][:, :, 0] + if set_ml_m_ext: + ext_predictions["d"]["ml_m"] = DMLPLR.predictions["ml_m"][:, :, 0] + ml_m = dummy_regressor() + else: + ml_m = LinearRegression() - if plr_score == "IV-type": - kwargs["ml_g"] = dummy_regressor() + if set_ml_l_ext: + ext_predictions["d"]["ml_l"] = DMLPLR.predictions["ml_l"][:, :, 0] + ml_l = dummy_regressor() + else: + ml_l = LinearRegression() + + if plr_score == "IV-type" and set_ml_g_ext: ext_predictions["d"]["ml_g"] = DMLPLR.predictions["ml_g"][:, :, 0] + kwargs["ml_g"] = dummy_regressor() + elif plr_score == "IV-type" and not set_ml_g_ext: + kwargs["ml_g"] = LinearRegression() + else: + pass - DMLPLR_ext = DoubleMLPLR(ml_m=dummy_regressor(), ml_l=dummy_regressor(), **kwargs) + DMLPLR_ext = DoubleMLPLR(ml_m=ml_m, ml_l=ml_l, **kwargs) np.random.seed(3141) DMLPLR_ext.fit(external_predictions=ext_predictions) From 076c49b30e3bfa36707a400c229df0ad63dc65bf Mon Sep 17 00:00:00 2001 From: Jan Teichert-Kluge Date: Thu, 7 Dec 2023 14:22:18 +0100 Subject: [PATCH 57/67] add flags if external predictions are implemented. 
--- doubleml/double_ml.py | 11 +++++++++-- doubleml/double_ml_did.py | 2 ++ doubleml/double_ml_did_cs.py | 3 +++ doubleml/double_ml_iivm.py | 2 ++ doubleml/double_ml_irm.py | 2 ++ doubleml/double_ml_lpq.py | 2 ++ doubleml/double_ml_pliv.py | 2 ++ doubleml/double_ml_plr.py | 2 ++ doubleml/double_ml_pq.py | 4 +++- 9 files changed, 27 insertions(+), 3 deletions(-) diff --git a/doubleml/double_ml.py b/doubleml/double_ml.py index c97a6304..2ca29b00 100644 --- a/doubleml/double_ml.py +++ b/doubleml/double_ml.py @@ -63,6 +63,9 @@ def __init__(self, self._sensitivity_implemented = False self._sensitivity_elements = None self._sensitivity_params = None + + # initialize external predictions + self._external_predictions_implemented = False # check resampling specifications if not isinstance(n_folds, int): @@ -530,8 +533,12 @@ def fit(self, n_jobs_cv=None, store_predictions=True, external_predictions=None, raise TypeError('store_models must be True or False. ' f'Got {str(store_models)}.') - # check prediction format - self._check_external_predictions(external_predictions) + # check if external predictions are implemented + if self._external_predictions_implemented: + # check prediction format + self._check_external_predictions(external_predictions) + elif not self._external_predictions_implemented and external_predictions is not None: + raise NotImplementedError(f"External predictions not implemented for {self.__class__.__name__}.") # initialize rmse arrays for nuisance functions evaluation self._initialize_rmses() diff --git a/doubleml/double_ml_did.py b/doubleml/double_ml_did.py index 87c02931..1add5e0d 100644 --- a/doubleml/double_ml_did.py +++ b/doubleml/double_ml_did.py @@ -148,6 +148,8 @@ def __init__(self, _check_trimming(self._trimming_rule, self._trimming_threshold) self._sensitivity_implemented = True + + self._external_predictions_implemented = True @property def in_sample_normalization(self): diff --git a/doubleml/double_ml_did_cs.py b/doubleml/double_ml_did_cs.py 
index f0986eed..2cdbb003 100644 --- a/doubleml/double_ml_did_cs.py +++ b/doubleml/double_ml_did_cs.py @@ -148,6 +148,9 @@ def __init__(self, _check_trimming(self._trimming_rule, self._trimming_threshold) self._sensitivity_implemented = True + + self._external_predictions_implemented = True + @property def in_sample_normalization(self): diff --git a/doubleml/double_ml_iivm.py b/doubleml/double_ml_iivm.py index 1cb793b2..5e3cb073 100644 --- a/doubleml/double_ml_iivm.py +++ b/doubleml/double_ml_iivm.py @@ -193,6 +193,8 @@ def __init__(self, raise TypeError("subgroups['never_takers'] must be True or False. " f'Got {str(subgroups["never_takers"])}.') self.subgroups = subgroups + + self._external_predictions_implemented = True @property def normalize_ipw(self): diff --git a/doubleml/double_ml_irm.py b/doubleml/double_ml_irm.py index 78bd3928..0c049b66 100644 --- a/doubleml/double_ml_irm.py +++ b/doubleml/double_ml_irm.py @@ -161,6 +161,8 @@ def __init__(self, _check_trimming(self._trimming_rule, self._trimming_threshold) self._sensitivity_implemented = True + + self._external_predictions_implemented = True @property def normalize_ipw(self): diff --git a/doubleml/double_ml_lpq.py b/doubleml/double_ml_lpq.py index a3255b6f..6efd375f 100644 --- a/doubleml/double_ml_lpq.py +++ b/doubleml/double_ml_lpq.py @@ -188,6 +188,8 @@ def __init__( stratify=strata, ) self._smpls = obj_dml_resampling.split_samples() + + self._external_predictions_implemented = True @property def quantile(self): diff --git a/doubleml/double_ml_pliv.py b/doubleml/double_ml_pliv.py index e16caa94..b7f6259c 100644 --- a/doubleml/double_ml_pliv.py +++ b/doubleml/double_ml_pliv.py @@ -145,6 +145,8 @@ def __init__(self, if 'ml_g' in self._learner: self._predict_method['ml_g'] = 'predict' self._initialize_ml_nuisance_params() + + self._external_predictions_implemented = True @classmethod def _partialX(cls, diff --git a/doubleml/double_ml_plr.py b/doubleml/double_ml_plr.py index 4b222c8b..c374d6ee 100644 --- 
a/doubleml/double_ml_plr.py +++ b/doubleml/double_ml_plr.py @@ -150,6 +150,8 @@ def __init__(self, self._initialize_ml_nuisance_params() self._sensitivity_implemented = True + + self._external_predictions_implemented = True def _initialize_ml_nuisance_params(self): self._params = {learner: {key: [None] * self.n_rep for key in self._dml_data.d_cols} diff --git a/doubleml/double_ml_pq.py b/doubleml/double_ml_pq.py index ee398f49..e7f42eae 100644 --- a/doubleml/double_ml_pq.py +++ b/doubleml/double_ml_pq.py @@ -182,6 +182,8 @@ def __init__( stratify=self._dml_data.d, ) self._smpls = obj_dml_resampling.split_samples() + + self._external_predictions_implemented = True @property def quantile(self): @@ -341,7 +343,7 @@ def _nuisance_est(self, smpls, n_jobs_cv, external_predictions, return_models=Fa ml_m_prelim, x_train_1, d_train_1, method="predict_proba", smpls=smpls_prelim )["preds"] else: - m_hat_prelim = m_hat["preds"][np.concatenate([test for train, test in smpls_prelim])] + m_hat_prelim = m_hat["preds"][np.concatenate([test for _, test in smpls_prelim])] m_hat_prelim = _trimm(m_hat_prelim, self.trimming_rule, self.trimming_threshold) if self._normalize_ipw: m_hat_prelim = _normalize_ipw(m_hat_prelim, d_train_1) From f84fc862ffad09971c569d5ee407f3523ea762e8 Mon Sep 17 00:00:00 2001 From: Jan Teichert-Kluge Date: Thu, 7 Dec 2023 14:22:47 +0100 Subject: [PATCH 58/67] change DGP in PQ external prediction test --- doubleml/tests/test_pq_external_predictions.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/doubleml/tests/test_pq_external_predictions.py b/doubleml/tests/test_pq_external_predictions.py index 358134c6..328e911d 100644 --- a/doubleml/tests/test_pq_external_predictions.py +++ b/doubleml/tests/test_pq_external_predictions.py @@ -36,7 +36,7 @@ def set_ml_g_ext(request): def doubleml_pq_fixture(dml_procedure, n_rep, normalize_ipw, set_ml_m_ext, set_ml_g_ext): ext_predictions = {"d": {}} np.random.seed(3141) - data = make_irm_data(theta=0.5, 
n_obs=500, dim_x=20, return_type="DataFrame") + data = make_irm_data(theta=0.5, n_obs=1000, dim_x=5, return_type="DataFrame") dml_data = DoubleMLData(data, "y", "d") all_smpls = draw_smpls(len(dml_data.y), 5, n_rep=n_rep, groups=None) @@ -56,7 +56,6 @@ def doubleml_pq_fixture(dml_procedure, n_rep, normalize_ipw, set_ml_m_ext, set_m DMLPQ = DoubleMLPQ(ml_g=ml_g, ml_m=ml_m, **kwargs) DMLPQ.set_sample_splitting(all_smpls) np.random.seed(3141) - DMLPQ.fit(store_predictions=True) if set_ml_m_ext: From 5e8f32de64ae151d7b71e3cced7e8a3c9d2a8ae6 Mon Sep 17 00:00:00 2001 From: Jan Teichert-Kluge Date: Thu, 7 Dec 2023 16:16:07 +0100 Subject: [PATCH 59/67] add unit test for NotImpl.Error for ext. preds. --- ...leml_external_prediction_implementation.py | 21 +++++++++++++++++++ 1 file changed, 21 insertions(+) create mode 100644 doubleml/tests/test_doubleml_external_prediction_implementation.py diff --git a/doubleml/tests/test_doubleml_external_prediction_implementation.py b/doubleml/tests/test_doubleml_external_prediction_implementation.py new file mode 100644 index 00000000..b6ca3cbf --- /dev/null +++ b/doubleml/tests/test_doubleml_external_prediction_implementation.py @@ -0,0 +1,21 @@ +import numpy as np +import pytest +from doubleml import DoubleMLCVAR, DoubleMLQTE, DoubleMLData +from doubleml.datasets import make_irm_data +from doubleml.utils import dummy_regressor, dummy_classifier + +df_irm = make_irm_data(n_obs=500, dim_x=20, theta=0.5, return_type="DataFrame") + +# CVAR +msg = "External predictions not implemented for DoubleMLCVAR." +ext_predictions = {"d": {}} +with pytest.raises(NotImplementedError, match=msg): + cvar = DoubleMLCVAR(DoubleMLData(df_irm, "y", "d"), dummy_regressor(), dummy_classifier(), treatment=1) + cvar.fit(external_predictions=ext_predictions) + + +# QTE +msg = "External predictions not implemented for DoubleMLQTE." 
+with pytest.raises(NotImplementedError, match=msg): + qte = DoubleMLQTE(DoubleMLData(df_irm, "y", "d"), dummy_classifier(), dummy_classifier()) + cvar.fit(external_predictions=ext_predictions) From bb9f94fb0df53c6cd3317b05dbfb67ef058aa447 Mon Sep 17 00:00:00 2001 From: Jan Teichert-Kluge Date: Thu, 7 Dec 2023 16:43:41 +0100 Subject: [PATCH 60/67] add NotImpl.Error for ext. preds in QTE --- doubleml/double_ml_qte.py | 6 ++++- ...leml_external_prediction_implementation.py | 25 +++++++++++-------- 2 files changed, 19 insertions(+), 12 deletions(-) diff --git a/doubleml/double_ml_qte.py b/doubleml/double_ml_qte.py index c640abf7..8f2286d1 100644 --- a/doubleml/double_ml_qte.py +++ b/doubleml/double_ml_qte.py @@ -386,7 +386,7 @@ def __psi1_deriv(self): def __all_se(self): return self._all_se[self._i_quant, self._i_rep] - def fit(self, n_jobs_models=None, n_jobs_cv=None, store_predictions=True, store_models=False): + def fit(self, n_jobs_models=None, n_jobs_cv=None, store_predictions=True, store_models=False, external_predictions=None): """ Estimate DoubleMLQTE models. 
@@ -414,12 +414,16 @@ def fit(self, n_jobs_models=None, n_jobs_cv=None, store_predictions=True, store_ ------- self : object """ + + if external_predictions is not None: + raise NotImplementedError(f"External predictions not implemented for {self.__class__.__name__}.") # parallel estimation of the quantiles parallel = Parallel(n_jobs=n_jobs_models, verbose=0, pre_dispatch='2*n_jobs') fitted_models = parallel(delayed(self._fit_quantile)(i_quant, n_jobs_cv, store_predictions, store_models) for i_quant in range(self.n_quantiles)) + # combine the estimates and scores for i_quant in range(self.n_quantiles): self._i_quant = i_quant diff --git a/doubleml/tests/test_doubleml_external_prediction_implementation.py b/doubleml/tests/test_doubleml_external_prediction_implementation.py index b6ca3cbf..9d082859 100644 --- a/doubleml/tests/test_doubleml_external_prediction_implementation.py +++ b/doubleml/tests/test_doubleml_external_prediction_implementation.py @@ -5,17 +5,20 @@ from doubleml.utils import dummy_regressor, dummy_classifier df_irm = make_irm_data(n_obs=500, dim_x=20, theta=0.5, return_type="DataFrame") - -# CVAR -msg = "External predictions not implemented for DoubleMLCVAR." ext_predictions = {"d": {}} -with pytest.raises(NotImplementedError, match=msg): - cvar = DoubleMLCVAR(DoubleMLData(df_irm, "y", "d"), dummy_regressor(), dummy_classifier(), treatment=1) - cvar.fit(external_predictions=ext_predictions) -# QTE -msg = "External predictions not implemented for DoubleMLQTE." -with pytest.raises(NotImplementedError, match=msg): - qte = DoubleMLQTE(DoubleMLData(df_irm, "y", "d"), dummy_classifier(), dummy_classifier()) - cvar.fit(external_predictions=ext_predictions) +@pytest.mark.ci +def test_cvar_external_prediction_exception(): + msg = "External predictions not implemented for DoubleMLCVAR." 
+ with pytest.raises(NotImplementedError, match=msg): + cvar = DoubleMLCVAR(DoubleMLData(df_irm, "y", "d"), dummy_regressor(), dummy_classifier(), treatment=1) + cvar.fit(external_predictions=ext_predictions) + + +@pytest.mark.ci +def test_qte_external_prediction_exception(): + msg = "External predictions not implemented for DoubleMLQTE." + with pytest.raises(NotImplementedError, match=msg): + qte = DoubleMLQTE(DoubleMLData(df_irm, "y", "d"), dummy_classifier(), dummy_classifier()) + qte.fit(external_predictions=ext_predictions) From 432ccc5dbc26295956b4dba69446fea2cfe714db Mon Sep 17 00:00:00 2001 From: Jan Teichert-Kluge Date: Fri, 8 Dec 2023 13:17:38 +0100 Subject: [PATCH 61/67] reformatting --- ... => test_doubleml_exceptions_ext_preds.py} | 3 +- .../tests/test_pliv_external_predictions.py | 5 +-- .../tests/test_pq_external_predictions.py | 29 +++++++++++---- doubleml/utils/dummy_learners.py | 36 +++++++++++++++++++ 4 files changed, 62 insertions(+), 11 deletions(-) rename doubleml/tests/{test_doubleml_external_prediction_implementation.py => test_doubleml_exceptions_ext_preds.py} (90%) diff --git a/doubleml/tests/test_doubleml_external_prediction_implementation.py b/doubleml/tests/test_doubleml_exceptions_ext_preds.py similarity index 90% rename from doubleml/tests/test_doubleml_external_prediction_implementation.py rename to doubleml/tests/test_doubleml_exceptions_ext_preds.py index 9d082859..4be4430f 100644 --- a/doubleml/tests/test_doubleml_external_prediction_implementation.py +++ b/doubleml/tests/test_doubleml_exceptions_ext_preds.py @@ -1,10 +1,9 @@ -import numpy as np import pytest from doubleml import DoubleMLCVAR, DoubleMLQTE, DoubleMLData from doubleml.datasets import make_irm_data from doubleml.utils import dummy_regressor, dummy_classifier -df_irm = make_irm_data(n_obs=500, dim_x=20, theta=0.5, return_type="DataFrame") +df_irm = make_irm_data(n_obs=10, dim_x=2, theta=0.5, return_type="DataFrame") ext_predictions = {"d": {}} diff --git 
a/doubleml/tests/test_pliv_external_predictions.py b/doubleml/tests/test_pliv_external_predictions.py index cbd13dfe..b9061498 100644 --- a/doubleml/tests/test_pliv_external_predictions.py +++ b/doubleml/tests/test_pliv_external_predictions.py @@ -1,7 +1,7 @@ import numpy as np import pytest import math -from sklearn.linear_model import LinearRegression, LassoCV +from sklearn.linear_model import LinearRegression from doubleml import DoubleMLPLIV, DoubleMLData from doubleml.datasets import make_pliv_CHS2015 from doubleml.utils import dummy_regressor @@ -32,6 +32,7 @@ def adapted_doubleml_fixture(score, dml_procedure, n_rep, dim_z): # IV-type score only allows dim_z = 1, so skip testcases with dim_z > 1 for IV-type score if dim_z > 1 and score == "IV-type": pytest.skip("IV-type score only allows dim_z = 1") + res_dict = None else: ext_predictions = {"d": {}} @@ -86,7 +87,7 @@ def adapted_doubleml_fixture(score, dml_procedure, n_rep, dim_z): res_dict = {"coef_normal": DMLPLIV.coef, "coef_ext": DMLPLIV_ext.coef} - return res_dict + return res_dict @pytest.mark.ci diff --git a/doubleml/tests/test_pq_external_predictions.py b/doubleml/tests/test_pq_external_predictions.py index 328e911d..0f3c0bc7 100644 --- a/doubleml/tests/test_pq_external_predictions.py +++ b/doubleml/tests/test_pq_external_predictions.py @@ -4,7 +4,7 @@ from sklearn.linear_model import LogisticRegression from doubleml import DoubleMLPQ, DoubleMLData from doubleml.datasets import make_irm_data -from doubleml.utils import dummy_regressor, dummy_classifier +from doubleml.utils import dummy_classifier from ._utils import draw_smpls @@ -27,6 +27,7 @@ def normalize_ipw(request): def set_ml_m_ext(request): return request.param + @pytest.fixture(scope="module", params=[True, False]) def set_ml_g_ext(request): return request.param @@ -36,7 +37,7 @@ def set_ml_g_ext(request): def doubleml_pq_fixture(dml_procedure, n_rep, normalize_ipw, set_ml_m_ext, set_ml_g_ext): ext_predictions = {"d": {}} 
np.random.seed(3141) - data = make_irm_data(theta=0.5, n_obs=1000, dim_x=5, return_type="DataFrame") + data = make_irm_data(theta=1, n_obs=500, dim_x=5, return_type="DataFrame") dml_data = DoubleMLData(data, "y", "d") all_smpls = draw_smpls(len(dml_data.y), 5, n_rep=n_rep, groups=None) @@ -47,7 +48,7 @@ def doubleml_pq_fixture(dml_procedure, n_rep, normalize_ipw, set_ml_m_ext, set_m "n_rep": n_rep, "dml_procedure": dml_procedure, "normalize_ipw": normalize_ipw, - "draw_sample_splitting": False + "draw_sample_splitting": False, } ml_m = LogisticRegression(random_state=42) @@ -63,24 +64,38 @@ def doubleml_pq_fixture(dml_procedure, n_rep, normalize_ipw, set_ml_m_ext, set_m ml_m = dummy_classifier() else: ml_m = LogisticRegression(random_state=42) - + if set_ml_g_ext: ext_predictions["d"]["ml_g"] = DMLPQ.predictions["ml_g"][:, :, 0] ml_g = dummy_classifier() else: ml_g = LogisticRegression(random_state=42) - DMLPLQ_ext = DoubleMLPQ(ml_g = ml_g, ml_m = ml_m, **kwargs) + DMLPLQ_ext = DoubleMLPQ(ml_g=ml_g, ml_m=ml_m, **kwargs) DMLPLQ_ext.set_sample_splitting(all_smpls) np.random.seed(3141) DMLPLQ_ext.fit(external_predictions=ext_predictions) - res_dict = {"coef_normal": DMLPQ.coef, "coef_ext": DMLPLQ_ext.coef} + if set_ml_m_ext and not set_ml_g_ext: + # adjust tolerance for the case that ml_m is set to external predictions + # because no preliminary results are available for ml_m, the model use the (external) final predictions for ml_m + tol_rel = 0.1 + tol_abs = 0.1 + else: + tol_rel = 1e-9 + tol_abs = 1e-4 + + res_dict = {"coef_normal": DMLPQ.coef, "coef_ext": DMLPLQ_ext.coef, "tol_rel": tol_rel, "tol_abs": tol_abs} return res_dict @pytest.mark.ci def test_doubleml_pq_coef(doubleml_pq_fixture): - assert math.isclose(doubleml_pq_fixture["coef_normal"], doubleml_pq_fixture["coef_ext"], rel_tol=1e-9, abs_tol=1e-4) + assert math.isclose( + doubleml_pq_fixture["coef_normal"], + doubleml_pq_fixture["coef_ext"], + rel_tol=doubleml_pq_fixture["tol_rel"], + 
abs_tol=doubleml_pq_fixture["tol_abs"], + ) diff --git a/doubleml/utils/dummy_learners.py b/doubleml/utils/dummy_learners.py index 2f893fb2..4d771b20 100644 --- a/doubleml/utils/dummy_learners.py +++ b/doubleml/utils/dummy_learners.py @@ -2,6 +2,23 @@ class dummy_regressor(BaseEstimator): + """ + A dummy regressor that raises an AttributeError when attempting to access + its fit, predict, or set_params methods. + Attributes + ---------- + _estimator_type : str + Type of the estimator, set to "regressor". + Methods + ------- + fit(*args) + Raises AttributeError: "Accessed fit method of DummyRegressor!" + predict(*args) + Raises AttributeError: "Accessed predict method of DummyRegressor!" + set_params(*args) + Raises AttributeError: "Accessed set_params method of DummyRegressor!" + """ + _estimator_type = "regressor" def fit(*args): @@ -15,6 +32,25 @@ def set_params(*args): class dummy_classifier(BaseEstimator): + """ + A dummy classifier that raises an AttributeError when attempting to access + its fit, predict, set_params, or predict_proba methods. + Attributes + ---------- + _estimator_type : str + Type of the estimator, set to "classifier". + Methods + ------- + fit(*args) + Raises AttributeError: "Accessed fit method of DummyClassifier!" + predict(*args) + Raises AttributeError: "Accessed predict method of DummyClassifier!" + set_params(*args) + Raises AttributeError: "Accessed set_params method of DummyClassifier!" + predict_proba(*args, **kwargs) + Raises AttributeError: "Accessed predict_proba method of DummyClassifier!" 
+ """ + _estimator_type = "classifier" def fit(*args): From 4009c47543dc0c158fd53783ff3ed02e4b7f446a Mon Sep 17 00:00:00 2001 From: Jan Teichert-Kluge Date: Fri, 8 Dec 2023 13:21:27 +0100 Subject: [PATCH 62/67] Fix Typo in try except statement --- doubleml/tests/test_dummy_learners.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doubleml/tests/test_dummy_learners.py b/doubleml/tests/test_dummy_learners.py index ee3d979a..a357345c 100644 --- a/doubleml/tests/test_dummy_learners.py +++ b/doubleml/tests/test_dummy_learners.py @@ -42,5 +42,5 @@ def test_clone(dl_fixture): try: _ = clone(dl_fixture["dummy_regressor"]) _ = clone(dl_fixture["dummy_classifier"]) - except Error as e: + except Exception as e: pytest.fail(f"clone() raised an exception:\n{str(e)}\n") From fae1d17b8387556f1cf406b0f526f5f80e818713 Mon Sep 17 00:00:00 2001 From: Jan Teichert-Kluge Date: Fri, 8 Dec 2023 13:44:14 +0100 Subject: [PATCH 63/67] Format to PEP8 standards --- doubleml/double_ml.py | 4 +- doubleml/double_ml_did.py | 14 +++--- doubleml/double_ml_did_cs.py | 20 ++++---- doubleml/double_ml_iivm.py | 17 ++++--- doubleml/double_ml_irm.py | 6 +-- doubleml/double_ml_lpq.py | 4 +- doubleml/double_ml_pliv.py | 26 +++++----- doubleml/double_ml_plr.py | 6 +-- doubleml/double_ml_pq.py | 47 ++++++++++--------- doubleml/double_ml_qte.py | 5 +- .../tests/test_did_external_predictions.py | 5 +- .../tests/test_didcs_external_predictions.py | 4 +- .../tests/test_iivm_external_predictions.py | 5 +- .../tests/test_lpq_external_predictions.py | 2 +- doubleml/tests/test_plr.py | 4 +- doubleml/tests/test_plr_rep_cross.py | 4 +- 16 files changed, 83 insertions(+), 90 deletions(-) diff --git a/doubleml/double_ml.py b/doubleml/double_ml.py index 2ca29b00..dd3547ad 100644 --- a/doubleml/double_ml.py +++ b/doubleml/double_ml.py @@ -63,7 +63,7 @@ def __init__(self, self._sensitivity_implemented = False self._sensitivity_elements = None self._sensitivity_params = None - + # initialize external 
predictions self._external_predictions_implemented = False @@ -127,7 +127,7 @@ def __init__(self, self.draw_sample_splitting() # initialize arrays according to obj_dml_data and the resampling settings - self._psi, self._psi_deriv, self._psi_elements,\ + self._psi, self._psi_deriv, self._psi_elements, \ self._coef, self._se, self._all_coef, self._all_se, self._all_dml1_coef = self._initialize_arrays() # also initialize bootstrap arrays with the default number of bootstrap replications diff --git a/doubleml/double_ml_did.py b/doubleml/double_ml_did.py index 1add5e0d..77bface4 100644 --- a/doubleml/double_ml_did.py +++ b/doubleml/double_ml_did.py @@ -146,9 +146,7 @@ def __init__(self, self._trimming_rule = trimming_rule self._trimming_threshold = trimming_threshold _check_trimming(self._trimming_rule, self._trimming_threshold) - self._sensitivity_implemented = True - self._external_predictions_implemented = True @property @@ -213,8 +211,8 @@ def _nuisance_est(self, smpls, n_jobs_cv, external_predictions, return_models=Fa 'models': None} else: g_hat0 = _dml_cv_predict(self._learner['ml_g'], x, y, smpls=smpls_d0, n_jobs=n_jobs_cv, - est_params=self._get_params('ml_g0'), method=self._predict_method['ml_g'], - return_models=return_models) + est_params=self._get_params('ml_g0'), method=self._predict_method['ml_g'], + return_models=return_models) _check_finite_predictions(g_hat0['preds'], self._learner['ml_g'], 'ml_g', smpls) # adjust target values to consider only compatible subsamples @@ -228,8 +226,8 @@ def _nuisance_est(self, smpls, n_jobs_cv, external_predictions, return_models=Fa 'models': None} else: g_hat1 = _dml_cv_predict(self._learner['ml_g'], x, y, smpls=smpls_d1, n_jobs=n_jobs_cv, - est_params=self._get_params('ml_g1'), method=self._predict_method['ml_g'], - return_models=return_models) + est_params=self._get_params('ml_g1'), method=self._predict_method['ml_g'], + return_models=return_models) _check_finite_predictions(g_hat1['preds'], self._learner['ml_g'], 
'ml_g', smpls) # adjust target values to consider only compatible subsamples @@ -242,8 +240,8 @@ def _nuisance_est(self, smpls, n_jobs_cv, external_predictions, return_models=Fa # nuisance m if external_predictions['ml_m'] is not None: m_hat = {'preds': external_predictions['ml_m'], - 'targets': None, - 'models': None} + 'targets': None, + 'models': None} else: m_hat = _dml_cv_predict(self._learner['ml_m'], x, d, smpls=smpls, n_jobs=n_jobs_cv, est_params=self._get_params('ml_m'), method=self._predict_method['ml_m'], diff --git a/doubleml/double_ml_did_cs.py b/doubleml/double_ml_did_cs.py index 2cdbb003..55b5e32e 100644 --- a/doubleml/double_ml_did_cs.py +++ b/doubleml/double_ml_did_cs.py @@ -148,9 +148,7 @@ def __init__(self, _check_trimming(self._trimming_rule, self._trimming_threshold) self._sensitivity_implemented = True - self._external_predictions_implemented = True - @property def in_sample_normalization(self): @@ -237,9 +235,9 @@ def _nuisance_est(self, smpls, n_jobs_cv, external_predictions, return_models=Fa 'models': None} else: g_hat_d0_t0 = _dml_cv_predict(self._learner['ml_g'], x, y, smpls_d0_t0, n_jobs=n_jobs_cv, - est_params=self._get_params('ml_g_d0_t0'), method=self._predict_method['ml_g'], - return_models=return_models) - + est_params=self._get_params('ml_g_d0_t0'), method=self._predict_method['ml_g'], + return_models=return_models) + g_hat_d0_t0['targets'] = g_hat_d0_t0['targets'].astype(float) g_hat_d0_t0['targets'][np.invert((d == 0) & (t == 0))] = np.nan if external_predictions['ml_g_d0_t1'] is not None: @@ -248,8 +246,8 @@ def _nuisance_est(self, smpls, n_jobs_cv, external_predictions, return_models=Fa 'models': None} else: g_hat_d0_t1 = _dml_cv_predict(self._learner['ml_g'], x, y, smpls_d0_t1, n_jobs=n_jobs_cv, - est_params=self._get_params('ml_g_d0_t1'), method=self._predict_method['ml_g'], - return_models=return_models) + est_params=self._get_params('ml_g_d0_t1'), method=self._predict_method['ml_g'], + return_models=return_models) 
g_hat_d0_t1['targets'] = g_hat_d0_t1['targets'].astype(float) g_hat_d0_t1['targets'][np.invert((d == 0) & (t == 1))] = np.nan if external_predictions['ml_g_d1_t0'] is not None: @@ -258,8 +256,8 @@ def _nuisance_est(self, smpls, n_jobs_cv, external_predictions, return_models=Fa 'models': None} else: g_hat_d1_t0 = _dml_cv_predict(self._learner['ml_g'], x, y, smpls_d1_t0, n_jobs=n_jobs_cv, - est_params=self._get_params('ml_g_d1_t0'), method=self._predict_method['ml_g'], - return_models=return_models) + est_params=self._get_params('ml_g_d1_t0'), method=self._predict_method['ml_g'], + return_models=return_models) g_hat_d1_t0['targets'] = g_hat_d1_t0['targets'].astype(float) g_hat_d1_t0['targets'][np.invert((d == 1) & (t == 0))] = np.nan if external_predictions['ml_g_d1_t1'] is not None: @@ -268,8 +266,8 @@ def _nuisance_est(self, smpls, n_jobs_cv, external_predictions, return_models=Fa 'models': None} else: g_hat_d1_t1 = _dml_cv_predict(self._learner['ml_g'], x, y, smpls_d1_t1, n_jobs=n_jobs_cv, - est_params=self._get_params('ml_g_d1_t1'), method=self._predict_method['ml_g'], - return_models=return_models) + est_params=self._get_params('ml_g_d1_t1'), method=self._predict_method['ml_g'], + return_models=return_models) g_hat_d1_t1['targets'] = g_hat_d1_t1['targets'].astype(float) g_hat_d1_t1['targets'][np.invert((d == 1) & (t == 1))] = np.nan diff --git a/doubleml/double_ml_iivm.py b/doubleml/double_ml_iivm.py index 5e3cb073..d981250e 100644 --- a/doubleml/double_ml_iivm.py +++ b/doubleml/double_ml_iivm.py @@ -193,7 +193,6 @@ def __init__(self, raise TypeError("subgroups['never_takers'] must be True or False. 
" f'Got {str(subgroups["never_takers"])}.') self.subgroups = subgroups - self._external_predictions_implemented = True @property @@ -266,8 +265,8 @@ def _nuisance_est(self, smpls, n_jobs_cv, external_predictions, return_models=Fa 'models': None} else: g_hat0 = _dml_cv_predict(self._learner['ml_g'], x, y, smpls=smpls_z0, n_jobs=n_jobs_cv, - est_params=self._get_params('ml_g0'), method=self._predict_method['ml_g'], - return_models=return_models) + est_params=self._get_params('ml_g0'), method=self._predict_method['ml_g'], + return_models=return_models) _check_finite_predictions(g_hat0['preds'], self._learner['ml_g'], 'ml_g', smpls) # adjust target values to consider only compatible subsamples g_hat0['targets'] = g_hat0['targets'].astype(float) @@ -289,8 +288,8 @@ def _nuisance_est(self, smpls, n_jobs_cv, external_predictions, return_models=Fa 'models': None} else: g_hat1 = _dml_cv_predict(self._learner['ml_g'], x, y, smpls=smpls_z1, n_jobs=n_jobs_cv, - est_params=self._get_params('ml_g1'), method=self._predict_method['ml_g'], - return_models=return_models) + est_params=self._get_params('ml_g1'), method=self._predict_method['ml_g'], + return_models=return_models) _check_finite_predictions(g_hat1['preds'], self._learner['ml_g'], 'ml_g', smpls) # adjust target values to consider only compatible subsamples g_hat1['targets'] = g_hat1['targets'].astype(float) @@ -328,8 +327,8 @@ def _nuisance_est(self, smpls, n_jobs_cv, external_predictions, return_models=Fa 'models': None} else: r_hat0 = _dml_cv_predict(self._learner['ml_r'], x, d, smpls=smpls_z0, n_jobs=n_jobs_cv, - est_params=self._get_params('ml_r0'), method=self._predict_method['ml_r'], - return_models=return_models) + est_params=self._get_params('ml_r0'), method=self._predict_method['ml_r'], + return_models=return_models) else: r_hat0 = {'preds': np.zeros_like(d), 'targets': np.zeros_like(d), 'models': None} if not r0: @@ -346,8 +345,8 @@ def _nuisance_est(self, smpls, n_jobs_cv, external_predictions, return_models=Fa 
'models': None} else: r_hat1 = _dml_cv_predict(self._learner['ml_r'], x, d, smpls=smpls_z1, n_jobs=n_jobs_cv, - est_params=self._get_params('ml_r1'), method=self._predict_method['ml_r'], - return_models=return_models) + est_params=self._get_params('ml_r1'), method=self._predict_method['ml_r'], + return_models=return_models) else: r_hat1 = {'preds': np.ones_like(d), 'targets': np.ones_like(d), 'models': None} if not r1: diff --git a/doubleml/double_ml_irm.py b/doubleml/double_ml_irm.py index 0c049b66..2df99cd4 100644 --- a/doubleml/double_ml_irm.py +++ b/doubleml/double_ml_irm.py @@ -161,7 +161,6 @@ def __init__(self, _check_trimming(self._trimming_rule, self._trimming_threshold) self._sensitivity_implemented = True - self._external_predictions_implemented = True @property @@ -216,7 +215,6 @@ def _nuisance_est(self, smpls, n_jobs_cv, external_predictions, return_models=Fa force_all_finite=False) # get train indices for d == 0 and d == 1 smpls_d0, smpls_d1 = _get_cond_smpls(smpls, d) - g0_external = external_predictions['ml_g0'] is not None g1_external = external_predictions['ml_g1'] is not None m_external = external_predictions['ml_m'] is not None @@ -250,8 +248,8 @@ def _nuisance_est(self, smpls, n_jobs_cv, external_predictions, return_models=Fa 'models': None} else: g_hat1 = _dml_cv_predict(self._learner['ml_g'], x, y, smpls=smpls_d1, n_jobs=n_jobs_cv, - est_params=self._get_params('ml_g1'), method=self._predict_method['ml_g'], - return_models=return_models) + est_params=self._get_params('ml_g1'), method=self._predict_method['ml_g'], + return_models=return_models) _check_finite_predictions(g_hat1['preds'], self._learner['ml_g'], 'ml_g', smpls) # adjust target values to consider only compatible subsamples g_hat1['targets'] = _cond_targets(g_hat1['targets'], cond_sample=(d == 1)) diff --git a/doubleml/double_ml_lpq.py b/doubleml/double_ml_lpq.py index 6efd375f..d2d06ac2 100644 --- a/doubleml/double_ml_lpq.py +++ b/doubleml/double_ml_lpq.py @@ -188,7 +188,6 @@ def 
__init__( stratify=strata, ) self._smpls = obj_dml_resampling.split_samples() - self._external_predictions_implemented = True @property @@ -385,7 +384,8 @@ def _nuisance_est(self, smpls, n_jobs_cv, external_predictions, return_models=Fa # preliminary propensity for z ml_m_z_prelim = clone(fitted_models["ml_m_z"][i_fold]) - m_z_hat_prelim = _dml_cv_predict(ml_m_z_prelim, x_train_1, z_train_1, method="predict_proba", smpls=smpls_prelim)[ + m_z_hat_prelim = _dml_cv_predict(ml_m_z_prelim, x_train_1, z_train_1, + method="predict_proba", smpls=smpls_prelim)[ "preds" ] diff --git a/doubleml/double_ml_pliv.py b/doubleml/double_ml_pliv.py index b7f6259c..6725e925 100644 --- a/doubleml/double_ml_pliv.py +++ b/doubleml/double_ml_pliv.py @@ -145,7 +145,6 @@ def __init__(self, if 'ml_g' in self._learner: self._predict_method['ml_g'] = 'predict' self._initialize_ml_nuisance_params() - self._external_predictions_implemented = True @classmethod @@ -320,8 +319,8 @@ def _nuisance_est_partial_x(self, smpls, n_jobs_cv, external_predictions, return # nuisance l if external_predictions['ml_l'] is not None: l_hat = {'preds': external_predictions['ml_l'], - 'targets': None, - 'models': None} + 'targets': None, + 'models': None} else: l_hat = _dml_cv_predict(self._learner['ml_l'], x, y, smpls=smpls, n_jobs=n_jobs_cv, est_params=self._get_params('ml_l'), method=self._predict_method['ml_l'], @@ -335,11 +334,11 @@ def _nuisance_est_partial_x(self, smpls, n_jobs_cv, external_predictions, return if self._dml_data.n_instr == 1: # one instrument: just identified x, z = check_X_y(x, np.ravel(self._dml_data.z), - force_all_finite=False) + force_all_finite=False) if external_predictions['ml_m'] is not None: m_hat = {'preds': external_predictions['ml_m'], - 'targets': None, - 'models': None} + 'targets': None, + 'models': None} else: m_hat = _dml_cv_predict(self._learner['ml_m'], x, z, smpls=smpls, n_jobs=n_jobs_cv, est_params=self._get_params('ml_m'), method=self._predict_method['ml_m'], @@ -355,16 
+354,17 @@ def _nuisance_est_partial_x(self, smpls, n_jobs_cv, external_predictions, return for i_instr in range(self._dml_data.n_instr): z = self._dml_data.z x, this_z = check_X_y(x, z[:, i_instr], - force_all_finite=False) + force_all_finite=False) if external_predictions['ml_m_' + self._dml_data.z_cols[i_instr]] is not None: m_hat['preds'][:, i_instr] = external_predictions['ml_m_' + self._dml_data.z_cols[i_instr]] - predictions['ml_m_' + self._dml_data.z_cols[i_instr]] = external_predictions['ml_m_' + self._dml_data.z_cols[i_instr]] + predictions['ml_m_' + self._dml_data.z_cols[i_instr]] = external_predictions[ + 'ml_m_' + self._dml_data.z_cols[i_instr]] targets['ml_m_' + self._dml_data.z_cols[i_instr]] = None models['ml_m_' + self._dml_data.z_cols[i_instr]] = None else: res_cv_predict = _dml_cv_predict(self._learner['ml_m'], x, this_z, smpls=smpls, n_jobs=n_jobs_cv, - est_params=self._get_params('ml_m_' + self._dml_data.z_cols[i_instr]), - method=self._predict_method['ml_m'], return_models=return_models) + est_params=self._get_params('ml_m_' + self._dml_data.z_cols[i_instr]), + method=self._predict_method['ml_m'], return_models=return_models) m_hat['preds'][:, i_instr] = res_cv_predict['preds'] @@ -394,8 +394,8 @@ def _nuisance_est_partial_x(self, smpls, n_jobs_cv, external_predictions, return # get an initial estimate for theta using the partialling out score if external_predictions['ml_g'] is not None: g_hat = {'preds': external_predictions['ml_g'], - 'targets': None, - 'models': None} + 'targets': None, + 'models': None} else: psi_a = -np.multiply(d - r_hat['preds'], z - m_hat['preds']) psi_b = np.multiply(z - m_hat['preds'], y - l_hat['preds']) @@ -425,7 +425,7 @@ def _score_elements(self, y, z, d, l_hat, m_hat, r_hat, g_hat, smpls): # compute residuals u_hat = y - l_hat w_hat = d - r_hat - v_hat = z- m_hat + v_hat = z - m_hat r_hat_tilde = None if self._dml_data.n_instr > 1: diff --git a/doubleml/double_ml_plr.py b/doubleml/double_ml_plr.py index 
c374d6ee..41e7df2f 100644 --- a/doubleml/double_ml_plr.py +++ b/doubleml/double_ml_plr.py @@ -150,7 +150,6 @@ def __init__(self, self._initialize_ml_nuisance_params() self._sensitivity_implemented = True - self._external_predictions_implemented = True def _initialize_ml_nuisance_params(self): @@ -173,7 +172,6 @@ def _nuisance_est(self, smpls, n_jobs_cv, external_predictions, return_models=Fa force_all_finite=False) x, d = check_X_y(x, self._dml_data.d, force_all_finite=False) - m_external = external_predictions['ml_m'] is not None l_external = external_predictions['ml_l'] is not None if 'ml_g' in self._learner: @@ -224,8 +222,8 @@ def _nuisance_est(self, smpls, n_jobs_cv, external_predictions, return_models=Fa # nuisance g if g_external: g_hat = {'preds': external_predictions['ml_g'], - 'targets': None, - 'models': None} + 'targets': None, + 'models': None} else: g_hat = _dml_cv_predict(self._learner['ml_g'], x, y - theta_initial*d, smpls=smpls, n_jobs=n_jobs_cv, est_params=self._get_params('ml_g'), method=self._predict_method['ml_g'], diff --git a/doubleml/double_ml_pq.py b/doubleml/double_ml_pq.py index e7f42eae..dee3dc12 100644 --- a/doubleml/double_ml_pq.py +++ b/doubleml/double_ml_pq.py @@ -1,5 +1,4 @@ import numpy as np -import copy from sklearn.base import clone from sklearn.utils import check_X_y from sklearn.model_selection import StratifiedKFold, train_test_split @@ -115,25 +114,29 @@ class DoubleMLPQ(NonLinearScoreMixin, DoubleML): d 0.553878 0.149858 3.696011 0.000219 0.260161 0.847595 """ - def __init__( - self, - obj_dml_data, - ml_g, - ml_m, - treatment=1, - quantile=0.5, - n_folds=5, - n_rep=1, - score="PQ", - dml_procedure="dml2", - normalize_ipw=True, - kde=None, - trimming_rule="truncate", - trimming_threshold=1e-2, - draw_sample_splitting=True, - apply_cross_fitting=True, - ): - super().__init__(obj_dml_data, n_folds, n_rep, score, dml_procedure, draw_sample_splitting, apply_cross_fitting) + def __init__(self, + obj_dml_data, + ml_g, + ml_m, + 
treatment=1, + quantile=0.5, + n_folds=5, + n_rep=1, + score='PQ', + dml_procedure='dml2', + normalize_ipw=True, + kde=None, + trimming_rule='truncate', + trimming_threshold=1e-2, + draw_sample_splitting=True, + apply_cross_fitting=True): + super().__init__(obj_dml_data, + n_folds, + n_rep, + score, + dml_procedure, + draw_sample_splitting, + apply_cross_fitting) self._quantile = quantile self._treatment = treatment @@ -141,7 +144,8 @@ def __init__( self._kde = _default_kde else: if not callable(kde): - raise TypeError("kde should be either a callable or None. " "%r was passed." % kde) + raise TypeError("kde should be either a callable or None. " + "%r was passed." % kde) self._kde = kde self._normalize_ipw = normalize_ipw @@ -182,7 +186,6 @@ def __init__( stratify=self._dml_data.d, ) self._smpls = obj_dml_resampling.split_samples() - self._external_predictions_implemented = True @property diff --git a/doubleml/double_ml_qte.py b/doubleml/double_ml_qte.py index 8f2286d1..9633434f 100644 --- a/doubleml/double_ml_qte.py +++ b/doubleml/double_ml_qte.py @@ -161,7 +161,7 @@ def __init__(self, self._modellist_0, self._modellist_1 = self._initialize_models() # initialize arrays according to obj_dml_data and the resampling settings - self._psi0, self._psi1, self._psi0_deriv, self._psi1_deriv,\ + self._psi0, self._psi1, self._psi0_deriv, self._psi1_deriv, \ self._coef, self._se, self._all_coef, self._all_se, self._all_dml1_coef = self._initialize_arrays() # also initialize bootstrap arrays with the default number of bootstrap replications @@ -414,7 +414,7 @@ def fit(self, n_jobs_models=None, n_jobs_cv=None, store_predictions=True, store_ ------- self : object """ - + if external_predictions is not None: raise NotImplementedError(f"External predictions not implemented for {self.__class__.__name__}.") @@ -423,7 +423,6 @@ def fit(self, n_jobs_models=None, n_jobs_cv=None, store_predictions=True, store_ fitted_models = parallel(delayed(self._fit_quantile)(i_quant, n_jobs_cv, 
store_predictions, store_models) for i_quant in range(self.n_quantiles)) - # combine the estimates and scores for i_quant in range(self.n_quantiles): self._i_quant = i_quant diff --git a/doubleml/tests/test_did_external_predictions.py b/doubleml/tests/test_did_external_predictions.py index 12d7e3c9..0d92c9e8 100644 --- a/doubleml/tests/test_did_external_predictions.py +++ b/doubleml/tests/test_did_external_predictions.py @@ -1,12 +1,13 @@ import numpy as np import pytest import math -from sklearn.linear_model import LinearRegression, LassoCV, LogisticRegression -from doubleml import DoubleMLData, DoubleMLDID +from sklearn.linear_model import LinearRegression, LogisticRegression +from doubleml import DoubleMLDID from doubleml.datasets import make_did_SZ2020 from doubleml.utils import dummy_regressor, dummy_classifier from ._utils import draw_smpls + @pytest.fixture(scope="module", params=["observational", "experimental"]) def did_score(request): return request.param diff --git a/doubleml/tests/test_didcs_external_predictions.py b/doubleml/tests/test_didcs_external_predictions.py index 0eed900a..1498e571 100644 --- a/doubleml/tests/test_didcs_external_predictions.py +++ b/doubleml/tests/test_didcs_external_predictions.py @@ -1,8 +1,8 @@ import numpy as np import pytest import math -from sklearn.linear_model import LinearRegression, LassoCV, LogisticRegression -from doubleml import DoubleMLData, DoubleMLDIDCS +from sklearn.linear_model import LinearRegression, LogisticRegression +from doubleml import DoubleMLDIDCS from doubleml.datasets import make_did_SZ2020 from doubleml.utils import dummy_regressor, dummy_classifier from ._utils import draw_smpls diff --git a/doubleml/tests/test_iivm_external_predictions.py b/doubleml/tests/test_iivm_external_predictions.py index 40bb02db..cce62032 100644 --- a/doubleml/tests/test_iivm_external_predictions.py +++ b/doubleml/tests/test_iivm_external_predictions.py @@ -1,7 +1,7 @@ import numpy as np import pytest import math -from 
sklearn.linear_model import LinearRegression, LassoCV, LogisticRegression +from sklearn.linear_model import LinearRegression, LogisticRegression from doubleml import DoubleMLIIVM, DoubleMLData from doubleml.datasets import make_iivm_data from doubleml.utils import dummy_regressor, dummy_classifier @@ -45,14 +45,13 @@ def adapted_doubleml_fixture(dml_procedure, n_rep): np.random.seed(3141) DMLIIVM.fit(store_predictions=True) - + ext_predictions["d"]["ml_g0"] = DMLIIVM.predictions["ml_g0"][:, :, 0] ext_predictions["d"]["ml_g1"] = DMLIIVM.predictions["ml_g1"][:, :, 0] ext_predictions["d"]["ml_m"] = DMLIIVM.predictions["ml_m"][:, :, 0] ext_predictions["d"]["ml_r0"] = DMLIIVM.predictions["ml_r0"][:, :, 0] ext_predictions["d"]["ml_r1"] = DMLIIVM.predictions["ml_r1"][:, :, 0] - DMLIIVM_ext = DoubleMLIIVM( ml_g=dummy_regressor(), ml_m=dummy_classifier(), ml_r=dummy_classifier(), **kwargs ) diff --git a/doubleml/tests/test_lpq_external_predictions.py b/doubleml/tests/test_lpq_external_predictions.py index 2a13b4bc..a5a9a5bb 100644 --- a/doubleml/tests/test_lpq_external_predictions.py +++ b/doubleml/tests/test_lpq_external_predictions.py @@ -4,7 +4,7 @@ from sklearn.linear_model import LogisticRegression from doubleml import DoubleMLLPQ, DoubleMLData from doubleml.datasets import make_iivm_data -from doubleml.utils import dummy_regressor, dummy_classifier +from doubleml.utils import dummy_classifier from ._utils import draw_smpls diff --git a/doubleml/tests/test_plr.py b/doubleml/tests/test_plr.py index b17d6802..c3df97c4 100644 --- a/doubleml/tests/test_plr.py +++ b/doubleml/tests/test_plr.py @@ -76,7 +76,7 @@ def dml_plr_fixture(generate_data1, learner, score, dml_procedure): res_manual = fit_plr(y, x, d, clone(learner), clone(learner), clone(learner), all_smpls, dml_procedure, score) - + np.random.seed(3141) # test with external nuisance predictions if score == 'partialling out': @@ -104,7 +104,7 @@ def dml_plr_fixture(generate_data1, learner, score, dml_procedure): 
prediction_dict = {'d': {'ml_l': dml_plr_obj.predictions['ml_l'].reshape(-1, 1), 'ml_m': dml_plr_obj.predictions['ml_m'].reshape(-1, 1), 'ml_g': dml_plr_obj.predictions['ml_g'].reshape(-1, 1)}} - + dml_plr_obj_ext.fit(external_predictions=prediction_dict) diff --git a/doubleml/tests/test_plr_rep_cross.py b/doubleml/tests/test_plr_rep_cross.py index 4f95f10a..9bbc2616 100644 --- a/doubleml/tests/test_plr_rep_cross.py +++ b/doubleml/tests/test_plr_rep_cross.py @@ -74,7 +74,7 @@ def dml_plr_fixture(generate_data1, learner, score, dml_procedure, n_rep): res_manual = fit_plr(y, x, d, _clone(learner), _clone(learner), _clone(learner), all_smpls, dml_procedure, score, n_rep) - + np.random.seed(3141) # test with external nuisance predictions if score == 'partialling out': @@ -104,7 +104,7 @@ def dml_plr_fixture(generate_data1, learner, score, dml_procedure, n_rep): prediction_dict = {'d': {'ml_l': dml_plr_obj.predictions['ml_l'].reshape(-1, n_rep), 'ml_m': dml_plr_obj.predictions['ml_m'].reshape(-1, n_rep), 'ml_g': dml_plr_obj.predictions['ml_g'].reshape(-1, n_rep)}} - + dml_plr_obj_ext.fit(external_predictions=prediction_dict) res_dict = {'coef': dml_plr_obj.coef, From 1ef67ab91aab3661ad95994868767ca2cfe18f4d Mon Sep 17 00:00:00 2001 From: Sven Klaassen <47529404+SvenKlaassen@users.noreply.github.com> Date: Mon, 11 Dec 2023 06:58:42 +0100 Subject: [PATCH 64/67] update irm tor remove deepcopy --- doubleml/double_ml_irm.py | 5 +++-- doubleml/tests/_utils_irm_manual.py | 12 ++++++++---- doubleml/tests/test_irm.py | 12 ++++++------ doubleml/tests/test_irm_external_predictions.py | 2 +- 4 files changed, 18 insertions(+), 13 deletions(-) diff --git a/doubleml/double_ml_irm.py b/doubleml/double_ml_irm.py index 2df99cd4..9149462f 100644 --- a/doubleml/double_ml_irm.py +++ b/doubleml/double_ml_irm.py @@ -1,7 +1,6 @@ import numpy as np import pandas as pd import warnings -import copy from sklearn.utils import check_X_y from sklearn.utils.multiclass import type_of_target @@ -304,13 
+303,15 @@ def _score_elements(self, y, d, g_hat0, g_hat1, m_hat, smpls): for _, test_index in smpls: p_hat[test_index] = np.mean(d[test_index]) - m_hat_adj = copy.deepcopy(m_hat) + m_hat_adj = np.full_like(m_hat, np.nan, dtype='float64') if self.normalize_ipw: if self.dml_procedure == 'dml1': for _, test_index in smpls: m_hat_adj[test_index] = _normalize_ipw(m_hat[test_index], d[test_index]) else: m_hat_adj = _normalize_ipw(m_hat, d) + else: + m_hat_adj = m_hat # compute residuals u_hat0 = y - g_hat0 diff --git a/doubleml/tests/_utils_irm_manual.py b/doubleml/tests/_utils_irm_manual.py index 60c09db8..c46fe605 100644 --- a/doubleml/tests/_utils_irm_manual.py +++ b/doubleml/tests/_utils_irm_manual.py @@ -1,5 +1,4 @@ import numpy as np -import copy from sklearn.base import clone, is_classifier from ._utils_boot import boot_manual, draw_weights @@ -131,10 +130,12 @@ def irm_dml1(y, x, d, g_hat0_list, g_hat1_list, m_hat_list, p_hat_list, smpls, s u_hat0, u_hat1, g_hat0, g_hat1, m_hat, p_hat = compute_iivm_residuals( y, g_hat0_list, g_hat1_list, m_hat_list, p_hat_list, smpls) - m_hat_adj = copy.deepcopy(m_hat) + m_hat_adj = np.full_like(m_hat, np.nan, dtype='float64') if normalize_ipw: for _, test_index in smpls: m_hat_adj[test_index] = _normalize_ipw(m_hat[test_index], d[test_index]) + else: + m_hat_adj = m_hat for idx, (_, test_index) in enumerate(smpls): thetas[idx] = irm_orth(g_hat0[test_index], g_hat1[test_index], @@ -165,9 +166,10 @@ def irm_dml2(y, x, d, g_hat0_list, g_hat1_list, m_hat_list, p_hat_list, smpls, s u_hat0, u_hat1, g_hat0, g_hat1, m_hat, p_hat = compute_iivm_residuals( y, g_hat0_list, g_hat1_list, m_hat_list, p_hat_list, smpls) - m_hat_adj = copy.deepcopy(m_hat) if normalize_ipw: m_hat_adj = _normalize_ipw(m_hat, d) + else: + m_hat_adj = m_hat theta_hat = irm_orth(g_hat0, g_hat1, m_hat_adj, p_hat, u_hat0, u_hat1, d, score) @@ -243,13 +245,15 @@ def boot_irm_single_split(theta, y, d, g_hat0_list, g_hat1_list, m_hat_list, p_h u_hat0, u_hat1, g_hat0, 
g_hat1, m_hat, p_hat = compute_iivm_residuals( y, g_hat0_list, g_hat1_list, m_hat_list, p_hat_list, smpls) - m_hat_adj = copy.deepcopy(m_hat) + m_hat_adj = np.full_like(m_hat, np.nan, dtype='float64') if normalize_ipw: if dml_procedure == 'dml1': for _, test_index in smpls: m_hat_adj[test_index] = _normalize_ipw(m_hat[test_index], d[test_index]) else: m_hat_adj = _normalize_ipw(m_hat, d) + else: + m_hat_adj = m_hat if apply_cross_fitting: if score == 'ATE': diff --git a/doubleml/tests/test_irm.py b/doubleml/tests/test_irm.py index 7c834754..73dd5cca 100644 --- a/doubleml/tests/test_irm.py +++ b/doubleml/tests/test_irm.py @@ -150,21 +150,21 @@ def dml_irm_fixture(generate_data_irm, learner, score, dml_procedure, normalize_ @pytest.mark.ci def test_dml_irm_coef(dml_irm_fixture): - assert math.isclose(dml_irm_fixture['coef'], + assert math.isclose(dml_irm_fixture['coef'][0], dml_irm_fixture['coef_manual'], rel_tol=1e-9, abs_tol=1e-4) - assert math.isclose(dml_irm_fixture['coef'], - dml_irm_fixture['coef_ext'], + assert math.isclose(dml_irm_fixture['coef'][0], + dml_irm_fixture['coef_ext'][0], rel_tol=1e-9, abs_tol=1e-4) @pytest.mark.ci def test_dml_irm_se(dml_irm_fixture): - assert math.isclose(dml_irm_fixture['se'], + assert math.isclose(dml_irm_fixture['se'][0], dml_irm_fixture['se_manual'], rel_tol=1e-9, abs_tol=1e-4) - assert math.isclose(dml_irm_fixture['se'], - dml_irm_fixture['se_ext'], + assert math.isclose(dml_irm_fixture['se'][0], + dml_irm_fixture['se_ext'][0], rel_tol=1e-9, abs_tol=1e-4) diff --git a/doubleml/tests/test_irm_external_predictions.py b/doubleml/tests/test_irm_external_predictions.py index ee55ce66..d291af29 100644 --- a/doubleml/tests/test_irm_external_predictions.py +++ b/doubleml/tests/test_irm_external_predictions.py @@ -67,7 +67,7 @@ def doubleml_irm_fixture(irm_score, dml_procedure, n_rep, set_ml_m_ext, set_ml_g np.random.seed(3141) DMLIRM_ext.fit(external_predictions=ext_predictions) - res_dict = {"coef_normal": DMLIRM.coef, "coef_ext": 
DMLIRM_ext.coef} + res_dict = {"coef_normal": DMLIRM.coef[0], "coef_ext": DMLIRM_ext.coef[0]} return res_dict From 022976d51a5a4cdd7d489b3e2600ccab823bf0fd Mon Sep 17 00:00:00 2001 From: Sven Klaassen <47529404+SvenKlaassen@users.noreply.github.com> Date: Mon, 11 Dec 2023 07:12:07 +0100 Subject: [PATCH 65/67] remove deepcopy from lpq --- doubleml/double_ml_lpq.py | 25 ++++++++++++++++++++----- doubleml/tests/test_lpq.py | 4 ++-- 2 files changed, 22 insertions(+), 7 deletions(-) diff --git a/doubleml/double_ml_lpq.py b/doubleml/double_ml_lpq.py index d2d06ac2..2b7260eb 100644 --- a/doubleml/double_ml_lpq.py +++ b/doubleml/double_ml_lpq.py @@ -1,5 +1,4 @@ import numpy as np -import copy from sklearn.utils.multiclass import type_of_target from sklearn.base import clone from sklearn.utils import check_X_y @@ -316,10 +315,26 @@ def _nuisance_est(self, smpls, n_jobs_cv, external_predictions, return_models=Fa "targets": np.full(shape=self._dml_data.n_obs, fill_value=np.nan), "preds": np.full(shape=self._dml_data.n_obs, fill_value=np.nan), } - m_d_z0_hat = copy.deepcopy(m_z_hat) - m_d_z1_hat = copy.deepcopy(m_z_hat) - g_du_z0_hat = copy.deepcopy(m_z_hat) - g_du_z1_hat = copy.deepcopy(m_z_hat) + m_d_z0_hat = { + "models": None, + "targets": np.full(shape=self._dml_data.n_obs, fill_value=np.nan), + "preds": np.full(shape=self._dml_data.n_obs, fill_value=np.nan), + } + m_d_z1_hat = { + "models": None, + "targets": np.full(shape=self._dml_data.n_obs, fill_value=np.nan), + "preds": np.full(shape=self._dml_data.n_obs, fill_value=np.nan), + } + g_du_z0_hat = { + "models": None, + "targets": np.full(shape=self._dml_data.n_obs, fill_value=np.nan), + "preds": np.full(shape=self._dml_data.n_obs, fill_value=np.nan), + } + g_du_z1_hat = { + "models": None, + "targets": np.full(shape=self._dml_data.n_obs, fill_value=np.nan), + "preds": np.full(shape=self._dml_data.n_obs, fill_value=np.nan), + } # initialize models fitted_models = {} diff --git a/doubleml/tests/test_lpq.py 
b/doubleml/tests/test_lpq.py index e7550e06..beb8b6a0 100644 --- a/doubleml/tests/test_lpq.py +++ b/doubleml/tests/test_lpq.py @@ -119,9 +119,9 @@ def dml_lpq_fixture(generate_data_local_quantiles, treatment, quantile, learner, normalize_ipw=normalize_ipw, kde=kde, n_rep=1, trimming_threshold=trimming_threshold) - res_dict = {'coef': dml_lpq_obj.coef, + res_dict = {'coef': dml_lpq_obj.coef[0], 'coef_manual': res_manual['lpq'], - 'se': dml_lpq_obj.se, + 'se': dml_lpq_obj.se[0], 'se_manual': res_manual['se']} return res_dict From 3044f5c557970aa0f6783d9c531c83e5443ddc38 Mon Sep 17 00:00:00 2001 From: Sven Klaassen <47529404+SvenKlaassen@users.noreply.github.com> Date: Mon, 11 Dec 2023 07:59:57 +0100 Subject: [PATCH 66/67] renaming external prediction tests --- .../tests/test_did_external_predictions.py | 20 ++++++++-------- .../tests/test_didcs_external_predictions.py | 24 +++++++++---------- .../tests/test_iivm_external_predictions.py | 20 ++++++++-------- .../tests/test_irm_external_predictions.py | 16 ++++++------- .../tests/test_lpq_external_predictions.py | 24 +++++++++---------- .../tests/test_pliv_external_predictions.py | 20 ++++++++-------- .../tests/test_plr_external_predictions.py | 16 ++++++------- .../tests/test_pq_external_predictions.py | 18 +++++++------- 8 files changed, 79 insertions(+), 79 deletions(-) diff --git a/doubleml/tests/test_did_external_predictions.py b/doubleml/tests/test_did_external_predictions.py index 0d92c9e8..59a1e6f1 100644 --- a/doubleml/tests/test_did_external_predictions.py +++ b/doubleml/tests/test_did_external_predictions.py @@ -35,21 +35,21 @@ def doubleml_did_fixture(did_score, dml_procedure, n_rep): "dml_procedure": dml_procedure, "draw_sample_splitting": False } - DMLDID = DoubleMLDID(ml_g=LinearRegression(), ml_m=LogisticRegression(), **kwargs) - DMLDID.set_sample_splitting(all_smpls) + dml_did = DoubleMLDID(ml_g=LinearRegression(), ml_m=LogisticRegression(), **kwargs) + dml_did.set_sample_splitting(all_smpls) 
np.random.seed(3141) - DMLDID.fit(store_predictions=True) + dml_did.fit(store_predictions=True) - ext_predictions["d"]["ml_g0"] = DMLDID.predictions["ml_g0"][:, :, 0] - ext_predictions["d"]["ml_g1"] = DMLDID.predictions["ml_g1"][:, :, 0] - ext_predictions["d"]["ml_m"] = DMLDID.predictions["ml_m"][:, :, 0] + ext_predictions["d"]["ml_g0"] = dml_did.predictions["ml_g0"][:, :, 0] + ext_predictions["d"]["ml_g1"] = dml_did.predictions["ml_g1"][:, :, 0] + ext_predictions["d"]["ml_m"] = dml_did.predictions["ml_m"][:, :, 0] - DMLDID_ext = DoubleMLDID(ml_g=dummy_regressor(), ml_m=dummy_classifier(), **kwargs) - DMLDID_ext.set_sample_splitting(all_smpls) + dml_did_ext = DoubleMLDID(ml_g=dummy_regressor(), ml_m=dummy_classifier(), **kwargs) + dml_did_ext.set_sample_splitting(all_smpls) np.random.seed(3141) - DMLDID_ext.fit(external_predictions=ext_predictions) + dml_did_ext.fit(external_predictions=ext_predictions) - res_dict = {"coef_normal": DMLDID.coef, "coef_ext": DMLDID_ext.coef} + res_dict = {"coef_normal": dml_did.coef, "coef_ext": dml_did_ext.coef} return res_dict diff --git a/doubleml/tests/test_didcs_external_predictions.py b/doubleml/tests/test_didcs_external_predictions.py index 1498e571..90aafaef 100644 --- a/doubleml/tests/test_didcs_external_predictions.py +++ b/doubleml/tests/test_didcs_external_predictions.py @@ -36,23 +36,23 @@ def doubleml_didcs_fixture(did_score, dml_procedure, n_rep): "dml_procedure": dml_procedure, "draw_sample_splitting": False } - DMLDIDCS = DoubleMLDIDCS(ml_g=LinearRegression(), ml_m=LogisticRegression(), **kwargs) - DMLDIDCS.set_sample_splitting(all_smpls) + dml_did_cs = DoubleMLDIDCS(ml_g=LinearRegression(), ml_m=LogisticRegression(), **kwargs) + dml_did_cs.set_sample_splitting(all_smpls) np.random.seed(3141) - DMLDIDCS.fit(store_predictions=True) + dml_did_cs.fit(store_predictions=True) - ext_predictions["d"]["ml_g_d0_t0"] = DMLDIDCS.predictions["ml_g_d0_t0"][:, :, 0] - ext_predictions["d"]["ml_g_d0_t1"] = 
DMLDIDCS.predictions["ml_g_d0_t1"][:, :, 0] - ext_predictions["d"]["ml_g_d1_t0"] = DMLDIDCS.predictions["ml_g_d1_t0"][:, :, 0] - ext_predictions["d"]["ml_g_d1_t1"] = DMLDIDCS.predictions["ml_g_d1_t1"][:, :, 0] - ext_predictions["d"]["ml_m"] = DMLDIDCS.predictions["ml_m"][:, :, 0] + ext_predictions["d"]["ml_g_d0_t0"] = dml_did_cs.predictions["ml_g_d0_t0"][:, :, 0] + ext_predictions["d"]["ml_g_d0_t1"] = dml_did_cs.predictions["ml_g_d0_t1"][:, :, 0] + ext_predictions["d"]["ml_g_d1_t0"] = dml_did_cs.predictions["ml_g_d1_t0"][:, :, 0] + ext_predictions["d"]["ml_g_d1_t1"] = dml_did_cs.predictions["ml_g_d1_t1"][:, :, 0] + ext_predictions["d"]["ml_m"] = dml_did_cs.predictions["ml_m"][:, :, 0] - DMLDIDCS_ext = DoubleMLDIDCS(ml_g=dummy_regressor(), ml_m=dummy_classifier(), **kwargs) - DMLDIDCS_ext.set_sample_splitting(all_smpls) + dml_did_cs_ext = DoubleMLDIDCS(ml_g=dummy_regressor(), ml_m=dummy_classifier(), **kwargs) + dml_did_cs_ext.set_sample_splitting(all_smpls) np.random.seed(3141) - DMLDIDCS_ext.fit(external_predictions=ext_predictions) + dml_did_cs_ext.fit(external_predictions=ext_predictions) - res_dict = {"coef_normal": DMLDIDCS.coef, "coef_ext": DMLDIDCS_ext.coef} + res_dict = {"coef_normal": dml_did_cs.coef, "coef_ext": dml_did_cs_ext.coef} return res_dict diff --git a/doubleml/tests/test_iivm_external_predictions.py b/doubleml/tests/test_iivm_external_predictions.py index cce62032..fc9d3665 100644 --- a/doubleml/tests/test_iivm_external_predictions.py +++ b/doubleml/tests/test_iivm_external_predictions.py @@ -36,7 +36,7 @@ def adapted_doubleml_fixture(dml_procedure, n_rep): "dml_procedure": dml_procedure, } - DMLIIVM = DoubleMLIIVM( + dml_iivm = DoubleMLIIVM( ml_g=LinearRegression(), ml_m=LogisticRegression(), ml_r=LogisticRegression(), @@ -44,22 +44,22 @@ def adapted_doubleml_fixture(dml_procedure, n_rep): ) np.random.seed(3141) - DMLIIVM.fit(store_predictions=True) + dml_iivm.fit(store_predictions=True) - ext_predictions["d"]["ml_g0"] = 
DMLIIVM.predictions["ml_g0"][:, :, 0] - ext_predictions["d"]["ml_g1"] = DMLIIVM.predictions["ml_g1"][:, :, 0] - ext_predictions["d"]["ml_m"] = DMLIIVM.predictions["ml_m"][:, :, 0] - ext_predictions["d"]["ml_r0"] = DMLIIVM.predictions["ml_r0"][:, :, 0] - ext_predictions["d"]["ml_r1"] = DMLIIVM.predictions["ml_r1"][:, :, 0] + ext_predictions["d"]["ml_g0"] = dml_iivm.predictions["ml_g0"][:, :, 0] + ext_predictions["d"]["ml_g1"] = dml_iivm.predictions["ml_g1"][:, :, 0] + ext_predictions["d"]["ml_m"] = dml_iivm.predictions["ml_m"][:, :, 0] + ext_predictions["d"]["ml_r0"] = dml_iivm.predictions["ml_r0"][:, :, 0] + ext_predictions["d"]["ml_r1"] = dml_iivm.predictions["ml_r1"][:, :, 0] - DMLIIVM_ext = DoubleMLIIVM( + dml_iivm_ext = DoubleMLIIVM( ml_g=dummy_regressor(), ml_m=dummy_classifier(), ml_r=dummy_classifier(), **kwargs ) np.random.seed(3141) - DMLIIVM_ext.fit(external_predictions=ext_predictions) + dml_iivm_ext.fit(external_predictions=ext_predictions) - res_dict = {"coef_normal": DMLIIVM.coef, "coef_ext": DMLIIVM_ext.coef} + res_dict = {"coef_normal": dml_iivm.coef, "coef_ext": dml_iivm_ext.coef} return res_dict diff --git a/doubleml/tests/test_irm_external_predictions.py b/doubleml/tests/test_irm_external_predictions.py index d291af29..533e6250 100644 --- a/doubleml/tests/test_irm_external_predictions.py +++ b/doubleml/tests/test_irm_external_predictions.py @@ -44,30 +44,30 @@ def doubleml_irm_fixture(irm_score, dml_procedure, n_rep, set_ml_m_ext, set_ml_g kwargs = {"obj_dml_data": dml_data, "score": irm_score, "n_rep": n_rep, "dml_procedure": dml_procedure} - DMLIRM = DoubleMLIRM(ml_g=LinearRegression(), ml_m=LogisticRegression(), **kwargs) + dml_irm = DoubleMLIRM(ml_g=LinearRegression(), ml_m=LogisticRegression(), **kwargs) np.random.seed(3141) - DMLIRM.fit(store_predictions=True) + dml_irm.fit(store_predictions=True) if set_ml_m_ext: - ext_predictions["d"]["ml_m"] = DMLIRM.predictions["ml_m"][:, :, 0] + ext_predictions["d"]["ml_m"] = 
dml_irm.predictions["ml_m"][:, :, 0] ml_m = dummy_classifier() else: ml_m = LogisticRegression(random_state=42) if set_ml_g_ext: - ext_predictions["d"]["ml_g0"] = DMLIRM.predictions["ml_g0"][:, :, 0] - ext_predictions["d"]["ml_g1"] = DMLIRM.predictions["ml_g1"][:, :, 0] + ext_predictions["d"]["ml_g0"] = dml_irm.predictions["ml_g0"][:, :, 0] + ext_predictions["d"]["ml_g1"] = dml_irm.predictions["ml_g1"][:, :, 0] ml_g = dummy_regressor() else: ml_g = LinearRegression() - DMLIRM_ext = DoubleMLIRM(ml_g=ml_g, ml_m=ml_m, **kwargs) + dml_irm_ext = DoubleMLIRM(ml_g=ml_g, ml_m=ml_m, **kwargs) np.random.seed(3141) - DMLIRM_ext.fit(external_predictions=ext_predictions) + dml_irm_ext.fit(external_predictions=ext_predictions) - res_dict = {"coef_normal": DMLIRM.coef[0], "coef_ext": DMLIRM_ext.coef[0]} + res_dict = {"coef_normal": dml_irm.coef[0], "coef_ext": dml_irm_ext.coef[0]} return res_dict diff --git a/doubleml/tests/test_lpq_external_predictions.py b/doubleml/tests/test_lpq_external_predictions.py index a5a9a5bb..be4b9f73 100644 --- a/doubleml/tests/test_lpq_external_predictions.py +++ b/doubleml/tests/test_lpq_external_predictions.py @@ -44,25 +44,25 @@ def doubleml_lpq_fixture(dml_procedure, n_rep, normalize_ipw): ml_g = LogisticRegression() ml_m = LogisticRegression() - DMLLPQ = DoubleMLLPQ(ml_g=ml_g, ml_m=ml_m, **kwargs) - DMLLPQ.set_sample_splitting(all_smpls) + dml_lpq = DoubleMLLPQ(ml_g=ml_g, ml_m=ml_m, **kwargs) + dml_lpq.set_sample_splitting(all_smpls) np.random.seed(3141) - DMLLPQ.fit(store_predictions=True) + dml_lpq.fit(store_predictions=True) - ext_predictions["d"]["ml_m_z"] = DMLLPQ.predictions["ml_m_z"][:, :, 0] - ext_predictions["d"]["ml_m_d_z0"] = DMLLPQ.predictions["ml_m_d_z0"][:, :, 0] - ext_predictions["d"]["ml_m_d_z1"] = DMLLPQ.predictions["ml_m_d_z1"][:, :, 0] - ext_predictions["d"]["ml_g_du_z0"] = DMLLPQ.predictions["ml_g_du_z0"][:, :, 0] - ext_predictions["d"]["ml_g_du_z1"] = DMLLPQ.predictions["ml_g_du_z1"][:, :, 0] + 
ext_predictions["d"]["ml_m_z"] = dml_lpq.predictions["ml_m_z"][:, :, 0] + ext_predictions["d"]["ml_m_d_z0"] = dml_lpq.predictions["ml_m_d_z0"][:, :, 0] + ext_predictions["d"]["ml_m_d_z1"] = dml_lpq.predictions["ml_m_d_z1"][:, :, 0] + ext_predictions["d"]["ml_g_du_z0"] = dml_lpq.predictions["ml_g_du_z0"][:, :, 0] + ext_predictions["d"]["ml_g_du_z1"] = dml_lpq.predictions["ml_g_du_z1"][:, :, 0] - DMLLPLQ_ext = DoubleMLLPQ(ml_g=dummy_classifier(), ml_m=dummy_classifier(), **kwargs) - DMLLPLQ_ext.set_sample_splitting(all_smpls) + dml_lpq_ext = DoubleMLLPQ(ml_g=dummy_classifier(), ml_m=dummy_classifier(), **kwargs) + dml_lpq_ext.set_sample_splitting(all_smpls) np.random.seed(3141) - DMLLPLQ_ext.fit(external_predictions=ext_predictions) + dml_lpq_ext.fit(external_predictions=ext_predictions) - res_dict = {"coef_normal": DMLLPQ.coef, "coef_ext": DMLLPLQ_ext.coef} + res_dict = {"coef_normal": dml_lpq.coef, "coef_ext": dml_lpq_ext.coef} return res_dict diff --git a/doubleml/tests/test_pliv_external_predictions.py b/doubleml/tests/test_pliv_external_predictions.py index b9061498..562ebe7d 100644 --- a/doubleml/tests/test_pliv_external_predictions.py +++ b/doubleml/tests/test_pliv_external_predictions.py @@ -55,7 +55,7 @@ def adapted_doubleml_fixture(score, dml_procedure, n_rep, dim_z): if score == "IV-type": kwargs["ml_g"] = LinearRegression() - DMLPLIV = DoubleMLPLIV( + dml_pliv = DoubleMLPLIV( ml_m=LinearRegression(), ml_l=LinearRegression(), ml_r=LinearRegression(), @@ -63,29 +63,29 @@ def adapted_doubleml_fixture(score, dml_procedure, n_rep, dim_z): ) np.random.seed(3141) - DMLPLIV.fit(store_predictions=True) + dml_pliv.fit(store_predictions=True) - ext_predictions["d"]["ml_l"] = DMLPLIV.predictions["ml_l"][:, :, 0] - ext_predictions["d"]["ml_r"] = DMLPLIV.predictions["ml_r"][:, :, 0] + ext_predictions["d"]["ml_l"] = dml_pliv.predictions["ml_l"][:, :, 0] + ext_predictions["d"]["ml_r"] = dml_pliv.predictions["ml_r"][:, :, 0] if dim_z == 1: - ext_predictions["d"]["ml_m"] = 
DMLPLIV.predictions["ml_m"][:, :, 0] + ext_predictions["d"]["ml_m"] = dml_pliv.predictions["ml_m"][:, :, 0] if score == "IV-type": kwargs["ml_g"] = dummy_regressor() - ext_predictions["d"]["ml_g"] = DMLPLIV.predictions["ml_g"][:, :, 0] + ext_predictions["d"]["ml_g"] = dml_pliv.predictions["ml_g"][:, :, 0] else: for instr in range(dim_z): ml_m_key = "ml_m_" + "Z" + str(instr + 1) - ext_predictions["d"][ml_m_key] = DMLPLIV.predictions[ml_m_key][:, :, 0] + ext_predictions["d"][ml_m_key] = dml_pliv.predictions[ml_m_key][:, :, 0] - DMLPLIV_ext = DoubleMLPLIV( + dml_pliv_ext = DoubleMLPLIV( ml_m=dummy_regressor(), ml_l=dummy_regressor(), ml_r=dummy_regressor(), **kwargs ) np.random.seed(3141) - DMLPLIV_ext.fit(external_predictions=ext_predictions) + dml_pliv_ext.fit(external_predictions=ext_predictions) - res_dict = {"coef_normal": DMLPLIV.coef, "coef_ext": DMLPLIV_ext.coef} + res_dict = {"coef_normal": dml_pliv.coef, "coef_ext": dml_pliv_ext.coef} return res_dict diff --git a/doubleml/tests/test_plr_external_predictions.py b/doubleml/tests/test_plr_external_predictions.py index f1386e11..3946ca05 100644 --- a/doubleml/tests/test_plr_external_predictions.py +++ b/doubleml/tests/test_plr_external_predictions.py @@ -52,37 +52,37 @@ def doubleml_plr_fixture(plr_score, dml_procedure, n_rep, set_ml_m_ext, set_ml_l if plr_score == "IV-type": kwargs["ml_g"] = LinearRegression() - DMLPLR = DoubleMLPLR(ml_m=LinearRegression(), ml_l=LinearRegression(), **kwargs) + dml_plr = DoubleMLPLR(ml_m=LinearRegression(), ml_l=LinearRegression(), **kwargs) np.random.seed(3141) - DMLPLR.fit(store_predictions=True) + dml_plr.fit(store_predictions=True) if set_ml_m_ext: - ext_predictions["d"]["ml_m"] = DMLPLR.predictions["ml_m"][:, :, 0] + ext_predictions["d"]["ml_m"] = dml_plr.predictions["ml_m"][:, :, 0] ml_m = dummy_regressor() else: ml_m = LinearRegression() if set_ml_l_ext: - ext_predictions["d"]["ml_l"] = DMLPLR.predictions["ml_l"][:, :, 0] + ext_predictions["d"]["ml_l"] = 
dml_plr.predictions["ml_l"][:, :, 0] ml_l = dummy_regressor() else: ml_l = LinearRegression() if plr_score == "IV-type" and set_ml_g_ext: - ext_predictions["d"]["ml_g"] = DMLPLR.predictions["ml_g"][:, :, 0] + ext_predictions["d"]["ml_g"] = dml_plr.predictions["ml_g"][:, :, 0] kwargs["ml_g"] = dummy_regressor() elif plr_score == "IV-type" and not set_ml_g_ext: kwargs["ml_g"] = LinearRegression() else: pass - DMLPLR_ext = DoubleMLPLR(ml_m=ml_m, ml_l=ml_l, **kwargs) + dml_plr_ext = DoubleMLPLR(ml_m=ml_m, ml_l=ml_l, **kwargs) np.random.seed(3141) - DMLPLR_ext.fit(external_predictions=ext_predictions) + dml_plr_ext.fit(external_predictions=ext_predictions) - res_dict = {"coef_normal": DMLPLR.coef, "coef_ext": DMLPLR_ext.coef} + res_dict = {"coef_normal": dml_plr.coef, "coef_ext": dml_plr_ext.coef} return res_dict diff --git a/doubleml/tests/test_pq_external_predictions.py b/doubleml/tests/test_pq_external_predictions.py index 0f3c0bc7..a2962ab0 100644 --- a/doubleml/tests/test_pq_external_predictions.py +++ b/doubleml/tests/test_pq_external_predictions.py @@ -54,28 +54,28 @@ def doubleml_pq_fixture(dml_procedure, n_rep, normalize_ipw, set_ml_m_ext, set_m ml_m = LogisticRegression(random_state=42) ml_g = LogisticRegression(random_state=42) - DMLPQ = DoubleMLPQ(ml_g=ml_g, ml_m=ml_m, **kwargs) - DMLPQ.set_sample_splitting(all_smpls) + dml_pq = DoubleMLPQ(ml_g=ml_g, ml_m=ml_m, **kwargs) + dml_pq.set_sample_splitting(all_smpls) np.random.seed(3141) - DMLPQ.fit(store_predictions=True) + dml_pq.fit(store_predictions=True) if set_ml_m_ext: - ext_predictions["d"]["ml_m"] = DMLPQ.predictions["ml_m"][:, :, 0] + ext_predictions["d"]["ml_m"] = dml_pq.predictions["ml_m"][:, :, 0] ml_m = dummy_classifier() else: ml_m = LogisticRegression(random_state=42) if set_ml_g_ext: - ext_predictions["d"]["ml_g"] = DMLPQ.predictions["ml_g"][:, :, 0] + ext_predictions["d"]["ml_g"] = dml_pq.predictions["ml_g"][:, :, 0] ml_g = dummy_classifier() else: ml_g = LogisticRegression(random_state=42) - 
DMLPLQ_ext = DoubleMLPQ(ml_g=ml_g, ml_m=ml_m, **kwargs) - DMLPLQ_ext.set_sample_splitting(all_smpls) + dml_pq_ext = DoubleMLPQ(ml_g=ml_g, ml_m=ml_m, **kwargs) + dml_pq_ext.set_sample_splitting(all_smpls) np.random.seed(3141) - DMLPLQ_ext.fit(external_predictions=ext_predictions) + dml_pq_ext.fit(external_predictions=ext_predictions) if set_ml_m_ext and not set_ml_g_ext: # adjust tolerance for the case that ml_m is set to external predictions @@ -86,7 +86,7 @@ def doubleml_pq_fixture(dml_procedure, n_rep, normalize_ipw, set_ml_m_ext, set_m tol_rel = 1e-9 tol_abs = 1e-4 - res_dict = {"coef_normal": DMLPQ.coef, "coef_ext": DMLPLQ_ext.coef, "tol_rel": tol_rel, "tol_abs": tol_abs} + res_dict = {"coef_normal": dml_pq.coef, "coef_ext": dml_pq_ext.coef, "tol_rel": tol_rel, "tol_abs": tol_abs} return res_dict From ee0403744522ead8ba8ccc6b35d567403652ae20 Mon Sep 17 00:00:00 2001 From: Sven Klaassen <47529404+SvenKlaassen@users.noreply.github.com> Date: Mon, 11 Dec 2023 08:06:00 +0100 Subject: [PATCH 67/67] reduce test warnings --- doubleml/tests/test_did_external_predictions.py | 2 +- doubleml/tests/test_didcs_external_predictions.py | 2 +- doubleml/tests/test_iivm_external_predictions.py | 2 +- doubleml/tests/test_lpq_external_predictions.py | 2 +- doubleml/tests/test_pliv_external_predictions.py | 2 +- doubleml/tests/test_plr_external_predictions.py | 2 +- doubleml/tests/test_pq_external_predictions.py | 2 +- 7 files changed, 7 insertions(+), 7 deletions(-) diff --git a/doubleml/tests/test_did_external_predictions.py b/doubleml/tests/test_did_external_predictions.py index 59a1e6f1..43138cf8 100644 --- a/doubleml/tests/test_did_external_predictions.py +++ b/doubleml/tests/test_did_external_predictions.py @@ -49,7 +49,7 @@ def doubleml_did_fixture(did_score, dml_procedure, n_rep): np.random.seed(3141) dml_did_ext.fit(external_predictions=ext_predictions) - res_dict = {"coef_normal": dml_did.coef, "coef_ext": dml_did_ext.coef} + res_dict = {"coef_normal": dml_did.coef[0], 
"coef_ext": dml_did_ext.coef[0]} return res_dict diff --git a/doubleml/tests/test_didcs_external_predictions.py b/doubleml/tests/test_didcs_external_predictions.py index 90aafaef..a1ffda5e 100644 --- a/doubleml/tests/test_didcs_external_predictions.py +++ b/doubleml/tests/test_didcs_external_predictions.py @@ -52,7 +52,7 @@ def doubleml_didcs_fixture(did_score, dml_procedure, n_rep): np.random.seed(3141) dml_did_cs_ext.fit(external_predictions=ext_predictions) - res_dict = {"coef_normal": dml_did_cs.coef, "coef_ext": dml_did_cs_ext.coef} + res_dict = {"coef_normal": dml_did_cs.coef[0], "coef_ext": dml_did_cs_ext.coef[0]} return res_dict diff --git a/doubleml/tests/test_iivm_external_predictions.py b/doubleml/tests/test_iivm_external_predictions.py index fc9d3665..548f2297 100644 --- a/doubleml/tests/test_iivm_external_predictions.py +++ b/doubleml/tests/test_iivm_external_predictions.py @@ -59,7 +59,7 @@ def adapted_doubleml_fixture(dml_procedure, n_rep): np.random.seed(3141) dml_iivm_ext.fit(external_predictions=ext_predictions) - res_dict = {"coef_normal": dml_iivm.coef, "coef_ext": dml_iivm_ext.coef} + res_dict = {"coef_normal": dml_iivm.coef[0], "coef_ext": dml_iivm_ext.coef[0]} return res_dict diff --git a/doubleml/tests/test_lpq_external_predictions.py b/doubleml/tests/test_lpq_external_predictions.py index be4b9f73..af46191f 100644 --- a/doubleml/tests/test_lpq_external_predictions.py +++ b/doubleml/tests/test_lpq_external_predictions.py @@ -62,7 +62,7 @@ def doubleml_lpq_fixture(dml_procedure, n_rep, normalize_ipw): np.random.seed(3141) dml_lpq_ext.fit(external_predictions=ext_predictions) - res_dict = {"coef_normal": dml_lpq.coef, "coef_ext": dml_lpq_ext.coef} + res_dict = {"coef_normal": dml_lpq.coef[0], "coef_ext": dml_lpq_ext.coef[0]} return res_dict diff --git a/doubleml/tests/test_pliv_external_predictions.py b/doubleml/tests/test_pliv_external_predictions.py index 562ebe7d..5a5eb097 100644 --- a/doubleml/tests/test_pliv_external_predictions.py +++ 
b/doubleml/tests/test_pliv_external_predictions.py @@ -85,7 +85,7 @@ def adapted_doubleml_fixture(score, dml_procedure, n_rep, dim_z): np.random.seed(3141) dml_pliv_ext.fit(external_predictions=ext_predictions) - res_dict = {"coef_normal": dml_pliv.coef, "coef_ext": dml_pliv_ext.coef} + res_dict = {"coef_normal": dml_pliv.coef[0], "coef_ext": dml_pliv_ext.coef[0]} return res_dict diff --git a/doubleml/tests/test_plr_external_predictions.py b/doubleml/tests/test_plr_external_predictions.py index 3946ca05..9d776710 100644 --- a/doubleml/tests/test_plr_external_predictions.py +++ b/doubleml/tests/test_plr_external_predictions.py @@ -82,7 +82,7 @@ def doubleml_plr_fixture(plr_score, dml_procedure, n_rep, set_ml_m_ext, set_ml_l np.random.seed(3141) dml_plr_ext.fit(external_predictions=ext_predictions) - res_dict = {"coef_normal": dml_plr.coef, "coef_ext": dml_plr_ext.coef} + res_dict = {"coef_normal": dml_plr.coef[0], "coef_ext": dml_plr_ext.coef[0]} return res_dict diff --git a/doubleml/tests/test_pq_external_predictions.py b/doubleml/tests/test_pq_external_predictions.py index a2962ab0..4f23800f 100644 --- a/doubleml/tests/test_pq_external_predictions.py +++ b/doubleml/tests/test_pq_external_predictions.py @@ -86,7 +86,7 @@ def doubleml_pq_fixture(dml_procedure, n_rep, normalize_ipw, set_ml_m_ext, set_m tol_rel = 1e-9 tol_abs = 1e-4 - res_dict = {"coef_normal": dml_pq.coef, "coef_ext": dml_pq_ext.coef, "tol_rel": tol_rel, "tol_abs": tol_abs} + res_dict = {"coef_normal": dml_pq.coef[0], "coef_ext": dml_pq_ext.coef[0], "tol_rel": tol_rel, "tol_abs": tol_abs} return res_dict