From 3e7aea55ec7e722aeb85fe4699f1a0010cf3f5dd Mon Sep 17 00:00:00 2001 From: leostimpfle Date: Sat, 26 Oct 2024 14:41:41 +0200 Subject: [PATCH 01/11] enable fepois.predict() with fixed effects if newdata == None --- pyfixest/estimation/fepois_.py | 4 ---- tests/test_vs_fixest.py | 33 ++++++++++++++++----------------- 2 files changed, 16 insertions(+), 21 deletions(-) diff --git a/pyfixest/estimation/fepois_.py b/pyfixest/estimation/fepois_.py index 393fcd1e..c3d91aa4 100644 --- a/pyfixest/estimation/fepois_.py +++ b/pyfixest/estimation/fepois_.py @@ -392,10 +392,6 @@ def predict( np.ndarray A flat array with the predicted values of the regression model. """ - if self._has_fixef: - raise NotImplementedError( - "Prediction with fixed effects is not yet implemented for Poisson regression." - ) if newdata is not None: raise NotImplementedError( "Prediction with function argument `newdata` is not yet implemented for Poisson regression." diff --git a/tests/test_vs_fixest.py b/tests/test_vs_fixest.py index 4587cd08..2d8ee360 100644 --- a/tests/test_vs_fixest.py +++ b/tests/test_vs_fixest.py @@ -424,23 +424,22 @@ def test_single_fit_fepois( check_absolute_diff(py_confint, r_confint, 1e-06, "py_confint != r_confint") check_absolute_diff(py_deviance, r_deviance, 1e-08, "py_deviance != r_deviance") - if not mod._has_fixef: - py_predict_response = mod.predict(type="response") # noqa: F841 - py_predict_link = mod.predict(type="link") # noqa: F841 - r_predict_response = stats.predict(r_fixest, type="response") # noqa: F841 - r_predict_link = stats.predict(r_fixest, type="link") # noqa: F841 - check_absolute_diff( - py_predict_response[0:5], - r_predict_response[0:5], - 1e-07, - "py_predict_response != r_predict_response", - ) - check_absolute_diff( - py_predict_link[0:5], - r_predict_link[0:5], - 1e-07, - "py_predict_link != r_predict_link", - ) + py_predict_response = mod.predict(type="response") # noqa: F841 + py_predict_link = mod.predict(type="link") # noqa: F841 + r_predict_response = stats.predict(r_fixest, type="response") # noqa: F841 + r_predict_link = stats.predict(r_fixest, type="link") # noqa: F841 + check_absolute_diff( + py_predict_response[0:5], + r_predict_response[0:5], + 1e-07, + "py_predict_response != r_predict_response", + ) + check_absolute_diff( + py_predict_link[0:5], + r_predict_link[0:5], + 1e-07, + "py_predict_link != r_predict_link", + ) @pytest.mark.slow From 9c8d00de65def2e0a42b7af69f3913553b766041 Mon Sep 17 00:00:00 2001 From: leostimpfle Date: Mon, 28 Oct 2024 07:42:37 +0100 Subject: [PATCH 02/11] newdata with fixed effects raises error --- pyfixest/estimation/fepois_.py | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/pyfixest/estimation/fepois_.py b/pyfixest/estimation/fepois_.py index c3d91aa4..e5efcac3 100644 --- a/pyfixest/estimation/fepois_.py +++ b/pyfixest/estimation/fepois_.py @@ -392,11 +392,13 @@ def predict( np.ndarray A flat array with the predicted values of the regression model. """ - if newdata is not None: - raise NotImplementedError( - "Prediction with function argument `newdata` is not yet implemented for Poisson regression." - ) - return super().predict(newdata=newdata, type=type, atol=atol, btol=btol) + if newdata is not None and self._has_fixef: + raise NotImplementedError() + + y_hat = super().predict(newdata=newdata, type=type, atol=atol, btol=btol) + if newdata is not None and type == "response": + y_hat = np.exp(y_hat) + return y_hat def _check_for_separation( From 91c2df806e593b77248062658d4b856eec7c9011 Mon Sep 17 00:00:00 2001 From: leostimpfle Date: Sun, 10 Nov 2024 11:18:57 +0100 Subject: [PATCH 03/11] add test of fepois.predit with newdata --- tests/test_vs_fixest.py | 35 ++++++++++++++++++++++++++++++++--- 1 file changed, 32 insertions(+), 3 deletions(-) diff --git a/tests/test_vs_fixest.py b/tests/test_vs_fixest.py index 70be2a25..74b6341e 100644 --- a/tests/test_vs_fixest.py +++ b/tests/test_vs_fixest.py @@ -5,11 +5,13 @@ import pytest import rpy2.robjects as ro from rpy2.robjects import pandas2ri +from typing import get_args # rpy2 imports from rpy2.robjects.packages import importr import pyfixest as pf +from pyfixest.estimation import literals from pyfixest.estimation.estimation import feols from pyfixest.estimation.FixestMulti_ import FixestMulti from pyfixest.utils.set_rpy2_path import update_r_paths @@ -33,6 +35,9 @@ iwls_maxiter = 25 iwls_tol = 1e-08 +# currently, bug when using predict with newdata and i() or C() or "^" syntax +blocked_transforms = ["i(", "^", "poly("] + ols_fmls = [ ("Y~X1"), ("Y~X1+X2"), @@ -279,10 +284,7 @@ def test_single_fit_feols( (py_resid)[0:5], (r_resid)[0:5], 1e-07, "py_resid != r_resid" ) - # currently, bug when using predict with newdata and i() or C() or "^" syntax - blocked_transforms = ["i(", "^", "poly("] blocked_transform_found = any(bt in fml for bt in blocked_transforms) - if blocked_transform_found: with pytest.raises(NotImplementedError): py_predict_newsample = mod.predict( @@ -487,6 +489,33 @@ def test_single_fit_fepois( "py_predict_link != r_predict_link", ) + # check prediction with newdata + blocked_transform_found = any(bt in fml for bt in blocked_transforms) + if blocked_transform_found: + with pytest.raises(NotImplementedError): + py_predict_newsample = mod.predict( + newdata=data.iloc[0:100], atol=1e-08, btol=1e-08 + ) + else: + for prediction_type in get_args(literals.PredictionType): + py_predict_newsample = mod.predict( + newdata=data.iloc[0:100], + type=prediction_type, + atol=1e-12, + btol=1e-12, + ) + r_predict_newsample = stats.predict( + r_fixest, + newdata=data_r.iloc[0:100], + type=prediction_type, + ) + check_absolute_diff( + na_omit(py_predict_newsample)[0:5], + na_omit(r_predict_newsample)[0:5], + 1e-07, + f"py_predict_newdata != r_predict_newdata when type == '{prediction_type}'", + ) + @pytest.mark.slow @pytest.mark.parametrize("dropna", [False]) From d10bb58de3b3d3b96439b179b50efff2b8c3c1f7 Mon Sep 17 00:00:00 2001 From: Alexander Fischer Date: Tue, 24 Dec 2024 14:38:48 +0100 Subject: [PATCH 04/11] change default of fepois.predict to response to align with fixest default --- pyfixest/estimation/fepois_.py | 2 +- tests/test_vs_fixest.py | 12 +++++++++++- 2 files changed, 12 insertions(+), 2 deletions(-) diff --git a/pyfixest/estimation/fepois_.py b/pyfixest/estimation/fepois_.py index dde8144e..270c89f8 100644 --- a/pyfixest/estimation/fepois_.py +++ b/pyfixest/estimation/fepois_.py @@ -349,7 +349,7 @@ def predict( newdata: Optional[DataFrameType] = None, atol: float = 1e-6, btol: float = 1e-6, - type: PredictionType = "link", + type: PredictionType = "response", ) -> np.ndarray: """ Return predicted values from regression model. diff --git a/tests/test_vs_fixest.py b/tests/test_vs_fixest.py index 74b6341e..643d9dc0 100644 --- a/tests/test_vs_fixest.py +++ b/tests/test_vs_fixest.py @@ -1,11 +1,11 @@ import re +from typing import get_args import numpy as np import pandas as pd import pytest import rpy2.robjects as ro from rpy2.robjects import pandas2ri -from typing import get_args # rpy2 imports from rpy2.robjects.packages import importr @@ -472,10 +472,20 @@ def test_single_fit_fepois( check_absolute_diff(py_deviance, r_deviance, 1e-08, "py_deviance != r_deviance") if not mod._has_fixef: + py_predict_default = mod.predict() + r_predict_default = stats.predict(r_fixest) py_predict_response = mod.predict(type="response") py_predict_link = mod.predict(type="link") r_predict_response = stats.predict(r_fixest, type="response") r_predict_link = stats.predict(r_fixest, type="link") + + check_absolute_diff( + py_predict_default[0:5], + r_predict_default[0:5], + 1e-07, + "py_predict_default != r_predict_default", + ) + check_absolute_diff( py_predict_response[0:5], r_predict_response[0:5], From 3cb251b603e5737e04b7b4bc0994c674824efa9e Mon Sep 17 00:00:00 2001 From: Alexander Fischer Date: Tue, 24 Dec 2024 15:35:31 +0100 Subject: [PATCH 05/11] cleanups --- docs/changelog.qmd | 3 +++ pyfixest/estimation/feols_.py | 3 ++- pyfixest/estimation/fepois_.py | 13 ++++++++----- 3 files changed, 13 insertions(+), 6 deletions(-) diff --git a/docs/changelog.qmd b/docs/changelog.qmd index 68205a1d..3e0749cb 100644 --- a/docs/changelog.qmd +++ b/docs/changelog.qmd @@ -2,6 +2,9 @@ ## PyFixest 0.28.0 (In Development, can be installed from github) +- Add support for the `newdata` argument for `Fepois.predict`. +- Changes `Fepois.predict()` defaults `type` argument to `response`, to match `fixest` defaults. +- Updates the `Fepois._Y_hat_response` attribute, which would previously return `Fepois._Y_hat_link`. - Fix a bug that caused reindexing of `LPDID._coeftable` when calling `LPDID.iplot()`. As a result, a second call of `LPDID.iplot()` would fail. ## PyFixest 0.27.0 diff --git a/pyfixest/estimation/feols_.py b/pyfixest/estimation/feols_.py index e4092224..7214e8ff 100644 --- a/pyfixest/estimation/feols_.py +++ b/pyfixest/estimation/feols_.py @@ -1766,7 +1766,7 @@ def predict( newdata: Optional[DataFrameType] = None, atol: float = 1e-6, btol: float = 1e-6, - type: PredictionType = "link", + type: PredictionType = "response", ) -> np.ndarray: """ Predict values of the model on new data. @@ -1809,6 +1809,7 @@ def predict( _validate_literal_argument(type, PredictionType) + # import pdb; pdb.set_trace() if newdata is None: if type == "link" or self._method == "feols": return self._Y_hat_link diff --git a/pyfixest/estimation/fepois_.py b/pyfixest/estimation/fepois_.py index 76d00e28..0ab1043b 100644 --- a/pyfixest/estimation/fepois_.py +++ b/pyfixest/estimation/fepois_.py @@ -292,9 +292,8 @@ def compute_deviance(_Y: np.ndarray, mu: np.ndarray): stop_iterating = crit < _tol self._beta_hat = delta_new.flatten() - self._Y_hat_response = mu.flatten() - self._Y_hat_link = eta.flatten() - # (Y - self._Y_hat) + self._Y_hat_link = mu.flatten() + self._Y_hat_response = np.exp(self._Y_hat_link) # needed for the calculation of the vcov # update for inference @@ -393,11 +392,15 @@ def predict( A flat array with the predicted values of the regression model. """ if newdata is not None and self._has_fixef: - raise NotImplementedError() + raise NotImplementedError( + "Prediction with new fixed effects is not supported for Poisson regression." + ) y_hat = super().predict(newdata=newdata, type=type, atol=atol, btol=btol) - if newdata is not None and type == "response": + + if type == "response" and self._has_fixef and newdata is not None: y_hat = np.exp(y_hat) + return y_hat From eea2fbc8219c22b27ea7f4a760fffba7fae0756c Mon Sep 17 00:00:00 2001 From: Alexander Fischer Date: Tue, 24 Dec 2024 16:35:26 +0100 Subject: [PATCH 06/11] fix link, response y hat --- pyfixest/estimation/feols_.py | 1 - pyfixest/estimation/fepois_.py | 9 ++++----- pyfixest/estimation/model_matrix_fixest_.py | 4 ++-- 3 files changed, 6 insertions(+), 8 deletions(-) diff --git a/pyfixest/estimation/feols_.py b/pyfixest/estimation/feols_.py index 7214e8ff..4fb935b0 100644 --- a/pyfixest/estimation/feols_.py +++ b/pyfixest/estimation/feols_.py @@ -1809,7 +1809,6 @@ def predict( _validate_literal_argument(type, PredictionType) - # import pdb; pdb.set_trace() if newdata is None: if type == "link" or self._method == "feols": return self._Y_hat_link diff --git a/pyfixest/estimation/fepois_.py b/pyfixest/estimation/fepois_.py index 0ab1043b..5c64c65c 100644 --- a/pyfixest/estimation/fepois_.py +++ b/pyfixest/estimation/fepois_.py @@ -292,8 +292,8 @@ def compute_deviance(_Y: np.ndarray, mu: np.ndarray): stop_iterating = crit < _tol self._beta_hat = delta_new.flatten() - self._Y_hat_link = mu.flatten() - self._Y_hat_response = np.exp(self._Y_hat_link) + self._Y_hat_link = np.log(mu.flatten()) + self._Y_hat_response = mu.flatten() # needed for the calculation of the vcov # update for inference @@ -396,9 +396,8 @@ def predict( "Prediction with new fixed effects is not supported for Poisson regression." ) - y_hat = super().predict(newdata=newdata, type=type, atol=atol, btol=btol) - - if type == "response" and self._has_fixef and newdata is not None: + y_hat = super().predict(newdata=newdata, type="link", atol=atol, btol=btol) + if type == "response": y_hat = np.exp(y_hat) return y_hat diff --git a/pyfixest/estimation/model_matrix_fixest_.py b/pyfixest/estimation/model_matrix_fixest_.py index 0942cbfe..9b8e379e 100644 --- a/pyfixest/estimation/model_matrix_fixest_.py +++ b/pyfixest/estimation/model_matrix_fixest_.py @@ -209,11 +209,11 @@ def model_matrix_fixest( } -def _get_na_index(N: int, Y_index: pd.Series) -> np.ndarray: +def _get_na_index(N: int, Y_index: pd.Series[int]) -> np.ndarray: all_indices = np.arange(N) max_index = all_indices.max() + 1 mask = np.ones(max_index, dtype=bool) - Y_index_arr = Y_index.to_numpy() + Y_index_arr = Y_index.to_numpy(dtype=int) mask[Y_index_arr] = False na_index = np.nonzero(mask)[0] return na_index From 7c1422351eb0d7becd14d6f3ebcdf54d4fc5dab8 Mon Sep 17 00:00:00 2001 From: Alexander Fischer Date: Tue, 24 Dec 2024 16:35:32 +0100 Subject: [PATCH 07/11] fix link, response y hat --- pyfixest/estimation/model_matrix_fixest_.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/pyfixest/estimation/model_matrix_fixest_.py b/pyfixest/estimation/model_matrix_fixest_.py index 9b8e379e..4cf205f1 100644 --- a/pyfixest/estimation/model_matrix_fixest_.py +++ b/pyfixest/estimation/model_matrix_fixest_.py @@ -208,13 +208,12 @@ def model_matrix_fixest( "X_is_empty": X_is_empty, } - def _get_na_index(N: int, Y_index: pd.Series[int]) -> np.ndarray: all_indices = np.arange(N) max_index = all_indices.max() + 1 mask = np.ones(max_index, dtype=bool) - Y_index_arr = Y_index.to_numpy(dtype=int) - mask[Y_index_arr] = False + Y_index_arr: np.ndarray = Y_index.to_numpy().astype(int) + mask[Y_index_arr] = False # type: ignore[index] na_index = np.nonzero(mask)[0] return na_index From bdd956c288705ed0fda58fb526b442993696e23c Mon Sep 17 00:00:00 2001 From: Alexander Fischer Date: Tue, 24 Dec 2024 17:33:04 +0100 Subject: [PATCH 08/11] fixef predict gets close --- pyfixest/estimation/feols_.py | 8 ++++---- pyfixest/estimation/fepois_.py | 9 ++++----- pyfixest/estimation/model_matrix_fixest_.py | 8 +++++--- tests/test_vs_fixest.py | 16 ++++++++-------- 4 files changed, 21 insertions(+), 20 deletions(-) diff --git a/pyfixest/estimation/feols_.py b/pyfixest/estimation/feols_.py index 4fb935b0..7b55d3f2 100644 --- a/pyfixest/estimation/feols_.py +++ b/pyfixest/estimation/feols_.py @@ -1810,10 +1810,10 @@ def predict( _validate_literal_argument(type, PredictionType) if newdata is None: - if type == "link" or self._method == "feols": - return self._Y_hat_link - else: - return self._Y_hat_response + #if type == "link" or self._method == "feols": + return self._Y_hat_link + #else: + # return self._Y_hat_response newdata = _narwhals_to_pandas(newdata).reset_index(drop=False) diff --git a/pyfixest/estimation/fepois_.py b/pyfixest/estimation/fepois_.py index 5c64c65c..779ab7bd 100644 --- a/pyfixest/estimation/fepois_.py +++ b/pyfixest/estimation/fepois_.py @@ -391,17 +391,16 @@ def predict( np.ndarray A flat array with the predicted values of the regression model. """ - if newdata is not None and self._has_fixef: - raise NotImplementedError( - "Prediction with new fixed effects is not supported for Poisson regression." - ) y_hat = super().predict(newdata=newdata, type="link", atol=atol, btol=btol) + if type == "response": y_hat = np.exp(y_hat) - return y_hat + if self._has_fixef and newdata is not None: + y_hat = np.log(y_hat) + return y_hat def _check_for_separation( fml: str, diff --git a/pyfixest/estimation/model_matrix_fixest_.py b/pyfixest/estimation/model_matrix_fixest_.py index 4cf205f1..09e61668 100644 --- a/pyfixest/estimation/model_matrix_fixest_.py +++ b/pyfixest/estimation/model_matrix_fixest_.py @@ -208,12 +208,14 @@ def model_matrix_fixest( "X_is_empty": X_is_empty, } -def _get_na_index(N: int, Y_index: pd.Series[int]) -> np.ndarray: + +def _get_na_index(N: int, Y_index: pd.Series) -> np.ndarray: all_indices = np.arange(N) + max_index = all_indices.max() + 1 mask = np.ones(max_index, dtype=bool) - Y_index_arr: np.ndarray = Y_index.to_numpy().astype(int) - mask[Y_index_arr] = False # type: ignore[index] + Y_index_arr = Y_index.to_numpy() + mask[Y_index_arr] = False na_index = np.nonzero(mask)[0] return na_index diff --git a/tests/test_vs_fixest.py b/tests/test_vs_fixest.py index 643d9dc0..07a6f3ec 100644 --- a/tests/test_vs_fixest.py +++ b/tests/test_vs_fixest.py @@ -471,31 +471,31 @@ def test_single_fit_fepois( check_absolute_diff(py_confint, r_confint, 1e-06, "py_confint != r_confint") check_absolute_diff(py_deviance, r_deviance, 1e-08, "py_deviance != r_deviance") - if not mod._has_fixef: - py_predict_default = mod.predict() + if True: + py_predict_default = mod.predict(atol = 1e-12, btol = 1e-12) r_predict_default = stats.predict(r_fixest) - py_predict_response = mod.predict(type="response") + py_predict_response = mod.predict(type="response", atol = 1e-12, btol = 1e-12) py_predict_link = mod.predict(type="link") - r_predict_response = stats.predict(r_fixest, type="response") + r_predict_response = stats.predict(r_fixest, type="response", atol = 1e-12, btol = 1e-12) r_predict_link = stats.predict(r_fixest, type="link") check_absolute_diff( py_predict_default[0:5], r_predict_default[0:5], - 1e-07, + 1e-04 if mod._has_fixef else 1e-07, "py_predict_default != r_predict_default", ) check_absolute_diff( py_predict_response[0:5], r_predict_response[0:5], - 1e-07, + 1e-03 if mod._has_fixef else 1e-07, "py_predict_response != r_predict_response", ) check_absolute_diff( py_predict_link[0:5], r_predict_link[0:5], - 1e-07, + 1e-03 if mod._has_fixef else 1e-07, "py_predict_link != r_predict_link", ) @@ -522,7 +522,7 @@ def test_single_fit_fepois( check_absolute_diff( na_omit(py_predict_newsample)[0:5], na_omit(r_predict_newsample)[0:5], - 1e-07, + 1e-03 if mod._has_fixef else 1e-07, f"py_predict_newdata != r_predict_newdata when type == '{prediction_type}'", ) From e501b54118efad48049dade77cfa98022aa587e1 Mon Sep 17 00:00:00 2001 From: Alexander Fischer Date: Wed, 25 Dec 2024 13:18:47 +0100 Subject: [PATCH 09/11] enable newdata without fixed effects for fepois --- pyfixest/estimation/feols_.py | 10 ++++++---- pyfixest/estimation/fepois_.py | 11 ++++++++--- tests/test_predict_resid_fixef.py | 23 +++++++++++++++++++++-- tests/test_vs_fixest.py | 12 +++++++----- 4 files changed, 42 insertions(+), 14 deletions(-) diff --git a/pyfixest/estimation/feols_.py b/pyfixest/estimation/feols_.py index 7b55d3f2..58825557 100644 --- a/pyfixest/estimation/feols_.py +++ b/pyfixest/estimation/feols_.py @@ -1775,6 +1775,11 @@ def predict( If new fixed effect levels are introduced in `newdata`, predicted values for such observations will be set to NaN. + The method always returns predictions for the "link" function; for linear + models, this is identical to the "response" function. Transformations to + "response" functions for models where this is not the case - GLMs - + this happens in the dedicated predict method of the respective GLM class. + Parameters ---------- newdata : Optional[DataFrameType], optional @@ -1810,10 +1815,7 @@ def predict( _validate_literal_argument(type, PredictionType) if newdata is None: - #if type == "link" or self._method == "feols": return self._Y_hat_link - #else: - # return self._Y_hat_response newdata = _narwhals_to_pandas(newdata).reset_index(drop=False) @@ -1835,7 +1837,7 @@ def predict( if self._has_fixef: if self._sumFE is None: - self.fixef(atol, btol) + self.fixef(atol=atol, btol=btol) fvals = self._fixef.split("+") df_fe = newdata[fvals].astype(str) # populate fixed effect dicts with omitted categories handling diff --git a/pyfixest/estimation/fepois_.py b/pyfixest/estimation/fepois_.py index 779ab7bd..0271e5ff 100644 --- a/pyfixest/estimation/fepois_.py +++ b/pyfixest/estimation/fepois_.py @@ -292,7 +292,7 @@ def compute_deviance(_Y: np.ndarray, mu: np.ndarray): stop_iterating = crit < _tol self._beta_hat = delta_new.flatten() - self._Y_hat_link = np.log(mu.flatten()) + self._Y_hat_link = eta.flatten() self._Y_hat_response = mu.flatten() # needed for the calculation of the vcov @@ -391,17 +391,22 @@ def predict( np.ndarray A flat array with the predicted values of the regression model. """ + if self._has_fixef and newdata is not None: + raise NotImplementedError( + "Predictions with new data and fixed effect are not yet supported." + ) y_hat = super().predict(newdata=newdata, type="link", atol=atol, btol=btol) if type == "response": y_hat = np.exp(y_hat) - if self._has_fixef and newdata is not None: - y_hat = np.log(y_hat) + # if self._has_fixef and newdata is not None: + # y_hat = np.log(y_hat) return y_hat + def _check_for_separation( fml: str, data: pd.DataFrame, diff --git a/tests/test_predict_resid_fixef.py b/tests/test_predict_resid_fixef.py index 18aa711f..1707d2a8 100644 --- a/tests/test_predict_resid_fixef.py +++ b/tests/test_predict_resid_fixef.py @@ -49,10 +49,16 @@ def test_ols_prediction_internally(data, fml, weights): Currently only for OLS. """ # predict via feols, without fixed effect + + data = data.dropna() + mod = feols(fml=fml, data=data, vcov="iid", weights=weights) original_prediction = mod.predict() updated_prediction = mod.predict(newdata=mod._data) - np.allclose(original_prediction, updated_prediction) + + assert np.allclose( + original_prediction, updated_prediction + ), "preditction with newdata should be identical" assert mod._data.shape[0] == original_prediction.shape[0] assert mod._data.shape[0] == updated_prediction.shape[0] @@ -62,9 +68,22 @@ def test_ols_prediction_internally(data, fml, weights): np.allclose(original_prediction, updated_prediction) -@pytest.mark.parametrize("fml", ["Y ~ X1", "Y~X1 |f1", "Y ~ X1 | f1 + f2"]) +@pytest.mark.parametrize("fml", ["Y ~ X1", "Y ~ X1*X2", "Y~X1 |f1", "Y ~ X1 | f1 + f2"]) @pytest.mark.parametrize("weights", ["weights"]) def test_poisson_prediction_internally(data, weights, fml): + data = data.dropna() + mod = fepois(fml=fml, data=data, vcov="iid") + original_prediction = mod.predict() + + if mod._has_fixef: + with pytest.raises(NotImplementedError): + updated_prediction = mod.predict(newdata=mod._data) + else: + updated_prediction = mod.predict(newdata=mod._data) + assert np.allclose( + original_prediction, updated_prediction + ), "preditction with newdata should be identical" + with pytest.raises(TypeError): fit = fepois(fml=fml, data=data, vcov="hetero", weights=weights) fit.predict(newdata=fit._data) diff --git a/tests/test_vs_fixest.py b/tests/test_vs_fixest.py index 07a6f3ec..1839608d 100644 --- a/tests/test_vs_fixest.py +++ b/tests/test_vs_fixest.py @@ -384,7 +384,7 @@ def test_single_fit_feols_empty( @pytest.mark.slow -@pytest.mark.parametrize("dropna", [False]) +@pytest.mark.parametrize("dropna", [False, True]) @pytest.mark.parametrize("inference", ["iid", "hetero", {"CRV1": "group_id"}]) @pytest.mark.parametrize("f3_type", ["str"]) @pytest.mark.parametrize("fml", ols_fmls) @@ -471,12 +471,14 @@ def test_single_fit_fepois( check_absolute_diff(py_confint, r_confint, 1e-06, "py_confint != r_confint") check_absolute_diff(py_deviance, r_deviance, 1e-08, "py_deviance != r_deviance") - if True: - py_predict_default = mod.predict(atol = 1e-12, btol = 1e-12) + if not mod._has_fixef: + py_predict_default = mod.predict(atol=1e-12, btol=1e-12) r_predict_default = stats.predict(r_fixest) - py_predict_response = mod.predict(type="response", atol = 1e-12, btol = 1e-12) + py_predict_response = mod.predict(type="response", atol=1e-12, btol=1e-12) py_predict_link = mod.predict(type="link") - r_predict_response = stats.predict(r_fixest, type="response", atol = 1e-12, btol = 1e-12) + r_predict_response = stats.predict( + r_fixest, type="response", atol=1e-12, btol=1e-12 + ) r_predict_link = stats.predict(r_fixest, type="link") check_absolute_diff( From c81f02120b2b7fb00141a56600e0f20113d353ee Mon Sep 17 00:00:00 2001 From: Alexander Fischer Date: Wed, 25 Dec 2024 13:44:45 +0100 Subject: [PATCH 10/11] fix mypy error --- pyfixest/estimation/feols_.py | 10 +++++----- pyfixest/estimation/model_matrix_fixest_.py | 5 +++-- 2 files changed, 8 insertions(+), 7 deletions(-) diff --git a/pyfixest/estimation/feols_.py b/pyfixest/estimation/feols_.py index 58825557..f778a686 100644 --- a/pyfixest/estimation/feols_.py +++ b/pyfixest/estimation/feols_.py @@ -3,7 +3,7 @@ import re import warnings from importlib import import_module -from typing import Literal, Optional, Union +from typing import Any, Literal, Optional, Union import numba as nb import numpy as np @@ -1844,9 +1844,9 @@ def predict( fixef_dicts = {} for f in fvals: fdict = self._fixef_dict[f"C({f})"] - omitted_cat = set(self._data[f].unique().astype(str).tolist()) - set( - fdict.keys() - ) + omitted_cat = { + str(x) for x in self._data[f].unique() if str(x) not in fdict + } if omitted_cat: fdict.update({x: 0 for x in omitted_cat}) fixef_dicts[f"C({f})"] = fdict @@ -2514,7 +2514,7 @@ def _get_vcov_type(vcov: str, fval: str): def _drop_multicollinear_variables( X: np.ndarray, names: list[str], collin_tol: float -) -> tuple[np.ndarray, list[str], list[str], list[int]]: +) -> Any: """ Check for multicollinearity in the design matrices X and Z. diff --git a/pyfixest/estimation/model_matrix_fixest_.py b/pyfixest/estimation/model_matrix_fixest_.py index 09e61668..581b3bd8 100644 --- a/pyfixest/estimation/model_matrix_fixest_.py +++ b/pyfixest/estimation/model_matrix_fixest_.py @@ -209,14 +209,15 @@ def model_matrix_fixest( } -def _get_na_index(N: int, Y_index: pd.Series) -> np.ndarray: +def _get_na_index(N: int, Y_index: pd.Index) -> np.ndarray: all_indices = np.arange(N) max_index = all_indices.max() + 1 mask = np.ones(max_index, dtype=bool) Y_index_arr = Y_index.to_numpy() + mask[Y_index_arr] = False - na_index = np.nonzero(mask)[0] + na_index = np.flatnonzero(mask) return na_index From 3d84dff865537b40f1d5aaf4bd4169098a37ef71 Mon Sep 17 00:00:00 2001 From: Alexander Fischer Date: Wed, 25 Dec 2024 13:47:42 +0100 Subject: [PATCH 11/11] fix mypy error --- pyfixest/estimation/feols_.py | 4 +--- pyfixest/utils/dev_utils.py | 2 +- 2 files changed, 2 insertions(+), 4 deletions(-) diff --git a/pyfixest/estimation/feols_.py b/pyfixest/estimation/feols_.py index f778a686..e0405c32 100644 --- a/pyfixest/estimation/feols_.py +++ b/pyfixest/estimation/feols_.py @@ -1844,9 +1844,7 @@ def predict( fixef_dicts = {} for f in fvals: fdict = self._fixef_dict[f"C({f})"] - omitted_cat = { - str(x) for x in self._data[f].unique() if str(x) not in fdict - } + omitted_cat = {str(x) for x in self._data[f].unique() if x not in fdict} if omitted_cat: fdict.update({x: 0 for x in omitted_cat}) fixef_dicts[f"C({f})"] = fdict diff --git a/pyfixest/utils/dev_utils.py b/pyfixest/utils/dev_utils.py index bc8966f0..5f200624 100644 --- a/pyfixest/utils/dev_utils.py +++ b/pyfixest/utils/dev_utils.py @@ -167,7 +167,7 @@ def _drop_cols(_data: pd.DataFrame, na_index: np.ndarray): return _data -def _extract_variable_level(fe_string: str): +def _extract_variable_level(fe_string: str) -> tuple[str, str]: """ Extract the variable and level from a given string.