py-econometrics · s3alfisc · Jul 21, 2024 · Jun 28, 2024 · Jul 1, 2024 · Jul 1, 2024
diff --git a/.gitignore b/.gitignore
@@ -22,4 +22,5 @@ readme.ipynb
 #objects.json
 #docs/site_libs
 #site_libs
+tests/.coverage
 
diff --git a/pyfixest/estimation/feols_.py b/pyfixest/estimation/feols_.py
@@ -1358,7 +1358,7 @@ def ccv(
             n_splits=n_splits,
         )
 
-    def fixef(self) -> dict[str, dict[str, float]]:
+    def fixef(self, atol : float = 1e-06, btol : float = 1e-06) -> dict[str, dict[str, float]]:
         """
         Compute the coefficients of (swept out) fixed effects for a regression model.
 
@@ -1404,13 +1404,13 @@ def fixef(self) -> dict[str, dict[str, float]]:
             X = X[self._coefnames]  # drop intercept, potentially multicollinear vars
             Y = Y.to_numpy().flatten().astype(np.float64)
             X = X.to_numpy()
-            uhat = csr_matrix(Y - X @ self._beta_hat).transpose()
+            uhat = (Y - X @ self._beta_hat).flatten()
 
         D2 = Formula("-1+" + fixef_fml).get_model_matrix(_data, output="sparse")
         cols = D2.model_spec.column_names
 
-        alpha = spsolve(D2.transpose() @ D2, D2.transpose() @ uhat)
-
+        alpha = lsqr(D2, uhat, atol=atol, btol=btol)[0]
+        
         res: dict[str, dict[str, float]] = {}
         for i, col in enumerate(cols):
             variable, level = _extract_variable_level(col)
@@ -1429,7 +1429,7 @@ def fixef(self) -> dict[str, dict[str, float]]:
 
         return self._fixef_dict
 
-    def predict(self, newdata: Optional[DataFrameType] = None) -> np.ndarray:
+    def predict(self, newdata: Optional[DataFrameType] = None, atol: float = 1e-6, btol: float = 1e-6) -> np.ndarray:
         """
         Predict values of the model on new data.
 
@@ -1442,6 +1442,12 @@ def predict(self, newdata: Optional[DataFrameType] = None) -> np.ndarray:
         newdata : Optional[DataFrameType], optional
             A pd.DataFrame or pl.DataFrame with the data to be used for prediction.
             If None (default), the data used for fitting the model is used.
+        atol : float, default 1e-6
+            Stopping tolerance `atol` forwarded to scipy.sparse.linalg.lsqr() if the fixed effects still have to be computed.
+            See https://docs.scipy.org/doc/scipy/reference/generated/scipy.sparse.linalg.lsqr.html
+        btol : float, default 1e-6
+            Stopping tolerance `btol` forwarded to scipy.sparse.linalg.lsqr() if the fixed effects still have to be computed.
+            See https://docs.scipy.org/doc/scipy/reference/generated/scipy.sparse.linalg.lsqr.html
 
         Returns
         -------
@@ -1472,7 +1478,7 @@ def predict(self, newdata: Optional[DataFrameType] = None) -> np.ndarray:
 
         if self._has_fixef:
             if self._sumFE is None:
-                self.fixef()
+                self.fixef(atol, btol)
             fvals = self._fixef.split("+")
             df_fe = newdata[fvals].astype(str)
             # populate fixed effect dicts with omitted categories handling

diff --git a/tests/.coverage b/tests/.coverage
diff --git a/tests/test_did.py b/tests/test_did.py
@@ -47,8 +47,8 @@ def test_event_study(data):
     did2s_df = pd.DataFrame(did2s_df).T
 
     if True:
-        np.testing.assert_allclose(fit_did2s.coef(), stats.coef(fit_did2s_r))
-        np.testing.assert_allclose(fit_did2s.se(), float(did2s_df[2]))
+        np.testing.assert_allclose(fit_did2s.coef(), stats.coef(fit_did2s_r), atol = 1e-05, rtol = 1e-05)
+        np.testing.assert_allclose(fit_did2s.se(), float(did2s_df[2]), atol = 1e-05, rtol = 1e-05)
 
 
 def test_did2s(data):
@@ -78,8 +78,8 @@ def test_did2s(data):
     did2s_df = broom.tidy_fixest(fit_did2s_r, conf_int=ro.BoolVector([True]))
     did2s_df = pd.DataFrame(did2s_df).T
 
-    np.testing.assert_allclose(fit_did2s.coef(), stats.coef(fit_did2s_r))
-    np.testing.assert_allclose(fit_did2s.se(), float(did2s_df[2]))
+    np.testing.assert_allclose(fit_did2s.coef(), stats.coef(fit_did2s_r), atol = 1e-05, rtol = 1e-05)
+    np.testing.assert_allclose(fit_did2s.se(), float(did2s_df[2]), atol = 1e-05, rtol = 1e-05)
 
     if True:
         # ATT, event study
@@ -105,8 +105,8 @@ def test_did2s(data):
         did2s_df = broom.tidy_fixest(fit_r, conf_int=ro.BoolVector([True]))
         did2s_df = pd.DataFrame(did2s_df).T
 
-        np.testing.assert_allclose(fit.coef(), stats.coef(fit_r))
-        np.testing.assert_allclose(fit.se(), did2s_df[2].values.astype(float))
+        np.testing.assert_allclose(fit.coef(), stats.coef(fit_r), atol = 1e-05, rtol = 1e-05)
+        np.testing.assert_allclose(fit.se(), did2s_df[2].values.astype(float), atol = 1e-05, rtol = 1e-05)
 
     if True:
         # test event study with covariate in first stage
@@ -131,8 +131,8 @@ def test_did2s(data):
         did2s_df = broom.tidy_fixest(fit_r, conf_int=ro.BoolVector([True]))
         did2s_df = pd.DataFrame(did2s_df).T
 
-        np.testing.assert_allclose(fit.coef(), stats.coef(fit_r))
-        np.testing.assert_allclose(fit.se(), did2s_df[2].values.astype(float))
+        np.testing.assert_allclose(fit.coef(), stats.coef(fit_r), atol = 1e-05, rtol = 1e-05)
+        np.testing.assert_allclose(fit.se(), did2s_df[2].values.astype(float), atol = 1e-05, rtol = 1e-05)
 
     if True:
         # test event study with covariate in first stage and second stage
@@ -157,8 +157,8 @@ def test_did2s(data):
         did2s_df = broom.tidy_fixest(fit_r, conf_int=ro.BoolVector([True]))
         did2s_df = pd.DataFrame(did2s_df).T
 
-        np.testing.assert_allclose(fit.coef(), stats.coef(fit_r))
-        np.testing.assert_allclose(fit.se(), did2s_df[2].values.astype(float))
+        np.testing.assert_allclose(fit.coef(), stats.coef(fit_r), atol = 1e-05, rtol = 1e-05)
+        np.testing.assert_allclose(fit.se(), did2s_df[2].values.astype(float), atol = 1e-05, rtol = 1e-05)
 
     if True:
         # binary non boolean treatment variable, just check that it runs

diff --git a/tests/test_predict_resid_fixef.py b/tests/test_predict_resid_fixef.py
@@ -174,7 +174,7 @@ def test_predict_nas():
     res = fit.predict(newdata=data)
     fit_r = fixest.feols(ro.Formula(fml), data=data)
     res_r = stats.predict(fit_r, newdata=data)
-    np.testing.assert_allclose(res, res_r)
+    np.testing.assert_allclose(res, res_r, atol=1e-05, rtol=1e-05)
     assert data.shape[0] == len(res)
     assert len(res) == len(res_r)
 
@@ -187,14 +187,14 @@ def test_predict_nas():
     res = fit.predict(newdata=newdata)
     fit_r = fixest.feols(ro.Formula(fml), data=data)
     res_r = stats.predict(fit_r, newdata=newdata)
-    np.testing.assert_allclose(res, res_r)
+    np.testing.assert_allclose(res, res_r, atol=1e-05, rtol=1e-05)
     assert newdata.shape[0] == len(res)
     assert len(res) == len(res_r)
 
     newdata.loc[198, "Y"] = np.nan
     res = fit.predict(newdata=newdata)
     res_r = stats.predict(fit_r, newdata=newdata)
-    np.testing.assert_allclose(res, res_r)
+    np.testing.assert_allclose(res, res_r, atol=1e-05, rtol=1e-05)
     assert newdata.shape[0] == len(res)
     assert len(res) == len(res_r)
 
@@ -204,7 +204,7 @@ def test_predict_nas():
     res = fit.predict(newdata=data)
     fit_r = fixest.feols(ro.Formula(fml), data=data)
     res_r = stats.predict(fit_r, newdata=data)
-    np.testing.assert_allclose(res, res_r)
+    np.testing.assert_allclose(res, res_r, atol=1e-05, rtol=1e-05)
     assert data.shape[0] == len(res)
     assert len(res) == len(res_r)