py-econometrics · s3alfisc · Jul 12, 2024 · Jul 12, 2024
diff --git a/pyfixest/estimation/FixestMulti_.py b/pyfixest/estimation/FixestMulti_.py
@@ -24,6 +24,7 @@ def __init__(
         data: DataFrameType,
         copy_data: bool,
         store_data: bool,
+        lean: bool,
         fixef_tol: float,
         weights_type: str,
     ) -> None:
@@ -38,6 +39,8 @@ def __init__(
             Whether to copy the data or not.
         store_data : bool
             Whether to store the data in the resulting model object or not.
+        lean: bool
+            If True, large-memory objects are dropped from the resulting model object.
         fixef_tol: float
             The tolerance for the convergence of the demeaning algorithm.
         weights_type: str
@@ -51,6 +54,7 @@ def __init__(
         """
         self._copy_data = copy_data
         self._store_data = store_data
+        self._lean = lean
         self._fixef_tol = fixef_tol
         self._weights_type = weights_type
 
@@ -189,6 +193,7 @@ def _estimate_all_models(
         _has_fixef = False
         _fixef_tol = self._fixef_tol
         _weights_type = self._weights_type
+        _lean = self._lean
 
         FixestFormulaDict = self.FixestFormulaDict
         _fixef_keys = list(FixestFormulaDict.keys())
@@ -413,6 +418,9 @@ def _estimate_all_models(
                     else:
                         FIT._icovars = None
 
+                if _lean:
+                    FIT._clear_attributes()
+
                     # store fitted model
                 self.all_fitted_models[FixestFormula.fml] = FIT
 

diff --git a/pyfixest/estimation/estimation.py b/pyfixest/estimation/estimation.py
@@ -23,6 +23,7 @@ def feols(
     i_ref1=None,
     copy_data: bool = True,
     store_data: bool = True,
+    lean: bool = False,
     weights_type: str = "aweights",
 ) -> Union[Feols, FixestMulti]:
     """
@@ -87,6 +88,13 @@ def feols(
         impact on post-estimation capabilities that rely on the data, e.g. `predict()`
         or `vcov()`.
 
+    lean: bool, optional
+        False by default. If True, all large objects are removed from the
+        returned result: this saves memory but blocks many post-estimation
+        methods. It is recommended to pass the `vcov` argument at estimation
+        time to obtain the appropriate standard errors, since computing
+        different standard errors won't be possible afterwards.
+
     weights_type: str, optional
         Options include `aweights` or `fweights`. `aweights` implement analytic or
         precision weights, while `fweights` implement frequency weights. For details
@@ -319,6 +327,7 @@ class for multiple models specified via `fml`.
         collin_tol=collin_tol,
         copy_data=copy_data,
         store_data=store_data,
+        lean=lean,
         fixef_tol=fixef_tol,
         weights_type=weights_type,
     )
@@ -327,6 +336,7 @@ class for multiple models specified via `fml`.
         data=data,
         copy_data=copy_data,
         store_data=store_data,
+        lean=lean,
         fixef_tol=fixef_tol,
         weights_type=weights_type,
     )
@@ -358,6 +368,7 @@ def fepois(
     i_ref1=None,
     copy_data: bool = True,
     store_data: bool = True,
+    lean: bool = False,
 ) -> Union[Feols, Fepois, FixestMulti]:
     """
     Estimate Poisson regression model with fixed effects using the `ppmlhdfe` algorithm.
@@ -427,6 +438,13 @@ def fepois(
         impact on post-estimation capabilities that rely on the data, e.g. `predict()`
         or `vcov()`.
 
+    lean: bool, optional
+        False by default. If True, all large objects are removed from the
+        returned result: this saves memory but blocks many post-estimation
+        methods. It is recommended to pass the `vcov` argument at estimation
+        time to obtain the appropriate standard errors, since computing
+        different standard errors won't be possible afterwards.
+
     Returns
     -------
     object
@@ -473,6 +491,7 @@ def fepois(
         collin_tol=collin_tol,
         copy_data=copy_data,
         store_data=store_data,
+        lean=lean,
         fixef_tol=fixef_tol,
         weights_type=weights_type,
     )
@@ -481,6 +500,7 @@ def fepois(
         data=data,
         copy_data=copy_data,
         store_data=store_data,
+        lean=lean,
         fixef_tol=fixef_tol,
         weights_type=weights_type,
     )
@@ -516,6 +536,7 @@ def _estimation_input_checks(
     collin_tol: float,
     copy_data: bool,
     store_data: bool,
+    lean: bool,
     fixef_tol: float,
     weights_type: str,
 ):
@@ -550,11 +571,10 @@ def _estimation_input_checks(
     if weights is not None:
         assert weights in data.columns, "weights must be a column in data"
 
-    if not isinstance(copy_data, bool):
-        raise TypeError("copy_data must be a boolean")
-
-    if not isinstance(store_data, bool):
-        raise TypeError("store_data must be a boolean")
+    bool_args = {"copy_data": copy_data, "store_data": store_data, "lean": lean}
+    for name, arg in bool_args.items():
+        if not isinstance(arg, bool):
+            raise TypeError(f"The function argument {name} must be of type bool.")
 
     if not isinstance(fixef_tol, float):
         raise TypeError(

diff --git a/pyfixest/estimation/feols_.py b/pyfixest/estimation/feols_.py
@@ -1,4 +1,5 @@
 import functools
+import gc
 import re
 import warnings
 from importlib import import_module
@@ -795,6 +796,30 @@ def add_fixest_multi_context(
         else:
             self._has_fixef = False
 
+    def _clear_attributes(self):
+        """Delete large attributes from the object, if set, then collect garbage."""
+        large_attribute_names = (
+            "_X",
+            "_Y",
+            "_Z",
+            "_data",
+            "_cluster_df",
+            "_tXZ",
+            "_tZy",
+            "_tZX",
+            "_weights",
+            "_scores",
+            "_tZZinv",
+            "_u_hat",
+            "_Y_hat_link",
+            "_Y_hat_response",
+            "_Y_untransformed",
+        )
+        for name in large_attribute_names:
+            if hasattr(self, name):
+                delattr(self, name)
+        gc.collect()
+
     def wald_test(self, R=None, q=None, distribution="F"):
         """
         Conduct Wald test.

diff --git a/tests/test_api.py b/tests/test_api.py
@@ -62,3 +62,12 @@ def test_fepois_args():
 
     assert fit1.coef().xs("X1") != fit3.coef().xs("X1")
     assert np.abs(fit1.coef().xs("X1") - fit3.coef().xs("X1")) < 0.01
+
+
+def test_lean():
+    """Check that `lean=True` removes the `_data`, `_X` and `_Y` attributes."""
+    lean_fit = pf.feols("Y ~ X1 + X2 | f1", data=pf.get_data(), lean=True)
+
+    # a subset of the attribute names that `_clear_attributes` deletes
+    for stripped in ("_data", "_X", "_Y"):
+        assert not hasattr(lean_fit, stripped)