Skip to content

Commit

Permalink
add lean function argument (#548)
Browse files Browse the repository at this point in the history
  • Loading branch information
s3alfisc authored Jul 12, 2024
1 parent ca1200e commit 3ab4397
Show file tree
Hide file tree
Showing 4 changed files with 67 additions and 5 deletions.
8 changes: 8 additions & 0 deletions pyfixest/estimation/FixestMulti_.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@ def __init__(
data: DataFrameType,
copy_data: bool,
store_data: bool,
lean: bool,
fixef_tol: float,
weights_type: str,
) -> None:
Expand All @@ -38,6 +39,8 @@ def __init__(
Whether to copy the data or not.
store_data : bool
Whether to store the data in the resulting model object or not.
lean: bool
Whether to drop large-memory objects from the resulting model object (True) or keep them (False).
fixef_tol: float
The tolerance for the convergence of the demeaning algorithm.
weights_type: str
Expand All @@ -51,6 +54,7 @@ def __init__(
"""
self._copy_data = copy_data
self._store_data = store_data
self._lean = lean
self._fixef_tol = fixef_tol
self._weights_type = weights_type

Expand Down Expand Up @@ -189,6 +193,7 @@ def _estimate_all_models(
_has_fixef = False
_fixef_tol = self._fixef_tol
_weights_type = self._weights_type
_lean = self._lean

FixestFormulaDict = self.FixestFormulaDict
_fixef_keys = list(FixestFormulaDict.keys())
Expand Down Expand Up @@ -413,6 +418,9 @@ def _estimate_all_models(
else:
FIT._icovars = None

if _lean:
FIT._clear_attributes()

# store fitted model
self.all_fitted_models[FixestFormula.fml] = FIT

Expand Down
30 changes: 25 additions & 5 deletions pyfixest/estimation/estimation.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@ def feols(
i_ref1=None,
copy_data: bool = True,
store_data: bool = True,
lean: bool = False,
weights_type: str = "aweights",
) -> Union[Feols, FixestMulti]:
"""
Expand Down Expand Up @@ -87,6 +88,13 @@ def feols(
impact on post-estimation capabilities that rely on the data, e.g. `predict()`
or `vcov()`.
lean: bool, optional
False by default. If True, then all large objects are removed from the
returned result: this will save memory but will block the possibility
to use many methods. It is recommended to use the argument vcov
to obtain the appropriate standard-errors at estimation time,
since obtaining different SEs won't be possible afterwards.
weights_type: str, optional
Options include `aweights` or `fweights`. `aweights` implement analytic or
precision weights, while `fweights` implement frequency weights. For details
Expand Down Expand Up @@ -319,6 +327,7 @@ class for multiple models specified via `fml`.
collin_tol=collin_tol,
copy_data=copy_data,
store_data=store_data,
lean=lean,
fixef_tol=fixef_tol,
weights_type=weights_type,
)
Expand All @@ -327,6 +336,7 @@ class for multiple models specified via `fml`.
data=data,
copy_data=copy_data,
store_data=store_data,
lean=lean,
fixef_tol=fixef_tol,
weights_type=weights_type,
)
Expand Down Expand Up @@ -358,6 +368,7 @@ def fepois(
i_ref1=None,
copy_data: bool = True,
store_data: bool = True,
lean: bool = False,
) -> Union[Feols, Fepois, FixestMulti]:
"""
Estimate Poisson regression model with fixed effects using the `ppmlhdfe` algorithm.
Expand Down Expand Up @@ -427,6 +438,13 @@ def fepois(
impact on post-estimation capabilities that rely on the data, e.g. `predict()`
or `vcov()`.
lean: bool, optional
False by default. If True, then all large objects are removed from the
returned result: this will save memory but will block the possibility
to use many methods. It is recommended to use the argument vcov
to obtain the appropriate standard-errors at estimation time,
since obtaining different SEs won't be possible afterwards.
Returns
-------
object
Expand Down Expand Up @@ -473,6 +491,7 @@ def fepois(
collin_tol=collin_tol,
copy_data=copy_data,
store_data=store_data,
lean=lean,
fixef_tol=fixef_tol,
weights_type=weights_type,
)
Expand All @@ -481,6 +500,7 @@ def fepois(
data=data,
copy_data=copy_data,
store_data=store_data,
lean=lean,
fixef_tol=fixef_tol,
weights_type=weights_type,
)
Expand Down Expand Up @@ -516,6 +536,7 @@ def _estimation_input_checks(
collin_tol: float,
copy_data: bool,
store_data: bool,
lean: bool,
fixef_tol: float,
weights_type: str,
):
Expand Down Expand Up @@ -550,11 +571,10 @@ def _estimation_input_checks(
if weights is not None:
assert weights in data.columns, "weights must be a column in data"

if not isinstance(copy_data, bool):
raise TypeError("copy_data must be a boolean")

if not isinstance(store_data, bool):
raise TypeError("store_data must be a boolean")
bool_args = [copy_data, store_data, lean]
for arg in bool_args:
if not isinstance(arg, bool):
raise TypeError(f"The function argument {arg} must be of type bool.")

if not isinstance(fixef_tol, float):
raise TypeError(
Expand Down
25 changes: 25 additions & 0 deletions pyfixest/estimation/feols_.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
import functools
import gc
import re
import warnings
from importlib import import_module
Expand Down Expand Up @@ -795,6 +796,30 @@ def add_fixest_multi_context(
else:
self._has_fixef = False

def _clear_attributes(self):
    """
    Delete large attributes from the model object to reduce memory usage.

    Each name in the list below is removed from the instance if it can be
    removed; names that are not set on the instance are skipped. After
    this call, any method that reads one of the removed attributes will
    raise an ``AttributeError``.

    Returns
    -------
    None
    """
    attributes = (
        "_X",
        "_Y",
        "_Z",
        "_data",
        "_cluster_df",
        "_tXZ",
        "_tZy",
        "_tZX",
        "_weights",
        "_scores",
        "_tZZinv",
        "_u_hat",
        "_Y_hat_link",
        "_Y_hat_response",
        "_Y_untransformed",
    )

    for attr in attributes:
        # EAFP instead of hasattr(): hasattr() is also True for attributes
        # that only exist on the class (or for properties without a
        # deleter), in which case delattr() on the instance raises.
        try:
            delattr(self, attr)
        except AttributeError:
            continue

    # One collection pass after everything has been dropped, so the freed
    # objects are reclaimed promptly.
    gc.collect()

def wald_test(self, R=None, q=None, distribution="F"):
"""
Conduct Wald test.
Expand Down
9 changes: 9 additions & 0 deletions tests/test_api.py
Original file line number Diff line number Diff line change
Expand Up @@ -62,3 +62,12 @@ def test_fepois_args():

assert fit1.coef().xs("X1") != fit3.coef().xs("X1")
assert np.abs(fit1.coef().xs("X1") - fit3.coef().xs("X1")) < 0.01


def test_lean():
    """Fitting with ``lean=True`` must not leave the data or design matrices on the model."""
    fit = pf.feols("Y ~ X1 + X2 | f1", data=pf.get_data(), lean=True)

    for dropped in ("_data", "_X", "_Y"):
        assert not hasattr(fit, dropped)

0 comments on commit 3ab4397

Please sign in to comment.