Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add lean function argument to feols(), fepois() #548

Merged
merged 1 commit into from
Jul 12, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 8 additions & 0 deletions pyfixest/estimation/FixestMulti_.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@ def __init__(
data: DataFrameType,
copy_data: bool,
store_data: bool,
lean: bool,
fixef_tol: float,
weights_type: str,
) -> None:
Expand All @@ -38,6 +39,8 @@ def __init__(
Whether to copy the data or not.
store_data : bool
Whether to store the data in the resulting model object or not.
lean : bool
Whether to drop large-memory objects (e.g. the data and design matrices)
from the resulting model object in order to save memory.
fixef_tol: float
The tolerance for the convergence of the demeaning algorithm.
weights_type: str
Expand All @@ -51,6 +54,7 @@ def __init__(
"""
self._copy_data = copy_data
self._store_data = store_data
self._lean = lean
self._fixef_tol = fixef_tol
self._weights_type = weights_type

Expand Down Expand Up @@ -189,6 +193,7 @@ def _estimate_all_models(
_has_fixef = False
_fixef_tol = self._fixef_tol
_weights_type = self._weights_type
_lean = self._lean

FixestFormulaDict = self.FixestFormulaDict
_fixef_keys = list(FixestFormulaDict.keys())
Expand Down Expand Up @@ -413,6 +418,9 @@ def _estimate_all_models(
else:
FIT._icovars = None

if _lean:
FIT._clear_attributes()

# store fitted model
self.all_fitted_models[FixestFormula.fml] = FIT

Expand Down
30 changes: 25 additions & 5 deletions pyfixest/estimation/estimation.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@ def feols(
i_ref1=None,
copy_data: bool = True,
store_data: bool = True,
lean: bool = False,
weights_type: str = "aweights",
) -> Union[Feols, FixestMulti]:
"""
Expand Down Expand Up @@ -87,6 +88,13 @@ def feols(
impact on post-estimation capabilities that rely on the data, e.g. `predict()`
or `vcov()`.

lean: bool, optional
False by default. If True, all large objects are removed from the
returned result. This saves memory, but many post-estimation methods
will no longer be available. It is recommended to set the `vcov`
argument at estimation time to obtain the appropriate standard errors,
since computing different SEs won't be possible afterwards.

weights_type: str, optional
Options include `aweights` or `fweights`. `aweights` implement analytic or
precision weights, while `fweights` implement frequency weights. For details
Expand Down Expand Up @@ -319,6 +327,7 @@ class for multiple models specified via `fml`.
collin_tol=collin_tol,
copy_data=copy_data,
store_data=store_data,
lean=lean,
fixef_tol=fixef_tol,
weights_type=weights_type,
)
Expand All @@ -327,6 +336,7 @@ class for multiple models specified via `fml`.
data=data,
copy_data=copy_data,
store_data=store_data,
lean=lean,
fixef_tol=fixef_tol,
weights_type=weights_type,
)
Expand Down Expand Up @@ -358,6 +368,7 @@ def fepois(
i_ref1=None,
copy_data: bool = True,
store_data: bool = True,
lean: bool = False,
) -> Union[Feols, Fepois, FixestMulti]:
"""
Estimate Poisson regression model with fixed effects using the `ppmlhdfe` algorithm.
Expand Down Expand Up @@ -427,6 +438,13 @@ def fepois(
impact on post-estimation capabilities that rely on the data, e.g. `predict()`
or `vcov()`.

lean: bool, optional
False by default. If True, all large objects are removed from the
returned result. This saves memory, but many post-estimation methods
will no longer be available. It is recommended to set the `vcov`
argument at estimation time to obtain the appropriate standard errors,
since computing different SEs won't be possible afterwards.

Returns
-------
object
Expand Down Expand Up @@ -473,6 +491,7 @@ def fepois(
collin_tol=collin_tol,
copy_data=copy_data,
store_data=store_data,
lean=lean,
fixef_tol=fixef_tol,
weights_type=weights_type,
)
Expand All @@ -481,6 +500,7 @@ def fepois(
data=data,
copy_data=copy_data,
store_data=store_data,
lean=lean,
fixef_tol=fixef_tol,
weights_type=weights_type,
)
Expand Down Expand Up @@ -516,6 +536,7 @@ def _estimation_input_checks(
collin_tol: float,
copy_data: bool,
store_data: bool,
lean: bool,
fixef_tol: float,
weights_type: str,
):
Expand Down Expand Up @@ -550,11 +571,10 @@ def _estimation_input_checks(
if weights is not None:
assert weights in data.columns, "weights must be a column in data"

if not isinstance(copy_data, bool):
raise TypeError("copy_data must be a boolean")

if not isinstance(store_data, bool):
raise TypeError("store_data must be a boolean")
# Validate the boolean flags. Each value is paired with its argument name so
# that the error message identifies the offending argument instead of
# printing the (wrongly typed) value that was passed in.
bool_args = {"copy_data": copy_data, "store_data": store_data, "lean": lean}
for arg_name, arg_value in bool_args.items():
    if not isinstance(arg_value, bool):
        raise TypeError(f"The function argument {arg_name} must be of type bool.")

if not isinstance(fixef_tol, float):
raise TypeError(
Expand Down
25 changes: 25 additions & 0 deletions pyfixest/estimation/feols_.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
import functools
import gc
import re
import warnings
from importlib import import_module
Expand Down Expand Up @@ -795,6 +796,30 @@ def add_fixest_multi_context(
else:
self._has_fixef = False

def _clear_attributes(self):
    """
    Delete large-memory attributes from the model object.

    Every attribute named in the list below is removed from the instance if
    it is currently set; names that are not set are skipped. A garbage
    collection pass is triggered afterwards.

    Returns
    -------
    None
    """
    attributes = (
        "_X",
        "_Y",
        "_Z",
        "_data",
        "_cluster_df",
        "_tXZ",
        "_tZy",
        "_tZX",
        "_weights",
        "_scores",
        "_tZZinv",
        "_u_hat",
        "_Y_hat_link",
        "_Y_hat_response",
        "_Y_untransformed",
    )

    for attr in attributes:
        # EAFP: a single delete attempt instead of a hasattr/delattr pair.
        # A missing attribute is expected (not every model sets all of
        # them), so it is deliberately skipped.
        try:
            delattr(self, attr)
        except AttributeError:
            continue

    # Collect once, after all references have been dropped.
    gc.collect()

def wald_test(self, R=None, q=None, distribution="F"):
"""
Conduct Wald test.
Expand Down
9 changes: 9 additions & 0 deletions tests/test_api.py
Original file line number Diff line number Diff line change
Expand Up @@ -62,3 +62,12 @@ def test_fepois_args():

assert fit1.coef().xs("X1") != fit3.coef().xs("X1")
assert np.abs(fit1.coef().xs("X1") - fit3.coef().xs("X1")) < 0.01


def test_lean():
    """Fit `feols` with `lean=True` and check `_data`, `_X` and `_Y` are absent."""
    df = pf.get_data()
    lean_fit = pf.feols("Y ~ X1 + X2 | f1", data=df, lean=True)

    # Each of these attributes must be missing from the returned model object.
    for removed in ("_data", "_X", "_Y"):
        assert not hasattr(lean_fit, removed)
assert not hasattr(fit, "_Y")
Loading