Skip to content

Commit

Permalink
add statsmodels support for stargazer (#564)
Browse files Browse the repository at this point in the history
* add statsmodels support for stargazer

* rebuild lock file

* dontrun stargazer test
  • Loading branch information
s3alfisc authored Jul 23, 2024
1 parent d77a7ef commit a177a38
Show file tree
Hide file tree
Showing 7 changed files with 206 additions and 146 deletions.
Binary file modified .coverage
Binary file not shown.
284 changes: 146 additions & 138 deletions poetry.lock

Large diffs are not rendered by default.

23 changes: 17 additions & 6 deletions pyfixest/estimation/feols_.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,8 +8,7 @@
import pandas as pd
import polars as pl
from formulaic import Formula
from scipy.sparse import csr_matrix
from scipy.sparse.linalg import lsqr, spsolve
from scipy.sparse.linalg import lsqr
from scipy.stats import chi2, f, norm, t

from pyfixest.errors import VcovTypeNotSupportedError
Expand Down Expand Up @@ -1358,7 +1357,9 @@ def ccv(
n_splits=n_splits,
)

def fixef(self, atol : float = 1e-06, btol : float = 1e-06) -> dict[str, dict[str, float]]:
def fixef(
self, atol: float = 1e-06, btol: float = 1e-06
) -> dict[str, dict[str, float]]:
"""
Compute the coefficients of (swept out) fixed effects for a regression model.
Expand Down Expand Up @@ -1410,7 +1411,7 @@ def fixef(self, atol : float = 1e-06, btol : float = 1e-06) -> dict[str, dict[st
cols = D2.model_spec.column_names

alpha = lsqr(D2, uhat, atol=atol, btol=btol)[0]

res: dict[str, dict[str, float]] = {}
for i, col in enumerate(cols):
variable, level = _extract_variable_level(col)
Expand All @@ -1429,7 +1430,13 @@ def fixef(self, atol : float = 1e-06, btol : float = 1e-06) -> dict[str, dict[st

return self._fixef_dict

def predict(self, newdata: Optional[DataFrameType] = None, atol: float = 1e-6, btol: float = 1e-6) -> np.ndarray:
def predict(
self,
newdata: Optional[DataFrameType] = None,
type: str = "link",
atol: float = 1e-6,
btol: float = 1e-6,
) -> np.ndarray:
"""
Predict values of the model on new data.
Expand All @@ -1442,8 +1449,12 @@ def predict(self, newdata: Optional[DataFrameType] = None, atol: float = 1e-6, b
newdata : Optional[DataFrameType], optional
A pd.DataFrame or pl.DataFrame with the data to be used for prediction.
If None (default), the data used for fitting the model is used.
type : str, optional
The type of prediction to be computed.
Can be either "response" or "link" (default). For linear models, both are
identical.
atol : Float, default 1e-6
Stopping tolerance for scipy.sparse.linalg.lsqr().
Stopping tolerance for scipy.sparse.linalg.lsqr().
See https://docs.scipy.org/doc/scipy/reference/generated/scipy.sparse.linalg.lsqr.html
btol : Float, default 1e-6
Another stopping tolerance for scipy.sparse.linalg.lsqr().
Expand Down
15 changes: 13 additions & 2 deletions pyfixest/estimation/fepois_.py
Original file line number Diff line number Diff line change
Expand Up @@ -270,7 +270,11 @@ def compute_deviance(_Y: np.ndarray, mu: np.ndarray):
self._convergence = True

def predict(
self, newdata: Optional[DataFrameType] = None, type: str = "link"
self,
newdata: Optional[DataFrameType] = None,
type: str = "link",
atol: float = 1e-06,
btol: float = 1e-06,
) -> np.ndarray:
"""
Return predicted values from regression model.
Expand All @@ -292,6 +296,13 @@ def predict(
i.e., it is the expected predictor E(Y|X).
If "link", the output is at the level of the explanatory variables,
i.e., the linear predictor X @ beta.
atol : Float, default 1e-6
Stopping tolerance for scipy.sparse.linalg.lsqr().
See https://docs.scipy.org/doc/scipy/reference/generated/scipy.sparse.linalg.lsqr.html
btol : Float, default 1e-6
Another stopping tolerance for scipy.sparse.linalg.lsqr().
See https://docs.scipy.org/doc/scipy/reference/generated/scipy.sparse.linalg.lsqr.html
Returns
-------
Expand All @@ -313,7 +324,7 @@ def predict(
if type not in ["response", "link"]:
raise ValueError("type must be one of 'response' or 'link'.")

y_hat = super().predict(newdata=newdata)
y_hat = super().predict(newdata=newdata, type=type, atol=atol, btol=btol)
# Only type="response" is on the scale of E(Y|X), so only then is the
# linear predictor exponentiated; type="link" must return X @ beta as is.
if type == "response":
    y_hat = np.exp(y_hat)

Expand Down
16 changes: 16 additions & 0 deletions pyfixest/report/summarize.py
Original file line number Diff line number Diff line change
Expand Up @@ -37,9 +37,25 @@ def __init__(self, models):
A list of regression model objects to be included in the table.
"""
super().__init__(models)

# Temporarily give every model a `_fixef` attribute so that the check
# below can read it uniformly, even on models that do not define one.
for x in self.models:
    if not hasattr(x, "_fixef"):
        x._fixef = None

# Only add the fixed-effects information if at least one model has any.
if any(x._fixef is not None for x in self.models):
    self.add_fixef()

# Remove `_fixef` again from every model that is not of type
# Feols, Feiv or Fepois. A chained `not A or not B or not C` test is true
# for any object that is not an instance of all three classes at once, so
# it would also strip the attribute from these model types; a single
# isinstance() call with a tuple expresses the intended condition.
for x in self.models:
    if not isinstance(x, (Feols, Fepois, Feiv)):
        del x._fixef

def add_fixef(self):
"""
Add information on fixed effects to the regression table.
Expand Down
1 change: 1 addition & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,7 @@ pre-commit = "^3.6.0"
doubleml = "^0.7.1"
marginaleffects = "^0.0.10"
stargazer = ">=0.0.7"
statsmodels = "^0.14.2"

[tool.poetry.group.docs.dependencies]
quartodoc = ">=0.7.2"
Expand Down
13 changes: 13 additions & 0 deletions tests/test_summarise.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,8 @@
import pandas as pd
import pytest
import statsmodels.formula.api as smf

import pyfixest as pf
from pyfixest.estimation.estimation import feols, fepois
from pyfixest.report.summarize import _select_order_coefs, etable, summary
from pyfixest.utils.utils import get_data
Expand Down Expand Up @@ -94,3 +97,13 @@ def test_summary():
"x11",
"x21",
]


@pytest.mark.skip("Pyfixest PR is not yet merged into stargazer.")
def test_stargazer():
    """Build a Stargazer table from a pyfixest fit and a statsmodels OLS fit."""
    formula = "Y ~ X1"
    df = pf.get_data()

    # One model from each library, estimated on the same data and formula.
    models = [
        pf.feols(formula, data=df),
        smf.ols(formula, data=df).fit(),
    ]

    pf.Stargazer(models)

0 comments on commit a177a38

Please sign in to comment.