Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Remove Stargazer Dependency #568

Merged
merged 8 commits into from
Jul 31, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
90 changes: 43 additions & 47 deletions docs/difference-in-differences.ipynb

Large diffs are not rendered by default.

1,070 changes: 953 additions & 117 deletions docs/stargazer.ipynb

Large diffs are not rendered by default.

429 changes: 211 additions & 218 deletions poetry.lock

Large diffs are not rendered by default.

2 changes: 1 addition & 1 deletion pyfixest/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@
fepois,
rwolf,
)
from pyfixest.report import Stargazer, coefplot, etable, iplot, summary
from pyfixest.report import coefplot, etable, iplot, summary
from pyfixest.utils import (
get_data,
get_ssc,
Expand Down
1 change: 1 addition & 0 deletions pyfixest/estimation/fepois_.py
Original file line number Diff line number Diff line change
Expand Up @@ -312,6 +312,7 @@ def predict(
See https://docs.scipy.org/doc/scipy/reference/generated/scipy.sparse.linalg.lsqr.html



Returns
-------
np.ndarray
Expand Down
3 changes: 1 addition & 2 deletions pyfixest/report/__init__.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
from pyfixest.report.summarize import Stargazer, etable, summary
from pyfixest.report.summarize import etable, summary
from pyfixest.report.visualize import (
coefplot,
iplot,
Expand All @@ -9,5 +9,4 @@
"etable",
"iplot",
"coefplot",
"Stargazer",
]
194 changes: 73 additions & 121 deletions pyfixest/report/summarize.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,8 +3,6 @@

import numpy as np
import pandas as pd
from stargazer.stargazer import LineLocation
from stargazer.stargazer import Stargazer as BaseStargazer
from tabulate import tabulate

from pyfixest.estimation.feiv_ import Feiv
Expand All @@ -14,67 +12,9 @@
from pyfixest.utils.dev_utils import _select_order_coefs


class Stargazer(BaseStargazer):
    """
    A wrapper around the Stargazer class from the stargazer package.

    Adds fixed effects to the regression table. For details,
    see the Stargazer documentation:
    https://github.com/StatsReporting/stargazer.

    Parameters
    ----------
    models : list
        A list of regression model objects to be included in the table.
    """

    def __init__(self, models):
        """
        Initialize the Stargazer object with a list of models.

        Models without a `_fixef` attribute temporarily receive
        `_fixef = None` while the fixed-effects lines are built. The
        attribute is removed again afterwards, but only from the models
        that were patched here; models that already carried `_fixef`
        are left untouched.

        Parameters
        ----------
        models : list
            A list of regression model objects to be included in the table.
        """
        super().__init__(models)

        # Track exactly which models we patch so that cleanup never deletes
        # a `_fixef` attribute that belongs to the model itself. Patching
        # inside the loop keeps a model that appears twice from being
        # recorded (and later deleted from) twice.
        patched = []
        for x in self.models:
            if not hasattr(x, "_fixef"):
                x._fixef = None
                patched.append(x)

        try:
            if any(x._fixef is not None for x in self.models):
                self.add_fixef()
        finally:
            # Undo the temporary patch even if building the lines failed.
            for x in patched:
                del x._fixef

    def add_fixef(self):
        """
        Add information on fixed effects to the regression table.

        This method deparses the fixed effects contained in
        each model's `_fixef` attribute and attaches one line per
        fixed-effect variable to the top of the table footer. Models
        without a `_fixef` attribute are treated as having no fixed effects.
        """
        deparsed_fixef_lists = _deparse_fixef_for_stargazer(
            [getattr(x, "_fixef", None) for x in self.models]
        )

        for label, marks in deparsed_fixef_lists.items():
            self.add_line(label, marks, LineLocation.FOOTER_TOP)


def etable(
models: Union[list[Union[Feols, Fepois, Feiv]], FixestMulti],
type: str = "md",
type: str = "df",
signif_code: list = [0.001, 0.01, 0.05],
coef_fmt: str = "b (se)",
custom_stats: Optional[dict] = None,
Expand Down Expand Up @@ -135,8 +75,8 @@ def etable(
A DataFrame with the coefficients and standard errors of the models.
""" # noqa: D301
assert (
signif_code is None or len(signif_code) == 3
), "signif_code must be a list of length 3 or None"
isinstance([0.1, 0.2, 0.3], list) and len(signif_code) == 3
), "signif_code must be a list of length 3"
if signif_code:
assert all(
[0 < i < 1 for i in signif_code]
Expand Down Expand Up @@ -315,19 +255,28 @@ def etable(
res_all = pd.concat([depvars, res, nobs_fixef_df], ignore_index=True)
res_all.columns = pd.Index([""] + list(res_all.columns[1:]))

if type == "tex":
return res_all.to_latex()
elif type == "md":
res_all = _tabulate_etable(res_all, len(models), n_fixef)
caption = (
f"Significance levels: * p < {signif_code[2]}, ** p < {signif_code[1]}, *** p < {signif_code[0]}. "
+ f"Format of coefficient cell:\n{coef_fmt_title}"
)

if type == "md":
res_all = _tabulate_etable_md(res_all, len(models), n_fixef)
print(res_all)
if signif_code:
print(
f"Significance levels: * p < {signif_code[2]}, ** p < {signif_code[1]}, *** p < {signif_code[0]}"
)
print(f"Format of coefficient cell:\n{coef_fmt_title}")
print(f"Format of coefficient cell:\n{coef_fmt_title}")
return None
elif type in ["df", "tex"]:
res_all = _tabulate_etable_df(res_all, n_fixef, caption)
if type == "df":
return res_all
else:
return res_all.to_latex()
else:
return res_all
raise ValueError("type must be either 'df', 'md' or 'tex'")


def summary(
Expand Down Expand Up @@ -463,7 +412,57 @@ def _post_processing_input_checks(
return models


def _tabulate_etable(df, n_models, n_fixef):
def _tabulate_etable_df(df, n_fixef, caption):
k, _ = df.shape
n_coef = k - 3 - 2 - n_fixef

line1 = 2 + n_coef
line2 = line1 + n_fixef
line3 = k

styler = (
df.style.set_properties(**{"text-align": "right"})
.set_table_styles(
[
# {'selector': 'thead th', 'props': 'border-bottom: 2px solid black; text-align: center;'}, # Header row
{
"selector": "tbody tr:nth-child(0) td",
"props": "background-color: #f0f0f0",
}, # First row
{
"selector": "tbody tr:nth-child(1) td",
"props": "border-bottom: 2px solid black",
}, # Line below row 1 (index 1)
{
"selector": f"tbody tr:nth-child({line1}) td",
"props": "border-bottom: 1px solid black;",
}, # Line below fixef_bar row
{
"selector": f"tbody tr:nth-child({line2}) td",
"props": "border-bottom: 1px solid black;",
}, # Line below fixef_bar row
{
"selector": f"tbody tr:nth-child({line3}) td",
"props": "border-bottom: 1px solid black;",
}, # Line below fixef_bar row
{
"selector": "tbody td",
"props": "background-color: #ffffff;",
}, # Background color for all cells
{
"selector": "tbody tr td:first-child",
"props": "background-color: #f0f0f0; font-weight: bold;text-align: left;",
}, # Set first column to grey and bold
]
)
.hide(axis="index")
.set_caption(caption)
)

return styler


def _tabulate_etable_md(df, n_models, n_fixef):
"""
Format and tabulate a DataFrame.

Expand All @@ -479,7 +478,10 @@ def _tabulate_etable(df, n_models, n_fixef):
"""
# Format the DataFrame for tabulate
table = tabulate(
df, headers="keys", showindex=False, colalign=["left"] + n_models * ["right"]
df,
headers="keys",
showindex=False,
colalign=["left"] + n_models * ["right"],
)

# Split the table into header and body
Expand Down Expand Up @@ -599,53 +601,3 @@ def _number_formatter(x: float, **kwargs) -> str:
_int, _float = str(x_str).split(".")
_float = _float.ljust(digits, "0")
return _int if digits == 0 else f"{_int}.{_float}"


def _deparse_fixef_for_stargazer(fixef_list: list[str]) -> dict[str, list[str]]:
"""
Deparse Feols._fixef to a dict of lists for easy use with Stargazer
to add fixed effects to the regression table.

Parameters
----------
fixef_list : list
List of fixed effects from Feols._fixef.

Returns
-------
dict
Dictionary of lists, where each list contains the fixed
effects for a given variable.

Example
-------
# basic example
fixef_list = ['f1', 'f2', 'f1+f2', 'f1', 'f2', 'f1+f2']
deparse_fixef_for_stargazer(fixef_list)
# Output
{'f1': ['f1', '-', 'f1', 'f1', '-', 'f1'],
'f2': ['-', 'f2', 'f2', '-', 'f2', 'f2']
}
"""

def identify_variables(lst):
variables = set()
for item in lst:
if item:
parts = item.split("+")
for part in parts:
variables.add(part)
return list(variables)

unique_variables = identify_variables(fixef_list)

variable_lists: dict[str, list[str]] = {var: [] for var in unique_variables}

for item in fixef_list:
for var in unique_variables:
if item and var in item:
variable_lists[var].append("x")
else:
variable_lists[var].append("-")

return variable_lists
3 changes: 1 addition & 2 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -33,8 +33,6 @@ wildboottest = ">=0.2.0"
pre-commit = "^3.6.0"
doubleml = "^0.7.1"
marginaleffects = "^0.0.10"
stargazer = ">=0.0.7"
statsmodels = "^0.14.2"

[tool.poetry.group.docs.dependencies]
quartodoc = ">=0.7.2"
Expand Down Expand Up @@ -95,6 +93,7 @@ ignore = [
"SIM110", # Use all instead of `for` loop
"TRY003", # Avoid specifying long messages outside the exception class
"D205", # 1 blank line required between summary line and description
"W505", # Doc line too long
]

[tool.ruff.lint.per-file-ignores]
Expand Down
2 changes: 1 addition & 1 deletion tests/test_summarise.py
Original file line number Diff line number Diff line change
Expand Up @@ -43,7 +43,7 @@ def test_summary():

# Test significance code
etable([fit1, fit2], signif_code=[0.01, 0.05, 0.1])
etable([fit1, fit2], signif_code=None)
etable([fit1, fit2], signif_code=[0.02, 0.06, 0.1])

# Test coefficient format
etable([fit1, fit2], coef_fmt="b (se)\nt [p]")
Expand Down
Loading