Skip to content

Commit

Permalink
dev: Support custom statistics in etable() output (#306) (#318)
Browse files Browse the repository at this point in the history
* dev: add support for custom statistics in etable()

* dev: Fix bug

* fix: test coverage for etable customized statistics.

* fix: Fix bug in coverage test.

* fix: fix bug. wrong logic.

* fix: pass local test.
  • Loading branch information
Wenzhi-Ding authored Feb 21, 2024
1 parent e8bfc17 commit b0503e0
Show file tree
Hide file tree
Showing 3 changed files with 93 additions and 10 deletions.
51 changes: 43 additions & 8 deletions pyfixest/summarize.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@ def etable(
type: Optional[str] = "md",
signif_code: Optional[list] = [0.001, 0.01, 0.05],
coef_fmt: Optional[str] = "b (se)",
custom_statistics: Optional[dict] = dict(),
) -> Union[pd.DataFrame, str]:
"""
Create an esttab-like table from a list of models.
Expand All @@ -37,6 +38,8 @@ def etable(
p-value (p). Default is `"b (se)"`.
Spaces ` `, parentheses `()`, brackets `[]`, newlines `\n` are supported.
Newline is not supported for LaTeX output.
custom_statistics: dict, optional
A dictionary of custom statistics. "b", "se", "t", or "p" are reserved.
Returns
-------
Expand All @@ -56,6 +59,16 @@ def etable(
), "signif_code must be in increasing order"
models = _post_processing_input_checks(models)

if custom_statistics:
assert isinstance(custom_statistics, dict), "custom_statistics must be a dict"
for key in custom_statistics:
assert isinstance(
custom_statistics[key], list
), "custom_statistics values must be a list"
assert len(custom_statistics[key]) == len(
models
), f"custom_statistics {key} must have the same number as models"

assert digits >= 0, "digits must be a positive integer"
assert type in [
"df",
Expand Down Expand Up @@ -111,7 +124,7 @@ def etable(
colnames.reverse()
nobs_fixef_df = nobs_fixef_df[colnames].T.reset_index()

coef_fmt_elements, coef_fmt_title = _parse_coef_fmt(coef_fmt)
coef_fmt_elements, coef_fmt_title = _parse_coef_fmt(coef_fmt, custom_statistics)

etable_list = []
for i, model in enumerate(models):
Expand Down Expand Up @@ -139,8 +152,15 @@ def etable(
model[coef_fmt_title] += model["t value"].astype(str)
elif element == "p":
model[coef_fmt_title] += model["Pr(>|t|)"].astype(str)
elif element in custom_statistics:
assert len(custom_statistics[element][i]) == len(
model["Estimate"]
), f"Custom_statistics {element} has unequal length to the number of coefficients in model {i}"
model[coef_fmt_title] += pd.Series(
np.round(custom_statistics[element][i], digits)
).astype(str)
elif element == "\n" and type == "tex":
raise ValueError("Newline is not supported for LaTeX output.")
raise ValueError("Newline is currently not supported for LaTeX output.")
else:
model[coef_fmt_title] += element
model[coef_fmt_title] = pd.Categorical(model[coef_fmt_title])
Expand Down Expand Up @@ -351,27 +371,42 @@ def _tabulate_etable(df, n_models, n_fixef):
return formatted_table


def _parse_coef_fmt(coef_fmt: str):
def _parse_coef_fmt(coef_fmt: str, custom_statistics: Optional[dict] = None):
"""
Parse the coef_fmt string.
Parameters
----------
- coef_fmt (str): The coef_fmt string.
coef_fmt: str
The coef_fmt string.
custom_statistics: dict, optional
A dictionary of custom statistics. Key should be lowercased (e.g., simul_intv).
If you provide "b", "se", "t", or "p" as a key, it will overwrite the default
values.
Returns
-------
- coef_fmt_elements (str): The parsed coef_fmt string.
- coef_fmt_title (str): The title for the coef_fmt string.
coef_fmt_elements: str
The parsed coef_fmt string.
coef_fmt_title: str
The title for the coef_fmt string.
"""
allowed_elements = ["b", "se", "t", "p", " ", r"\(", r"\)", r"\[", r"\]", "\n"]
coef_fmt_elements = re.findall("|".join(allowed_elements), coef_fmt)
title_map = {
"b": "Coefficient",
"se": "Std. Error",
"t": "t-stats",
"p": "p-value",
}

allowed_elements = ["b", "se", "t", "p", " ", "\n", r"\(", r"\)", r"\[", r"\]", ","]
if custom_statistics:
custom_elements = list(custom_statistics.keys())
if any([x in ["b", "se", "t", "p"] for x in custom_elements]):
raise ValueError(
"You cannot use 'b', 'se', 't', or 'p' as a key in custom_statistics."
)
allowed_elements += list(custom_statistics.keys())
coef_fmt_elements = re.findall("|".join(allowed_elements), coef_fmt)
coef_fmt_title = "".join([title_map.get(x, x) for x in coef_fmt_elements])

return coef_fmt_elements, coef_fmt_title
40 changes: 40 additions & 0 deletions tests/test_errors.py
Original file line number Diff line number Diff line change
Expand Up @@ -225,3 +225,43 @@ def test_errors_etable():

with pytest.raises(AssertionError):
etable([fit1, fit2], signif_code=[0.1, 0.5, 1.5])

with pytest.raises(ValueError):
etable([fit1, fit2], coef_fmt="b (se) \n t [p]", type="tex")

with pytest.raises(AssertionError):
etable(
models=[fit1, fit2],
custom_statistics={
"conf_int_lb": [
fit2._conf_int[0]
], # length of customized statistics not equal to the number of models
"conf_int_ub": [fit2._conf_int[1]],
},
coef_fmt="b [conf_int_lb, conf_int_ub]",
)

with pytest.raises(AssertionError):
etable(
models=[fit1, fit2],
custom_statistics={
"conf_int_lb": [
[0.1, 0.1, 0.1],
fit2._conf_int[0],
], # length of a model's custom statistic not equal to its number of coefficients
"conf_int_ub": [fit1._conf_int[1], fit2._conf_int[1]],
},
coef_fmt="b [conf_int_lb, conf_int_ub]",
)

with pytest.raises(ValueError):
etable(
models=[fit1, fit2],
custom_statistics={
"b": [
fit2._conf_int[0],
fit2._conf_int[0],
], # reserved keyword cannot be used as a custom statistic
},
coef_fmt="b [se]",
)
12 changes: 10 additions & 2 deletions tests/test_summarise.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,5 +32,13 @@ def test_summary():
etable([fit1, fit2], signif_code=[0.01, 0.05, 0.1])
etable([fit1, fit2], signif_code=None)

etable([fit1, fit2], coef_fmt="t (p)")
etable([fit1, fit2], coef_fmt="t (p) \n t (t)")
etable([fit1, fit2], coef_fmt="b (se) \n t [p]")

etable(
models=[fit1, fit2],
custom_statistics={
"conf_int_lb": [fit1._conf_int[0], fit2._conf_int[0]],
"conf_int_ub": [fit1._conf_int[1], fit2._conf_int[1]],
},
coef_fmt="b [conf_int_lb, conf_int_ub]",
)

0 comments on commit b0503e0

Please sign in to comment.