dev: Support custom statistics in etable() output (#306) (#318)

* dev: add support for custom statistics in etable()
* dev: Fix bug
* fix: test coverage for etable customized statistics.
* fix: Fix bug in coverage test.
* fix: fix bug. wrong logic.
* fix: pass local test.
py-econometrics · Feb 21, 2024 · b0503e0 · b0503e0
1 parent e8bfc17
commit b0503e0
Show file tree

Hide file tree

Showing 3 changed files with 93 additions and 10 deletions.
diff --git a/pyfixest/summarize.py b/pyfixest/summarize.py
@@ -16,6 +16,7 @@ def etable(
     type: Optional[str] = "md",
     signif_code: Optional[list] = [0.001, 0.01, 0.05],
     coef_fmt: Optional[str] = "b (se)",
+    custom_statistics: Optional[dict] = dict(),
 ) -> Union[pd.DataFrame, str]:
     """
     Create an esttab-like table from a list of models.
@@ -37,6 +38,8 @@ def etable(
         p-value (p). Default is `"b (se)"`.
         Spaces ` `, parentheses `()`, brackets `[]`, newlines `\n` are supported.
         Newline is not support for LaTeX output.
+    custom_statistics: dict, optional
+        A dictionary of custom statistics. "b", "se", "t", or "p" are reserved.
 
     Returns
     -------
@@ -56,6 +59,16 @@ def etable(
         ), "signif_code must be in increasing order"
     models = _post_processing_input_checks(models)
 
+    if custom_statistics:
+        assert isinstance(custom_statistics, dict), "custom_statistics must be a dict"
+        for key in custom_statistics:
+            assert isinstance(
+                custom_statistics[key], list
+            ), "custom_statistics values must be a list"
+            assert len(custom_statistics[key]) == len(
+                models
+            ), f"custom_statistics {key} must have the same number as models"
+
     assert digits >= 0, "digits must be a positive integer"
     assert type in [
         "df",
@@ -111,7 +124,7 @@ def etable(
     colnames.reverse()
     nobs_fixef_df = nobs_fixef_df[colnames].T.reset_index()
 
-    coef_fmt_elements, coef_fmt_title = _parse_coef_fmt(coef_fmt)
+    coef_fmt_elements, coef_fmt_title = _parse_coef_fmt(coef_fmt, custom_statistics)
 
     etable_list = []
     for i, model in enumerate(models):
@@ -139,8 +152,15 @@ def etable(
                 model[coef_fmt_title] += model["t value"].astype(str)
             elif element == "p":
                 model[coef_fmt_title] += model["Pr(>|t|)"].astype(str)
+            elif element in custom_statistics:
+                assert len(custom_statistics[element][i]) == len(
+                    model["Estimate"]
+                ), f"Custom_statistics {element} has unequal length to the number of coefficients in model {i}"
+                model[coef_fmt_title] += pd.Series(
+                    np.round(custom_statistics[element][i], digits)
+                ).astype(str)
             elif element == "\n" and type == "tex":
-                raise ValueError("Newline is not supported for LaTeX output.")
+                raise ValueError("Newline is currently not supported for LaTeX output.")
             else:
                 model[coef_fmt_title] += element
         model[coef_fmt_title] = pd.Categorical(model[coef_fmt_title])
@@ -351,27 +371,42 @@ def _tabulate_etable(df, n_models, n_fixef):
     return formatted_table
 
 
-def _parse_coef_fmt(coef_fmt: str):
+def _parse_coef_fmt(coef_fmt: str, custom_statistics: Optional[dict] = None):
     """
     Parse the coef_fmt string.
 
     Parameters
     ----------
-    - coef_fmt (str): The coef_fmt string.
+    coef_fmt: str
+        The coef_fmt string.
+    custom_statistics: dict, optional
+        A dictionary of custom statistics. Key should be lowercased (e.g., simul_intv).
+        The keys "b", "se", "t", and "p" are reserved; using any of them as a
+        key raises a ValueError.
 
     Returns
     -------
-    - coef_fmt_elements (str): The parsed coef_fmt string.
-    - coef_fmt_title (str): The title for the coef_fmt string.
+    coef_fmt_elements: str
+        The parsed coef_fmt string.
+    coef_fmt_title: str
+        The title for the coef_fmt string.
     """
-    allowed_elements = ["b", "se", "t", "p", " ", r"\(", r"\)", r"\[", r"\]", "\n"]
-    coef_fmt_elements = re.findall("|".join(allowed_elements), coef_fmt)
     title_map = {
         "b": "Coefficient",
         "se": "Std. Error",
         "t": "t-stats",
         "p": "p-value",
     }
+
+    allowed_elements = ["b", "se", "t", "p", " ", "\n", r"\(", r"\)", r"\[", r"\]", ","]
+    if custom_statistics:
+        custom_elements = list(custom_statistics.keys())
+        if any([x in ["b", "se", "t", "p"] for x in custom_elements]):
+            raise ValueError(
+                "You cannot use 'b', 'se', 't', or 'p' as a key in custom_statistics."
+            )
+        allowed_elements += list(custom_statistics.keys())
+    coef_fmt_elements = re.findall("|".join(allowed_elements), coef_fmt)
     coef_fmt_title = "".join([title_map.get(x, x) for x in coef_fmt_elements])
 
     return coef_fmt_elements, coef_fmt_title
diff --git a/tests/test_errors.py b/tests/test_errors.py
@@ -225,3 +225,43 @@ def test_errors_etable():
 
     with pytest.raises(AssertionError):
         etable([fit1, fit2], signif_code=[0.1, 0.5, 1.5])
+
+    with pytest.raises(ValueError):
+        etable([fit1, fit2], coef_fmt="b (se) \n t [p]", type="tex")
+
+    with pytest.raises(AssertionError):
+        etable(
+            models=[fit1, fit2],
+            custom_statistics={
+                "conf_int_lb": [
+                    fit2._conf_int[0]
+                ],  # length of customized statistics not equal to the number of models
+                "conf_int_ub": [fit2._conf_int[1]],
+            },
+            coef_fmt="b [conf_int_lb, conf_int_ub]",
+        )
+
+    with pytest.raises(AssertionError):
+        etable(
+            models=[fit1, fit2],
+            custom_statistics={
+                "conf_int_lb": [
+                    [0.1, 0.1, 0.1],
+                    fit2._conf_int[0],
+                ],  # length of customized statistics not equal to length of model
+                "conf_int_ub": [fit1._conf_int[1], fit2._conf_int[1]],
+            },
+            coef_fmt="b [conf_int_lb, conf_int_ub]",
+        )
+
+    with pytest.raises(ValueError):
+        etable(
+            models=[fit1, fit2],
+            custom_statistics={
+                "b": [
+                    fit2._conf_int[0],
+                    fit2._conf_int[0],
+                ],  # reserved keyword cannot be used as a custom statistic
+            },
+            coef_fmt="b [se]",
+        )
diff --git a/tests/test_summarise.py b/tests/test_summarise.py
@@ -32,5 +32,13 @@ def test_summary():
     etable([fit1, fit2], signif_code=[0.01, 0.05, 0.1])
     etable([fit1, fit2], signif_code=None)
 
-    etable([fit1, fit2], coef_fmt="t (p)")
-    etable([fit1, fit2], coef_fmt="t (p) \n t (t)")
+    etable([fit1, fit2], coef_fmt="b (se) \n t [p]")
+
+    etable(
+        models=[fit1, fit2],
+        custom_statistics={
+            "conf_int_lb": [fit1._conf_int[0], fit2._conf_int[0]],
+            "conf_int_ub": [fit1._conf_int[1], fit2._conf_int[1]],
+        },
+        coef_fmt="b [conf_int_lb, conf_int_ub]",
+    )