Skip to content

Commit

Permalink
Implement the Romano Wolf Multiple Testing Correction #277 (#278)
Browse files Browse the repository at this point in the history
  • Loading branch information
s3alfisc authored Feb 7, 2024
1 parent 47b9aea commit c9bcf84
Show file tree
Hide file tree
Showing 15 changed files with 389 additions and 32 deletions.
2 changes: 1 addition & 1 deletion .github/workflows/ci-tests.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -40,7 +40,7 @@ jobs:
uses: eddelbuettel/github-actions/r2u-setup@master

- name: install R packages
run: Rscript -e 'install.packages(c("fixest", "broom", "did2s", "clubSandwich"))'
run: Rscript -e 'install.packages(c("fixest", "broom", "did2s", "clubSandwich", "wildrwolf"))'
shell: bash

- name: Run tests
Expand Down
1 change: 1 addition & 0 deletions docs/_quarto.yml
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,7 @@ quartodoc:
- did.estimation.did2s
- did.estimation.lpdid
- did.estimation.event_study
- multcomp.rwolf
- title: Estimation Classes
desc: |
Details on Methods and Attributes
Expand Down
3 changes: 2 additions & 1 deletion docs/contributing.qmd
Original file line number Diff line number Diff line change
Expand Up @@ -86,9 +86,10 @@ Tests run with R require the following packages:
- did2s
- fixest
- stats
- wildrwolf

```{bash}
Rscript -e 'install.packages(c("broom", "clubSandwich", "did2s", "fixest"), repos="https://cran.rstudio.com")'
Rscript -e 'install.packages(c("broom", "clubSandwich", "did2s", "fixest", "wildrwolf"), repos="https://cran.rstudio.com")'
```

Documentation for `pyfixest` is written, compiled, and published using Quarto.
Expand Down
23 changes: 21 additions & 2 deletions poetry.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

11 changes: 11 additions & 0 deletions pyfixest/FixestMulti.py
Original file line number Diff line number Diff line change
Expand Up @@ -386,6 +386,17 @@ def set_fixest_multi_flag(self):
"""
)

def to_list(self):
"""
Returns a list of all fitted models.
Args:
None
Returns:
A list of all fitted models of types Feols or Fepois.
"""

return list(self.all_fitted_models.values())

def vcov(self, vcov: Union[str, Dict[str, str]]):
"""
Update regression inference "on the fly".
Expand Down
15 changes: 14 additions & 1 deletion pyfixest/feols.py
Original file line number Diff line number Diff line change
Expand Up @@ -839,6 +839,7 @@ def wildboottest(
adj: Optional[bool] = True,
cluster_adj: Optional[bool] = True,
parallel: Optional[bool] = False,
return_bootstrapped_t_stats=False,
):
"""
Run a wild cluster bootstrap based on an object of type "Feols".
Expand Down Expand Up @@ -870,12 +871,15 @@ def wildboottest(
Indicates whether to run the bootstrap in parallel. Defaults to False.
seed : Union[str, None], optional
An option to provide a random seed. Defaults to None.
return_bootstrapped_t_stats : bool, optional:
If True, the method returns a tuple of the regular output and the bootstrapped t-stats. Defaults to False.
Returns
-------
pd.DataFrame
A DataFrame with the original, non-bootstrapped t-statistic and bootstrapped p-value, along with
the bootstrap type, inference type (HC vs CRV), and whether the null hypothesis was imposed on the bootstrap DGP.
If `return_bootstrapped_t_stats` is True, the method returns a tuple of the regular output and the bootstrapped t-stats.
"""

_is_iv = self._is_iv
Expand All @@ -887,6 +891,12 @@ def wildboottest(
_clustervar = self._clustervar
_supports_wildboottest = self._supports_wildboottest

if param is not None:
if param not in _xnames:
raise ValueError(
f"Parameter {param} not found in the model's coefficients."
)

if not _supports_wildboottest:
if self._is_iv:
raise NotImplementedError(
Expand Down Expand Up @@ -999,7 +1009,10 @@ def wildboottest(

res_df = pd.Series(res)

return res_df
if return_bootstrapped_t_stats:
return res_df, boot.t_boot
else:
return res_df

def fixef(self) -> None:
"""
Expand Down
128 changes: 128 additions & 0 deletions pyfixest/multcomp.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,128 @@
import numpy as np
import pandas as pd
from typing import Union, List
from pyfixest.summarize import _post_processing_input_checks
from pyfixest.feols import Feols
from pyfixest.FixestMulti import FixestMulti
from tqdm import tqdm


def rwolf(
models: Union[List[Feols], Feols], param: str, B: int, seed: int
) -> pd.DataFrame:
"""
Compute Romano-Wolf adjusted p-values for multiple hypothesis testing.
For each model, it is assumed that the adjustment is for the family of hypotheses is
"param = 0". This function uses the `wildboottest()` method for running the bootstrap,
hence models of type `Feiv` or `Fepois` are not supported.
Parameters
----------
models : List[Feols] or FixestMulti
A list of models for which the p-values should be computed, or a FixestMulti object.
Models of type `Feiv` or `Fepois` are not supported.
param : str
The parameter for which the p-values should be computed.
B : int
The number of bootstrap replications.
seed : int
The seed for the random number generator.
Returns
-------
pd.DataFrame
A DataFrame containing estimation statistics, including the Romano-Wolf adjusted p-values.
"""

models = _post_processing_input_checks(models)
all_model_stats = pd.DataFrame()

S = 0
for model in models:

if param not in model._coefnames:
raise ValueError(
f"Parameter '{param}' not found in the model {model._fml}."
)

model_tidy = model.tidy().xs(param)
all_model_stats = pd.concat([all_model_stats, model_tidy], axis=1)
S += 1

t_stats = all_model_stats.xs("t value").values
t_stats = np.zeros(S)
boot_t_stats = np.zeros((B, S))

for i in tqdm(range(S)):

model = models[i]

wildboot_res_df, bootstrapped_t_stats = model.wildboottest(
param=param,
B=B,
return_bootstrapped_t_stats=True,
seed=seed, # all S iterations require the same bootstrap samples, hence seed needs to be reset
)
t_stats[i] = wildboot_res_df["t value"]
boot_t_stats[:, i] = bootstrapped_t_stats

pval = _get_rwolf_pval(t_stats, boot_t_stats)

all_model_stats.loc["RW Pr(>|t|)"] = pval
all_model_stats.columns = [f"est{i}" for i, _ in enumerate(models)]

return all_model_stats


def _get_rwolf_pval(t_stats, boot_t_stats):
"""
Compute Romano-Wolf adjusted p-values based on bootstrapped t-statistics.
Parameters:
t_stats (np.ndarray): A vector of length S - where S is the number of
tested hypotheses - containing the original,
non-bootstrappe t-statisics.
boot_t_stats (np.ndarray): A (B x S) matrix containing the
bootstrapped t-statistics.
Returns:
np.ndarray: A vector of Romano-Wolf corrected p-values.
"""

t_stats = np.abs(t_stats)
boot_t_stats = np.abs(boot_t_stats)

S = boot_t_stats.shape[1]
B = boot_t_stats.shape[0]

pinit = corr_padj = pval = np.zeros(S)
stepdown_index = np.argsort(t_stats)[::-1]
ro = np.argsort(stepdown_index)

for s in range(S):
if s == 0:
max_stat = np.max(boot_t_stats, axis=1)
pinit[s] = min(
1,
(np.sum(max_stat >= np.abs(t_stats[stepdown_index[s]])) + 1) / (B + 1),
)
else:
boot_t_stat_udp = np.delete(boot_t_stats, stepdown_index[:s], axis=1)
max_stat = np.max(boot_t_stat_udp, axis=1)
pinit[s] = min(
1,
(np.sum(max_stat >= np.abs(t_stats[stepdown_index[s]])) + 1) / (B + 1),
)

for j in range(S):
if j == 0:
corr_padj[j] = pinit[j]
else:
corr_padj[j] = max(pinit[j], corr_padj[j - 1])

# Collect the results
pval = corr_padj[ro]

return pval
34 changes: 24 additions & 10 deletions pyfixest/summarize.py
Original file line number Diff line number Diff line change
Expand Up @@ -54,7 +54,12 @@ def etable(
models = _post_processing_input_checks(models)

assert digits >= 0, "digits must be a positive integer"
assert type in ["df", "tex", "md"], "type must be either 'df', 'md' or 'tex'"
assert type in [
"df",
"tex",
"md",
"html",
], "type must be either 'df', 'md', 'html' or 'tex'"

dep_var_list = []
nobs_list = []
Expand Down Expand Up @@ -281,23 +286,32 @@ def _post_processing_input_checks(models):

# check if models instance of Feols or Fepois
if isinstance(models, (Feols, Fepois)):

models = [models]

else:

if isinstance(models, list):

for model in models:
if not isinstance(model, (Feols, Fepois)):
raise TypeError(
"""
The models argument must be either a list of Feols or Fepois instances,
a dict of Feols or Fepois instances, or simply a Feols or Fepois instance.
f"""
Each element of the passed list needs to be of type Feols or Fepois, but {type(model)} was passed.
If you want to summarize a FixestMulti object, please use FixestMulti.to_list() to convert it to a list of Feols or Fepois instances.
"""
)
elif isinstance(models, dict):
for model in models.keys():
if not isinstance(models[model], (Feols, Fepois)):
raise TypeError(
"The models argument must be a list of Feols or Fepois instances."
)

else:

raise TypeError(
"""
The models argument must be either a list of Feols or Fepois instances, or
simply a single Feols or Fepois instance. The models argument does not accept instances
of type FixestMulti - please use models.to_list() to convert the FixestMulti
instance to a list of Feols or Fepois instances.
"""
)

return models

Expand Down
2 changes: 2 additions & 0 deletions pyfixest/utils.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,8 @@
import numpy as np
import pandas as pd
from formulaic import model_matrix
import sys
import time


def ssc(adj=True, fixef_k="none", cluster_adj=True, cluster_df="min"):
Expand Down
3 changes: 2 additions & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[tool.poetry]
name = "pyfixest"
version = "0.15.1"
version = "0.15.2"


description = "Fast high dimensional fixed effect estimation following syntax of the fixest R package. Supports OLS, IV and Poisson regression and a range of inference procedures. Additionally, experimentally supports (some of) the regression based new Difference-in-Differences Estimators (Did2s)."
Expand All @@ -19,6 +19,7 @@ formulaic = ">=0.6.6,<1.0.0"
lets-plot = ">=4.0.1"
numba = ">=0.58.0"
tabulate = ">=0.9.0"
tqdm = "^4.66.1"

[tool.poetry.group.dev.dependencies]
pytest=">=7.0.0"
Expand Down
Loading

0 comments on commit c9bcf84

Please sign in to comment.