diff --git a/.github/workflows/ci-tests.yaml b/.github/workflows/ci-tests.yaml index a3ee5717..12e9f504 100644 --- a/.github/workflows/ci-tests.yaml +++ b/.github/workflows/ci-tests.yaml @@ -40,7 +40,7 @@ jobs: uses: eddelbuettel/github-actions/r2u-setup@master - name: install R packages - run: Rscript -e 'install.packages(c("fixest", "broom", "did2s", "clubSandwich"))' + run: Rscript -e 'install.packages(c("fixest", "broom", "did2s", "clubSandwich", "wildrwolf"))' shell: bash - name: Run tests diff --git a/docs/_quarto.yml b/docs/_quarto.yml index b70f647d..0a50cb9a 100644 --- a/docs/_quarto.yml +++ b/docs/_quarto.yml @@ -47,6 +47,7 @@ quartodoc: - did.estimation.did2s - did.estimation.lpdid - did.estimation.event_study + - multcomp.rwolf - title: Estimation Classes desc: | Details on Methods and Attributes diff --git a/docs/contributing.qmd b/docs/contributing.qmd index eadc2514..a4a82b0c 100644 --- a/docs/contributing.qmd +++ b/docs/contributing.qmd @@ -86,9 +86,10 @@ Tests run with R require the following packages: - did2s - fixest - stats +- wildrwolf ```{bash} -Rscript -e 'install.packages(c("broom", "clubSandwich", "did2s", "fixest"), repos="https://cran.rstudio.com")' +Rscript -e 'install.packages(c("broom", "clubSandwich", "did2s", "fixest", "wildrwolf"), repos="https://cran.rstudio.com")' ``` Documentation for `pyfixest` is written, compiled, and published using Quarto. diff --git a/poetry.lock b/poetry.lock index 00e5f986..717f933c 100644 --- a/poetry.lock +++ b/poetry.lock @@ -2904,7 +2904,6 @@ files = [ {file = "PyYAML-6.0.1-cp311-cp311-win_amd64.whl", hash = "sha256:bf07ee2fef7014951eeb99f56f39c9bb4af143d8aa3c21b1677805985307da34"}, {file = "PyYAML-6.0.1-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:855fb52b0dc35af121542a76b9a84f8d1cd886ea97c84703eaa6d88e37a2ad28"}, {file = "PyYAML-6.0.1-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:40df9b996c2b73138957fe23a16a4f0ba614f4c0efce1e9406a184b6d07fa3a9"}, - {file = "PyYAML-6.0.1-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a08c6f0fe150303c1c6b71ebcd7213c2858041a7e01975da3a99aed1e7a378ef"}, {file = "PyYAML-6.0.1-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:6c22bec3fbe2524cde73d7ada88f6566758a8f7227bfbf93a408a9d86bcc12a0"}, {file = "PyYAML-6.0.1-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:8d4e9c88387b0f5c7d5f281e55304de64cf7f9c0021a3525bd3b1c542da3b0e4"}, {file = "PyYAML-6.0.1-cp312-cp312-win32.whl", hash = "sha256:d483d2cdf104e7c9fa60c544d92981f12ad66a457afae824d146093b8c294c54"}, @@ -3731,6 +3730,26 @@ files = [ {file = "tornado-6.4.tar.gz", hash = "sha256:72291fa6e6bc84e626589f1c29d90a5a6d593ef5ae68052ee2ef000dfd273dee"}, ] +[[package]] +name = "tqdm" +version = "4.66.1" +description = "Fast, Extensible Progress Meter" +optional = false +python-versions = ">=3.7" +files = [ + {file = "tqdm-4.66.1-py3-none-any.whl", hash = "sha256:d302b3c5b53d47bce91fea46679d9c3c6508cf6332229aa1e7d8653723793386"}, + {file = "tqdm-4.66.1.tar.gz", hash = "sha256:d88e651f9db8d8551a62556d3cff9e3034274ca5d66e93197cf2490e2dcb69c7"}, +] + +[package.dependencies] +colorama = {version = "*", markers = "platform_system == \"Windows\""} + +[package.extras] +dev = ["pytest (>=6)", "pytest-cov", "pytest-timeout", "pytest-xdist"] +notebook = ["ipywidgets (>=6)"] +slack = ["slack-sdk"] +telegram = ["requests"] + [[package]] name = "traitlets" version = "5.14.1" @@ -4151,4 +4170,4 @@ testing = ["big-O", "jaraco.functools", "jaraco.itertools", "more-itertools", "p [metadata] lock-version = "2.0" python-versions = ">=3.9,<4.0" -content-hash = "07db9726b0c0be4d812948fdc7cc1c0f0f71c467bd2d1e6ca02fb504f8f1fd16" +content-hash = "2bc200dd9772d42d166e062ada33d1d56efe99c38e90bff0df6fbcb426d20ad7" diff --git a/pyfixest/FixestMulti.py b/pyfixest/FixestMulti.py index 35ea57b9..0c0d7f7b 100644 --- a/pyfixest/FixestMulti.py +++ b/pyfixest/FixestMulti.py @@ -386,6 +386,17 @@ def set_fixest_multi_flag(self): """ ) + def to_list(self): + """ + Returns a list of all fitted models. + Args: + None + Returns: + A list of all fitted models of types Feols or Fepois. + """ + + return list(self.all_fitted_models.values()) + def vcov(self, vcov: Union[str, Dict[str, str]]): """ Update regression inference "on the fly". diff --git a/pyfixest/feols.py b/pyfixest/feols.py index ff223028..85147dea 100644 --- a/pyfixest/feols.py +++ b/pyfixest/feols.py @@ -839,6 +839,7 @@ def wildboottest( adj: Optional[bool] = True, cluster_adj: Optional[bool] = True, parallel: Optional[bool] = False, + return_bootstrapped_t_stats=False, ): """ Run a wild cluster bootstrap based on an object of type "Feols". @@ -870,12 +871,15 @@ def wildboottest( Indicates whether to run the bootstrap in parallel. Defaults to False. seed : Union[str, None], optional An option to provide a random seed. Defaults to None. + return_bootstrapped_t_stats : bool, optional: + If True, the method returns a tuple of the regular output and the bootstrapped t-stats. Defaults to False. Returns ------- pd.DataFrame A DataFrame with the original, non-bootstrapped t-statistic and bootstrapped p-value, along with the bootstrap type, inference type (HC vs CRV), and whether the null hypothesis was imposed on the bootstrap DGP. + If `return_bootstrapped_t_stats` is True, the method returns a tuple of the regular output and the bootstrapped t-stats. """ _is_iv = self._is_iv @@ -887,6 +891,12 @@ def wildboottest( _clustervar = self._clustervar _supports_wildboottest = self._supports_wildboottest + if param is not None: + if param not in _xnames: + raise ValueError( + f"Parameter {param} not found in the model's coefficients." + ) + if not _supports_wildboottest: if self._is_iv: raise NotImplementedError( @@ -999,7 +1009,10 @@ def wildboottest( res_df = pd.Series(res) - return res_df + if return_bootstrapped_t_stats: + return res_df, boot.t_boot + else: + return res_df def fixef(self) -> None: """ diff --git a/pyfixest/multcomp.py b/pyfixest/multcomp.py new file mode 100644 index 00000000..c0c77818 --- /dev/null +++ b/pyfixest/multcomp.py @@ -0,0 +1,128 @@ +import numpy as np +import pandas as pd +from typing import Union, List +from pyfixest.summarize import _post_processing_input_checks +from pyfixest.feols import Feols +from pyfixest.FixestMulti import FixestMulti +from tqdm import tqdm + + +def rwolf( + models: Union[List[Feols], Feols], param: str, B: int, seed: int +) -> pd.DataFrame: + """ + Compute Romano-Wolf adjusted p-values for multiple hypothesis testing. + + For each model, it is assumed that the adjustment is for the family of hypotheses is + "param = 0". This function uses the `wildboottest()` method for running the bootstrap, + hence models of type `Feiv` or `Fepois` are not supported. + + Parameters + ---------- + models : List[Feols] or FixestMulti + A list of models for which the p-values should be computed, or a FixestMulti object. + Models of type `Feiv` or `Fepois` are not supported. + param : str + The parameter for which the p-values should be computed. + B : int + The number of bootstrap replications. + seed : int + The seed for the random number generator. + + Returns + ------- + pd.DataFrame + A DataFrame containing estimation statistics, including the Romano-Wolf adjusted p-values. + + """ + + models = _post_processing_input_checks(models) + all_model_stats = pd.DataFrame() + + S = 0 + for model in models: + + if param not in model._coefnames: + raise ValueError( + f"Parameter '{param}' not found in the model {model._fml}." + ) + + model_tidy = model.tidy().xs(param) + all_model_stats = pd.concat([all_model_stats, model_tidy], axis=1) + S += 1 + + t_stats = all_model_stats.xs("t value").values + t_stats = np.zeros(S) + boot_t_stats = np.zeros((B, S)) + + for i in tqdm(range(S)): + + model = models[i] + + wildboot_res_df, bootstrapped_t_stats = model.wildboottest( + param=param, + B=B, + return_bootstrapped_t_stats=True, + seed=seed, # all S iterations require the same bootstrap samples, hence seed needs to be reset + ) + t_stats[i] = wildboot_res_df["t value"] + boot_t_stats[:, i] = bootstrapped_t_stats + + pval = _get_rwolf_pval(t_stats, boot_t_stats) + + all_model_stats.loc["RW Pr(>|t|)"] = pval + all_model_stats.columns = [f"est{i}" for i, _ in enumerate(models)] + + return all_model_stats + + +def _get_rwolf_pval(t_stats, boot_t_stats): + """ + Compute Romano-Wolf adjusted p-values based on bootstrapped t-statistics. + + Parameters: + t_stats (np.ndarray): A vector of length S - where S is the number of + tested hypotheses - containing the original, + non-bootstrappe t-statisics. + boot_t_stats (np.ndarray): A (B x S) matrix containing the + bootstrapped t-statistics. + + Returns: + np.ndarray: A vector of Romano-Wolf corrected p-values. + """ + + t_stats = np.abs(t_stats) + boot_t_stats = np.abs(boot_t_stats) + + S = boot_t_stats.shape[1] + B = boot_t_stats.shape[0] + + pinit = corr_padj = pval = np.zeros(S) + stepdown_index = np.argsort(t_stats)[::-1] + ro = np.argsort(stepdown_index) + + for s in range(S): + if s == 0: + max_stat = np.max(boot_t_stats, axis=1) + pinit[s] = min( + 1, + (np.sum(max_stat >= np.abs(t_stats[stepdown_index[s]])) + 1) / (B + 1), + ) + else: + boot_t_stat_udp = np.delete(boot_t_stats, stepdown_index[:s], axis=1) + max_stat = np.max(boot_t_stat_udp, axis=1) + pinit[s] = min( + 1, + (np.sum(max_stat >= np.abs(t_stats[stepdown_index[s]])) + 1) / (B + 1), + ) + + for j in range(S): + if j == 0: + corr_padj[j] = pinit[j] + else: + corr_padj[j] = max(pinit[j], corr_padj[j - 1]) + + # Collect the results + pval = corr_padj[ro] + + return pval diff --git a/pyfixest/summarize.py b/pyfixest/summarize.py index b1b1c534..b2d17396 100644 --- a/pyfixest/summarize.py +++ b/pyfixest/summarize.py @@ -54,7 +54,12 @@ def etable( models = _post_processing_input_checks(models) assert digits >= 0, "digits must be a positive integer" - assert type in ["df", "tex", "md"], "type must be either 'df', 'md' or 'tex'" + assert type in [ + "df", + "tex", + "md", + "html", + ], "type must be either 'df', 'md', 'html' or 'tex'" dep_var_list = [] nobs_list = [] @@ -281,23 +286,32 @@ def _post_processing_input_checks(models): # check if models instance of Feols or Fepois if isinstance(models, (Feols, Fepois)): + models = [models] + else: + if isinstance(models, list): + for model in models: if not isinstance(model, (Feols, Fepois)): raise TypeError( - """ - The models argument must be either a list of Feols or Fepois instances, - a dict of Feols or Fepois instances, or simply a Feols or Fepois instance. + f""" + Each element of the passed list needs to be of type Feols or Fepois, but {type(model)} was passed. + If you want to summarize a FixestMulti object, please use FixestMulti.to_list() to convert it to a list of Feols or Fepois instances. """ ) - elif isinstance(models, dict): - for model in models.keys(): - if not isinstance(models[model], (Feols, Fepois)): - raise TypeError( - "The models argument must be a list of Feols or Fepois instances." - ) + + else: + + raise TypeError( + """ + The models argument must be either a list of Feols or Fepois instances, or + simply a single Feols or Fepois instance. The models argument does not accept instances + of type FixestMulti - please use models.to_list() to convert the FixestMulti + instance to a list of Feols or Fepois instances. + """ + ) return models diff --git a/pyfixest/utils.py b/pyfixest/utils.py index 6d492fd4..6533c4bb 100644 --- a/pyfixest/utils.py +++ b/pyfixest/utils.py @@ -1,6 +1,8 @@ import numpy as np import pandas as pd from formulaic import model_matrix +import sys +import time def ssc(adj=True, fixef_k="none", cluster_adj=True, cluster_df="min"): diff --git a/pyproject.toml b/pyproject.toml index b6a2c8e1..4dee6e95 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [tool.poetry] name = "pyfixest" -version = "0.15.1" +version = "0.15.2" description = "Fast high dimensional fixed effect estimation following syntax of the fixest R package. Supports OLS, IV and Poisson regression and a range of inference procedures. Additionally, experimentally supports (some of) the regression based new Difference-in-Differences Estimators (Did2s)." @@ -19,6 +19,7 @@ formulaic = ">=0.6.6,<1.0.0" lets-plot = ">=4.0.1" numba = ">=0.58.0" tabulate = ">=0.9.0" +tqdm = "^4.66.1" [tool.poetry.group.dev.dependencies] pytest=">=7.0.0" diff --git a/readme.md b/readme.md index c30bb30e..bc135e76 100644 --- a/readme.md +++ b/readme.md @@ -55,9 +55,35 @@ pip install git+https://github.com/s3alfisc/pyfixest.git ## News -`PyFixest` `0.13` adds support for the [local projections -Difference-in-Differences -Estimator](https://s3alfisc.github.io/pyfixest/difference-in-differences-estimation/). +`PyFixest` `0.15.2` adds support Romano-Wolf Corrected p-values: + +```py +from pyfixest.estimation import feols +from pyfixest.multcomp import rwolf +from pyfixest.utils import get_data + +rng = np.random.default_rng(12345) +data = get_data() +data["Y2"] = data["Y"] * rng.normal(0, 0.5, size=len(data)) +data["Y3"] = data["Y2"] + rng.normal(0, 0.5, size=len(data)) + +# test set 1 + +fit1 = feols("Y ~ X1", data=data) +fit2 = feols("Y2 ~ X1", data=data) +fit3 = feols("Y3 ~ X1", data=data) + +rwolf_df = rwolf([fit1, fit2, fit3], "X1", B=9999, seed=12345) +rwolf_df.round(3) +# est0 est1 est2 +# Estimate -1.000 0.027 0.011 +# Std. Error 0.085 0.046 0.050 +# t value -11.802 0.594 0.216 +# Pr(>|t|) 0.000 0.553 0.829 +# 2.5 % -1.166 -0.062 -0.088 +# 97.5 % -0.834 0.116 0.110 +# RW Pr(>|t|) 0.000 0.671 0.832 +``` ## Benchmarks @@ -85,18 +111,18 @@ feols("Y ~ X1 | f1 + f2", data=data).summary() ``` ### - + Estimation: OLS Dep. var.: Y, Fixed effects: f1+f2 Inference: CRV1 Observations: 997 - + | Coefficient | Estimate | Std. Error | t value | Pr(>|t|) | 2.5 % | 97.5 % | |:--------------|-----------:|-------------:|----------:|-----------:|--------:|---------:| | X1 | -0.919 | 0.065 | -14.057 | 0.000 | -1.053 | -0.786 | --- RMSE: 1.441 R2: 0.609 R2 Within: 0.2 - + ### Multiple Estimation @@ -133,7 +159,7 @@ etable([fit.fetch_model(i) for i in range(6)]) Observations 998 999 997 998 997 998 ----------------------------------------------------------------------------------------------------------------------------- Significance levels: * p < 0.05, ** p < 0.01, *** p < 0.001 - + @@ -149,19 +175,19 @@ fit1.vcov("hetero").summary() Model: Y~X1 ### - + Estimation: OLS Dep. var.: Y Inference: hetero Observations: 998 - + | Coefficient | Estimate | Std. Error | t value | Pr(>|t|) | 2.5 % | 97.5 % | |:--------------|-----------:|-------------:|----------:|-----------:|--------:|---------:| | Intercept | 0.919 | 0.112 | 8.223 | 0.000 | 0.699 | 1.138 | | X1 | -1.000 | 0.082 | -12.134 | 0.000 | -1.162 | -0.838 | --- RMSE: 2.158 R2: 0.123 - + ### Poisson Regression via `fepois()` @@ -174,19 +200,19 @@ fepois("Y ~ X1 + X2 | f1 + f2", data = poisson_data).summary() ``` ### - + Estimation: Poisson Dep. var.: Y, Fixed effects: f1+f2 Inference: CRV1 Observations: 997 - + | Coefficient | Estimate | Std. Error | t value | Pr(>|t|) | 2.5 % | 97.5 % | |:--------------|-----------:|-------------:|----------:|-----------:|--------:|---------:| | X1 | -0.008 | 0.035 | -0.239 | 0.811 | -0.076 | 0.060 | | X2 | -0.015 | 0.010 | -1.471 | 0.141 | -0.035 | 0.005 | --- Deviance: 1068.836 - + ### IV Estimation via three-part formulas @@ -200,14 +226,14 @@ fit_iv.summary() ``` ### - + Estimation: IV Dep. var.: Y, Fixed effects: f1 Inference: CRV1 Observations: 997 - + | Coefficient | Estimate | Std. Error | t value | Pr(>|t|) | 2.5 % | 97.5 % | |:--------------|-----------:|-------------:|----------:|-----------:|--------:|---------:| | X1 | -1.025 | 0.115 | -8.930 | 0.000 | -1.259 | -0.790 | --- - + diff --git a/tests/test_errors.py b/tests/test_errors.py index b6f718f4..f9f533c9 100644 --- a/tests/test_errors.py +++ b/tests/test_errors.py @@ -1,3 +1,4 @@ +from typing import Type import pytest import numpy as np import pandas as pd @@ -14,6 +15,8 @@ ) from pyfixest.estimation import feols, fepois from pyfixest.FormulaParser import FixestFormulaParser +from pyfixest.multcomp import rwolf +from pyfixest.summarize import etable, summary from pyfixest.summarize import etable @@ -185,6 +188,40 @@ def test_wls_errors(): feols("Y ~ X1", data=data, weights="weights", vcov="iid").wildboottest(B=999) +def test_multcomp_errors(): + + data = get_data().dropna() + + # param not in model + fit1 = feols("Y + Y2 ~ X1 | f1", data=data) + with pytest.raises(ValueError): + rwolf(fit1.to_list(), param="X2", B=999, seed=92) + + +def test_wildboottest_errors(): + + data = get_data() + fit = feols("Y ~ X1", data=data) + with pytest.raises(ValueError): + fit.wildboottest(param="X2", B=999, seed=213) + + +def test_summary_errors(): + + data = get_data() + fit1 = feols("Y + Y2 ~ X1 | f1", data=data) + fit2 = feols("Y ~ X1 + X2 | f1", data=data) + + with pytest.raises(TypeError): + etable(fit1) + with pytest.raises(TypeError): + etable([fit1, fit2]) + with pytest.raises(TypeError): + summary(fit1) + with pytest.raises(TypeError): + summary([fit1, fit2]) + + def test_errors_etable(): data = get_data() diff --git a/tests/test_multcomp.py b/tests/test_multcomp.py new file mode 100644 index 00000000..959c73e4 --- /dev/null +++ b/tests/test_multcomp.py @@ -0,0 +1,75 @@ +from pyfixest.estimation import feols +from pyfixest.utils import get_data +from pyfixest.multcomp import rwolf, _get_rwolf_pval +import numpy as np +import pandas as pd + +from rpy2.robjects.packages import importr +import rpy2.robjects as ro +from rpy2.robjects import pandas2ri + +pandas2ri.activate() + +fixest = importr("fixest") +wildrwolf = importr("wildrwolf") + + +def test_wildrwolf(): + + rng = np.random.default_rng(12345) + data = get_data() + data["Y2"] = data["Y"] * rng.normal(0, 0.5, size=len(data)) + data["Y3"] = data["Y2"] + rng.normal(0, 0.5, size=len(data)) + + # test set 1 + + fit1 = feols("Y ~ X1", data=data) + fit2 = feols("Y2 ~ X1", data=data) + fit3 = feols("Y3 ~ X1", data=data) + + rwolf_py = rwolf([fit1, fit2, fit3], "X1", B=9999, seed=12345) + + # R + fit_r = fixest.feols(ro.Formula("c(Y, Y2, Y3) ~ X1"), data=data) + rwolf_r = wildrwolf.rwolf(fit_r, param="X1", B=9999, seed=12345) + + np.testing.assert_allclose( + rwolf_py.iloc[6].values, + pd.DataFrame(rwolf_r).iloc[5].values.astype(float), + atol=1e-2, + rtol=np.inf, + ) + + # test set 2 + + fit1 = feols("Y ~ X1 | f1 + f2", data=data) + fit2 = feols("Y2 ~ X1 | f1 + f2", data=data) + fit3 = feols("Y3 ~ X1 | f1 + f2", data=data) + + rwolf_py = rwolf([fit1, fit2, fit3], "X1", B=9999, seed=12345) + + # R + fit_r = fixest.feols(ro.Formula("c(Y, Y2, Y3) ~ X1 | f1 + f2"), data=data) + rwolf_r = wildrwolf.rwolf(fit_r, param="X1", B=9999, seed=12345) + + np.testing.assert_allclose( + rwolf_py.iloc[6].values, + pd.DataFrame(rwolf_r).iloc[5].values.astype(float), + atol=1e-2, + rtol=np.inf, + ) + + +def test_stepwise_function(): + + B = 1000 + S = 5 + + rng = np.random.default_rng(33) + t_stat = rng.normal(0, 1, size=S) + t_boot = rng.normal(0, 1, size=(B, S)) + + stepwise_py = _get_rwolf_pval(t_stat, t_boot) + stepwise_r = wildrwolf.get_rwolf_pval(t_stat, t_boot) + + np.testing.assert_allclose(stepwise_py, stepwise_r) diff --git a/tests/test_summarise.py b/tests/test_summarise.py index 0cc4255a..74f00dd4 100644 --- a/tests/test_summarise.py +++ b/tests/test_summarise.py @@ -27,6 +27,9 @@ def test_summary(): fit_iv = feols("Y ~ X2 | f1 | X1 ~ Z1", data=df1) etable([fit_iv, fit1]) + fit_multi = feols("Y + Y2 ~ X1 + X2 | f1", data=df1) + etable(fit_multi.to_list()) + etable([fit1, fit2], signif_code=[0.01, 0.05, 0.1]) etable([fit1, fit2], signif_code=None) diff --git a/tests/test_visualize.py b/tests/test_visualize.py new file mode 100644 index 00000000..972626ed --- /dev/null +++ b/tests/test_visualize.py @@ -0,0 +1,26 @@ +from pyfixest.estimation import feols, fepois +from pyfixest.utils import get_data +from pyfixest.visualize import coefplot, iplot + + +def test_visualize(): + + data = get_data() + fit1 = feols("Y ~ X1 + X2 | f1", data=data) + coefplot(fit1) + + data_pois = get_data(model="Fepois") + fit2 = fepois("Y ~ X1 + X2 + f2 | f1", data=data_pois, vcov={"CRV1": "f1+f2"}) + coefplot(fit2) + + coefplot([fit1, fit2]) + + # FixestMulti + fit3 = feols("Y + Y2 ~ X1 + X2 | f1", data=data) + coefplot(fit3.to_list()) + + fit3.coefplot() + + # iplot + fit5 = feols("Y ~ i(f1)", data=data) + iplot(fit5)