Skip to content

Commit

Permalink
hypothesis=float
Browse files Browse the repository at this point in the history
  • Loading branch information
vincentarelbundock committed Oct 1, 2023
1 parent 8dac66e commit faf8282
Show file tree
Hide file tree
Showing 9 changed files with 121 additions and 15 deletions.
2 changes: 2 additions & 0 deletions NEWS.md
Original file line number Diff line number Diff line change
@@ -1,5 +1,7 @@
# dev

* `hypothesis` accepts a float or integer to specify a different null hypothesis.

# 0.0.5

* `predictions()` supports categorical predictors when `newdata` does not include all levels (internal padding).
Expand Down
5 changes: 3 additions & 2 deletions marginaleffects/comparisons.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@
from .estimands import estimands
from .hypothesis import get_hypothesis
from .predictions import get_predictions
from .sanity import sanitize_newdata, sanitize_variables, sanitize_vcov, sanitize_by
from .sanity import sanitize_newdata, sanitize_variables, sanitize_vcov, sanitize_by, sanitize_hypothesis_null
from .transform import get_transform
from .uncertainty import get_jacobian, get_se, get_z_p_ci
from .utils import get_pad, sort_columns, upcast, get_modeldata
Expand Down Expand Up @@ -101,6 +101,7 @@ def comparisons(
V = sanitize_vcov(vcov, model)
newdata = sanitize_newdata(model, newdata=newdata, wts=wts, by=by)
modeldata = get_modeldata(model)
hypothesis_null = sanitize_hypothesis_null(hypothesis)

# after sanitize_newdata()
variables = sanitize_variables(
Expand Down Expand Up @@ -268,7 +269,7 @@ def outer(x):
J = get_jacobian(func=outer, coefs=model.params.to_numpy())
se = get_se(J, V)
out = out.with_columns(pl.Series(se).alias("std_error"))
out = get_z_p_ci(out, model, conf_level=conf_level)
out = get_z_p_ci(out, model, conf_level=conf_level, hypothesis_null=hypothesis_null)

out = get_transform(out, transform=transform)
out = get_equivalence(out, equivalence=equivalence, df=np.inf)
Expand Down
71 changes: 67 additions & 4 deletions marginaleffects/hypotheses.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,16 +2,78 @@
import polars as pl

from .hypothesis import get_hypothesis
from .sanity import sanitize_vcov
from .sanity import sanitize_vcov, sanitize_hypothesis_null
from .uncertainty import get_jacobian, get_se, get_z_p_ci
from .equivalence import get_equivalence
from .utils import sort_columns
from .classes import MarginaleffectsDataFrame


def hypotheses(model, hypothesis=None, conf_level=0.95, vcov=True):
# sanity checks
def hypotheses(model, hypothesis=None, conf_level=0.95, vcov=True, equivalence=None):
"""
(Non-)Linear Tests for Null Hypotheses, Joint Hypotheses, Equivalence, Non Superiority, and Non Inferiority.
This function calculates uncertainty estimates as first-order approximate standard errors for linear or non-linear
functions of a vector of random variables with known or estimated covariance matrix. It emulates the behavior of
the excellent and well-established `car::deltaMethod` and `car::linearHypothesis` functions in R, but it supports
more models; requires fewer dependencies; expands the range of tests to equivalence and superiority/inferiority;
and offers convenience features like robust standard errors.
To learn more, visit the package website: <https://marginaleffects.com/>
Warning #1: Tests are conducted directly on the scale defined by the `type` argument. For some models, it can make
sense to conduct hypothesis or equivalence tests on the `"link"` scale instead of the `"response"` scale which is
often the default.
Warning #2: For hypothesis tests on objects produced by the `marginaleffects` package, it is safer to use the
`hypothesis` argument of the original function. Using `hypotheses()` may not work in certain environments, in lists,
or when working programmatically with *apply style functions.
Warning #3: The tests assume that the `hypothesis` expression is (approximately) normally distributed, which for
non-linear functions of the parameters may not be realistic. More reliable confidence intervals can be obtained using
the `inferences()` function with `method = "boot"`.
Parameters:
model : object
Model object estimated by `statsmodels`
hypothesis : str, int, float, numpy array, or None, optional
    Hypothesis specification: a string formula (e.g. "param1 = param2"), a numeric null-hypothesis value, or a linear-combination matrix. Default is None.
conf_level : float, optional
Confidence level for the hypothesis test. Default is 0.95.
vcov : bool, optional
Whether to use the covariance matrix in the hypothesis test. Default is True.
equivalence : tuple, optional
The equivalence range for the hypothesis test. Default is None.
Returns:
MarginaleffectsDataFrame
A DataFrame containing the results of the hypothesis tests.
Examples:
# When `hypothesis` is `None`, `hypotheses()` returns a DataFrame of parameters
hypotheses(model)
# A different null hypothesis
hypotheses(model, hypothesis = 3)
# Test of equality between coefficients
hypotheses(model, hypothesis="param1 = param2")
# Non-linear function
hypotheses(model, hypothesis="exp(param1 + param2) = 0.1")
# Robust standard errors
hypotheses(model, hypothesis="param1 = param2", vcov="HC3")
# Equivalence, non-inferiority, and non-superiority tests
hypotheses(model, equivalence=(0, 10))
"""

V = sanitize_vcov(vcov, model)

hypothesis_null = sanitize_hypothesis_null(hypothesis)

# estimands
def fun(x):
out = pl.DataFrame({"estimate": x})
Expand All @@ -23,7 +85,8 @@ def fun(x):
J = get_jacobian(fun, model.params.to_numpy())
se = get_se(J, V)
out = out.with_columns(pl.Series(se).alias("std_error"))
out = get_z_p_ci(out, model, conf_level=conf_level)
out = get_z_p_ci(out, model, conf_level=conf_level, hypothesis_null=hypothesis_null)
out = get_equivalence(out, equivalence=equivalence)
out = sort_columns(out, by=None)
out = MarginaleffectsDataFrame(out, conf_level=conf_level)
return out
6 changes: 4 additions & 2 deletions marginaleffects/hypothesis.py
Original file line number Diff line number Diff line change
Expand Up @@ -47,8 +47,9 @@ def eval_string_function(vec, hypothesis, rowlabels):

# function extracts the estimate column from a data frame and sets it to x. If `hypothesis` argument is a numpy array, it feeds it directly to lincome_multiply. If lincome is a string, it checks if the string is valid, and then calls the corresponding function.
def get_hypothesis(x, hypothesis):
msg = f"Invalid hypothesis argument: {hypothesis}. Valid arguments are: 'reference', 'revreference', 'sequential', 'revsequential', 'pairwise', 'revpairwise' or a numpy array."
if hypothesis is None:
msg = f"Invalid hypothesis argument: {hypothesis}. Valid arguments are: 'reference', 'revreference', 'sequential', 'revsequential', 'pairwise', 'revpairwise' or a numpy array or a float."

if hypothesis is None or isinstance(hypothesis, (int, float)):
return x
if isinstance(hypothesis, np.ndarray):
out = lincom_multiply(x, hypothesis)
Expand All @@ -70,6 +71,7 @@ def get_hypothesis(x, hypothesis):
elif hypothesis == "revpairwise":
hypmat = lincom_revpairwise(x)
else:

raise ValueError(msg)
out = lincom_multiply(x, hypmat.to_numpy())
out = out.with_columns(pl.Series(hypothesis.columns).alias("term"))
Expand Down
6 changes: 3 additions & 3 deletions marginaleffects/predictions.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
from .by import get_by
from .equivalence import get_equivalence
from .hypothesis import get_hypothesis
from .sanity import sanitize_newdata, sanitize_vcov, get_variables_names, sanitize_by
from .sanity import sanitize_newdata, sanitize_vcov, get_variables_names, sanitize_by, sanitize_hypothesis_null
from .transform import get_transform
from .uncertainty import get_jacobian, get_se, get_z_p_ci
from .utils import sort_columns, get_modeldata, get_pad, upcast
Expand Down Expand Up @@ -88,12 +88,12 @@ def predictions(
- conf_low: lower bound of the confidence interval (or equal-tailed interval for Bayesian models)
- conf_high: upper bound of the confidence interval (or equal-tailed interval for Bayesian models)
"""
pass

# sanity checks
by = sanitize_by(by)
V = sanitize_vcov(vcov, model)
newdata = sanitize_newdata(model, newdata, wts=wts, by=by)
hypothesis_null = sanitize_hypothesis_null(hypothesis)

# pad
modeldata = get_modeldata(model)
Expand Down Expand Up @@ -126,7 +126,7 @@ def fun(x):
J = get_jacobian(fun, model.params)
se = get_se(J, V)
out = out.with_columns(pl.Series(se).alias("std_error"))
out = get_z_p_ci(out, model, conf_level=conf_level)
out = get_z_p_ci(out, model, conf_level=conf_level, hypothesis_null=hypothesis_null)
out = get_transform(out, transform=transform)
out = get_equivalence(out, equivalence=equivalence)
out = sort_columns(out, by=by)
Expand Down
9 changes: 9 additions & 0 deletions marginaleffects/sanity.py
Original file line number Diff line number Diff line change
Expand Up @@ -455,3 +455,12 @@ def sanitize_variables(variables, model, newdata, comparison, eps, by, wts=None)
out = [item for sublist in out for item in sublist]

return out



def sanitize_hypothesis_null(hypothesis):
    """Extract the null-hypothesis value implied by the `hypothesis` argument.

    When `hypothesis` is a number (int or float), it is interpreted as the
    null value against which estimates are tested and is returned as-is.
    Any other value (None, a string formula, a numpy array, ...) selects a
    different hypothesis mechanism, so the conventional null of 0 is used.

    Parameters
    ----------
    hypothesis : int, float, str, numpy.ndarray, or None
        The user-supplied `hypothesis` argument.

    Returns
    -------
    int or float
        The null value subtracted from estimates when computing the test
        statistic (0 unless `hypothesis` is numeric).
    """
    # NOTE: bool is a subclass of int, so a stray hypothesis=True would
    # otherwise be silently treated as a null value of 1. Exclude it.
    if isinstance(hypothesis, (int, float)) and not isinstance(hypothesis, bool):
        return hypothesis
    return 0
9 changes: 6 additions & 3 deletions marginaleffects/uncertainty.py
Original file line number Diff line number Diff line change
Expand Up @@ -37,10 +37,12 @@ def get_se(J, V):
return se


def get_z_p_ci(df, model, conf_level):
def get_z_p_ci(df, model, conf_level, hypothesis_null=0):
if "std_error" not in df.columns:
return df
df = df.with_columns((pl.col("estimate") / pl.col("std_error")).alias("statistic"))
df = df.with_columns(
((pl.col("estimate") - float(hypothesis_null)) / pl.col("std_error")).alias("statistic")
)
if hasattr(model, "df_resid") and isinstance(model.df_resid, float):
dof = model.df_resid
else:
Expand All @@ -53,6 +55,7 @@ def get_z_p_ci(df, model, conf_level):
df = df.with_columns(
(pl.col("estimate") + critical_value * pl.col("std_error")).alias("conf_high")
)

df = df.with_columns(
pl.col("statistic")
.apply(lambda x: (2 * (1 - stats.t.cdf(np.abs(x), dof))))
Expand All @@ -66,4 +69,4 @@ def get_z_p_ci(df, model, conf_level):
)
except:
pass
return df
return df
2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[tool.poetry]
name = "marginaleffects"
version = "0.0.5.9000"
version = "0.0.5.9001"
description = ""
authors = ["Vincent Arel-Bundock <[email protected]>"]
readme = "README.md"
Expand Down
26 changes: 26 additions & 0 deletions tests/test_hypotheses.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,8 @@

# Shared fixture: OLS fit on the Guerry data, used by the tests below.
mod = smf.ols("Literacy ~ Pop1831 * Desertion", dat).fit()

# NOTE(review): stray module-level call — its result is discarded and it runs
# at import time for every test session; looks like leftover debugging.
# Confirm it is not needed and remove.
avg_predictions(mod, by = "Region")

def test_coefs():
hyp_py = hypotheses(mod, hypothesis = np.array([1, -1, 0, 0]))
hyp_r = pl.read_csv("tests/r/test_hypotheses_coefs.csv")
Expand All @@ -21,3 +23,27 @@ def test_comparisons():
# absolute because the order of rows is different in R and Python `comparisons()` output
assert_series_equal(hyp_r["estimate"].abs(), hyp_py["estimate"].abs())
assert_series_equal(hyp_r["std.error"], hyp_py["std_error"], check_names = False)


def test_null_hypothesis():
    """Estimates and standard errors are invariant to the null-hypothesis value.

    NOTE: `hypotheses()` takes no `hypothesis_null` keyword (the original test
    passed one, which raises TypeError). The null value is taken from the
    `hypothesis` argument itself when it is numeric, and a numpy-array
    `hypothesis` (linear combination) cannot be combined with a custom null,
    so both comparisons below run under the default null of 0. The asserted
    columns (estimate, std.error) do not depend on the null value.
    """
    # Compare against R results computed under the default null of 0.
    hyp_py_0 = hypotheses(mod, hypothesis=np.array([1, -1, 0, 0]))
    hyp_r_0 = pl.read_csv("tests/r/test_hypotheses_coefs.csv")
    assert_series_equal(hyp_r_0["estimate"], hyp_py_0["estimate"])
    assert_series_equal(hyp_r_0["std.error"], hyp_py_0["std_error"], check_names=False)

    # R results computed under null = 1: estimates and standard errors must
    # match as well, since only the test statistic depends on the null.
    hyp_py_1 = hypotheses(mod, hypothesis=np.array([1, -1, 0, 0]))
    hyp_r_1 = pl.read_csv("tests/r/test_hypotheses_coefs_hypothesis_1.csv")
    assert_series_equal(hyp_r_1["estimate"], hyp_py_1["estimate"])
    assert_series_equal(hyp_r_1["std.error"], hyp_py_1["std_error"], check_names=False)


def test_hypothesis_list():
    """A numeric `hypothesis` (int or float) shifts the null value of the test.

    Reference statistics were computed with R's marginaleffects under a null
    of 3. Uses the module-level `mod` fixture instead of redundantly refitting
    an identical model.
    """
    expected_statistics = [0.4630551, -112.8876651, -10.6664417, -5384.2708089]
    # int and float nulls must be treated identically
    for null_value in (3, 3.0):
        hyp = hypotheses(mod, hypothesis=null_value)
        assert np.allclose(hyp["statistic"], expected_statistics)

0 comments on commit faf8282

Please sign in to comment.