Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

plot_predictions: tests and bugs #60

Merged
merged 7 commits into from
Dec 26, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions NEWS.md
Original file line number Diff line number Diff line change
@@ -1,3 +1,8 @@
# dev

* New `eps_vcov` argument to control the step size in the computation of the Jacobian used for standard errors.
* Refactored the `plot_*()` functions and fixed several bugs.

# 0.0.6

* `hypothesis` accepts a float or integer to specify a different null hypothesis.
Expand Down
3 changes: 2 additions & 1 deletion marginaleffects/comparisons.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,7 @@ def comparisons(
equivalence=None,
transform=None,
eps=1e-4,
eps_vcov=None,
):
"""
`comparisons()` and `avg_comparisons()` are functions for predicting the outcome variable at different regressor values and comparing those predictions by computing a difference, ratio, or some other function. These functions can return many quantities of interest, such as contrasts, differences, risk ratios, changes in log odds, lift, slopes, elasticities, etc.
Expand Down Expand Up @@ -275,7 +276,7 @@ def outer(x):
out = outer(model.coef)

if vcov is not None and vcov is not False:
J = get_jacobian(func=outer, coefs=model.coef)
J = get_jacobian(func=outer, coefs=model.coef, eps_vcov=eps_vcov)
se = get_se(J, V)
out = out.with_columns(pl.Series(se).alias("std_error"))
out = get_z_p_ci(
Expand Down
6 changes: 4 additions & 2 deletions marginaleffects/hypotheses.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,9 @@
from .utils import sort_columns


def hypotheses(model, hypothesis=None, conf_level=0.95, vcov=True, equivalence=None):
def hypotheses(
model, hypothesis=None, conf_level=0.95, vcov=True, equivalence=None, eps_vcov=None
):
"""
(Non-)Linear Tests for Null Hypotheses, Joint Hypotheses, Equivalence, Non Superiority, and Non Inferiority.

Expand Down Expand Up @@ -83,7 +85,7 @@ def fun(x):

out = fun(model.coef)
if vcov is not None:
J = get_jacobian(fun, model.coef)
J = get_jacobian(fun, model.coef, eps_vcov=eps_vcov)
se = get_se(J, V)
out = out.with_columns(pl.Series(se).alias("std_error"))
out = get_z_p_ci(
Expand Down
20 changes: 5 additions & 15 deletions marginaleffects/plot_common.py
Original file line number Diff line number Diff line change
Expand Up @@ -58,24 +58,14 @@ def dt_on_condition(model, condition):
modeldata[key], [0, 25, 50, 75, 100], method="midpoint"
).tolist()

elif variable_type == "boolean" or variable_type == "character":
to_datagrid[key] = modeldata[key].unique().to_list()
elif variable_type in ["boolean", "character", "binary"]:
to_datagrid[key] = modeldata[key].unique().sort().to_list()
assert (
len(to_datagrid[key]) <= 10
), f"Character type variables of more than 10 unique values are not supported. {key} variable has {len(to_datagrid[key])} unique values."

dt_code = "datagrid(newdata=modeldata"
for key, value in to_datagrid.items():
dt_code += ", " + key + "="
if isinstance(value, str):
dt_code += "'" + value + "'"
else:
dt_code += str(value)
dt_code += ")"

# TODO: this is weird. I'd prefer something more standard than evaluating text
exec("global dt; dt = " + dt_code)

to_datagrid["newdata"] = modeldata
dt = datagrid(**to_datagrid)
return dt # noqa: F821


Expand All @@ -99,7 +89,7 @@ def plotter(dt, x_name, x_type, fig=None, axe=None, label=None, color=None):
plot_obj.fill_between(x, y_low, y_high, color=color, alpha=0.2)
plot_obj.plot(x, y, color=color, label=label)

elif x_type == "character" or x_type == "boolean":
elif x_type in ["character", "binary", "boolean"]:
y_low = np.absolute(y - y_low)
y_high = np.absolute(y_high - y)
if color is None:
Expand Down
94 changes: 36 additions & 58 deletions marginaleffects/plot_comparisons.py
Original file line number Diff line number Diff line change
Expand Up @@ -85,64 +85,42 @@ def plot_comparisons(
wts is not None and not by
), "The `wts` argument requires a `by` argument."

if by:
if newdata is not None:
dt = comparisons(
model,
variables=variables,
newdata=newdata,
comparison=comparison,
vcov=vcov,
conf_level=conf_level,
by=by,
wts=wts,
hypothesis=hypothesis,
equivalence=equivalence,
transform=transform,
eps=eps,
)
else:
dt = comparisons(
model,
variables=variables,
comparison=comparison,
vcov=vcov,
conf_level=conf_level,
by=by,
wts=wts,
hypothesis=hypothesis,
equivalence=equivalence,
transform=transform,
eps=eps,
)

var_list = [by] if isinstance(by, str) else by

elif condition is not None:
dt_condition = dt_on_condition(model, condition)
if isinstance(condition, str):
var_list = [condition]
elif isinstance(condition, list):
var_list = condition
elif isinstance(condition, dict):
var_list = list(condition.keys())
dt = comparisons(
model,
variables=variables,
newdata=dt_condition,
comparison=comparison,
vcov=vcov,
conf_level=conf_level,
by=var_list,
wts=wts,
hypothesis=hypothesis,
equivalence=equivalence,
transform=transform,
eps=eps,
)

dt = dt.drop_nulls(var_list[0])
dt = dt.sort(var_list[0])
if condition is not None:
newdata = dt_on_condition(model, condition)

dt = comparisons(
model,
variables=variables,
newdata=newdata,
comparison=comparison,
vcov=vcov,
conf_level=conf_level,
by=by,
wts=wts,
hypothesis=hypothesis,
equivalence=equivalence,
transform=transform,
eps=eps,
)

if not draw:
return dt

if isinstance(condition, str):
var_list = [condition]
elif isinstance(condition, list):
var_list = condition
elif isinstance(condition, dict):
var_list = list(condition.keys())
elif isinstance(by, str):
var_list = [by]
elif isinstance(by, list):
var_list = by
elif isinstance(by, dict):
var_list = list(by.keys())

# not sure why these get appended
var_list = [x for x in var_list if x not in ["newdata", "model"]]

if not draw:
return dt
Expand Down
76 changes: 31 additions & 45 deletions marginaleffects/plot_predictions.py
Original file line number Diff line number Diff line change
Expand Up @@ -68,58 +68,44 @@ def plot_predictions(
not by and newdata is not None
), "The `newdata` argument requires a `by` argument."

assert (condition is None and by) or (
condition is not None and not by
), "One of the `condition` and `by` arguments must be supplied, but not both."

assert not (
wts is not None and not by
), "The `wts` argument requires a `by` argument."

if by:
if newdata is not None:
dt = predictions(
model,
by=by,
newdata=newdata,
conf_level=conf_level,
vcov=vcov,
transform=transform,
wts=wts,
)
else:
dt = predictions(
model,
by=by,
conf_level=conf_level,
vcov=vcov,
transform=transform,
wts=wts,
)

var_list = [by] if isinstance(by, str) else by
assert not (
condition is None and by is None
), "One of the `condition` and `by` arguments must be supplied, but not both."

if condition is not None:
dt_condition = dt_on_condition(model, condition)
if isinstance(condition, str):
var_list = [condition]
elif isinstance(condition, list):
var_list = condition
elif isinstance(condition, dict):
var_list = list(condition.keys())
dt = predictions(
model,
by=var_list,
newdata=dt_condition,
conf_level=conf_level,
vcov=vcov,
transform=transform,
)

dt = dt.drop_nulls(var_list[0])
dt = dt.sort(var_list[0])
newdata = dt_on_condition(model, condition)

dt = predictions(
model,
by=by,
newdata=newdata,
conf_level=conf_level,
vcov=vcov,
transform=transform,
wts=wts,
)

if not draw:
return dt

return plot_common(dt, model.response_name, var_list)
if isinstance(condition, str):
var_list = [condition]
elif isinstance(condition, list):
var_list = condition
elif isinstance(condition, dict):
var_list = list(condition.keys())
elif isinstance(by, str):
var_list = [by]
elif isinstance(by, list):
var_list = by
elif isinstance(by, dict):
var_list = list(by.keys())

# not sure why these get appended
var_list = [x for x in var_list if x not in ["newdata", "model"]]

return plot_common(dt, model.response_name, var_list=var_list)
91 changes: 45 additions & 46 deletions marginaleffects/plot_slopes.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,8 @@ def plot_slopes(
by=False,
wts=None,
draw=True,
eps=1e-4,
eps_vcov=None,
):
"""
Plot slopes on the y-axis against values of one or more predictors (x-axis, colors/shapes, and facets).
Expand Down Expand Up @@ -77,54 +79,51 @@ def plot_slopes(
wts is not None and not by
), "The `wts` argument requires a `by` argument."

if by:
if newdata is not None:
dt = slopes(
model,
variables=variables,
newdata=newdata,
slope=slope,
vcov=vcov,
conf_level=conf_level,
by=by,
wts=wts,
)
else:
dt = slopes(
model,
variables=variables,
slope=slope,
vcov=vcov,
conf_level=conf_level,
by=by,
wts=wts,
)

var_list = [by] if isinstance(by, str) else by

elif condition is not None:
dt_condition = dt_on_condition(model, condition)
if isinstance(condition, str):
var_list = [condition]
elif isinstance(condition, list):
var_list = condition
elif isinstance(condition, dict):
var_list = list(condition.keys())
dt = slopes(
model,
variables=variables,
newdata=dt_condition,
slope=slope,
vcov=vcov,
conf_level=conf_level,
by=var_list,
wts=wts,
)

dt = dt.drop_nulls(var_list[0])
dt = dt.sort(var_list[0])
assert not (
not by and newdata is not None
), "The `newdata` argument requires a `by` argument."

assert not (
wts is not None and not by
), "The `wts` argument requires a `by` argument."

assert not (
condition is None and by is None
), "One of the `condition` and `by` arguments must be supplied, but not both."

if condition is not None:
newdata = dt_on_condition(model, condition)

dt = slopes(
model,
variables=variables,
newdata=newdata,
slope=slope,
vcov=vcov,
conf_level=conf_level,
by=by,
wts=wts,
eps=eps,
eps_vcov=eps_vcov,
)

if not draw:
return dt

if isinstance(condition, str):
var_list = [condition]
elif isinstance(condition, list):
var_list = condition
elif isinstance(condition, dict):
var_list = list(condition.keys())
elif isinstance(by, str):
var_list = [by]
elif isinstance(by, list):
var_list = by
elif isinstance(by, dict):
var_list = list(by.keys())

# not sure why these get appended
var_list = [x for x in var_list if x not in ["newdata", "model"]]

return plot_common(dt, "Slope", var_list)
Loading
Loading