vincentarelbundock · vincentarelbundock · Dec 26, 2023 · Dec 25, 2023 · Dec 25, 2023 · Dec 25, 2023
diff --git a/NEWS.md b/NEWS.md
@@ -1,3 +1,8 @@
+# dev
+
+* New `eps_vcov` argument to control the step size in the computation of the Jacobian used for standard errors.
+* Refactored the `plot_*()` functions and fixed several bugs in them.
+
 # 0.0.6
 
 * `hypothesis` accepts a float or integer to specify a different null hypothesis.

diff --git a/marginaleffects/comparisons.py b/marginaleffects/comparisons.py
@@ -35,6 +35,7 @@ def comparisons(
     equivalence=None,
     transform=None,
     eps=1e-4,
+    eps_vcov=None,
 ):
     """
     `comparisons()` and `avg_comparisons()` are functions for predicting the outcome variable at different regressor values and comparing those predictions by computing a difference, ratio, or some other function. These functions can return many quantities of interest, such as contrasts, differences, risk ratios, changes in log odds, lift, slopes, elasticities, etc.
@@ -275,7 +276,7 @@ def outer(x):
     out = outer(model.coef)
 
     if vcov is not None and vcov is not False:
-        J = get_jacobian(func=outer, coefs=model.coef)
+        J = get_jacobian(func=outer, coefs=model.coef, eps_vcov=eps_vcov)
         se = get_se(J, V)
         out = out.with_columns(pl.Series(se).alias("std_error"))
         out = get_z_p_ci(

diff --git a/marginaleffects/hypotheses.py b/marginaleffects/hypotheses.py
@@ -9,7 +9,9 @@
 from .utils import sort_columns
 
 
-def hypotheses(model, hypothesis=None, conf_level=0.95, vcov=True, equivalence=None):
+def hypotheses(
+    model, hypothesis=None, conf_level=0.95, vcov=True, equivalence=None, eps_vcov=None
+):
     """
     (Non-)Linear Tests for Null Hypotheses, Joint Hypotheses, Equivalence, Non Superiority, and Non Inferiority.
 
@@ -83,7 +85,7 @@ def fun(x):
 
     out = fun(model.coef)
     if vcov is not None:
-        J = get_jacobian(fun, model.coef)
+        J = get_jacobian(fun, model.coef, eps_vcov=eps_vcov)
         se = get_se(J, V)
         out = out.with_columns(pl.Series(se).alias("std_error"))
         out = get_z_p_ci(

diff --git a/marginaleffects/plot_common.py b/marginaleffects/plot_common.py
@@ -58,24 +58,14 @@ def dt_on_condition(model, condition):
                     modeldata[key], [0, 25, 50, 75, 100], method="midpoint"
                 ).tolist()
 
-        elif variable_type == "boolean" or variable_type == "character":
-            to_datagrid[key] = modeldata[key].unique().to_list()
+        elif variable_type in ["boolean", "character", "binary"]:
+            to_datagrid[key] = modeldata[key].unique().sort().to_list()
             assert (
                 len(to_datagrid[key]) <= 10
             ), f"Character type variables of more than 10 unique values are not supported. {key} variable has {len(to_datagrid[key])} unique values."
 
-    dt_code = "datagrid(newdata=modeldata"
-    for key, value in to_datagrid.items():
-        dt_code += ", " + key + "="
-        if isinstance(value, str):
-            dt_code += "'" + value + "'"
-        else:
-            dt_code += str(value)
-    dt_code += ")"
-
-    # TODO: this is weird. I'd prefer someting more standard than evaluating text
-    exec("global dt; dt = " + dt_code)
-
+    to_datagrid["newdata"] = modeldata
+    dt = datagrid(**to_datagrid)
     return dt  # noqa: F821
 
 
@@ -99,7 +89,7 @@ def plotter(dt, x_name, x_type, fig=None, axe=None, label=None, color=None):
             plot_obj.fill_between(x, y_low, y_high, color=color, alpha=0.2)
             plot_obj.plot(x, y, color=color, label=label)
 
-    elif x_type == "character" or x_type == "boolean":
+    elif x_type in ["character", "binary", "boolean"]:
         y_low = np.absolute(y - y_low)
         y_high = np.absolute(y_high - y)
         if color is None:

diff --git a/marginaleffects/plot_comparisons.py b/marginaleffects/plot_comparisons.py
@@ -85,64 +85,42 @@ def plot_comparisons(
         wts is not None and not by
     ), "The `wts` argument requires a `by` argument."
 
-    if by:
-        if newdata is not None:
-            dt = comparisons(
-                model,
-                variables=variables,
-                newdata=newdata,
-                comparison=comparison,
-                vcov=vcov,
-                conf_level=conf_level,
-                by=by,
-                wts=wts,
-                hypothesis=hypothesis,
-                equivalence=equivalence,
-                transform=transform,
-                eps=eps,
-            )
-        else:
-            dt = comparisons(
-                model,
-                variables=variables,
-                comparison=comparison,
-                vcov=vcov,
-                conf_level=conf_level,
-                by=by,
-                wts=wts,
-                hypothesis=hypothesis,
-                equivalence=equivalence,
-                transform=transform,
-                eps=eps,
-            )
-
-        var_list = [by] if isinstance(by, str) else by
-
-    elif condition is not None:
-        dt_condition = dt_on_condition(model, condition)
-        if isinstance(condition, str):
-            var_list = [condition]
-        elif isinstance(condition, list):
-            var_list = condition
-        elif isinstance(condition, dict):
-            var_list = list(condition.keys())
-        dt = comparisons(
-            model,
-            variables=variables,
-            newdata=dt_condition,
-            comparison=comparison,
-            vcov=vcov,
-            conf_level=conf_level,
-            by=var_list,
-            wts=wts,
-            hypothesis=hypothesis,
-            equivalence=equivalence,
-            transform=transform,
-            eps=eps,
-        )
-
-    dt = dt.drop_nulls(var_list[0])
-    dt = dt.sort(var_list[0])
+    if condition is not None:
+        newdata = dt_on_condition(model, condition)
+
+    dt = comparisons(
+        model,
+        variables=variables,
+        newdata=newdata,
+        comparison=comparison,
+        vcov=vcov,
+        conf_level=conf_level,
+        by=by,
+        wts=wts,
+        hypothesis=hypothesis,
+        equivalence=equivalence,
+        transform=transform,
+        eps=eps,
+    )
+
+    if not draw:
+        return dt
+
+    if isinstance(condition, str):
+        var_list = [condition]
+    elif isinstance(condition, list):
+        var_list = condition
+    elif isinstance(condition, dict):
+        var_list = list(condition.keys())
+    elif isinstance(by, str):
+        var_list = [by]
+    elif isinstance(by, list):
+        var_list = by
+    elif isinstance(by, dict):
+        var_list = list(by.keys())
+
+    # drop stray "newdata"/"model" entries; not sure why these get appended
+    var_list = [x for x in var_list if x not in ["newdata", "model"]]
 
     if not draw:
         return dt

diff --git a/marginaleffects/plot_predictions.py b/marginaleffects/plot_predictions.py
@@ -68,58 +68,44 @@ def plot_predictions(
         not by and newdata is not None
     ), "The `newdata` argument requires a `by` argument."
 
-    assert (condition is None and by) or (
-        condition is not None and not by
-    ), "One of the `condition` and `by` arguments must be supplied, but not both."
-
     assert not (
         wts is not None and not by
     ), "The `wts` argument requires a `by` argument."
 
-    if by:
-        if newdata is not None:
-            dt = predictions(
-                model,
-                by=by,
-                newdata=newdata,
-                conf_level=conf_level,
-                vcov=vcov,
-                transform=transform,
-                wts=wts,
-            )
-        else:
-            dt = predictions(
-                model,
-                by=by,
-                conf_level=conf_level,
-                vcov=vcov,
-                transform=transform,
-                wts=wts,
-            )
-
-        var_list = [by] if isinstance(by, str) else by
+    assert not (
+        condition is None and by is None
+    ), "One of the `condition` and `by` arguments must be supplied, but not both."
 
     if condition is not None:
-        dt_condition = dt_on_condition(model, condition)
-        if isinstance(condition, str):
-            var_list = [condition]
-        elif isinstance(condition, list):
-            var_list = condition
-        elif isinstance(condition, dict):
-            var_list = list(condition.keys())
-        dt = predictions(
-            model,
-            by=var_list,
-            newdata=dt_condition,
-            conf_level=conf_level,
-            vcov=vcov,
-            transform=transform,
-        )
-
-    dt = dt.drop_nulls(var_list[0])
-    dt = dt.sort(var_list[0])
+        newdata = dt_on_condition(model, condition)
+
+    dt = predictions(
+        model,
+        by=by,
+        newdata=newdata,
+        conf_level=conf_level,
+        vcov=vcov,
+        transform=transform,
+        wts=wts,
+    )
 
     if not draw:
         return dt
 
-    return plot_common(dt, model.response_name, var_list)
+    if isinstance(condition, str):
+        var_list = [condition]
+    elif isinstance(condition, list):
+        var_list = condition
+    elif isinstance(condition, dict):
+        var_list = list(condition.keys())
+    elif isinstance(by, str):
+        var_list = [by]
+    elif isinstance(by, list):
+        var_list = by
+    elif isinstance(by, dict):
+        var_list = list(by.keys())
+
+    # drop stray "newdata"/"model" entries; not sure why these get appended
+    var_list = [x for x in var_list if x not in ["newdata", "model"]]
+
+    return plot_common(dt, model.response_name, var_list=var_list)
diff --git a/marginaleffects/plot_slopes.py b/marginaleffects/plot_slopes.py
@@ -14,6 +14,8 @@ def plot_slopes(
     by=False,
     wts=None,
     draw=True,
+    eps=1e-4,
+    eps_vcov=None,
 ):
     """
     Plot slopes on the y-axis against values of one or more predictors (x-axis, colors/shapes, and facets).
@@ -77,54 +79,51 @@ def plot_slopes(
         wts is not None and not by
     ), "The `wts` argument requires a `by` argument."
 
-    if by:
-        if newdata is not None:
-            dt = slopes(
-                model,
-                variables=variables,
-                newdata=newdata,
-                slope=slope,
-                vcov=vcov,
-                conf_level=conf_level,
-                by=by,
-                wts=wts,
-            )
-        else:
-            dt = slopes(
-                model,
-                variables=variables,
-                slope=slope,
-                vcov=vcov,
-                conf_level=conf_level,
-                by=by,
-                wts=wts,
-            )
-
-        var_list = [by] if isinstance(by, str) else by
-
-    elif condition is not None:
-        dt_condition = dt_on_condition(model, condition)
-        if isinstance(condition, str):
-            var_list = [condition]
-        elif isinstance(condition, list):
-            var_list = condition
-        elif isinstance(condition, dict):
-            var_list = list(condition.keys())
-        dt = slopes(
-            model,
-            variables=variables,
-            newdata=dt_condition,
-            slope=slope,
-            vcov=vcov,
-            conf_level=conf_level,
-            by=var_list,
-            wts=wts,
-        )
-
-    dt = dt.drop_nulls(var_list[0])
-    dt = dt.sort(var_list[0])
+    assert not (
+        not by and newdata is not None
+    ), "The `newdata` argument requires a `by` argument."
+
+    assert not (
+        wts is not None and not by
+    ), "The `wts` argument requires a `by` argument."
+
+    assert not (
+        condition is None and by is None
+    ), "One of the `condition` and `by` arguments must be supplied, but not both."
+
+    if condition is not None:
+        newdata = dt_on_condition(model, condition)
+
+    dt = slopes(
+        model,
+        variables=variables,
+        newdata=newdata,
+        slope=slope,
+        vcov=vcov,
+        conf_level=conf_level,
+        by=by,
+        wts=wts,
+        eps=eps,
+        eps_vcov=eps_vcov,
+    )
 
     if not draw:
         return dt
 
+    if isinstance(condition, str):
+        var_list = [condition]
+    elif isinstance(condition, list):
+        var_list = condition
+    elif isinstance(condition, dict):
+        var_list = list(condition.keys())
+    elif isinstance(by, str):
+        var_list = [by]
+    elif isinstance(by, list):
+        var_list = by
+    elif isinstance(by, dict):
+        var_list = list(by.keys())
+
+    # drop stray "newdata"/"model" entries; not sure why these get appended
+    var_list = [x for x in var_list if x not in ["newdata", "model"]]
+
     return plot_common(dt, "Slope", var_list)