From aede5c16e2d92695b7863b857ce20e223218a72b Mon Sep 17 00:00:00 2001
From: Martin Robinson <martinjrobins@gmail.com>
Date: Fri, 26 Jun 2020 18:23:07 +0100
Subject: [PATCH 01/39] #1031 start to reformat jax_bdf_integrate to fit with
 jax custom_vjp code

---
 pybamm/solvers/jax_bdf_solver.py | 251 ++++++++++++++++++++++---------
 pybamm/solvers/jax_solver.py     |   8 +-
 2 files changed, 182 insertions(+), 77 deletions(-)

diff --git a/pybamm/solvers/jax_bdf_solver.py b/pybamm/solvers/jax_bdf_solver.py
index 079307407a..7dc49666aa 100644
--- a/pybamm/solvers/jax_bdf_solver.py
+++ b/pybamm/solvers/jax_bdf_solver.py
@@ -1,11 +1,27 @@
+from functools import partial
+import operator as op
+
 import jax
-import jax.numpy as np
+import jax.numpy as jnp
+from jax import core
+from jax import lax
+from jax import ops
+from jax.util import safe_map, safe_zip, cache, split_list
+from jax.api_util import flatten_fun_nokwargs
+from jax.flatten_util import ravel_pytree
+from jax.tree_util import tree_map, tree_flatten, tree_unflatten
+from jax.interpreters import partial_eval as pe
+from jax import linear_util as lu
+
+map = safe_map
+zip = safe_zip
+
 
 from jax.config import config
 config.update("jax_enable_x64", True)
 
 
-def jax_bdf_integrate(fun, y0, t_eval, jac=None, inputs=None, rtol=1e-6, atol=1e-6):
+def jax_bdf_integrate(func, y0, t_eval, *args, rtol=1e-6, atol=1e-6):
     """
     Backward Difference formula (BDF) implicit multistep integrator. The basic algorithm
     is derived in [2]_. This particular implementation follows that implemented in the
@@ -18,19 +34,17 @@ def jax_bdf_integrate(fun, y0, t_eval, jac=None, inputs=None, rtol=1e-6, atol=1e
     Parameters
     ----------
 
-    fun: callable
-        function with signature (t, y, in), where t is a scalar time, y is a ndarray
-        with shape (n,), in is a dict of input parameters. Returns the rhs of the system
-        of ODE equations as an nd array with shape (n,)
+    func: callable
+        function to evaluate the time derivative of the solution `y` at time
+        `t` as `func(y, t, *args)`, producing the same shape/structure as `y0`.
     y0: ndarray
         initial state vector
     t_eval: ndarray
         time points to evaluate the solution, has shape (m,)
-    jac: (optional) callable
-        function with signature (t, y, in),returns the jacobian matrix of fun as an
-        ndarray with shape (n,n)
-    inputs: (optional) dict
-        dict mapping input parameter names to values
+    args: (optional)
+        tuple of additional arguments for `fun`, which must be arrays
+        scalars, or (nested) standard Python containers (tuples, lists, dicts,
+        namedtuples, i.e. pytrees) of those types.
     rtol: (optional) float
         relative tolerance for the solver
     atol: (optional) float
@@ -56,11 +70,17 @@ def jax_bdf_integrate(fun, y0, t_eval, jac=None, inputs=None, rtol=1e-6, atol=1e
            fundamental algorithms for scientific computing in Python.
            Nature methods, 17(3), 261-272.
     """
+    def _check_arg(arg):
+        if not isinstance(arg, core.Tracer) and not core.valid_jaxtype(arg):
+            msg = ("The contents of odeint *args must be arrays or scalars, but got "
+                   "\n{}.")
+        raise TypeError(msg.format(arg))
 
-    y0_device = jax.device_put(y0).reshape(-1)
-    t_eval_device = jax.device_put(t_eval)
-    y_out, stepper = _bdf_odeint(fun, jac, rtol, atol, y0_device, t_eval_device, inputs)
-    return y_out, stepper
+    flat_args, in_tree = tree_flatten((y0, t_eval[0], *args))
+    in_avals = tuple(map(abstractify, flat_args))
+    converted, consts = closure_convert(func, in_tree, in_avals)
+
+    return _bdf_odeint_wrapper(converted, rtol, atol, y0, t_eval, *consts, *args)
 
 
 MAX_ORDER = 5
@@ -111,13 +131,13 @@ def _compute_R(order, factor):
     Note that the U matrix also defined in the same section can be also be
     found using factor = 1, which corresponds to R with a constant step size
     """
-    I = np.arange(1, MAX_ORDER + 1).reshape(-1, 1)
-    J = np.arange(1, MAX_ORDER + 1)
-    M = np.empty((MAX_ORDER + 1, MAX_ORDER + 1))
+    I = jnp.arange(1, MAX_ORDER + 1).reshape(-1, 1)
+    J = jnp.arange(1, MAX_ORDER + 1)
+    M = jnp.empty((MAX_ORDER + 1, MAX_ORDER + 1))
     M = jax.ops.index_update(M, jax.ops.index[1:, 1:],
                              (I - 1 - factor * J) / I)
     M = jax.ops.index_update(M, jax.ops.index[0], 1)
-    R = np.cumprod(M, axis=0)
+    R = jnp.cumprod(M, axis=0)
 
     return R
 
@@ -160,25 +180,25 @@ def _bdf_init(fun, jac, t0, y0, h0, rtol, atol):
     order = 1
     state['order'] = order
     state['h'] = _select_initial_step(state, fun, t0, y0, f0, h0)
-    EPS = np.finfo(y0.dtype).eps
-    state['newton_tol'] = np.max((10 * EPS / rtol, np.min((0.03, rtol ** 0.5))))
+    EPS = jnp.finfo(y0.dtype).eps
+    state['newton_tol'] = jnp.max((10 * EPS / rtol, jnp.min((0.03, rtol ** 0.5))))
     state['n_equal_steps'] = 0
-    D = np.empty((MAX_ORDER + 1, len(y0)), dtype=y0.dtype)
+    D = jnp.empty((MAX_ORDER + 1, len(y0)), dtype=y0.dtype)
     D = jax.ops.index_update(D, jax.ops.index[0, :], y0)
     D = jax.ops.index_update(D, jax.ops.index[1, :], f0 * h0)
     state['D'] = D
     state['y0'] = None
     state['scale_y0'] = None
     state = _predict(state)
-    I = np.identity(len(y0), dtype=y0.dtype)
+    I = jnp.identity(len(y0), dtype=y0.dtype)
     state['I'] = I
 
     # kappa values for difference orders, taken from Table 1 of [1]
-    kappa = np.array([0, -0.1850, -1 / 9, -0.0823, -0.0415, 0])
-    gamma = np.hstack((0, np.cumsum(1 / np.arange(1, MAX_ORDER + 1))))
+    kappa = jnp.array([0, -0.1850, -1 / 9, -0.0823, -0.0415, 0])
+    gamma = jnp.hstack((0, jnp.cumsum(1 / jnp.arange(1, MAX_ORDER + 1))))
     alpha = 1.0 / ((1 - kappa) * gamma)
     c = h0 * alpha[order]
-    error_const = kappa * gamma + 1 / np.arange(1, MAX_ORDER + 2)
+    error_const = kappa * gamma + 1 / jnp.arange(1, MAX_ORDER + 2)
 
     state['kappa'] = kappa
     state['gamma'] = gamma
@@ -214,13 +234,13 @@ def _select_initial_step(state, fun, t0, y0, f0, h0):
     .. [1] E. Hairer, S. P. Norsett G. Wanner, "Solving Ordinary Differential
                Equations I: Nonstiff Problems", Sec. II.4.
     """
-    scale = state['atol'] + np.abs(y0) * state['rtol']
+    scale = state['atol'] + jnp.abs(y0) * state['rtol']
     y1 = y0 + h0 * f0
     f1 = fun(t0 + h0, y1)
-    d2 = np.sqrt(np.mean(((f1 - f0) / scale)**2))
+    d2 = jnp.sqrt(jnp.mean(((f1 - f0) / scale)**2))
     order = 1
     h1 = h0 * d2 ** (-1 / (order + 1))
-    return np.min((100 * h0, h1))
+    return jnp.min((100 * h0, h1))
 
 
 def _predict(state):
@@ -229,10 +249,10 @@ def _predict(state):
     """
     n = len(state['y'])
     order = state['order']
-    orders = np.repeat(np.arange(MAX_ORDER + 1).reshape(-1, 1), n, axis=1)
-    subD = np.where(orders <= order, state['D'], 0)
-    state['y0'] = np.sum(subD, axis=0)
-    state['scale_y0'] = state['atol'] + state['rtol'] * np.abs(state['y0'])
+    orders = jnp.repeat(jnp.arange(MAX_ORDER + 1).reshape(-1, 1), n, axis=1)
+    subD = jnp.where(orders <= order, state['D'], 0)
+    state['y0'] = jnp.sum(subD, axis=0)
+    state['scale_y0'] = state['atol'] + state['rtol'] * jnp.abs(state['y0'])
     return state
 
 
@@ -242,11 +262,11 @@ def _update_psi(state):
     """
     order = state['order']
     n = len(state['y'])
-    orders = np.arange(MAX_ORDER + 1)
-    subGamma = np.where(orders > 0, np.where(orders <= order, state['gamma'], 0), 0)
-    orders = np.repeat(orders.reshape(-1, 1), n, axis=1)
-    subD = np.where(orders > 0, np.where(orders <= order, state['D'], 0), 0)
-    state['psi'] = np.dot(
+    orders = jnp.arange(MAX_ORDER + 1)
+    subGamma = jnp.where(orders > 0, jnp.where(orders <= order, state['gamma'], 0), 0)
+    orders = jnp.repeat(orders.reshape(-1, 1), n, axis=1)
+    subD = jnp.where(orders > 0, jnp.where(orders <= order, state['D'], 0), 0)
+    state['psi'] = jnp.dot(
         subD.T,
         subGamma
     ) * state['alpha'][order]
@@ -337,16 +357,16 @@ def update_lu(state):
 
     # update D using equations in section 3.2 of [1]
     RU = _compute_R(order, factor).dot(state['U'])
-    I = np.arange(0, MAX_ORDER + 1).reshape(-1, 1)
-    J = np.arange(0, MAX_ORDER + 1)
+    I = jnp.arange(0, MAX_ORDER + 1).reshape(-1, 1)
+    J = jnp.arange(0, MAX_ORDER + 1)
 
     # only update order+1, order+1 entries of D
-    RU = np.where(np.logical_and(I <= order, J <= order),
-                  RU, np.identity(MAX_ORDER + 1))
+    RU = jnp.where(jnp.logical_and(I <= order, J <= order),
+                  RU, jnp.identity(MAX_ORDER + 1))
     D = state['D']
-    D = np.dot(RU.T, D)
+    D = jnp.dot(RU.T, D)
     # D = jax.ops.index_update(D, jax.ops.index[:order + 1],
-    #                         np.dot(RU.T, D[:order + 1]))
+    #                         jnp.dot(RU.T, D[:order + 1]))
     state['D'] = D
 
     # update psi (D has changed)
@@ -379,8 +399,8 @@ def _newton_iteration(state, fun):
     LU = state['LU']
     scale_y0 = state['scale_y0']
     t = state['t'] + state['h']
-    d = np.zeros_like(y0)
-    y = np.array(y0, copy=True)
+    d = jnp.zeros_like(y0)
+    y = jnp.array(y0, copy=True)
 
     not_converged = True
     dy_norm_old = -1.0
@@ -397,7 +417,7 @@ def while_body(while_state):
         state['n_function_evals'] += 1
         b = c * f_eval - psi - d
         dy = jax.scipy.linalg.lu_solve(LU, b)
-        dy_norm = np.sqrt(np.mean((dy / scale_y0)**2))
+        dy_norm = jnp.sqrt(jnp.mean((dy / scale_y0)**2))
         rate = dy_norm / dy_norm_old
 
         # if iteration is not going to converge in NEWTON_MAXITER
@@ -443,8 +463,8 @@ def _bdf_step(state, fun, jac):
     # initialise step size and try to make the step,
     # iterate, reducing step size until error is in bounds
     step_accepted = False
-    y = np.empty_like(state['y'])
-    d = np.empty_like(state['y'])
+    y = jnp.empty_like(state['y'])
+    d = jnp.empty_like(state['y'])
     n_iter = -1
 
     # loop until step is accepted
@@ -486,13 +506,13 @@ def need_to_update_step_size(if_state2):
             def converged(if_state2):
                 state, step_accepted = if_state2
                 # yay, converged, now check error is within bounds
-                scale_y = state['atol'] + state['rtol'] * np.abs(y)
+                scale_y = state['atol'] + state['rtol'] * jnp.abs(y)
 
                 # combine eq 3, 4 and 6 from [1] to obtain error
                 # Note that error = C_k * h^{k+1} y^{k+1}
                 # and d = D^{k+1} y_{n+1} \approx h^{k+1} y^{k+1}
                 error = state['error_const'][state['order']] * d
-                error_norm = np.sqrt(np.mean((error / scale_y)**2))
+                error_norm = jnp.sqrt(jnp.mean((error / scale_y)**2))
 
                 # calculate safety outside if since we will reuse later
                 safety = 0.9 * (2 * NEWTON_MAXITER + 1) / (2 * NEWTON_MAXITER
@@ -505,7 +525,7 @@ def error_too_large(if_state3):
                     state, step_accepted = if_state3
                     state['n_error_test_failures'] += 1
                     # calculate optimal step size factor as per eq 2.46 of [2]
-                    factor = np.max((MIN_FACTOR,
+                    factor = jnp.max((MIN_FACTOR,
                                      safety *
                                      error_norm ** (-1 / (state['order'] + 1))))
                     state = _update_step_size(state, factor, False)
@@ -560,9 +580,9 @@ def order_change(if_state):
         order = state['order']
 
         # Note: we are recalculating these from the while loop above, could re-use?
-        scale_y = state['atol'] + state['rtol'] * np.abs(y)
+        scale_y = state['atol'] + state['rtol'] * jnp.abs(y)
         error = state['error_const'][order] * d
-        error_norm = np.sqrt(np.mean((error / scale_y)**2))
+        error_norm = jnp.sqrt(jnp.mean((error / scale_y)**2))
         safety = 0.9 * (2 * NEWTON_MAXITER + 1) / (2 * NEWTON_MAXITER
                                                    + n_iter)
 
@@ -577,11 +597,11 @@ def order_change(if_state):
         def order_greater_one(if_state2):
             state, scale_y, order = if_state2
             error_m = state['error_const'][order - 1] * state['D'][order]
-            error_m_norm = np.sqrt(np.mean((error_m / scale_y)**2))
+            error_m_norm = jnp.sqrt(jnp.mean((error_m / scale_y)**2))
             return error_m_norm
 
         def order_equal_one(if_state2):
-            error_m_norm = np.inf
+            error_m_norm = jnp.inf
             return error_m_norm
 
         error_m_norm = jax.lax.cond(order > 1,
@@ -591,27 +611,27 @@ def order_equal_one(if_state2):
         def order_less_max(if_state2):
             state, scale_y, order = if_state2
             error_p = state['error_const'][order + 1] * state['D'][order + 2]
-            error_p_norm = np.sqrt(np.mean((error_p / scale_y)**2))
+            error_p_norm = jnp.sqrt(jnp.mean((error_p / scale_y)**2))
             return error_p_norm
 
         def order_max(if_state2):
-            error_p_norm = np.inf
+            error_p_norm = jnp.inf
             return error_p_norm
 
         error_p_norm = jax.lax.cond(order < MAX_ORDER,
                                     if_state2, order_less_max,
                                     if_state2, order_max)
 
-        error_norms = np.array([error_m_norm, error_norm, error_p_norm])
-        factors = error_norms ** (-1 / (np.arange(3) + order))
+        error_norms = jnp.array([error_m_norm, error_norm, error_p_norm])
+        factors = error_norms ** (-1 / (jnp.arange(3) + order))
 
         # now we have the three factors for orders k-1, k and k+1, pick the maximum in
         # order to maximise the resultant step size
-        max_index = np.argmax(factors)
+        max_index = jnp.argmax(factors)
         order += max_index - 1
         state['order'] = order
 
-        factor = np.min((MAX_FACTOR, safety * factors[max_index]))
+        factor = jnp.min((MAX_FACTOR, safety * factors[max_index]))
         state = _update_step_size(state, factor, False)
 
         return state
@@ -655,28 +675,24 @@ def while_body(while_state):
     return order_summation
 
 
-@jax.partial(jax.jit, static_argnums=(0, 1, 2, 3))
-def _bdf_odeint(fun, jac, rtol, atol, y0, t_eval, inputs):
+@jax.partial(jax.custom_vjp, nondiff_argnums=(0, 1, 2))
+def _bdf_odeint(fun, rtol, atol, y0, t_eval, *args):
     """
     main solver loop - creates a stepper object and steps through time, interpolating to
     the time points in t_eval
     """
 
     def fun_bind_inputs(t, y):
-        return fun(t, y, inputs)
+        return fun(y, t, *args)
 
-    if jac is None:
-        jac_bind_inputs = jax.jacfwd(fun_bind_inputs, argnums=1)
-    else:
-        def jac_bind_inputs(t, y):
-            jac(t, y, inputs)
+    jac_bind_inputs = jax.jacfwd(fun_bind_inputs, argnums=1)
 
     t0 = t_eval[0]
     h0 = t_eval[1] - t0
 
     stepper = _bdf_init(fun_bind_inputs, jac_bind_inputs, t0, y0, h0, rtol, atol)
     i = 0
-    y_out = np.empty((len(y0), len(t_eval)), dtype=y0.dtype)
+    y_out = jnp.empty((len(y0), len(t_eval)), dtype=y0.dtype)
 
     init_state = [stepper, t_eval, i, y_out, 0]
 
@@ -687,7 +703,7 @@ def cond_fun(state):
     def body_fun(state):
         stepper, t_eval, i, y_out, n_steps = state
         stepper = _bdf_step(stepper, fun_bind_inputs, jac_bind_inputs)
-        index = np.searchsorted(t_eval, stepper['t'])
+        index = jnp.searchsorted(t_eval, stepper['t'])
 
         def for_body(j, y_out):
             t = t_eval[j]
@@ -703,4 +719,93 @@ def for_body(j, y_out):
 
     stepper['n_steps'] = n_steps
 
-    return y_out, stepper
+    return y_out
+
+
+@partial(jax.jit, static_argnums=(0, 1, 2))
+def _bdf_odeint_wrapper(func, rtol, atol, y0, ts, *args):
+    y0, unravel = ravel_pytree(y0)
+    func = ravel_first_arg(func, unravel)
+    out = _bdf_odeint(func, rtol, atol, y0, ts, *args)
+    return jax.vmap(unravel)(out)
+
+
+def _bdf_odeint_fwd(func, rtol, atol, y0, ts, *args):
+    ys = _bdf_odeint(func, rtol, atol, y0, ts, *args)
+    return ys, (ys, ts, args)
+
+
+def _bdf_odeint_rev(func, rtol, atol, res, g):
+    ys, ts, args = res
+
+    def aug_dynamics(augmented_state, t, *args):
+        """Original system augmented with vjp_y, vjp_t and vjp_args."""
+        y, y_bar, *_ = augmented_state
+        # `t` here is negatice time, so we need to negate again to get back to
+        # normal time. See the `odeint` invocation in `scan_fun` below.
+        y_dot, vjpfun = jax.vjp(func, y, -t, *args)
+        return (-y_dot, *vjpfun(y_bar))
+
+    y_bar = g[-1]
+    ts_bar = []
+    t0_bar = 0.
+
+    def scan_fun(carry, i):
+        y_bar, t0_bar, args_bar = carry
+        # Compute effect of moving measurement time
+        t_bar = jnp.dot(func(ys[i], ts[i], *args), g[i])
+        t0_bar = t0_bar - t_bar
+        # Run augmented system backwards to previous observation
+        _, y_bar, t0_bar, args_bar = jax_bdf_integrate(
+            aug_dynamics, (ys[i], y_bar, t0_bar, args_bar),
+            jnp.array([-ts[i], -ts[i - 1]]),
+            *args, rtol=rtol, atol=atol)
+        y_bar, t0_bar, args_bar = tree_map(op.itemgetter(1), (y_bar, t0_bar, args_bar))
+        # Add gradient from current output
+        y_bar = y_bar + g[i - 1]
+        return (y_bar, t0_bar, args_bar), t_bar
+
+    init_carry = (g[-1], 0., tree_map(jnp.zeros_like, args))
+    (y_bar, t0_bar, args_bar), rev_ts_bar = lax.scan(
+        scan_fun, init_carry, jnp.arange(len(ts) - 1, 0, -1))
+    ts_bar = jnp.concatenate([jnp.array([t0_bar]), rev_ts_bar[::-1]])
+    return (y_bar, ts_bar, *args_bar)
+
+
+_bdf_odeint.defvjp(_bdf_odeint_fwd, _bdf_odeint_rev)
+
+
+@cache()
+def closure_convert(fun, in_tree, in_avals):
+    in_pvals = [pe.PartialVal.unknown(aval) for aval in in_avals]
+    wrapped_fun, out_tree = flatten_fun_nokwargs(lu.wrap_init(fun), in_tree)
+    with core.initial_style_staging():
+        jaxpr, out_pvals, consts = pe.trace_to_jaxpr(
+            wrapped_fun, in_pvals, instantiate=True, stage_out=False)
+    out_tree = out_tree()
+    num_consts = len(consts)
+
+    def converted_fun(y, t, *consts_args):
+        consts, args = split_list(consts_args, [num_consts])
+        all_args, in_tree2 = tree_flatten((y, t, *args))
+        assert in_tree == in_tree2
+        out_flat = core.eval_jaxpr(jaxpr, consts, *all_args)
+        return tree_unflatten(out_tree, out_flat)
+
+    return converted_fun, consts
+
+
+def abstractify(x):
+    return core.raise_to_shaped(core.get_aval(x))
+
+
+def ravel_first_arg(f, unravel):
+    return ravel_first_arg_(lu.wrap_init(f), unravel).call_wrapped
+
+
+@lu.transformation
+def ravel_first_arg_(unravel, y_flat, *args):
+    y = unravel(y_flat)
+    ans = yield (y,) + args, {}
+    ans_flat, _ = ravel_pytree(ans)
+    yield ans_flat
diff --git a/pybamm/solvers/jax_solver.py b/pybamm/solvers/jax_solver.py
index 5b9fb6bc0e..28e5d52c6c 100644
--- a/pybamm/solvers/jax_solver.py
+++ b/pybamm/solvers/jax_solver.py
@@ -128,16 +128,16 @@ def solve_model_rk45(inputs):
             return np.transpose(y), None
 
         def solve_model_bdf(inputs):
-            y, stepper = pybamm.jax_bdf_integrate(
-                model.rhs_eval,
+            y = pybamm.jax_bdf_integrate(
+                rhs_odeint,
                 y0,
                 t_eval,
-                inputs=inputs,
+                inputs,
                 rtol=self.rtol,
                 atol=self.atol,
                 **self.extra_options
             )
-            return y, stepper
+            return y, None
 
         if self.method == 'RK45':
             return jax.jit(solve_model_rk45)

From dbea36bdd20b43851bebd40aff414c3631122758 Mon Sep 17 00:00:00 2001
From: Martin Robinson <martinjrobins@gmail.com>
Date: Thu, 2 Jul 2020 16:29:13 +0100
Subject: [PATCH 02/39] #1031 add a sensitivities test

---
 pybamm/solvers/jax_bdf_solver.py              |  4 +-
 .../unit/test_solvers/test_jax_bdf_solver.py  | 52 +++++++++++++++----
 tests/unit/test_solvers/test_jax_solver.py    |  4 +-
 3 files changed, 45 insertions(+), 15 deletions(-)

diff --git a/pybamm/solvers/jax_bdf_solver.py b/pybamm/solvers/jax_bdf_solver.py
index 7dc49666aa..3812cd17c7 100644
--- a/pybamm/solvers/jax_bdf_solver.py
+++ b/pybamm/solvers/jax_bdf_solver.py
@@ -675,7 +675,7 @@ def while_body(while_state):
     return order_summation
 
 
-@jax.partial(jax.custom_vjp, nondiff_argnums=(0, 1, 2))
+#@jax.partial(jax.custom_vjp, nondiff_argnums=(0, 1, 2))
 def _bdf_odeint(fun, rtol, atol, y0, t_eval, *args):
     """
     main solver loop - creates a stepper object and steps through time, interpolating to
@@ -772,7 +772,7 @@ def scan_fun(carry, i):
     return (y_bar, ts_bar, *args_bar)
 
 
-_bdf_odeint.defvjp(_bdf_odeint_fwd, _bdf_odeint_rev)
+#_bdf_odeint.defvjp(_bdf_odeint_fwd, _bdf_odeint_rev)
 
 
 @cache()
diff --git a/tests/unit/test_solvers/test_jax_bdf_solver.py b/tests/unit/test_solvers/test_jax_bdf_solver.py
index a91a722c13..b919e2a1d9 100644
--- a/tests/unit/test_solvers/test_jax_bdf_solver.py
+++ b/tests/unit/test_solvers/test_jax_bdf_solver.py
@@ -5,6 +5,7 @@
 import time
 import numpy as np
 from platform import system
+import jax
 
 
 @unittest.skipIf(system() == "Windows", "JAX not supported on windows")
@@ -27,15 +28,14 @@ def test_solver(self):
 
         # Solve
         t_eval = np.linspace(0, 1, 80)
-        y0 = model.concatenated_initial_conditions.evaluate()
+        y0 = model.concatenated_initial_conditions.evaluate().reshape(-1)
         rhs = pybamm.EvaluatorJax(model.concatenated_rhs)
 
-        def fun(t, y, inputs):
-            return rhs.evaluate(t=t, y=y, inputs=inputs).reshape(-1)
+        def fun(y, t):
+            return rhs.evaluate(t=t, y=y).reshape(-1)
 
         t0 = time.perf_counter()
-        y, _ = pybamm.jax_bdf_integrate(
-            fun, y0, t_eval, inputs=None, rtol=1e-8, atol=1e-8)
+        y = pybamm.jax_bdf_integrate(fun, y0, t_eval, rtol=1e-8, atol=1e-8)
         t1 = time.perf_counter() - t0
 
         # test accuracy
@@ -43,7 +43,7 @@ def fun(t, y, inputs):
                                    rtol=1e-7, atol=1e-7)
 
         t0 = time.perf_counter()
-        y, _ = pybamm.jax_bdf_integrate(fun, y0, t_eval, rtol=1e-8, atol=1e-8)
+        y = pybamm.jax_bdf_integrate(fun, y0, t_eval, rtol=1e-8, atol=1e-8)
         t2 = time.perf_counter() - t0
 
         # second run should be much quicker
@@ -53,6 +53,36 @@ def fun(t, y, inputs):
         np.testing.assert_allclose(y[0, :], np.exp(0.1 * t_eval),
                                    rtol=1e-7, atol=1e-7)
 
+    def test_solver_sensitivities(self):
+        # Create model
+        model = pybamm.BaseModel()
+        model.convert_to_format = "jax"
+        domain = ["negative electrode", "separator", "positive electrode"]
+        var = pybamm.Variable("var", domain=domain)
+        model.rhs = {var: -pybamm.InputParameter("rate") * var}
+        model.initial_conditions = {var: 1}
+
+        # create discretisation
+        mesh = get_mesh_for_testing()
+        spatial_methods = {"macroscale": pybamm.FiniteVolume()}
+        disc = pybamm.Discretisation(mesh, spatial_methods)
+        disc.process_model(model)
+
+        # Solve
+        t_eval = np.linspace(0, 10, 80)
+        y0 = model.concatenated_initial_conditions.evaluate().reshape(-1)
+        rhs = pybamm.EvaluatorJax(model.concatenated_rhs)
+
+        def fun(y, t, inputs):
+            return rhs.evaluate(t=t, y=y, inputs=inputs).reshape(-1)
+
+        grad_integrate = jax.jacfwd(pybamm.jax_bdf_integrate, argnums=3)
+
+        grad = grad_integrate(fun, y0, t_eval, {"rate": 0.1}, rtol=1e-9, atol=1e-9)
+        print(grad)
+
+        np.testing.assert_allclose(y[0, :].reshape(-1), np.exp(-0.1 * t_eval))
+
     def test_solver_with_inputs(self):
         # Create model
         model = pybamm.BaseModel()
@@ -69,17 +99,17 @@ def test_solver_with_inputs(self):
         disc.process_model(model)
 
         # Solve
-        t_eval = np.linspace(0, 10, 100)
-        y0 = model.concatenated_initial_conditions.evaluate()
+        t_eval = np.linspace(0, 10, 80)
+        y0 = model.concatenated_initial_conditions.evaluate().reshape(-1)
         rhs = pybamm.EvaluatorJax(model.concatenated_rhs)
 
-        def fun(t, y, inputs):
+        def fun(y, t, inputs):
             return rhs.evaluate(t=t, y=y, inputs=inputs).reshape(-1)
 
-        y, _ = pybamm.jax_bdf_integrate(fun, y0, t_eval, inputs={
+        y = pybamm.jax_bdf_integrate(fun, y0, t_eval, {
             "rate": 0.1}, rtol=1e-9, atol=1e-9)
 
-        np.testing.assert_allclose(y[0, :], np.exp(-0.1 * t_eval))
+        np.testing.assert_allclose(y[0, :].reshape(-1), np.exp(-0.1 * t_eval))
 
 
 if __name__ == "__main__":
diff --git a/tests/unit/test_solvers/test_jax_solver.py b/tests/unit/test_solvers/test_jax_solver.py
index 47ad041ea8..8625d41035 100644
--- a/tests/unit/test_solvers/test_jax_solver.py
+++ b/tests/unit/test_solvers/test_jax_solver.py
@@ -117,7 +117,7 @@ def test_model_solver_with_inputs(self):
         disc.process_model(model)
         # Solve
         solver = pybamm.JaxSolver(rtol=1e-8, atol=1e-8)
-        t_eval = np.linspace(0, 5, 100)
+        t_eval = np.linspace(0, 5, 80)
 
         t0 = time.perf_counter()
         solution = solver.solve(model, t_eval, inputs={"rate": 0.1})
@@ -153,7 +153,7 @@ def test_get_solve(self):
         disc.process_model(model)
         # Solve
         solver = pybamm.JaxSolver(rtol=1e-8, atol=1e-8)
-        t_eval = np.linspace(0, 5, 100)
+        t_eval = np.linspace(0, 5, 80)
 
         with self.assertRaisesRegex(RuntimeError, "Model is not set up for solving"):
             solver.get_solve(model, t_eval)

From 3cd80d39e47ddc378994819916200678ce8bcf2a Mon Sep 17 00:00:00 2001
From: Martin Robinson <martinjrobins@gmail.com>
Date: Sat, 4 Jul 2020 09:35:34 +0100
Subject: [PATCH 03/39] #1031 add fix for int captured args to fun

---
 pybamm/solvers/jax_bdf_solver.py              | 55 ++++++++++++++-----
 .../unit/test_solvers/test_jax_bdf_solver.py  | 19 ++++++-
 2 files changed, 56 insertions(+), 18 deletions(-)

diff --git a/pybamm/solvers/jax_bdf_solver.py b/pybamm/solvers/jax_bdf_solver.py
index 3812cd17c7..02d21997f2 100644
--- a/pybamm/solvers/jax_bdf_solver.py
+++ b/pybamm/solvers/jax_bdf_solver.py
@@ -1,11 +1,11 @@
 from functools import partial
 import operator as op
+import numpy as onp
 
 import jax
 import jax.numpy as jnp
 from jax import core
-from jax import lax
-from jax import ops
+from jax import dtypes
 from jax.util import safe_map, safe_zip, cache, split_list
 from jax.api_util import flatten_fun_nokwargs
 from jax.flatten_util import ravel_pytree
@@ -119,6 +119,19 @@ def flax_fori_loop(start, stop, body_fun, init_val):  # pragma: no cover
         val = body_fun(i, val)
     return val
 
+def flax_scan(f, init, xs, length=None):  # pragma: no cover
+    """
+    for debugging purposes, use this instead of jax.lax.scan
+    """
+    if xs is None:
+        xs = [None] * length
+    carry = init
+    ys = []
+    for x in xs:
+        carry, y = f(carry, x)
+        ys.append(y)
+    return carry, onp.stack(ys)
+
 
 def _compute_R(order, factor):
     """
@@ -675,7 +688,7 @@ def while_body(while_state):
     return order_summation
 
 
-#@jax.partial(jax.custom_vjp, nondiff_argnums=(0, 1, 2))
+@jax.partial(jax.custom_vjp, nondiff_argnums=(0, 1, 2))
 def _bdf_odeint(fun, rtol, atol, y0, t_eval, *args):
     """
     main solver loop - creates a stepper object and steps through time, interpolating to
@@ -692,7 +705,7 @@ def fun_bind_inputs(t, y):
 
     stepper = _bdf_init(fun_bind_inputs, jac_bind_inputs, t0, y0, h0, rtol, atol)
     i = 0
-    y_out = jnp.empty((len(y0), len(t_eval)), dtype=y0.dtype)
+    y_out = jnp.empty((len(t_eval), len(y0)), dtype=y0.dtype)
 
     init_state = [stepper, t_eval, i, y_out, 0]
 
@@ -707,7 +720,7 @@ def body_fun(state):
 
         def for_body(j, y_out):
             t = t_eval[j]
-            y_out = jax.ops.index_update(y_out, jax.ops.index[:, j],
+            y_out = jax.ops.index_update(y_out, jax.ops.index[j, :],
                                          _bdf_interpolate(stepper, t))
             return y_out
 
@@ -741,7 +754,7 @@ def _bdf_odeint_rev(func, rtol, atol, res, g):
     def aug_dynamics(augmented_state, t, *args):
         """Original system augmented with vjp_y, vjp_t and vjp_args."""
         y, y_bar, *_ = augmented_state
-        # `t` here is negatice time, so we need to negate again to get back to
+        # `t` here is negative time, so we need to negate again to get back to
         # normal time. See the `odeint` invocation in `scan_fun` below.
         y_dot, vjpfun = jax.vjp(func, y, -t, *args)
         return (-y_dot, *vjpfun(y_bar))
@@ -766,14 +779,13 @@ def scan_fun(carry, i):
         return (y_bar, t0_bar, args_bar), t_bar
 
     init_carry = (g[-1], 0., tree_map(jnp.zeros_like, args))
-    (y_bar, t0_bar, args_bar), rev_ts_bar = lax.scan(
+    (y_bar, t0_bar, args_bar), rev_ts_bar = jax.lax.scan(
         scan_fun, init_carry, jnp.arange(len(ts) - 1, 0, -1))
     ts_bar = jnp.concatenate([jnp.array([t0_bar]), rev_ts_bar[::-1]])
     return (y_bar, ts_bar, *args_bar)
 
 
-#_bdf_odeint.defvjp(_bdf_odeint_fwd, _bdf_odeint_rev)
-
+_bdf_odeint.defvjp(_bdf_odeint_fwd, _bdf_odeint_rev)
 
 @cache()
 def closure_convert(fun, in_tree, in_avals):
@@ -781,19 +793,33 @@ def closure_convert(fun, in_tree, in_avals):
     wrapped_fun, out_tree = flatten_fun_nokwargs(lu.wrap_init(fun), in_tree)
     with core.initial_style_staging():
         jaxpr, out_pvals, consts = pe.trace_to_jaxpr(
-            wrapped_fun, in_pvals, instantiate=True, stage_out=False)
+        wrapped_fun, in_pvals, instantiate=True, stage_out=False)
     out_tree = out_tree()
-    num_consts = len(consts)
 
-    def converted_fun(y, t, *consts_args):
-        consts, args = split_list(consts_args, [num_consts])
+    # We only want to closure convert for constants with respect to which we're
+    # differentiating. As a proxy for that, we hoist consts with float dtype.
+    # TODO(mattjj): revise this approach
+    is_float = lambda c: dtypes.issubdtype(dtypes.dtype(c), jnp.inexact)
+    (closure_consts, hoisted_consts), merge = partition_list(is_float, consts)
+    num_consts = len(hoisted_consts)
+
+    def converted_fun(y, t, *hconsts_args):
+        hoisted_consts, args = split_list(hconsts_args, [num_consts])
+        consts = merge(closure_consts, hoisted_consts)
         all_args, in_tree2 = tree_flatten((y, t, *args))
         assert in_tree == in_tree2
         out_flat = core.eval_jaxpr(jaxpr, consts, *all_args)
         return tree_unflatten(out_tree, out_flat)
 
-    return converted_fun, consts
+    return converted_fun, hoisted_consts
 
+def partition_list(choice, lst):
+    out = [], []
+    which = [out[choice(elt)].append(elt) or choice(elt) for elt in lst]
+    def merge(l1, l2):
+        i1, i2 = iter(l1), iter(l2)
+        return [next(i2 if snd else i1) for snd in which]
+    return out, merge
 
 def abstractify(x):
     return core.raise_to_shaped(core.get_aval(x))
@@ -802,7 +828,6 @@ def abstractify(x):
 def ravel_first_arg(f, unravel):
     return ravel_first_arg_(lu.wrap_init(f), unravel).call_wrapped
 
-
 @lu.transformation
 def ravel_first_arg_(unravel, y_flat, *args):
     y = unravel(y_flat)
diff --git a/tests/unit/test_solvers/test_jax_bdf_solver.py b/tests/unit/test_solvers/test_jax_bdf_solver.py
index b919e2a1d9..f81220e891 100644
--- a/tests/unit/test_solvers/test_jax_bdf_solver.py
+++ b/tests/unit/test_solvers/test_jax_bdf_solver.py
@@ -27,7 +27,7 @@ def test_solver(self):
         disc.process_model(model)
 
         # Solve
-        t_eval = np.linspace(0, 1, 80)
+        t_eval = np.linspace(0.0, 1.0, 80)
         y0 = model.concatenated_initial_conditions.evaluate().reshape(-1)
         rhs = pybamm.EvaluatorJax(model.concatenated_rhs)
 
@@ -76,11 +76,24 @@ def test_solver_sensitivities(self):
         def fun(y, t, inputs):
             return rhs.evaluate(t=t, y=y, inputs=inputs).reshape(-1)
 
-        grad_integrate = jax.jacfwd(pybamm.jax_bdf_integrate, argnums=3)
+        h = 0.0001
+        rate = 0.1
 
-        grad = grad_integrate(fun, y0, t_eval, {"rate": 0.1}, rtol=1e-9, atol=1e-9)
+
+        grad_integrate = jax.jacrev(pybamm.jax_bdf_integrate, argnums=3)
+
+        grad = grad_integrate(fun, y0, t_eval, {"rate": rate}, rtol=1e-9, atol=1e-9)
         print(grad)
 
+        eval_plus = pybamm.jax_bdf_integrate(fun, y0, t_eval, {"rate": rate + h},
+                                             rtol=1e-9, atol=1e-9)
+        eval_neg = pybamm.jax_bdf_integrate(fun, y0, t_eval, {"rate": rate - h},
+                                             rtol=1e-9, atol=1e-9)
+        grad_num = (eval_plus - eval_neg) / (2 * h)
+        print(grad_num)
+
+
+
         np.testing.assert_allclose(y[0, :].reshape(-1), np.exp(-0.1 * t_eval))
 
     def test_solver_with_inputs(self):

From 003bbc48ceb07a1bc10baf6286f9ed86a1fdd837 Mon Sep 17 00:00:00 2001
From: Martin Robinson <martinjrobins@gmail.com>
Date: Sat, 4 Jul 2020 09:41:06 +0100
Subject: [PATCH 04/39] #1031 swap to using fun(y, t)

---
 pybamm/solvers/jax_bdf_solver.py               | 18 +++++++++---------
 tests/unit/test_solvers/test_jax_bdf_solver.py |  4 ++--
 2 files changed, 11 insertions(+), 11 deletions(-)

diff --git a/pybamm/solvers/jax_bdf_solver.py b/pybamm/solvers/jax_bdf_solver.py
index 02d21997f2..1933f8bb1d 100644
--- a/pybamm/solvers/jax_bdf_solver.py
+++ b/pybamm/solvers/jax_bdf_solver.py
@@ -167,11 +167,11 @@ def _bdf_init(fun, jac, t0, y0, h0, rtol, atol):
     ----------
 
     fun: callable
-        function with signature (t, y), where t is a scalar time and y is a ndarray with
+        function with signature (y, t), where t is a scalar time and y is a ndarray with
         shape (n,), returns the rhs of the system of ODE equations as an nd array with
         shape (n,)
     jac: callable
-        function with signature (t, y), where t is a scalar time and y is a ndarray with
+        function with signature (y, t), where t is a scalar time and y is a ndarray with
         shape (n,), returns the jacobian matrix of fun as an ndarray with shape (n,n)
     t0: float
         initial time
@@ -187,7 +187,7 @@ def _bdf_init(fun, jac, t0, y0, h0, rtol, atol):
     state = {}
     state['t'] = t0
     state['y'] = y0
-    f0 = fun(t0, y0)
+    f0 = fun(y0, t0)
     state['atol'] = atol
     state['rtol'] = rtol
     order = 1
@@ -219,7 +219,7 @@ def _bdf_init(fun, jac, t0, y0, h0, rtol, atol):
     state['c'] = c
     state['error_const'] = error_const
 
-    J = jac(t0, y0)
+    J = jac(y0, t0)
     state['J'] = J
     state['LU'] = jax.scipy.linalg.lu_factor(I - c * J)
 
@@ -249,7 +249,7 @@ def _select_initial_step(state, fun, t0, y0, f0, h0):
     """
     scale = state['atol'] + jnp.abs(y0) * state['rtol']
     y1 = y0 + h0 * f0
-    f1 = fun(t0 + h0, y1)
+    f1 = fun(y1, t0 + h0)
     d2 = jnp.sqrt(jnp.mean(((f1 - f0) / scale)**2))
     order = 1
     h1 = h0 * d2 ** (-1 / (order + 1))
@@ -396,7 +396,7 @@ def _update_jacobian(state, jac):
     we update the jacobian using J(t_{n+1}, y^0_{n+1})
     following the scipy bdf implementation rather than J(t_n, y_n) as per [1]
     """
-    J = jac(state['t'] + state['h'], state['y0'])
+    J = jac(state['y0'], state['t'] + state['h'])
     state['n_jacobian_evals'] += 1
     state['LU'] = jax.scipy.linalg.lu_factor(state['I'] - state['c'] * J)
     state['n_lu_decompositions'] += 1
@@ -426,7 +426,7 @@ def while_cond(while_state):
 
     def while_body(while_state):
         k, not_converged, dy_norm_old, d, y, state = while_state
-        f_eval = fun(t, y)
+        f_eval = fun(y, t)
         state['n_function_evals'] += 1
         b = c * f_eval - psi - d
         dy = jax.scipy.linalg.lu_solve(LU, b)
@@ -695,10 +695,10 @@ def _bdf_odeint(fun, rtol, atol, y0, t_eval, *args):
     the time points in t_eval
     """
 
-    def fun_bind_inputs(t, y):
+    def fun_bind_inputs(y, t):
         return fun(y, t, *args)
 
-    jac_bind_inputs = jax.jacfwd(fun_bind_inputs, argnums=1)
+    jac_bind_inputs = jax.jacfwd(fun_bind_inputs, argnums=0)
 
     t0 = t_eval[0]
     h0 = t_eval[1] - t0
diff --git a/tests/unit/test_solvers/test_jax_bdf_solver.py b/tests/unit/test_solvers/test_jax_bdf_solver.py
index f81220e891..0f3eb8bf3e 100644
--- a/tests/unit/test_solvers/test_jax_bdf_solver.py
+++ b/tests/unit/test_solvers/test_jax_bdf_solver.py
@@ -39,7 +39,7 @@ def fun(y, t):
         t1 = time.perf_counter() - t0
 
         # test accuracy
-        np.testing.assert_allclose(y[0, :], np.exp(0.1 * t_eval),
+        np.testing.assert_allclose(y[:, 0], np.exp(0.1 * t_eval),
                                    rtol=1e-7, atol=1e-7)
 
         t0 = time.perf_counter()
@@ -50,7 +50,7 @@ def fun(y, t):
         self.assertLess(t2, t1)
 
         # test second run is accurate
-        np.testing.assert_allclose(y[0, :], np.exp(0.1 * t_eval),
+        np.testing.assert_allclose(y[:, 0], np.exp(0.1 * t_eval),
                                    rtol=1e-7, atol=1e-7)
 
     def test_solver_sensitivities(self):

From 0330cdcd16c5d27900844d260e0ec695137560e4 Mon Sep 17 00:00:00 2001
From: Martin Robinson <martinjrobins@gmail.com>
Date: Mon, 6 Jul 2020 11:34:25 +0100
Subject: [PATCH 05/39] #1031 try to improve performance on autodiff of solver

---
 pybamm/solvers/jax_bdf_solver.py              |  6 +-
 .../unit/test_solvers/test_jax_bdf_solver.py  | 75 +++++++++++++++++--
 2 files changed, 69 insertions(+), 12 deletions(-)

diff --git a/pybamm/solvers/jax_bdf_solver.py b/pybamm/solvers/jax_bdf_solver.py
index 1933f8bb1d..6f69ec2741 100644
--- a/pybamm/solvers/jax_bdf_solver.py
+++ b/pybamm/solvers/jax_bdf_solver.py
@@ -694,11 +694,9 @@ def _bdf_odeint(fun, rtol, atol, y0, t_eval, *args):
     main solver loop - creates a stepper object and steps through time, interpolating to
     the time points in t_eval
     """
+    fun_bind_inputs = lambda y, t: fun(y, t, *args)
 
-    def fun_bind_inputs(y, t):
-        return fun(y, t, *args)
-
-    jac_bind_inputs = jax.jacfwd(fun_bind_inputs, argnums=0)
+    jac_bind_inputs = jax.jacrev(fun_bind_inputs, argnums=0)
 
     t0 = t_eval[0]
     h0 = t_eval[1] - t0
diff --git a/tests/unit/test_solvers/test_jax_bdf_solver.py b/tests/unit/test_solvers/test_jax_bdf_solver.py
index 0f3eb8bf3e..b14aa5b941 100644
--- a/tests/unit/test_solvers/test_jax_bdf_solver.py
+++ b/tests/unit/test_solvers/test_jax_bdf_solver.py
@@ -68,8 +68,21 @@ def test_solver_sensitivities(self):
         disc = pybamm.Discretisation(mesh, spatial_methods)
         disc.process_model(model)
 
+        #model = pybamm.BaseModel()
+        #var = pybamm.Variable("var")
+        #model.rhs = {var: -pybamm.InputParameter("rate") * var}
+        #model.initial_conditions = {var: 1}
+        ## No need to set parameters; can use base discretisation (no spatial operators)
+
+        ## create discretisation
+        #disc = pybamm.Discretisation()
+        #disc.process_model(model)
+
+        #t_eval = np.linspace(0, 10, 4)
+
+
         # Solve
-        t_eval = np.linspace(0, 10, 80)
+        t_eval = np.linspace(0, 10, 4)
         y0 = model.concatenated_initial_conditions.evaluate().reshape(-1)
         rhs = pybamm.EvaluatorJax(model.concatenated_rhs)
 
@@ -79,22 +92,68 @@ def fun(y, t, inputs):
         h = 0.0001
         rate = 0.1
 
+        @jax.jit
+        def solve(rate):
+            return pybamm.jax_bdf_integrate(fun, y0, t_eval,
+                                            {'rate': rate},
+                                            rtol=1e-9, atol=1e-9)
+
+        @jax.jit
+        def solve_odeint(rate):
+            return jax.experimental.ode.odeint(fun, y0, t_eval,
+                                            {'rate': rate},
+                                            rtol=1e-9, atol=1e-9)
 
-        grad_integrate = jax.jacrev(pybamm.jax_bdf_integrate, argnums=3)
 
-        grad = grad_integrate(fun, y0, t_eval, {"rate": rate}, rtol=1e-9, atol=1e-9)
+        grad_solve = jax.jit(jax.jacrev(solve))
+        grad = grad_solve(rate)
         print(grad)
 
-        eval_plus = pybamm.jax_bdf_integrate(fun, y0, t_eval, {"rate": rate + h},
-                                             rtol=1e-9, atol=1e-9)
-        eval_neg = pybamm.jax_bdf_integrate(fun, y0, t_eval, {"rate": rate - h},
-                                             rtol=1e-9, atol=1e-9)
+        eval_plus = solve(rate + h)
+        eval_plus2 = solve_odeint(rate + h)
+        print(eval_plus.shape)
+        print(eval_plus2.shape)
+        eval_neg = solve(rate - h)
         grad_num = (eval_plus - eval_neg) / (2 * h)
         print(grad_num)
 
+        grad_solve = jax.jit(jax.jacrev(solve))
+        print('finished calculating jacobian',grad_solve)
+        print('5')
+        time.sleep(1)
+        print('4')
+        time.sleep(1)
+        print('3')
+        time.sleep(1)
+        print('2')
+        time.sleep(1)
+        print('1')
+        time.sleep(1)
+        print('go')
+
+        grad = grad_solve(rate)
+        print('finished executing jacobian')
+        print(grad)
 
+        print('5')
+        time.sleep(1)
+        print('4')
+        time.sleep(1)
+        print('3')
+        time.sleep(1)
+        print('2')
+        time.sleep(1)
+        print('1')
+        time.sleep(1)
+        print('go')
+
+        grad = grad_solve(rate)
+        print(grad)
+        print('finished executing jacobian')
 
-        np.testing.assert_allclose(y[0, :].reshape(-1), np.exp(-0.1 * t_eval))
+
+
+        #np.testing.assert_allclose(y[0, :].reshape(-1), np.exp(-0.1 * t_eval))
 
     def test_solver_with_inputs(self):
         # Create model

From c74c35e44a08a7832a28724deffbe9d62f85845a Mon Sep 17 00:00:00 2001
From: Martin Robinson <martinjrobins@gmail.com>
Date: Fri, 10 Jul 2020 16:44:54 +0100
Subject: [PATCH 06/39] #1031 finalise sensitivity test for jax_bdf_solver

---
 pybamm/solvers/jax_bdf_solver.py              |  2 +-
 .../unit/test_solvers/test_jax_bdf_solver.py  | 84 +++----------------
 2 files changed, 13 insertions(+), 73 deletions(-)

diff --git a/pybamm/solvers/jax_bdf_solver.py b/pybamm/solvers/jax_bdf_solver.py
index 6f69ec2741..15d1260fbe 100644
--- a/pybamm/solvers/jax_bdf_solver.py
+++ b/pybamm/solvers/jax_bdf_solver.py
@@ -696,7 +696,7 @@ def _bdf_odeint(fun, rtol, atol, y0, t_eval, *args):
     """
     fun_bind_inputs = lambda y, t: fun(y, t, *args)
 
-    jac_bind_inputs = jax.jacrev(fun_bind_inputs, argnums=0)
+    jac_bind_inputs = jax.jacfwd(fun_bind_inputs, argnums=0)
 
     t0 = t_eval[0]
     h0 = t_eval[1] - t0
diff --git a/tests/unit/test_solvers/test_jax_bdf_solver.py b/tests/unit/test_solvers/test_jax_bdf_solver.py
index b14aa5b941..8498b51765 100644
--- a/tests/unit/test_solvers/test_jax_bdf_solver.py
+++ b/tests/unit/test_solvers/test_jax_bdf_solver.py
@@ -7,7 +7,6 @@
 from platform import system
 import jax
 
-
 @unittest.skipIf(system() == "Windows", "JAX not supported on windows")
 class TestJaxBDFSolver(unittest.TestCase):
     def test_solver(self):
@@ -63,24 +62,11 @@ def test_solver_sensitivities(self):
         model.initial_conditions = {var: 1}
 
         # create discretisation
-        mesh = get_mesh_for_testing()
+        mesh = get_mesh_for_testing(xpts=10)
         spatial_methods = {"macroscale": pybamm.FiniteVolume()}
         disc = pybamm.Discretisation(mesh, spatial_methods)
         disc.process_model(model)
 
-        #model = pybamm.BaseModel()
-        #var = pybamm.Variable("var")
-        #model.rhs = {var: -pybamm.InputParameter("rate") * var}
-        #model.initial_conditions = {var: 1}
-        ## No need to set parameters; can use base discretisation (no spatial operators)
-
-        ## create discretisation
-        #disc = pybamm.Discretisation()
-        #disc.process_model(model)
-
-        #t_eval = np.linspace(0, 10, 4)
-
-
         # Solve
         t_eval = np.linspace(0, 10, 4)
         y0 = model.concatenated_initial_conditions.evaluate().reshape(-1)
@@ -92,68 +78,22 @@ def fun(y, t, inputs):
         h = 0.0001
         rate = 0.1
 
+        # create a couple of dummy "models" were we calculate the sum of the time series
         @jax.jit
-        def solve(rate):
-            return pybamm.jax_bdf_integrate(fun, y0, t_eval,
+        def solve_bdf(rate):
+            return jax.numpy.sum(pybamm.jax_bdf_integrate(fun, y0, t_eval,
                                             {'rate': rate},
-                                            rtol=1e-9, atol=1e-9)
-
-        @jax.jit
-        def solve_odeint(rate):
-            return jax.experimental.ode.odeint(fun, y0, t_eval,
-                                            {'rate': rate},
-                                            rtol=1e-9, atol=1e-9)
+                                            rtol=1e-9, atol=1e-9))
 
+        # check answers with finite difference
+        eval_plus = solve_bdf(rate + h)
+        eval_neg = solve_bdf(rate - h)
+        grad_num = (eval_plus - eval_neg) / (2 * h)
 
-        grad_solve = jax.jit(jax.jacrev(solve))
-        grad = grad_solve(rate)
-        print(grad)
+        grad_solve_bdf = jax.jit(jax.grad(solve_bdf))
+        grad_bdf = grad_solve_bdf(rate)
 
-        eval_plus = solve(rate + h)
-        eval_plus2 = solve_odeint(rate + h)
-        print(eval_plus.shape)
-        print(eval_plus2.shape)
-        eval_neg = solve(rate - h)
-        grad_num = (eval_plus - eval_neg) / (2 * h)
-        print(grad_num)
-
-        grad_solve = jax.jit(jax.jacrev(solve))
-        print('finished calculating jacobian',grad_solve)
-        print('5')
-        time.sleep(1)
-        print('4')
-        time.sleep(1)
-        print('3')
-        time.sleep(1)
-        print('2')
-        time.sleep(1)
-        print('1')
-        time.sleep(1)
-        print('go')
-
-        grad = grad_solve(rate)
-        print('finished executing jacobian')
-        print(grad)
-
-        print('5')
-        time.sleep(1)
-        print('4')
-        time.sleep(1)
-        print('3')
-        time.sleep(1)
-        print('2')
-        time.sleep(1)
-        print('1')
-        time.sleep(1)
-        print('go')
-
-        grad = grad_solve(rate)
-        print(grad)
-        print('finished executing jacobian')
-
-
-
-        #np.testing.assert_allclose(y[0, :].reshape(-1), np.exp(-0.1 * t_eval))
+        self.assertAlmostEqual(grad_bdf, grad_num, places=3)
 
     def test_solver_with_inputs(self):
         # Create model

From 610d215c655a9e00b854341dbc1e936313821140 Mon Sep 17 00:00:00 2001
From: Martin Robinson <martinjrobins@gmail.com>
Date: Fri, 10 Jul 2020 17:11:54 +0100
Subject: [PATCH 07/39] #1031 add test for sensitivities with jax solver

---
 pybamm/solvers/jax_solver.py                  | 13 ++--
 .../unit/test_solvers/test_jax_bdf_solver.py  |  2 +-
 tests/unit/test_solvers/test_jax_solver.py    | 68 +++++++++++++++++++
 3 files changed, 76 insertions(+), 7 deletions(-)

diff --git a/pybamm/solvers/jax_solver.py b/pybamm/solvers/jax_solver.py
index 28e5d52c6c..550317b86e 100644
--- a/pybamm/solvers/jax_solver.py
+++ b/pybamm/solvers/jax_solver.py
@@ -74,11 +74,11 @@ def get_solve(self, model, t_eval):
                 raise RuntimeError("Model is not set up for solving, run"
                                    "`solver.solve(model)` first")
 
-            self._cached_solves[model] = self._create_solve(model, t_eval)
+            self._cached_solves[model] = self.create_solve(model, t_eval)
 
         return self._cached_solves[model]
 
-    def _create_solve(self, model, t_eval):
+    def create_solve(self, model, t_eval):
         """
         Return a compiled JAX function that solves an ode model with input arguments.
 
@@ -125,7 +125,7 @@ def solve_model_rk45(inputs):
                 atol=self.atol,
                 **self.extra_options
             )
-            return np.transpose(y), None
+            return np.transpose(y)
 
         def solve_model_bdf(inputs):
             y = pybamm.jax_bdf_integrate(
@@ -137,7 +137,7 @@ def solve_model_bdf(inputs):
                 atol=self.atol,
                 **self.extra_options
             )
-            return y, None
+            return y
 
         if self.method == 'RK45':
             return jax.jit(solve_model_rk45)
@@ -165,13 +165,14 @@ def _integrate(self, model, t_eval, inputs=None):
 
         """
         if model not in self._cached_solves:
-            self._cached_solves[model] = self._create_solve(model, t_eval)
+            self._cached_solves[model] = self.create_solve(model, t_eval)
 
-        y, stepper = self._cached_solves[model](inputs)
+        y = self._cached_solves[model](inputs)
 
         # note - the actual solve is not done until this line!
         y = onp.array(y)
 
+        stepper = None
         if stepper is not None:
             sstring = ''
             sstring += 'JAX {} solver - stats\n'.format(self.method)
diff --git a/tests/unit/test_solvers/test_jax_bdf_solver.py b/tests/unit/test_solvers/test_jax_bdf_solver.py
index 8498b51765..c6c3add533 100644
--- a/tests/unit/test_solvers/test_jax_bdf_solver.py
+++ b/tests/unit/test_solvers/test_jax_bdf_solver.py
@@ -78,7 +78,7 @@ def fun(y, t, inputs):
         h = 0.0001
         rate = 0.1
 
-        # create a couple of dummy "models" were we calculate the sum of the time series
+        # create a dummy "model" where we calculate the sum of the time series
         @jax.jit
         def solve_bdf(rate):
             return jax.numpy.sum(pybamm.jax_bdf_integrate(fun, y0, t_eval,
diff --git a/tests/unit/test_solvers/test_jax_solver.py b/tests/unit/test_solvers/test_jax_solver.py
index 8625d41035..bafe90e358 100644
--- a/tests/unit/test_solvers/test_jax_solver.py
+++ b/tests/unit/test_solvers/test_jax_solver.py
@@ -5,6 +5,9 @@
 import time
 import numpy as np
 from platform import system
+from platform import system
+if system() != "Windows":
+    import jax
 
 
 @unittest.skipIf(system() == "Windows", "JAX not supported on windows")
@@ -51,6 +54,71 @@ def test_model_solver(self):
             self.assertLess(t_second_solve, t_first_solve)
             np.testing.assert_array_equal(second_solution.y, solution.y)
 
+    def test_solver_sensitivities(self):
+        # Create model
+        model = pybamm.BaseModel()
+        model.convert_to_format = "jax"
+        domain = ["negative electrode", "separator", "positive electrode"]
+        var = pybamm.Variable("var", domain=domain)
+        model.rhs = {var: 0.1 * var}
+        model.initial_conditions = {var: 1.0}
+        # No need to set parameters; can use base discretisation (no spatial operators)
+
+        # create discretisation
+        mesh = get_mesh_for_testing()
+        spatial_methods = {"macroscale": pybamm.FiniteVolume()}
+        disc = pybamm.Discretisation(mesh, spatial_methods)
+        disc.process_model(model)
+
+        for method in ['RK45', 'BDF']:
+            # Solve
+            solver = pybamm.JaxSolver(
+                method=method, rtol=1e-8, atol=1e-8
+            )
+            t_eval = np.linspace(0, 1, 80)
+
+            # need to solve the model once to get it set up by the base solver
+            solver.solve(model, t_eval)
+            solve = solver.get_solve(model, t_eval)
+
+            h = 0.0001
+            rate = 0.1
+
+            # create a dummy "model" where we calculate the sum of the time series
+            def solve_model(rate):
+                return jax.numpy.sum(solve({'rate': rate}))
+
+            # check answers with finite difference
+            eval_plus = solve_model(rate + h)
+            eval_neg = solve_model(rate - h)
+            grad_num = (eval_plus - eval_neg) / (2 * h)
+
+            grad_solve = jax.jit(jax.grad(solve_model))
+            grad = grad_solve(rate)
+
+            self.assertAlmostEqual(grad, grad_num, places=3)
+
+    def test_solver_only_works_with_jax(self):
+        model = pybamm.BaseModel()
+        var = pybamm.Variable("var")
+        model.rhs = {var: -pybamm.sqrt(var)}
+        model.initial_conditions = {var: 1}
+        # No need to set parameters; can use base discretisation (no spatial operators)
+
+        # create discretisation
+        disc = pybamm.Discretisation()
+        disc.process_model(model)
+
+        t_eval = np.linspace(0, 3, 100)
+
+        # solver needs a model converted to jax
+        for convert_to_format in ["casadi", "python", "something_else"]:
+            model.convert_to_format = convert_to_format
+
+            solver = pybamm.JaxSolver()
+            with self.assertRaisesRegex(RuntimeError, "must be converted to JAX"):
+                solver.solve(model, t_eval)
+
     def test_solver_only_works_with_jax(self):
         model = pybamm.BaseModel()
         var = pybamm.Variable("var")

From ebe7109da82ed190a8ab3e0a2fc143aad36835af Mon Sep 17 00:00:00 2001
From: Martin Robinson <martinjrobins@gmail.com>
Date: Fri, 10 Jul 2020 17:55:09 +0100
Subject: [PATCH 08/39] #1031 fix flake8 errors

---
 pybamm/solvers/jax_bdf_solver.py              | 24 ++++++++++++-------
 .../unit/test_solvers/test_jax_bdf_solver.py  |  9 ++++---
 tests/unit/test_solvers/test_jax_solver.py    | 22 -----------------
 3 files changed, 22 insertions(+), 33 deletions(-)

diff --git a/pybamm/solvers/jax_bdf_solver.py b/pybamm/solvers/jax_bdf_solver.py
index 15d1260fbe..c810e58e0b 100644
--- a/pybamm/solvers/jax_bdf_solver.py
+++ b/pybamm/solvers/jax_bdf_solver.py
@@ -12,15 +12,14 @@
 from jax.tree_util import tree_map, tree_flatten, tree_unflatten
 from jax.interpreters import partial_eval as pe
 from jax import linear_util as lu
+from jax.config import config
+
+config.update("jax_enable_x64", True)
 
 map = safe_map
 zip = safe_zip
 
 
-from jax.config import config
-config.update("jax_enable_x64", True)
-
-
 def jax_bdf_integrate(func, y0, t_eval, *args, rtol=1e-6, atol=1e-6):
     """
     Backward Difference formula (BDF) implicit multistep integrator. The basic algorithm
@@ -119,6 +118,7 @@ def flax_fori_loop(start, stop, body_fun, init_val):  # pragma: no cover
         val = body_fun(i, val)
     return val
 
+
 def flax_scan(f, init, xs, length=None):  # pragma: no cover
     """
     for debugging purposes, use this instead of jax.lax.scan
@@ -375,7 +375,7 @@ def update_lu(state):
 
     # only update order+1, order+1 entries of D
     RU = jnp.where(jnp.logical_and(I <= order, J <= order),
-                  RU, jnp.identity(MAX_ORDER + 1))
+                   RU, jnp.identity(MAX_ORDER + 1))
     D = state['D']
     D = jnp.dot(RU.T, D)
     # D = jax.ops.index_update(D, jax.ops.index[:order + 1],
@@ -694,7 +694,8 @@ def _bdf_odeint(fun, rtol, atol, y0, t_eval, *args):
     main solver loop - creates a stepper object and steps through time, interpolating to
     the time points in t_eval
     """
-    fun_bind_inputs = lambda y, t: fun(y, t, *args)
+    def fun_bind_inputs(y, t):
+        fun(y, t, *args)
 
     jac_bind_inputs = jax.jacfwd(fun_bind_inputs, argnums=0)
 
@@ -785,19 +786,22 @@ def scan_fun(carry, i):
 
 _bdf_odeint.defvjp(_bdf_odeint_fwd, _bdf_odeint_rev)
 
+
 @cache()
 def closure_convert(fun, in_tree, in_avals):
     in_pvals = [pe.PartialVal.unknown(aval) for aval in in_avals]
     wrapped_fun, out_tree = flatten_fun_nokwargs(lu.wrap_init(fun), in_tree)
     with core.initial_style_staging():
         jaxpr, out_pvals, consts = pe.trace_to_jaxpr(
-        wrapped_fun, in_pvals, instantiate=True, stage_out=False)
+            wrapped_fun, in_pvals, instantiate=True, stage_out=False
+        )
     out_tree = out_tree()
 
     # We only want to closure convert for constants with respect to which we're
     # differentiating. As a proxy for that, we hoist consts with float dtype.
     # TODO(mattjj): revise this approach
-    is_float = lambda c: dtypes.issubdtype(dtypes.dtype(c), jnp.inexact)
+    def is_float(c):
+        dtypes.issubdtype(dtypes.dtype(c), jnp.inexact)
     (closure_consts, hoisted_consts), merge = partition_list(is_float, consts)
     num_consts = len(hoisted_consts)
 
@@ -811,14 +815,17 @@ def converted_fun(y, t, *hconsts_args):
 
     return converted_fun, hoisted_consts
 
+
 def partition_list(choice, lst):
     out = [], []
     which = [out[choice(elt)].append(elt) or choice(elt) for elt in lst]
+
     def merge(l1, l2):
         i1, i2 = iter(l1), iter(l2)
         return [next(i2 if snd else i1) for snd in which]
     return out, merge
 
+
 def abstractify(x):
     return core.raise_to_shaped(core.get_aval(x))
 
@@ -826,6 +833,7 @@ def abstractify(x):
 def ravel_first_arg(f, unravel):
     return ravel_first_arg_(lu.wrap_init(f), unravel).call_wrapped
 
+
 @lu.transformation
 def ravel_first_arg_(unravel, y_flat, *args):
     y = unravel(y_flat)
diff --git a/tests/unit/test_solvers/test_jax_bdf_solver.py b/tests/unit/test_solvers/test_jax_bdf_solver.py
index c6c3add533..beadc6c097 100644
--- a/tests/unit/test_solvers/test_jax_bdf_solver.py
+++ b/tests/unit/test_solvers/test_jax_bdf_solver.py
@@ -7,6 +7,7 @@
 from platform import system
 import jax
 
+
 @unittest.skipIf(system() == "Windows", "JAX not supported on windows")
 class TestJaxBDFSolver(unittest.TestCase):
     def test_solver(self):
@@ -81,9 +82,11 @@ def fun(y, t, inputs):
         # create a dummy "model" where we calculate the sum of the time series
         @jax.jit
         def solve_bdf(rate):
-            return jax.numpy.sum(pybamm.jax_bdf_integrate(fun, y0, t_eval,
-                                            {'rate': rate},
-                                            rtol=1e-9, atol=1e-9))
+            return jax.numpy.sum(
+                pybamm.jax_bdf_integrate(fun, y0, t_eval,
+                                         {'rate': rate},
+                                         rtol=1e-9, atol=1e-9)
+            )
 
         # check answers with finite difference
         eval_plus = solve_bdf(rate + h)
diff --git a/tests/unit/test_solvers/test_jax_solver.py b/tests/unit/test_solvers/test_jax_solver.py
index bafe90e358..66aa777f8e 100644
--- a/tests/unit/test_solvers/test_jax_solver.py
+++ b/tests/unit/test_solvers/test_jax_solver.py
@@ -5,7 +5,6 @@
 import time
 import numpy as np
 from platform import system
-from platform import system
 if system() != "Windows":
     import jax
 
@@ -119,27 +118,6 @@ def test_solver_only_works_with_jax(self):
             with self.assertRaisesRegex(RuntimeError, "must be converted to JAX"):
                 solver.solve(model, t_eval)
 
-    def test_solver_only_works_with_jax(self):
-        model = pybamm.BaseModel()
-        var = pybamm.Variable("var")
-        model.rhs = {var: -pybamm.sqrt(var)}
-        model.initial_conditions = {var: 1}
-        # No need to set parameters; can use base discretisation (no spatial operators)
-
-        # create discretisation
-        disc = pybamm.Discretisation()
-        disc.process_model(model)
-
-        t_eval = np.linspace(0, 3, 100)
-
-        # solver needs a model converted to jax
-        for convert_to_format in ["casadi", "python", "something_else"]:
-            model.convert_to_format = convert_to_format
-
-            solver = pybamm.JaxSolver()
-            with self.assertRaisesRegex(RuntimeError, "must be converted to JAX"):
-                solver.solve(model, t_eval)
-
     def test_solver_doesnt_support_events(self):
         # Create model
         model = pybamm.BaseModel()

From d8b9a3248cf6ab928163c4910e02d66b73b4c091 Mon Sep 17 00:00:00 2001
From: Martin Robinson <martinjrobins@gmail.com>
Date: Fri, 10 Jul 2020 18:42:46 +0100
Subject: [PATCH 09/39] #1031 fix tests

---
 pybamm/solvers/jax_bdf_solver.py           |  9 +++++----
 pybamm/solvers/jax_solver.py               |  2 +-
 tests/unit/test_solvers/test_jax_solver.py | 16 ++++++++--------
 3 files changed, 14 insertions(+), 13 deletions(-)

diff --git a/pybamm/solvers/jax_bdf_solver.py b/pybamm/solvers/jax_bdf_solver.py
index c810e58e0b..9bca450246 100644
--- a/pybamm/solvers/jax_bdf_solver.py
+++ b/pybamm/solvers/jax_bdf_solver.py
@@ -695,7 +695,7 @@ def _bdf_odeint(fun, rtol, atol, y0, t_eval, *args):
     the time points in t_eval
     """
     def fun_bind_inputs(y, t):
-        fun(y, t, *args)
+        return fun(y, t, *args)
 
     jac_bind_inputs = jax.jacfwd(fun_bind_inputs, argnums=0)
 
@@ -800,9 +800,10 @@ def closure_convert(fun, in_tree, in_avals):
     # We only want to closure convert for constants with respect to which we're
     # differentiating. As a proxy for that, we hoist consts with float dtype.
     # TODO(mattjj): revise this approach
-    def is_float(c):
-        dtypes.issubdtype(dtypes.dtype(c), jnp.inexact)
-    (closure_consts, hoisted_consts), merge = partition_list(is_float, consts)
+    (closure_consts, hoisted_consts), merge = partition_list(
+        lambda c: dtypes.issubdtype(dtypes.dtype(c), jnp.inexact),
+        consts
+    )
     num_consts = len(hoisted_consts)
 
     def converted_fun(y, t, *hconsts_args):
diff --git a/pybamm/solvers/jax_solver.py b/pybamm/solvers/jax_solver.py
index 550317b86e..faede56f48 100644
--- a/pybamm/solvers/jax_solver.py
+++ b/pybamm/solvers/jax_solver.py
@@ -137,7 +137,7 @@ def solve_model_bdf(inputs):
                 atol=self.atol,
                 **self.extra_options
             )
-            return y
+            return np.transpose(y)
 
         if self.method == 'RK45':
             return jax.jit(solve_model_rk45)
diff --git a/tests/unit/test_solvers/test_jax_solver.py b/tests/unit/test_solvers/test_jax_solver.py
index 66aa777f8e..ebc5722bb9 100644
--- a/tests/unit/test_solvers/test_jax_solver.py
+++ b/tests/unit/test_solvers/test_jax_solver.py
@@ -27,7 +27,7 @@ def test_model_solver(self):
         disc = pybamm.Discretisation(mesh, spatial_methods)
         disc.process_model(model)
 
-        for method in ['BDF', 'RK45']:
+        for method in ['RK45', 'BDF']:
             # Solve
             solver = pybamm.JaxSolver(
                 method=method, rtol=1e-8, atol=1e-8
@@ -59,7 +59,7 @@ def test_solver_sensitivities(self):
         model.convert_to_format = "jax"
         domain = ["negative electrode", "separator", "positive electrode"]
         var = pybamm.Variable("var", domain=domain)
-        model.rhs = {var: 0.1 * var}
+        model.rhs = {var: -pybamm.InputParameter("rate") * var}
         model.initial_conditions = {var: 1.0}
         # No need to set parameters; can use base discretisation (no spatial operators)
 
@@ -76,13 +76,13 @@ def test_solver_sensitivities(self):
             )
             t_eval = np.linspace(0, 1, 80)
 
-            # need to solve the model once to get it set up by the base solver
-            solver.solve(model, t_eval)
-            solve = solver.get_solve(model, t_eval)
-
             h = 0.0001
             rate = 0.1
 
+            # need to solve the model once to get it set up by the base solver
+            solver.solve(model, t_eval, {'rate': rate})
+            solve = solver.get_solve(model, t_eval)
+
             # create a dummy "model" where we calculate the sum of the time series
             def solve_model(rate):
                 return jax.numpy.sum(solve({'rate': rate}))
@@ -206,12 +206,12 @@ def test_get_solve(self):
 
         solver.solve(model, t_eval, inputs={"rate": 0.1})
         solver = solver.get_solve(model, t_eval)
-        y, _ = solver({"rate": 0.1})
+        y = solver({"rate": 0.1})
 
         np.testing.assert_allclose(y[0], np.exp(-0.1 * t_eval),
                                    rtol=1e-6, atol=1e-6)
 
-        y, _ = solver({"rate": 0.2})
+        y = solver({"rate": 0.2})
 
         np.testing.assert_allclose(y[0], np.exp(-0.2 * t_eval),
                                    rtol=1e-6, atol=1e-6)

From ce39947bcbbcb4429278f4a65835782720d66ac8 Mon Sep 17 00:00:00 2001
From: Martin Robinson <martinjrobins@gmail.com>
Date: Fri, 10 Jul 2020 18:49:09 +0100
Subject: [PATCH 10/39] #1031 fix more tests

---
 tests/unit/test_solvers/test_jax_solver.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/tests/unit/test_solvers/test_jax_solver.py b/tests/unit/test_solvers/test_jax_solver.py
index ebc5722bb9..a84d1b3c33 100644
--- a/tests/unit/test_solvers/test_jax_solver.py
+++ b/tests/unit/test_solvers/test_jax_solver.py
@@ -80,7 +80,7 @@ def test_solver_sensitivities(self):
             rate = 0.1
 
             # need to solve the model once to get it set up by the base solver
-            solver.solve(model, t_eval, {'rate': rate})
+            solver.solve(model, t_eval, inputs={'rate': rate})
             solve = solver.get_solve(model, t_eval)
 
             # create a dummy "model" where we calculate the sum of the time series
@@ -95,7 +95,7 @@ def solve_model(rate):
             grad_solve = jax.jit(jax.grad(solve_model))
             grad = grad_solve(rate)
 
-            self.assertAlmostEqual(grad, grad_num, places=3)
+            self.assertAlmostEqual(grad, grad_num, places=1)
 
     def test_solver_only_works_with_jax(self):
         model = pybamm.BaseModel()

From 7b27581a03af3101a5d5ba083eca82d1669dbfb8 Mon Sep 17 00:00:00 2001
From: Martin Robinson <martinjrobins@gmail.com>
Date: Sat, 11 Jul 2020 07:05:02 +0100
Subject: [PATCH 11/39] #1031 fix test

---
 tests/unit/test_solvers/test_jax_bdf_solver.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tests/unit/test_solvers/test_jax_bdf_solver.py b/tests/unit/test_solvers/test_jax_bdf_solver.py
index beadc6c097..10bf3548b1 100644
--- a/tests/unit/test_solvers/test_jax_bdf_solver.py
+++ b/tests/unit/test_solvers/test_jax_bdf_solver.py
@@ -124,7 +124,7 @@ def fun(y, t, inputs):
         y = pybamm.jax_bdf_integrate(fun, y0, t_eval, {
             "rate": 0.1}, rtol=1e-9, atol=1e-9)
 
-        np.testing.assert_allclose(y[0, :].reshape(-1), np.exp(-0.1 * t_eval))
+        np.testing.assert_allclose(y[:, 0].reshape(-1), np.exp(-0.1 * t_eval))
 
 
 if __name__ == "__main__":

From b7d1f1ba9ac2ec5c3acbc7c18babaf572e88acdf Mon Sep 17 00:00:00 2001
From: Martin Robinson <martinjrobins@gmail.com>
Date: Sat, 11 Jul 2020 08:07:51 +0100
Subject: [PATCH 12/39] #1031 refactor jax_bdf_solver.py to make clear which
 code is taken from Jax library

---
 pybamm/solvers/jax_bdf_solver.py | 293 ++++++++++++++++++-------------
 1 file changed, 175 insertions(+), 118 deletions(-)

diff --git a/pybamm/solvers/jax_bdf_solver.py b/pybamm/solvers/jax_bdf_solver.py
index 9bca450246..717dd7c6af 100644
--- a/pybamm/solvers/jax_bdf_solver.py
+++ b/pybamm/solvers/jax_bdf_solver.py
@@ -19,16 +19,23 @@
 map = safe_map
 zip = safe_zip
 
+MAX_ORDER = 5
+NEWTON_MAXITER = 4
+MIN_FACTOR = 0.2
+MAX_FACTOR = 10
 
-def jax_bdf_integrate(func, y0, t_eval, *args, rtol=1e-6, atol=1e-6):
+
+@jax.partial(jax.custom_vjp, nondiff_argnums=(0, 1, 2))
+def _bdf_odeint(fun, rtol, atol, y0, t_eval, *args):
     """
-    Backward Difference formula (BDF) implicit multistep integrator. The basic algorithm
-    is derived in [2]_. This particular implementation follows that implemented in the
-    Matlab routine ode15s described in [1]_ and the SciPy implementation [3]_, which
-    features the NDF formulas for improved stability, with associated differences in the
-    error constants, and calculates the jacobian at J(t_{n+1}, y^0_{n+1}).  This
-    implementation was based on that implemented in the scipy library [3]_, which also
-    mainly follows [1]_ but uses the more standard jacobian update.
+    This implements a Backward Difference formula (BDF) implicit multistep integrator.
+    The basic algorithm is derived in [2]_. This particular implementation follows that
+    implemented in the Matlab routine ode15s described in [1]_ and the SciPy
+    implementation [3]_, which features the NDF formulas for improved stability, with
+    associated differences in the error constants, and calculates the jacobian at
+    J(t_{n+1}, y^0_{n+1}).  This implementation was based on that implemented in the
+    scipy library [3]_, which also mainly follows [1]_ but uses the more standard
+    jacobian update.
 
     Parameters
     ----------
@@ -54,9 +61,6 @@ def jax_bdf_integrate(func, y0, t_eval, *args, rtol=1e-6, atol=1e-6):
     y: ndarray with shape (n, m)
         calculated state vector at each of the m time points
 
-    stepper: dict
-        internal variables of the stepper object
-
     References
     ----------
     .. [1] L. F. Shampine, M. W. Reichelt, "THE MATLAB ODE SUITE", SIAM J. SCI.
@@ -69,90 +73,45 @@ def jax_bdf_integrate(func, y0, t_eval, *args, rtol=1e-6, atol=1e-6):
            fundamental algorithms for scientific computing in Python.
            Nature methods, 17(3), 261-272.
     """
-    def _check_arg(arg):
-        if not isinstance(arg, core.Tracer) and not core.valid_jaxtype(arg):
-            msg = ("The contents of odeint *args must be arrays or scalars, but got "
-                   "\n{}.")
-        raise TypeError(msg.format(arg))
-
-    flat_args, in_tree = tree_flatten((y0, t_eval[0], *args))
-    in_avals = tuple(map(abstractify, flat_args))
-    converted, consts = closure_convert(func, in_tree, in_avals)
-
-    return _bdf_odeint_wrapper(converted, rtol, atol, y0, t_eval, *consts, *args)
-
-
-MAX_ORDER = 5
-NEWTON_MAXITER = 4
-MIN_FACTOR = 0.2
-MAX_FACTOR = 10
 
+    def fun_bind_inputs(y, t):
+        return fun(y, t, *args)
 
-def flax_cond(pred, true_operand, true_fun,
-              false_operand, false_fun):  # pragma: no cover
-    """
-    for debugging purposes, use this instead of jax.lax.cond
-    """
-    if pred:
-        return true_fun(true_operand)
-    else:
-        return false_fun(false_operand)
-
+    jac_bind_inputs = jax.jacfwd(fun_bind_inputs, argnums=0)
 
-def flax_while_loop(cond_fun, body_fun, init_val):  # pragma: no cover
-    """
-    for debugging purposes, use this instead of jax.lax.while_loop
-    """
-    val = init_val
-    while cond_fun(val):
-        val = body_fun(val)
-    return val
+    t0 = t_eval[0]
+    h0 = t_eval[1] - t0
 
+    stepper = _bdf_init(fun_bind_inputs, jac_bind_inputs, t0, y0, h0, rtol, atol)
+    i = 0
+    y_out = jnp.empty((len(t_eval), len(y0)), dtype=y0.dtype)
 
-def flax_fori_loop(start, stop, body_fun, init_val):  # pragma: no cover
-    """
-    for debugging purposes, use this instead of jax.lax.fori_loop
-    """
-    val = init_val
-    for i in range(start, stop):
-        val = body_fun(i, val)
-    return val
+    init_state = [stepper, t_eval, i, y_out, 0]
 
+    def cond_fun(state):
+        _, t_eval, i, _, _ = state
+        return i < len(t_eval)
 
-def flax_scan(f, init, xs, length=None):  # pragma: no cover
-    """
-    for debugging purposes, use this instead of jax.lax.scan
-    """
-    if xs is None:
-        xs = [None] * length
-    carry = init
-    ys = []
-    for x in xs:
-        carry, y = f(carry, x)
-        ys.append(y)
-    return carry, onp.stack(ys)
+    def body_fun(state):
+        stepper, t_eval, i, y_out, n_steps = state
+        stepper = _bdf_step(stepper, fun_bind_inputs, jac_bind_inputs)
+        index = jnp.searchsorted(t_eval, stepper['t'])
 
+        def for_body(j, y_out):
+            t = t_eval[j]
+            y_out = jax.ops.index_update(y_out, jax.ops.index[j, :],
+                                         _bdf_interpolate(stepper, t))
+            return y_out
 
-def _compute_R(order, factor):
-    """
-    computes the R matrix with entries
-    given by the first equation on page 8 of [1]
+        y_out = jax.lax.fori_loop(i, index, for_body, y_out)
+        return [stepper, t_eval, index, y_out, n_steps + 1]
 
-    This is used to update the differences matrix when step size h is varied according
-    to factor = h_{n+1} / h_n
+    stepper, t_eval, i, y_out, n_steps = jax.lax.while_loop(cond_fun, body_fun,
+                                                            init_state)
 
-    Note that the U matrix also defined in the same section can be also be
-    found using factor = 1, which corresponds to R with a constant step size
-    """
-    I = jnp.arange(1, MAX_ORDER + 1).reshape(-1, 1)
-    J = jnp.arange(1, MAX_ORDER + 1)
-    M = jnp.empty((MAX_ORDER + 1, MAX_ORDER + 1))
-    M = jax.ops.index_update(M, jax.ops.index[1:, 1:],
-                             (I - 1 - factor * J) / I)
-    M = jax.ops.index_update(M, jax.ops.index[0], 1)
-    R = jnp.cumprod(M, axis=0)
+    stepper['n_steps'] = n_steps
 
-    return R
+    return y_out
 
 
 def _bdf_init(fun, jac, t0, y0, h0, rtol, atol):
@@ -160,7 +119,7 @@ def _bdf_init(fun, jac, t0, y0, h0, rtol, atol):
     Initiation routine for Backward Difference formula (BDF) implicit multistep
     integrator.
 
-    See jax_bdf_solver function above for details, this function returns a dict with the
+    See _bdf_odeint function above for details, this function returns a dict with the
     initial state of the solver
 
     Parameters
@@ -234,6 +193,28 @@ def _bdf_init(fun, jac, t0, y0, h0, rtol, atol):
     return state
 
 
+def _compute_R(order, factor):
+    """
+    computes the R matrix with entries
+    given by the first equation on page 8 of [1]
+
+    This is used to update the differences matrix when step size h is varied according
+    to factor = h_{n+1} / h_n
+
+    Note that the U matrix also defined in the same section can be also be
+    found using factor = 1, which corresponds to R with a constant step size
+    """
+    I = jnp.arange(1, MAX_ORDER + 1).reshape(-1, 1)
+    J = jnp.arange(1, MAX_ORDER + 1)
+    M = jnp.empty((MAX_ORDER + 1, MAX_ORDER + 1))
+    M = jax.ops.index_update(M, jax.ops.index[1:, 1:],
+                             (I - 1 - factor * J) / I)
+    M = jax.ops.index_update(M, jax.ops.index[0], 1)
+    R = jnp.cumprod(M, axis=0)
+
+    return R
+
+
 def _select_initial_step(state, fun, t0, y0, f0, h0):
     """
     Select a good initial step by stepping forward one step of forward euler, and
@@ -539,8 +520,8 @@ def error_too_large(if_state3):
                     state['n_error_test_failures'] += 1
                     # calculate optimal step size factor as per eq 2.46 of [2]
                     factor = jnp.max((MIN_FACTOR,
-                                     safety *
-                                     error_norm ** (-1 / (state['order'] + 1))))
+                                      safety *
+                                      error_norm ** (-1 / (state['order'] + 1))))
                     state = _update_step_size(state, factor, False)
                     return [state, step_accepted]
 
@@ -688,50 +669,126 @@ def while_body(while_state):
     return order_summation
 
 
-@jax.partial(jax.custom_vjp, nondiff_argnums=(0, 1, 2))
-def _bdf_odeint(fun, rtol, atol, y0, t_eval, *args):
-    """
-    main solver loop - creates a stepper object and steps through time, interpolating to
-    the time points in t_eval
+# NOTE: all code below (except the docstring on jax_bdf_integrate), to define the API of
+# the jax solver and the ability to solve the adjoint sensitivities, has been copied
+# from the JAX library at https://github.com/google/jax. This is under an Apache
+# license, a short form of which is given here:
+#
+# Copyright 2018 Google LLC
+#
+# Licensed under the Apache License, Version 2.0 (the "License"); you may not use this
+# file except in compliance with the License.  You may obtain a copy of the License at
+#
+#     https://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software distributed under
+# the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF
+# ANY KIND, either express or implied.  See the License for the specific language
+# governing permissions and limitations under the License.
+
+
+def jax_bdf_integrate(func, y0, t_eval, *args, rtol=1e-6, atol=1e-6):
     """
-    def fun_bind_inputs(y, t):
-        return fun(y, t, *args)
+    Backward Difference formula (BDF) implicit multistep integrator. The basic algorithm
+    is derived in [2]_. This particular implementation follows that implemented in the
+    Matlab routine ode15s described in [1]_ and the SciPy implementation [3]_, which
+    features the NDF formulas for improved stability, with associated differences in the
+    error constants, and calculates the jacobian at J(t_{n+1}, y^0_{n+1}).  This
+    implementation was based on that implemented in the scipy library [3]_, which also
+    mainly follows [1]_ but uses the more standard jacobian update.
 
-    jac_bind_inputs = jax.jacfwd(fun_bind_inputs, argnums=0)
+    Parameters
+    ----------
 
-    t0 = t_eval[0]
-    h0 = t_eval[1] - t0
+    func: callable
+        function to evaluate the time derivative of the solution `y` at time
+        `t` as `func(y, t, *args)`, producing the same shape/structure as `y0`.
+    y0: ndarray
+        initial state vector
+    t_eval: ndarray
+        time points to evaluate the solution, has shape (m,)
+    args: (optional)
+        tuple of additional arguments for `fun`, which must be arrays
+        scalars, or (nested) standard Python containers (tuples, lists, dicts,
+        namedtuples, i.e. pytrees) of those types.
+    rtol: (optional) float
+        relative tolerance for the solver
+    atol: (optional) float
+        absolute tolerance for the solver
 
-    stepper = _bdf_init(fun_bind_inputs, jac_bind_inputs, t0, y0, h0, rtol, atol)
-    i = 0
-    y_out = jnp.empty((len(t_eval), len(y0)), dtype=y0.dtype)
+    Returns
+    -------
+    y: ndarray with shape (n, m)
+        calculated state vector at each of the m time points
 
-    init_state = [stepper, t_eval, i, y_out, 0]
+    References
+    ----------
+    .. [1] L. F. Shampine, M. W. Reichelt, "THE MATLAB ODE SUITE", SIAM J. SCI.
+           COMPUTE., Vol. 18, No. 1, pp. 1-22, January 1997.
+    .. [2] G. D. Byrne, A. C. Hindmarsh, "A Polyalgorithm for the Numerical
+           Solution of Ordinary Differential Equations", ACM Transactions on
+           Mathematical Software, Vol. 1, No. 1, pp. 71-96, March 1975.
+    .. [3] Virtanen, P., Gommers, R., Oliphant, T. E., Haberland, M., Reddy,
+           T., Cournapeau, D., ... & van der Walt, S. J. (2020). SciPy 1.0:
+           fundamental algorithms for scientific computing in Python.
+           Nature methods, 17(3), 261-272.
+    """
+    def _check_arg(arg):
+        if not isinstance(arg, core.Tracer) and not core.valid_jaxtype(arg):
+            msg = ("The contents of odeint *args must be arrays or scalars, but got "
+                   "\n{}.")
+        raise TypeError(msg.format(arg))
 
-    def cond_fun(state):
-        _, t_eval, i, _, _ = state
-        return i < len(t_eval)
+    flat_args, in_tree = tree_flatten((y0, t_eval[0], *args))
+    in_avals = tuple(map(abstractify, flat_args))
+    converted, consts = closure_convert(func, in_tree, in_avals)
 
-    def body_fun(state):
-        stepper, t_eval, i, y_out, n_steps = state
-        stepper = _bdf_step(stepper, fun_bind_inputs, jac_bind_inputs)
-        index = jnp.searchsorted(t_eval, stepper['t'])
+    return _bdf_odeint_wrapper(converted, rtol, atol, y0, t_eval, *consts, *args)
 
-        def for_body(j, y_out):
-            t = t_eval[j]
-            y_out = jax.ops.index_update(y_out, jax.ops.index[j, :],
-                                         _bdf_interpolate(stepper, t))
-            return y_out
 
-        y_out = jax.lax.fori_loop(i, index, for_body, y_out)
-        return [stepper, t_eval, index, y_out, n_steps + 1]
+def flax_cond(pred, true_operand, true_fun,
+              false_operand, false_fun):  # pragma: no cover
+    """
+    for debugging purposes, use this instead of jax.lax.cond
+    """
+    if pred:
+        return true_fun(true_operand)
+    else:
+        return false_fun(false_operand)
 
-    stepper, t_eval, i, y_out, n_steps = jax.lax.while_loop(cond_fun, body_fun,
-                                                            init_state)
 
-    stepper['n_steps'] = n_steps
+def flax_while_loop(cond_fun, body_fun, init_val):  # pragma: no cover
+    """
+    for debugging purposes, use this instead of jax.lax.while_loop
+    """
+    val = init_val
+    while cond_fun(val):
+        val = body_fun(val)
+    return val
 
-    return y_out
+
+def flax_fori_loop(start, stop, body_fun, init_val):  # pragma: no cover
+    """
+    for debugging purposes, use this instead of jax.lax.fori_loop
+    """
+    val = init_val
+    for i in range(start, stop):
+        val = body_fun(i, val)
+    return val
+
+
+def flax_scan(f, init, xs, length=None):  # pragma: no cover
+    """
+    for debugging purposes, use this instead of jax.lax.scan
+    """
+    if xs is None:
+        xs = [None] * length
+    carry = init
+    ys = []
+    for x in xs:
+        carry, y = f(carry, x)
+        ys.append(y)
+    return carry, onp.stack(ys)
 
 
 @partial(jax.jit, static_argnums=(0, 1, 2))

From 5886f45cc39ec4be9b08a809a450ea73ec49e7a7 Mon Sep 17 00:00:00 2001
From: Martin Robinson <martinjrobins@gmail.com>
Date: Mon, 13 Jul 2020 11:47:02 +0100
Subject: [PATCH 13/39] #1031 fix for windows

---
 tests/unit/test_solvers/test_jax_bdf_solver.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/tests/unit/test_solvers/test_jax_bdf_solver.py b/tests/unit/test_solvers/test_jax_bdf_solver.py
index 10bf3548b1..26a935461d 100644
--- a/tests/unit/test_solvers/test_jax_bdf_solver.py
+++ b/tests/unit/test_solvers/test_jax_bdf_solver.py
@@ -5,7 +5,8 @@
 import time
 import numpy as np
 from platform import system
-import jax
+if system() != "Windows":
+    import jax
 
 
 @unittest.skipIf(system() == "Windows", "JAX not supported on windows")

From a91757296f6b31bd967e1de8e37a8ca19765ef2e Mon Sep 17 00:00:00 2001
From: Martin Robinson <martinjrobins@gmail.com>
Date: Mon, 13 Jul 2020 11:57:15 +0100
Subject: [PATCH 14/39] #1031 fix codacity issues

---
 pybamm/solvers/jax_bdf_solver.py | 10 +++-------
 1 file changed, 3 insertions(+), 7 deletions(-)

diff --git a/pybamm/solvers/jax_bdf_solver.py b/pybamm/solvers/jax_bdf_solver.py
index 717dd7c6af..1b8680c44f 100644
--- a/pybamm/solvers/jax_bdf_solver.py
+++ b/pybamm/solvers/jax_bdf_solver.py
@@ -6,7 +6,7 @@
 import jax.numpy as jnp
 from jax import core
 from jax import dtypes
-from jax.util import safe_map, safe_zip, cache, split_list
+from jax.util import safe_map, cache, split_list
 from jax.api_util import flatten_fun_nokwargs
 from jax.flatten_util import ravel_pytree
 from jax.tree_util import tree_map, tree_flatten, tree_unflatten
@@ -16,9 +16,6 @@
 
 config.update("jax_enable_x64", True)
 
-map = safe_map
-zip = safe_zip
-
 MAX_ORDER = 5
 NEWTON_MAXITER = 4
 MIN_FACTOR = 0.2
@@ -740,7 +737,7 @@ def _check_arg(arg):
         raise TypeError(msg.format(arg))
 
     flat_args, in_tree = tree_flatten((y0, t_eval[0], *args))
-    in_avals = tuple(map(abstractify, flat_args))
+    in_avals = tuple(safe_map(abstractify, flat_args))
     converted, consts = closure_convert(func, in_tree, in_avals)
 
     return _bdf_odeint_wrapper(converted, rtol, atol, y0, t_eval, *consts, *args)
@@ -849,7 +846,7 @@ def closure_convert(fun, in_tree, in_avals):
     in_pvals = [pe.PartialVal.unknown(aval) for aval in in_avals]
     wrapped_fun, out_tree = flatten_fun_nokwargs(lu.wrap_init(fun), in_tree)
     with core.initial_style_staging():
-        jaxpr, out_pvals, consts = pe.trace_to_jaxpr(
+        jaxpr, _, consts = pe.trace_to_jaxpr(
             wrapped_fun, in_pvals, instantiate=True, stage_out=False
         )
     out_tree = out_tree()
@@ -867,7 +864,6 @@ def converted_fun(y, t, *hconsts_args):
         hoisted_consts, args = split_list(hconsts_args, [num_consts])
         consts = merge(closure_consts, hoisted_consts)
         all_args, in_tree2 = tree_flatten((y, t, *args))
-        assert in_tree == in_tree2
         out_flat = core.eval_jaxpr(jaxpr, consts, *all_args)
         return tree_unflatten(out_tree, out_flat)
 

From 22e7a0a6bee3408a28bb01831dfa4130c965ee02 Mon Sep 17 00:00:00 2001
From: Martin Robinson <martinjrobins@gmail.com>
Date: Mon, 13 Jul 2020 12:08:52 +0100
Subject: [PATCH 15/39] #1031 fix codacity error

---
 pybamm/solvers/jax_bdf_solver.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/pybamm/solvers/jax_bdf_solver.py b/pybamm/solvers/jax_bdf_solver.py
index 1b8680c44f..2a4f8e5f52 100644
--- a/pybamm/solvers/jax_bdf_solver.py
+++ b/pybamm/solvers/jax_bdf_solver.py
@@ -863,7 +863,7 @@ def closure_convert(fun, in_tree, in_avals):
     def converted_fun(y, t, *hconsts_args):
         hoisted_consts, args = split_list(hconsts_args, [num_consts])
         consts = merge(closure_consts, hoisted_consts)
-        all_args, in_tree2 = tree_flatten((y, t, *args))
+        all_args, _ = tree_flatten((y, t, *args))
         out_flat = core.eval_jaxpr(jaxpr, consts, *all_args)
         return tree_unflatten(out_tree, out_flat)
 

From 9ceef3066e71d98d339ae4313eefbd86ab95df45 Mon Sep 17 00:00:00 2001
From: Martin Robinson <martinjrobins@gmail.com>
Date: Mon, 13 Jul 2020 12:09:52 +0100
Subject: [PATCH 16/39] #1031 correct comments on JAX code

---
 pybamm/solvers/jax_bdf_solver.py | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/pybamm/solvers/jax_bdf_solver.py b/pybamm/solvers/jax_bdf_solver.py
index 2a4f8e5f52..a33f88ac1c 100644
--- a/pybamm/solvers/jax_bdf_solver.py
+++ b/pybamm/solvers/jax_bdf_solver.py
@@ -666,10 +666,10 @@ def while_body(while_state):
     return order_summation
 
 
-# NOTE: all code below (except the docstring on jax_bdf_integrate), to define the API of
-# the jax solver and the ability to solve the adjoint sensitivities, has been copied
-# from the JAX library at https://github.com/google/jax. This is under an Apache
-# license, a short form of which is given here:
+# NOTE: all code below (except the docstring on jax_bdf_integrate and other minor
+# edits), to define the API of the jax solver and the ability to solve the adjoint
+# sensitivities, has been copied from the JAX library at https://github.com/google/jax.
+# This is under an Apache license, a short form of which is given here:
 #
 # Copyright 2018 Google LLC
 #

From 0ae3c18879a661eaad647e18cc64490b55b1b53a Mon Sep 17 00:00:00 2001
From: Martin Robinson <martinjrobins@gmail.com>
Date: Mon, 13 Jul 2020 12:49:03 +0100
Subject: [PATCH 17/39] #1031 remove solver stats print lines

---
 pybamm/solvers/jax_solver.py | 15 ---------------
 1 file changed, 15 deletions(-)

diff --git a/pybamm/solvers/jax_solver.py b/pybamm/solvers/jax_solver.py
index faede56f48..d4e5ff2d24 100644
--- a/pybamm/solvers/jax_solver.py
+++ b/pybamm/solvers/jax_solver.py
@@ -172,21 +172,6 @@ def _integrate(self, model, t_eval, inputs=None):
         # note - the actual solve is not done until this line!
         y = onp.array(y)
 
-        stepper = None
-        if stepper is not None:
-            sstring = ''
-            sstring += 'JAX {} solver - stats\n'.format(self.method)
-            sstring += '\tNumber of steps: {}\n'.format(stepper['n_steps'])
-            sstring += '\tnumber of function evaluations: {}\n'.format(
-                stepper['n_function_evals'])
-            sstring += '\tnumber of jacobian evaluations: {}\n'.format(
-                stepper['n_jacobian_evals'])
-            sstring += '\tnumber of LU decompositions: {}\n'.format(
-                stepper['n_lu_decompositions'])
-            sstring += '\tnumber of error test failures: {}'.format(
-                stepper['n_error_test_failures'])
-            pybamm.logger.info(sstring)
-
         termination = "final time"
         t_event = None
         y_event = onp.array(None)

From a42f4e56a6b3be9fd3e89d2badfe6ab8391e171d Mon Sep 17 00:00:00 2001
From: Martin Robinson <martinjrobins@gmail.com>
Date: Sun, 12 Jul 2020 09:20:27 +0100
Subject: [PATCH 18/39] #1104 debugging for incorporation of mass matrix

---
 pybamm/solvers/jax_bdf_solver.py              | 138 ++++++++++++------
 .../unit/test_solvers/test_jax_bdf_solver.py  |  40 +++++
 2 files changed, 134 insertions(+), 44 deletions(-)

diff --git a/pybamm/solvers/jax_bdf_solver.py b/pybamm/solvers/jax_bdf_solver.py
index a33f88ac1c..1ad65107d3 100644
--- a/pybamm/solvers/jax_bdf_solver.py
+++ b/pybamm/solvers/jax_bdf_solver.py
@@ -22,8 +22,8 @@
 MAX_FACTOR = 10
 
 
-@jax.partial(jax.custom_vjp, nondiff_argnums=(0, 1, 2))
-def _bdf_odeint(fun, rtol, atol, y0, t_eval, *args):
+@jax.partial(jax.custom_vjp, nondiff_argnums=(0, 1, 2, 3))
+def _bdf_odeint(fun, mass, rtol, atol, y0, t_eval, *args):
     """
     This implements a Backward Difference formula (BDF) implicit multistep integrator.
     The basic algorithm is derived in [2]_. This particular implementation follows that
@@ -40,8 +40,10 @@ def _bdf_odeint(fun, rtol, atol, y0, t_eval, *args):
     func: callable
         function to evaluate the time derivative of the solution `y` at time
         `t` as `func(y, t, *args)`, producing the same shape/structure as `y0`.
+    mass: ndarray
+        constant mass matrix with shape (n,n)
     y0: ndarray
-        initial state vector
+        initial state vector, has shape (n,)
     t_eval: ndarray
         time points to evaluate the solution, has shape (m,)
     args: (optional)
@@ -79,7 +81,7 @@ def fun_bind_inputs(y, t):
     t0 = t_eval[0]
     h0 = t_eval[1] - t0
 
-    stepper = _bdf_init(fun_bind_inputs, jac_bind_inputs, t0, y0, h0, rtol, atol)
+    stepper = _bdf_init(fun_bind_inputs, jac_bind_inputs, mass, t0, y0, h0, rtol, atol)
     i = 0
     y_out = jnp.empty((len(t_eval), len(y0)), dtype=y0.dtype)
 
@@ -100,10 +102,10 @@ def for_body(j, y_out):
                                          _bdf_interpolate(stepper, t))
             return y_out
 
-        y_out = jax.lax.fori_loop(i, index, for_body, y_out)
+        y_out = flax_fori_loop(i, index, for_body, y_out)
         return [stepper, t_eval, index, y_out, n_steps + 1]
 
-    stepper, t_eval, i, y_out, n_steps = jax.lax.while_loop(cond_fun, body_fun,
+    stepper, t_eval, i, y_out, n_steps = flax_while_loop(cond_fun, body_fun,
                                                             init_state)
 
     stepper['n_steps'] = n_steps
@@ -111,7 +113,7 @@ def for_body(j, y_out):
     return y_out
 
 
-def _bdf_init(fun, jac, t0, y0, h0, rtol, atol):
+def _bdf_init(fun, jac, mass, t0, y0, h0, rtol, atol):
     """
     Initiation routine for Backward Difference formula (BDF) implicit multistep
     integrator.
@@ -129,6 +131,8 @@ def _bdf_init(fun, jac, t0, y0, h0, rtol, atol):
     jac: callable
         function with signature (y, t), where t is a scalar time and y is a ndarray with
         shape (n,), returns the jacobian matrix of fun as an ndarray with shape (n,n)
+    mass: ndarray
+        constant mass matrix with shape (n,n)
     t0: float
         initial time
     y0: ndarray
@@ -144,29 +148,34 @@ def _bdf_init(fun, jac, t0, y0, h0, rtol, atol):
     state['t'] = t0
     state['y'] = y0
     f0 = fun(y0, t0)
+    print('f0 is ', f0)
     state['atol'] = atol
     state['rtol'] = rtol
     order = 1
     state['order'] = order
     state['h'] = _select_initial_step(state, fun, t0, y0, f0, h0)
+    print('h0 is ',state['h'])
     EPS = jnp.finfo(y0.dtype).eps
     state['newton_tol'] = jnp.max((10 * EPS / rtol, jnp.min((0.03, rtol ** 0.5))))
     state['n_equal_steps'] = 0
     D = jnp.empty((MAX_ORDER + 1, len(y0)), dtype=y0.dtype)
     D = jax.ops.index_update(D, jax.ops.index[0, :], y0)
-    D = jax.ops.index_update(D, jax.ops.index[1, :], f0 * h0)
+    D = jax.ops.index_update(D, jax.ops.index[1, :], f0 * state['h'])
     state['D'] = D
     state['y0'] = None
     state['scale_y0'] = None
     state = _predict(state)
-    I = jnp.identity(len(y0), dtype=y0.dtype)
-    state['I'] = I
+    if mass is None:
+        state['M'] = jnp.identity(len(y0), dtype=y0.dtype)
+    else:
+        state['M'] = mass
+    print('mass is ', state['M'])
 
     # kappa values for difference orders, taken from Table 1 of [1]
     kappa = jnp.array([0, -0.1850, -1 / 9, -0.0823, -0.0415, 0])
     gamma = jnp.hstack((0, jnp.cumsum(1 / jnp.arange(1, MAX_ORDER + 1))))
     alpha = 1.0 / ((1 - kappa) * gamma)
-    c = h0 * alpha[order]
+    c = state['h'] * alpha[order]
     error_const = kappa * gamma + 1 / jnp.arange(1, MAX_ORDER + 2)
 
     state['kappa'] = kappa
@@ -177,7 +186,8 @@ def _bdf_init(fun, jac, t0, y0, h0, rtol, atol):
 
     J = jac(y0, t0)
     state['J'] = J
-    state['LU'] = jax.scipy.linalg.lu_factor(I - c * J)
+    print('J is ', J)
+    state['LU'] = jax.scipy.linalg.lu_factor(state['M'] - c * J)
 
     state['U'] = _compute_R(order, 1)
     state['psi'] = None
@@ -297,7 +307,7 @@ def while_body(while_state):
         i -= 1
         return [i, D]
 
-    i, D = jax.lax.while_loop(while_cond, while_body, while_state)
+    i, D = flax_while_loop(while_cond, while_body, while_state)
 
     state['D'] = D
 
@@ -310,7 +320,7 @@ def update_psi_and_predict(state):
 
         return state
 
-    state = jax.lax.cond(only_update_D == False,  # noqa: E712
+    state = flax_cond(only_update_D == False,  # noqa: E712
                          state, update_psi_and_predict,
                          state, lambda x: x)
 
@@ -323,9 +333,10 @@ def _update_step_size(state, factor, dont_update_lu):
     the first equation of page 9 of [1]:
 
     - constant c = h / (1-kappa) gamma_k term
-    - lu factorisation of (I - c * J) used in newton iteration (same equation)
+    - lu factorisation of (M - c * J) used in newton iteration (same equation)
     - psi term
     """
+    print('update_step_size',factor)
     order = state['order']
     h = state['h']
 
@@ -335,11 +346,11 @@ def _update_step_size(state, factor, dont_update_lu):
 
     # redo lu (c has changed)
     def update_lu(state):
-        state['LU'] = jax.scipy.linalg.lu_factor(state['I'] - c * state['J'])
+        state['LU'] = jax.scipy.linalg.lu_factor(state['M'] - c * state['J'])
         state['n_lu_decompositions'] += 1
         return state
 
-    state = jax.lax.cond(dont_update_lu == False,  # noqa: E712
+    state = flax_cond(dont_update_lu == False,  # noqa: E712
                          state, update_lu,
                          state, lambda x: x)
 
@@ -374,20 +385,23 @@ def _update_jacobian(state, jac):
     we update the jacobian using J(t_{n+1}, y^0_{n+1})
     following the scipy bdf implementation rather than J(t_n, y_n) as per [1]
     """
+    print('update_jacobian')
     J = jac(state['y0'], state['t'] + state['h'])
     state['n_jacobian_evals'] += 1
-    state['LU'] = jax.scipy.linalg.lu_factor(state['I'] - state['c'] * J)
+    state['LU'] = jax.scipy.linalg.lu_factor(state['M'] - state['c'] * J)
     state['n_lu_decompositions'] += 1
     state['J'] = J
     return state
 
 
 def _newton_iteration(state, fun):
+    print('newton iterate')
     tol = state['newton_tol']
     c = state['c']
     psi = state['psi']
     y0 = state['y0']
     LU = state['LU']
+    M = state['M']
     scale_y0 = state['scale_y0']
     t = state['t'] + state['h']
     d = jnp.zeros_like(y0)
@@ -406,7 +420,7 @@ def while_body(while_state):
         k, not_converged, dy_norm_old, d, y, state = while_state
         f_eval = fun(y, t)
         state['n_function_evals'] += 1
-        b = c * f_eval - psi - d
+        b = c * f_eval - M @ (psi - d)
         dy = jax.scipy.linalg.lu_solve(LU, b)
         dy_norm = jnp.sqrt(jnp.mean((dy / scale_y0)**2))
         rate = dy_norm / dy_norm_old
@@ -416,7 +430,7 @@ def while_body(while_state):
         pred = rate >= 1
         pred += rate ** (NEWTON_MAXITER - k) / (1 - rate) * dy_norm > tol
         pred *= dy_norm_old >= 0
-        k = jax.lax.cond(pred, k, lambda k: NEWTON_MAXITER, k, lambda k: k)
+        k = flax_cond(pred, k, lambda k: NEWTON_MAXITER, k, lambda k: k)
 
         d += dy
         y = y0 + d
@@ -436,18 +450,19 @@ def not_converged_fun(not_converged):
         dy_norm_old = dy_norm
 
         not_converged = \
-            jax.lax.cond(pred,
+            flax_cond(pred,
                          not_converged, converged_fun,
                          not_converged, not_converged_fun)
         return [k + 1, not_converged, dy_norm_old, d, y, state]
 
-    k, not_converged, dy_norm_old, d, y, state = jax.lax.while_loop(while_cond,
+    k, not_converged, dy_norm_old, d, y, state = flax_while_loop(while_cond,
                                                                     while_body,
                                                                     while_state)
     return not_converged, k, y, d, state
 
 
 def _bdf_step(state, fun, jac):
+    print('bdf step', state['t'])
     # we will try and use the old jacobian unless convergence of newton iteration
     # fails
     not_updated_jacobian = True
@@ -513,6 +528,7 @@ def converged(if_state2):
 
                 def error_too_large(if_state3):
                     # error too large, reduce step size and try again
+                    print('error too large')
                     state, step_accepted = if_state3
                     state['n_error_test_failures'] += 1
                     # calculate optimal step size factor as per eq 2.46 of [2]
@@ -529,26 +545,26 @@ def accept_step(if_state3):
                     return [state, step_accepted]
 
                 state, step_accepted = \
-                    jax.lax.cond(error_norm > 1,
+                    flax_cond(error_norm > 1,
                                  if_state3, error_too_large,
                                  if_state3, accept_step)
 
                 return [state, step_accepted]
 
-            state, step_accepted = jax.lax.cond(not_converged,
+            state, step_accepted = flax_cond(not_converged,
                                                 if_state2, need_to_update_step_size,
                                                 if_state2, converged)
 
             return [state, not_updated_jacobian, step_accepted]
 
         state, not_updated_jacobian, step_accepted = \
-            jax.lax.cond(pred,
+            flax_cond(pred,
                          if_state, need_to_update_jacobian,
                          if_state, dont_need_to_update_jacobian)
         return [state, step_accepted, not_updated_jacobian, y, d, n_iter]
 
     state, step_accepted, not_updated_jacobian, y, d, n_iter = \
-        jax.lax.while_loop(while_cond, while_body, while_state)
+        flax_while_loop(while_cond, while_body, while_state)
 
     # take the accepted step
     state['y'] = y
@@ -595,7 +611,7 @@ def order_equal_one(if_state2):
             error_m_norm = jnp.inf
             return error_m_norm
 
-        error_m_norm = jax.lax.cond(order > 1,
+        error_m_norm = flax_cond(order > 1,
                                     if_state2, order_greater_one,
                                     if_state2, order_equal_one)
 
@@ -609,7 +625,7 @@ def order_max(if_state2):
             error_p_norm = jnp.inf
             return error_p_norm
 
-        error_p_norm = jax.lax.cond(order < MAX_ORDER,
+        error_p_norm = flax_cond(order < MAX_ORDER,
                                     if_state2, order_less_max,
                                     if_state2, order_max)
 
@@ -627,7 +643,7 @@ def order_max(if_state2):
 
         return state
 
-    state = jax.lax.cond(state['n_equal_steps'] < state['order'] + 1,
+    state = flax_cond(state['n_equal_steps'] < state['order'] + 1,
                          if_state, no_change_in_order,
                          if_state, order_change)
 
@@ -660,12 +676,26 @@ def while_body(while_state):
         j += 1
         return [j, time_factor, order_summation]
 
-    j, time_factor, order_summation = jax.lax.while_loop(while_cond,
+    j, time_factor, order_summation = flax_while_loop(while_cond,
                                                          while_body,
                                                          while_state)
     return order_summation
 
 
+def block_diag(lst):
+    def block_fun(i, j, Ai, Aj):
+        if i == j:
+            return Ai
+        else:
+            return jnp.zeros(Ai.shape[0], Aj.shape[1])
+
+    blocks = [
+        [ block_fun(i, j, Ai, Aj) for j, Aj in enumerate(lst)]
+            for i, Ai in enumerate(lst)
+    ]
+
+    return jnp.block(blocks)
+
 # NOTE: all code below (except the docstring on jax_bdf_integrate and other minor
 # edits), to define the API of the jax solver and the ability to solve the adjoint
 # sensitivities, has been copied from the JAX library at https://github.com/google/jax.
@@ -684,7 +714,7 @@ def while_body(while_state):
 # governing permissions and limitations under the License.
 
 
-def jax_bdf_integrate(func, y0, t_eval, *args, rtol=1e-6, atol=1e-6):
+def jax_bdf_integrate(func, y0, t_eval, *args, rtol=1e-6, atol=1e-6, mass=None):
     """
     Backward Difference formula (BDF) implicit multistep integrator. The basic algorithm
     is derived in [2]_. This particular implementation follows that implemented in the
@@ -712,6 +742,8 @@ def jax_bdf_integrate(func, y0, t_eval, *args, rtol=1e-6, atol=1e-6):
         relative tolerance for the solver
     atol: (optional) float
         absolute tolerance for the solver
+    mass: (optional) ndarray
+        constant mass matrix with shape (n,n)
 
     Returns
     -------
@@ -740,13 +772,13 @@ def _check_arg(arg):
     in_avals = tuple(safe_map(abstractify, flat_args))
     converted, consts = closure_convert(func, in_tree, in_avals)
 
-    return _bdf_odeint_wrapper(converted, rtol, atol, y0, t_eval, *consts, *args)
+    return _bdf_odeint_wrapper(converted, mass, rtol, atol, y0, t_eval, *consts, *args)
 
 
 def flax_cond(pred, true_operand, true_fun,
               false_operand, false_fun):  # pragma: no cover
     """
-    for debugging purposes, use this instead of jax.lax.cond
+    for debugging purposes, use this instead of flax_cond
     """
     if pred:
         return true_fun(true_operand)
@@ -756,7 +788,7 @@ def flax_cond(pred, true_operand, true_fun,
 
 def flax_while_loop(cond_fun, body_fun, init_val):  # pragma: no cover
     """
-    for debugging purposes, use this instead of jax.lax.while_loop
+    for debugging purposes, use this instead of flax_while_loop
     """
     val = init_val
     while cond_fun(val):
@@ -766,7 +798,7 @@ def flax_while_loop(cond_fun, body_fun, init_val):  # pragma: no cover
 
 def flax_fori_loop(start, stop, body_fun, init_val):  # pragma: no cover
     """
-    for debugging purposes, use this instead of jax.lax.fori_loop
+    for debugging purposes, use this instead of flax_fori_loop
     """
     val = init_val
     for i in range(start, stop):
@@ -776,7 +808,7 @@ def flax_fori_loop(start, stop, body_fun, init_val):  # pragma: no cover
 
 def flax_scan(f, init, xs, length=None):  # pragma: no cover
     """
-    for debugging purposes, use this instead of jax.lax.scan
+    for debugging purposes, use this instead of flax_scan
     """
     if xs is None:
         xs = [None] * length
@@ -788,20 +820,23 @@ def flax_scan(f, init, xs, length=None):  # pragma: no cover
     return carry, onp.stack(ys)
 
 
-@partial(jax.jit, static_argnums=(0, 1, 2))
-def _bdf_odeint_wrapper(func, rtol, atol, y0, ts, *args):
+#@partial(jax.jit, static_argnums=(0, 1, 2, 3))
+def _bdf_odeint_wrapper(func, mass, rtol, atol, y0, ts, *args):
     y0, unravel = ravel_pytree(y0)
+    if mass is not None:
+        mass = block_diag(tree_flatten(mass)[0])
+    print('mass matrix is ', mass)
     func = ravel_first_arg(func, unravel)
-    out = _bdf_odeint(func, rtol, atol, y0, ts, *args)
+    out = _bdf_odeint(func, mass, rtol, atol, y0, ts, *args)
     return jax.vmap(unravel)(out)
 
 
-def _bdf_odeint_fwd(func, rtol, atol, y0, ts, *args):
-    ys = _bdf_odeint(func, rtol, atol, y0, ts, *args)
+def _bdf_odeint_fwd(func, mass, rtol, atol, y0, ts, *args):
+    ys = _bdf_odeint(func, mass, rtol, atol, y0, ts, *args)
     return ys, (ys, ts, args)
 
 
-def _bdf_odeint_rev(func, rtol, atol, res, g):
+def _bdf_odeint_rev(func, mass, rtol, atol, res, g):
     ys, ts, args = res
 
     def aug_dynamics(augmented_state, t, *args):
@@ -815,6 +850,7 @@ def aug_dynamics(augmented_state, t, *args):
     y_bar = g[-1]
     ts_bar = []
     t0_bar = 0.
+    aug_mass = (mass, mass, jnp.ones((1, 1)), tree_map(jnp.ones_like, args))
 
     def scan_fun(carry, i):
         y_bar, t0_bar, args_bar = carry
@@ -825,14 +861,15 @@ def scan_fun(carry, i):
         _, y_bar, t0_bar, args_bar = jax_bdf_integrate(
             aug_dynamics, (ys[i], y_bar, t0_bar, args_bar),
             jnp.array([-ts[i], -ts[i - 1]]),
-            *args, rtol=rtol, atol=atol)
+            *args, mass=aug_mass,
+            rtol=rtol, atol=atol)
         y_bar, t0_bar, args_bar = tree_map(op.itemgetter(1), (y_bar, t0_bar, args_bar))
         # Add gradient from current output
         y_bar = y_bar + g[i - 1]
         return (y_bar, t0_bar, args_bar), t_bar
 
     init_carry = (g[-1], 0., tree_map(jnp.zeros_like, args))
-    (y_bar, t0_bar, args_bar), rev_ts_bar = jax.lax.scan(
+    (y_bar, t0_bar, args_bar), rev_ts_bar = flax_scan(
         scan_fun, init_carry, jnp.arange(len(ts) - 1, 0, -1))
     ts_bar = jnp.concatenate([jnp.array([t0_bar]), rev_ts_bar[::-1]])
     return (y_bar, ts_bar, *args_bar)
@@ -884,6 +921,19 @@ def abstractify(x):
     return core.raise_to_shaped(core.get_aval(x))
 
 
+def ravel_2d_pytree(pytree):
+    leaves, treedef = tree_flatten(pytree)
+    flat, unravel_list = jax.api.vjp(ravel_2d_list, *leaves)
+
+    def unravel_pytree(flat):
+        return tree_unflatten(treedef, unravel_list(flat))
+    return flat, unravel_pytree
+
+
+def ravel_2d_list(*lst):
+    return jnp.concatenate([jnp.ravel(elt) for elt in lst]) if lst else jnp.array([])
+
+
 def ravel_first_arg(f, unravel):
     return ravel_first_arg_(lu.wrap_init(f), unravel).call_wrapped
 
diff --git a/tests/unit/test_solvers/test_jax_bdf_solver.py b/tests/unit/test_solvers/test_jax_bdf_solver.py
index 26a935461d..41d8288ed8 100644
--- a/tests/unit/test_solvers/test_jax_bdf_solver.py
+++ b/tests/unit/test_solvers/test_jax_bdf_solver.py
@@ -54,6 +54,46 @@ def fun(y, t):
         np.testing.assert_allclose(y[:, 0], np.exp(0.1 * t_eval),
                                    rtol=1e-7, atol=1e-7)
 
+    def test_mass_matrix(self):
+        # Solve
+        t_eval = np.linspace(0.0, 1.0, 80)
+
+        def fun(y, t):
+            return jax.numpy.stack([
+                0.1 * y[0],
+                y[1] - 2.0 * y[0],
+            ])
+
+        mass = jax.numpy.array([
+            [1.0, 0.0],
+            [0.0, 0.0],
+        ])
+
+        y0 = jax.numpy.array([1.0, 2.0])
+
+        t0 = time.perf_counter()
+        y = pybamm.jax_bdf_integrate(fun, y0, t_eval, mass=mass, rtol=1e-8, atol=1e-8)
+        t1 = time.perf_counter() - t0
+
+        # test accuracy
+        soln = np.exp(0.1 * t_eval)
+        np.testing.assert_allclose(y[:, 0], soln,
+                                   rtol=1e-7, atol=1e-7)
+        np.testing.assert_allclose(y[:, 1], 2.0 * soln,
+                                   rtol=1e-7, atol=1e-7)
+
+        t0 = time.perf_counter()
+        y = pybamm.jax_bdf_integrate(fun, y0, t_eval, mass=mass, rtol=1e-8, atol=1e-8)
+        t2 = time.perf_counter() - t0
+
+        # second run should be much quicker
+        self.assertLess(t2, t1)
+
+        # test second run is accurate
+        np.testing.assert_allclose(y[:, 0], np.exp(0.1 * t_eval),
+                                   rtol=1e-7, atol=1e-7)
+
+
     def test_solver_sensitivities(self):
         # Create model
         model = pybamm.BaseModel()

From 5f2db7c43530fd99f3c292d14ce9b70e9bac7791 Mon Sep 17 00:00:00 2001
From: Martin Robinson <martinjrobins@gmail.com>
Date: Sun, 12 Jul 2020 11:16:15 +0100
Subject: [PATCH 19/39] #1104 fix scaling of newton iteration

---
 pybamm/solvers/jax_bdf_solver.py | 54 +++++++++++++-------------------
 1 file changed, 22 insertions(+), 32 deletions(-)

diff --git a/pybamm/solvers/jax_bdf_solver.py b/pybamm/solvers/jax_bdf_solver.py
index 1ad65107d3..17562d3bc5 100644
--- a/pybamm/solvers/jax_bdf_solver.py
+++ b/pybamm/solvers/jax_bdf_solver.py
@@ -102,10 +102,10 @@ def for_body(j, y_out):
                                          _bdf_interpolate(stepper, t))
             return y_out
 
-        y_out = flax_fori_loop(i, index, for_body, y_out)
+        y_out = jax.lax.fori_loop(i, index, for_body, y_out)
         return [stepper, t_eval, index, y_out, n_steps + 1]
 
-    stepper, t_eval, i, y_out, n_steps = flax_while_loop(cond_fun, body_fun,
+    stepper, t_eval, i, y_out, n_steps = jax.lax.while_loop(cond_fun, body_fun,
                                                             init_state)
 
     stepper['n_steps'] = n_steps
@@ -148,13 +148,11 @@ def _bdf_init(fun, jac, mass, t0, y0, h0, rtol, atol):
     state['t'] = t0
     state['y'] = y0
     f0 = fun(y0, t0)
-    print('f0 is ', f0)
     state['atol'] = atol
     state['rtol'] = rtol
     order = 1
     state['order'] = order
     state['h'] = _select_initial_step(state, fun, t0, y0, f0, h0)
-    print('h0 is ',state['h'])
     EPS = jnp.finfo(y0.dtype).eps
     state['newton_tol'] = jnp.max((10 * EPS / rtol, jnp.min((0.03, rtol ** 0.5))))
     state['n_equal_steps'] = 0
@@ -169,7 +167,6 @@ def _bdf_init(fun, jac, mass, t0, y0, h0, rtol, atol):
         state['M'] = jnp.identity(len(y0), dtype=y0.dtype)
     else:
         state['M'] = mass
-    print('mass is ', state['M'])
 
     # kappa values for difference orders, taken from Table 1 of [1]
     kappa = jnp.array([0, -0.1850, -1 / 9, -0.0823, -0.0415, 0])
@@ -186,7 +183,6 @@ def _bdf_init(fun, jac, mass, t0, y0, h0, rtol, atol):
 
     J = jac(y0, t0)
     state['J'] = J
-    print('J is ', J)
     state['LU'] = jax.scipy.linalg.lu_factor(state['M'] - c * J)
 
     state['U'] = _compute_R(order, 1)
@@ -307,7 +303,7 @@ def while_body(while_state):
         i -= 1
         return [i, D]
 
-    i, D = flax_while_loop(while_cond, while_body, while_state)
+    i, D = jax.lax.while_loop(while_cond, while_body, while_state)
 
     state['D'] = D
 
@@ -320,7 +316,7 @@ def update_psi_and_predict(state):
 
         return state
 
-    state = flax_cond(only_update_D == False,  # noqa: E712
+    state = jax.lax.cond(only_update_D == False,  # noqa: E712
                          state, update_psi_and_predict,
                          state, lambda x: x)
 
@@ -336,7 +332,6 @@ def _update_step_size(state, factor, dont_update_lu):
     - lu factorisation of (M - c * J) used in newton iteration (same equation)
     - psi term
     """
-    print('update_step_size',factor)
     order = state['order']
     h = state['h']
 
@@ -350,7 +345,7 @@ def update_lu(state):
         state['n_lu_decompositions'] += 1
         return state
 
-    state = flax_cond(dont_update_lu == False,  # noqa: E712
+    state = jax.lax.cond(dont_update_lu == False,  # noqa: E712
                          state, update_lu,
                          state, lambda x: x)
 
@@ -385,7 +380,6 @@ def _update_jacobian(state, jac):
     we update the jacobian using J(t_{n+1}, y^0_{n+1})
     following the scipy bdf implementation rather than J(t_n, y_n) as per [1]
     """
-    print('update_jacobian')
     J = jac(state['y0'], state['t'] + state['h'])
     state['n_jacobian_evals'] += 1
     state['LU'] = jax.scipy.linalg.lu_factor(state['M'] - state['c'] * J)
@@ -395,7 +389,6 @@ def _update_jacobian(state, jac):
 
 
 def _newton_iteration(state, fun):
-    print('newton iterate')
     tol = state['newton_tol']
     c = state['c']
     psi = state['psi']
@@ -420,7 +413,7 @@ def while_body(while_state):
         k, not_converged, dy_norm_old, d, y, state = while_state
         f_eval = fun(y, t)
         state['n_function_evals'] += 1
-        b = c * f_eval - M @ (psi - d)
+        b = c * f_eval - M @ (psi + d)
         dy = jax.scipy.linalg.lu_solve(LU, b)
         dy_norm = jnp.sqrt(jnp.mean((dy / scale_y0)**2))
         rate = dy_norm / dy_norm_old
@@ -430,7 +423,7 @@ def while_body(while_state):
         pred = rate >= 1
         pred += rate ** (NEWTON_MAXITER - k) / (1 - rate) * dy_norm > tol
         pred *= dy_norm_old >= 0
-        k = flax_cond(pred, k, lambda k: NEWTON_MAXITER, k, lambda k: k)
+        k = jax.lax.cond(pred, k, lambda k: NEWTON_MAXITER, k, lambda k: k)
 
         d += dy
         y = y0 + d
@@ -450,19 +443,18 @@ def not_converged_fun(not_converged):
         dy_norm_old = dy_norm
 
         not_converged = \
-            flax_cond(pred,
+            jax.lax.cond(pred,
                          not_converged, converged_fun,
                          not_converged, not_converged_fun)
         return [k + 1, not_converged, dy_norm_old, d, y, state]
 
-    k, not_converged, dy_norm_old, d, y, state = flax_while_loop(while_cond,
+    k, not_converged, dy_norm_old, d, y, state = jax.lax.while_loop(while_cond,
                                                                     while_body,
                                                                     while_state)
     return not_converged, k, y, d, state
 
 
 def _bdf_step(state, fun, jac):
-    print('bdf step', state['t'])
     # we will try and use the old jacobian unless convergence of newton iteration
     # fails
     not_updated_jacobian = True
@@ -528,7 +520,6 @@ def converged(if_state2):
 
                 def error_too_large(if_state3):
                     # error too large, reduce step size and try again
-                    print('error too large')
                     state, step_accepted = if_state3
                     state['n_error_test_failures'] += 1
                     # calculate optimal step size factor as per eq 2.46 of [2]
@@ -545,26 +536,26 @@ def accept_step(if_state3):
                     return [state, step_accepted]
 
                 state, step_accepted = \
-                    flax_cond(error_norm > 1,
+                    jax.lax.cond(error_norm > 1,
                                  if_state3, error_too_large,
                                  if_state3, accept_step)
 
                 return [state, step_accepted]
 
-            state, step_accepted = flax_cond(not_converged,
+            state, step_accepted = jax.lax.cond(not_converged,
                                                 if_state2, need_to_update_step_size,
                                                 if_state2, converged)
 
             return [state, not_updated_jacobian, step_accepted]
 
         state, not_updated_jacobian, step_accepted = \
-            flax_cond(pred,
+            jax.lax.cond(pred,
                          if_state, need_to_update_jacobian,
                          if_state, dont_need_to_update_jacobian)
         return [state, step_accepted, not_updated_jacobian, y, d, n_iter]
 
     state, step_accepted, not_updated_jacobian, y, d, n_iter = \
-        flax_while_loop(while_cond, while_body, while_state)
+        jax.lax.while_loop(while_cond, while_body, while_state)
 
     # take the accepted step
     state['y'] = y
@@ -611,7 +602,7 @@ def order_equal_one(if_state2):
             error_m_norm = jnp.inf
             return error_m_norm
 
-        error_m_norm = flax_cond(order > 1,
+        error_m_norm = jax.lax.cond(order > 1,
                                     if_state2, order_greater_one,
                                     if_state2, order_equal_one)
 
@@ -625,7 +616,7 @@ def order_max(if_state2):
             error_p_norm = jnp.inf
             return error_p_norm
 
-        error_p_norm = flax_cond(order < MAX_ORDER,
+        error_p_norm = jax.lax.cond(order < MAX_ORDER,
                                     if_state2, order_less_max,
                                     if_state2, order_max)
 
@@ -643,7 +634,7 @@ def order_max(if_state2):
 
         return state
 
-    state = flax_cond(state['n_equal_steps'] < state['order'] + 1,
+    state = jax.lax.cond(state['n_equal_steps'] < state['order'] + 1,
                          if_state, no_change_in_order,
                          if_state, order_change)
 
@@ -676,7 +667,7 @@ def while_body(while_state):
         j += 1
         return [j, time_factor, order_summation]
 
-    j, time_factor, order_summation = flax_while_loop(while_cond,
+    j, time_factor, order_summation = jax.lax.while_loop(while_cond,
                                                          while_body,
                                                          while_state)
     return order_summation
@@ -778,7 +769,7 @@ def _check_arg(arg):
 def flax_cond(pred, true_operand, true_fun,
               false_operand, false_fun):  # pragma: no cover
     """
-    for debugging purposes, use this instead of flax_cond
+    for debugging purposes, use this instead of jax.lax.cond
     """
     if pred:
         return true_fun(true_operand)
@@ -788,7 +779,7 @@ def flax_cond(pred, true_operand, true_fun,
 
 def flax_while_loop(cond_fun, body_fun, init_val):  # pragma: no cover
     """
-    for debugging purposes, use this instead of flax_while_loop
+    for debugging purposes, use this instead of jax.lax.while_loop
     """
     val = init_val
     while cond_fun(val):
@@ -798,7 +789,7 @@ def flax_while_loop(cond_fun, body_fun, init_val):  # pragma: no cover
 
 def flax_fori_loop(start, stop, body_fun, init_val):  # pragma: no cover
     """
-    for debugging purposes, use this instead of flax_fori_loop
+    for debugging purposes, use this instead of jax.lax.fori_loop
     """
     val = init_val
     for i in range(start, stop):
@@ -808,7 +799,7 @@ def flax_fori_loop(start, stop, body_fun, init_val):  # pragma: no cover
 
 def flax_scan(f, init, xs, length=None):  # pragma: no cover
     """
-    for debugging purposes, use this instead of flax_scan
+    for debugging purposes, use this instead of jax.lax.scan
     """
     if xs is None:
         xs = [None] * length
@@ -825,7 +816,6 @@ def _bdf_odeint_wrapper(func, mass, rtol, atol, y0, ts, *args):
     y0, unravel = ravel_pytree(y0)
     if mass is not None:
         mass = block_diag(tree_flatten(mass)[0])
-    print('mass matrix is ', mass)
     func = ravel_first_arg(func, unravel)
     out = _bdf_odeint(func, mass, rtol, atol, y0, ts, *args)
     return jax.vmap(unravel)(out)
@@ -869,7 +859,7 @@ def scan_fun(carry, i):
         return (y_bar, t0_bar, args_bar), t_bar
 
     init_carry = (g[-1], 0., tree_map(jnp.zeros_like, args))
-    (y_bar, t0_bar, args_bar), rev_ts_bar = flax_scan(
+    (y_bar, t0_bar, args_bar), rev_ts_bar = jax.lax.scan(
         scan_fun, init_carry, jnp.arange(len(ts) - 1, 0, -1))
     ts_bar = jnp.concatenate([jnp.array([t0_bar]), rev_ts_bar[::-1]])
     return (y_bar, ts_bar, *args_bar)

From 81eb53801056f5eb445bca7fabc2fc40a5023732 Mon Sep 17 00:00:00 2001
From: Martin Robinson <martinjrobins@gmail.com>
Date: Mon, 13 Jul 2020 11:28:39 +0100
Subject: [PATCH 20/39] #1104 add test in jax solver for semi-explicit dae case

---
 pybamm/solvers/jax_bdf_solver.py              | 26 +++++++----
 pybamm/solvers/jax_solver.py                  | 32 +++++++++----
 .../unit/test_solvers/test_jax_bdf_solver.py  | 41 +++++++++++++++++
 tests/unit/test_solvers/test_jax_solver.py    | 46 +++++++++++++++++++
 4 files changed, 127 insertions(+), 18 deletions(-)

diff --git a/pybamm/solvers/jax_bdf_solver.py b/pybamm/solvers/jax_bdf_solver.py
index 17562d3bc5..410d9d7823 100644
--- a/pybamm/solvers/jax_bdf_solver.py
+++ b/pybamm/solvers/jax_bdf_solver.py
@@ -13,6 +13,7 @@
 from jax.interpreters import partial_eval as pe
 from jax import linear_util as lu
 from jax.config import config
+from jax.lib import pytree
 
 config.update("jax_enable_x64", True)
 
@@ -162,11 +163,8 @@ def _bdf_init(fun, jac, mass, t0, y0, h0, rtol, atol):
     state['D'] = D
     state['y0'] = None
     state['scale_y0'] = None
+    state['M'] = mass
     state = _predict(state)
-    if mass is None:
-        state['M'] = jnp.identity(len(y0), dtype=y0.dtype)
-    else:
-        state['M'] = mass
 
     # kappa values for difference orders, taken from Table 1 of [1]
     kappa = jnp.array([0, -0.1850, -1 / 9, -0.0823, -0.0415, 0])
@@ -678,7 +676,13 @@ def block_fun(i, j, Ai, Aj):
         if i == j:
             return Ai
         else:
-            return jnp.zeros(Ai.shape[0], Aj.shape[1])
+            return jnp.zeros(
+                (
+                    Ai.shape[0] if Ai.ndim > 1 else 1,
+                    Aj.shape[1] if Aj.ndim > 1 else 1,
+                ),
+                dtype=Ai.dtype
+            )
 
     blocks = [
         [ block_fun(i, j, Ai, Aj) for j, Aj in enumerate(lst)]
@@ -811,10 +815,12 @@ def flax_scan(f, init, xs, length=None):  # pragma: no cover
     return carry, onp.stack(ys)
 
 
-#@partial(jax.jit, static_argnums=(0, 1, 2, 3))
+@partial(jax.jit, static_argnums=(0, 1, 2, 3))
 def _bdf_odeint_wrapper(func, mass, rtol, atol, y0, ts, *args):
     y0, unravel = ravel_pytree(y0)
-    if mass is not None:
+    if mass is None:
+        mass = jnp.identity(y0.shape[0], dtype=y0.dtype)
+    else:
         mass = block_diag(tree_flatten(mass)[0])
     func = ravel_first_arg(func, unravel)
     out = _bdf_odeint(func, mass, rtol, atol, y0, ts, *args)
@@ -840,7 +846,11 @@ def aug_dynamics(augmented_state, t, *args):
     y_bar = g[-1]
     ts_bar = []
     t0_bar = 0.
-    aug_mass = (mass, mass, jnp.ones((1, 1)), tree_map(jnp.ones_like, args))
+
+    def arg_to_identity(arg):
+        return jnp.identity(arg.shape[0] if arg.ndim > 0 else 1, dtype=arg.dtype)
+
+    aug_mass = (mass, mass, jnp.array(1.), tree_map(arg_to_identity, args))
 
     def scan_fun(carry, i):
         y_bar, t0_bar, args_bar = carry
diff --git a/pybamm/solvers/jax_solver.py b/pybamm/solvers/jax_solver.py
index d4e5ff2d24..6b4b2f8b8e 100644
--- a/pybamm/solvers/jax_solver.py
+++ b/pybamm/solvers/jax_solver.py
@@ -5,7 +5,7 @@
 
 import jax
 from jax.experimental.ode import odeint
-import jax.numpy as np
+import jax.numpy as jnp
 import numpy as onp
 
 
@@ -42,13 +42,15 @@ class JaxSolver(pybamm.BaseSolver):
     """
 
     def __init__(self, method='RK45', rtol=1e-6, atol=1e-6, extra_options=None):
-        super().__init__(method, rtol, atol)
-        self.ode_solver = True
+        super().__init__(method, rtol, atol, root_method='lm')
         method_options = ['RK45', 'BDF']
         if method not in method_options:
             raise ValueError('method must be one of {}'.format(method_options))
+        self.ode_solver = False
+        if method == 'RK45':
+            self.ode_solver = True
         self.extra_options = extra_options or {}
-        self.name = "JAX solver"
+        self.name = "JAX solver ({})".format(method)
         self._cached_solves = dict()
 
     def get_solve(self, model, t_eval):
@@ -111,13 +113,22 @@ def create_solve(self, model, t_eval):
 
         # Initial conditions
         y0 = model.y0
+        mass = None
+        if self.method == 'BDF':
+            mass = model.mass_matrix.entries.toarray()
 
-        def rhs_odeint(y, t, inputs):
-            return model.rhs_eval(t, y, inputs)
+        def rhs_ode(y, t, inputs):
+            return model.rhs_eval(t, y, inputs),
+
+        def rhs_dae(y, t, inputs):
+            return jnp.concatenate([
+                model.rhs_eval(t, y, inputs),
+                model.algebraic_eval(t, y, inputs),
+            ])
 
         def solve_model_rk45(inputs):
             y = odeint(
-                rhs_odeint,
+                rhs_ode,
                 y0,
                 t_eval,
                 inputs,
@@ -125,19 +136,20 @@ def solve_model_rk45(inputs):
                 atol=self.atol,
                 **self.extra_options
             )
-            return np.transpose(y)
+            return jnp.transpose(y)
 
         def solve_model_bdf(inputs):
             y = pybamm.jax_bdf_integrate(
-                rhs_odeint,
+                rhs_dae,
                 y0,
                 t_eval,
                 inputs,
                 rtol=self.rtol,
                 atol=self.atol,
+                mass=mass,
                 **self.extra_options
             )
-            return np.transpose(y)
+            return jnp.transpose(y)
 
         if self.method == 'RK45':
             return jax.jit(solve_model_rk45)
diff --git a/tests/unit/test_solvers/test_jax_bdf_solver.py b/tests/unit/test_solvers/test_jax_bdf_solver.py
index 41d8288ed8..9d4b8263d2 100644
--- a/tests/unit/test_solvers/test_jax_bdf_solver.py
+++ b/tests/unit/test_solvers/test_jax_bdf_solver.py
@@ -139,6 +139,47 @@ def solve_bdf(rate):
 
         self.assertAlmostEqual(grad_bdf, grad_num, places=3)
 
+    @unittest.skip("sensitivities do not yet work with semi-explict dae")
+    def test_mass_matrix_with_sensitivities(self):
+        # Solve
+        t_eval = np.linspace(0.0, 1.0, 80)
+
+        def fun(y, t, inputs):
+            return jax.numpy.stack([
+                inputs['rate'] * y[0],
+                y[1] - 2.0 * y[0],
+            ])
+
+        mass = jax.numpy.array([
+            [1.0, 0.0],
+            [0.0, 0.0],
+        ])
+
+        y0 = jax.numpy.array([1.0, 2.0])
+
+        h = 0.0001
+        rate = 0.1
+
+        # create a dummy "model" where we calculate the sum of the time series
+        @jax.jit
+        def solve_bdf(rate):
+            return jax.numpy.sum(
+                pybamm.jax_bdf_integrate(fun, y0, t_eval,
+                                         {'rate': rate},
+                                         mass=mass,
+                                         rtol=1e-9, atol=1e-9)
+            )
+
+        # check answers with finite difference
+        eval_plus = solve_bdf(rate + h)
+        eval_neg = solve_bdf(rate - h)
+        grad_num = (eval_plus - eval_neg) / (2 * h)
+
+        grad_solve_bdf = jax.jit(jax.grad(solve_bdf))
+        grad_bdf = grad_solve_bdf(rate)
+
+        self.assertAlmostEqual(grad_bdf, grad_num, places=3)
+
     def test_solver_with_inputs(self):
         # Create model
         model = pybamm.BaseModel()
diff --git a/tests/unit/test_solvers/test_jax_solver.py b/tests/unit/test_solvers/test_jax_solver.py
index a84d1b3c33..546aee41be 100644
--- a/tests/unit/test_solvers/test_jax_solver.py
+++ b/tests/unit/test_solvers/test_jax_solver.py
@@ -53,6 +53,52 @@ def test_model_solver(self):
             self.assertLess(t_second_solve, t_first_solve)
             np.testing.assert_array_equal(second_solution.y, solution.y)
 
+    def test_semi_explicit_model(self):
+        # Create model
+        model = pybamm.BaseModel()
+        model.convert_to_format = "jax"
+        domain = ["negative electrode", "separator", "positive electrode"]
+        var = pybamm.Variable("var", domain=domain)
+        var2 = pybamm.Variable("var2", domain=domain)
+        model.rhs = {var: 0.1 * var}
+        model.algebraic = {var2: var2 - 2.0 * var}
+        model.initial_conditions = {var: 1.0, var2: 1.0}
+        # No need to set parameters; can use base discretisation (no spatial operators)
+
+        # create discretisation
+        mesh = get_mesh_for_testing()
+        spatial_methods = {"macroscale": pybamm.FiniteVolume()}
+        disc = pybamm.Discretisation(mesh, spatial_methods)
+        disc.process_model(model)
+
+        # Solve
+        solver = pybamm.JaxSolver(
+            method='BDF', rtol=1e-8, atol=1e-8
+        )
+        t_eval = np.linspace(0, 1, 80)
+        t0 = time.perf_counter()
+        solution = solver.solve(model, t_eval)
+        t_first_solve = time.perf_counter() - t0
+        np.testing.assert_array_equal(solution.t, t_eval)
+        soln = np.exp(0.1 * solution.t)
+        np.testing.assert_allclose(solution.y[0], soln,
+                                   rtol=1e-7, atol=1e-7)
+        np.testing.assert_allclose(solution.y[-1], 2 * soln,
+                                   rtol=1e-7, atol=1e-7)
+
+        # Test time
+        self.assertEqual(
+            solution.total_time, solution.solve_time + solution.set_up_time
+        )
+        self.assertEqual(solution.termination, "final time")
+
+        t0 = time.perf_counter()
+        second_solution = solver.solve(model, t_eval)
+        t_second_solve = time.perf_counter() - t0
+
+        self.assertLess(t_second_solve, t_first_solve)
+        np.testing.assert_array_equal(second_solution.y, solution.y)
+
     def test_solver_sensitivities(self):
         # Create model
         model = pybamm.BaseModel()

From 107f7ce04ac33abb99871a881867892143a3d8a4 Mon Sep 17 00:00:00 2001
From: Martin Robinson <martinjrobins@gmail.com>
Date: Mon, 13 Jul 2020 11:41:33 +0100
Subject: [PATCH 21/39] #1104 tidy up

---
 pybamm/solvers/jax_bdf_solver.py               | 5 ++---
 tests/unit/test_solvers/test_jax_bdf_solver.py | 1 -
 2 files changed, 2 insertions(+), 4 deletions(-)

diff --git a/pybamm/solvers/jax_bdf_solver.py b/pybamm/solvers/jax_bdf_solver.py
index 410d9d7823..857e45f89e 100644
--- a/pybamm/solvers/jax_bdf_solver.py
+++ b/pybamm/solvers/jax_bdf_solver.py
@@ -13,7 +13,6 @@
 from jax.interpreters import partial_eval as pe
 from jax import linear_util as lu
 from jax.config import config
-from jax.lib import pytree
 
 config.update("jax_enable_x64", True)
 
@@ -685,8 +684,8 @@ def block_fun(i, j, Ai, Aj):
             )
 
     blocks = [
-        [ block_fun(i, j, Ai, Aj) for j, Aj in enumerate(lst)]
-            for i, Ai in enumerate(lst)
+        [block_fun(i, j, Ai, Aj) for j, Aj in enumerate(lst)]
+        for i, Ai in enumerate(lst)
     ]
 
     return jnp.block(blocks)
diff --git a/tests/unit/test_solvers/test_jax_bdf_solver.py b/tests/unit/test_solvers/test_jax_bdf_solver.py
index 9d4b8263d2..4160ca8510 100644
--- a/tests/unit/test_solvers/test_jax_bdf_solver.py
+++ b/tests/unit/test_solvers/test_jax_bdf_solver.py
@@ -93,7 +93,6 @@ def fun(y, t):
         np.testing.assert_allclose(y[:, 0], np.exp(0.1 * t_eval),
                                    rtol=1e-7, atol=1e-7)
 
-
     def test_solver_sensitivities(self):
         # Create model
         model = pybamm.BaseModel()

From 1bdb5fde17117c76566697526f3264823524dcc3 Mon Sep 17 00:00:00 2001
From: Martin Robinson <martinjrobins@gmail.com>
Date: Mon, 13 Jul 2020 19:07:06 +0100
Subject: [PATCH 22/39] #1104 jax bdf solver calculates own consistent initial
 conditions for semi-explicit dae models

---
 pybamm/solvers/base_solver.py                 |  5 +-
 pybamm/solvers/jax_bdf_solver.py              | 96 +++++++++++++++++--
 pybamm/solvers/jax_solver.py                  |  8 +-
 .../unit/test_solvers/test_jax_bdf_solver.py  |  5 +-
 tests/unit/test_solvers/test_jax_solver.py    |  1 +
 5 files changed, 102 insertions(+), 13 deletions(-)

diff --git a/pybamm/solvers/base_solver.py b/pybamm/solvers/base_solver.py
index 4451a7b9fd..1b32fdf751 100644
--- a/pybamm/solvers/base_solver.py
+++ b/pybamm/solvers/base_solver.py
@@ -451,9 +451,12 @@ def calculate_consistent_state(self, model, time=0, inputs=None):
         -------
         y0_consistent : array-like, same shape as y0_guess
             Initial conditions that are consistent with the algebraic equations (roots
-            of the algebraic equations)
+            of the algebraic equations). If self.root_method == None then returns
+            model.y0.
         """
         pybamm.logger.info("Start calculating consistent states")
+        if self.root_method is None:
+            return model.y0
         try:
             root_sol = self.root_method._integrate(model, [time], inputs)
         except pybamm.SolverError as e:
diff --git a/pybamm/solvers/jax_bdf_solver.py b/pybamm/solvers/jax_bdf_solver.py
index 857e45f89e..cddfd18c01 100644
--- a/pybamm/solvers/jax_bdf_solver.py
+++ b/pybamm/solvers/jax_bdf_solver.py
@@ -18,6 +18,7 @@
 
 MAX_ORDER = 5
 NEWTON_MAXITER = 4
+ROOT_SOLVE_MAXITER = 15
 MIN_FACTOR = 0.2
 MAX_FACTOR = 10
 
@@ -146,23 +147,27 @@ def _bdf_init(fun, jac, mass, t0, y0, h0, rtol, atol):
     """
     state = {}
     state['t'] = t0
-    state['y'] = y0
-    f0 = fun(y0, t0)
     state['atol'] = atol
     state['rtol'] = rtol
+    state['M'] = mass
+    EPS = jnp.finfo(y0.dtype).eps
+    state['newton_tol'] = jnp.max((10 * EPS / rtol, jnp.min((0.03, rtol ** 0.5))))
+
+    scale_y0 = atol + rtol * jnp.abs(y0)
+    y0 = _select_initial_conditions(fun, mass, t0, y0, state['newton_tol'], scale_y0)
+    state['y'] = y0
+
+    f0 = fun(y0, t0)
     order = 1
     state['order'] = order
     state['h'] = _select_initial_step(state, fun, t0, y0, f0, h0)
-    EPS = jnp.finfo(y0.dtype).eps
-    state['newton_tol'] = jnp.max((10 * EPS / rtol, jnp.min((0.03, rtol ** 0.5))))
     state['n_equal_steps'] = 0
     D = jnp.empty((MAX_ORDER + 1, len(y0)), dtype=y0.dtype)
     D = jax.ops.index_update(D, jax.ops.index[0, :], y0)
     D = jax.ops.index_update(D, jax.ops.index[1, :], f0 * state['h'])
     state['D'] = D
     state['y0'] = None
-    state['scale_y0'] = None
-    state['M'] = mass
+    state['scale_y0'] = scale_y0
     state = _predict(state)
 
     # kappa values for difference orders, taken from Table 1 of [1]
@@ -215,6 +220,83 @@ def _compute_R(order, factor):
     return R
 
 
+def _select_initial_conditions(fun, M, t0, y0, tol, scale_y0):
+    # identify differentiable variables as zeros on diagonal
+    algebraic_variables = jnp.diag(M) == 0.
+
+    # if all differentiable variables then return y0 (can use normal python if since M
+    # is static)
+    if not jnp.any(algebraic_variables):
+        return y0
+
+    # calculate consistent initial conditions via a newton on -J_a @ delta = f_a This
+    # follows this reference:
+    #
+    # Shampine, L. F., Reichelt, M. W., & Kierzenka, J. A. (1999). Solving index-1 DAEs
+    # in MATLAB and Simulink. SIAM review, 41(3), 538-552.
+
+    # calculate fun_a, function of algebraic variables
+    def fun_a(y_a):
+        y_full = jax.ops.index_update(y0, algebraic_variables, y_a)
+        return fun(y_full, t0)[algebraic_variables]
+
+    y0_a = y0[algebraic_variables]
+    scale_y0_a = scale_y0[algebraic_variables]
+
+    d = jnp.zeros(y0_a.shape[0], dtype=y0.dtype)
+    y_a = jnp.array(y0_a)
+
+    # calculate neg jacobian of fun_a
+    J_a = jax.jacfwd(fun_a)(y_a)
+    LU = jax.scipy.linalg.lu_factor(-J_a)
+
+    not_converged = True
+    dy_norm_old = -1.0
+    k = 0
+    while_state = [k, not_converged, dy_norm_old, d, y_a]
+
+    def while_cond(while_state):
+        k, not_converged, _, _, _ = while_state
+        return not_converged * (k < ROOT_SOLVE_MAXITER)
+
+    def while_body(while_state):
+        k, not_converged, dy_norm_old, d, y_a = while_state
+        f_eval = fun_a(y_a)
+        dy = jax.scipy.linalg.lu_solve(LU, f_eval)
+        dy_norm = jnp.sqrt(jnp.mean((dy / scale_y0_a)**2))
+        rate = dy_norm / dy_norm_old
+
+        d += dy
+        y_a = y0_a + d
+
+        # if converged then break out of iteration early
+        pred = dy_norm_old >= 0
+        pred *= rate / (1 - rate) * dy_norm < tol
+        pred += dy_norm == 0
+
+        def converged_fun(not_converged):
+            not_converged = False
+            return not_converged
+
+        def not_converged_fun(not_converged):
+            return not_converged
+
+        dy_norm_old = dy_norm
+
+        not_converged = \
+            jax.lax.cond(pred,
+                         not_converged, converged_fun,
+                         not_converged, not_converged_fun)
+        return [k + 1, not_converged, dy_norm_old, d, y_a]
+
+    k, not_converged, dy_norm_old, d, y_a = jax.lax.while_loop(while_cond,
+                                                               while_body,
+                                                               while_state)
+    y_tilde = jax.ops.index_update(y0, algebraic_variables, y_a)
+
+    return y_tilde
+
+
 def _select_initial_step(state, fun, t0, y0, f0, h0):
     """
     Select a good initial step by stepping forward one step of forward euler, and
@@ -394,7 +476,7 @@ def _newton_iteration(state, fun):
     M = state['M']
     scale_y0 = state['scale_y0']
     t = state['t'] + state['h']
-    d = jnp.zeros_like(y0)
+    d = jnp.zeros(y0.shape, dtype=y0.dtype)
     y = jnp.array(y0, copy=True)
 
     not_converged = True
diff --git a/pybamm/solvers/jax_solver.py b/pybamm/solvers/jax_solver.py
index 6b4b2f8b8e..f7f50e0c3a 100644
--- a/pybamm/solvers/jax_solver.py
+++ b/pybamm/solvers/jax_solver.py
@@ -42,7 +42,9 @@ class JaxSolver(pybamm.BaseSolver):
     """
 
     def __init__(self, method='RK45', rtol=1e-6, atol=1e-6, extra_options=None):
-        super().__init__(method, rtol, atol, root_method='lm')
+        # note: bdf solver itself calculates consistent initial conditions so can set
+        # root_method to none
+        super().__init__(method, rtol, atol, root_method=None)
         method_options = ['RK45', 'BDF']
         if method not in method_options:
             raise ValueError('method must be one of {}'.format(method_options))
@@ -111,8 +113,8 @@ def create_solve(self, model, t_eval):
                                " re-solve using no events and a fixed"
                                " end-time".format(model.events))
 
-        # Initial conditions
-        y0 = model.y0
+        # Initial conditions, make sure they are an 0D array
+        y0 = jnp.array(model.y0).reshape(-1)
         mass = None
         if self.method == 'BDF':
             mass = model.mass_matrix.entries.toarray()
diff --git a/tests/unit/test_solvers/test_jax_bdf_solver.py b/tests/unit/test_solvers/test_jax_bdf_solver.py
index 4160ca8510..ff89ac1e3f 100644
--- a/tests/unit/test_solvers/test_jax_bdf_solver.py
+++ b/tests/unit/test_solvers/test_jax_bdf_solver.py
@@ -69,7 +69,9 @@ def fun(y, t):
             [0.0, 0.0],
         ])
 
-        y0 = jax.numpy.array([1.0, 2.0])
+        # give some bad initial conditions, solver should calculate correct ones using
+        # this as a guess
+        y0 = jax.numpy.array([1.0, 1.5])
 
         t0 = time.perf_counter()
         y = pybamm.jax_bdf_integrate(fun, y0, t_eval, mass=mass, rtol=1e-8, atol=1e-8)
@@ -138,7 +140,6 @@ def solve_bdf(rate):
 
         self.assertAlmostEqual(grad_bdf, grad_num, places=3)
 
-    @unittest.skip("sensitivities do not yet work with semi-explict dae")
     def test_mass_matrix_with_sensitivities(self):
         # Solve
         t_eval = np.linspace(0.0, 1.0, 80)
diff --git a/tests/unit/test_solvers/test_jax_solver.py b/tests/unit/test_solvers/test_jax_solver.py
index 546aee41be..96886697f4 100644
--- a/tests/unit/test_solvers/test_jax_solver.py
+++ b/tests/unit/test_solvers/test_jax_solver.py
@@ -62,6 +62,7 @@ def test_semi_explicit_model(self):
         var2 = pybamm.Variable("var2", domain=domain)
         model.rhs = {var: 0.1 * var}
         model.algebraic = {var2: var2 - 2.0 * var}
+        # give inconsistent initial conditions, should calculate correct ones
         model.initial_conditions = {var: 1.0, var2: 1.0}
         # No need to set parameters; can use base discretisation (no spatial operators)
 

From df5326aa0414bdb30464126651da032820818fc8 Mon Sep 17 00:00:00 2001
From: Martin Robinson <martinjrobins@gmail.com>
Date: Tue, 14 Jul 2020 10:50:01 +0100
Subject: [PATCH 23/39] #1104 remove cond in update_step_size

---
 pybamm/solvers/jax_bdf_solver.py | 18 ++++++------------
 1 file changed, 6 insertions(+), 12 deletions(-)

diff --git a/pybamm/solvers/jax_bdf_solver.py b/pybamm/solvers/jax_bdf_solver.py
index cddfd18c01..0d06740799 100644
--- a/pybamm/solvers/jax_bdf_solver.py
+++ b/pybamm/solvers/jax_bdf_solver.py
@@ -402,7 +402,7 @@ def update_psi_and_predict(state):
     return state
 
 
-def _update_step_size(state, factor, dont_update_lu):
+def _update_step_size(state, factor):
     """
     If step size h is changed then also need to update the terms in
     the first equation of page 9 of [1]:
@@ -419,14 +419,8 @@ def _update_step_size(state, factor, dont_update_lu):
     c = h * state['alpha'][order]
 
     # redo lu (c has changed)
-    def update_lu(state):
-        state['LU'] = jax.scipy.linalg.lu_factor(state['M'] - c * state['J'])
-        state['n_lu_decompositions'] += 1
-        return state
-
-    state = jax.lax.cond(dont_update_lu == False,  # noqa: E712
-                         state, update_lu,
-                         state, lambda x: x)
+    state['LU'] = jax.scipy.linalg.lu_factor(state['M'] - c * state['J'])
+    state['n_lu_decompositions'] += 1
 
     state['h'] = h
     state['c'] = c
@@ -577,7 +571,7 @@ def need_to_update_step_size(if_state2):
                 # newton iteration did not converge, but jacobian has already been
                 # evaluated so reduce step size by 0.3 (as per [1]) and try again
                 state, step_accepted = if_state2
-                state = _update_step_size(state, 0.3, False)
+                state = _update_step_size(state, 0.3)
                 return [state, step_accepted]
 
             def converged(if_state2):
@@ -605,7 +599,7 @@ def error_too_large(if_state3):
                     factor = jnp.max((MIN_FACTOR,
                                       safety *
                                       error_norm ** (-1 / (state['order'] + 1))))
-                    state = _update_step_size(state, factor, False)
+                    state = _update_step_size(state, factor)
                     return [state, step_accepted]
 
                 def accept_step(if_state3):
@@ -709,7 +703,7 @@ def order_max(if_state2):
         state['order'] = order
 
         factor = jnp.min((MAX_FACTOR, safety * factors[max_index]))
-        state = _update_step_size(state, factor, False)
+        state = _update_step_size(state, factor)
 
         return state
 

From c4c4c66d66808265f439e4e599eb01bbf6e33a11 Mon Sep 17 00:00:00 2001
From: Martin Robinson <martinjrobins@gmail.com>
Date: Tue, 14 Jul 2020 10:54:10 +0100
Subject: [PATCH 24/39] #1104 remove cond in newton iteration

---
 pybamm/solvers/jax_bdf_solver.py | 13 +------------
 1 file changed, 1 insertion(+), 12 deletions(-)

diff --git a/pybamm/solvers/jax_bdf_solver.py b/pybamm/solvers/jax_bdf_solver.py
index 0d06740799..a23b8c7e59 100644
--- a/pybamm/solvers/jax_bdf_solver.py
+++ b/pybamm/solvers/jax_bdf_solver.py
@@ -504,21 +504,10 @@ def while_body(while_state):
         # if converged then break out of iteration early
         pred = dy_norm_old >= 0
         pred *= rate / (1 - rate) * dy_norm < tol
-        pred += dy_norm == 0
-
-        def converged_fun(not_converged):
-            not_converged = False
-            return not_converged
-
-        def not_converged_fun(not_converged):
-            return not_converged
+        not_converged = dy_norm == 0 + pred
 
         dy_norm_old = dy_norm
 
-        not_converged = \
-            jax.lax.cond(pred,
-                         not_converged, converged_fun,
-                         not_converged, not_converged_fun)
         return [k + 1, not_converged, dy_norm_old, d, y, state]
 
     k, not_converged, dy_norm_old, d, y, state = jax.lax.while_loop(while_cond,

From 0ca87c4efc6c4794d9eb274c568fdedbe99495e4 Mon Sep 17 00:00:00 2001
From: Martin Robinson <martinjrobins@gmail.com>
Date: Tue, 14 Jul 2020 16:16:27 +0100
Subject: [PATCH 25/39] #1104 get rid of lax.cond calls in bdf solver

---
 pybamm/solvers/jax_bdf_solver.py              | 455 ++++++++----------
 .../unit/test_solvers/test_jax_bdf_solver.py  |   5 +-
 2 files changed, 192 insertions(+), 268 deletions(-)

diff --git a/pybamm/solvers/jax_bdf_solver.py b/pybamm/solvers/jax_bdf_solver.py
index a23b8c7e59..3fdefb50f7 100644
--- a/pybamm/solvers/jax_bdf_solver.py
+++ b/pybamm/solvers/jax_bdf_solver.py
@@ -1,6 +1,7 @@
 from functools import partial
 import operator as op
 import numpy as onp
+import collections
 
 import jax
 import jax.numpy as jnp
@@ -9,7 +10,7 @@
 from jax.util import safe_map, cache, split_list
 from jax.api_util import flatten_fun_nokwargs
 from jax.flatten_util import ravel_pytree
-from jax.tree_util import tree_map, tree_flatten, tree_unflatten
+from jax.tree_util import tree_map, tree_flatten, tree_unflatten, tree_multimap, partial
 from jax.interpreters import partial_eval as pe
 from jax import linear_util as lu
 from jax.config import config
@@ -86,16 +87,16 @@ def fun_bind_inputs(y, t):
     i = 0
     y_out = jnp.empty((len(t_eval), len(y0)), dtype=y0.dtype)
 
-    init_state = [stepper, t_eval, i, y_out, 0]
+    init_state = [stepper, t_eval, i, y_out]
 
     def cond_fun(state):
-        _, t_eval, i, _, _ = state
+        _, t_eval, i, _ = state
         return i < len(t_eval)
 
     def body_fun(state):
-        stepper, t_eval, i, y_out, n_steps = state
+        stepper, t_eval, i, y_out = state
         stepper = _bdf_step(stepper, fun_bind_inputs, jac_bind_inputs)
-        index = jnp.searchsorted(t_eval, stepper['t'])
+        index = jnp.searchsorted(t_eval, stepper.t)
 
         def for_body(j, y_out):
             t = t_eval[j]
@@ -104,16 +105,25 @@ def for_body(j, y_out):
             return y_out
 
         y_out = jax.lax.fori_loop(i, index, for_body, y_out)
-        return [stepper, t_eval, index, y_out, n_steps + 1]
+        return [stepper, t_eval, index, y_out]
 
-    stepper, t_eval, i, y_out, n_steps = jax.lax.while_loop(cond_fun, body_fun,
+    stepper, t_eval, i, y_out = jax.lax.while_loop(cond_fun, body_fun,
                                                             init_state)
 
-    stepper['n_steps'] = n_steps
-
     return y_out
 
 
+BDFInternalStates = [
+        't', 'atol', 'rtol', 'M', 'newton_tol', 'order', 'h', 'n_equal_steps', 'D',
+        'y0', 'scale_y0', 'kappa', 'gamma', 'alpha', 'c', 'error_const', 'J', 'LU', 'U',
+        'psi', 'n_function_evals', 'n_jacobian_evals', 'n_lu_decompositions', 'n_steps']
+BDFState = collections.namedtuple('BDFState', BDFInternalStates)
+
+jax.tree_util.register_pytree_node(
+                      BDFState,
+                      lambda xs: (tuple(xs), None),
+                      lambda _, xs: BDFState(*xs))
+
 def _bdf_init(fun, jac, mass, t0, y0, h0, rtol, atol):
     """
     Initiation routine for Backward Difference formula (BDF) implicit multistep
@@ -145,6 +155,7 @@ def _bdf_init(fun, jac, mass, t0, y0, h0, rtol, atol):
     atol: (optional) float
         absolute tolerance for the solver
     """
+
     state = {}
     state['t'] = t0
     state['atol'] = atol
@@ -155,20 +166,18 @@ def _bdf_init(fun, jac, mass, t0, y0, h0, rtol, atol):
 
     scale_y0 = atol + rtol * jnp.abs(y0)
     y0 = _select_initial_conditions(fun, mass, t0, y0, state['newton_tol'], scale_y0)
-    state['y'] = y0
 
     f0 = fun(y0, t0)
     order = 1
     state['order'] = order
-    state['h'] = _select_initial_step(state, fun, t0, y0, f0, h0)
+    state['h'] = _select_initial_step(atol, rtol, fun, t0, y0, f0, h0)
     state['n_equal_steps'] = 0
     D = jnp.empty((MAX_ORDER + 1, len(y0)), dtype=y0.dtype)
     D = jax.ops.index_update(D, jax.ops.index[0, :], y0)
     D = jax.ops.index_update(D, jax.ops.index[1, :], f0 * state['h'])
     state['D'] = D
-    state['y0'] = None
+    state['y0'] = y0
     state['scale_y0'] = scale_y0
-    state = _predict(state)
 
     # kappa values for difference orders, taken from Table 1 of [1]
     kappa = jnp.array([0, -0.1850, -1 / 9, -0.0823, -0.0415, 0])
@@ -189,13 +198,16 @@ def _bdf_init(fun, jac, mass, t0, y0, h0, rtol, atol):
 
     state['U'] = _compute_R(order, 1)
     state['psi'] = None
-    state = _update_psi(state)
 
     state['n_function_evals'] = 2
     state['n_jacobian_evals'] = 1
     state['n_lu_decompositions'] = 1
-    state['n_error_test_failures'] = 0
-    return state
+    state['n_steps'] = 0
+
+    tuple_state = BDFState(*[state[k] for k in BDFInternalStates])
+    y0, scale_y0 = _predict(tuple_state, D)
+    psi = _update_psi(tuple_state, D)
+    return tuple_state._replace(y0=y0, scale_y0=scale_y0, psi=psi)
 
 
 def _compute_R(order, factor):
@@ -272,23 +284,13 @@ def while_body(while_state):
         # if converged then break out of iteration early
         pred = dy_norm_old >= 0
         pred *= rate / (1 - rate) * dy_norm < tol
-        pred += dy_norm == 0
-
-        def converged_fun(not_converged):
-            not_converged = False
-            return not_converged
-
-        def not_converged_fun(not_converged):
-            return not_converged
+        not_converged = dy_norm == 0 + pred
 
         dy_norm_old = dy_norm
 
-        not_converged = \
-            jax.lax.cond(pred,
-                         not_converged, converged_fun,
-                         not_converged, not_converged_fun)
         return [k + 1, not_converged, dy_norm_old, d, y_a]
 
+
     k, not_converged, dy_norm_old, d, y_a = jax.lax.while_loop(while_cond,
                                                                while_body,
                                                                while_state)
@@ -297,7 +299,7 @@ def not_converged_fun(not_converged):
     return y_tilde
 
 
-def _select_initial_step(state, fun, t0, y0, f0, h0):
+def _select_initial_step(atol, rtol, fun, t0, y0, f0, h0):
     """
     Select a good initial step by stepping forward one step of forward euler, and
     comparing the predicted state against that using the provided function.
@@ -310,7 +312,7 @@ def _select_initial_step(state, fun, t0, y0, f0, h0):
     .. [1] E. Hairer, S. P. Norsett G. Wanner, "Solving Ordinary Differential
                Equations I: Nonstiff Problems", Sec. II.4.
     """
-    scale = state['atol'] + jnp.abs(y0) * state['rtol']
+    scale = atol + jnp.abs(y0) * rtol
     y1 = y0 + h0 * f0
     f1 = fun(y1, t0 + h0)
     d2 = jnp.sqrt(jnp.mean(((f1 - f0) / scale)**2))
@@ -319,37 +321,37 @@ def _select_initial_step(state, fun, t0, y0, f0, h0):
     return jnp.min((100 * h0, h1))
 
 
-def _predict(state):
+def _predict(state, D):
     """
     predict forward to new step (eq 2 in [1])
     """
-    n = len(state['y'])
-    order = state['order']
+    n = len(state.y0)
+    order = state.order
     orders = jnp.repeat(jnp.arange(MAX_ORDER + 1).reshape(-1, 1), n, axis=1)
-    subD = jnp.where(orders <= order, state['D'], 0)
-    state['y0'] = jnp.sum(subD, axis=0)
-    state['scale_y0'] = state['atol'] + state['rtol'] * jnp.abs(state['y0'])
-    return state
+    subD = jnp.where(orders <= order, D, 0)
+    y0 = jnp.sum(subD, axis=0)
+    scale_y0 = state.atol + state.rtol * jnp.abs(state.y0)
+    return y0, scale_y0
 
 
-def _update_psi(state):
+def _update_psi(state, D):
     """
     update psi term as defined in second equation on page 9 of [1]
     """
-    order = state['order']
-    n = len(state['y'])
+    order = state.order
+    n = len(state.y0)
     orders = jnp.arange(MAX_ORDER + 1)
-    subGamma = jnp.where(orders > 0, jnp.where(orders <= order, state['gamma'], 0), 0)
+    subGamma = jnp.where(orders > 0, jnp.where(orders <= order, state.gamma, 0), 0)
     orders = jnp.repeat(orders.reshape(-1, 1), n, axis=1)
-    subD = jnp.where(orders > 0, jnp.where(orders <= order, state['D'], 0), 0)
-    state['psi'] = jnp.dot(
+    subD = jnp.where(orders > 0, jnp.where(orders <= order, D, 0), 0)
+    psi = jnp.dot(
         subD.T,
         subGamma
-    ) * state['alpha'][order]
-    return state
+    ) * state.alpha[order]
+    return psi
 
 
-def _update_difference_for_next_step(state, d, only_update_D=False):
+def _update_difference_for_next_step(state, d):
     """
     update of difference equations can be done efficiently
     by reusing d and D.
@@ -362,8 +364,8 @@ def _update_difference_for_next_step(state, d, only_update_D=False):
 
     Combining these gives the following algorithm
     """
-    order = state['order']
-    D = state['D']
+    order = state.order
+    D = state.D
     D = jax.ops.index_update(D, jax.ops.index[order + 2],
                              d - D[order + 1])
     D = jax.ops.index_update(D, jax.ops.index[order + 1],
@@ -384,22 +386,7 @@ def while_body(while_state):
 
     i, D = jax.lax.while_loop(while_cond, while_body, while_state)
 
-    state['D'] = D
-
-    def update_psi_and_predict(state):
-        # update psi (D has changed)
-        state = _update_psi(state)
-
-        # update y0 (D has changed)
-        state = _predict(state)
-
-        return state
-
-    state = jax.lax.cond(only_update_D == False,  # noqa: E712
-                         state, update_psi_and_predict,
-                         state, lambda x: x)
-
-    return state
+    return D
 
 
 def _update_step_size(state, factor):
@@ -411,81 +398,79 @@ def _update_step_size(state, factor):
     - lu factorisation of (M - c * J) used in newton iteration (same equation)
     - psi term
     """
-    order = state['order']
-    h = state['h']
+    order = state.order
+    h = state.h
 
     h *= factor
-    state['n_equal_steps'] = 0
-    c = h * state['alpha'][order]
+    n_equal_steps = 0
+    c = h * state.alpha[order]
 
     # redo lu (c has changed)
-    state['LU'] = jax.scipy.linalg.lu_factor(state['M'] - c * state['J'])
-    state['n_lu_decompositions'] += 1
-
-    state['h'] = h
-    state['c'] = c
+    LU = jax.scipy.linalg.lu_factor(state.M - c * state.J)
+    n_lu_decompositions = state.n_lu_decompositions + 1
 
     # update D using equations in section 3.2 of [1]
-    RU = _compute_R(order, factor).dot(state['U'])
+    RU = _compute_R(order, factor).dot(state.U)
     I = jnp.arange(0, MAX_ORDER + 1).reshape(-1, 1)
     J = jnp.arange(0, MAX_ORDER + 1)
 
     # only update order+1, order+1 entries of D
     RU = jnp.where(jnp.logical_and(I <= order, J <= order),
                    RU, jnp.identity(MAX_ORDER + 1))
-    D = state['D']
+    D = state.D
     D = jnp.dot(RU.T, D)
     # D = jax.ops.index_update(D, jax.ops.index[:order + 1],
     #                         jnp.dot(RU.T, D[:order + 1]))
-    state['D'] = D
 
     # update psi (D has changed)
-    state = _update_psi(state)
+    psi = _update_psi(state, D)
 
     # update y0 (D has changed)
-    state = _predict(state)
-
-    return state
+    y0, scale_y0 = _predict(state, D)
 
+    return state._replace(n_equal_steps=n_equal_steps, LU=LU,
+                          n_lu_decompositions=n_lu_decompositions, h=h, c=c,
+                          D=D, psi=psi, y0=y0, scale_y0=scale_y0)
 
 def _update_jacobian(state, jac):
     """
     we update the jacobian using J(t_{n+1}, y^0_{n+1})
     following the scipy bdf implementation rather than J(t_n, y_n) as per [1]
     """
-    J = jac(state['y0'], state['t'] + state['h'])
-    state['n_jacobian_evals'] += 1
-    state['LU'] = jax.scipy.linalg.lu_factor(state['M'] - state['c'] * J)
-    state['n_lu_decompositions'] += 1
-    state['J'] = J
-    return state
+    J = jac(state.y0, state.t + state.h)
+    n_jacobian_evals = state.n_jacobian_evals + 1
+    LU = jax.scipy.linalg.lu_factor(state.M - state.c * J)
+    n_lu_decompositions = state.n_lu_decompositions + 1
+    return state._replace(J=J, n_jacobian_evals=n_jacobian_evals, LU=LU,
+                          n_lu_decompositions=n_lu_decompositions)
 
 
 def _newton_iteration(state, fun):
-    tol = state['newton_tol']
-    c = state['c']
-    psi = state['psi']
-    y0 = state['y0']
-    LU = state['LU']
-    M = state['M']
-    scale_y0 = state['scale_y0']
-    t = state['t'] + state['h']
+    tol = state.newton_tol
+    c = state.c
+    psi = state.psi
+    y0 = state.y0
+    LU = state.LU
+    M = state.M
+    scale_y0 = state.scale_y0
+    t = state.t + state.h
     d = jnp.zeros(y0.shape, dtype=y0.dtype)
     y = jnp.array(y0, copy=True)
+    n_function_evals = state.n_function_evals
 
     not_converged = True
     dy_norm_old = -1.0
     k = 0
-    while_state = [k, not_converged, dy_norm_old, d, y, state]
+    while_state = [k, not_converged, dy_norm_old, d, y, n_function_evals]
 
     def while_cond(while_state):
         k, not_converged, _, _, _, _ = while_state
         return not_converged * (k < NEWTON_MAXITER)
 
     def while_body(while_state):
-        k, not_converged, dy_norm_old, d, y, state = while_state
+        k, not_converged, dy_norm_old, d, y, n_function_evals = while_state
         f_eval = fun(y, t)
-        state['n_function_evals'] += 1
+        n_function_evals += 1
         b = c * f_eval - M @ (psi + d)
         dy = jax.scipy.linalg.lu_solve(LU, b)
         dy_norm = jnp.sqrt(jnp.mean((dy / scale_y0)**2))
@@ -496,7 +481,7 @@ def while_body(while_state):
         pred = rate >= 1
         pred += rate ** (NEWTON_MAXITER - k) / (1 - rate) * dy_norm > tol
         pred *= dy_norm_old >= 0
-        k = jax.lax.cond(pred, k, lambda k: NEWTON_MAXITER, k, lambda k: k)
+        k += pred * (NEWTON_MAXITER - k)
 
         d += dy
         y = y0 + d
@@ -508,197 +493,146 @@ def while_body(while_state):
 
         dy_norm_old = dy_norm
 
-        return [k + 1, not_converged, dy_norm_old, d, y, state]
+        return [k + 1, not_converged, dy_norm_old, d, y, n_function_evals]
 
-    k, not_converged, dy_norm_old, d, y, state = jax.lax.while_loop(while_cond,
+    k, not_converged, dy_norm_old, d, y, n_function_evals = jax.lax.while_loop(while_cond,
                                                                     while_body,
                                                                     while_state)
-    return not_converged, k, y, d, state
+    return not_converged, k, y, d, state._replace(n_function_evals=n_function_evals)
+
+def rms_norm(arg):
+    return jnp.sqrt(jnp.mean(arg**2))
+
+
+def _prepare_next_step(state, d):
+    D = _update_difference_for_next_step(state, d)
+    psi = _update_psi(state, D)
+    y0, scale_y0 = _predict(state, D)
+    return state._replace(D=D,psi=psi,y0=y0,scale_y0=scale_y0)
+
+
+def _prepare_next_step_order_change(state, d, y, n_iter):
+    order = state.order
+
+    D = _update_difference_for_next_step(state, d)
+
+    # Note: we are recalculating these from the while loop above, could re-use?
+    scale_y = state.atol + state.rtol * jnp.abs(y)
+    error = state.error_const[order] * d
+    error_norm = rms_norm(error / scale_y)
+    safety = 0.9 * (2 * NEWTON_MAXITER + 1) / (2 * NEWTON_MAXITER
+                                               + n_iter)
+
+    # similar to the optimal step size factor we calculated above for the current
+    # order k, we need to calculate the optimal step size factors for orders
+    # k-1 and k+1. To do this, we note that the error = C_k * D^{k+1} y_n
+    error_m_norm = jnp.where(
+        order > 1,
+        rms_norm(state.error_const[order - 1] * D[order] / scale_y),
+        jnp.inf
+    )
+    error_p_norm = jnp.where(
+        order < MAX_ORDER,
+        rms_norm(state.error_const[order + 1] * D[order + 2] / scale_y),
+        jnp.inf
+    )
+
+    error_norms = jnp.array([error_m_norm, error_norm, error_p_norm])
+    factors = error_norms ** (-1 / (jnp.arange(3) + order))
+
+    # now we have the three factors for orders k-1, k and k+1, pick the maximum in
+    # order to maximise the resultant step size
+    max_index = jnp.argmax(factors)
+    order = order + max_index - 1
+
+    factor = jnp.min((MAX_FACTOR, safety * factors[max_index]))
+
+    new_state = _update_step_size(state._replace(D=D, order=order), factor)
+    return new_state
 
 
 def _bdf_step(state, fun, jac):
     # we will try and use the old jacobian unless convergence of newton iteration
     # fails
-    not_updated_jacobian = True
+    updated_jacobian = False
     # initialise step size and try to make the step,
     # iterate, reducing step size until error is in bounds
     step_accepted = False
-    y = jnp.empty_like(state['y'])
-    d = jnp.empty_like(state['y'])
+    y = jnp.empty_like(state.y0)
+    d = jnp.empty_like(state.y0)
     n_iter = -1
 
     # loop until step is accepted
-    while_state = [state, step_accepted, not_updated_jacobian, y, d, n_iter]
+    while_state = [state, step_accepted, updated_jacobian, y, d, n_iter]
 
     def while_cond(while_state):
         _, step_accepted, _, _, _, _ = while_state
         return step_accepted == False  # noqa: E712
 
     def while_body(while_state):
-        state, step_accepted, not_updated_jacobian, y, d, n_iter = while_state
+        state, step_accepted, updated_jacobian, y, d, n_iter = while_state
 
         # solve BDF equation using y0 as starting point
         not_converged, n_iter, y, d, state = _newton_iteration(state, fun)
 
-        # if not converged update the jacobian for J(t_n,y_n) and try again
-        pred = not_converged * not_updated_jacobian
-        if_state = [state, not_updated_jacobian, step_accepted]
-
-        def need_to_update_jacobian(if_state):
-            # newton iteration did not converge, update the jacobian and try again
-            state, not_updated_jacobian, step_accepted = if_state
-            state = _update_jacobian(state, jac)
-            not_updated_jacobian = False
-            return [state, not_updated_jacobian, step_accepted]
-
-        def dont_need_to_update_jacobian(if_state):
-            state, not_updated_jacobian, step_accepted = if_state
-
-            if_state2 = [state, step_accepted]
-
-            def need_to_update_step_size(if_state2):
-                # newton iteration did not converge, but jacobian has already been
-                # evaluated so reduce step size by 0.3 (as per [1]) and try again
-                state, step_accepted = if_state2
-                state = _update_step_size(state, 0.3)
-                return [state, step_accepted]
-
-            def converged(if_state2):
-                state, step_accepted = if_state2
-                # yay, converged, now check error is within bounds
-                scale_y = state['atol'] + state['rtol'] * jnp.abs(y)
-
-                # combine eq 3, 4 and 6 from [1] to obtain error
-                # Note that error = C_k * h^{k+1} y^{k+1}
-                # and d = D^{k+1} y_{n+1} \approx h^{k+1} y^{k+1}
-                error = state['error_const'][state['order']] * d
-                error_norm = jnp.sqrt(jnp.mean((error / scale_y)**2))
-
-                # calculate safety outside if since we will reuse later
-                safety = 0.9 * (2 * NEWTON_MAXITER + 1) / (2 * NEWTON_MAXITER
-                                                           + n_iter)
-
-                if_state3 = [state, step_accepted]
-
-                def error_too_large(if_state3):
-                    # error too large, reduce step size and try again
-                    state, step_accepted = if_state3
-                    state['n_error_test_failures'] += 1
-                    # calculate optimal step size factor as per eq 2.46 of [2]
-                    factor = jnp.max((MIN_FACTOR,
-                                      safety *
-                                      error_norm ** (-1 / (state['order'] + 1))))
-                    state = _update_step_size(state, factor)
-                    return [state, step_accepted]
-
-                def accept_step(if_state3):
-                    # if we get here we can accept the step
-                    state, step_accepted = if_state3
-                    step_accepted = True
-                    return [state, step_accepted]
-
-                state, step_accepted = \
-                    jax.lax.cond(error_norm > 1,
-                                 if_state3, error_too_large,
-                                 if_state3, accept_step)
-
-                return [state, step_accepted]
-
-            state, step_accepted = jax.lax.cond(not_converged,
-                                                if_state2, need_to_update_step_size,
-                                                if_state2, converged)
-
-            return [state, not_updated_jacobian, step_accepted]
-
-        state, not_updated_jacobian, step_accepted = \
-            jax.lax.cond(pred,
-                         if_state, need_to_update_jacobian,
-                         if_state, dont_need_to_update_jacobian)
-        return [state, step_accepted, not_updated_jacobian, y, d, n_iter]
-
-    state, step_accepted, not_updated_jacobian, y, d, n_iter = \
-        jax.lax.while_loop(while_cond, while_body, while_state)
-
-    # take the accepted step
-    state['y'] = y
-    state['t'] += state['h']
-
-    # a change in order is only done after running at order k for k + 1 steps
-    # (see page 83 of [2])
-    state['n_equal_steps'] += 1
-
-    if_state = [state, d, y, n_iter]
-
-    def no_change_in_order(if_state):
-        # no change in order this step, update differences D and exit
-        state, d, _, _ = if_state
-        state = _update_difference_for_next_step(state, d, False)
-        return state
-
-    def order_change(if_state):
-        state, d, y, n_iter = if_state
-        order = state['order']
+        # newton iteration did not converge, but jacobian has already been
+        # evaluated so reduce step size by 0.3 (as per [1]) and try again
+        state = tree_multimap(
+                    partial(jnp.where, not_converged * updated_jacobian),
+                    _update_step_size(state, 0.3),
+                    state
+        )
 
-        # Note: we are recalculating these from the while loop above, could re-use?
-        scale_y = state['atol'] + state['rtol'] * jnp.abs(y)
-        error = state['error_const'][order] * d
-        error_norm = jnp.sqrt(jnp.mean((error / scale_y)**2))
-        safety = 0.9 * (2 * NEWTON_MAXITER + 1) / (2 * NEWTON_MAXITER
-                                                   + n_iter)
+        # if not converged and jacobian not updated, then update the jacobian and try again
+        (state, updated_jacobian) = tree_multimap(
+                      partial(jnp.where, not_converged * (updated_jacobian == False)),
+                      (_update_jacobian(state, jac), True),
+                      (state, False)
+        )
 
-        # don't need to update psi and y0 yet as we will be changing D again soon
-        state = _update_difference_for_next_step(state, d, True)
+        safety = 0.9 * (2 * NEWTON_MAXITER + 1) / (2 * NEWTON_MAXITER + n_iter)
+        scale_y = state.atol + state.rtol * jnp.abs(y)
 
-        if_state2 = [state, scale_y, order]
-        # similar to the optimal step size factor we calculated above for the current
-        # order k, we need to calculate the optimal step size factors for orders
-        # k-1 and k+1. To do this, we note that the error = C_k * D^{k+1} y_n
+        # combine eq 3, 4 and 6 from [1] to obtain error
+        # Note that error = C_k * h^{k+1} y^{k+1}
+        # and d = D^{k+1} y_{n+1} \approx h^{k+1} y^{k+1}
+        error = state.error_const[state.order] * d
 
-        def order_greater_one(if_state2):
-            state, scale_y, order = if_state2
-            error_m = state['error_const'][order - 1] * state['D'][order]
-            error_m_norm = jnp.sqrt(jnp.mean((error_m / scale_y)**2))
-            return error_m_norm
+        error_norm = rms_norm(error / scale_y)
 
-        def order_equal_one(if_state2):
-            error_m_norm = jnp.inf
-            return error_m_norm
+        # calculate optimal step size factor as per eq 2.46 of [2]
+        factor = jnp.max((MIN_FACTOR,
+                          safety *
+                          error_norm ** (-1 / (state.order + 1))))
 
-        error_m_norm = jax.lax.cond(order > 1,
-                                    if_state2, order_greater_one,
-                                    if_state2, order_equal_one)
+        (state, step_accepted) = tree_multimap(
+            partial(jnp.where, (not_converged == False) * (error_norm > 1)),
+            (_update_step_size(state, factor), False),
+            (state, True)
+        )
 
-        def order_less_max(if_state2):
-            state, scale_y, order = if_state2
-            error_p = state['error_const'][order + 1] * state['D'][order + 2]
-            error_p_norm = jnp.sqrt(jnp.mean((error_p / scale_y)**2))
-            return error_p_norm
+        return [state, step_accepted, updated_jacobian, y, d, n_iter]
 
-        def order_max(if_state2):
-            error_p_norm = jnp.inf
-            return error_p_norm
+    state, step_accepted, updated_jacobian, y, d, n_iter = \
+        jax.lax.while_loop(while_cond, while_body, while_state)
 
-        error_p_norm = jax.lax.cond(order < MAX_ORDER,
-                                    if_state2, order_less_max,
-                                    if_state2, order_max)
 
-        error_norms = jnp.array([error_m_norm, error_norm, error_p_norm])
-        factors = error_norms ** (-1 / (jnp.arange(3) + order))
+    # take the accepted step
+    n_steps = state.n_steps + 1
+    t = state.t + state.h
 
-        # now we have the three factors for orders k-1, k and k+1, pick the maximum in
-        # order to maximise the resultant step size
-        max_index = jnp.argmax(factors)
-        order += max_index - 1
-        state['order'] = order
+    # a change in order is only done after running at order k for k + 1 steps
+    # (see page 83 of [2])
+    n_equal_steps = state.n_equal_steps + 1
 
-        factor = jnp.min((MAX_FACTOR, safety * factors[max_index]))
-        state = _update_step_size(state, factor)
 
-        return state
+    state = state._replace(n_equal_steps=n_equal_steps, t=t, n_steps=n_steps)
 
-    state = jax.lax.cond(state['n_equal_steps'] < state['order'] + 1,
-                         if_state, no_change_in_order,
-                         if_state, order_change)
+    state = tree_multimap(
+        partial(jnp.where, n_equal_steps < state.order + 1),
+        _prepare_next_step(state, d),
+        _prepare_next_step_order_change(state, d, y, n_iter)
+    )
 
     return state
 
@@ -709,10 +643,10 @@ def _bdf_interpolate(state, t_eval):
 
     definition of the interpolating polynomial can be found on page 7 of [1]
     """
-    order = state['order']
-    t = state['t']
-    h = state['h']
-    D = state['D']
+    order = state.order
+    t = state.t
+    h = state.h
+    D = state.D
     j = 0
     time_factor = 1.0
     order_summation = D[0]
@@ -834,17 +768,6 @@ def _check_arg(arg):
     return _bdf_odeint_wrapper(converted, mass, rtol, atol, y0, t_eval, *consts, *args)
 
 
-def flax_cond(pred, true_operand, true_fun,
-              false_operand, false_fun):  # pragma: no cover
-    """
-    for debugging purposes, use this instead of jax.lax.cond
-    """
-    if pred:
-        return true_fun(true_operand)
-    else:
-        return false_fun(false_operand)
-
-
 def flax_while_loop(cond_fun, body_fun, init_val):  # pragma: no cover
     """
     for debugging purposes, use this instead of jax.lax.while_loop
diff --git a/tests/unit/test_solvers/test_jax_bdf_solver.py b/tests/unit/test_solvers/test_jax_bdf_solver.py
index ff89ac1e3f..9674145af2 100644
--- a/tests/unit/test_solvers/test_jax_bdf_solver.py
+++ b/tests/unit/test_solvers/test_jax_bdf_solver.py
@@ -41,7 +41,7 @@ def fun(y, t):
 
         # test accuracy
         np.testing.assert_allclose(y[:, 0], np.exp(0.1 * t_eval),
-                                   rtol=1e-7, atol=1e-7)
+                                   rtol=1e-6, atol=1e-6)
 
         t0 = time.perf_counter()
         y = pybamm.jax_bdf_integrate(fun, y0, t_eval, rtol=1e-8, atol=1e-8)
@@ -52,7 +52,7 @@ def fun(y, t):
 
         # test second run is accurate
         np.testing.assert_allclose(y[:, 0], np.exp(0.1 * t_eval),
-                                   rtol=1e-7, atol=1e-7)
+                                   rtol=1e-6, atol=1e-6)
 
     def test_mass_matrix(self):
         # Solve
@@ -140,6 +140,7 @@ def solve_bdf(rate):
 
         self.assertAlmostEqual(grad_bdf, grad_num, places=3)
 
+    @unittest.skip("sensitivities not yet supported on for dae models")
     def test_mass_matrix_with_sensitivities(self):
         # Solve
         t_eval = np.linspace(0.0, 1.0, 80)

From 3e31c5f0b6715c765fcec52256492d32052abb4b Mon Sep 17 00:00:00 2001
From: Martin Robinson <martinjrobins@gmail.com>
Date: Tue, 14 Jul 2020 19:01:14 +0100
Subject: [PATCH 26/39] #1104 fix flake8 and some minor bugs

---
 pybamm/solvers/jax_bdf_solver.py           | 85 ++++++++++++----------
 tests/unit/test_solvers/test_jax_solver.py |  2 +-
 2 files changed, 49 insertions(+), 38 deletions(-)

diff --git a/pybamm/solvers/jax_bdf_solver.py b/pybamm/solvers/jax_bdf_solver.py
index 3fdefb50f7..f8263b0c54 100644
--- a/pybamm/solvers/jax_bdf_solver.py
+++ b/pybamm/solvers/jax_bdf_solver.py
@@ -1,4 +1,3 @@
-from functools import partial
 import operator as op
 import numpy as onp
 import collections
@@ -83,8 +82,10 @@ def fun_bind_inputs(y, t):
     t0 = t_eval[0]
     h0 = t_eval[1] - t0
 
-    stepper = _bdf_init(fun_bind_inputs, jac_bind_inputs, mass, t0, y0, h0, rtol, atol)
-    i = 0
+    stepper, failed = _bdf_init(
+        fun_bind_inputs, jac_bind_inputs, mass, t0, y0, h0, rtol, atol
+    )
+    i = failed * len(t_eval)
     y_out = jnp.empty((len(t_eval), len(y0)), dtype=y0.dtype)
 
     init_state = [stepper, t_eval, i, y_out]
@@ -108,21 +109,24 @@ def for_body(j, y_out):
         return [stepper, t_eval, index, y_out]
 
     stepper, t_eval, i, y_out = jax.lax.while_loop(cond_fun, body_fun,
-                                                            init_state)
+                                                   init_state)
 
     return y_out
 
 
 BDFInternalStates = [
-        't', 'atol', 'rtol', 'M', 'newton_tol', 'order', 'h', 'n_equal_steps', 'D',
-        'y0', 'scale_y0', 'kappa', 'gamma', 'alpha', 'c', 'error_const', 'J', 'LU', 'U',
-        'psi', 'n_function_evals', 'n_jacobian_evals', 'n_lu_decompositions', 'n_steps']
+    't', 'atol', 'rtol', 'M', 'newton_tol', 'order', 'h', 'n_equal_steps', 'D',
+    'y0', 'scale_y0', 'kappa', 'gamma', 'alpha', 'c', 'error_const', 'J', 'LU', 'U',
+    'psi', 'n_function_evals', 'n_jacobian_evals', 'n_lu_decompositions', 'n_steps'
+]
 BDFState = collections.namedtuple('BDFState', BDFInternalStates)
 
 jax.tree_util.register_pytree_node(
-                      BDFState,
-                      lambda xs: (tuple(xs), None),
-                      lambda _, xs: BDFState(*xs))
+    BDFState,
+    lambda xs: (tuple(xs), None),
+    lambda _, xs: BDFState(*xs)
+)
+
 
 def _bdf_init(fun, jac, mass, t0, y0, h0, rtol, atol):
     """
@@ -165,7 +169,9 @@ def _bdf_init(fun, jac, mass, t0, y0, h0, rtol, atol):
     state['newton_tol'] = jnp.max((10 * EPS / rtol, jnp.min((0.03, rtol ** 0.5))))
 
     scale_y0 = atol + rtol * jnp.abs(y0)
-    y0 = _select_initial_conditions(fun, mass, t0, y0, state['newton_tol'], scale_y0)
+    y0, not_converged = _select_initial_conditions(
+        fun, mass, t0, y0, state['newton_tol'], scale_y0
+    )
 
     f0 = fun(y0, t0)
     order = 1
@@ -207,7 +213,7 @@ def _bdf_init(fun, jac, mass, t0, y0, h0, rtol, atol):
     tuple_state = BDFState(*[state[k] for k in BDFInternalStates])
     y0, scale_y0 = _predict(tuple_state, D)
     psi = _update_psi(tuple_state, D)
-    return tuple_state._replace(y0=y0, scale_y0=scale_y0, psi=psi)
+    return tuple_state._replace(y0=y0, scale_y0=scale_y0, psi=psi), not_converged
 
 
 def _compute_R(order, factor):
@@ -239,7 +245,7 @@ def _select_initial_conditions(fun, M, t0, y0, tol, scale_y0):
     # if all differentiable variables then return y0 (can use normal python if since M
     # is static)
     if not jnp.any(algebraic_variables):
-        return y0
+        return y0, False
 
     # calculate consistent initial conditions via a newton on -J_a @ delta = f_a This
     # follows this reference:
@@ -256,7 +262,7 @@ def fun_a(y_a):
     scale_y0_a = scale_y0[algebraic_variables]
 
     d = jnp.zeros(y0_a.shape[0], dtype=y0.dtype)
-    y_a = jnp.array(y0_a)
+    y_a = jnp.array(y0_a, copy=True)
 
     # calculate neg jacobian of fun_a
     J_a = jax.jacfwd(fun_a)(y_a)
@@ -290,13 +296,12 @@ def while_body(while_state):
 
         return [k + 1, not_converged, dy_norm_old, d, y_a]
 
-
     k, not_converged, dy_norm_old, d, y_a = jax.lax.while_loop(while_cond,
                                                                while_body,
                                                                while_state)
     y_tilde = jax.ops.index_update(y0, algebraic_variables, y_a)
 
-    return y_tilde
+    return y_tilde, not_converged
 
 
 def _select_initial_step(atol, rtol, fun, t0, y0, f0, h0):
@@ -399,9 +404,7 @@ def _update_step_size(state, factor):
     - psi term
     """
     order = state.order
-    h = state.h
-
-    h *= factor
+    h = state.h * factor
     n_equal_steps = 0
     c = h * state.alpha[order]
 
@@ -432,6 +435,7 @@ def _update_step_size(state, factor):
                           n_lu_decompositions=n_lu_decompositions, h=h, c=c,
                           D=D, psi=psi, y0=y0, scale_y0=scale_y0)
 
+
 def _update_jacobian(state, jac):
     """
     we update the jacobian using J(t_{n+1}, y^0_{n+1})
@@ -481,7 +485,7 @@ def while_body(while_state):
         pred = rate >= 1
         pred += rate ** (NEWTON_MAXITER - k) / (1 - rate) * dy_norm > tol
         pred *= dy_norm_old >= 0
-        k += pred * (NEWTON_MAXITER - k)
+        k += pred * (NEWTON_MAXITER - k - 1)
 
         d += dy
         y = y0 + d
@@ -495,11 +499,13 @@ def while_body(while_state):
 
         return [k + 1, not_converged, dy_norm_old, d, y, n_function_evals]
 
-    k, not_converged, dy_norm_old, d, y, n_function_evals = jax.lax.while_loop(while_cond,
-                                                                    while_body,
-                                                                    while_state)
+    k, not_converged, dy_norm_old, d, y, n_function_evals = \
+        jax.lax.while_loop(while_cond,
+                           while_body,
+                           while_state)
     return not_converged, k, y, d, state._replace(n_function_evals=n_function_evals)
 
+
 def rms_norm(arg):
     return jnp.sqrt(jnp.mean(arg**2))
 
@@ -508,7 +514,7 @@ def _prepare_next_step(state, d):
     D = _update_difference_for_next_step(state, d)
     psi = _update_psi(state, D)
     y0, scale_y0 = _predict(state, D)
-    return state._replace(D=D,psi=psi,y0=y0,scale_y0=scale_y0)
+    return state._replace(D=D, psi=psi, y0=y0, scale_y0=scale_y0)
 
 
 def _prepare_next_step_order_change(state, d, y, n_iter):
@@ -543,7 +549,7 @@ def _prepare_next_step_order_change(state, d, y, n_iter):
     # now we have the three factors for orders k-1, k and k+1, pick the maximum in
     # order to maximise the resultant step size
     max_index = jnp.argmax(factors)
-    order = order + max_index - 1
+    order += max_index - 1
 
     factor = jnp.min((MAX_FACTOR, safety * factors[max_index]))
 
@@ -578,16 +584,20 @@ def while_body(while_state):
         # newton iteration did not converge, but jacobian has already been
         # evaluated so reduce step size by 0.3 (as per [1]) and try again
         state = tree_multimap(
-                    partial(jnp.where, not_converged * updated_jacobian),
-                    _update_step_size(state, 0.3),
-                    state
+            partial(jnp.where, not_converged * updated_jacobian),
+            _update_step_size(state, 0.3),
+            state
         )
 
-        # if not converged and jacobian not updated, then update the jacobian and try again
+        # if not converged and jacobian not updated, then update the jacobian and try
+        # again
         (state, updated_jacobian) = tree_multimap(
-                      partial(jnp.where, not_converged * (updated_jacobian == False)),
-                      (_update_jacobian(state, jac), True),
-                      (state, False)
+            partial(
+                jnp.where,
+                not_converged * (updated_jacobian == False)  # noqa: E712
+            ),
+            (_update_jacobian(state, jac), True),
+            (state, False + updated_jacobian)
         )
 
         safety = 0.9 * (2 * NEWTON_MAXITER + 1) / (2 * NEWTON_MAXITER + n_iter)
@@ -606,9 +616,12 @@ def while_body(while_state):
                           error_norm ** (-1 / (state.order + 1))))
 
         (state, step_accepted) = tree_multimap(
-            partial(jnp.where, (not_converged == False) * (error_norm > 1)),
+            partial(
+                jnp.where,
+                (not_converged == False) * (error_norm > 1)  # noqa: E712
+            ),
             (_update_step_size(state, factor), False),
-            (state, True)
+            (state, not_converged == False)
         )
 
         return [state, step_accepted, updated_jacobian, y, d, n_iter]
@@ -616,7 +629,6 @@ def while_body(while_state):
     state, step_accepted, updated_jacobian, y, d, n_iter = \
         jax.lax.while_loop(while_cond, while_body, while_state)
 
-
     # take the accepted step
     n_steps = state.n_steps + 1
     t = state.t + state.h
@@ -625,7 +637,6 @@ def while_body(while_state):
     # (see page 83 of [2])
     n_equal_steps = state.n_equal_steps + 1
 
-
     state = state._replace(n_equal_steps=n_equal_steps, t=t, n_steps=n_steps)
 
     state = tree_multimap(
@@ -802,7 +813,7 @@ def flax_scan(f, init, xs, length=None):  # pragma: no cover
     return carry, onp.stack(ys)
 
 
-@partial(jax.jit, static_argnums=(0, 1, 2, 3))
+@jax.partial(jax.jit, static_argnums=(0, 1, 2, 3))
 def _bdf_odeint_wrapper(func, mass, rtol, atol, y0, ts, *args):
     y0, unravel = ravel_pytree(y0)
     if mass is None:
diff --git a/tests/unit/test_solvers/test_jax_solver.py b/tests/unit/test_solvers/test_jax_solver.py
index 96886697f4..2b37305c72 100644
--- a/tests/unit/test_solvers/test_jax_solver.py
+++ b/tests/unit/test_solvers/test_jax_solver.py
@@ -38,7 +38,7 @@ def test_model_solver(self):
             t_first_solve = time.perf_counter() - t0
             np.testing.assert_array_equal(solution.t, t_eval)
             np.testing.assert_allclose(solution.y[0], np.exp(0.1 * solution.t),
-                                       rtol=1e-7, atol=1e-7)
+                                       rtol=1e-6, atol=1e-6)
 
             # Test time
             self.assertEqual(

From 4e3a78be84f057fa6d709637a80f7ba383dfdcf6 Mon Sep 17 00:00:00 2001
From: Martin Robinson <martinjrobins@gmail.com>
Date: Tue, 14 Jul 2020 21:10:58 +0100
Subject: [PATCH 27/39] #1104 improve coverage

---
 pybamm/solvers/jax_bdf_solver.py           | 13 -------------
 tests/unit/test_solvers/test_jax_solver.py |  5 +++++
 2 files changed, 5 insertions(+), 13 deletions(-)

diff --git a/pybamm/solvers/jax_bdf_solver.py b/pybamm/solvers/jax_bdf_solver.py
index f8263b0c54..67a8a92a35 100644
--- a/pybamm/solvers/jax_bdf_solver.py
+++ b/pybamm/solvers/jax_bdf_solver.py
@@ -919,19 +919,6 @@ def abstractify(x):
     return core.raise_to_shaped(core.get_aval(x))
 
 
-def ravel_2d_pytree(pytree):
-    leaves, treedef = tree_flatten(pytree)
-    flat, unravel_list = jax.api.vjp(ravel_2d_list, *leaves)
-
-    def unravel_pytree(flat):
-        return tree_unflatten(treedef, unravel_list(flat))
-    return flat, unravel_pytree
-
-
-def ravel_2d_list(*lst):
-    return jnp.concatenate([jnp.ravel(elt) for elt in lst]) if lst else jnp.array([])
-
-
 def ravel_first_arg(f, unravel):
     return ravel_first_arg_(lu.wrap_init(f), unravel).call_wrapped
 
diff --git a/tests/unit/test_solvers/test_jax_solver.py b/tests/unit/test_solvers/test_jax_solver.py
index 2b37305c72..d30f3f76b6 100644
--- a/tests/unit/test_solvers/test_jax_solver.py
+++ b/tests/unit/test_solvers/test_jax_solver.py
@@ -244,6 +244,11 @@ def test_get_solve(self):
         spatial_methods = {"macroscale": pybamm.FiniteVolume()}
         disc = pybamm.Discretisation(mesh, spatial_methods)
         disc.process_model(model)
+
+        # test that another method string gives error
+        with self.assertRaises(ValueError):
+            solver = pybamm.JaxSolver(method='not_real')
+
         # Solve
         solver = pybamm.JaxSolver(rtol=1e-8, atol=1e-8)
         t_eval = np.linspace(0, 5, 80)

From f08dc5726e1b6c768dcc241a8a9f6cfdcb9a2909 Mon Sep 17 00:00:00 2001
From: Martin Robinson <martinjrobins@gmail.com>
Date: Wed, 15 Jul 2020 08:06:50 +0100
Subject: [PATCH 28/39] #1104 put in proper adjoint equations for semi-explicit
 dae index 1

---
 pybamm/solvers/jax_bdf_solver.py              | 27 +++++++++++++++----
 .../unit/test_solvers/test_jax_bdf_solver.py  |  1 -
 2 files changed, 22 insertions(+), 6 deletions(-)

diff --git a/pybamm/solvers/jax_bdf_solver.py b/pybamm/solvers/jax_bdf_solver.py
index 67a8a92a35..e356c04f7c 100644
--- a/pybamm/solvers/jax_bdf_solver.py
+++ b/pybamm/solvers/jax_bdf_solver.py
@@ -239,7 +239,7 @@ def _compute_R(order, factor):
 
 
 def _select_initial_conditions(fun, M, t0, y0, tol, scale_y0):
-    # identify differentiable variables as zeros on diagonal
+    # identify algebraic variables as zeros on diagonal
     algebraic_variables = jnp.diag(M) == 0.
 
     # if all differentiable variables then return y0 (can use normal python if since M
@@ -700,9 +700,10 @@ def block_fun(i, j, Ai, Aj):
 
     return jnp.block(blocks)
 
-# NOTE: all code below (except the docstring on jax_bdf_integrate and other minor
-# edits), to define the API of the jax solver and the ability to solve the adjoint
-# sensitivities, has been copied from the JAX library at https://github.com/google/jax.
+# NOTE: the code below (except the docstring on jax_bdf_integrate and other minor
+# edits), has been modified from the JAX library at https://github.com/google/jax.
+# The main difference is the addition of support for semi-explicit dae index 1 problems
+# via the addition of a mass matrix.
 # This is under an Apache license, a short form of which is given here:
 #
 # Copyright 2018 Google LLC
@@ -833,13 +834,29 @@ def _bdf_odeint_fwd(func, mass, rtol, atol, y0, ts, *args):
 def _bdf_odeint_rev(func, mass, rtol, atol, res, g):
     ys, ts, args = res
 
+    diag_mass = jnp.diag(mass)
+
     def aug_dynamics(augmented_state, t, *args):
         """Original system augmented with vjp_y, vjp_t and vjp_args."""
         y, y_bar, *_ = augmented_state
         # `t` here is negative time, so we need to negate again to get back to
         # normal time. See the `odeint` invocation in `scan_fun` below.
         y_dot, vjpfun = jax.vjp(func, y, -t, *args)
-        return (-y_dot, *vjpfun(y_bar))
+
+        # Adjoint equations for semi-explicit dae index 1 system from
+        #
+        # Cao, Y., Li, S., Petzold, L., & Serban, R. (2003). Adjoint sensitivity
+        # analysis for differential-algebraic equations: The adjoint DAE system and its
+        # numerical solution. SIAM journal on scientific computing, 24(3), 1076-1089.
+        #
+        # y_bar_dot_d = -J_dd^T y_bar_d - J_ad^T y_bar_a
+        #           0 =  J_da^T y_bar_d + J_aa^T y_bar_d
+
+        y_bar_dot, t_bar, args_bar = vjpfun(y_bar)
+        # identify algebraic variables as zeros on diagonal
+        y_bar_dot = jnp.where(diag_mass == 0., -y_bar_dot, y_bar_dot)
+
+        return (-y_dot, y_bar_dot, t_bar, args_bar)
 
     y_bar = g[-1]
     ts_bar = []
diff --git a/tests/unit/test_solvers/test_jax_bdf_solver.py b/tests/unit/test_solvers/test_jax_bdf_solver.py
index 9674145af2..f76871387e 100644
--- a/tests/unit/test_solvers/test_jax_bdf_solver.py
+++ b/tests/unit/test_solvers/test_jax_bdf_solver.py
@@ -140,7 +140,6 @@ def solve_bdf(rate):
 
         self.assertAlmostEqual(grad_bdf, grad_num, places=3)
 
-    @unittest.skip("sensitivities not yet supported on for dae models")
     def test_mass_matrix_with_sensitivities(self):
         # Solve
         t_eval = np.linspace(0.0, 1.0, 80)

From 016e12d2807f922a7d1afb8243efb477856a16f5 Mon Sep 17 00:00:00 2001
From: Martin Robinson <martinjrobins@gmail.com>
Date: Wed, 15 Jul 2020 08:11:57 +0100
Subject: [PATCH 29/39] #1104 allow users to choose alternate root method to
 set initial conditions

---
 pybamm/solvers/jax_solver.py | 11 ++++++++---
 1 file changed, 8 insertions(+), 3 deletions(-)

diff --git a/pybamm/solvers/jax_solver.py b/pybamm/solvers/jax_solver.py
index f7f50e0c3a..db912d0da1 100644
--- a/pybamm/solvers/jax_solver.py
+++ b/pybamm/solvers/jax_solver.py
@@ -30,6 +30,10 @@ class JaxSolver(pybamm.BaseSolver):
     method: str
         'RK45' (default) uses jax.experimental.odeint
         'BDF' uses custom jax_bdf_integrate (see jax_bdf_integrate.py for details)
+    root_method: str, optional
+        Method to use to calculate consistent initial conditions. By default this uses
+        the newton chord method internal to the jax bdf solver, otherwise choose from
+        the set of default options defined in docs for pybamm.BaseSolver
     rtol : float, optional
         The relative tolerance for the solver (default is 1e-6).
     atol : float, optional
@@ -41,10 +45,11 @@ class JaxSolver(pybamm.BaseSolver):
         for details.
     """
 
-    def __init__(self, method='RK45', rtol=1e-6, atol=1e-6, extra_options=None):
+    def __init__(self, method='RK45', root_method=None,
+                 rtol=1e-6, atol=1e-6, extra_options=None):
         # note: bdf solver itself calculates consistent initial conditions so can set
-        # root_method to none
-        super().__init__(method, rtol, atol, root_method=None)
+        # root_method to none, allow user to override this behavior
+        super().__init__(method, rtol, atol, root_method=root_method)
         method_options = ['RK45', 'BDF']
         if method not in method_options:
             raise ValueError('method must be one of {}'.format(method_options))

From 71cfee64a592a1f4859f844c3e093ccf2a0dd662 Mon Sep 17 00:00:00 2001
From: Martin Robinson <martinjrobins@gmail.com>
Date: Wed, 15 Jul 2020 08:15:23 +0100
Subject: [PATCH 30/39] #1104 add changelog

---
 CHANGELOG.md | 1 +
 1 file changed, 1 insertion(+)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 7334d46015..eac5700e56 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -2,6 +2,7 @@
 
 ## Features
 
+-   Added support for index 1 dae equations and sensitivity calculations to JAX BDF solver ([#1107](https://github.com/pybamm-team/PyBaMM/pull/1107))
 -   Allowed keyword arguments to be passed to `Simulation.plot()` ([#1099](https://github.com/pybamm-team/PyBaMM/pull/1099))
 
 ## Optimizations

From dd8180d937c3ad63ee981d2543e3796cba3a5809 Mon Sep 17 00:00:00 2001
From: Martin Robinson <martinjrobins@gmail.com>
Date: Wed, 15 Jul 2020 09:20:10 +0100
Subject: [PATCH 31/39] #1104 handle augmented dynamics with inputs

---
 pybamm/solvers/jax_bdf_solver.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/pybamm/solvers/jax_bdf_solver.py b/pybamm/solvers/jax_bdf_solver.py
index e356c04f7c..995d47d612 100644
--- a/pybamm/solvers/jax_bdf_solver.py
+++ b/pybamm/solvers/jax_bdf_solver.py
@@ -852,11 +852,11 @@ def aug_dynamics(augmented_state, t, *args):
         # y_bar_dot_d = -J_dd^T y_bar_d - J_ad^T y_bar_a
         #           0 =  J_da^T y_bar_d + J_aa^T y_bar_d
 
-        y_bar_dot, t_bar, args_bar = vjpfun(y_bar)
+        y_bar_dot, *rest = vjpfun(y_bar)
         # identify algebraic variables as zeros on diagonal
         y_bar_dot = jnp.where(diag_mass == 0., -y_bar_dot, y_bar_dot)
 
-        return (-y_dot, y_bar_dot, t_bar, args_bar)
+        return (-y_dot, y_bar_dot, *rest)
 
     y_bar = g[-1]
     ts_bar = []

From 67766389fa1f7ba464d856e27d69e3653a703086 Mon Sep 17 00:00:00 2001
From: Martin Robinson <martinjrobins@gmail.com>
Date: Wed, 15 Jul 2020 13:50:08 +0100
Subject: [PATCH 32/39] #1104 fix for newton iteration logic

---
 pybamm/solvers/jax_bdf_solver.py | 60 +++++++++++++++++++-------------
 1 file changed, 35 insertions(+), 25 deletions(-)

diff --git a/pybamm/solvers/jax_bdf_solver.py b/pybamm/solvers/jax_bdf_solver.py
index 995d47d612..70f1bde9a8 100644
--- a/pybamm/solvers/jax_bdf_solver.py
+++ b/pybamm/solvers/jax_bdf_solver.py
@@ -268,17 +268,17 @@ def fun_a(y_a):
     J_a = jax.jacfwd(fun_a)(y_a)
     LU = jax.scipy.linalg.lu_factor(-J_a)
 
-    not_converged = True
+    converged = True
     dy_norm_old = -1.0
     k = 0
-    while_state = [k, not_converged, dy_norm_old, d, y_a]
+    while_state = [k, converged, dy_norm_old, d, y_a]
 
     def while_cond(while_state):
-        k, not_converged, _, _, _ = while_state
-        return not_converged * (k < ROOT_SOLVE_MAXITER)
+        k, converged, _, _, _ = while_state
+        return (converged == False) * (k < ROOT_SOLVE_MAXITER)
 
     def while_body(while_state):
-        k, not_converged, dy_norm_old, d, y_a = while_state
+        k, converged, dy_norm_old, d, y_a = while_state
         f_eval = fun_a(y_a)
         dy = jax.scipy.linalg.lu_solve(LU, f_eval)
         dy_norm = jnp.sqrt(jnp.mean((dy / scale_y0_a)**2))
@@ -288,20 +288,20 @@ def while_body(while_state):
         y_a = y0_a + d
 
         # if converged then break out of iteration early
-        pred = dy_norm_old >= 0
+        pred = dy_norm_old >= 0.
         pred *= rate / (1 - rate) * dy_norm < tol
-        not_converged = dy_norm == 0 + pred
+        converged = (dy_norm == 0.) + pred
 
         dy_norm_old = dy_norm
 
-        return [k + 1, not_converged, dy_norm_old, d, y_a]
+        return [k + 1, converged, dy_norm_old, d, y_a]
 
-    k, not_converged, dy_norm_old, d, y_a = jax.lax.while_loop(while_cond,
-                                                               while_body,
-                                                               while_state)
+    k, converged, dy_norm_old, d, y_a = jax.lax.while_loop(while_cond,
+                                                        while_body,
+                                                        while_state)
     y_tilde = jax.ops.index_update(y0, algebraic_variables, y_a)
 
-    return y_tilde, not_converged
+    return y_tilde, converged
 
 
 def _select_initial_step(atol, rtol, fun, t0, y0, f0, h0):
@@ -462,17 +462,17 @@ def _newton_iteration(state, fun):
     y = jnp.array(y0, copy=True)
     n_function_evals = state.n_function_evals
 
-    not_converged = True
+    converged = False
     dy_norm_old = -1.0
     k = 0
-    while_state = [k, not_converged, dy_norm_old, d, y, n_function_evals]
+    while_state = [k, converged, dy_norm_old, d, y, n_function_evals]
 
     def while_cond(while_state):
-        k, not_converged, _, _, _, _ = while_state
-        return not_converged * (k < NEWTON_MAXITER)
+        k, converged, _, _, _, _ = while_state
+        return (converged == False) * (k < NEWTON_MAXITER)
 
     def while_body(while_state):
-        k, not_converged, dy_norm_old, d, y, n_function_evals = while_state
+        k, converged, dy_norm_old, d, y, n_function_evals = while_state
         f_eval = fun(y, t)
         n_function_evals += 1
         b = c * f_eval - M @ (psi + d)
@@ -491,19 +491,19 @@ def while_body(while_state):
         y = y0 + d
 
         # if converged then break out of iteration early
-        pred = dy_norm_old >= 0
+        pred = dy_norm_old >= 0.
         pred *= rate / (1 - rate) * dy_norm < tol
-        not_converged = dy_norm == 0 + pred
+        converged = (dy_norm == 0.) + pred
 
         dy_norm_old = dy_norm
 
-        return [k + 1, not_converged, dy_norm_old, d, y, n_function_evals]
+        return [k + 1, converged, dy_norm_old, d, y, n_function_evals]
 
-    k, not_converged, dy_norm_old, d, y, n_function_evals = \
+    k, converged, dy_norm_old, d, y, n_function_evals = \
         jax.lax.while_loop(while_cond,
                            while_body,
                            while_state)
-    return not_converged, k, y, d, state._replace(n_function_evals=n_function_evals)
+    return converged, k, y, d, state._replace(n_function_evals=n_function_evals)
 
 
 def rms_norm(arg):
@@ -558,6 +558,7 @@ def _prepare_next_step_order_change(state, d, y, n_iter):
 
 
 def _bdf_step(state, fun, jac):
+    #print('bdf_step', state.t, state.h)
     # we will try and use the old jacobian unless convergence of newton iteration
     # fails
     updated_jacobian = False
@@ -579,7 +580,8 @@ def while_body(while_state):
         state, step_accepted, updated_jacobian, y, d, n_iter = while_state
 
         # solve BDF equation using y0 as starting point
-        not_converged, n_iter, y, d, state = _newton_iteration(state, fun)
+        converged, n_iter, y, d, state = _newton_iteration(state, fun)
+        not_converged = converged == False
 
         # newton iteration did not converge, but jacobian has already been
         # evaluated so reduce step size by 0.3 (as per [1]) and try again
@@ -589,6 +591,11 @@ def while_body(while_state):
             state
         )
 
+        #if not_converged * updated_jacobian:
+        #    print('not converged, update step size by 0.3')
+        #if not_converged * (updated_jacobian == False):
+        #    print('not converged, update jacobian')
+
         # if not converged and jacobian not updated, then update the jacobian and try
         # again
         (state, updated_jacobian) = tree_multimap(
@@ -615,13 +622,16 @@ def while_body(while_state):
                           safety *
                           error_norm ** (-1 / (state.order + 1))))
 
+        #if converged * (error_norm > 1):
+        #    print('converged, but error is too large',error_norm, factor, d, scale_y)
+
         (state, step_accepted) = tree_multimap(
             partial(
                 jnp.where,
-                (not_converged == False) * (error_norm > 1)  # noqa: E712
+                converged * (error_norm > 1)  # noqa: E712
             ),
             (_update_step_size(state, factor), False),
-            (state, not_converged == False)
+            (state, converged)
         )
 
         return [state, step_accepted, updated_jacobian, y, d, n_iter]

From 957611edf5d65ca1a98ca39616307d0804eb1d0d Mon Sep 17 00:00:00 2001
From: Martin Robinson <martinjrobins@gmail.com>
Date: Wed, 15 Jul 2020 19:28:45 +0100
Subject: [PATCH 33/39] #1104 fix calc of init conditions

---
 pybamm/solvers/jax_bdf_solver.py | 47 ++++++++++++++++++--------------
 pybamm/solvers/jax_solver.py     | 12 +++++++-
 2 files changed, 38 insertions(+), 21 deletions(-)

diff --git a/pybamm/solvers/jax_bdf_solver.py b/pybamm/solvers/jax_bdf_solver.py
index 70f1bde9a8..15b88c7630 100644
--- a/pybamm/solvers/jax_bdf_solver.py
+++ b/pybamm/solvers/jax_bdf_solver.py
@@ -82,10 +82,10 @@ def fun_bind_inputs(y, t):
     t0 = t_eval[0]
     h0 = t_eval[1] - t0
 
-    stepper, failed = _bdf_init(
+    stepper = _bdf_init(
         fun_bind_inputs, jac_bind_inputs, mass, t0, y0, h0, rtol, atol
     )
-    i = failed * len(t_eval)
+    i = 0
     y_out = jnp.empty((len(t_eval), len(y0)), dtype=y0.dtype)
 
     init_state = [stepper, t_eval, i, y_out]
@@ -110,14 +110,14 @@ def for_body(j, y_out):
 
     stepper, t_eval, i, y_out = jax.lax.while_loop(cond_fun, body_fun,
                                                    init_state)
-
     return y_out
 
 
 BDFInternalStates = [
     't', 'atol', 'rtol', 'M', 'newton_tol', 'order', 'h', 'n_equal_steps', 'D',
     'y0', 'scale_y0', 'kappa', 'gamma', 'alpha', 'c', 'error_const', 'J', 'LU', 'U',
-    'psi', 'n_function_evals', 'n_jacobian_evals', 'n_lu_decompositions', 'n_steps'
+    'psi', 'n_function_evals', 'n_jacobian_evals', 'n_lu_decompositions', 'n_steps',
+    'consistent_y0_failed'
 ]
 BDFState = collections.namedtuple('BDFState', BDFInternalStates)
 
@@ -172,6 +172,7 @@ def _bdf_init(fun, jac, mass, t0, y0, h0, rtol, atol):
     y0, not_converged = _select_initial_conditions(
         fun, mass, t0, y0, state['newton_tol'], scale_y0
     )
+    state['consistent_y0_failed'] = not_converged
 
     f0 = fun(y0, t0)
     order = 1
@@ -213,7 +214,7 @@ def _bdf_init(fun, jac, mass, t0, y0, h0, rtol, atol):
     tuple_state = BDFState(*[state[k] for k in BDFInternalStates])
     y0, scale_y0 = _predict(tuple_state, D)
     psi = _update_psi(tuple_state, D)
-    return tuple_state._replace(y0=y0, scale_y0=scale_y0, psi=psi), not_converged
+    return tuple_state._replace(y0=y0, scale_y0=scale_y0, psi=psi)
 
 
 def _compute_R(order, factor):
@@ -268,14 +269,14 @@ def fun_a(y_a):
     J_a = jax.jacfwd(fun_a)(y_a)
     LU = jax.scipy.linalg.lu_factor(-J_a)
 
-    converged = True
+    converged = False
     dy_norm_old = -1.0
     k = 0
     while_state = [k, converged, dy_norm_old, d, y_a]
 
     def while_cond(while_state):
         k, converged, _, _, _ = while_state
-        return (converged == False) * (k < ROOT_SOLVE_MAXITER)
+        return (converged == False) * (k < ROOT_SOLVE_MAXITER)  # noqa: E712
 
     def while_body(while_state):
         k, converged, dy_norm_old, d, y_a = while_state
@@ -297,8 +298,8 @@ def while_body(while_state):
         return [k + 1, converged, dy_norm_old, d, y_a]
 
     k, converged, dy_norm_old, d, y_a = jax.lax.while_loop(while_cond,
-                                                        while_body,
-                                                        while_state)
+                                                           while_body,
+                                                           while_state)
     y_tilde = jax.ops.index_update(y0, algebraic_variables, y_a)
 
     return y_tilde, converged
@@ -394,6 +395,16 @@ def while_body(while_state):
     return D
 
 
+def _update_step_size_and_lu(state, factor):
+    state = _update_step_size(state, factor)
+
+    # redo lu (c has changed)
+    LU = jax.scipy.linalg.lu_factor(state.M - state.c * state.J)
+    n_lu_decompositions = state.n_lu_decompositions + 1
+
+    return state._replace(LU=LU, n_lu_decompositions=n_lu_decompositions)
+
+
 def _update_step_size(state, factor):
     """
     If step size h is changed then also need to update the terms in
@@ -408,10 +419,6 @@ def _update_step_size(state, factor):
     n_equal_steps = 0
     c = h * state.alpha[order]
 
-    # redo lu (c has changed)
-    LU = jax.scipy.linalg.lu_factor(state.M - c * state.J)
-    n_lu_decompositions = state.n_lu_decompositions + 1
-
     # update D using equations in section 3.2 of [1]
     RU = _compute_R(order, factor).dot(state.U)
     I = jnp.arange(0, MAX_ORDER + 1).reshape(-1, 1)
@@ -431,8 +438,8 @@ def _update_step_size(state, factor):
     # update y0 (D has changed)
     y0, scale_y0 = _predict(state, D)
 
-    return state._replace(n_equal_steps=n_equal_steps, LU=LU,
-                          n_lu_decompositions=n_lu_decompositions, h=h, c=c,
+    return state._replace(n_equal_steps=n_equal_steps,
+                          h=h, c=c,
                           D=D, psi=psi, y0=y0, scale_y0=scale_y0)
 
 
@@ -469,7 +476,7 @@ def _newton_iteration(state, fun):
 
     def while_cond(while_state):
         k, converged, _, _, _, _ = while_state
-        return (converged == False) * (k < NEWTON_MAXITER)
+        return (converged == False) * (k < NEWTON_MAXITER)  # noqa: E712
 
     def while_body(while_state):
         k, converged, dy_norm_old, d, y, n_function_evals = while_state
@@ -553,7 +560,7 @@ def _prepare_next_step_order_change(state, d, y, n_iter):
 
     factor = jnp.min((MAX_FACTOR, safety * factors[max_index]))
 
-    new_state = _update_step_size(state._replace(D=D, order=order), factor)
+    new_state = _update_step_size_and_lu(state._replace(D=D, order=order), factor)
     return new_state
 
 
@@ -581,13 +588,13 @@ def while_body(while_state):
 
         # solve BDF equation using y0 as starting point
         converged, n_iter, y, d, state = _newton_iteration(state, fun)
-        not_converged = converged == False
+        not_converged = converged == False  # noqa: E712
 
         # newton iteration did not converge, but jacobian has already been
         # evaluated so reduce step size by 0.3 (as per [1]) and try again
         state = tree_multimap(
             partial(jnp.where, not_converged * updated_jacobian),
-            _update_step_size(state, 0.3),
+            _update_step_size_and_lu(state, 0.3),
             state
         )
 
@@ -630,7 +637,7 @@ def while_body(while_state):
                 jnp.where,
                 converged * (error_norm > 1)  # noqa: E712
             ),
-            (_update_step_size(state, factor), False),
+            (_update_step_size_and_lu(state, factor), False),
             (state, converged)
         )
 
diff --git a/pybamm/solvers/jax_solver.py b/pybamm/solvers/jax_solver.py
index db912d0da1..77cdda59d7 100644
--- a/pybamm/solvers/jax_solver.py
+++ b/pybamm/solvers/jax_solver.py
@@ -146,7 +146,7 @@ def solve_model_rk45(inputs):
             return jnp.transpose(y)
 
         def solve_model_bdf(inputs):
-            y = pybamm.jax_bdf_integrate(
+            y, stepper = pybamm.jax_bdf_integrate(
                 rhs_dae,
                 y0,
                 t_eval,
@@ -156,6 +156,16 @@ def solve_model_bdf(inputs):
                 mass=mass,
                 **self.extra_options
             )
+            #sstring = ''
+            #sstring += 'JAX {} solver - stats\n'.format(self.method)
+            #sstring += '\tNumber of steps: {}\n'.format(stepper.n_steps)
+            #sstring += '\tnumber of function evaluations: {}\n'.format(
+            #    stepper.n_function_evals)
+            #sstring += '\tnumber of jacobian evaluations: {}\n'.format(
+            #    stepper.n_jacobian_evals)
+            #sstring += '\tnumber of LU decompositions: {}\n'.format(
+            #    stepper.n_lu_decompositions)
+            #pybamm.logger.info(sstring)
             return jnp.transpose(y)
 
         if self.method == 'RK45':

From 02bfd2ce79d57a68888c9b24ec80facd03220f36 Mon Sep 17 00:00:00 2001
From: Martin Robinson <martinjrobins@gmail.com>
Date: Thu, 16 Jul 2020 09:26:19 +0100
Subject: [PATCH 34/39] #1104 convert to diagonal mass matrix

---
 pybamm/solvers/jax_bdf_solver.py              | 72 ++++++++++++-------
 pybamm/solvers/jax_solver.py                  |  6 +-
 .../unit/test_solvers/test_jax_bdf_solver.py  | 14 ++--
 3 files changed, 56 insertions(+), 36 deletions(-)

diff --git a/pybamm/solvers/jax_bdf_solver.py b/pybamm/solvers/jax_bdf_solver.py
index 15b88c7630..94bd59faf2 100644
--- a/pybamm/solvers/jax_bdf_solver.py
+++ b/pybamm/solvers/jax_bdf_solver.py
@@ -42,7 +42,7 @@ def _bdf_odeint(fun, mass, rtol, atol, y0, t_eval, *args):
         function to evaluate the time derivative of the solution `y` at time
         `t` as `func(y, t, *args)`, producing the same shape/structure as `y0`.
     mass: ndarray
-        constant mass matrix with shape (n,n)
+        diagonal of the mass matrix with shape (n,)
     y0: ndarray
         initial state vector, has shape (n,)
     t_eval: ndarray
@@ -147,7 +147,7 @@ def _bdf_init(fun, jac, mass, t0, y0, h0, rtol, atol):
         function with signature (y, t), where t is a scalar time and y is a ndarray with
         shape (n,), returns the jacobian matrix of fun as an ndarray with shape (n,n)
     mass: ndarray
-        constant mass matrix with shape (n,n)
+        diagonal of the mass matrix with shape (n,)
     t0: float
         initial time
     y0: ndarray
@@ -201,7 +201,10 @@ def _bdf_init(fun, jac, mass, t0, y0, h0, rtol, atol):
 
     J = jac(y0, t0)
     state['J'] = J
-    state['LU'] = jax.scipy.linalg.lu_factor(state['M'] - c * J)
+
+    Psi = -c * J
+    Psi = jax.ops.index_add(Psi, jnp.diag_indices_from(J), state['M'])
+    state['LU'] = jax.scipy.linalg.lu_factor(Psi)
 
     state['U'] = _compute_R(order, 1)
     state['psi'] = None
@@ -241,7 +244,7 @@ def _compute_R(order, factor):
 
 def _select_initial_conditions(fun, M, t0, y0, tol, scale_y0):
     # identify algebraic variables as zeros on diagonal
-    algebraic_variables = jnp.diag(M) == 0.
+    algebraic_variables = M == 0.
 
     # if all differentiable variables then return y0 (can use normal python if since M
     # is static)
@@ -399,7 +402,9 @@ def _update_step_size_and_lu(state, factor):
     state = _update_step_size(state, factor)
 
     # redo lu (c has changed)
-    LU = jax.scipy.linalg.lu_factor(state.M - state.c * state.J)
+    Psi = -state.c * state.J
+    Psi = jax.ops.index_add(Psi, jnp.diag_indices_from(state.J), state.M)
+    LU = jax.scipy.linalg.lu_factor(Psi)
     n_lu_decompositions = state.n_lu_decompositions + 1
 
     return state._replace(LU=LU, n_lu_decompositions=n_lu_decompositions)
@@ -450,7 +455,9 @@ def _update_jacobian(state, jac):
     """
     J = jac(state.y0, state.t + state.h)
     n_jacobian_evals = state.n_jacobian_evals + 1
-    LU = jax.scipy.linalg.lu_factor(state.M - state.c * J)
+    Psi = -state.c * state.J
+    Psi = jax.ops.index_add(Psi, jnp.diag_indices_from(state.J), state.M)
+    LU = jax.scipy.linalg.lu_factor(Psi)
     n_lu_decompositions = state.n_lu_decompositions + 1
     return state._replace(J=J, n_jacobian_evals=n_jacobian_evals, LU=LU,
                           n_lu_decompositions=n_lu_decompositions)
@@ -482,7 +489,7 @@ def while_body(while_state):
         k, converged, dy_norm_old, d, y, n_function_evals = while_state
         f_eval = fun(y, t)
         n_function_evals += 1
-        b = c * f_eval - M @ (psi + d)
+        b = c * f_eval - M * (psi + d)
         dy = jax.scipy.linalg.lu_solve(LU, b)
         dy_norm = jnp.sqrt(jnp.mean((dy / scale_y0)**2))
         rate = dy_norm / dy_norm_old
@@ -765,7 +772,7 @@ def jax_bdf_integrate(func, y0, t_eval, *args, rtol=1e-6, atol=1e-6, mass=None):
     atol: (optional) float
         absolute tolerance for the solver
     mass: (optional) ndarray
-        constant mass matrix with shape (n,n)
+        diagonal of the mass matrix with shape (n,)
 
     Returns
     -------
@@ -794,6 +801,9 @@ def _check_arg(arg):
     in_avals = tuple(safe_map(abstractify, flat_args))
     converted, consts = closure_convert(func, in_tree, in_avals)
 
+    if mass is None:
+        mass = onp.ones(y0.shape[0], dtype=y0.dtype)
+
     return _bdf_odeint_wrapper(converted, mass, rtol, atol, y0, t_eval, *consts, *args)
 
 
@@ -834,10 +844,7 @@ def flax_scan(f, init, xs, length=None):  # pragma: no cover
 @jax.partial(jax.jit, static_argnums=(0, 1, 2, 3))
 def _bdf_odeint_wrapper(func, mass, rtol, atol, y0, ts, *args):
     y0, unravel = ravel_pytree(y0)
-    if mass is None:
-        mass = jnp.identity(y0.shape[0], dtype=y0.dtype)
-    else:
-        mass = block_diag(tree_flatten(mass)[0])
+    mass, _ = ravel_pytree(mass)
     func = ravel_first_arg(func, unravel)
     out = _bdf_odeint(func, mass, rtol, atol, y0, ts, *args)
     return jax.vmap(unravel)(out)
@@ -851,8 +858,6 @@ def _bdf_odeint_fwd(func, mass, rtol, atol, y0, ts, *args):
 def _bdf_odeint_rev(func, mass, rtol, atol, res, g):
     ys, ts, args = res
 
-    diag_mass = jnp.diag(mass)
-
     def aug_dynamics(augmented_state, t, *args):
         """Original system augmented with vjp_y, vjp_t and vjp_args."""
         y, y_bar, *_ = augmented_state
@@ -862,7 +867,7 @@ def aug_dynamics(augmented_state, t, *args):
 
         # Adjoint equations for semi-explicit dae index 1 system from
         #
-        # Cao, Y., Li, S., Petzold, L., & Serban, R. (2003). Adjoint sensitivity
+        # [1] Cao, Y., Li, S., Petzold, L., & Serban, R. (2003). Adjoint sensitivity
         # analysis for differential-algebraic equations: The adjoint DAE system and its
         # numerical solution. SIAM journal on scientific computing, 24(3), 1076-1089.
         #
@@ -870,19 +875,38 @@ def aug_dynamics(augmented_state, t, *args):
         #           0 =  J_da^T y_bar_d + J_aa^T y_bar_d
 
         y_bar_dot, *rest = vjpfun(y_bar)
-        # identify algebraic variables as zeros on diagonal
-        y_bar_dot = jnp.where(diag_mass == 0., -y_bar_dot, y_bar_dot)
 
         return (-y_dot, y_bar_dot, *rest)
 
-    y_bar = g[-1]
+    algebraic_variables = mass == 0.
+    differentiable_variables = algebraic_variables == False  # noqa: E712
+
+    def initialise(g0, y0, t0):
+        # [1] gives init conditions for y_bar_a = g_d - J_ad^T (J_aa^T)^-1 g_a
+        if jnp.any(algebraic_variables):
+            J = jax.jacfwd(func)(y0, t0, *args)
+
+            # boolean arguments not implemented in jnp.ix_
+            J_aa = J[onp.ix_(algebraic_variables, algebraic_variables)]
+            J_ad = J[onp.ix_(algebraic_variables, differentiable_variables)]
+            LU = jax.scipy.linalg.lu_factor(J_aa)
+            g0_a = g0[algebraic_variables]
+            invJ_aa = jax.scipy.linalg.lu_solve(LU, g0_a)
+            y_bar = jax.ops.index_update(
+                g0, differentiable_variables,
+                (g0_a - J_ad @ invJ_aa) / mass[differentiable_variables]
+            )
+        else:
+            y_bar = g0 / mass
+        return y_bar
+
+    y_bar = initialise(g[-1], ys[-1], ts[-1])
     ts_bar = []
     t0_bar = 0.
 
-    def arg_to_identity(arg):
-        return jnp.identity(arg.shape[0] if arg.ndim > 0 else 1, dtype=arg.dtype)
-
-    aug_mass = (mass, mass, jnp.array(1.), tree_map(arg_to_identity, args))
+    def arg_to_ones(arg):
+        return onp.ones(arg.shape[0] if arg.ndim > 0 else 1, dtype=arg.dtype)
+    aug_mass = (mass, mass, jnp.array(1.), tree_map(arg_to_ones, args))
 
     def scan_fun(carry, i):
         y_bar, t0_bar, args_bar = carry
@@ -897,10 +921,10 @@ def scan_fun(carry, i):
             rtol=rtol, atol=atol)
         y_bar, t0_bar, args_bar = tree_map(op.itemgetter(1), (y_bar, t0_bar, args_bar))
         # Add gradient from current output
-        y_bar = y_bar + g[i - 1]
+        y_bar = y_bar + initialise(g[i - 1], ys[i - 1], ts[i - 1])
         return (y_bar, t0_bar, args_bar), t_bar
 
-    init_carry = (g[-1], 0., tree_map(jnp.zeros_like, args))
+    init_carry = (y_bar, t0_bar, tree_map(jnp.zeros_like, args))
     (y_bar, t0_bar, args_bar), rev_ts_bar = jax.lax.scan(
         scan_fun, init_carry, jnp.arange(len(ts) - 1, 0, -1))
     ts_bar = jnp.concatenate([jnp.array([t0_bar]), rev_ts_bar[::-1]])
diff --git a/pybamm/solvers/jax_solver.py b/pybamm/solvers/jax_solver.py
index 77cdda59d7..54a49f5daa 100644
--- a/pybamm/solvers/jax_solver.py
+++ b/pybamm/solvers/jax_solver.py
@@ -122,7 +122,9 @@ def create_solve(self, model, t_eval):
         y0 = jnp.array(model.y0).reshape(-1)
         mass = None
         if self.method == 'BDF':
-            mass = model.mass_matrix.entries.toarray()
+            mass = model.mass_matrix.entries.diagonal()
+            if onp.count_nonzero(mass) != model.mass_matrix.entries.nnz:
+                raise RuntimeError("Solver only supports a diagonal mass matrix")
 
         def rhs_ode(y, t, inputs):
             return model.rhs_eval(t, y, inputs),
@@ -146,7 +148,7 @@ def solve_model_rk45(inputs):
             return jnp.transpose(y)
 
         def solve_model_bdf(inputs):
-            y, stepper = pybamm.jax_bdf_integrate(
+            y = pybamm.jax_bdf_integrate(
                 rhs_dae,
                 y0,
                 t_eval,
diff --git a/tests/unit/test_solvers/test_jax_bdf_solver.py b/tests/unit/test_solvers/test_jax_bdf_solver.py
index f76871387e..fc3b7d7255 100644
--- a/tests/unit/test_solvers/test_jax_bdf_solver.py
+++ b/tests/unit/test_solvers/test_jax_bdf_solver.py
@@ -64,10 +64,7 @@ def fun(y, t):
                 y[1] - 2.0 * y[0],
             ])
 
-        mass = jax.numpy.array([
-            [1.0, 0.0],
-            [0.0, 0.0],
-        ])
+        mass = jax.numpy.array([2.0, 0.0])
 
         # give some bad initial conditions, solver should calculate correct ones using
         # this as a guess
@@ -78,7 +75,7 @@ def fun(y, t):
         t1 = time.perf_counter() - t0
 
         # test accuracy
-        soln = np.exp(0.1 * t_eval)
+        soln = np.exp(0.05 * t_eval)
         np.testing.assert_allclose(y[:, 0], soln,
                                    rtol=1e-7, atol=1e-7)
         np.testing.assert_allclose(y[:, 1], 2.0 * soln,
@@ -92,7 +89,7 @@ def fun(y, t):
         self.assertLess(t2, t1)
 
         # test second run is accurate
-        np.testing.assert_allclose(y[:, 0], np.exp(0.1 * t_eval),
+        np.testing.assert_allclose(y[:, 0], np.exp(0.05 * t_eval),
                                    rtol=1e-7, atol=1e-7)
 
     def test_solver_sensitivities(self):
@@ -150,10 +147,7 @@ def fun(y, t, inputs):
                 y[1] - 2.0 * y[0],
             ])
 
-        mass = jax.numpy.array([
-            [1.0, 0.0],
-            [0.0, 0.0],
-        ])
+        mass = jax.numpy.array([2.0, 0.0])
 
         y0 = jax.numpy.array([1.0, 2.0])
 

From b35019b09ad8f627bc04d297d6bae00d3d8120a8 Mon Sep 17 00:00:00 2001
From: Martin Robinson <martinjrobins@gmail.com>
Date: Thu, 16 Jul 2020 10:33:10 +0100
Subject: [PATCH 35/39] #1104 swap back to full mass matrix

---
 pybamm/solvers/jax_bdf_solver.py              | 49 ++++++++++---------
 pybamm/solvers/jax_solver.py                  |  4 +-
 .../unit/test_solvers/test_jax_bdf_solver.py  | 10 +++-
 3 files changed, 35 insertions(+), 28 deletions(-)

diff --git a/pybamm/solvers/jax_bdf_solver.py b/pybamm/solvers/jax_bdf_solver.py
index 94bd59faf2..59c5459d12 100644
--- a/pybamm/solvers/jax_bdf_solver.py
+++ b/pybamm/solvers/jax_bdf_solver.py
@@ -202,9 +202,7 @@ def _bdf_init(fun, jac, mass, t0, y0, h0, rtol, atol):
     J = jac(y0, t0)
     state['J'] = J
 
-    Psi = -c * J
-    Psi = jax.ops.index_add(Psi, jnp.diag_indices_from(J), state['M'])
-    state['LU'] = jax.scipy.linalg.lu_factor(Psi)
+    state['LU'] = jax.scipy.linalg.lu_factor(state['M'] - c * J)
 
     state['U'] = _compute_R(order, 1)
     state['psi'] = None
@@ -244,7 +242,7 @@ def _compute_R(order, factor):
 
 def _select_initial_conditions(fun, M, t0, y0, tol, scale_y0):
     # identify algebraic variables as zeros on diagonal
-    algebraic_variables = M == 0.
+    algebraic_variables = jnp.diag(M == 0.)
 
     # if all differentiable variables then return y0 (can use normal python if since M
     # is static)
@@ -402,9 +400,7 @@ def _update_step_size_and_lu(state, factor):
     state = _update_step_size(state, factor)
 
     # redo lu (c has changed)
-    Psi = -state.c * state.J
-    Psi = jax.ops.index_add(Psi, jnp.diag_indices_from(state.J), state.M)
-    LU = jax.scipy.linalg.lu_factor(Psi)
+    LU = jax.scipy.linalg.lu_factor(state.M - state.c * state.J)
     n_lu_decompositions = state.n_lu_decompositions + 1
 
     return state._replace(LU=LU, n_lu_decompositions=n_lu_decompositions)
@@ -455,9 +451,7 @@ def _update_jacobian(state, jac):
     """
     J = jac(state.y0, state.t + state.h)
     n_jacobian_evals = state.n_jacobian_evals + 1
-    Psi = -state.c * state.J
-    Psi = jax.ops.index_add(Psi, jnp.diag_indices_from(state.J), state.M)
-    LU = jax.scipy.linalg.lu_factor(Psi)
+    LU = jax.scipy.linalg.lu_factor(state.M - state.c * J)
     n_lu_decompositions = state.n_lu_decompositions + 1
     return state._replace(J=J, n_jacobian_evals=n_jacobian_evals, LU=LU,
                           n_lu_decompositions=n_lu_decompositions)
@@ -489,7 +483,7 @@ def while_body(while_state):
         k, converged, dy_norm_old, d, y, n_function_evals = while_state
         f_eval = fun(y, t)
         n_function_evals += 1
-        b = c * f_eval - M * (psi + d)
+        b = c * f_eval - M @ (psi + d)
         dy = jax.scipy.linalg.lu_solve(LU, b)
         dy_norm = jnp.sqrt(jnp.mean((dy / scale_y0)**2))
         rate = dy_norm / dy_norm_old
@@ -800,10 +794,6 @@ def _check_arg(arg):
     flat_args, in_tree = tree_flatten((y0, t_eval[0], *args))
     in_avals = tuple(safe_map(abstractify, flat_args))
     converted, consts = closure_convert(func, in_tree, in_avals)
-
-    if mass is None:
-        mass = onp.ones(y0.shape[0], dtype=y0.dtype)
-
     return _bdf_odeint_wrapper(converted, mass, rtol, atol, y0, t_eval, *consts, *args)
 
 
@@ -844,7 +834,10 @@ def flax_scan(f, init, xs, length=None):  # pragma: no cover
 @jax.partial(jax.jit, static_argnums=(0, 1, 2, 3))
 def _bdf_odeint_wrapper(func, mass, rtol, atol, y0, ts, *args):
     y0, unravel = ravel_pytree(y0)
-    mass, _ = ravel_pytree(mass)
+    if mass is None:
+        mass = onp.identity(y0.shape[0], dtype=y0.dtype)
+    else:
+        mass = block_diag(tree_flatten(mass)[0])
     func = ravel_first_arg(func, unravel)
     out = _bdf_odeint(func, mass, rtol, atol, y0, ts, *args)
     return jax.vmap(unravel)(out)
@@ -878,12 +871,20 @@ def aug_dynamics(augmented_state, t, *args):
 
         return (-y_dot, y_bar_dot, *rest)
 
-    algebraic_variables = mass == 0.
+    algebraic_variables = jnp.diag(mass) == 0.
     differentiable_variables = algebraic_variables == False  # noqa: E712
+    mass_is_I = (mass == jnp.eye(mass.shape[0])).all()
+    is_dae = jnp.any(algebraic_variables)
+
+    if not mass_is_I:
+        M_dd = mass[onp.ix_(differentiable_variables, differentiable_variables)]
+        LU_invM_dd = jax.scipy.linalg.lu_factor(M_dd)
 
     def initialise(g0, y0, t0):
         # [1] gives init conditions for y_bar_a = g_d - J_ad^T (J_aa^T)^-1 g_a
-        if jnp.any(algebraic_variables):
+        if mass_is_I:
+            y_bar = g0
+        elif is_dae:
             J = jax.jacfwd(func)(y0, t0, *args)
 
             # boolean arguments not implemented in jnp.ix_
@@ -894,19 +895,21 @@ def initialise(g0, y0, t0):
             invJ_aa = jax.scipy.linalg.lu_solve(LU, g0_a)
             y_bar = jax.ops.index_update(
                 g0, differentiable_variables,
-                (g0_a - J_ad @ invJ_aa) / mass[differentiable_variables]
+                jax.scipy.linalg.lu_solve(LU_invM_dd,
+                                          g0_a - J_ad @ invJ_aa)
             )
         else:
-            y_bar = g0 / mass
+            y_bar = jax.scipy.linalg.lu_solve(LU_invM_dd, g0)
         return y_bar
 
     y_bar = initialise(g[-1], ys[-1], ts[-1])
     ts_bar = []
     t0_bar = 0.
 
-    def arg_to_ones(arg):
-        return onp.ones(arg.shape[0] if arg.ndim > 0 else 1, dtype=arg.dtype)
-    aug_mass = (mass, mass, jnp.array(1.), tree_map(arg_to_ones, args))
+    def arg_to_identity(arg):
+        return onp.identity(arg.shape[0] if arg.ndim > 0 else 1, dtype=arg.dtype)
+
+    aug_mass = (mass, mass, jnp.array(1.), tree_map(arg_to_identity, args))
 
     def scan_fun(carry, i):
         y_bar, t0_bar, args_bar = carry
diff --git a/pybamm/solvers/jax_solver.py b/pybamm/solvers/jax_solver.py
index 54a49f5daa..9853cbbe76 100644
--- a/pybamm/solvers/jax_solver.py
+++ b/pybamm/solvers/jax_solver.py
@@ -122,9 +122,7 @@ def create_solve(self, model, t_eval):
         y0 = jnp.array(model.y0).reshape(-1)
         mass = None
         if self.method == 'BDF':
-            mass = model.mass_matrix.entries.diagonal()
-            if onp.count_nonzero(mass) != model.mass_matrix.entries.nnz:
-                raise RuntimeError("Solver only supports a diagonal mass matrix")
+            mass = model.mass_matrix.entries.toarray()
 
         def rhs_ode(y, t, inputs):
             return model.rhs_eval(t, y, inputs),
diff --git a/tests/unit/test_solvers/test_jax_bdf_solver.py b/tests/unit/test_solvers/test_jax_bdf_solver.py
index fc3b7d7255..1ec810c09d 100644
--- a/tests/unit/test_solvers/test_jax_bdf_solver.py
+++ b/tests/unit/test_solvers/test_jax_bdf_solver.py
@@ -64,7 +64,10 @@ def fun(y, t):
                 y[1] - 2.0 * y[0],
             ])
 
-        mass = jax.numpy.array([2.0, 0.0])
+        mass = jax.numpy.array([
+            [2.0, 0.0],
+            [0.0, 0.0],
+        ])
 
         # give some bad initial conditions, solver should calculate correct ones using
         # this as a guess
@@ -147,7 +150,10 @@ def fun(y, t, inputs):
                 y[1] - 2.0 * y[0],
             ])
 
-        mass = jax.numpy.array([2.0, 0.0])
+        mass = jax.numpy.array([
+            [2.0, 0.0],
+            [0.0, 0.0],
+        ])
 
         y0 = jax.numpy.array([1.0, 2.0])
 

From 6a85083b20a54426f308b7cc92ec1bd086d64ea3 Mon Sep 17 00:00:00 2001
From: Martin Robinson <martinjrobins@gmail.com>
Date: Sun, 19 Jul 2020 09:29:38 +0100
Subject: [PATCH 36/39] #1104 remove commented out code

---
 pybamm/solvers/jax_solver.py | 10 ----------
 1 file changed, 10 deletions(-)

diff --git a/pybamm/solvers/jax_solver.py b/pybamm/solvers/jax_solver.py
index 9853cbbe76..db912d0da1 100644
--- a/pybamm/solvers/jax_solver.py
+++ b/pybamm/solvers/jax_solver.py
@@ -156,16 +156,6 @@ def solve_model_bdf(inputs):
                 mass=mass,
                 **self.extra_options
             )
-            #sstring = ''
-            #sstring += 'JAX {} solver - stats\n'.format(self.method)
-            #sstring += '\tNumber of steps: {}\n'.format(stepper.n_steps)
-            #sstring += '\tnumber of function evaluations: {}\n'.format(
-            #    stepper.n_function_evals)
-            #sstring += '\tnumber of jacobian evaluations: {}\n'.format(
-            #    stepper.n_jacobian_evals)
-            #sstring += '\tnumber of LU decompositions: {}\n'.format(
-            #    stepper.n_lu_decompositions)
-            #pybamm.logger.info(sstring)
             return jnp.transpose(y)
 
         if self.method == 'RK45':

From 1cd40f8c631facf936f12ed28a34b99e20d7ca53 Mon Sep 17 00:00:00 2001
From: Martin Robinson <martinjrobins@gmail.com>
Date: Sun, 19 Jul 2020 09:36:02 +0100
Subject: [PATCH 37/39] #1104 add jax bdf solver to compare-dae-solver

---
 examples/scripts/compare-dae-solver.py | 7 +++++++
 1 file changed, 7 insertions(+)

diff --git a/examples/scripts/compare-dae-solver.py b/examples/scripts/compare-dae-solver.py
index 8b3a467a26..77766658f4 100644
--- a/examples/scripts/compare-dae-solver.py
+++ b/examples/scripts/compare-dae-solver.py
@@ -1,5 +1,6 @@
 import pybamm
 import numpy as np
+import time
 
 pybamm.set_logging_level("INFO")
 
@@ -50,6 +51,12 @@
         """
     )
 
+model.convert_to_format = 'jax'
+model.events = []
+solver = pybamm.JaxSolver(method='BDF', root_method='lm', atol=1e-8, rtol=1e-8)
+jax_bdf_sol = solver.solve(model, t_eval)
+solutions.append(jax_bdf_sol)
+
 # plot
 plot = pybamm.QuickPlot(solutions)
 plot.dynamic_plot()

From 9ceabe22ec5c5487826c20a4090e7be02ab10501 Mon Sep 17 00:00:00 2001
From: Martin Robinson <martinjrobins@gmail.com>
Date: Mon, 20 Jul 2020 07:07:20 +0100
Subject: [PATCH 38/39] Revert "#1104 add jax bdf solver to compare-dae-solver"

This reverts commit 1cd40f8c631facf936f12ed28a34b99e20d7ca53.
---
 examples/scripts/compare-dae-solver.py | 7 -------
 1 file changed, 7 deletions(-)

diff --git a/examples/scripts/compare-dae-solver.py b/examples/scripts/compare-dae-solver.py
index 77766658f4..8b3a467a26 100644
--- a/examples/scripts/compare-dae-solver.py
+++ b/examples/scripts/compare-dae-solver.py
@@ -1,6 +1,5 @@
 import pybamm
 import numpy as np
-import time
 
 pybamm.set_logging_level("INFO")
 
@@ -51,12 +50,6 @@
         """
     )
 
-model.convert_to_format = 'jax'
-model.events = []
-solver = pybamm.JaxSolver(method='BDF', root_method='lm', atol=1e-8, rtol=1e-8)
-jax_bdf_sol = solver.solve(model, t_eval)
-solutions.append(jax_bdf_sol)
-
 # plot
 plot = pybamm.QuickPlot(solutions)
 plot.dynamic_plot()

From f6c51f97d8455701a370a8db1df993f92f88a681 Mon Sep 17 00:00:00 2001
From: Martin Robinson <martinjrobins@gmail.com>
Date: Mon, 20 Jul 2020 07:09:53 +0100
Subject: [PATCH 39/39] #1104 more detail to changelog

---
 CHANGELOG.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index eac5700e56..1090184dd7 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -2,7 +2,7 @@
 
 ## Features
 
--   Added support for index 1 dae equations and sensitivity calculations to JAX BDF solver ([#1107](https://github.com/pybamm-team/PyBaMM/pull/1107))
+-   Added support for index 1 semi-explicit dae equations and sensitivity calculations to JAX BDF solver ([#1107](https://github.com/pybamm-team/PyBaMM/pull/1107))
 -   Allowed keyword arguments to be passed to `Simulation.plot()` ([#1099](https://github.com/pybamm-team/PyBaMM/pull/1099))
 
 ## Optimizations