From 35a20ff9f7d9c74336ba805b32a24cfe6fd72f5b Mon Sep 17 00:00:00 2001
From: Songchen Tan
Date: Tue, 26 Nov 2024 16:53:09 -0500
Subject: [PATCH] Remake Halley example
---
README.md | 1 +
docs/Project.toml | 3 +
docs/make.jl | 6 +-
docs/src/examples/halley.md | 76 +++++++++++
examples/halley.ipynb | 264 ------------------------------------
examples/halley.jl | 46 -------
examples/integration.jl | 108 +++++++--------
src/array.jl | 2 +
8 files changed, 137 insertions(+), 369 deletions(-)
create mode 100644 docs/src/examples/halley.md
delete mode 100644 examples/halley.ipynb
delete mode 100644 examples/halley.jl
diff --git a/README.md b/README.md
index 73af9a4..511d886 100644
--- a/README.md
+++ b/README.md
@@ -12,6 +12,7 @@
+
diff --git a/docs/Project.toml b/docs/Project.toml index 34fc0c8..a8c5b14 100644 --- a/docs/Project.toml +++ b/docs/Project.toml @@ -1,3 +1,6 @@ [deps] Documenter = "e30172f5-a6a5-5a46-863b-614d45cd2de4" +ForwardDiff = "f6369f11-7733-5829-9624-2563aa707210" TaylorDiff = "b36ab563-344f-407b-a36a-4f200bebf99c" +TaylorIntegration = "92b13dbe-c966-51a2-8445-caca9f8a7d42" +TaylorSeries = "6aa5eb33-94cf-58f4-a9d0-e4b2c4fc25ea" diff --git a/docs/make.jl b/docs/make.jl index 06131cc..249e0f5 100644 --- a/docs/make.jl +++ b/docs/make.jl @@ -12,9 +12,13 @@ makedocs(; prettyurls = get(ENV, "CI", "false") == "true", canonical = "https://juliadiff.org/TaylorDiff.jl", edit_link = "main", - assets = String[]), + assets = String[] + ), pages = [ "Home" => "index.md", + "Examples" => [ + "Efficient Halley's method for nonlinear solving" => "examples/halley.md" + ], "Theory" => "theory.md", "API" => "api.md" ]) diff --git a/docs/src/examples/halley.md b/docs/src/examples/halley.md new file mode 100644 index 0000000..ffc96aa --- /dev/null +++ b/docs/src/examples/halley.md @@ -0,0 +1,76 @@ +# Efficient Halley's method for nonlinear solving + +## Introduction + +Say we have a system of $n$ equations with $n$ unknowns $f(x)=0$, and $f\in \mathbb R^n\to\mathbb R^n$ is sufficiently smooth. + +Given a initial guess $x_0$, Newton's method finds a solution by iterating like + +$$x_{i+1}=x_i-J(x_i)^{-1}f(x_i)$$ + +and this method converges quadratically. + +We can make it converge faster using higher-order derivative information. For example, Halley's method iterates like + +$$x_{i+1}=x_i-(a_i\odot a_i)\oslash(a_i-b_i/2)$$ + +where the vector multiplication and division $\odot,\oslash$ are defined element-wise, and term $a_i$ and $b_i$ are defined by $J(x_i)a_i = f(x_i)$ and $J(x_i)b_i = H(x_i)a_ia_i$. + +Halley's method is proved to converge cubically, which is faster than Newton's method. Here, we demonstrate that with TaylorDiff.jl, you can compute the Hessian-vector-vector product $H(x_i)a_ia_i$ very efficiently, such that the Halley's method is almost as cheap as Newton's method per iteration. + +## Implementation + +We first define the two iteration schemes mentioned above: + +```@example 1 +using TaylorDiff, LinearAlgebra +import ForwardDiff + +function newton(f, x, p; tol = 1e-12, maxiter = 100) + fp = Base.Fix2(f, p) + for i in 1:maxiter + fx = fp(x) + error = norm(fx) + println("Iteration $i: x = $x, f(x) = $fx, error = $error") + error < tol && return + J = ForwardDiff.jacobian(fp, x) + a = J \ fx + @. x -= a + end +end + +function halley(f, x, p; tol = 1e-12, maxiter = 100) + fp = Base.Fix2(f, p) + for i in 1:maxiter + fx = f(x, p) + error = norm(fx) + println("Iteration $i: x = $x, f(x) = $fx, error = $error") + error < tol && return + J = ForwardDiff.jacobian(fp, x) + a = J \ fx + hvvp = derivative(fp, x, a, Val(2)) + b = J \ hvvp + @. x -= (a * a) / (a - b / 2) + end +end +``` + +Note that in Halley's method, the hessian-vector-vector product is computed with `derivative(fp, x, a, Val(2))`. It is guaranteed that asymptotically this is only taking 2x more time compared to evaluating `fp(x)` itself. + +Now we define some test function: + +```@example 1 +f(x, p) = x .* x - p +``` + +The Newton's method takes 6 iterations to converge: + +```@example 1 +newton(f, [1., 1.], [2., 2.]) +``` + +While the Halley's method takes 4 iterations to converge: + +```@example 1 +halley(f, [1., 1.], [2., 2.]) +``` \ No newline at end of file diff --git a/examples/halley.ipynb b/examples/halley.ipynb deleted file mode 100644 index 0033fdf..0000000 --- a/examples/halley.ipynb +++ /dev/null @@ -1,264 +0,0 @@ -{ - "cells": [ - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "# The Jacobian- and Hessian-free Halley's Method\n", - "\n", - "Say we have a system of $n$ equations with $n$ unknowns\n", - "\n", - "$$\n", - "f(x)=0\n", - "$$\n", - "\n", - "and $f\\in \\mathbb R^n\\to\\mathbb R^n$ is sufficiently smooth.\n", - "\n", - "Given a initial guess $x_0$, Halley's method specifies a series of points approximating the solution, where each iteration is\n", - "\n", - "$$\n", - "x^{(i+1)}=x^{(i)}+\\frac{a^{(i)}a^{(i)}}{a^{(i)}+b^{(i)}/2}\n", - "$$\n", - "\n", - "where the vector multiplication and division $ab, a/b$ is defined in Banach algebra, and the vectors $a^{(i)}, b^{(i)}$ are defined as\n", - "\n", - "$$\n", - "J(x^{(i)})a^{(i)} = -f(x^{(i)})\n", - "$$\n", - "\n", - "and\n", - "\n", - "$$\n", - "J(x^{(i)})b^{(i)} = H(x^{(i)})a^{(i)}a^{(i)}\n", - "$$\n" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "The full Jacobian (a matrix) and full Hessian (a 3-tensor) representation can be avoided by using forward-mode automatic differentiation. It is well known that a forward evaluation on a dual number $(x, v)$ gives the Jacobian-vector product,\n", - "\n", - "$$\n", - "f(x,v)=(f(x),Jv)\n", - "$$\n", - "\n", - "and similarly a forward evaluation on a second order Taylor expansion gives the Hessian-vector-vector product,\n", - "\n", - "$$\n", - "f(x,v,0)=f(x,Jv,Hvv)\n", - "$$\n", - "\n", - "Below, we demonstrate this possibility with TaylorDiff.jl." - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Jacobian-free Newton Krylov\n", - "\n", - "To get started we first get familiar with the JFNK:" - ] - }, - { - "cell_type": "code", - "execution_count": 1, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "newton (generic function with 1 method)" - ] - }, - "metadata": {}, - "output_type": "display_data" - } - ], - "source": [ - "# The Jacobi- and Hessian-free Halley method for solving nonlinear equations\n", - "\n", - "using TaylorDiff\n", - "using LinearAlgebra\n", - "using LinearSolve\n", - "\n", - "function newton(f, x0, p; tol=1e-10, maxiter=100)\n", - " x = x0\n", - " for i in 1:maxiter\n", - " fx = f(x, p)\n", - " error = norm(fx)\n", - " println(\"Iteration $i: x = $x, f(x) = $fx, error = $error\")\n", - " if error < tol\n", - " return x\n", - " end\n", - " get_derivative = (v, u, a, b) -> v .= derivative(x -> f(x, p), x, u, 1)\n", - " operator = FunctionOperator(get_derivative, similar(x), similar(x))\n", - " problem = LinearProblem(operator, -fx)\n", - " sol = solve(problem, KrylovJL_GMRES())\n", - " x += sol.u\n", - " end\n", - " return x\n", - "end" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Jacobian- and Hessian-free Halley\n", - "\n", - "This naturally follows, only difference is replacing the rhs by Hessian-vector-vector product:" - ] - }, - { - "cell_type": "code", - "execution_count": 2, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "halley (generic function with 1 method)" - ] - }, - "metadata": {}, - "output_type": "display_data" - } - ], - "source": [ - "function halley(f, x0, p; tol=1e-10, maxiter=100)\n", - " x = x0\n", - " for i in 1:maxiter\n", - " fx = f(x, p)\n", - " error = norm(fx)\n", - " println(\"Iteration $i: x = $x, f(x) = $fx, error = $error\")\n", - " if error < tol\n", - " return x\n", - " end\n", - " get_derivative = (v, u, a, b) -> v .= derivative(x -> f(x, p), x, u, 1)\n", - " operator = FunctionOperator(get_derivative, similar(x), similar(x))\n", - " problem = LinearProblem(operator, -fx)\n", - " a = solve(problem, KrylovJL_GMRES()).u\n", - " Haa = derivative(x -> f(x, p), x, a, 2)\n", - " problem2 = LinearProblem(operator, Haa)\n", - " b = solve(problem2, KrylovJL_GMRES()).u\n", - " x += (a .* a) ./ (a .+ b ./ 2)\n", - " end\n", - " return x\n", - "end" - ] - }, - { - "cell_type": "code", - "execution_count": 3, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "f (generic function with 1 method)" - ] - }, - "metadata": {}, - "output_type": "display_data" - } - ], - "source": [ - "# Testing with simple examples:\n", - "\n", - "f(x, p) = x .* x - p" - ] - }, - { - "cell_type": "code", - "execution_count": 4, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Iteration 1: x = [1.0, 1.0], f(x) = [-1.0, -1.0], error = 1.4142135623730951\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Iteration 2: x = [1.5, 1.5], f(x) = [0.25, 0.25], error = 0.3535533905932738\n", - "Iteration 3: x = [1.4166666666666667, 1.4166666666666667], f(x) = [0.006944444444444642, 0.006944444444444642], error = 0.009820927516480105\n", - "Iteration 4: x = [1.4142156862745099, 1.4142156862745099], f(x) = [6.007304882871267e-6, 6.007304882871267e-6], error = 8.495612038666664e-6\n", - "Iteration 5: x = [1.4142135623746899, 1.4142135623746899], f(x) = [4.510614104447086e-12, 4.510614104447086e-12], error = 6.378971641140442e-12\n" - ] - }, - { - "data": { - "text/plain": [ - "2-element Vector{Float64}:\n", - " 1.4142135623746899\n", - " 1.4142135623746899" - ] - }, - "metadata": {}, - "output_type": "display_data" - } - ], - "source": [ - "newton(f, [1., 1.], [2., 2.])" - ] - }, - { - "cell_type": "code", - "execution_count": 5, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Iteration 1: x = [1.0, 1.0], f(x) = [-1.0, -1.0], error = 1.4142135623730951\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Iteration 2: x = [1.4000000000000001, 1.4000000000000001], f(x) = [-0.03999999999999959, -0.03999999999999959], error = 0.05656854249492323\n", - "Iteration 3: x = [1.4142131979695431, 1.4142131979695431], f(x) = [-1.0306887576749801e-6, -1.0306887576749801e-6], error = 1.4576140196894333e-6\n", - "Iteration 4: x = [1.414213562373142, 1.414213562373142], f(x) = [1.3278267374516872e-13, 1.3278267374516872e-13], error = 1.877830580585795e-13\n" - ] - }, - { - "data": { - "text/plain": [ - "2-element Vector{Float64}:\n", - " 1.414213562373142\n", - " 1.414213562373142" - ] - }, - "metadata": {}, - "output_type": "display_data" - } - ], - "source": [ - "halley(f, [1., 1.], [2., 2.])" - ] - } - ], - "metadata": { - "kernelspec": { - "display_name": "Julia 1.10.1", - "language": "julia", - "name": "julia-1.10" - }, - "language_info": { - "file_extension": ".jl", - "mimetype": "application/julia", - "name": "julia", - "version": "1.10.1" - } - }, - "nbformat": 4, - "nbformat_minor": 2 -} diff --git a/examples/halley.jl b/examples/halley.jl deleted file mode 100644 index 9b9d054..0000000 --- a/examples/halley.jl +++ /dev/null @@ -1,46 +0,0 @@ -# The Jacobi- and Hessian-free Halley method for solving nonlinear equations - -using TaylorDiff -using LinearAlgebra -using LinearSolve - -function newton(f, x0, p; tol = 1e-10, maxiter = 100) - x = x0 - for i in 1:maxiter - fx = f(x, p) - error = norm(fx) - println("Iteration $i: x = $x, f(x) = $fx, error = $error") - if error < tol - return x - end - get_derivative = (v, u, a, b) -> v .= derivative(x -> f(x, p), x, u, 1) - operator = FunctionOperator(get_derivative, similar(x), similar(x)) - problem = LinearProblem(operator, -fx) - sol = solve(problem, KrylovJL_GMRES()) - x += sol.u - end - return x -end - -function halley(f, x0, p; tol = 1e-10, maxiter = 100) - x = x0 - for i in 1:maxiter - fx = f(x, p) - error = norm(fx) - println("Iteration $i: x = $x, f(x) = $fx, error = $error") - if error < tol - return x - end - get_derivative = (v, u, a, b) -> v .= derivative(x -> f(x, p), x, u, 1) - operator = FunctionOperator(get_derivative, similar(x), similar(x)) - problem = LinearProblem(operator, -fx) - a = solve(problem, KrylovJL_GMRES()).u - Haa = derivative(x -> f(x, p), x, a, 2) - problem2 = LinearProblem(operator, Haa) - b = solve(problem2, KrylovJL_GMRES()).u - x += (a .* a) ./ (a .+ b ./ 2) - end - return x -end - -f(x, p) = x .* x - p diff --git a/examples/integration.jl b/examples/integration.jl index 25c09b6..6908480 100644 --- a/examples/integration.jl +++ b/examples/integration.jl @@ -1,4 +1,4 @@ -using TaylorDiff +using TaylorDiff: TaylorDiff, TaylorScalar, TaylorArray, make_seed, value, partials, flatten using TaylorSeries using TaylorIntegration: jetcoeffs! using BenchmarkTools @@ -6,78 +6,71 @@ using BenchmarkTools """ No magic, just a type-stable way to generate a new object since TaylorScalar is immutable """ -function update_coefficient(x::TaylorScalar{T, N}, index::Integer, value::T) where {T, N} - return TaylorScalar(ntuple(i -> (i == index ? value : x.value[i]), Val{N}())) +function setindex(x::TaylorScalar{T, P}, index, d) where {T, P} + v = flatten(x) + ntuple(i -> i == index + 1 ? d : v[i], Val(P + 1)) |> TaylorScalar end +function setindex(x::TaylorArray{T, N, A, P}, index, d) where {T, N, A, P} + v = flatten(x) + ntuple(i -> i == index + 1 ? d : v[i], Val(P + 1)) |> TaylorArray +end + + """ -Computes the taylor integration of order N - 1, i.e. N = order + 1 +Computes the taylor integration of order P -eqsdiff: RHS -t: constructed by TaylorScalar{T, N}(t0, 1), which means unit perturbation -x0: initial value +- `f`: ODE function +- `t`: constructed by TaylorScalar{P}(t0, one(t0)) +- `x0`: initial value +- `p`: parameters """ -function jetcoeffs_taylordiff(eqsdiff::Function, t::TaylorScalar{T, N}, x0::U, - params) where - {T <: Real, U <: Number, N} - x = TaylorScalar{U, N}(x0) # x.values[1] is defined, others are 0 - for index in 1:(N - 1) # computes x.values[index + 1] - f = eqsdiff(x, params, t) - df = TaylorDiff.extract_derivative(f, index) - x = update_coefficient(x, index + 1, df) +function my_jetcoeffs(f, t::TaylorScalar{T, P}, x0, p) where {T, P} + x = x0 isa AbstractArray ? TaylorArray{P}(x0) : TaylorScalar{P}(x0) + for index in 1:P # computes x.partials[index] + fx = f(x, p, t) + d = index == 1 ? value(fx) : partials(fx)[index - 1] / index + x = setindex(x, index, d) end x end """ -Computes the taylor integration of order N - 1, i.e. N = order + 1 +Computes the taylor integration of order P -eqsdiff!: RHS, in non-allocation form -t: constructed by TaylorScalar{T, N}(t0, 1), which means unit perturbation -x0: initial value +- `f!`: ODE function, in non-allocating form +- `t`: constructed by TaylorScalar{P}(t0, one(t0)) +- `x0`: initial value +- `p`: parameters """ -function jetcoeffs_array_taylordiff( - eqsdiff!::Function, t::TaylorScalar{T, N}, x0::AbstractArray{U, D}, - params) where - {T <: Real, U <: Number, N, D} - x = map(TaylorScalar{U, N}, x0) # x.values[1] is defined, others are 0 - f = similar(x) - for index in 1:(N - 1) # computes x.values[index + 1] - eqsdiff!(f, x, params, t) - df = TaylorDiff.extract_derivative.(f, index) - x = update_coefficient.(x, index + 1, df) +function my_jetcoeffs!(f!, t::TaylorScalar{T, P}, x0, p) where {T, P} + x = x0 isa AbstractArray ? TaylorArray{P}(x0) : TaylorScalar{P}(x0) + out = similar(x) + for index in 1:P # computes x.partials[index] + f!(out, x, p, t) + d = index == 1 ? value(out) : partials(out)[index - 1] / index + x = setindex(x, index, d) end x end -""" -In TaylorDiff.jl, the polynomial coefficients are just the n-th order derivatives, -not normalized by n!. So to compare with TaylorSeries.jl, one need to normalize -""" -function normalize_taylordiff_coeffs(t::TaylorScalar) - return [x / factorial(i - 1) for (i, x) in enumerate(t.value)] -end - function scalar_test() - rhs(x, p, t) = x * x - + f(x, p, t) = x * x x0 = 0.1 t0 = 0.0 - order = 6 - N = 7 # N = order + 1 + P = 6 # TaylorIntegration test - t = t0 + Taylor1(typeof(t0), order) - x = Taylor1(x0, order) - @btime jetcoeffs!($rhs, $t, $x, nothing) + t = t0 + Taylor1(typeof(t0), P) + x = Taylor1(x0, P) + @btime jetcoeffs!($f, $t, $x, nothing) # TaylorDiff test - td = TaylorScalar{typeof(t0), N}(t0, one(t0)) - @btime jetcoeffs_taylordiff($rhs, $td, $x0, nothing) + td = TaylorScalar{P}(t0, one(t0)) + @btime my_jetcoeffs($f, $td, $x0, nothing) - result = jetcoeffs_taylordiff(rhs, td, x0, nothing) - normalized = normalize_taylordiff_coeffs(result) - @assert x.coeffs ≈ normalized + result = my_jetcoeffs(f, td, x0, nothing) + @assert x.coeffs ≈ collect(flatten(result)) end function array_test() @@ -89,21 +82,20 @@ function array_test() end u0 = [1.0; 0.0; 0.0] t0 = 0.0 - order = 6 - N = 7 + P = 6 + # TaylorIntegration test - t = t0 + Taylor1(typeof(t0), order) - u = [Taylor1(x, order) for x in u0] + t = t0 + Taylor1(typeof(t0), P) + u = [Taylor1(x, P) for x in u0] du = similar(u) uaux = similar(u) @btime jetcoeffs!($lorenz, $t, $u, $du, $uaux, nothing) # TaylorDiff test - td = TaylorScalar{typeof(t0), N}(t0, one(t0)) - @btime jetcoeffs_array_taylordiff($lorenz, $td, $u0, nothing) - result = jetcoeffs_array_taylordiff(lorenz, td, u0, nothing) - normalized = normalize_taylordiff_coeffs.(result) + td = TaylorScalar{P}(t0, one(t0)) + @btime my_jetcoeffs!($lorenz, $td, $u0, nothing) + result = my_jetcoeffs!(lorenz, td, u0, nothing) for i in eachindex(u) - @assert u[i].coeffs ≈ normalized[i] + @assert u[i].coeffs ≈ collect(flatten(result[i])) end end diff --git a/src/array.jl b/src/array.jl index 504ec1f..4a69b2d 100644 --- a/src/array.jl +++ b/src/array.jl @@ -23,6 +23,8 @@ struct TaylorArray{T, N, A <: AbstractArray{T, N}, P} <: end end +TaylorArray(all::NTuple{P, A}) where {A, P} = TaylorArray(all[1], all[2:end]) + function TaylorArray{P}(value::A) where {A <: AbstractArray, P} TaylorArray(value, ntuple(i -> broadcast(zero, value), Val(P))) end