From 189c1ea8797eb0830be1cfb825f4151544588ebf Mon Sep 17 00:00:00 2001 From: Vasily Pisarev <50239959+vvpisarev@users.noreply.github.com> Date: Mon, 20 Dec 2021 22:43:46 +0300 Subject: [PATCH] Rename to Downhill.jl (#22) * rename to Downhill.jl * Prepare to rename and release --- .github/workflows/TagBot.yml | 15 +++++ .gitignore | 2 +- Project.toml | 8 +-- README.md | 2 +- docs/make.jl | 13 ++-- docs/src/functions.md | 6 +- docs/src/index.md | 35 +++++++++- examples/rosenbrock.jl | 6 +- src/{DescentMethods.jl => Downhill.jl} | 5 +- src/abstract_types.jl | 8 +-- src/chol_bfgs.jl | 8 +-- src/conjgrad.jl | 13 ++-- src/grad_descent.jl | 8 +-- src/hypergradient.jl | 8 +-- src/optimize.jl | 89 ++++++++++++++++---------- src/quasinewton.jl | 8 +-- src/rate_descent.jl | 24 +++---- src/wrappers.jl | 73 +++++++++++---------- test/convergence.jl | 2 +- test/interface.jl | 4 +- test/linesearch.jl | 2 +- test/runtests.jl | 4 +- 22 files changed, 206 insertions(+), 137 deletions(-) create mode 100644 .github/workflows/TagBot.yml rename src/{DescentMethods.jl => Downhill.jl} (88%) diff --git a/.github/workflows/TagBot.yml b/.github/workflows/TagBot.yml new file mode 100644 index 0000000..623860f --- /dev/null +++ b/.github/workflows/TagBot.yml @@ -0,0 +1,15 @@ +name: TagBot +on: + issue_comment: + types: + - created + workflow_dispatch: +jobs: + TagBot: + if: github.event_name == 'workflow_dispatch' || github.actor == 'JuliaTagBot' + runs-on: ubuntu-latest + steps: + - uses: JuliaRegistries/TagBot@v1 + with: + token: ${{ secrets.GITHUB_TOKEN }} + ssh: ${{ secrets.DOCUMENTER_KEY }} \ No newline at end of file diff --git a/.gitignore b/.gitignore index c88f519..d8c2b5f 100644 --- a/.gitignore +++ b/.gitignore @@ -6,4 +6,4 @@ *.mem *.cov docs/build -Manifest.toml +Manifest*.toml diff --git a/Project.toml b/Project.toml index 4cbc219..2f99983 100644 --- a/Project.toml +++ b/Project.toml @@ -1,12 +1,10 @@ -name = "DescentMethods" -uuid = "f4becde8-b16e-4b5a-8f91-16ef0c22c8bc" 
-authors = ["Vasily "] +name = "Downhill" +uuid = "a4c28711-7027-4a57-8564-74545b4697a4" version = "0.1.0" [deps] LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e" Logging = "56ddb016-857b-54e1-b83d-db4d58db5568" -Printf = "de0858da-6303-5e67-8744-51eddeeeb8d7" [compat] -julia = ">= 1.6" \ No newline at end of file +julia = ">= 1.6" diff --git a/README.md b/README.md index e9ef723..c633025 100644 --- a/README.md +++ b/README.md @@ -1,4 +1,4 @@ -# DescentMethods.jl +# Downhill.jl A collection of descent-based optimization methods. diff --git a/docs/make.jl b/docs/make.jl index 8474e28..c809654 100644 --- a/docs/make.jl +++ b/docs/make.jl @@ -1,19 +1,22 @@ push!(LOAD_PATH,"../") using Documenter -using DescentMethods +using Downhill makedocs( ; - sitename="Documentation", - modules=[DescentMethods], + sitename="Downhill.jl documentation", + modules=[Downhill], pages = [ "index.md", "Optimization Methods" => "core_types.md", "Basic Functions" => "functions.md", - ] + ], + format = Documenter.HTML( + prettyurls = get(ENV, "CI", nothing) == "true" + ) ) deploydocs( - repo = "github.com/vvpisarev/DescentMethods.jl.git", + repo = "github.com/vvpisarev/Downhill.jl.git", ) diff --git a/docs/src/functions.md b/docs/src/functions.md index 39cb610..fce8b7d 100644 --- a/docs/src/functions.md +++ b/docs/src/functions.md @@ -1,13 +1,13 @@ # Basic Functions ```@meta -CurrentModule = DescentMethods +CurrentModule = Downhill ``` ```@docs +optimize + optimize! solver - -reset! ``` diff --git a/docs/src/index.md b/docs/src/index.md index 21c6630..2d2c39a 100644 --- a/docs/src/index.md +++ b/docs/src/index.md @@ -1,5 +1,36 @@ -# DescentMethods.jl +# Downhill.jl ```@meta -CurrentModule = DescentMethods +CurrentModule = Downhill ``` + +A collection of descent-based optimization methods. + +The package is meant to be used for small-scale optimization problems. +The intended use case is problems in which optimization is an intermediate step +that has to be run repeatedly. 
+ +## Basic usage + +```julia +julia> function rosenbrock!(x::AbstractVector, g::AbstractVector; b=100) + f = zero(eltype(g)) + fill!(g, 0) + inds = eachindex(x, g) + for i in 2:last(inds) + f += (1 - x[i-1])^2 + b * (x[i] - x[i-1]^2)^2 + g[i-1] += 2 * (x[i-1] - 1) + 4 * b * x[i-1] * (x[i-1]^2 - x[i]) + g[i] += 2 * b * (x[i] - x[i-1]^2) + end + return f, g + end + +julia> let x0 = zeros(2) + opt = BFGS(x0) + optresult = optimize!(rosenbrock!, opt, x0) + optresult.argument + end +2-element Vector{Float64}: + 0.9999999998907124 + 0.9999999998080589 +``` \ No newline at end of file diff --git a/examples/rosenbrock.jl b/examples/rosenbrock.jl index 997b186..e606ace 100644 --- a/examples/rosenbrock.jl +++ b/examples/rosenbrock.jl @@ -1,4 +1,4 @@ -using DescentMethods +using Downhill """ rosenbrock(x; b=2) @@ -58,7 +58,7 @@ ros!(x, g) = rosenbrock!(x, g) ans_nonmutating = let x0 = zeros(2) opt = BFGS(x0) - optresult = optimize!(opt, ros, x0; maxiter=1000, log_stream=tempname(cleanup=false), verbosity=2) + optresult = optimize!(ros, opt, x0; maxiter=1000) println(""" ==Nonmutating gradient evaluation== @@ -73,7 +73,7 @@ end ans_mutating = let x0 = zeros(2) opt = BFGS(x0) - optresult = optimize!(opt, ros!, x0; maxiter=1000) + optresult = optimize!(ros!, opt, x0; maxiter=1000) println(""" ==Mutating gradient evaluation== diff --git a/src/DescentMethods.jl b/src/Downhill.jl similarity index 88% rename from src/DescentMethods.jl rename to src/Downhill.jl index 1a3d2f5..934f55c 100644 --- a/src/DescentMethods.jl +++ b/src/Downhill.jl @@ -1,7 +1,6 @@ -module DescentMethods +module Downhill using LinearAlgebra -using Printf: Format, format using Logging using Logging: Info, default_logcolor, default_metafmt @@ -9,7 +8,7 @@ export FixedRateDescent, MomentumDescent, NesterovMomentum export SteepestDescent, HyperGradDescent, CGDescent export BFGS, CholBFGS -export optimize! 
+export optimize!, optimize include("utils.jl") include("abstract_types.jl") diff --git a/src/abstract_types.jl b/src/abstract_types.jl index 67a25bb..54a166b 100644 --- a/src/abstract_types.jl +++ b/src/abstract_types.jl @@ -1,10 +1,10 @@ -export DescentMethod, CoreMethod, Wrapper +export AbstractOptBuffer, OptBuffer, Wrapper -abstract type DescentMethod end +abstract type AbstractOptBuffer end -abstract type CoreMethod <: DescentMethod end +abstract type OptBuffer <: AbstractOptBuffer end -abstract type Wrapper <: DescentMethod end +abstract type Wrapper <: AbstractOptBuffer end const OptLogLevel = LogLevel(-10) const LSLogLevel = LogLevel(-20) diff --git a/src/chol_bfgs.jl b/src/chol_bfgs.jl index 2bc2764..3a57590 100644 --- a/src/chol_bfgs.jl +++ b/src/chol_bfgs.jl @@ -4,7 +4,7 @@ Quasi-Newton descent method. """ mutable struct CholBFGS{T<:AbstractFloat, V<:AbstractVector{T}, - C<:Cholesky{T}} <: CoreMethod + C<:Cholesky{T}} <: OptBuffer hess::C x::V g::V @@ -43,7 +43,7 @@ function CholBFGS(x::AbstractVector{T}) where {T} end function init!( - M::CholBFGS{T}, optfn!, x0; + optfn!, M::CholBFGS{T}, x0; reset, constrain_step = infstep ) where {T} optfn!(x0, zero(T), x0) @@ -97,7 +97,7 @@ function reset!(M::CholBFGS, x0, init_H::AbstractMatrix) return end -function callfn!(M::CholBFGS, fdf::F, x, α, d) where {F} +function callfn!(fdf::F, M::CholBFGS, x, α, d) where {F} __update_arg!(M, x, α, d) y, g = fdf(M.x, M.g) __update_grad!(M, g) @@ -113,7 +113,7 @@ function __descent_dir!(M::CholBFGS) return M.d end -function step!(M::CholBFGS, optfn!::F; constrain_step=infstep) where {F} +function step!(optfn!::F, M::CholBFGS; constrain_step=infstep) where {F} #= argument and gradient from the end of the last iteration are stored into `xpre` and `gpre` diff --git a/src/conjgrad.jl b/src/conjgrad.jl index dc2e369..29fdc03 100644 --- a/src/conjgrad.jl +++ b/src/conjgrad.jl @@ -3,7 +3,7 @@ Conjugate gradient method (Hager-Zhang version [W.Hager, H.Zhang // SIAM J. 
Optim (2006) Vol. 16, pp. 170-192]) """ -mutable struct CGDescent{T<:AbstractFloat,V<:AbstractVector{T}} <: CoreMethod +mutable struct CGDescent{T<:AbstractFloat,V<:AbstractVector{T}} <: OptBuffer x::V xpre::V g::V @@ -52,7 +52,7 @@ function __descent_dir!(M::CGDescent) return d end -function init!(M::CGDescent{T}, optfn!, x0; reset, constrain_step = infstep) where {T} +function init!(optfn!, M::CGDescent{T}, x0; reset, constrain_step = infstep) where {T} y, g = optfn!(x0, zero(T), x0) __update_gpre!(M, M.g) map!(-, M.dir, M.g) @@ -71,7 +71,7 @@ function reset!(M::CGDescent, x0) return end -function callfn!(M::CGDescent, fdf, x, α, d) +function callfn!(fdf, M::CGDescent, x, α, d) __update_arg!(M, x, α, d) let x = argumentvec(M) (y, g) = fdf(x, gradientvec(M)) @@ -81,7 +81,7 @@ function callfn!(M::CGDescent, fdf, x, α, d) end end -function step!(M::CGDescent, optfn!; constrain_step = infstep) +function step!(optfn!, M::CGDescent; constrain_step = infstep) M.x, M.xpre = M.xpre, M.x map!(-, M.gdiff, M.g, M.gpre) @@ -90,7 +90,10 @@ function step!(M::CGDescent, optfn!; constrain_step = infstep) M.g, M.gpre = M.gpre, M.g ypre = M.y maxstep = constrain_step(xpre, d) - α = strong_backtracking!(optfn!, xpre, d, ypre, M.gpre, α = M.α, αmax = maxstep, β = 0.01, σ = 0.1) + α = strong_backtracking!( + optfn!, xpre, d, ypre, M.gpre; + α = M.α, αmax = maxstep, β = 0.01, σ = 0.1 + ) fdiff = M.y - ypre if fdiff < 0 M.α = 2 * fdiff / dot(d, M.gpre) diff --git a/src/grad_descent.jl b/src/grad_descent.jl index f38dc24..e43f5eb 100644 --- a/src/grad_descent.jl +++ b/src/grad_descent.jl @@ -4,7 +4,7 @@ Descent method which minimizes the objective function in the direction of antigradient at each step. 
""" -mutable struct SteepestDescent{T<:AbstractFloat,V<:AbstractVector{T}} <: CoreMethod +mutable struct SteepestDescent{T<:AbstractFloat,V<:AbstractVector{T}} <: OptBuffer x::V xpre::V g::V @@ -28,7 +28,7 @@ SteepestDescent(x::AbstractVector) = SteepestDescent(x, 1) """ `optfn!` must be the 3-arg closure that computes fdf(x + α*d) and overwrites `M`'s gradient """ -function init!(::SteepestDescent{T}, optfn!, x0; kw...) where {T} +function init!(optfn!, ::SteepestDescent{T}, x0; kw...) where {T} optfn!(x0, zero(T), x0) return end @@ -45,7 +45,7 @@ function reset!(M::SteepestDescent, x0, α = M.α) return end -function callfn!(M::SteepestDescent, fdf, x, α, d) +function callfn!(fdf, M::SteepestDescent, x, α, d) __update_arg!(M, x, α, d) y, g = fdf(M.x, M.g) __update_grad!(M, g) @@ -58,7 +58,7 @@ end return M.dir end -function step!(M::SteepestDescent, optfn!; constrain_step = infstep) +function step!(optfn!, M::SteepestDescent; constrain_step = infstep) M.x, M.xpre = M.xpre, M.x d = __descent_dir!(M) xpre = M.xpre diff --git a/src/hypergradient.jl b/src/hypergradient.jl index 3b7c30f..f3976e2 100644 --- a/src/hypergradient.jl +++ b/src/hypergradient.jl @@ -4,7 +4,7 @@ Descent method which minimizes the objective function in the direction of antigradient at each step. """ -mutable struct HyperGradDescent{T<:AbstractFloat,V<:AbstractVector{T}} <: CoreMethod +mutable struct HyperGradDescent{T<:AbstractFloat,V<:AbstractVector{T}} <: OptBuffer x::V g::V gpre::V @@ -29,7 +29,7 @@ end HyperGradDescent(x::AbstractVector{T}) where {T} = HyperGradDescent(x, 0, 1e-4) -function init!(M::HyperGradDescent{T}, optfn!, x0; reset, kw...) where {T} +function init!(optfn!, M::HyperGradDescent{T}, x0; reset, kw...) 
where {T} reset != false && reset!(M, x0) optfn!(x0, zero(T), x0) fill!(M.gpre, false) @@ -52,14 +52,14 @@ function reset!(M::HyperGradDescent{T}, x0, α = zero(T), μ = M.μ) where {T} return M end -@inline function callfn!(M::HyperGradDescent, fdf, x, α, d) +@inline function callfn!(fdf, M::HyperGradDescent, x, α, d) __update_arg!(M, x, α, d) y, g = fdf(M.x, M.g) __update_grad!(M, g) return y, g end -function step!(M::HyperGradDescent, optfn!; constrain_step = infstep) +function step!(optfn!, M::HyperGradDescent; constrain_step = infstep) M.gpre, M.g = M.g, M.gpre M.α += abs(M.μ * dot(M.g, M.gpre)) d = rmul!(M.gpre, -1) diff --git a/src/optimize.jl b/src/optimize.jl index 3688012..e18d884 100644 --- a/src/optimize.jl +++ b/src/optimize.jl @@ -1,16 +1,16 @@ """ - optimize!(M::Wrapper, fdf, x0) + optimize!(fdf, M::Wrapper, x0) Find an optimizer for `fdf`, starting with the initial approximation `x0`. `fdf(x, g)` must return a tuple (f(x), ∇f(x)) and, if `g` is mutable, overwrite it with the gradient. """ -function optimize!(M::Wrapper, fdf, x0; reset=true, log_stream=nothing, verbosity=1) - return __optim_impl!(M, fdf, x0, log_stream; reset, verbosity) +function optimize!(fdf, M::Wrapper, x0; reset=true, tracking=nothing, verbosity=0) + return __optim_impl!(fdf, M, x0, tracking; reset, verbosity) end -function __optim_impl!(M::Wrapper, fdf, x0, logger::AbstractLogger; reset=true) - optfn! = OptFunc(M, fdf) +function __optim_impl!(fdf, M::Wrapper, x0, logger::AbstractLogger; reset=true) + optfn! = OptFunc(fdf, M) with_logger(logger) do n = length(x0) @@ -22,62 +22,63 @@ function __optim_impl!(M::Wrapper, fdf, x0, logger::AbstractLogger; reset=true) # last $n value$(n == 1 ? "" : "s") - gradient vector """ @logmsg OptLogLevel "==SOLVER INITIALIZATION==" - init!(M, optfn!, x0; reset) + init!(optfn!, M, x0; reset) @logmsg OptLogLevel "==SOLVER INITIALIZED==" for niter in Iterators.countfrom(1) @logmsg OptLogLevel "# Iteration $niter" - step!(M, optfn!) 
+ step!(optfn!, M) stopcond(M) && break end return convstat(M) end end -function __optim_impl!(M::Wrapper, fdf, x0, log_stream::IO; - reset=true, verbosity +function __optim_impl!( + fdf, M::Wrapper, x0, tracking::IO; + reset=true, verbosity, kw... ) logger = ConsoleLogger( - log_stream, + tracking, LogLevel(-10 * verbosity); meta_formatter=metafmt_noprefix_nosuffix, show_limited=false, ) - __optim_impl!(M, fdf, x0, logger; reset) + __optim_impl!(fdf, M, x0, logger; reset) end function __optim_impl!( - M::Wrapper, fdf, x0, log_stream::AbstractString; + fdf, M::Wrapper, x0, tracking::AbstractString; reset=true, verbosity::Integer ) - open(log_stream, "w") do log_io + open(tracking, "w") do log_io logger = ConsoleLogger( log_io, LogLevel(-10 * verbosity); meta_formatter=metafmt_noprefix_nosuffix, show_limited=false, ) - return __optim_impl!(M, fdf, x0, logger; reset) + return __optim_impl!(fdf, M, x0, logger; reset) end end -function __optim_impl!(M::Wrapper, fdf, x0, ::Nothing; reset=true, kw...) - __optim_impl!(M, fdf, x0, NullLogger(); reset) +function __optim_impl!(fdf, M::Wrapper, x0, ::Nothing; reset=true, kw...) + __optim_impl!(fdf, M, x0, NullLogger(); reset) end """ optimize!( - M::CoreMethod, fdf, x0; + fdf, M::OptBuffer, x₀; gtol=1e-6, maxiter=100, maxcalls=nothing, reset=true, constrain_step=nothing, - log_stream=nothing, - verbosity=1 + tracking=nothing, + verbosity=0 ) -Find an optimizer for `fdf`, starting with the initial approximation `x0`. +Find an optimizer for `fdf`, starting with the initial approximation `x₀`. # Arguments: -- `M::CoreMethod`: the core method to use for optimization +- `M::OptBuffer`: the core method to use for optimization - `fdf(x, g)::Function`: function to optimize. It must return a tuple (f(x), ∇f(x)) and, if `g` is mutable, overwrite it with the gradient. @@ -92,21 +93,22 @@ Find an optimizer for `fdf`, starting with the initial approximation `x0`. 
- `reset=true`: a value to pass as a keyword argument to the optimizer `init!` method - `constrain_step(x0, d)`: a function to constrain step from `x0` in the direction `d`. It must return a real-numbered value `α` such that `x0 + αd` is the maximum allowed step -- `log_stream::Union{IO,AbstractString,Nothing}`: IO stream or a file name to log the +- `tracking::Union{Nothing,IO,AbstractString}`: IO stream or a file name to log the optimization process or `nothing` to disable logging (default: `nothing`) -- `verbosity::Integer=1`: verbosity of logging. `1` (default) logs all points of objective - function evaluation with corresponding values and gradients. `2` shows additional - statistics regarding the line search. +- `verbosity::Integer`: verbosity of logging. `0` (default) disables tracking. `1` logs all + points of objective function evaluation with corresponding values and gradients. + `2` shows additional statistics regarding the line search. Option ignored if + `tracking == nothing`. """ function optimize!( - M::CoreMethod, fdf, x0; + fdf, M::OptBuffer, x0; gtol = convert(float(eltype(x0)), 1e-6), maxiter = 100, maxcalls = nothing, reset = true, constrain_step = nothing, - log_stream = nothing, - verbosity::Integer=1, + tracking = nothing, + verbosity::Integer=0, ) if !isnothing(gtol) && gtol > 0 M = StopByGradient(M, gtol) @@ -124,12 +126,31 @@ function optimize!( if !isnothing(constrain_step) M = ConstrainStepSize(M, constrain_step) end - optimize!(M, fdf, x0; reset, log_stream, verbosity) + optimize!(fdf, M, x0; reset, tracking, verbosity) +end + +function optimize!(fdf, M::Type{<:OptBuffer}, x0; kw...) + return optimize!(fdf, M(x0), x0; kw...) +end + +""" + optimize( + fdf, x₀; + method, + kw... + ) + +Find an optimizer for `fdf`, starting with the initial approximation `x₀`. + `method` keyword chooses a specific optimization method. See [`optimize!`](@ref) for + the description of other keywords. +""" +function optimize(fdf, x0; method, kw...) 
+ return optimize!(fdf, method, x0; kw...) end """ - DescentMethods.solver( - M::CoreMethod; + Downhill.solver( + M::OptBuffer; gtol = 1e-6, maxiter = 100, maxcalls = nothing, @@ -140,7 +161,7 @@ Return the wrapper object for a chosen method to solve an optimization problem w parameters. For the description of keywords, see [`optimize!`](@ref) """ function solver( - M::CoreMethod; + M::OptBuffer; gtol=convert(eltype(argumentvec(M)), 1e-6), maxiter=100, maxcalls=nothing, @@ -166,13 +187,13 @@ function solver( end """ - DescentMethods.solver( + Downhill.solver( M::DataType, x; gtol=1e-6, maxiter = 100, maxcalls = nothing, constrain_step) Return the wrapper object for a chosen method to solve an optimization problem with given parameters compatible with the dimensions of `x`. """ -function solver(M::Type{<:CoreMethod}, x::AbstractVector; kw...) +function solver(M::Type{<:OptBuffer}, x::AbstractVector; kw...) return solver(M(x); kw...) end diff --git a/src/quasinewton.jl b/src/quasinewton.jl index d10fec7..5984583 100644 --- a/src/quasinewton.jl +++ b/src/quasinewton.jl @@ -5,7 +5,7 @@ Quasi-Newton descent method. 
""" mutable struct BFGS{T<:AbstractFloat, V<:AbstractVector{T}, - M<:AbstractMatrix{T}} <: CoreMethod + M<:AbstractMatrix{T}} <: OptBuffer invH::M x::V g::V @@ -40,7 +40,7 @@ function BFGS(x::AbstractVector{T}) where {T} end function init!( - M::BFGS{T}, optfn!, x0; + optfn!, M::BFGS{T}, x0; reset, constrain_step = infstep ) where {T} optfn!(x0, zero(T), x0) @@ -87,7 +87,7 @@ function reset!(M::BFGS, x0, scale::Real=1) return M end -@inline function callfn!(M::BFGS, fdf, x, α, d) +@inline function callfn!(fdf, M::BFGS, x, α, d) __update_arg!(M, x, α, d) y, g = fdf(M.x, M.g) __update_grad!(M, g) @@ -100,7 +100,7 @@ function __descent_dir!(M::BFGS) return M.d end -function step!(M::BFGS, optfn!::F; constrain_step::S=infstep) where {F,S} +function step!(optfn!::F, M::BFGS; constrain_step::S=infstep) where {F,S} #= argument and gradient from the end of the last iteration are stored into `xpre` and `gpre` diff --git a/src/rate_descent.jl b/src/rate_descent.jl index 83dd917..a9a34d1 100644 --- a/src/rate_descent.jl +++ b/src/rate_descent.jl @@ -4,7 +4,7 @@ Descent method which minimizes the objective function in the direction of antigradient at each step. """ -mutable struct FixedRateDescent{T<:AbstractFloat,V<:AbstractVector{T}} <: CoreMethod +mutable struct FixedRateDescent{T<:AbstractFloat,V<:AbstractVector{T}} <: OptBuffer x::V g::V α::T @@ -21,7 +21,7 @@ end FixedRateDescent(x::AbstractVector) = FixedRateDescent(x, 0.01) -function init!(M::FixedRateDescent{T}, optfn!, x0; kw...) where {T} +function init!(optfn!, M::FixedRateDescent{T}, x0; kw...) 
where {T} optfn!(x0, zero(T), x0) return end @@ -38,14 +38,14 @@ function reset!(M::FixedRateDescent, x0, α = M.α) return end -@inline function callfn!(M::FixedRateDescent, fdf, x, α, d) +@inline function callfn!(fdf, M::FixedRateDescent, x, α, d) __update_arg!(M, x, α, d) y, g = fdf(M.x, M.g) __update_grad!(M, g) return y, g end -function step!(M::FixedRateDescent, optfn!; constrain_step = infstep) +function step!(optfn!, M::FixedRateDescent; constrain_step = infstep) d = rmul!(M.g, -1) maxstep = constrain_step(M.x, d) s = M.α <= maxstep ? M.α : maxstep / 2 @@ -80,7 +80,7 @@ end Descent method which minimizes the objective function in the direction of antigradient at each step. """ -mutable struct MomentumDescent{T<:AbstractFloat,V<:AbstractVector{T}} <: CoreMethod +mutable struct MomentumDescent{T<:AbstractFloat,V<:AbstractVector{T}} <: OptBuffer x::V g::V v::V @@ -106,7 +106,7 @@ function MomentumDescent( ) end -function init!(M::MomentumDescent{T}, optfn!, x0; kw...) where {T} +function init!(optfn!, M::MomentumDescent{T}, x0; kw...) where {T} optfn!(x0, zero(T), x0) fill!(M.v, zero(T)) return M @@ -136,14 +136,14 @@ function reset!( return M end -@inline function callfn!(M::MomentumDescent, fdf, x, α, d) +@inline function callfn!(fdf, M::MomentumDescent, x, α, d) __update_arg!(M, x, α, d) y, g = fdf(M.x, M.g) __update_grad!(M, g) return y, g end -function step!(M::MomentumDescent, optfn!; constrain_step = infstep) +function step!(optfn!, M::MomentumDescent; constrain_step = infstep) map!(M.v, M.v, M.g) do v, g M.decay_rate * v - M.learn_rate * g end @@ -182,7 +182,7 @@ end Descent method which minimizes the objective function in the direction of antigradient at each step. 
""" -mutable struct NesterovMomentum{T<:AbstractFloat,V<:AbstractVector{T}} <: CoreMethod +mutable struct NesterovMomentum{T<:AbstractFloat,V<:AbstractVector{T}} <: OptBuffer x::V g::V v::V @@ -210,7 +210,7 @@ function NesterovMomentum(x::AbstractVector; learn_rate::Real=0.01, decay_rate:: ) end -function init!(M::NesterovMomentum{T}, optfn!, x0; kw...) where {T} +function init!(optfn!, M::NesterovMomentum{T}, x0; kw...) where {T} optfn!(x0, zero(T), x0) fill!(M.v, zero(T)) return M @@ -233,14 +233,14 @@ function reset!(M::NesterovMomentum, x0, learn_rate=M.learn_rate, decay_rate=M.d return M end -@inline function callfn!(M::NesterovMomentum, fdf, x, α, d) +@inline function callfn!(fdf, M::NesterovMomentum, x, α, d) __update_arg!(M, x, α, d) y, g = fdf(M.x, M.g) __update_grad!(M, g) return y, g end -function step!(M::NesterovMomentum, optfn!; constrain_step = infstep) +function step!(optfn!, M::NesterovMomentum; constrain_step = infstep) α, β = M.α, M.β M.v *= β maxstep = constrain_step(M.x, M.v) diff --git a/src/wrappers.jl b/src/wrappers.jl index 65fb2fe..655e728 100644 --- a/src/wrappers.jl +++ b/src/wrappers.jl @@ -1,4 +1,4 @@ -function base_method(::DescentMethod) end +function base_method(::AbstractOptBuffer) end argumentvec(M::Wrapper) = argumentvec(base_method(M)) gradientvec(M::Wrapper) = gradientvec(base_method(M)) @@ -6,19 +6,18 @@ step_origin(M::Wrapper) = step_origin(base_method(M)) __descent_dir!(M::Wrapper) = __descent_dir!(base_method(M)) -@inline callfn!(M::Wrapper, fdf, x, α, d) = callfn!(base_method(M), fdf, x, α, d) +@inline callfn!(fdf, M::Wrapper, x, α, d) = callfn!(fdf, base_method(M), x, α, d) - -function init!(M::Wrapper, args...; kw...) - init!(base_method(M), args...; kw...) +function init!(fdf, M::Wrapper, args...; kw...) + init!(fdf, base_method(M), args...; kw...) return end """ - reset!(M::DescentMethod, args...; kwargs...) + reset!(M::AbstractOptBuffer, args...; kwargs...) 
-Reset the solver parameters to the default (or to specific value - see the documentation for - the specific types). +Reset the solver parameters to the default (or to a specific value -- see the documentation + for the specific types). Each method has to implement a parameter-free `reset!(M)` method. """ @@ -27,19 +26,19 @@ function reset!(M::Wrapper, args...; kw...) return end -step!(M::Wrapper, fn::F; kw...) where {F} = step!(base_method(M), fn; kw...) +step!(fn::F, M::Wrapper; kw...) where {F} = step!(fn, base_method(M); kw...) stopcond(M::Wrapper) = stopcond(base_method(M)) -@inline stopcond(M::CoreMethod) = false +@inline stopcond(M::OptBuffer) = false conv_success(M::Wrapper) = conv_success(base_method(M)) -@inline conv_success(M::CoreMethod) = false +@inline conv_success(M::OptBuffer) = false iter_count(M::Wrapper) = iter_count(base_method(M)) call_count(M::Wrapper) = call_count(base_method(M)) -iter_count(M::CoreMethod) = -1 -call_count(M::CoreMethod) = -1 +iter_count(M::OptBuffer) = -1 +call_count(M::OptBuffer) = -1 """ convstat(M::Wrapper) @@ -78,7 +77,7 @@ end Wrapper type to stop optimization once the magnitude of gradient is less than the specified value. """ -struct StopByGradient{T<:DescentMethod, F} <: Wrapper +struct StopByGradient{T<:AbstractOptBuffer, F} <: Wrapper descent::T gtol::F end @@ -101,20 +100,20 @@ end Wrapper type to stop optimization once the number of the objective function calls exceeds the specified value. 
""" -mutable struct LimitCalls{T<:DescentMethod}<:Wrapper +mutable struct LimitCalls{T<:AbstractOptBuffer}<:Wrapper descent::T call_limit::Int call_count::Int end -LimitCalls(M::DescentMethod) = LimitCalls(M, typemax(Int), 0) -LimitCalls(M::DescentMethod, maxcalls::Integer) = LimitCalls(M, maxcalls, 0) +LimitCalls(M::AbstractOptBuffer) = LimitCalls(M, typemax(Int), 0) +LimitCalls(M::AbstractOptBuffer, maxcalls::Integer) = LimitCalls(M, maxcalls, 0) base_method(M::LimitCalls) = M.descent -function init!(M::LimitCalls, args...; kw...) +function init!(fdf, M::LimitCalls, args...; kw...) M.call_count = 0 - init!(M.descent, args...; kw...) + init!(fdf, M.descent, args...; kw...) return end @@ -125,8 +124,8 @@ function reset!(M::LimitCalls, args...; call_limit, kw...) return end -function callfn!(M::LimitCalls, fdf::F, x, α, d) where {F} - fg = callfn!(M.descent, fdf, x, α, d) +function callfn!(fdf::F, M::LimitCalls, x, α, d) where {F} + fg = callfn!(fdf, M.descent, x, α, d) M.call_count += 1 return fg end @@ -140,20 +139,20 @@ call_count(M::LimitCalls) = M.call_count Wrapper type to stop optimization once the number of the optimization iterations exceeds the specified value. """ -mutable struct LimitIters{T<:DescentMethod}<:Wrapper +mutable struct LimitIters{T<:AbstractOptBuffer}<:Wrapper descent::T iter_limit::Int iter_count::Int end -LimitIters(M::DescentMethod) = LimitIters(M, typemax(Int), 0) -LimitIters(M::DescentMethod, maxiters::Integer) = LimitIters(M, maxiters, 0) +LimitIters(M::AbstractOptBuffer) = LimitIters(M, typemax(Int), 0) +LimitIters(M::AbstractOptBuffer, maxiters::Integer) = LimitIters(M, maxiters, 0) base_method(M::LimitIters) = M.descent -function init!(M::LimitIters, args...; kw...) +function init!(fdf, M::LimitIters, args...; kw...) M.iter_count = 0 - init!(M.descent, args...; kw...) + init!(fdf, M.descent, args...; kw...) return end @@ -165,8 +164,8 @@ function reset!(M::LimitIters, args...; iter_limit=M.iter_limit, kw...) 
end -function step!(M::LimitIters, fdf::F; kw...) where {F} - s = step!(M.descent, fdf; kw...) +function step!(fdf::F, M::LimitIters; kw...) where {F} + s = step!(fdf, M.descent; kw...) M.iter_count += 1 return s end @@ -181,29 +180,29 @@ stopcond(M::LimitIters) = M.iter_count < M.iter_limit ? stopcond(M.descent) : tr Wrapper type to limit step sizes attempted in optimization, given a function `(origin, direction) -> max_step`. """ -struct ConstrainStepSize{T<:DescentMethod, F} <: Wrapper - descent::T +struct ConstrainStepSize{F, T<:AbstractOptBuffer} <: Wrapper constraint::F + descent::T end -ConstrainStepSize(M::DescentMethod) = ConstrainStepSize(M, infstep) +ConstrainStepSize(M::AbstractOptBuffer) = ConstrainStepSize(infstep, M) base_method(M::ConstrainStepSize) = M.descent -function init!(M::ConstrainStepSize, args...; kw...) - init!(M.descent, args...; constrain_step=M.constraint, kw...) +function init!(fdf, M::ConstrainStepSize, args...; kw...) + init!(fdf, M.descent, args...; constrain_step=M.constraint, kw...) return end -step!(M::ConstrainStepSize, optfn!) 
= step!(M.descent, optfn!, constrain_step=M.constraint) +step!(optfn!, M::ConstrainStepSize) = step!(optfn!, M.descent; constrain_step=M.constraint) -struct OptFunc{M<:DescentMethod,F<:Base.Callable}<:Function - method::M +struct OptFunc{F<:Base.Callable,M<:AbstractOptBuffer}<:Function fdf::F + buffer::M end function (optfn::OptFunc)(x, α, d) - y, g = callfn!(optfn.method, optfn.fdf, x, α, d) + y, g = callfn!(optfn.fdf, optfn.buffer, x, α, d) @logmsg OptLogLevel "$(join(x .+ α .* d, ' ')) $y $(join(g, ' '))" y, g end diff --git a/test/convergence.jl b/test/convergence.jl index 81484b2..89c8f00 100644 --- a/test/convergence.jl +++ b/test/convergence.jl @@ -3,7 +3,7 @@ x0 = fill(-1, dim) @testset for method in OPT_TYPES opt = method(x0) - optresult = optimize!(opt, rosenbrock!, x0; maxiter=1000) + optresult = optimize!(rosenbrock!, opt, x0; maxiter=1000) @test isapprox(optresult.argument, [1, 1], rtol=0.05) end end diff --git a/test/interface.jl b/test/interface.jl index d3307e4..5053ffc 100644 --- a/test/interface.jl +++ b/test/interface.jl @@ -22,13 +22,13 @@ end @testset "Solvers" begin @testset "$(typeof(descent).name)" for descent in descent_methods - opt = DescentMethods.solver( + opt = Downhill.solver( descent; gtol=1e-3, maxiter=100, maxcalls=1000, ) - @test optimize!(opt, rosenbrock!, init_vec) isa NamedTuple + @test optimize!(rosenbrock!, opt, init_vec) isa NamedTuple end end end diff --git a/test/linesearch.jl b/test/linesearch.jl index 9f556ed..c954d68 100644 --- a/test/linesearch.jl +++ b/test/linesearch.jl @@ -52,7 +52,7 @@ function convex_test(fdf, x0, Q, d=nothing) d = - 0.01 * grad0 # for greater values of d ⟹ α ≉ α₀ end - α = DescentMethods.strong_backtracking!(fdf, x0, d, y0, grad0) + α = Downhill.strong_backtracking!(fdf, x0, d, y0, grad0) "Exact minimizer of convex quadratic function. See Nocedal p. 56, eq. 3.55." 
α₀ = - (grad0' * d) / (d' * Q * d) diff --git a/test/runtests.jl b/test/runtests.jl index d809bff..9fa2808 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -1,7 +1,7 @@ using Test -using DescentMethods -using DescentMethods: argumentvec, gradientvec, step_origin, mcholesky! +using Downhill +using Downhill: argumentvec, gradientvec, step_origin, mcholesky! using LinearAlgebra