diff --git a/Project.toml b/Project.toml index 96b0a3822..642dfd8c2 100644 --- a/Project.toml +++ b/Project.toml @@ -1,7 +1,7 @@ name = "Pathfinder" uuid = "b1d3bc72-d0e7-4279-b92f-7fa5d6d2d454" authors = ["Seth Axen and contributors"] -version = "0.9.0-DEV" +version = "0.9.0" [deps] ADTypes = "47edcb42-4c32-4615-8424-f2b9edc5f35b" @@ -41,8 +41,8 @@ ADTypes = "0.2, 1" Accessors = "0.1.12" Distributions = "0.25.87" DynamicHMC = "3.4.0" -DynamicPPL = "0.24.7, 0.25, 0.27" -Folds = "0.2.2" +DynamicPPL = "0.25.2, 0.27" +Folds = "0.2.9" ForwardDiff = "0.10.19" IrrationalConstants = "0.1.1, 0.2" LinearAlgebra = "1.6" @@ -61,8 +61,8 @@ ReverseDiff = "1.4.5" SciMLBase = "1.95.0, 2" Statistics = "1.6" StatsBase = "0.33.7, 0.34" -Transducers = "0.4.66" -Turing = "0.30.5, 0.31, 0.32" +Transducers = "0.4.81" +Turing = "0.31.4, 0.32, 0.33" UnPack = "1" julia = "1.6" diff --git a/docs/Project.toml b/docs/Project.toml index d2a6cfc76..1bd3cf4d5 100644 --- a/docs/Project.toml +++ b/docs/Project.toml @@ -30,4 +30,4 @@ StatsFuns = "1" StatsPlots = "0.14.21, 0.15" TransformVariables = "0.6.2, 0.7, 0.8" TransformedLogDensities = "1.0.2" -Turing = "0.30.5, 0.31, 0.32" +Turing = "0.31.4, 0.32, 0.33" diff --git a/ext/PathfinderTuringExt.jl b/ext/PathfinderTuringExt.jl index 6bcac38a5..d959f5ec2 100644 --- a/ext/PathfinderTuringExt.jl +++ b/ext/PathfinderTuringExt.jl @@ -2,149 +2,86 @@ module PathfinderTuringExt if isdefined(Base, :get_extension) using Accessors: Accessors - using ADTypes: ADTypes using DynamicPPL: DynamicPPL using MCMCChains: MCMCChains using Pathfinder: Pathfinder - using Random: Random using Turing: Turing - import Pathfinder: flattened_varnames_list else # using Requires using ..Accessors: Accessors - using ..ADTypes: ADTypes using ..DynamicPPL: DynamicPPL using ..MCMCChains: MCMCChains using ..Pathfinder: Pathfinder - using ..Random: Random using ..Turing: Turing - import ..Pathfinder: flattened_varnames_list end -# utilities for working with Turing model parameter names using only the DynamicPPL API - -function Pathfinder.flattened_varnames_list(model::DynamicPPL.Model) - varnames_ranges = varnames_to_ranges(model) - nsyms = maximum(maximum, values(varnames_ranges)) - syms = Vector{Symbol}(undef, nsyms) - for (var_name, range) in varnames_to_ranges(model) - sym = Symbol(var_name) - if length(range) == 1 - syms[range[begin]] = sym - continue - end - for i in eachindex(range) - syms[range[i]] = Symbol("$sym[$i]") - end - end - return syms -end - -# code snippet shared by @torfjelde """ - varnames_to_ranges(model::DynamicPPL.Model) - varnames_to_ranges(model::DynamicPPL.VarInfo) - varnames_to_ranges(model::DynamicPPL.Metadata) - -Get `Dict` mapping variable names in model to their ranges in a corresponding parameter vector. + create_log_density_problem(model::DynamicPPL.Model) -# Examples +Create a log density problem from a `model`. -```julia -julia> @model function demo() - s ~ Dirac(1) - x = Matrix{Float64}(undef, 2, 4) - x[1, 1] ~ Dirac(2) - x[2, 1] ~ Dirac(3) - x[3] ~ Dirac(4) - y ~ Dirac(5) - x[4] ~ Dirac(6) - x[:, 3] ~ arraydist([Dirac(7), Dirac(8)]) - x[[2, 1], 4] ~ arraydist([Dirac(9), Dirac(10)]) - return s, x, y - end -demo (generic function with 2 methods) - -julia> demo()() -(1, Any[2.0 4.0 7 10; 3.0 6.0 8 9], 5) +The return value is an object implementing the LogDensityProblems API whose log-density is +that of the `model` transformed to unconstrained space with the appropriate log-density +adjustment due to change of variables. +""" +function create_log_density_problem(model::DynamicPPL.Model) + # create an unconstrained VarInfo + varinfo = DynamicPPL.link(DynamicPPL.VarInfo(model), model) + # DefaultContext ensures that the log-density adjustment is computed + prob = DynamicPPL.LogDensityFunction(varinfo, model, DynamicPPL.DefaultContext()) + return prob +end -julia> varnames_to_ranges(demo()) -Dict{AbstractPPL.VarName, UnitRange{Int64}} with 8 entries: - s => 1:1 - x[4] => 5:5 - x[:,3] => 6:7 - x[1,1] => 2:2 - x[2,1] => 3:3 - x[[2, 1],4] => 8:9 - x[3] => 4:4 - y => 10:10 -``` """ -function varnames_to_ranges end + draws_to_chains(model::DynamicPPL.Model, draws) -> MCMCChains.Chains -varnames_to_ranges(model::DynamicPPL.Model) = varnames_to_ranges(DynamicPPL.VarInfo(model)) -function varnames_to_ranges(varinfo::DynamicPPL.UntypedVarInfo) - return varnames_to_ranges(varinfo.metadata) -end -function varnames_to_ranges(varinfo::DynamicPPL.TypedVarInfo) - offset = 0 - dicts = map(varinfo.metadata) do md - vns2ranges = varnames_to_ranges(md) - vals = collect(values(vns2ranges)) - vals_offset = map(r -> offset .+ r, vals) - offset += reduce((curr, r) -> max(curr, r[end]), vals; init=0) - Dict(zip(keys(vns2ranges), vals_offset)) +Convert a `(nparams, ndraws)` matrix of unconstrained `draws` to an `MCMCChains.Chains` +object with corresponding constrained draws and names according to `model`. +""" +function draws_to_chains(model::DynamicPPL.Model, draws::AbstractMatrix) + varinfo = DynamicPPL.link(DynamicPPL.VarInfo(model), model) + draw_con_varinfos = map(eachcol(draws)) do draw + # this re-evaluates the model but allows supporting dynamic bijectors + # https://github.com/TuringLang/Turing.jl/issues/2195 + return Turing.Inference.getparams(model, DynamicPPL.unflatten(varinfo, draw)) end - - return reduce(merge, dicts) -end -function varnames_to_ranges(metadata::DynamicPPL.Metadata) - idcs = map(Base.Fix1(getindex, metadata.idcs), metadata.vns) - ranges = metadata.ranges[idcs] - return Dict(zip(metadata.vns, ranges)) + param_con_names = map(first, first(draw_con_varinfos)) + draws_con = reduce( + vcat, Iterators.map(transpose ∘ Base.Fix1(map, last), draw_con_varinfos) + ) + chns = MCMCChains.Chains(draws_con, param_con_names) + return chns end -function Pathfinder.pathfinder( - model::DynamicPPL.Model; - rng=Random.GLOBAL_RNG, - init_scale=2, - init_sampler=Pathfinder.UniformSampler(init_scale), - init=nothing, - adtype::ADTypes.AbstractADType=Pathfinder.default_ad(), - kwargs..., -) - var_names = flattened_varnames_list(model) - prob = Turing.optim_problem( - model, Turing.MAP(); constrained=false, init_theta=init, adtype - ) - init_sampler(rng, prob.prob.u0) - result = Pathfinder.pathfinder(prob.prob; rng, input=model, kwargs...) - draws = reduce(vcat, transpose.(prob.transform.(eachcol(result.draws)))) - chns = MCMCChains.Chains(draws, var_names; info=(; pathfinder_result=result)) - result_new = Accessors.@set result.draws_transformed = chns +function Pathfinder.pathfinder(model::DynamicPPL.Model; kwargs...) + log_density_problem = create_log_density_problem(model) + result = Pathfinder.pathfinder(log_density_problem; input=model, kwargs...) + + # add transformed draws as Chains + chains_info = (; pathfinder_result=result) + chains = Accessors.@set draws_to_chains(model, result.draws).info = chains_info + result_new = Accessors.@set result.draws_transformed = chains return result_new end -function Pathfinder.multipathfinder( - model::DynamicPPL.Model, - ndraws::Int; - rng=Random.GLOBAL_RNG, - init_scale=2, - init_sampler=Pathfinder.UniformSampler(init_scale), - nruns::Int, - adtype=Pathfinder.default_ad(), - kwargs..., -) - var_names = flattened_varnames_list(model) - fun = Turing.optim_function(model, Turing.MAP(); constrained=false, adtype) - init1 = fun.init() - init = [init_sampler(rng, init1)] - for _ in 2:nruns - push!(init, init_sampler(rng, deepcopy(init1))) +function Pathfinder.multipathfinder(model::DynamicPPL.Model, ndraws::Int; kwargs...) + log_density_problem = create_log_density_problem(model) + result = Pathfinder.multipathfinder(log_density_problem, ndraws; input=model, kwargs...) + + # add transformed draws as Chains + chains_info = (; pathfinder_result=result) + chains = Accessors.@set draws_to_chains(model, result.draws).info = chains_info + + # add transformed draws as Chains for each individual path + single_path_results_new = map(result.pathfinder_results) do r + single_chains_info = (; pathfinder_result=r) + single_chains = Accessors.@set draws_to_chains(model, r.draws).info = + single_chains_info + r_new = Accessors.@set r.draws_transformed = single_chains + return r_new end - result = Pathfinder.multipathfinder(fun.func, ndraws; rng, input=model, init, kwargs...) - draws = reduce(vcat, transpose.(fun.transform.(eachcol(result.draws)))) - chns = MCMCChains.Chains(draws, var_names; info=(; pathfinder_result=result)) - result_new = Accessors.@set result.draws_transformed = chns + + result_new = Accessors.@set (Accessors.@set result.draws_transformed = + chains).pathfinder_results = single_path_results_new return result_new end diff --git a/src/Pathfinder.jl b/src/Pathfinder.jl index d7177ea42..5ba239272 100644 --- a/src/Pathfinder.jl +++ b/src/Pathfinder.jl @@ -50,8 +50,6 @@ include("resample.jl") include("singlepath.jl") include("multipath.jl") -include("integration/turing.jl") - function __init__() Requires.@require AdvancedHMC = "0bf59076-c3b1-5ca4-86bd-e02cd72cde3d" begin include("integration/advancedhmc.jl") diff --git a/src/integration/turing.jl b/src/integration/turing.jl deleted file mode 100644 index 1d8ddd392..000000000 --- a/src/integration/turing.jl +++ /dev/null @@ -1,41 +0,0 @@ -""" - flattened_varnames_list(model::DynamicPPL.Model) -> Vector{Symbol} - -Get a vector of varnames as `Symbol`s with one-to-one correspondence to the -flattened parameter vector. - -!!! note - This function is only available when Turing has been loaded. - -# Examples - -```julia -julia> @model function demo() - s ~ Dirac(1) - x = Matrix{Float64}(undef, 2, 4) - x[1, 1] ~ Dirac(2) - x[2, 1] ~ Dirac(3) - x[3] ~ Dirac(4) - y ~ Dirac(5) - x[4] ~ Dirac(6) - x[:, 3] ~ arraydist([Dirac(7), Dirac(8)]) - x[[2, 1], 4] ~ arraydist([Dirac(9), Dirac(10)]) - return s, x, y - end -demo (generic function with 2 methods) - -julia> flattened_varnames_list(demo()) -10-element Vector{Symbol}: - :s - Symbol("x[1,1]") - Symbol("x[2,1]") - Symbol("x[3]") - Symbol("x[4]") - Symbol("x[:,3][1]") - Symbol("x[:,3][2]") - Symbol("x[[2, 1],4][1]") - Symbol("x[[2, 1],4][2]") - :y -``` -""" -function flattened_varnames_list end diff --git a/test/integration/AdvancedHMC/Project.toml b/test/integration/AdvancedHMC/Project.toml index 02e9ff1d0..d90ca4f6f 100644 --- a/test/integration/AdvancedHMC/Project.toml +++ b/test/integration/AdvancedHMC/Project.toml @@ -16,7 +16,7 @@ TransformVariables = "84d833dd-6860-57f9-a1a7-6da5db126cff" TransformedLogDensities = "f9bc47f6-f3f8-4f3b-ab21-f8bc73906f26" [compat] -AdvancedHMC = "0.4, 0.5.2, 0.6" +AdvancedHMC = "0.6" Distributions = "0.25.87" ForwardDiff = "0.10.19" LogDensityProblems = "2.1.0" diff --git a/test/integration/Turing/Project.toml b/test/integration/Turing/Project.toml index 5cdacb82e..8c0b5df09 100644 --- a/test/integration/Turing/Project.toml +++ b/test/integration/Turing/Project.toml @@ -1,10 +1,13 @@ [deps] +LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e" +LogDensityProblems = "6fdf6af0-433a-55f7-b3ed-c6c6e0b8df7c" Pathfinder = "b1d3bc72-d0e7-4279-b92f-7fa5d6d2d454" Random = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c" Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40" Turing = "fce5fe82-541a-59a6-adf8-730c64b5f9a0" [compat] +LogDensityProblems = "2.1.0" Pathfinder = "0.9" -Turing = "0.30.5, 0.31, 0.32" +Turing = "0.31.4, 0.32, 0.33" julia = "1.6" diff --git a/test/integration/Turing/runtests.jl b/test/integration/Turing/runtests.jl index de5ee966f..58799c8da 100644 --- a/test/integration/Turing/runtests.jl +++ b/test/integration/Turing/runtests.jl @@ -1,7 +1,15 @@ -using Pathfinder, Random, Test, Turing +using LogDensityProblems, LinearAlgebra, Pathfinder, Random, Test, Turing +using Turing.Bijectors + +if isdefined(Base, :get_extension) + PathfinderTuringExt = Base.get_extension(Pathfinder, :PathfinderTuringExt) +else + PathfinderTuringExt = Pathfinder.PathfinderTuringExt +end Random.seed!(0) +#! format: off @model function regression_model(x, y) σ ~ truncated(Normal(); lower=0) α ~ Normal() @@ -11,35 +19,125 @@ Random.seed!(0) return (; y) end +# adapted from https://github.com/TuringLang/Turing.jl/issues/2195 +@model function dynamic_const_model() + lb ~ Uniform(0, 0.1) + ub ~ Uniform(0.11, 0.2) + x ~ Bijectors.transformed( + Normal(0, 1), Bijectors.inverse(Bijectors.Logit(lb, ub)) + ) +end + +@model function transformed_model(dist, bijector) + y ~ Bijectors.transformed(dist, bijector) +end +#! format: on + @testset "Turing integration" begin - x = 0:0.01:1 - y = sin.(x) .+ randn.() .* 0.2 .+ x - X = [x x .^ 2 x .^ 3] - model = regression_model(X, y) - expected_param_names = Symbol.(["α", "β[1]", "β[2]", "β[3]", "σ"]) - - result = pathfinder(model; ndraws=10_000) - @test result isa PathfinderResult - @test result.input === model - @test size(result.draws) == (5, 10_000) - @test result.draws_transformed isa MCMCChains.Chains - @test result.draws_transformed.info.pathfinder_result isa PathfinderResult - @test sort(names(result.draws_transformed)) == expected_param_names - @test all(>(0), result.draws_transformed[:σ]) - init_params = Vector(result.draws_transformed.value[1, :, 1]) - chns = sample(model, NUTS(), 10_000; init_params, progress=false) - @test mean(chns).nt.mean ≈ mean(result.draws_transformed).nt.mean rtol = 0.1 - - result = multipathfinder(model, 10_000; nruns=4) - @test result isa MultiPathfinderResult - @test result.input === model - @test size(result.draws) == (5, 10_000) - @test length(result.pathfinder_results) == 4 - @test result.draws_transformed isa MCMCChains.Chains - @test result.draws_transformed.info.pathfinder_result isa MultiPathfinderResult - @test sort(names(result.draws_transformed)) == expected_param_names - @test all(>(0), result.draws_transformed[:σ]) - init_params = Vector(result.draws_transformed.value[1, :, 1]) - chns = sample(model, NUTS(), 10_000; init_params, progress=false) - @test mean(chns).nt.mean ≈ mean(result.draws_transformed).nt.mean rtol = 0.1 + @testset "create_log_density_problem" begin + @testset for bijector in [elementwise(log), Bijectors.SimplexBijector()], + udist in [Normal(1, 2), Normal(3, 4)], + n in [1, 5] + + binv = Bijectors.inverse(bijector) + dist = filldist(udist, n) + dist_trans = Bijectors.transformed(dist, binv) + model = transformed_model(dist, binv) + prob = PathfinderTuringExt.create_log_density_problem(model) + @test LogDensityProblems.capabilities(prob) isa + LogDensityProblems.LogDensityOrder{0} + x = rand(n, 10) + # after applying the Jacobian correction, the log-density of the model should + # be the same as the log-density of the distribution in unconstrained space + @test LogDensityProblems.logdensity.(Ref(prob), eachcol(x)) ≈ logpdf(dist, x) + end + end + + @testset "draws_to_chains" begin + draws = randn(3, 100) + model = dynamic_const_model() + chns = PathfinderTuringExt.draws_to_chains(model, draws) + @test chns isa MCMCChains.Chains + @test size(chns) == (100, 3, 1) + @test names(chns) == [:lb, :ub, :x] + @test all(0 .< chns[:, :lb, 1] .< 0.1) + @test all(0.11 .< chns[:, :ub, 1] .< 0.2) + @test all(chns[:, :lb, 1] .< chns[:, :x, 1] .< chns[:, :ub, 1]) + end + + @testset "integration tests" begin + @testset "regression model" begin + x = 0:0.01:1 + y = sin.(x) .+ randn.() .* 0.2 .+ x + X = [x x .^ 2 x .^ 3] + model = regression_model(X, y) + expected_param_names = Symbol.(["α", "β[1]", "β[2]", "β[3]", "σ"]) + + result = pathfinder(model; ndraws=10_000) + @test result isa PathfinderResult + @test result.input === model + @test size(result.draws) == (5, 10_000) + @test result.draws_transformed isa MCMCChains.Chains + @test result.draws_transformed.info.pathfinder_result isa PathfinderResult + @test sort(names(result.draws_transformed)) == expected_param_names + @test all(>(0), result.draws_transformed[:σ]) + init_params = Vector(result.draws_transformed.value[1, :, 1]) + chns = sample(model, NUTS(), 10_000; init_params, progress=false) + @test mean(chns).nt.mean ≈ mean(result.draws_transformed).nt.mean rtol = 0.1 + + result = multipathfinder(model, 10_000; nruns=4) + @test result isa MultiPathfinderResult + @test result.input === model + @test size(result.draws) == (5, 10_000) + @test length(result.pathfinder_results) == 4 + @test result.draws_transformed isa MCMCChains.Chains + @test result.draws_transformed.info.pathfinder_result isa MultiPathfinderResult + @test sort(names(result.draws_transformed)) == expected_param_names + @test all(>(0), result.draws_transformed[:σ]) + init_params = Vector(result.draws_transformed.value[1, :, 1]) + chns = sample(model, NUTS(), 10_000; init_params, progress=false) + @test mean(chns).nt.mean ≈ mean(result.draws_transformed).nt.mean rtol = 0.1 + + for r in result.pathfinder_results + @test r.draws_transformed isa MCMCChains.Chains + end + end + end + + @testset "transformed IID normal solved exactly" begin + @testset for bijector in [elementwise(log), Bijectors.SimplexBijector()], + udist in [Normal(1, 2), Normal(3, 4)], + n in [1, 5] + + binv = Bijectors.inverse(bijector) + dist = filldist(udist, n) + model = transformed_model(dist, binv) + result = pathfinder(model) + @test mean(result.fit_distribution) ≈ fill(mean(udist), n) + @test cov(result.fit_distribution) ≈ Diagonal(fill(var(udist), n)) + + result = multipathfinder(model, 100; nruns=4, ndraws_per_run=100) + @test result isa MultiPathfinderResult + for r in result.pathfinder_results + @test mean(r.fit_distribution) ≈ fill(mean(udist), n) + @test cov(r.fit_distribution) ≈ Diagonal(fill(var(udist), n)) + end + end + end + + @testset "models with dynamic constraints successfully fitted" begin + result = pathfinder(dynamic_const_model(); ndraws=10_000) + chns = result.draws_transformed + @test all(0 .< chns[:, :lb, 1] .< 0.1) + @test all(0.11 .< chns[:, :ub, 1] .< 0.2) + @test all(chns[:, :lb, 1] .< chns[:, :x, 1] .< chns[:, :ub, 1]) + + result = multipathfinder(dynamic_const_model(), 10_000; nruns=4) + for r in result.pathfinder_results + chns = r.draws_transformed + @test all(0 .< chns[:, :lb, 1] .< 0.1) + @test all(0.11 .< chns[:, :ub, 1] .< 0.2) + @test all(chns[:, :lb, 1] .< chns[:, :x, 1] .< chns[:, :ub, 1]) + end + end end