diff --git a/Project.toml b/Project.toml index 6b4d02b54..a2c6f4132 100644 --- a/Project.toml +++ b/Project.toml @@ -15,9 +15,12 @@ LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e" MCMCChains = "c7f686f2-ff18-58e9-bc7b-31028e88f75d" Pkg = "44cfe95a-1eb2-52ea-b672-e2afdf69b78f" Printf = "de0858da-6303-5e67-8744-51eddeeeb8d7" +ProgressBars = "49802e3a-d2f1-5c88-81d8-b72133a6f568" PyCall = "438e738f-606a-5dbb-bf0a-cddfbfd45ab0" Random = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c" +RandomFeatures = "36c3bae2-c0c3-419d-b3b4-eebadd35c5e5" ScikitLearn = "3646fa90-6ef7-5e7e-9f22-8aca16db6324" +StableRNGs = "860ef19b-820b-49d6-a774-d7a799459cd3" Statistics = "10745b16-79ce-11e8-11f9-7d13ad32a3b2" StatsBase = "2913bbd2-ae8a-5f71-8c99-4fb6c76f3a91" @@ -32,8 +35,9 @@ GaussianProcesses = "0.12" MCMCChains = "4.14, 5, 6" PyCall = "1.93" ScikitLearn = "0.6, 0.7" +RandomFeatures = "0.3" StatsBase = "0.33" -julia = "1.6" +julia = "1.6, 1.7, 1.8" [extras] Plots = "91a5bcdd-55d7-5caf-9e0b-520d859cae80" diff --git a/docs/Project.toml b/docs/Project.toml index a500ef4a7..51a4903c7 100644 --- a/docs/Project.toml +++ b/docs/Project.toml @@ -1,4 +1,3 @@ [deps] CalibrateEmulateSample = "95e48a1f-0bec-4818-9538-3db4340308e3" Documenter = "e30172f5-a6a5-5a46-863b-614d45cd2de4" - diff --git a/docs/make.jl b/docs/make.jl index 9e20907e5..cbbe22da9 100644 --- a/docs/make.jl +++ b/docs/make.jl @@ -12,7 +12,11 @@ design = ["AbstractMCMC sampling API" => "API/AbstractMCMC.md"] api = [ "CalibrateEmulateSample" => [ - "Emulators" => ["General Emulator" => "API/Emulators.md", "Gaussian Process" => "API/GaussianProcess.md"], + "Emulators" => [ + "General Interface" => "API/Emulators.md", + "Gaussian Process" => "API/GaussianProcess.md", +# "Random Features" => "API/RandomFeatures.md", + ], "MarkovChainMonteCarlo" => "API/MarkovChainMonteCarlo.md", "Utilities" => "API/Utilities.md", ], diff --git a/docs/src/API/GaussianProcess.md b/docs/src/API/GaussianProcess.md index 79be9e7c0..3b1058352 100644 --- a/docs/src/API/GaussianProcess.md +++ b/docs/src/API/GaussianProcess.md @@ -8,6 +8,7 @@ CurrentModule = CalibrateEmulateSample.Emulators GaussianProcessesPackage PredictionType GaussianProcess -build_models! 
+build_models!(::GaussianProcess{GPJL}, ::PairedDataContainer{FT}) where {FT <: AbstractFloat} optimize_hyperparameters!(::GaussianProcess{GPJL}) +predict(::GaussianProcess{GPJL}, ::AbstractMatrix{FT}) where {FT <: AbstractFloat} ``` \ No newline at end of file diff --git a/docs/src/API/RandomFeatures.md b/docs/src/API/RandomFeatures.md new file mode 100644 index 000000000..b5f5956bc --- /dev/null +++ b/docs/src/API/RandomFeatures.md @@ -0,0 +1,39 @@ +# RandomFeatures + +```@meta +CurrentModule = CalibrateEmulateSample.Emulators +``` + +## Scalar interface + +```@docs +ScalarRandomFeatureInterface +ScalarRandomFeatureInterface(::Int,::Int) +build_models!(::ScalarRandomFeatureInterface, ::PairedDataContainer{FT}) where {FT <: AbstractFloat} +predict(::ScalarRandomFeatureInterface, ::M) where {M <: AbstractMatrix} +``` + +## Vector Interface + +```@docs +VectorRandomFeatureInterface +VectorRandomFeatureInterface(::Int, ::Int, ::Int) +build_models!(::VectorRandomFeatureInterface, ::PairedDataContainer{FT}) where {FT <: AbstractFloat} +predict(::VectorRandomFeatureInterface, ::M) where {M <: AbstractMatrix} +``` + +## Other utilities +```@docs +get_rfms +get_fitted_features +get_batch_sizes +get_n_features +get_input_dim +get_output_dim +get_rng +get_diagonalize_input +get_feature_decomposition +get_optimizer_options +optimize_hyperparameters!(::ScalarRandomFeatureInterface) +optimize_hyperparameters!(::VectorRandomFeatureInterface) +``` diff --git a/examples/Emulator/GaussianProcess/plot_GP.jl b/examples/Emulator/GaussianProcess/plot_GP.jl index 5a815969c..a0ee9751f 100644 --- a/examples/Emulator/GaussianProcess/plot_GP.jl +++ b/examples/Emulator/GaussianProcess/plot_GP.jl @@ -68,9 +68,9 @@ if !isdir(output_directory) end #create the machine learning tools: Gaussian Process -gppackage = GPJL() +gppackage = SKLJL() pred_type = YType() -gaussian_process = GaussianProcess(gppackage, noise_learn = true) +gaussian_process = GaussianProcess(gppackage, noise_learn = false) # Generate training data (x-y pairs, where x ∈ ℝ ᵖ, y ∈ ℝ ᵈ) # x = [x1, x2]: inputs/predictors/features/parameters @@ -92,7 +92,7 @@ gx[2, :] = g2x # Add noise η μ = zeros(d) -Σ = 0.1 * [[0.8, 0.0] [0.0, 0.5]] # d x d +Σ = 0.1 * [[0.8, 0.1] [0.1, 0.5]] # d x d noise_samples = rand(MvNormal(μ, Σ), n) # y = G(x) + η Y = gx .+ noise_samples @@ -182,9 +182,9 @@ println("GP trained") # Plot mean and variance of the predicted observables y1 and y2 # For this, we generate test points on a x1-x2 grid. 
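The API page added above (docs/src/API/RandomFeatures.md) lists the scalar and vector random-feature interfaces without showing how they plug into an Emulator. A minimal, self-contained sketch assembled from the examples introduced later in this patch; the feature count, noise covariance, and test function here are illustrative, not defaults:

```julia
using CalibrateEmulateSample.Emulators
using CalibrateEmulateSample.DataContainers

n, p, d = 100, 2, 2                          # training points, input dim, output dim (illustrative)
X = 2.0 * π * rand(p, n)
Y = vcat((sin.(X[1, :]) .+ cos.(X[2, :]))', (sin.(X[1, :]) .- cos.(X[2, :]))')
Σ = 0.1 * [0.8 0.1; 0.1 0.5]                 # observational noise covariance, as in the examples
iopairs = PairedDataContainer(X, Y, data_are_columns = true)

srfi = ScalarRandomFeatureInterface(200, p, diagonalize_input = true)
# vector alternative: VectorRandomFeatureInterface(200, p, d, diagonalize_output = true)

emulator = Emulator(srfi, iopairs, obs_noise_cov = Σ, normalize_inputs = true)
optimize_hyperparameters!(emulator)          # RF hyperparameters are fitted at build time; call kept for a uniform interface
new_inputs = 2.0 * π * rand(p, 10)           # predict expects input_dim × N_samples
rf_mean, rf_cov = predict(emulator, new_inputs, transform_to_real = true)
```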
-n_pts = 50 -x1 = range(0.0, stop = 2 * π, length = n_pts) -x2 = range(0.0, stop = 2 * π, length = n_pts) +n_pts = 200 +x1 = range(0.0, stop = (4.0 / 5.0) * 2 * π, length = n_pts) +x2 = range(0.0, stop = (4.0 / 5.0) * 2 * π, length = n_pts) X1, X2 = meshgrid(x1, x2) # Input for predict has to be of size N_samples x input_dim inputs = permutedims(hcat(X1[:], X2[:]), (2, 1)) diff --git a/examples/Emulator/RandomFeature/Project.toml b/examples/Emulator/RandomFeature/Project.toml new file mode 100644 index 000000000..9d41c3993 --- /dev/null +++ b/examples/Emulator/RandomFeature/Project.toml @@ -0,0 +1,15 @@ +[deps] +CalibrateEmulateSample = "95e48a1f-0bec-4818-9538-3db4340308e3" +Conda = "8f4d0f93-b110-5947-807f-2305c1781a2d" +Distributions = "31c24e10-a181-5473-b8eb-7969acd0382f" +FiniteDiff = "6a86dc24-6348-571c-b903-95158fe2bd41" +LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e" +Plots = "91a5bcdd-55d7-5caf-9e0b-520d859cae80" +PyPlot = "d330b81b-6aea-500a-939a-2ce795aea3ee" +Random = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c" +StableRNGs = "860ef19b-820b-49d6-a774-d7a799459cd3" +Statistics = "10745b16-79ce-11e8-11f9-7d13ad32a3b2" + +[compat] +FiniteDiff = "~2.10" +julia = "~1.6" diff --git a/examples/Emulator/RandomFeature/scalar_optimize_and_plot_RF.jl b/examples/Emulator/RandomFeature/scalar_optimize_and_plot_RF.jl new file mode 100644 index 000000000..d202f0391 --- /dev/null +++ b/examples/Emulator/RandomFeature/scalar_optimize_and_plot_RF.jl @@ -0,0 +1,259 @@ +# Reference the in-tree version of CalibrateEmulateSample on Julias load path +include(joinpath(@__DIR__, "..", "..", "ci", "linkfig.jl")) + +# Import modules +using Random +using StableRNGs +using Distributions +using Statistics +using LinearAlgebra +using CalibrateEmulateSample.Emulators +using CalibrateEmulateSample.DataContainers +using CalibrateEmulateSample.ParameterDistributions + +case = "scalar" +println("running case $case") + +diagonalize_input = true + +plot_flag = true +if plot_flag + using Plots + gr(size = (1500, 700)) + Plots.scalefontsizes(1.3) + font = Plots.font("Helvetica", 18) + fontdict = Dict(:guidefont => font, :xtickfont => font, :ytickfont => font, :legendfont => font) + +end + +function meshgrid(vx::AbstractVector{T}, vy::AbstractVector{T}) where {T} + m, n = length(vy), length(vx) + gx = reshape(repeat(vx, inner = m, outer = 1), m, n) + gy = reshape(repeat(vy, inner = 1, outer = n), m, n) + + return gx, gy +end +rng_seed = 41 +Random.seed!(rng_seed) +output_directory = joinpath(@__DIR__, "output") +if !isdir(output_directory) + mkdir(output_directory) +end + +#problem +n = 100 # number of training points +p = 2 # input dim +d = 2 # output dim +X = 2.0 * π * rand(p, n) +# G(x1, x2) +g1x = sin.(X[1, :]) .+ cos.(X[2, :]) +g2x = sin.(X[1, :]) .- cos.(X[2, :]) +gx = zeros(2, n) +gx[1, :] = g1x +gx[2, :] = g2x + +# Add noise η +μ = zeros(d) +Σ = 0.1 * [[0.8, 0.1] [0.1, 0.5]] # d x d +noise_samples = rand(MvNormal(μ, Σ), n) +# y = G(x) + η +Y = gx .+ noise_samples + +iopairs = PairedDataContainer(X, Y, data_are_columns = true) +@assert get_inputs(iopairs) == X +@assert get_outputs(iopairs) == Y + +#plot training data with and without noise +if plot_flag + p1 = plot( + X[1, :], + X[2, :], + g1x, + st = :surface, + camera = (30, 60), + c = :cividis, + xlabel = "x1", + ylabel = "x2", + zguidefontrotation = 90, + ) + + figpath = joinpath(output_directory, "RF_observed_y1nonoise.png") + savefig(figpath) + + p2 = plot( + X[1, :], + X[2, :], + g2x, + st = :surface, + camera = (30, 60), + c = :cividis, + xlabel = 
"x1", + ylabel = "x2", + zguidefontrotation = 90, + ) + figpath = joinpath(output_directory, "RF_observed_y2nonoise.png") + savefig(figpath) + + p3 = plot( + X[1, :], + X[2, :], + Y[1, :], + st = :surface, + camera = (30, 60), + c = :cividis, + xlabel = "x1", + ylabel = "x2", + zguidefontrotation = 90, + ) + figpath = joinpath(output_directory, "RF_observed_y1.png") + savefig(figpath) + + p4 = plot( + X[1, :], + X[2, :], + Y[2, :], + st = :surface, + camera = (30, 60), + c = :cividis, + xlabel = "x1", + ylabel = "x2", + zguidefontrotation = 90, + ) + figpath = joinpath(output_directory, "RF_observed_y2.png") + savefig(figpath) + +end + +# setup random features +n_features = 180 + +srfi = ScalarRandomFeatureInterface(n_features, p, diagonalize_input = diagonalize_input) +emulator = Emulator(srfi, iopairs, obs_noise_cov = Σ, normalize_inputs = true) +println("build RF with $n training points and $(n_features) random features.") + +optimize_hyperparameters!(emulator) # although RF already optimized + +# Plot mean and variance of the predicted observables y1 and y2 +# For this, we generate test points on a x1-x2 grid. +n_pts = 200 +x1 = range(0.0, stop = 4.0 / 5.0 * 2 * π, length = n_pts) +x2 = range(0.0, stop = 4.0 / 5.0 * 2 * π, length = n_pts) +X1, X2 = meshgrid(x1, x2) +# Input for predict has to be of size N_samples x input_dim +inputs = permutedims(hcat(X1[:], X2[:]), (2, 1)) + +rf_mean, rf_cov = predict(emulator, inputs, transform_to_real = true) +println("end predictions at ", n_pts * n_pts, " points") + + +#plot predictions +for y_i in 1:d + rf_var_temp = [diag(rf_cov[j]) for j in 1:length(rf_cov)] # (40000,) + rf_var = permutedims(vcat([x' for x in rf_var_temp]...), (2, 1)) # 2 x 40000 + + mean_grid = reshape(rf_mean[y_i, :], n_pts, n_pts) # 2 x 40000 + if plot_flag + p5 = plot( + x1, + x2, + mean_grid, + st = :surface, + camera = (30, 60), + c = :cividis, + xlabel = "x1", + ylabel = "x2", + zlabel = "mean of y" * string(y_i), + zguidefontrotation = 90, + ) + end + var_grid = reshape(rf_var[y_i, :], n_pts, n_pts) + if plot_flag + p6 = plot( + x1, + x2, + var_grid, + st = :surface, + camera = (30, 60), + c = :cividis, + xlabel = "x1", + ylabel = "x2", + zlabel = "var of y" * string(y_i), + zguidefontrotation = 90, + ) + + plot(p5, p6, layout = (1, 2), legend = false) + + savefig(joinpath(output_directory, "RF_" * case * "_y" * string(y_i) * "_predictions.png")) + end +end + +# Plot the true components of G(x1, x2) +g1_true = sin.(inputs[1, :]) .+ cos.(inputs[2, :]) +g1_true_grid = reshape(g1_true, n_pts, n_pts) +if plot_flag + p7 = plot( + x1, + x2, + g1_true_grid, + st = :surface, + camera = (30, 60), + c = :cividis, + xlabel = "x1", + ylabel = "x2", + zlabel = "sin(x1) + cos(x2)", + zguidefontrotation = 90, + ) + savefig(joinpath(output_directory, "RF_" * case * "_true_g1.png")) +end + +g2_true = sin.(inputs[1, :]) .- cos.(inputs[2, :]) +g2_true_grid = reshape(g2_true, n_pts, n_pts) +if plot_flag + p8 = plot( + x1, + x2, + g2_true_grid, + st = :surface, + camera = (30, 60), + c = :cividis, + xlabel = "x1", + ylabel = "x2", + zlabel = "sin(x1) - cos(x2)", + zguidefontrotation = 90, + ) + g_true_grids = [g1_true_grid, g2_true_grid] + + savefig(joinpath(output_directory, "RF_" * case * "_true_g2.png")) + +end + +# Plot the difference between the truth and the mean of the predictions +for y_i in 1:d + + # Reshape rf_cov to size N_samples x output_dim + rf_var_temp = [diag(rf_cov[j]) for j in 1:length(rf_cov)] # (40000,) + rf_var = permutedims(vcat([x' for x in rf_var_temp]...), (2, 1)) 
# 40000 x 2 + + mean_grid = reshape(rf_mean[y_i, :], n_pts, n_pts) + var_grid = reshape(rf_var[y_i, :], n_pts, n_pts) + # Compute and plot 1/variance * (truth - prediction)^2 + + if plot_flag + zlabel = "1/var * (true_y" * string(y_i) * " - predicted_y" * string(y_i) * ")^2" + + p9 = plot( + x1, + x2, + sqrt.(1.0 ./ var_grid .* (g_true_grids[y_i] .- mean_grid) .^ 2), + st = :surface, + camera = (30, 60), + c = :magma, + zlabel = zlabel, + xlabel = "x1", + ylabel = "x2", + zguidefontrotation = 90, + ) + + savefig(joinpath(output_directory, "RF_" * case * "_y" * string(y_i) * "_difference_truth_prediction.png")) + end +end diff --git a/examples/Emulator/RandomFeature/vector_optimize_and_plot_RF.jl b/examples/Emulator/RandomFeature/vector_optimize_and_plot_RF.jl new file mode 100644 index 000000000..05e4f348f --- /dev/null +++ b/examples/Emulator/RandomFeature/vector_optimize_and_plot_RF.jl @@ -0,0 +1,286 @@ +# Reference the in-tree version of CalibrateEmulateSample on Julias load path +include(joinpath(@__DIR__, "..", "..", "ci", "linkfig.jl")) + +# Import modules +using Random +using StableRNGs +using Distributions +using Statistics +using LinearAlgebra +using CalibrateEmulateSample.Emulators +using CalibrateEmulateSample.DataContainers +using CalibrateEmulateSample.ParameterDistributions +plot_flag = true +if plot_flag + using Plots + gr(size = (1500, 700)) + Plots.scalefontsizes(1.3) + font = Plots.font("Helvetica", 18) + fontdict = Dict(:guidefont => font, :xtickfont => font, :ytickfont => font, :legendfont => font) + +end + + +function meshgrid(vx::AbstractVector{T}, vy::AbstractVector{T}) where {T} + m, n = length(vy), length(vx) + gx = reshape(repeat(vx, inner = m, outer = 1), m, n) + gy = reshape(repeat(vy, inner = 1, outer = n), m, n) + + return gx, gy +end + +function main() + + rng_seed = 41 + Random.seed!(rng_seed) + output_directory = joinpath(@__DIR__, "output") + if !isdir(output_directory) + mkdir(output_directory) + end + + cases = ["svd-diag", "svd-nondiag", "nosvd-diag", "nosvd-nondiag"] + case_mask = 1:4 # which cases to do + + #problem + n = 150 # number of training points + p = 2 # input dim + d = 2 # output dim + X = 2.0 * π * rand(p, n) + # G(x1, x2) + g1x = sin.(X[1, :]) .+ cos.(X[2, :]) + g2x = sin.(X[1, :]) .- cos.(X[2, :]) + gx = zeros(2, n) + gx[1, :] = g1x + gx[2, :] = g2x + + # Add noise η + μ = zeros(d) + Σ = 0.1 * [[0.8, 0.1] [0.1, 0.5]] # d x d + noise_samples = rand(MvNormal(μ, Σ), n) + # y = G(x) + η + Y = gx .+ noise_samples + + iopairs = PairedDataContainer(X, Y, data_are_columns = true) + @assert get_inputs(iopairs) == X + @assert get_outputs(iopairs) == Y + + #plot training data with and without noise + if plot_flag + p1 = plot( + X[1, :], + X[2, :], + g1x, + st = :surface, + camera = (30, 60), + c = :cividis, + xlabel = "x1", + ylabel = "x2", + zguidefontrotation = 90, + ) + + figpath = joinpath(output_directory, "RF_observed_y1nonoise.png") + savefig(figpath) + + p2 = plot( + X[1, :], + X[2, :], + g2x, + st = :surface, + camera = (30, 60), + c = :cividis, + xlabel = "x1", + ylabel = "x2", + zguidefontrotation = 90, + ) + figpath = joinpath(output_directory, "RF_observed_y2nonoise.png") + savefig(figpath) + + p3 = plot( + X[1, :], + X[2, :], + Y[1, :], + st = :surface, + camera = (30, 60), + c = :cividis, + xlabel = "x1", + ylabel = "x2", + zguidefontrotation = 90, + ) + figpath = joinpath(output_directory, "RF_observed_y1.png") + savefig(figpath) + + p4 = plot( + X[1, :], + X[2, :], + Y[2, :], + st = :surface, + camera = (30, 60), + c = :cividis, + 
xlabel = "x1", + ylabel = "x2", + zguidefontrotation = 90, + ) + figpath = joinpath(output_directory, "RF_observed_y2.png") + savefig(figpath) + + end + + for case in cases[case_mask] + println("running case $case") + + + + + # setup random features + n_features = 200 + + if case == "svd-diag" + vrfi = VectorRandomFeatureInterface(n_features, p, d, diagonalize_output = true) + emulator = Emulator(vrfi, iopairs, obs_noise_cov = Σ, normalize_inputs = true) + elseif case == "svd-nondiag" + vrfi = VectorRandomFeatureInterface(n_features, p, d) + emulator = Emulator(vrfi, iopairs, obs_noise_cov = Σ, normalize_inputs = true) + elseif case == "nosvd-diag" + vrfi = VectorRandomFeatureInterface(n_features, p, d, diagonalize_output = true) + emulator = Emulator(vrfi, iopairs, obs_noise_cov = Σ, normalize_inputs = true, decorrelate = false) + elseif case == "nosvd-nondiag" + vrfi = VectorRandomFeatureInterface(n_features, p, d) + emulator = Emulator(vrfi, iopairs, obs_noise_cov = Σ, normalize_inputs = true, decorrelate = false) + end + println("build RF with $n training points and $(n_features) random features.") + + optimize_hyperparameters!(emulator) # although RF already optimized + + # Plot mean and variance of the predicted observables y1 and y2 + # For this, we generate test points on a x1-x2 grid. + n_pts = 200 + x1 = range(0.0, stop = 4.0 / 5.0 * 2 * π, length = n_pts) + x2 = range(0.0, stop = 4.0 / 5.0 * 2 * π, length = n_pts) + X1, X2 = meshgrid(x1, x2) + # Input for predict has to be of size input_dim x N_samples + inputs = permutedims(hcat(X1[:], X2[:]), (2, 1)) + + rf_mean, rf_cov = predict(emulator, inputs, transform_to_real = true) + println("end predictions at ", n_pts * n_pts, " points") + + + #plot predictions + for y_i in 1:d + + rf_var_temp = [diag(rf_cov[j]) for j in 1:length(rf_cov)] # (40000,) + rf_var = permutedims(vcat([x' for x in rf_var_temp]...), (2, 1)) # 2 x 40000 + + mean_grid = reshape(rf_mean[y_i, :], n_pts, n_pts) # 2 x 40000 + if plot_flag + p5 = plot( + x1, + x2, + mean_grid, + st = :surface, + camera = (30, 60), + c = :cividis, + xlabel = "x1", + ylabel = "x2", + zlabel = "mean of y" * string(y_i), + zguidefontrotation = 90, + ) + end + var_grid = reshape(rf_var[y_i, :], n_pts, n_pts) + if plot_flag + p6 = plot( + x1, + x2, + var_grid, + st = :surface, + camera = (30, 60), + c = :cividis, + xlabel = "x1", + ylabel = "x2", + zlabel = "var of y" * string(y_i), + zguidefontrotation = 90, + ) + + plot(p5, p6, layout = (1, 2), legend = false) + + savefig(joinpath(output_directory, "RF_" * case * "_y" * string(y_i) * "_predictions.png")) + end + end + + # Plot the true components of G(x1, x2) + g1_true = sin.(inputs[1, :]) .+ cos.(inputs[2, :]) + g1_true_grid = reshape(g1_true, n_pts, n_pts) + if plot_flag + p7 = plot( + x1, + x2, + g1_true_grid, + st = :surface, + camera = (30, 60), + c = :cividis, + xlabel = "x1", + ylabel = "x2", + zlabel = "sin(x1) + cos(x2)", + zguidefontrotation = 90, + ) + savefig(joinpath(output_directory, "RF_" * case * "_true_g1.png")) + end + + g2_true = sin.(inputs[1, :]) .- cos.(inputs[2, :]) + g2_true_grid = reshape(g2_true, n_pts, n_pts) + if plot_flag + p8 = plot( + x1, + x2, + g2_true_grid, + st = :surface, + camera = (30, 60), + c = :cividis, + xlabel = "x1", + ylabel = "x2", + zlabel = "sin(x1) - cos(x2)", + zguidefontrotation = 90, + ) + g_true_grids = [g1_true_grid, g2_true_grid] + + savefig(joinpath(output_directory, "RF_" * case * "_true_g2.png")) + + end + MSE = 1 / size(rf_mean, 2) * sqrt(sum((rf_mean[1, :] - g1_true) .^ 2 + 
(rf_mean[2, :] - g2_true) .^ 2)) + println("L^2 error of mean and latent truth:", MSE) + + # Plot the difference between the truth and the mean of the predictions + for y_i in 1:d + + # Reshape rf_cov to size N_samples x output_dim + rf_var_temp = [diag(rf_cov[j]) for j in 1:length(rf_cov)] # (40000,) + rf_var = permutedims(vcat([x' for x in rf_var_temp]...), (2, 1)) # 40000 x 2 + + mean_grid = reshape(rf_mean[y_i, :], n_pts, n_pts) + var_grid = reshape(rf_var[y_i, :], n_pts, n_pts) + # Compute and plot 1/variance * (truth - prediction)^2 + + if plot_flag + zlabel = "1/var * (true_y" * string(y_i) * " - predicted_y" * string(y_i) * ")^2" + + p9 = plot( + x1, + x2, + sqrt.(1.0 ./ var_grid .* (g_true_grids[y_i] .- mean_grid) .^ 2), + st = :surface, + camera = (30, 60), + c = :magma, + zlabel = zlabel, + xlabel = "x1", + ylabel = "x2", + zguidefontrotation = 90, + ) + + savefig( + joinpath(output_directory, "RF_" * case * "_y" * string(y_i) * "_difference_truth_prediction.png"), + ) + end + end + end +end + +main() diff --git a/examples/GCM/Project.toml b/examples/GCM/Project.toml new file mode 100644 index 000000000..aba51ad3c --- /dev/null +++ b/examples/GCM/Project.toml @@ -0,0 +1,16 @@ +[deps] +CalibrateEmulateSample = "95e48a1f-0bec-4818-9538-3db4340308e3" +Conda = "8f4d0f93-b110-5947-807f-2305c1781a2d" +Distributions = "31c24e10-a181-5473-b8eb-7969acd0382f" +FiniteDiff = "6a86dc24-6348-571c-b903-95158fe2bd41" +JLD2 = "033835bb-8acc-5ee8-8aae-3f567f8a3819" +LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e" +Plots = "91a5bcdd-55d7-5caf-9e0b-520d859cae80" +PyPlot = "d330b81b-6aea-500a-939a-2ce795aea3ee" +Random = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c" +StableRNGs = "860ef19b-820b-49d6-a774-d7a799459cd3" +Statistics = "10745b16-79ce-11e8-11f9-7d13ad32a3b2" + +[compat] +FiniteDiff = "~2.10" +julia = "~1.6" diff --git a/examples/GCM/data_from_eki_inflateyonly_100.jld2 b/examples/GCM/data_from_eki_inflateyonly_100.jld2 new file mode 100644 index 000000000..1eae5106d Binary files /dev/null and b/examples/GCM/data_from_eki_inflateyonly_100.jld2 differ diff --git a/examples/GCM/emulate_sample_script.jl b/examples/GCM/emulate_sample_script.jl new file mode 100644 index 000000000..806a3058d --- /dev/null +++ b/examples/GCM/emulate_sample_script.jl @@ -0,0 +1,227 @@ +# Script to run Emulation and Sampling on data from GCM + +# Import modules +using Distributions # probability distributions and associated functions +using LinearAlgebra +using Plots +using Random +using JLD2 +# CES +using CalibrateEmulateSample.Emulators +using CalibrateEmulateSample.MarkovChainMonteCarlo +using CalibrateEmulateSample.ParameterDistributions +using CalibrateEmulateSample.DataContainers + +@time begin + + rng_seed = 2413798 + Random.seed!(rng_seed) + + # expname = "vrf-nondiag-logdet_newprior" + #emulator_type ∈ ["GPR","ScalarRFR","VectorRFR-svd-diag","VectorRFR-svd-nondiag", "VectorRFR-nondiag"] + # emulator_type = "GPR" + # expname = "gpr_newprior" + + # emulator_type = "ScalarRFR" + # expname = "srf_newprior" + + # emulator_type = "VectorRFR-svd-diag" + # expname = "vrf-svd-diag_newprior" + + # emulator_type = "VectorRFR-svd-nondiag" + # expname = "vrf-svd-nondiag_newprior" + + emulator_type = "VectorRFR-nondiag" + expname = "vrf-nondiag_newprior" + + # Output figure save directory + example_directory = @__DIR__ + println(example_directory) + figure_save_directory = joinpath(example_directory, "output") + data_save_directory = joinpath(example_directory, "output") + if !isdir(figure_save_directory) + 
mkdir(figure_save_directory) + end + if !isdir(data_save_directory) + mkdir(data_save_directory) + end + + # Load data from file + datafile = "data_from_eki_inflateyonly_100.jld2" + inputs = load(datafile)["inputs"] #100 x 2 x 10 + outputs = load(datafile)["outputs"] #100 x 96 x 10 + truth = load(datafile)["truth"] # 96 + obs_noise_cov = load(datafile)["obs_noise_cov"] # 96 x 96 + + #take only first 400 points + iter_mask = 1:4 + #data_mask = 1:32 + # data_mask= 33:64 + # data_mask= 65:96 + data_mask = 1:96 + #data_mask = [5*i for i = 1:Int(floor(96/5))] + + inputs = inputs[:, :, iter_mask] + outputs = outputs[:, data_mask, iter_mask] + obs_noise_cov = obs_noise_cov[data_mask, data_mask] + truth = truth[data_mask] + + # priorfile = "priors.jld2" + # prior = load(priorfile)["prior"] + + # derived quantities + N_ens, input_dim, N_iter = size(inputs) + output_dim = size(outputs, 2) + + stacked_inputs = reshape(permutedims(inputs, (1, 3, 2)), (N_ens * N_iter, input_dim)) + stacked_outputs = reshape(permutedims(outputs, (1, 3, 2)), (N_ens * N_iter, output_dim)) + input_output_pairs = PairedDataContainer(stacked_inputs, stacked_outputs, data_are_columns = false) #data are rows + normalized = true + + # setup random features + eki_options_override = Dict("tikhonov" => 0, "multithread" => "ensemble") #faster than tullio multithread for training + + + if emulator_type == "VectorRFR-svd-nondiag" || emulator_type == "VectorRFR-nondiag" + if emulator_type == "VectorRFR-svd-nondiag" + println("Running Vector RF model - using SVD and assuming non-diagonal variance ") + elseif emulator_type == "VectorRFR-nondiag" + println("Running Vector RF model - without SVD and assuming non-diagonal variance ") + end + + n_features = 80 * Int(floor(5 * sqrt(N_ens * N_iter))) + println("build RF with $(N_ens*N_iter) training points and $(n_features) random features.") + + + mlt = VectorRandomFeatureInterface(n_features, input_dim, output_dim, optimizer_options = eki_options_override) + + elseif emulator_type == "VectorRFR-svd-diag" + + println("Running Vector RF model - using SVD and assuming diagonal variance") + n_features = 20 * Int(floor(5 * sqrt(N_ens * N_iter))) + println("build RF with $(N_ens*N_iter) training points and $(n_features) random features.") + + mlt = VectorRandomFeatureInterface( + n_features, + input_dim, + output_dim, + diagonalize_output = true, + optimizer_options = eki_options_override, + ) + + elseif emulator_type == "ScalarRFR" + println("Running Scalar RF model") + n_features = 5 * Int(floor(5 * sqrt(N_ens * N_iter))) + mlt = ScalarRandomFeatureInterface(n_features, input_dim, optimizer_options = eki_options_override) + + else + emulator_type == "GPR" + println("Running Gaussian Process model") + gppackage = SKLJL() + mlt = GaussianProcess(gppackage, noise_learn = false) + + end + + if emulator_type == "VectorRFR-nondiag" + emulator = Emulator( + mlt, + input_output_pairs; + obs_noise_cov = obs_noise_cov, + normalize_inputs = normalized, + decorrelate = false, + ) + else + emulator = Emulator(mlt, input_output_pairs; obs_noise_cov = obs_noise_cov, normalize_inputs = normalized) + + end + optimize_hyperparameters!(emulator) + + # + # save the emulator! 
+ # + @save joinpath(data_save_directory, "emulator_" * expname * ".jld2") emulator + + # + # predict at some validation points + # + validate_id = ["phys", "mean", "rand"] + + for vid in validate_id + if vid == "phys" + new_input = [log(0.7 / 0.3) log(7200)]' # physical parameter value (at truth) + elseif vid == "mean" + new_input = [log(0.5 / (1 - 0.5)) log(43200)]' # mean-of-prior parameter value ("near-ish" truth) + elseif vid == "rand" + new_input = [log(0.31735951644387783 / (1 - 0.31735951644387783)) log(90632.50269636544)]' # random parameter value ("far" from truth + end + + pred_mean, pred_cov = predict(emulator, new_input, transform_to_real = true) + pred_sd = sqrt.([max(10 * eps(), pred_cov[1][i, i]) for i in 1:size(pred_cov[1], 1)]) + + + # NB pred_cov is a vector of matrices + tobj = load("truthobj_" * vid * "param.jld2")["truthobj"] + t_mean = tobj.mean[data_mask] + t_cov = tobj.cov[data_mask, data_mask] + + println("prediction error at truth for " * vid * " case:") + println(" mean: ", norm(t_mean - pred_mean)) + println(" cov: ", norm(t_cov - pred_cov[1])) + + save( + joinpath(data_save_directory, vid * "_" * expname * "_results.jld2"), + "pred_mean", + pred_mean, + "pred_cov", + pred_cov, + "pred_sd", + pred_sd, + ) + save(joinpath(data_save_directory, vid * "_" * expname * "_truth.jld2"), "true_mean", t_mean, "true_cov", t_cov) + end + + plot_input = [log(0.7 / 0.3) log(7200)]' # physical parameter value (at truth) + plot_mean, plot_cov = predict(emulator, plot_input, transform_to_real = true) + plot_sd = sqrt.([max(10 * eps(), plot_cov[1][i, i]) for i in 1:size(plot_cov[1], 1)]) + + + ids = [1:32, 33:64, 65:96] + plotnames = ["rh", "pr", "ext"] + + for (id, pn) in zip(ids, plotnames) + if data_mask == 1:96 + plt = plot( + collect(id), + plot_mean[id], + show = true, + ribbon = [2 * plot_sd[id]; 2 * plot_sd[id]], + linewidth = 5, + size = (600, 600), + label = "", + ) + figpath = joinpath(figure_save_directory, "predict_" * expname * "_" * pn * "_at_truth.png") + savefig(figpath) + println("plot saved at " * figpath) + else + if data_mask == id + plot_mask = 1:length(data_mask) + plt = plot( + collect(id), + plot_mean[plot_mask], + show = true, + ribbon = [2 * plot_sd[plot_mask]; 2 * plot_sd[plot_mask]], + linewidth = 5, + size = (600, 600), + label = "", + ) + figpath = joinpath(figure_save_directory, "predict_" * expname * "_" * pn * "_at_truth.png") + savefig(figpath) + println("plot saved at " * figpath) + end + end + + end + + + +end # for @time diff --git a/examples/GCM/priors.jld2 b/examples/GCM/priors.jld2 new file mode 100644 index 000000000..860634326 Binary files /dev/null and b/examples/GCM/priors.jld2 differ diff --git a/examples/GCM/sbatch_emulate_sample_jl b/examples/GCM/sbatch_emulate_sample_jl new file mode 100644 index 000000000..efbdb391c --- /dev/null +++ b/examples/GCM/sbatch_emulate_sample_jl @@ -0,0 +1,36 @@ +#!/bin/bash +#Submit this script with: sbatch thefilename +#SBATCH --time=6:00:00 # walltime +#SBATCH --ntasks-per-node=1 # number of processor cores (i.e. tasks) +#SBATCH --cpus-per-task=28 +#SBATCH --mem-per-cpu=6000 +#SBATCH -J "emulate_sample" # job name +#SBATCH --output=output/out_err/slurm_%j.out +#SBATCH --error=output/out_err/slurm_%j.err + +#general +set -euo pipefail #kill job if anything fails\ +#set -x # + +#modules (not automatically loaded with session) +module load julia/1.8.5 + +#julia package management + +export JULIA_PROJECT=@. 
+#precompiling is now done manually before +#julia -e 'using Pkg; Pkg.instantiate(); Pkg.API.precompile()' + +#run code +start=$(date +%s) + + +julia --project -t 28 emulate_sample_script.jl + +end=$(date +%s) +runtime=$((end-start)) +echo "********************" +echo "run time: $runtime" +echo "********************" + + diff --git a/examples/GCM/truthobj_meanparam.jld2 b/examples/GCM/truthobj_meanparam.jld2 new file mode 100644 index 000000000..cef3b8f9c Binary files /dev/null and b/examples/GCM/truthobj_meanparam.jld2 differ diff --git a/examples/GCM/truthobj_physparam.jld2 b/examples/GCM/truthobj_physparam.jld2 new file mode 100644 index 000000000..b2ab8e962 Binary files /dev/null and b/examples/GCM/truthobj_physparam.jld2 differ diff --git a/examples/GCM/truthobj_randparam.jld2 b/examples/GCM/truthobj_randparam.jld2 new file mode 100644 index 000000000..248ce29f1 Binary files /dev/null and b/examples/GCM/truthobj_randparam.jld2 differ diff --git a/examples/Lorenz/GModel.jl b/examples/Lorenz/GModel.jl index fd2820f00..4829fe9c8 100644 --- a/examples/Lorenz/GModel.jl +++ b/examples/Lorenz/GModel.jl @@ -7,11 +7,14 @@ export lorenz_forward include("GModel_common.jl") function run_ensembles(settings, lorenz_params, nd, N_ens) - g_ens = zeros(nd, N_ens) - for i in 1:N_ens + nthreads = Threads.nthreads() + g_ens = zeros(nthreads, nd, N_ens) + Threads.@threads for i in 1:N_ens + tid = Threads.threadid() # run the model with the current parameters, i.e., map θ to G(θ) - g_ens[:, i] = lorenz_forward(settings, lorenz_params[i]) + g_ens[tid, :, i] = lorenz_forward(settings, lorenz_params[i]) end + g_ens = dropdims(sum(g_ens, dims = 1), dims = 1) return g_ens end diff --git a/examples/Lorenz/calibrate.jl b/examples/Lorenz/calibrate.jl index 79038276a..b337a653d 100644 --- a/examples/Lorenz/calibrate.jl +++ b/examples/Lorenz/calibrate.jl @@ -39,7 +39,7 @@ function main() dynamics = 2 # Transient is 2 # Statistics integration length # This has to be less than 360 and 360 must be divisible by Ts_days - Ts_days = 90.0 # Integration length in days + Ts_days = 30.0 # Integration length in days # Stats type, which statistics to construct from the L96 system # 4 is a linear fit over a batch of length Ts_days # 5 is the mean over a batch of length Ts_days @@ -150,8 +150,9 @@ function main() else println("Using truth values to compute covariance") n_samples = 20 - yt = zeros(length(gt), n_samples) - for i in 1:n_samples + nthreads = Threads.nthreads() + yt = zeros(nthreads, length(gt), n_samples) + Threads.@threads for i in 1:n_samples lorenz_settings_local = GModel.LSettings( dynamics, stats_type, @@ -167,8 +168,10 @@ function main() ω_fixed, ω_true, ) - yt[:, i] = GModel.run_G_ensemble(params_true, lorenz_settings_local) + tid = Threads.threadid() + yt[tid, :, i] = GModel.run_G_ensemble(params_true, lorenz_settings_local) end + yt = dropdims(sum(yt, dims = 1), dims = 1) # Variance of truth data #Γy = convert(Array, Diagonal(dropdims(mean((yt.-mean(yt,dims=1)).^2,dims=1),dims=1))) # Covariance of truth data @@ -190,7 +193,7 @@ function main() # EKP parameters N_ens = 20 # number of ensemble members - N_iter = 5 # number of EKI iterations + N_iter = 6 # number of EKI iterations # initial parameters: N_params x N_ens initial_params = EKP.construct_initial_ensemble(priors, N_ens; rng_seed = rng_seed) diff --git a/examples/Lorenz/emulate_sample.jl b/examples/Lorenz/emulate_sample.jl index 46e2286ba..48a304f6c 100644 --- a/examples/Lorenz/emulate_sample.jl +++ b/examples/Lorenz/emulate_sample.jl @@ -1,6 +1,5 @@ 
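The threaded ensemble evaluation added above in GModel.jl (and mirrored in calibrate.jl's truth-sample loop) writes into a buffer with a leading thread dimension and sums it away afterwards. A standalone sketch of that pattern, where `forward_map` is a hypothetical stand-in for `lorenz_forward` / `run_G_ensemble`:

```julia
# Minimal sketch of the buffer-per-thread pattern used above (assumes Julia started with -t N).
function run_ensembles_threaded(forward_map, params, nd, N_ens)
    nthreads = Threads.nthreads()
    g_ens = zeros(nthreads, nd, N_ens)
    Threads.@threads for i in 1:N_ens
        tid = Threads.threadid()
        # each column i is written by exactly one task; the extra leading dimension
        # keeps writes from different threads on disjoint slices
        g_ens[tid, :, i] = forward_map(params[i])
    end
    # all but one slice per column are zero, so summing over the thread dimension
    # recovers the usual nd × N_ens ensemble array
    return dropdims(sum(g_ens, dims = 1), dims = 1)
end

# usage sketch: G = run_ensembles_threaded(my_model, [Θ[:, i] for i in 1:size(Θ, 2)], nd, size(Θ, 2))
```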
# Import modules include(joinpath(@__DIR__, "..", "ci", "linkfig.jl")) -include(joinpath(@__DIR__, "GModel.jl")) # Contains Lorenz 96 source code # Import modules using Distributions # probability distributions and associated functions @@ -35,180 +34,211 @@ end function main() - ##### - # Should be loaded: - τc = 5.0 - F_true = 8.0 # Mean F - A_true = 2.5 # Transient F amplitude - ω_true = 2.0 * π / (360.0 / τc) # Frequency of the transient F - #### - - exp_name = "Lorenz_histogram_F$(F_true)_A$(A_true)-w$(ω_true)" - rng_seed = 44009 - Random.seed!(rng_seed) - # loading relevant data - homedir = pwd() - println(homedir) - figure_save_directory = joinpath(homedir, "output/") - data_save_directory = joinpath(homedir, "output/") - data_save_file = joinpath(data_save_directory, "calibrate_results.jld2") - - if !isfile(data_save_file) - throw( - ErrorException( - "data file $data_save_file not found. \n First run: \n > julia --project calibrate.jl \n and store results $data_save_file", - ), - ) - end - ekiobj = load(data_save_file)["eki"] - priors = load(data_save_file)["priors"] - truth_sample = load(data_save_file)["truth_sample"] - truth_sample_mean = load(data_save_file)["truth_sample_mean"] - truth_params_constrained = load(data_save_file)["truth_input_constrained"] #true parameters in constrained space - truth_params = transform_constrained_to_unconstrained(priors, truth_params_constrained) - Γy = ekiobj.obs_noise_cov - ### - ### Emulate: Gaussian Process Regression - ### - - # Emulate-sample settings - standardize = true - retained_svd_frac = 0.95 - - gppackage = Emulators.GPJL() - pred_type = Emulators.YType() - gauss_proc = GaussianProcess( - gppackage; - kernel = nothing, # use default squared exponential kernel - prediction_type = pred_type, - noise_learn = false, - ) - - # Standardize the output data - # Use median over all data since all data are the same type - truth_sample_norm = vcat(truth_sample...) - norm_factor = get_standardizing_factors(truth_sample_norm) - println(size(norm_factor)) - #norm_factor = vcat(norm_factor...) 
- norm_factor = fill(norm_factor, size(truth_sample)) - println("Standardization factors") - println(norm_factor) - - # Get training points from the EKP iteration number in the second input term - N_iter = 5 - input_output_pairs = Utilities.get_training_points(ekiobj, N_iter) - # Save data - @save joinpath(data_save_directory, "input_output_pairs.jld2") input_output_pairs - - normalized = true - emulator = Emulator( - gauss_proc, - input_output_pairs; - obs_noise_cov = Γy, - normalize_inputs = normalized, - standardize_outputs = standardize, - standardize_outputs_factors = norm_factor, - retained_svd_frac = retained_svd_frac, - ) - optimize_hyperparameters!(emulator) - - # Check how well the Gaussian Process regression predicts on the - # true parameters - #if retained_svd_frac==1.0 - y_mean, y_var = Emulators.predict(emulator, reshape(truth_params, :, 1), transform_to_real = true) - - println("GP prediction on true parameters: ") - println(vec(y_mean)) - println(" GP variance") - println(diag(y_var[1], 0)) - println("true data: ") - println(truth_sample_mean) # same, regardless of norm_factor - println("GP MSE: ") - println(mean((truth_sample_mean - vec(y_mean)) .^ 2)) - - #end - ### - ### Sample: Markov Chain Monte Carlo - ### - # initial values - u0 = vec(mean(get_inputs(input_output_pairs), dims = 2)) - println("initial parameters: ", u0) - - # First let's run a short chain to determine a good step size - truth_sample = truth_sample - mcmc = MCMCWrapper(RWMHSampling(), truth_sample, priors, emulator; init_params = u0) - new_step = optimize_stepsize(mcmc; init_stepsize = 0.1, N = 2000, discard_initial = 0) - - # Now begin the actual MCMC - println("Begin MCMC - with step size ", new_step) - chain = MarkovChainMonteCarlo.sample(mcmc, 100_000; stepsize = new_step, discard_initial = 2_000) - posterior = MarkovChainMonteCarlo.get_posterior(mcmc, chain) - - post_mean = mean(posterior) - post_cov = cov(posterior) - println("post_mean") - println(post_mean) - println("post_cov") - println(post_cov) - println("D util") - println(det(inv(post_cov))) - println(" ") - - - # Plot the posteriors together with the priors and the true parameter values (in the constrained space) - n_params = length(truth_params) - save_directory = joinpath(figure_save_directory) - - posterior_distribution_unconstrained_dict = get_distribution(posterior) #as it is a Samples, the distribution are samples - posterior_samples_unconstrained_dict = get_distribution(posterior) #as it is a Samples, the distribution are samples - param_names = get_name(posterior) - posterior_samples_unconstrained_arr = vcat([posterior_samples_unconstrained_dict[n] for n in param_names]...) 
- posterior_samples = transform_unconstrained_to_constrained(priors, posterior_samples_unconstrained_arr) - - for idx in 1:n_params - if idx == 1 - param = "F" - xbounds = [F_true - 1.0, F_true + 1.0] - xs = collect(xbounds[1]:((xbounds[2] - xbounds[1]) / 100):xbounds[2]) - elseif idx == 2 - param = "A" - xbounds = [A_true - 1.0, A_true + 1.0] - xs = collect(xbounds[1]:((xbounds[2] - xbounds[1]) / 100):xbounds[2]) - elseif idx == 3 - param = "ω" - xs = collect((ω_true - 0.2):0.01:(ω_true + 0.2)) - xbounds = [xs[1], xs[end]] - else - throw("not implemented") + cases = [ + "GP", # diagonalize, train scalar GP, assume diag inputs + "RF-scalar-diagin", # diagonalize, train scalar RF, assume diag inputs (most comparable to GP) + "RF-scalar", # diagonalize, train scalar RF, don't asume diag inputs + "RF-vector-svd-diag", + "RF-vector-svd-nondiag", + "RF-vector-nosvd-diag", + "RF-vector-nosvd-nondiag", + ] + + #### CHOOSE YOUR CASE: + mask = 2:7 + for case in cases[mask] + + + #case = cases[7] + println("case: ", case) + + overrides = Dict("train_fraction" => 0.6, "verbose" => true) + + # Should be loaded: + τc = 5.0 + F_true = 8.0 # Mean F + A_true = 2.5 # Transient F amplitude + ω_true = 2.0 * π / (360.0 / τc) # Frequency of the transient F + #### + + exp_name = "Lorenz_histogram_F$(F_true)_A$(A_true)-w$(ω_true)" + rng_seed = 44011 + rng = Random.MersenneTwister(rng_seed) + + # loading relevant data + homedir = pwd() + println(homedir) + figure_save_directory = joinpath(homedir, "output/") + data_save_directory = joinpath(homedir, "output/") + data_save_file = joinpath(data_save_directory, "calibrate_results.jld2") + + if !isfile(data_save_file) + throw( + ErrorException( + "data file $data_save_file not found. \n First run: \n > julia --project calibrate.jl \n and store results $data_save_file", + ), + ) end - label = "true " * param - - histogram(posterior_samples[idx, :], bins = 100, normed = true, fill = :slategray, lab = "posterior") - prior_plot = get_distribution(mcmc.prior) + ekiobj = load(data_save_file)["eki"] + priors = load(data_save_file)["priors"] + truth_sample = load(data_save_file)["truth_sample"] + truth_sample_mean = load(data_save_file)["truth_sample_mean"] + truth_params_constrained = load(data_save_file)["truth_input_constrained"] #true parameters in constrained space + truth_params = transform_constrained_to_unconstrained(priors, truth_params_constrained) + Γy = ekiobj.obs_noise_cov + + n_params = length(truth_params) # "input dim" + output_dim = size(Γy, 1) + ### + ### Emulate: Gaussian Process Regression + ### + + # Emulate-sample settings + # choice of machine-learning tool + if case == "GP" + gppackage = Emulators.GPJL() + pred_type = Emulators.YType() + mlt = GaussianProcess( + gppackage; + kernel = nothing, # use default squared exponential kernel + prediction_type = pred_type, + noise_learn = false, + ) + elseif case ∈ ["RF-scalar", "RF-scalar-diagin"] + n_features = 300 + diagonalize_input = case == "RF-scalar-diagin" ? true : false + mlt = ScalarRandomFeatureInterface( + n_features, + n_params, + rng = rng, + diagonalize_input = diagonalize_input, + optimizer_options = overrides, + ) + elseif case ∈ ["RF-vector-svd-diag", "RF-vector-nosvd-diag", "RF-vector-svd-nondiag", "RF-vector-nosvd-nondiag"] + # do we want to assume that the outputs are decorrelated in the machine-learning problem? + diagonalize_output = case ∈ ["RF-vector-svd-diag", "RF-vector-nosvd-diag"] ? 
true : false + n_features = 300 + mlt = VectorRandomFeatureInterface( + n_features, + n_params, + output_dim, + rng = rng, + diagonalize_output = diagonalize_output, # assume outputs are decorrelated + optimizer_options = overrides, + ) + end - # This requires StatsPlots - xs_rows = permutedims(repeat(xs, 1, size(posterior_samples, 1)), (2, 1)) # This hack will enable us to apply the transformation using the full prior - xs_rows_unconstrained = transform_constrained_to_unconstrained(priors, xs_rows) - plot!( - xs, - pdf.(prior_plot[param_names[idx]], xs_rows_unconstrained[idx, :]), - w = 2.6, - color = :blue, - lab = "prior", + # Standardize the output data + # Use median over all data since all data are the same type + truth_sample_norm = vcat(truth_sample...) + norm_factor = get_standardizing_factors(truth_sample_norm) + println(size(norm_factor)) + #norm_factor = vcat(norm_factor...) + norm_factor = fill(norm_factor, size(truth_sample)) + println("Standardization factors") + println(norm_factor) + + # Get training points from the EKP iteration number in the second input term + N_iter = 6 # note we have 6 iterations stored + input_output_pairs = Utilities.get_training_points(ekiobj, N_iter - 1) # 1:N-1 + input_output_pairs_test = Utilities.get_training_points(ekiobj, N_iter:N_iter) # just "next" iteration used for testing + # Save data + @save joinpath(data_save_directory, "input_output_pairs.jld2") input_output_pairs + + standardize = true + retained_svd_frac = 1.0 + normalized = true + # do we want to use SVD to decorrelate outputs + decorrelate = case ∈ ["RF-vector-nosvd-diag", "RF-vector-nosvd-nondiag"] ? false : true + + emulator = Emulator( + mlt, + input_output_pairs; + obs_noise_cov = Γy, + normalize_inputs = normalized, + standardize_outputs = standardize, + standardize_outputs_factors = norm_factor, + retained_svd_frac = retained_svd_frac, + decorrelate = decorrelate, ) - #plot!(xs, mcmc.prior[idx].dist, w=2.6, color=:blue, lab="prior") - plot!([truth_params[idx]], seriestype = "vline", w = 2.6, lab = label) - plot!(xlims = xbounds) + optimize_hyperparameters!(emulator) + + # Check how well the Gaussian Process regression predicts on the + # true parameters + #if retained_svd_frac==1.0 + y_mean, y_var = Emulators.predict(emulator, reshape(truth_params, :, 1), transform_to_real = true) + y_mean_test, y_var_test = + Emulators.predict(emulator, get_inputs(input_output_pairs_test), transform_to_real = true) + + println("ML prediction on true parameters: ") + println(vec(y_mean)) + println("true data: ") + println(truth_sample_mean) # same, regardless of norm_factor + println(" ML predicted variance") + println(diag(y_var[1], 0)) + println("ML MSE (truth): ") + println(mean((truth_sample_mean - vec(y_mean)) .^ 2)) + println("ML MSE (next ensemble): ") + println(mean((get_outputs(input_output_pairs_test) - y_mean_test) .^ 2)) + + #end + ### + ### Sample: Markov Chain Monte Carlo + ### + # initial values + u0 = vec(mean(get_inputs(input_output_pairs), dims = 2)) + println("initial parameters: ", u0) + + # First let's run a short chain to determine a good step size + truth_sample = truth_sample + mcmc = MCMCWrapper(RWMHSampling(), truth_sample, priors, emulator; init_params = u0) + new_step = optimize_stepsize(mcmc; init_stepsize = 0.1, N = 2000, discard_initial = 0) + + # Now begin the actual MCMC + println("Begin MCMC - with step size ", new_step) + chain = MarkovChainMonteCarlo.sample(mcmc, 100_000; stepsize = new_step, discard_initial = 2_000) + posterior = 
MarkovChainMonteCarlo.get_posterior(mcmc, chain) + + post_mean = mean(posterior) + post_cov = cov(posterior) + println("post_mean") + println(post_mean) + println("post_cov") + println(post_cov) + println("D util") + println(det(inv(post_cov))) + println(" ") + + constrained_truth_params = transform_unconstrained_to_constrained(posterior, truth_params) + param_names = get_name(posterior) + + posterior_samples = vcat([get_distribution(posterior)[name] for name in get_name(posterior)]...) #samples are columns + constrained_posterior_samples = + mapslices(x -> transform_unconstrained_to_constrained(posterior, x), posterior_samples, dims = 1) + + gr(dpi = 300, size = (300, 300)) + p = cornerplot(permutedims(constrained_posterior_samples, (2, 1)), label = param_names, compact = true) + plot!(p.subplots[1], [constrained_truth_params[1]], seriestype = "vline", w = 1.5, c = :steelblue, ls = :dash) # vline on top histogram + plot!(p.subplots[3], [constrained_truth_params[2]], seriestype = "hline", w = 1.5, c = :steelblue, ls = :dash) # hline on right histogram + plot!(p.subplots[2], [constrained_truth_params[1]], seriestype = "vline", w = 1.5, c = :steelblue, ls = :dash) # v & h line on scatter. + plot!(p.subplots[2], [constrained_truth_params[2]], seriestype = "hline", w = 1.5, c = :steelblue, ls = :dash) + figpath = joinpath(figure_save_directory, "posterior_2d-" * case * ".png") + savefig(figpath) - title!(param) - figpath = joinpath(figure_save_directory, "posterior_$(param)_" * exp_name * ".png") - savefig(figpath) - linkfig(figpath) + # Save data + save( + joinpath(data_save_directory, "posterior.jld2"), + "posterior", + posterior, + "input_output_pairs", + input_output_pairs, + "truth_params", + truth_params, + ) end - end main() diff --git a/src/CalibrateEmulateSample.jl b/src/CalibrateEmulateSample.jl index 9f227d0ee..f5956b175 100644 --- a/src/CalibrateEmulateSample.jl +++ b/src/CalibrateEmulateSample.jl @@ -14,13 +14,14 @@ import EnsembleKalmanProcesses: EnsembleKalmanProcesses, ParameterDistributions, export EnsembleKalmanProcesses, ParameterDistributions, Observations, DataContainers -# No internal deps, heavy external deps -#include("GaussianProcessEmulator.jl") -include("Emulator.jl") # Internal deps, light external deps include("Utilities.jl") +# No internal deps, heavy external deps +#include("GaussianProcessEmulator.jl") +include("Emulator.jl") + # Internal deps, light external deps include("MarkovChainMonteCarlo.jl") diff --git a/src/Emulator.jl b/src/Emulator.jl index e4fa76aa8..6f4d2572d 100644 --- a/src/Emulator.jl +++ b/src/Emulator.jl @@ -7,9 +7,11 @@ using Statistics using Distributions using LinearAlgebra using DocStringExtensions +using Random export Emulator +export build_models! export optimize_hyperparameters! export predict @@ -24,7 +26,7 @@ abstract type MachineLearningTool end # include the different <: ML models include("GaussianProcess.jl") #for GaussianProcess -# include("RandomFeature.jl") +include("RandomFeature.jl") # include("NeuralNetwork.jl") # etc. 
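The include list above is the extension point for new emulators: each included file supplies a subtype of `MachineLearningTool` together with methods for the exported verbs (`build_models!`, `optimize_hyperparameters!`, `predict`). A hedged sketch of that contract with a hypothetical tool name; the actual requirements are defined by GaussianProcess.jl and the new RandomFeature.jl:

```julia
# Sketch of the MachineLearningTool contract, inferred from the exports above; names are illustrative.
struct MyTool <: MachineLearningTool
    fitted_models::Vector{Any}          # storage for fitted models (illustrative)
end

# called by the Emulator constructor on the processed (normalized/decorrelated) training pairs
function build_models!(tool::MyTool, training_pairs::PairedDataContainer)
    # fit to get_inputs(training_pairs) / get_outputs(training_pairs) and store the result
    return nothing
end

# called by the user after construction; may be a no-op if fitting already optimizes hyperparameters
optimize_hyperparameters!(tool::MyTool) = nothing

# return (mean, variance-like object) at new_inputs of size input_dim × N_samples
function predict(tool::MyTool, new_inputs::AbstractMatrix)
    # evaluate tool.fitted_models at each column of new_inputs
end
```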
@@ -75,6 +77,8 @@ struct Emulator{FT <: AbstractFloat} retained_svd_frac::FT end +get_machine_learning_tool(emulator::Emulator) = emulator.machine_learning_tool + # Constructor for the Emulator Object function Emulator( machine_learning_tool::MachineLearningTool, @@ -83,6 +87,7 @@ function Emulator( normalize_inputs::Bool = true, standardize_outputs::Bool = false, standardize_outputs_factors::Union{AbstractVector{FT}, Nothing} = nothing, + decorrelate::Bool = true, retained_svd_frac::FT = 1.0, ) where {FT <: AbstractFloat} @@ -118,24 +123,35 @@ function Emulator( training_outputs = get_outputs(input_output_pairs) end - # [3.] Decorrelating the outputs [always performed] - - #Transform data if obs_noise_cov available - # (if obs_noise_cov==nothing, transformed_data is equal to data) - decorrelated_training_outputs, decomposition = - svd_transform(training_outputs, obs_noise_cov, retained_svd_frac = retained_svd_frac) + # [3.] Decorrelating the outputs, not performed for vector RF + if decorrelate + #Transform data if obs_noise_cov available + # (if obs_noise_cov==nothing, transformed_data is equal to data) + decorrelated_training_outputs, decomposition = + svd_transform(training_outputs, obs_noise_cov, retained_svd_frac = retained_svd_frac) + + # write new pairs structure + if retained_svd_frac < 1.0 + #note this changes the dimension of the outputs + training_pairs = PairedDataContainer(training_inputs, decorrelated_training_outputs) + input_dim, output_dim = size(training_pairs, 1) + else + training_pairs = PairedDataContainer(training_inputs, decorrelated_training_outputs) + end - # write new pairs structure - if retained_svd_frac < 1.0 - #note this changes the dimension of the outputs - training_pairs = PairedDataContainer(training_inputs, decorrelated_training_outputs) - input_dim, output_dim = size(training_pairs, 1) + # [4.] build an emulator + build_models!(machine_learning_tool, training_pairs) else - training_pairs = PairedDataContainer(training_inputs, decorrelated_training_outputs) + if decorrelate || !isa(machine_learning_tool, VectorRandomFeatureInterface) + throw(ArgumentError("$machine_learning_tool is incompatible with option Emulator(...,decorrelate = false)")) + end + decomposition = nothing + training_pairs = PairedDataContainer(training_inputs, training_outputs) + + # [4.] build an emulator - providing the noise covariance as a Tikhonov regularizer + build_models!(machine_learning_tool, training_pairs, regularization_matrix = obs_noise_cov) end - # [4.] build an emulator - build_models!(machine_learning_tool, training_pairs) return Emulator{FT}( machine_learning_tool, @@ -170,6 +186,7 @@ function predict( emulator::Emulator{FT}, new_inputs::AbstractMatrix{FT}; transform_to_real = false, + vector_rf_unstandardize = true, ) where {FT <: AbstractFloat} # Check if the size of new_inputs is consistent with the GP model's input # dimension. @@ -177,7 +194,7 @@ function predict( N_samples = size(new_inputs, 2) size(new_inputs, 1) == input_dim || - throw(ArgumentError("GP object and input observations do not have consistent dimensions")) + throw(ArgumentError("Emulator object and input observations do not have consistent dimensions")) # [1.] normalize normalized_new_inputs = normalize(emulator, new_inputs) @@ -186,28 +203,68 @@ function predict( ds_outputs, ds_output_var = predict(emulator.machine_learning_tool, normalized_new_inputs) # [3.] 
transform back to real coordinates or remain in decorrelated coordinates - if transform_to_real && emulator.decomposition === nothing - throw(ArgumentError("""Need SVD decomposition to transform back to original space, - but GaussianProcess.decomposition == nothing. - Try setting transform_to_real=false""")) - elseif transform_to_real && emulator.decomposition !== nothing - #transform back to real coords - cov becomes dense - s_outputs, s_output_cov = svd_reverse_transform_mean_cov(ds_outputs, ds_output_var, emulator.decomposition) - if output_dim == 1 - s_output_cov = [s_output_cov[i][1] for i in 1:N_samples] + if !isa(get_machine_learning_tool(emulator), VectorRandomFeatureInterface) + if transform_to_real && emulator.decomposition === nothing + throw(ArgumentError("""Need SVD decomposition to transform back to original space, + but GaussianProcess.decomposition == nothing. + Try setting transform_to_real=false""")) + elseif transform_to_real && emulator.decomposition !== nothing + + #transform back to real coords - cov becomes dense + s_outputs, s_output_cov = svd_reverse_transform_mean_cov(ds_outputs, ds_output_var, emulator.decomposition) + if output_dim == 1 + s_output_cov = reshape([s_output_cov[i][1] for i in 1:N_samples], 1, :) + end + + # [4.] unstandardize + return reverse_standardize(emulator, s_outputs, s_output_cov) + else + # remain in decorrelated, standardized coordinates (cov remains diagonal) + # Convert to vector of matrices to match the format + # when transform_to_real=true + ds_output_diagvar = vec([Diagonal(ds_output_var[:, j]) for j in 1:N_samples]) + if output_dim == 1 + ds_output_diagvar = reshape([ds_output_diagvar[i][1] for i in 1:N_samples], 1, :) + end + return ds_outputs, ds_output_diagvar end - # [4.] unstandardize - return reverse_standardize(emulator, s_outputs, s_output_cov) else - # remain in decorrelated, standardized coordinates (cov remains diagonal) - # Convert to vector of matrices to match the format - # when transform_to_real=true - ds_output_diagvar = vec([Diagonal(ds_output_var[:, j]) for j in 1:N_samples]) - if output_dim == 1 - ds_output_diagvar = [ds_output_diagvar[i][1] for i in 1:N_samples] + # create a vector of covariance matrices + ds_output_covvec = vec([ds_output_var[:, :, j] for j in 1:N_samples]) + + if emulator.decomposition === nothing # without applying SVD + if vector_rf_unstandardize + # [4.] unstandardize + return reverse_standardize(emulator, ds_outputs, ds_output_covvec) + else + return ds_outputs, ds_output_covvec + end + + else #if we applied SVD + if transform_to_real # ...and want to return coordinates + s_outputs, s_output_cov = + svd_reverse_transform_mean_cov(ds_outputs, ds_output_covvec, emulator.decomposition) + if output_dim == 1 + s_output_cov = reshape([s_output_cov[i][1] for i in 1:N_samples], 1, :) + end + # [4.] unstandardize + return reverse_standardize(emulator, s_outputs, s_output_cov) + else #... 
and want to stay in decorrelated standardized coordinates + diag_out_flag = get_diagonalize_output(get_machine_learning_tool(emulator)) + if diag_out_flag + ds_output_diagvar = vec([Diagonal(ds_output_covvec[j]) for j in 1:N_samples]) # extracts diagonal + if output_dim == 1 + ds_output_diagvar = ([ds_output_covvec[i][1, 1] for i in 1:N_samples], 1, :) # extracts value + end + return ds_outputs, ds_output_diagvar + else + return ds_outputs, ds_output_covvec + end + end end - return ds_outputs, ds_output_diagvar + end + end # Normalization, Standardization, and Decorrelation @@ -244,10 +301,14 @@ $(DocStringExtensions.TYPEDSIGNATURES) Standardize with a vector of factors (size equal to output dimension). """ function standardize( - outputs::AbstractVecOrMat{FT}, - output_covs::Vector{<:Union{AbstractMatrix{FT}, UniformScaling{FT}}}, - factors::AbstractVector{FT}, -) where {FT <: AbstractFloat} + outputs::VorM, + output_covs::VofMUS, + factors::V, +) where { + VorM <: AbstractVecOrMat, + VofMUS <: Union{AbstractVector{<:AbstractMatrix}, AbstractVector{<:UniformScaling}}, + V <: AbstractVector, +} # Case where `output_covs` is a Vector of covariance matrices n = length(factors) outputs = outputs ./ factors @@ -273,10 +334,15 @@ function standardize( end function standardize( - outputs::AbstractVecOrMat{FT}, - output_cov::Union{AbstractMatrix{FT}, UniformScaling{FT}}, - factors::AbstractVector{FT}, -) where {FT <: AbstractFloat} + outputs::VorM, + output_cov::MorVorUS, + factors::V, +) where { + VorM <: AbstractVecOrMat, + MorVorUS <: Union{AbstractMatrix, AbstractVector{<:AbstractFloat}, UniformScaling}, # must be vector of floats or could cause inf. loop with other method definition. + V <: AbstractVector, +} + # Case where `output_cov` is a single covariance matrix stdized_out, stdized_covs = standardize(outputs, [output_cov], factors) return stdized_out, stdized_covs[1] @@ -291,9 +357,13 @@ dimension). `output_cov` is a Vector of covariance matrices, such as is returned """ function reverse_standardize( emulator::Emulator{FT}, - outputs::AbstractVecOrMat{FT}, - output_covs::Union{AbstractMatrix{FT}, Vector{<:AbstractMatrix{FT}}}, -) where {FT <: AbstractFloat} + outputs::VorM, + output_covs::VorMtwo, +) where { + VorM <: AbstractVecOrMat, + VorMtwo <: AbstractVecOrMat, # can be different type + FT <: AbstractFloat, +} if emulator.standardize_outputs return standardize(outputs, output_covs, 1.0 ./ emulator.standardize_outputs_factors) else @@ -375,6 +445,7 @@ $(DocStringExtensions.TYPEDSIGNATURES) Transform the mean and covariance back to the original (correlated) coordinate system - `μ` - predicted mean; size *output\\_dim* × *N\\_predicted\\_points*. - `σ2` - predicted variance; size *output\\_dim* × *N\\_predicted\\_points*. + - predicted covariance; size *N\\_predicted\\_points* array of size *output\\_dim* × *output\\_dim*. - `decomposition` - SVD decomposition of *obs\\_noise\\_cov*. Returns the transformed mean (size *output\\_dim* × *N\\_predicted\\_points*) and variance. @@ -386,12 +457,20 @@ each element is a matrix of size *output\\_dim* × *output\\_dim*. 
""" function svd_reverse_transform_mean_cov( μ::AbstractMatrix{FT}, - σ2::AbstractMatrix{FT}, + σ2::AbstractVector,# vector of output_dim x output_dim matrices decomposition::SVD, ) where {FT <: AbstractFloat} - @assert size(μ) == size(σ2) - output_dim, N_predicted_points = size(σ2) + N_predicted_points = length(σ2) + if !(eltype(σ2) <: AbstractMatrix) + throw( + ArgumentError( + "input σ2 must be a vector of eltype <: AbstractMatrix, instead eltype(σ2) = ", + string(eltype(σ2)), + ), + ) + end + # We created meanvGP = D_inv * Vt * mean_v so meanv = V * D * meanvGP sqrt_singular_values = Diagonal(sqrt.(decomposition.S)) transformed_μ = decomposition.V * sqrt_singular_values * μ @@ -400,13 +479,27 @@ function svd_reverse_transform_mean_cov( # Back transformation for j in 1:N_predicted_points - σ2_j = decomposition.V * sqrt_singular_values * Diagonal(σ2[:, j]) * sqrt_singular_values * decomposition.Vt + σ2_j = decomposition.V * sqrt_singular_values * σ2[j] * sqrt_singular_values * decomposition.Vt transformed_σ2[j] = Symmetric(σ2_j) end return transformed_μ, transformed_σ2 + end +function svd_reverse_transform_mean_cov( + μ::AbstractMatrix{FT}, + σ2::AbstractMatrix{FT}, + decomposition::SVD, +) where {FT <: AbstractFloat} + @assert size(μ) == size(σ2) + output_dim, N_predicted_points = size(σ2) + σ2_as_cov = vec([Diagonal(σ2[:, j]) for j in 1:N_predicted_points]) + + return svd_reverse_transform_mean_cov(μ, σ2_as_cov, decomposition) +end + + end diff --git a/src/MarkovChainMonteCarlo.jl b/src/MarkovChainMonteCarlo.jl index 1abb1134b..2e376b32e 100644 --- a/src/MarkovChainMonteCarlo.jl +++ b/src/MarkovChainMonteCarlo.jl @@ -165,7 +165,10 @@ function EmulatorPosteriorModel( # Vector of N_samples covariance matrices. For MH, N_samples is always 1, so we # have to reshape()/re-cast input/output; simpler to do here than add a # predict() method. - g, g_cov = Emulators.predict(em, reshape(θ, :, 1), transform_to_real = false) + g, g_cov = + Emulators.predict(em, reshape(θ, :, 1), transform_to_real = false, vector_rf_unstandardize = false) + #TODO vector_rf will always unstandardize, but other methods will not, so we require this additional flag. + if isa(g_cov[1], Real) return logpdf(MvNormal(obs_sample, g_cov[1] * I), vec(g)) + get_logpdf(prior, θ) else diff --git a/src/RandomFeature.jl b/src/RandomFeature.jl new file mode 100644 index 000000000..3465d2c0d --- /dev/null +++ b/src/RandomFeature.jl @@ -0,0 +1,661 @@ + +using RandomFeatures +const RF = RandomFeatures +using EnsembleKalmanProcesses +const EKP = EnsembleKalmanProcesses +using ..ParameterDistributions +using ..Utilities +using StableRNGs +using Random + +abstract type RandomFeatureInterface <: MachineLearningTool end + +abstract type MultithreadType end +struct TullioThreading <: MultithreadType end +struct EnsembleThreading <: MultithreadType end + +#common functions +""" +$(DocStringExtensions.TYPEDSIGNATURES) + +Converts a flat array into a cholesky factor. 
+""" +function flat_to_chol(x::V) where {V <: AbstractVector} + choldim = Int(floor(sqrt(2 * length(x)))) + cholmat = zeros(choldim, choldim) + for i in 1:choldim + for j in 1:i + cholmat[i, j] = x[sum(0:(i - 1)) + j] + end + end + return cholmat +end + +""" +$(DocStringExtensions.TYPEDSIGNATURES) + +Builds Covariance matrices from a flat array of hyperparameters +""" +function hyperparameters_from_flat( + x::VV, + input_dim::Int, + output_dim::Int, + diagonalize_input::Bool, + diagonalize_output::Bool, +) where {VV <: AbstractVector} + # if dim = 1 then parameters are a 1x1 matrix + # if we diagonalize then parameters are diagonal entries + constant + # if we don't diagonalize then parameters are a cholesky product + constant on diagonal. + + #input space + if input_dim == 1 + in_max = 1 + U = x[in_max] * ones(1, 1) + elseif diagonalize_input + in_max = input_dim + 2 + U = convert(Matrix, x[in_max - 1] * (Diagonal(x[1:(in_max - 2)]) + x[in_max] * I)) + elseif !diagonalize_input + in_max = Int(input_dim * (input_dim + 1) / 2) + 2 + cholU = flat_to_chol(x[1:(in_max - 2)]) + U = x[in_max - 1] * (cholU * permutedims(cholU, (2, 1)) + x[in_max] * I) + end + + # output_space + out_min = in_max + 1 + if output_dim == 1 + V = x[end] * ones(1, 1) + elseif diagonalize_output + V = convert(Matrix, x[end - 1] * (Diagonal(x[out_min:(end - 2)]) + x[end] * I)) + elseif !diagonalize_output + cholV = flat_to_chol(x[out_min:(end - 2)]) + V = x[end - 1] * (cholV * permutedims(cholV, (2, 1)) + x[end] * I) + end + + # sometimes this process does not give a (numerically) symmetric matrix + U = 0.5 * (U + permutedims(U, (2, 1))) + V = 0.5 * (V + permutedims(V, (2, 1))) + + return U, V + +end + +function hyperparameters_from_flat(x::VV, input_dim::Int, diagonalize_input::Bool) where {VV <: AbstractVector} + output_dim = 1 + diagonalize_output = false + U, V = hyperparameters_from_flat(x, input_dim, output_dim, diagonalize_input, diagonalize_output) + # Note that the scalar setting has a slight inconsistency from the 1-D output vector case + # We correct here by rescaling U using the single hyperparameter in V + return V[1, 1] * U +end + +""" +$(DocStringExtensions.TYPEDSIGNATURES) + +Builds a prior over the hyperparameters (i.e. the cholesky/diagaonal or individaul entries of the input/output covariances). +For example, the case where the input covariance ``U = γ_1 * (LL^T + γ_2 I)``, +we set priors for the entries of the lower triangular matrix ``L`` as normal, and constant scalings ``γ_i`` as log-normal to retain positivity. 
+""" +function build_default_prior(input_dim::Int, output_dim::Int, diagonalize_input::Bool, diagonalize_output::Bool) + n_hp_in = n_hyperparameters_cov(input_dim, diagonalize_input) + n_hp_out = n_hyperparameters_cov(output_dim, diagonalize_output) + + # if dim = 1 , positive constant + # elseif diagonalized, positive on diagonal and positive scalings + # else chol factor, and positive scalings + if input_dim > 1 + if diagonalize_input + param_diag = constrained_gaussian("input_prior_diagonal", 1.0, 1.0, 0.0, Inf, repeats = n_hp_in - 2) + param_scaling = constrained_gaussian("input_prior_scaling", 1.0, 1.0, 0.0, Inf, repeats = 2) + input_prior = combine_distributions([param_diag, param_scaling]) + else + param_chol = constrained_gaussian("input_prior_cholesky", 0.0, 1.0, -Inf, Inf, repeats = n_hp_in - 2) + param_scaling = constrained_gaussian("input_prior_scaling", 1.0, 1.0, 0.0, Inf, repeats = 2) + input_prior = combine_distributions([param_chol, param_scaling]) + end + else + input_prior = constrained_gaussian("input_prior", 1.0, 1.0, 0.0, Inf) + end + + if output_dim > 1 + if diagonalize_output + param_diag = constrained_gaussian("output_prior_diagonal", 1.0, 1.0, 0.0, Inf, repeats = n_hp_out - 2) + param_scaling = constrained_gaussian("output_prior_scaling", 1.0, 1.0, 0.0, Inf, repeats = 2) + output_prior = combine_distributions([param_diag, param_scaling]) + else + param_chol = constrained_gaussian("output_prior_cholesky", 0.0, 1.0, -Inf, Inf, repeats = n_hp_out - 2) + param_scaling = constrained_gaussian("output_prior_scaling", 1.0, 1.0, 0.0, Inf, repeats = 2) + output_prior = combine_distributions([param_chol, param_scaling]) + end + else + output_prior = constrained_gaussian("output_prior", 1.0, 1.0, 0.0, Inf) + end + + return combine_distributions([input_prior, output_prior]) + +end + +function build_default_prior(input_dim::Int, diagonalize_input::Bool) + #scalar case consistent with vector case + output_dim = 1 + diagonalize_output = false + return build_default_prior(input_dim, output_dim, diagonalize_input, diagonalize_output) +end + +""" +$(DocStringExtensions.TYPEDSIGNATURES) + +The number of hyperparameters required to characterize covariance (determined from `hyperparameters_from_flat`). +""" +function n_hyperparameters_cov(dim::Int, diagonalize::Bool) + n_hp = 0 + n_hp += diagonalize ? dim : Int(dim * (dim + 1) / 2) # inputs: diagonal or cholesky + n_hp += (dim > 1) ? 2 : 0 # add two more for constant diagonal in each case. + return n_hp +end + + +""" +$(DocStringExtensions.TYPEDSIGNATURES) + +Calculate number of hyperparameters required to create the default prior in the given input/output space (determined from `hyperparameters_from_flat`) +""" +function calculate_n_hyperparameters(input_dim::Int, output_dim::Int, diagonalize_input::Bool, diagonalize_output::Bool) + n_hp_in = n_hyperparameters_cov(input_dim, diagonalize_input) + n_hp_out = n_hyperparameters_cov(output_dim, diagonalize_output) + return n_hp_in + n_hp_out +end + +function calculate_n_hyperparameters(input_dim::Int, diagonalize_input::Bool) + #scalar case consistent with vector case + output_dim = 1 + diagonalize_output = false + return calculate_n_hyperparameters(input_dim, output_dim, diagonalize_input, diagonalize_output) +end + + +""" +$(DocStringExtensions.TYPEDSIGNATURES) + +Calculates key quantities (mean, covariance, and coefficients) of the objective function to be optimized for hyperparameter training. Buffers: mean_store, cov_store, buffer are provided to aid memory management. 
+""" +function calculate_mean_cov_and_coeffs( + rfi::RFI, + rng::RNG, + l::ForVM, + regularization::MorUSorD, + n_features::Int, + n_train::Int, + n_test::Int, + batch_sizes::Union{Dict{S, Int}, Nothing}, + io_pairs::PairedDataContainer, + decomp_type::S, + mean_store::M, + cov_store::A, + buffer::A, + multithread_type::MT, +) where { + RFI <: RandomFeatureInterface, + RNG <: AbstractRNG, + ForVM <: Union{AbstractFloat, AbstractVecOrMat}, + S <: AbstractString, + MorUSorD <: Union{Matrix, UniformScaling, Diagonal}, + M <: AbstractMatrix{<:AbstractFloat}, + A <: AbstractArray{<:AbstractFloat, 3}, + MT <: MultithreadType, +} + + # split data into train/test + itrain = get_inputs(io_pairs)[:, 1:n_train] + otrain = get_outputs(io_pairs)[:, 1:n_train] + io_train_cost = PairedDataContainer(itrain, otrain) + itest = get_inputs(io_pairs)[:, (n_train + 1):end] + otest = get_outputs(io_pairs)[:, (n_train + 1):end] + input_dim = size(itrain, 1) + output_dim = size(otrain, 1) + + # build and fit the RF + rfm = RFM_from_hyperparameters( + rfi, + rng, + l, + regularization, + n_features, + batch_sizes, + input_dim, + output_dim, + multithread_type, + ) + fitted_features = RF.Methods.fit(rfm, io_train_cost, decomposition_type = decomp_type) + + #we want to calc 1/var(y-mean)^2 + lambda/m * coeffs^2 in the end + thread_opt = isa(multithread_type, TullioThreading) + RF.Methods.predict!( + rfm, + fitted_features, + DataContainer(itest), + mean_store, + cov_store, + buffer, + tullio_threading = thread_opt, + ) + # sizes (output_dim x n_test), (output_dim x output_dim x n_test) + scaled_coeffs = sqrt(1 / n_features) * RF.Methods.get_coeffs(fitted_features) + + # we add the additional complexity term + # TODO only cholesky and SVD available + if decomp_type == "cholesky" + chol_fac = RF.Methods.get_decomposition(RF.Methods.get_feature_factors(fitted_features)).L + complexity = 2 * sum(log(chol_fac[i, i]) for i in 1:size(chol_fac, 1)) + else + svd_singval = RF.Methods.get_decomposition(RF.Methods.get_feature_factors(fitted_features)).S + complexity = sum(log, svd_singval) # note this is log(abs(det)) + end + return scaled_coeffs, complexity + +end + +""" +$(DocStringExtensions.TYPEDSIGNATURES) + +Estimation of objective function (marginal log-likelihood) variability for optimization of hyperparameters. Calculated empirically by calling many samples of the objective at a "reasonable" value. 
`multithread_type` is used as dispatch, either `ensemble` (thread over the empirical samples) or `tullio` (serial samples, and thread the linear algebra within each sample) +""" +function estimate_mean_and_coeffnorm_covariance( + rfi::RFI, + rng::RNG, + l::ForVM, + regularization::MorUSorD, + n_features::Int, + n_train::Int, + n_test::Int, + batch_sizes::Union{Dict{S, Int}, Nothing}, + io_pairs::PairedDataContainer, + n_samples::Int, + decomp_type::S, + multithread_type::TullioThreading; + repeats::Int = 1, +) where { + RFI <: RandomFeatureInterface, + RNG <: AbstractRNG, + ForVM <: Union{AbstractFloat, AbstractVecOrMat}, + S <: AbstractString, + MorUSorD <: Union{Matrix, UniformScaling, Diagonal}, +} + + output_dim = size(get_outputs(io_pairs), 1) + + means = zeros(output_dim, n_samples, n_test) + mean_of_covs = zeros(output_dim, output_dim, n_test) + moc_tmp = similar(mean_of_covs) + mtmp = zeros(output_dim, n_test) + buffer = zeros(n_test, output_dim, n_features) + complexity = zeros(1, n_samples) + coeffl2norm = zeros(1, n_samples) + println("estimate cov with " * string(n_samples * repeats) * " iterations...") + + for i in ProgressBar(1:n_samples) + for j in 1:repeats + c, cplxty = calculate_mean_cov_and_coeffs( + rfi, + rng, + l, + regularization, + n_features, + n_train, + n_test, + batch_sizes, + io_pairs, + decomp_type, + mtmp, + moc_tmp, + buffer, + multithread_type, + ) + + # m output_dim x n_test + # v output_dim x output_dim x n_test + # c n_features + # cplxty 1 + + # update vbles needed for cov + means[:, i, :] .+= mtmp ./ repeats + coeffl2norm[1, i] += sqrt(sum(abs2, c)) / repeats + complexity[1, i] += cplxty / repeats + + # update vbles needed for mean + @. mean_of_covs += moc_tmp / (repeats * n_samples) + + end + end + means = permutedims(means, (3, 2, 1)) + mean_of_covs = permutedims(mean_of_covs, (3, 1, 2)) + + approx_σ2 = zeros(n_test * output_dim, n_test * output_dim) + blockmeans = zeros(n_test * output_dim, n_samples) + for i in 1:n_test + id = ((i - 1) * output_dim + 1):(i * output_dim) + approx_σ2[id, id] = mean_of_covs[i, :, :] # this ordering, so we can take a mean/cov in dims = 2. + blockmeans[id, :] = permutedims(means[i, :, :], (2, 1)) + end + + if !isposdef(approx_σ2) + println("approx_σ2 not posdef") + approx_σ2 = posdef_correct(approx_σ2) + end + + Γ = cov(vcat(blockmeans, coeffl2norm, complexity), dims = 2) + + + return Γ, approx_σ2 + +end + +""" +$(DocStringExtensions.TYPEDSIGNATURES) + +Evaluate an objective function (marginal log-likelihood) over an ensemble of hyperparameter values (used for EKI). 
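+Returns the stacked forward-map evaluations `vcat(blockmeans, coeffl2norm, complexity)` (one column per ensemble member) and the block covariance matrix assembled from the ensemble-averaged predictive covariances.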
`multithread_type` is used as dispatch, either `ensemble` (thread over the ensemble members) or `tullio` (serial samples, and thread the linear algebra within each sample) +""" +function calculate_ensemble_mean_and_coeffnorm( + rfi::RFI, + rng::RNG, + lvecormat::VorM, + regularization::MorUSorD, + n_features::Int, + n_train::Int, + n_test::Int, + batch_sizes::Union{Dict{S, Int}, Nothing}, + io_pairs::PairedDataContainer, + decomp_type::S, + multithread_type::TullioThreading; + repeats::Int = 1, +) where { + RFI <: RandomFeatureInterface, + RNG <: AbstractRNG, + VorM <: AbstractVecOrMat, + S <: AbstractString, + MorUSorD <: Union{Matrix, UniformScaling, Diagonal}, +} + if isa(lvecormat, AbstractVector) + lmat = reshape(lvecormat, 1, :) + else + lmat = lvecormat + end + N_ens = size(lmat, 2) + output_dim = size(get_outputs(io_pairs), 1) + + means = zeros(output_dim, N_ens, n_test) + mean_of_covs = zeros(output_dim, output_dim, n_test) + buffer = zeros(n_test, output_dim, n_features) + complexity = zeros(1, N_ens) + coeffl2norm = zeros(1, N_ens) + moc_tmp = similar(mean_of_covs) + mtmp = zeros(output_dim, n_test) + + println("calculating " * string(N_ens * repeats) * " ensemble members...") + + for i in ProgressBar(1:N_ens) + for j in collect(1:repeats) + l = lmat[:, i] + + c, cplxty = calculate_mean_cov_and_coeffs( + rfi, + rng, + l, + regularization, + n_features, + n_train, + n_test, + batch_sizes, + io_pairs, + decomp_type, + mtmp, + moc_tmp, + buffer, + multithread_type, + ) + # m output_dim x n_test + # v output_dim x output_dim x n_test + # c n_features + means[:, i, :] += mtmp ./ repeats + @. mean_of_covs += moc_tmp / (repeats * N_ens) + coeffl2norm[1, i] += sqrt(sum(c .^ 2)) / repeats + complexity[1, i] += cplxty / repeats + end + end + means = permutedims(means, (3, 2, 1)) + mean_of_covs = permutedims(mean_of_covs, (3, 1, 2)) + blockcovmat = zeros(n_test * output_dim, n_test * output_dim) + blockmeans = zeros(n_test * output_dim, N_ens) + for i in 1:n_test + id = ((i - 1) * output_dim + 1):(i * output_dim) + blockcovmat[id, id] = mean_of_covs[i, :, :] + blockmeans[id, :] = permutedims(means[i, :, :], (2, 1)) + end + + if !isposdef(blockcovmat) + println("blockcovmat not posdef") + blockcovmat = posdef_correct(blockcovmat) + end + + return vcat(blockmeans, coeffl2norm, complexity), blockcovmat + +end + + +function estimate_mean_and_coeffnorm_covariance( + rfi::RFI, + rng::RNG, + l::ForVM, + regularization::MorUSorD, + n_features::Int, + n_train::Int, + n_test::Int, + batch_sizes::Union{Dict{S, Int}, Nothing}, + io_pairs::PairedDataContainer, + n_samples::Int, + decomp_type::S, + multithread_type::EnsembleThreading; + repeats::Int = 1, +) where { + RFI <: RandomFeatureInterface, + RNG <: AbstractRNG, + ForVM <: Union{AbstractFloat, AbstractVecOrMat}, + S <: AbstractString, + MorUSorD <: Union{Matrix, UniformScaling, Diagonal}, +} + + output_dim = size(get_outputs(io_pairs), 1) + + println("estimate cov with " * string(n_samples * repeats) * " iterations...") + + nthreads = Threads.nthreads() + rng_seed = randperm(rng, 10^5)[1] # dumb way to get a random integer in 1:10^5 + rng_list = [Random.MersenneTwister(rng_seed + i) for i in 1:nthreads] + + c_list = [zeros(1, n_samples) for i in 1:nthreads] + cp_list = [zeros(1, n_samples) for i in 1:nthreads] + m_list = [zeros(output_dim, n_samples, n_test) for i in 1:nthreads] + moc_list = [zeros(output_dim, output_dim, n_test) for i in 1:nthreads] + moc_tmp_list = [zeros(output_dim, output_dim, n_test) for i in 1:nthreads] + mtmp_list = 
[zeros(output_dim, n_test) for i in 1:nthreads] + buffer_list = [zeros(n_test, output_dim, n_features) for i in 1:nthreads] + + + Threads.@threads for i in ProgressBar(1:n_samples) + tid = Threads.threadid() + rngtmp = rng_list[tid] + mtmp = mtmp_list[tid] + moc_tmp = moc_tmp_list[tid] + buffer = buffer_list[tid] + for j in 1:repeats + + c, cplxty = calculate_mean_cov_and_coeffs( + rfi, + rngtmp, + l, + regularization, + n_features, + n_train, + n_test, + batch_sizes, + io_pairs, + decomp_type, + mtmp, + moc_tmp, + buffer, + multithread_type, + ) + + + # m output_dim x n_test + # v output_dim x output_dim x n_test + # c n_features + # cplxty 1 + + # update vbles needed for cov + # update vbles needed for cov + m_list[tid][:, i, :] += mtmp ./ repeats + c_list[tid][1, i] += sqrt(sum(abs2, c)) / repeats + cp_list[tid][1, i] += cplxty / repeats + + # update vbles needed for mean + @. moc_list[tid] += moc_tmp ./ (repeats * n_samples) + + end + end + + #put back together after threading + mean_of_covs = sum(moc_list) + means = sum(m_list) + coeffl2norm = sum(c_list) + complexity = sum(cp_list) + means = permutedims(means, (3, 2, 1)) + mean_of_covs = permutedims(mean_of_covs, (3, 1, 2)) + + approx_σ2 = zeros(n_test * output_dim, n_test * output_dim) + blockmeans = zeros(n_test * output_dim, n_samples) + for i in 1:n_test + id = ((i - 1) * output_dim + 1):(i * output_dim) + approx_σ2[id, id] = mean_of_covs[i, :, :] # this ordering, so we can take a mean/cov in dims = 2. + blockmeans[id, :] = permutedims(means[i, :, :], (2, 1)) + end + + if !isposdef(approx_σ2) + println("approx_σ2 not posdef") + approx_σ2 = posdef_correct(approx_σ2) + end + + Γ = cov(vcat(blockmeans, coeffl2norm, complexity), dims = 2) + return Γ, approx_σ2 + +end + +function calculate_ensemble_mean_and_coeffnorm( + rfi::RFI, + rng::RNG, + lvecormat::VorM, + regularization::MorUSorD, + n_features::Int, + n_train::Int, + n_test::Int, + batch_sizes::Union{Dict{S, Int}, Nothing}, + io_pairs::PairedDataContainer, + decomp_type::S, + multithread_type::EnsembleThreading; + repeats::Int = 1, +) where { + RFI <: RandomFeatureInterface, + RNG <: AbstractRNG, + VorM <: AbstractVecOrMat, + S <: AbstractString, + MorUSorD <: Union{Matrix, UniformScaling, Diagonal}, +} + if isa(lvecormat, AbstractVector) + lmat = reshape(lvecormat, 1, :) + else + lmat = lvecormat + end + N_ens = size(lmat, 2) + output_dim = size(get_outputs(io_pairs), 1) + + + + println("calculating " * string(N_ens * repeats) * " ensemble members...") + + nthreads = Threads.nthreads() + + means = zeros(output_dim, N_ens, n_test) + complexity = zeros(1, N_ens) + coeffl2norm = zeros(1, N_ens) + + c_list = [zeros(1, N_ens) for i in 1:nthreads] + cp_list = [zeros(1, N_ens) for i in 1:nthreads] + m_list = [zeros(output_dim, N_ens, n_test) for i in 1:nthreads] + moc_list = [zeros(output_dim, output_dim, n_test) for i in 1:nthreads] + buffer_list = [zeros(n_test, output_dim, n_features) for i in 1:nthreads] + moc_tmp_list = [zeros(output_dim, output_dim, n_test) for i in 1:nthreads] + mtmp_list = [zeros(output_dim, n_test) for i in 1:nthreads] + rng_seed = randperm(rng, 10^5)[1] # dumb way to get a random integer in 1:10^5 + rng_list = [Random.MersenneTwister(rng_seed + i) for i in 1:nthreads] + Threads.@threads for i in ProgressBar(1:N_ens) + tid = Threads.threadid() + rngtmp = rng_list[tid] + mtmp = mtmp_list[tid] + moc_tmp = moc_tmp_list[tid] + buffer = buffer_list[tid] + l = lmat[:, i] + for j in collect(1:repeats) + + c, cplxty = calculate_mean_cov_and_coeffs( + rfi, + rngtmp, 
+ l, + regularization, + n_features, + n_train, + n_test, + batch_sizes, + io_pairs, + decomp_type, + mtmp, + moc_tmp, + buffer, + multithread_type, + ) + + # m output_dim x n_test + # v output_dim x output_dim x n_test + # c n_features + m_list[tid][:, i, :] += mtmp ./ repeats + @. moc_list[tid] += moc_tmp ./ (repeats * N_ens) + c_list[tid][1, i] += sqrt(sum(c .^ 2)) / repeats + cp_list[tid][1, i] += cplxty / repeats + + end + end + + # put back together after threading + mean_of_covs = sum(moc_list) + means = sum(m_list) + coeffl2norm = sum(c_list) + complexity = sum(cp_list) + means = permutedims(means, (3, 2, 1)) + mean_of_covs = permutedims(mean_of_covs, (3, 1, 2)) + blockcovmat = zeros(n_test * output_dim, n_test * output_dim) + blockmeans = zeros(n_test * output_dim, N_ens) + for i in 1:n_test + id = ((i - 1) * output_dim + 1):(i * output_dim) + blockcovmat[id, id] = mean_of_covs[i, :, :] + blockmeans[id, :] = permutedims(means[i, :, :], (2, 1)) + end + + if !isposdef(blockcovmat) + println("blockcovmat not posdef") + blockcovmat = posdef_correct(blockcovmat) + end + + return vcat(blockmeans, coeffl2norm, complexity), blockcovmat + +end + +include("ScalarRandomFeature.jl") +include("VectorRandomFeature.jl") diff --git a/src/ScalarRandomFeature.jl b/src/ScalarRandomFeature.jl new file mode 100644 index 000000000..dbc26d11b --- /dev/null +++ b/src/ScalarRandomFeature.jl @@ -0,0 +1,437 @@ +export ScalarRandomFeatureInterface +# getters already exported in VRFI + +""" +$(DocStringExtensions.TYPEDEF) + +Structure holding the Scalar Random Feature models. + +# Fields +$(DocStringExtensions.TYPEDFIELDS) + +""" +struct ScalarRandomFeatureInterface{S <: AbstractString, RNG <: AbstractRNG} <: RandomFeatureInterface + "vector of `RandomFeatureMethod`s, contains the feature structure, batch-sizes and regularization" + rfms::Vector{RF.Methods.RandomFeatureMethod} + "vector of `Fit`s, containing the matrix decomposition and coefficients of RF when fitted to data" + fitted_features::Vector{RF.Methods.Fit} + "batch sizes" + batch_sizes::Union{Dict{S, Int}, Nothing} + "n_features" + n_features::Union{Int, Nothing} + "input dimension" + input_dim::Int + "choice of random number generator" + rng::RNG + "Assume inputs are decorrelated for ML" + diagonalize_input::Bool + "Random Feature decomposition, choose from \"svd\" or \"cholesky\" (default)" + feature_decomposition::S + "dictionary of options for hyperparameter optimizer" + optimizer_options::Dict{S} +end + +""" +$(DocStringExtensions.TYPEDSIGNATURES) + +gets the rfms field +""" +get_rfms(srfi::ScalarRandomFeatureInterface) = srfi.rfms + +""" +$(DocStringExtensions.TYPEDSIGNATURES) + +gets the fitted_features field +""" +get_fitted_features(srfi::ScalarRandomFeatureInterface) = srfi.fitted_features + +""" +$(DocStringExtensions.TYPEDSIGNATURES) + +gets batch_sizes the field +""" +get_batch_sizes(srfi::ScalarRandomFeatureInterface) = srfi.batch_sizes + +""" +$(DocStringExtensions.TYPEDSIGNATURES) + +gets the n_features field +""" +get_n_features(srfi::ScalarRandomFeatureInterface) = srfi.n_features + +""" +$(DocStringExtensions.TYPEDSIGNATURES) + +gets the input_dim field +""" +get_input_dim(srfi::ScalarRandomFeatureInterface) = srfi.input_dim + +""" +$(DocStringExtensions.TYPEDSIGNATURES) + +gets the rng field +""" +get_rng(srfi::ScalarRandomFeatureInterface) = srfi.rng + +""" +$(DocStringExtensions.TYPEDSIGNATURES) + +gets the diagonalize_input field +""" +get_diagonalize_input(srfi::ScalarRandomFeatureInterface) = srfi.diagonalize_input + +""" 
+$(DocStringExtensions.TYPEDSIGNATURES)
+
+gets the feature_decomposition field
+"""
+get_feature_decomposition(srfi::ScalarRandomFeatureInterface) = srfi.feature_decomposition
+
+"""
+$(DocStringExtensions.TYPEDSIGNATURES)
+
+gets the optimizer_options field
+"""
+get_optimizer_options(srfi::ScalarRandomFeatureInterface) = srfi.optimizer_options
+
+
+"""
+$(DocStringExtensions.TYPEDSIGNATURES)
+
+Constructs a `ScalarRandomFeatureInterface <: MachineLearningTool` interface for the `RandomFeatures.jl` package, for multi-input and single- (or decorrelated-)output emulators.
+ - `n_features` - the number of random features
+ - `input_dim` - the dimension of the input space
+ - `diagonalize_input = false` - whether to assume independence in input space (Note: when set to `true`, this tool directly approximates the behaviour of the scalar-valued GaussianProcess with an ARD kernel)
+ - `batch_sizes = nothing` - Dictionary of batch sizes passed to the `RandomFeatures.jl` object (see definition there)
+ - `rng = Random.GLOBAL_RNG` - random number generator
+ - `feature_decomposition = "cholesky"` - choice of how to store decompositions of random features, `cholesky` or `svd` available
+ - `optimizer_options = nothing` - Dict of options to pass into EKI optimization of hyperparameters (defaults created in the `ScalarRandomFeatureInterface` constructor):
+    - "prior": the prior for the hyperparameter optimization
+    - "n_ensemble": number of ensemble members
+    - "n_iteration": number of EKI iterations
+    - "cov_sample_multiplier": increase for more samples to estimate covariance matrix in optimization (default 10.0, minimum 1.0)
+    - "tikhonov": Tikhonov regularization parameter if > 0
+    - "inflation": additive inflation ∈ [0,1] with 0 being no inflation
+    - "train_fraction": e.g. 0.8 (default) means an 80:20 train-test split
+    - "multithread": how to multithread.
"ensemble" (default) threads across ensemble members "tullio" threads random feature matrix algebra + - "verbose" => false, verbose optimizer statements +""" +function ScalarRandomFeatureInterface( + n_features::Int, + input_dim::Int; + diagonalize_input::Bool = false, + batch_sizes::Union{Dict{S, Int}, Nothing} = nothing, + rng::RNG = Random.GLOBAL_RNG, + feature_decomposition::S = "cholesky", + optimizer_options::Union{Dict{S}, Nothing} = nothing, +) where {S <: AbstractString, RNG <: AbstractRNG} + # Initialize vector for GP models + rfms = Vector{RF.Methods.RandomFeatureMethod}(undef, 0) + fitted_features = Vector{RF.Methods.Fit}(undef, 0) + + + n_hp = calculate_n_hyperparameters(input_dim, diagonalize_input) + prior = build_default_prior(input_dim, diagonalize_input) + + # default optimizer settings + optimizer_opts = Dict( + "prior" => prior, #the hyperparameter_prior + "n_ensemble" => max(ndims(prior) + 1, 10), #number of ensemble + "n_iteration" => 5, # number of eki iterations + "cov_sample_multiplier" => 10.0, # multiplier for samples to estimate covariance in optimization scheme + "inflation" => 1e-4, # additive inflation ∈ [0,1] with 0 being no inflation + "train_fraction" => 0.8, # 80:20 train - test split + "multithread" => "ensemble", # instead of "tullio" + "verbose" => false, # verbose optimizer statements + ) + + if !isnothing(optimizer_options) + for key in keys(optimizer_options) + optimizer_opts[key] = optimizer_options[key] + end + end + + opt_tmp = Dict() + for key in keys(optimizer_opts) + if key != "prior" + opt_tmp[key] = optimizer_opts[key] + end + end + @info("hyperparameter optimization with EKI configured with $opt_tmp") + + return ScalarRandomFeatureInterface{S, RNG}( + rfms, + fitted_features, + batch_sizes, + n_features, + input_dim, + rng, + diagonalize_input, + feature_decomposition, + optimizer_opts, + ) +end + +""" +$(DocStringExtensions.TYPEDSIGNATURES) + +Builds the random feature method from hyperparameters. We use cosine activation functions and a MatrixVariateNormal(M,U,V) distribution (from `Distributions.jl`) with mean M=0, and input covariance U built from cholesky factorization or diagonal components based on the diagonalize_input flag. 
+""" +function RFM_from_hyperparameters( + srfi::ScalarRandomFeatureInterface, + rng::RNG, + l::ForVM, + regularization::MorUSorD, # just a 1x1 matrix though + n_features::Int, + batch_sizes::Union{Dict{S, Int}, Nothing}, + input_dim::Int, + multithread_type::MT; +) where { + RNG <: AbstractRNG, + ForVM <: Union{Real, AbstractVecOrMat}, + MorUSorD <: Union{Matrix, UniformScaling, Diagonal}, + S <: AbstractString, + MT <: MultithreadType, +} + diagonalize_input = get_diagonalize_input(srfi) + + M = zeros(input_dim) #scalar output + U = hyperparameters_from_flat(l, input_dim, diagonalize_input) + if !isposdef(U) + println("U not posdef - correcting") + U = posdef_correct(U) + end + + dist = MvNormal(M, U) + pd = ParameterDistribution( + Dict( + "distribution" => Parameterized(dist), + "constraint" => repeat([no_constraint()], input_dim), + "name" => "xi", + ), + ) + feature_sampler = RF.Samplers.FeatureSampler(pd, rng = rng) + # Learn hyperparameters for different feature types + + sf = RF.Features.ScalarFourierFeature(n_features, feature_sampler) + thread_opt = isa(multithread_type, TullioThreading) # if we want to multithread with tullio + if isnothing(batch_sizes) + return RF.Methods.RandomFeatureMethod(sf, regularization = regularization, tullio_threading = thread_opt) + else + return RF.Methods.RandomFeatureMethod( + sf, + regularization = regularization, + batch_sizes = batch_sizes, + tullio_threading = thread_opt, + ) + end +end + +#removes vector-only input arguments +RFM_from_hyperparameters( + srfi::ScalarRandomFeatureInterface, + rng::RNG, + l::ForVM, + regularization::MorUS, # just a 1x1 matrix though + n_features::Int, + batch_sizes::Union{Dict{S, Int}, Nothing}, + input_dim::Int, + output_dim::Int, + multithread_type::MT; +) where { + RNG <: AbstractRNG, + ForVM <: Union{Real, AbstractVecOrMat}, + MorUS <: Union{Matrix, UniformScaling}, + S <: AbstractString, + MT <: MultithreadType, +} = RFM_from_hyperparameters(srfi, rng, l, regularization, n_features, batch_sizes, input_dim, multithread_type) + +""" +$(DocStringExtensions.TYPEDSIGNATURES) + +Builds the random feature method from hyperparameters. We use cosine activation functions and a Multivariate Normal distribution (from `Distributions.jl`) with mean M=0, and input covariance U built from cholesky factorization or diagonal components based on the diagonalize_input flag. +""" +function build_models!( + srfi::ScalarRandomFeatureInterface, + input_output_pairs::PairedDataContainer{FT}, +) where {FT <: AbstractFloat} + # get inputs and outputs + input_values = get_inputs(input_output_pairs) + output_values = get_outputs(input_output_pairs) + n_rfms, n_data = size(output_values) + noise_sd = 1.0 + + input_dim = size(input_values, 1) + + rfms = get_rfms(srfi) + fitted_features = get_fitted_features(srfi) + n_features = get_n_features(srfi) + batch_sizes = get_batch_sizes(srfi) + rng = get_rng(srfi) + decomp_type = get_feature_decomposition(srfi) + optimizer_options = get_optimizer_options(srfi) + + #regularization = I = 1.0 in scalar case + regularization = I + # Optimize features with EKP for each output dim + # [1.] Split data into test/train 80/20 + train_fraction = optimizer_options["train_fraction"] + n_train = Int(floor(train_fraction * n_data)) + n_test = n_data - n_train + n_features_opt = min(n_features, Int(floor(2 * n_test))) #we take a specified number of features for optimization. 
+ idx_shuffle = randperm(rng, n_data) + + @info ( + "hyperparameter learning for $n_rfms models using $n_train training points, $n_test validation points and $n_features_opt features" + ) + for i in 1:n_rfms + + + io_pairs_opt = PairedDataContainer( + input_values[:, idx_shuffle], + reshape(output_values[i, idx_shuffle], 1, size(output_values, 2)), + ) + + multithread = optimizer_options["multithread"] + if multithread == "ensemble" + multithread_type = EnsembleThreading() + elseif multithread == "tullio" + multithread_type = TullioThreading() + else + throw( + ArgumentError( + "Unknown optimizer option for multithreading, please choose from \"tullio\" (allows Tullio.jl to control threading in RandomFeatures.jl, or \"loops\" (threading optimization loops)", + ), + ) + end + # [2.] Estimate covariance at mean value + prior = optimizer_options["prior"] + μ_hp = transform_unconstrained_to_constrained(prior, mean(prior)) + + cov_sample_multiplier = optimizer_options["cov_sample_multiplier"] + n_cov_samples_min = n_test + 2 + n_cov_samples = Int(floor(n_cov_samples_min * max(cov_sample_multiplier, 1.0))) + + internal_Γ, approx_σ2 = estimate_mean_and_coeffnorm_covariance( + srfi, + rng, + μ_hp, + regularization, + n_features_opt, + n_train, + n_test, + batch_sizes, + io_pairs_opt, + n_cov_samples, + decomp_type, + multithread_type, + ) + + Γ = internal_Γ + Γ[1:n_test, 1:n_test] += approx_σ2 + Γ[(n_test + 1):end, (n_test + 1):end] += I + + # [3.] set up EKP optimization + n_ensemble = optimizer_options["n_ensemble"] + n_iteration = optimizer_options["n_iteration"] + opt_verbose_flag = optimizer_options["verbose"] + initial_params = construct_initial_ensemble(rng, prior, n_ensemble) + min_complexity = log(det(regularization)) + data = vcat(get_outputs(io_pairs_opt)[(n_train + 1):end], 0.0, min_complexity) + ekiobj = EKP.EnsembleKalmanProcess(initial_params, data, Γ, Inversion(), rng = rng, verbose = opt_verbose_flag) + err = zeros(n_iteration) + + # [4.] optimize with EKP + for i in 1:n_iteration + + #get parameters: + lvec = transform_unconstrained_to_constrained(prior, get_u_final(ekiobj)) + + g_ens, _ = calculate_ensemble_mean_and_coeffnorm( + srfi, + rng, + lvec, + regularization, + n_features_opt, + n_train, + n_test, + batch_sizes, + io_pairs_opt, + decomp_type, + multithread_type, + ) + inflation = optimizer_options["inflation"] + if inflation > 0 + EKP.update_ensemble!(ekiobj, g_ens, additive_inflation = true, use_prior_cov = true, s = inflation) # small regularizing inflation + else + EKP.update_ensemble!(ekiobj, g_ens) # small regularizing inflation + end + err[i] = get_error(ekiobj)[end] #mean((params_true - mean(params_i,dims=2)).^2) + + end + + # [5.] extract optimal hyperparameters + hp_optimal = get_ϕ_mean_final(prior, ekiobj)[:] + + io_pairs_i = PairedDataContainer(input_values, reshape(output_values[i, :], 1, size(output_values, 2))) + # Now, fit new RF model with the optimized hyperparameters + rfm_i = RFM_from_hyperparameters( + srfi, + rng, + hp_optimal, + regularization, + n_features, + batch_sizes, + input_dim, + multithread_type, + ) + fitted_features_i = RF.Methods.fit(rfm_i, io_pairs_i, decomposition_type = decomp_type) #fit features + + push!(rfms, rfm_i) + push!(fitted_features, fitted_features_i) + end + +end + +""" +$(DocStringExtensions.TYPEDSIGNATURES) + +Empty method, as optimization takes place within the build_models stage +""" +function optimize_hyperparameters!(srfi::ScalarRandomFeatureInterface, args...; kwargs...) + @info("Random Features already trained. 
continuing...") +end + +""" +$(DocStringExtensions.TYPEDSIGNATURES) + +Prediction of data observation (not latent function) at new inputs (passed in as columns in a matrix). That is, we add the observational noise into predictions. +""" +function predict(srfi::ScalarRandomFeatureInterface, new_inputs::MM) where {MM <: AbstractMatrix} + M = length(get_rfms(srfi)) + N_samples = size(new_inputs, 2) + # Predicts columns of inputs: input_dim × N_samples + μ = zeros(M, N_samples) + σ2 = zeros(M, N_samples) + optimizer_options = get_optimizer_options(srfi) + multithread = optimizer_options["multithread"] + if multithread == "ensemble" + tullio_threading = false + elseif multithread == "tullio" + tullio_threading = true + end + + for i in 1:M + μ[i, :], σ2[i, :] = RF.Methods.predict( + get_rfms(srfi)[i], + get_fitted_features(srfi)[i], + DataContainer(new_inputs), + tullio_threading = tullio_threading, + ) + end + + # add the noise contribution from the regularization + σ2[:, :] = σ2[:, :] .+ 1.0 + + return μ, σ2 +end diff --git a/src/Utilities.jl b/src/Utilities.jl index eb354842e..e1a625892 100644 --- a/src/Utilities.jl +++ b/src/Utilities.jl @@ -14,7 +14,7 @@ export get_training_points export get_obs_sample export orig2zscore export zscore2orig - +export posdef_correct """ $(DocStringExtensions.TYPEDSIGNATURES) @@ -22,13 +22,20 @@ Extract the training points needed to train the Gaussian process regression. - `ekp` - EnsembleKalmanProcess holding the parameters and the data that were produced during the Ensemble Kalman (EK) process. -- `N_train_iterations` - Number of EK layers/iterations to train on. +- `train_iterations` - Number (or indices) EK layers/iterations to train on. """ -function get_training_points(ekp::EnsembleKalmanProcess{FT, IT, P}, N_train_iterations::IT) where {FT, IT, P} - - # Note u[end] does not have an equivalent g - iter_range = (get_N_iterations(ekp) - N_train_iterations + 1):get_N_iterations(ekp) +function get_training_points( + ekp::EnsembleKalmanProcess{FT, IT, P}, + train_iterations::Union{IT, AbstractVector{IT}}, +) where {FT, IT, P} + + if !isa(train_iterations, AbstractVector) + # Note u[end] does not have an equivalent g + iter_range = (get_N_iterations(ekp) - train_iterations + 1):get_N_iterations(ekp) + else + iter_range = train_iterations + end u_tp = [] g_tp = [] @@ -114,4 +121,30 @@ function zscore2orig(Z::AbstractMatrix{FT}, mean::AbstractVector{FT}, std::Abstr end +""" +$(DocStringExtensions.TYPEDSIGNATURES) + +Makes square matrix `mat` positive definite, by symmetrizing and bounding the minimum eigenvalue below by `tol` +""" +function posdef_correct(mat::AbstractMatrix; tol::Real = 1e8 * eps()) + if !issymmetric(mat) + out = 0.5 * (mat + permutedims(mat, (2, 1))) #symmetrize + if isposdef(out) + # very often, small numerical errors cause asymmetry, so cheaper to add this branch + return out + end + else + out = mat + end + + nugget = abs(minimum(eigvals(out))) + for i in 1:size(out, 1) + out[i, i] += nugget + tol #add to diag + end + return out +end + + + + end # module diff --git a/src/VectorRandomFeature.jl b/src/VectorRandomFeature.jl new file mode 100644 index 000000000..20eb9f2de --- /dev/null +++ b/src/VectorRandomFeature.jl @@ -0,0 +1,567 @@ +using ProgressBars + +export VectorRandomFeatureInterface +export get_rfms, + get_fitted_features, + get_batch_sizes, + get_n_features, + get_input_dim, + get_output_dim, + get_rng, + get_diagonalize_input, + get_diagonalize_output, + get_optimizer_options + +""" +$(DocStringExtensions.TYPEDEF) + +Structure 
holding the Vector Random Feature models. + +# Fields +$(DocStringExtensions.TYPEDFIELDS) + +""" +struct VectorRandomFeatureInterface{S <: AbstractString, RNG <: AbstractRNG} <: RandomFeatureInterface + "A vector of `RandomFeatureMethod`s, contains the feature structure, batch-sizes and regularization" + rfms::Vector{RF.Methods.RandomFeatureMethod} + "vector of `Fit`s, containing the matrix decomposition and coefficients of RF when fitted to data" + fitted_features::Vector{RF.Methods.Fit} + "batch sizes" + batch_sizes::Union{Dict{S, Int}, Nothing} + "number of features" + n_features::Union{Int, Nothing} + "input dimension" + input_dim::Int + "output_dimension" + output_dim::Int + "rng" + rng::RNG + "regularization" + regularization::Vector{Union{Matrix, UniformScaling, Diagonal}} + "Assume inputs are decorrelated for ML" + diagonalize_input::Bool + "Assume outputs are decorrelated for ML. Note: emulator(..., decorrelate=true) by default" + diagonalize_output::Bool + "Random Feature decomposition, choose from \"svd\" or \"cholesky\" (default)" + feature_decomposition::S + "dictionary of options for hyperparameter optimizer" + optimizer_options::Dict +end + +""" +$(DocStringExtensions.TYPEDSIGNATURES) + +Gets the rfms field +""" +get_rfms(vrfi::VectorRandomFeatureInterface) = vrfi.rfms + +""" +$(DocStringExtensions.TYPEDSIGNATURES) + +Gets the fitted_features field +""" +get_fitted_features(vrfi::VectorRandomFeatureInterface) = vrfi.fitted_features + +""" +$(DocStringExtensions.TYPEDSIGNATURES) + +Gets the batch_sizes field +""" +get_batch_sizes(vrfi::VectorRandomFeatureInterface) = vrfi.batch_sizes + +""" +$(DocStringExtensions.TYPEDSIGNATURES) + +Gets the n_features field +""" +get_n_features(vrfi::VectorRandomFeatureInterface) = vrfi.n_features + +""" +$(DocStringExtensions.TYPEDSIGNATURES) + +Gets the input_dim field +""" +get_input_dim(vrfi::VectorRandomFeatureInterface) = vrfi.input_dim + +""" +$(DocStringExtensions.TYPEDSIGNATURES) + +Gets the output_dim field +""" +get_output_dim(vrfi::VectorRandomFeatureInterface) = vrfi.output_dim + +""" +$(DocStringExtensions.TYPEDSIGNATURES) + +Gets the rng field +""" +get_rng(vrfi::VectorRandomFeatureInterface) = vrfi.rng + +""" +$(DocStringExtensions.TYPEDSIGNATURES) + +Gets the regularization field +""" +get_regularization(vrfi::VectorRandomFeatureInterface) = vrfi.regularization + +""" +$(DocStringExtensions.TYPEDSIGNATURES) + +Gets the diagonalize_input field +""" +get_diagonalize_input(vrfi::VectorRandomFeatureInterface) = vrfi.diagonalize_input + +""" +$(DocStringExtensions.TYPEDSIGNATURES) + +Gets the diagonalize_output field +""" +get_diagonalize_output(vrfi::VectorRandomFeatureInterface) = vrfi.diagonalize_output + +""" +$(DocStringExtensions.TYPEDSIGNATURES) + +Gets the feature_decomposition field +""" +get_feature_decomposition(vrfi::VectorRandomFeatureInterface) = vrfi.feature_decomposition + +""" +$(DocStringExtensions.TYPEDSIGNATURES) + +Gets the optimizer_options field +""" +get_optimizer_options(vrfi::VectorRandomFeatureInterface) = vrfi.optimizer_options + +""" +$(DocStringExtensions.TYPEDSIGNATURES) + +Constructs a `VectorRandomFeatureInterface <: MachineLearningTool` interface for the `RandomFeatures.jl` package for multi-input and multi-output emulators. 
+ - `n_features` - the number of random features + - `input_dim` - the dimension of the input space + - `output_dim` - the dimension of the output space + - `diagonalize_input = false` - whether to assume independence in input space + - `diagonalize_output::Bool = false` - whether to assume independence in output space (even if set true, this is not the same as a list of scalar-valued models, as hyperparameters will be shared between models in this case) + - `batch_sizes = nothing` - Dictionary of batch sizes passed `RandomFeatures.jl` object (see definition there) + - `rng = Random.GLOBAL_RNG` - random number generator + - `feature_decomposition = "cholesky"` - choice of how to store decompositions of random features, `cholesky` or `svd` available + - `optimizer_options = nothing` - Dict of options to pass into EKI optimization of hyperparameters (defaults created in `VectorRandomFeatureInterface` constructor): + - "prior": the prior for the hyperparameter optimization + - "n_ensemble": number of ensemble members + - "n_iteration": number of eki iterations + - "cov_sample_multiplier": increase for more samples to estimate covariance matrix in optimization (default 10.0, minimum 1.0) + - "tikhonov": tikhonov regularization parameter if > 0 + - "inflation": additive inflation ∈ [0,1] with 0 being no inflation + - "train_fraction": e.g. 0.8 (default) means 80:20 train - test split + - "multithread": how to multithread. "ensemble" (default) threads across ensemble members "tullio" threads random feature matrix algebra + - "verbose" => false, verbose optimizer statements + +""" +function VectorRandomFeatureInterface( + n_features::Int, + input_dim::Int, + output_dim::Int; + diagonalize_input::Bool = false, + diagonalize_output::Bool = false, + batch_sizes::Union{Dict{S, Int}, Nothing} = nothing, + rng::RNG = Random.GLOBAL_RNG, + feature_decomposition::S = "cholesky", + optimizer_options::Union{Dict{S}, Nothing} = nothing, +) where {S <: AbstractString, RNG <: AbstractRNG} + + # Initialize vector for RF model + rfms = Vector{RF.Methods.RandomFeatureMethod}(undef, 0) + fitted_features = Vector{RF.Methods.Fit}(undef, 0) + regularization = Vector{Union{Matrix, UniformScaling, Nothing}}(undef, 0) + + #Optimization Defaults + n_hp = calculate_n_hyperparameters(input_dim, output_dim, diagonalize_input, diagonalize_output) + prior = build_default_prior(input_dim, output_dim, diagonalize_input, diagonalize_output) + + optimizer_opts = Dict( + "prior" => prior, #the hyperparameter_prior + "n_ensemble" => max(ndims(prior) + 1, 10), #number of ensemble + "n_iteration" => 5, # number of eki iterations + "cov_sample_multiplier" => 10.0, # multiplier for samples to estimate covariance in optimization scheme + "tikhonov" => 0, # tikhonov regularization parameter if >0 + "inflation" => 1e-4, # additive inflation ∈ [0,1] with 0 being no inflation + "train_fraction" => 0.8, # 80:20 train - test split + "multithread" => "ensemble", # instead of "tullio" + "verbose" => false, # verbose optimizer statements + ) + + if !isnothing(optimizer_options) + for key in keys(optimizer_options) + optimizer_opts[key] = optimizer_options[key] + end + end + + opt_tmp = Dict() + for key in keys(optimizer_opts) + if key != "prior" + opt_tmp[key] = optimizer_opts[key] + end + end + @info("hyperparameter optimization with EKI configured with $opt_tmp") + + return VectorRandomFeatureInterface{S, RNG}( + rfms, + fitted_features, + batch_sizes, + n_features, + input_dim, + output_dim, + rng, + regularization, + diagonalize_input, + 
diagonalize_output, + feature_decomposition, + optimizer_opts, + ) +end + +""" +$(DocStringExtensions.TYPEDSIGNATURES) + +Builds the random feature method from hyperparameters. We use cosine activation functions and a Matrixvariate Normal distribution with mean M=0, and input(output) covariance U(V) built from cholesky factorization or diagonal components based on the diagonalize_input(diagonalize_output) flag. +""" +function RFM_from_hyperparameters( + vrfi::VectorRandomFeatureInterface, + rng::RNG, + l::ForVM, + regularization::MorUSorD, + n_features::Int, + batch_sizes::Union{Dict{S, Int}, Nothing}, + input_dim::Int, + output_dim::Int, + multithread_type::MT; +) where { + S <: AbstractString, + RNG <: AbstractRNG, + ForVM <: Union{AbstractFloat, AbstractVecOrMat}, + MorUSorD <: Union{Matrix, UniformScaling, Diagonal}, + MT <: MultithreadType, +} + diagonalize_input = get_diagonalize_input(vrfi) + diagonalize_output = get_diagonalize_output(vrfi) + + M = zeros(input_dim, output_dim) # n x p mean + U, V = hyperparameters_from_flat(l, input_dim, output_dim, diagonalize_input, diagonalize_output) + + if !isposdef(U) + println("U not posdef - correcting") + U = posdef_correct(U) + end + if !isposdef(V) + println("V not posdef - correcting") + V = posdef_correct(V) + end + + dist = MatrixNormal(M, U, V) + pd = ParameterDistribution( + Dict( + "distribution" => Parameterized(dist), + "constraint" => repeat([no_constraint()], input_dim * output_dim), + "name" => "xi", + ), + ) + feature_sampler = RF.Samplers.FeatureSampler(pd, output_dim, rng = rng) + vff = RF.Features.VectorFourierFeature(n_features, output_dim, feature_sampler) + + thread_opt = isa(multithread_type, TullioThreading) # if we want to multithread with tullio + if isnothing(batch_sizes) + return RF.Methods.RandomFeatureMethod(vff, regularization = regularization, tullio_threading = thread_opt) + else + return RF.Methods.RandomFeatureMethod( + vff, + regularization = regularization, + batch_sizes = batch_sizes, + tullio_threading = thread_opt, + ) + end +end + + +""" +$(DocStringExtensions.TYPEDSIGNATURES) + +Build Vector Random Feature model for the input-output pairs subject to regularization, and optimizes the hyperparameters with EKP. 
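+The keyword `regularization_matrix` plays the role of (a function of) the observational noise covariance: when it is `nothing` (e.g. after scaled-SVD decorrelation) the regularization defaults to `I`, and otherwise it is reduced to the scalar multiple of the identity `det(reg_mat)^(1/output_dim) * I`.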
+""" +function build_models!( + vrfi::VectorRandomFeatureInterface, + input_output_pairs::PairedDataContainer{FT}; + regularization_matrix::Union{M, Nothing} = nothing, +) where {FT <: AbstractFloat, M <: AbstractMatrix} + + # get inputs and outputs + input_values = get_inputs(input_output_pairs) + output_values = get_outputs(input_output_pairs) + n_rfms, n_data = size(output_values) + + input_dim = size(input_values, 1) + output_dim = size(output_values, 1) + # there are less hyperparameters when we diagonalize + diagonalize_input = get_diagonalize_input(vrfi) + diagonalize_output = get_diagonalize_output(vrfi) + n_hp = calculate_n_hyperparameters(input_dim, output_dim, diagonalize_input, diagonalize_output) + + rfms = get_rfms(vrfi) + fitted_features = get_fitted_features(vrfi) + n_features = get_n_features(vrfi) + batch_sizes = get_batch_sizes(vrfi) + decomp_type = get_feature_decomposition(vrfi) + optimizer_options = get_optimizer_options(vrfi) + multithread = optimizer_options["multithread"] + if multithread == "ensemble" + multithread_type = EnsembleThreading() + elseif multithread == "tullio" + multithread_type = TullioThreading() + else + throw( + ArgumentError( + "Unknown optimizer option for multithreading, please choose from \"tullio\" (allows Tullio.jl to control threading in RandomFeatures.jl, or \"loops\" (threading optimization loops)", + ), + ) + end + prior = optimizer_options["prior"] + rng = get_rng(vrfi) + + if ndims(prior) > n_hp + # comes from having a truncated output_dimension + # TODO not really a truncation here, resetting to default + @info "truncating hyperparameter space to default in first $n_hp dimensions, due to truncation of output space" + prior = constrained_gaussian("cholesky_factors", 1.0, 1.0, 0.0, Inf, repeats = n_hp) + + end + + # Optimize feature cholesky factors with EKP + # [1.] Split data into test/train (e.g. 80/20) + train_fraction = optimizer_options["train_fraction"] + n_train = Int(floor(train_fraction * n_data)) # 20% split + n_test = n_data - n_train + if diagonalize_output + n_features_opt = min(n_features, Int(floor(4 * n_train))) #we take a specified number of features for optimization. MAGIC NUMBER + else + # note the n_features_opt should NOT exceed output_dim * n_train or the regularization is replaced with a diagonal approx. + n_features_opt = max(min(n_features, Int(floor(4 * sqrt(n_train * output_dim)))), Int(floor(1.9 * n_train)))#we take a specified number of features for optimization. MAGIC NUMBER + end + @info ( + "hyperparameter learning using $n_train training points, $n_test validation points and $n_features_opt features" + ) + + + # regularization_matrix = nothing when we use scaled SVD to decorrelate the space, + # in this setting, noise = I + if regularization_matrix === nothing + regularization = I + else + reg_mat = regularization_matrix + if !isposdef(regularization_matrix) + reg_mat = posdef_correct(regularization_matrix) + println("RF regularization matrix is not positive definite, correcting") + + else + # think of the regularization_matrix as the observational noise covariance, or a related quantity + regularization = exp((1 / output_dim) * sum(log.(eigvals(reg_mat)))) * I #i.e. det(M)^{1/output_dim} I + + #regularization = reg_mat #using the full p.d. tikhonov exp. 
EXPENSIVE, and challenge get complexity terms + end + end + + + idx_shuffle = randperm(rng, n_data) + + io_pairs_opt = PairedDataContainer( + input_values[:, idx_shuffle], + reshape(output_values[:, idx_shuffle], :, size(output_values, 2)), + ) + + # [2.] Estimate covariance at mean value + μ_hp = transform_unconstrained_to_constrained(prior, mean(prior)) + cov_sample_multiplier = optimizer_options["cov_sample_multiplier"] + + if diagonalize_output + n_cov_samples_min = n_test + 2 # diagonal case + else + n_cov_samples_min = (n_test * output_dim + 2) # min required for + end + n_cov_samples = Int(floor(n_cov_samples_min * max(cov_sample_multiplier, 1.0))) + + internal_Γ, approx_σ2 = estimate_mean_and_coeffnorm_covariance( + vrfi, + rng, + μ_hp, # take mean values + regularization, + n_features_opt, + n_train, + n_test, + batch_sizes, + io_pairs_opt, + n_cov_samples, + decomp_type, + multithread_type, + ) + + tikhonov_opt_val = optimizer_options["tikhonov"] + if tikhonov_opt_val == 0 + # Build the covariance + Γ = internal_Γ + Γ[1:(n_test * output_dim), 1:(n_test * output_dim)] += approx_σ2 + Γ[(n_test * output_dim + 1):end, (n_test * output_dim + 1):end] += I + + #in diag case we have data logdet = λ^m, in non diag case we have logdet(Λ^) to match the different reg matrices. + min_complexity = + isa(regularization, UniformScaling) ? n_features_opt * log(regularization.λ) : + n_features_opt / output_dim * 2 * sum(log.(diag(cholesky(regularization).L))) + + data = vcat(reshape(get_outputs(io_pairs_opt)[:, (n_train + 1):end], :, 1), 0.0, min_complexity) #flatten data + + elseif tikhonov_opt_val > 0 + # augment the state to add tikhonov + outsize = size(internal_Γ, 1) + Γ = zeros(outsize + n_hp, outsize + n_hp) + Γ[1:outsize, 1:outsize] = internal_Γ + Γ[1:(n_test * output_dim), 1:(n_test * output_dim)] += approx_σ2 + Γ[(n_test * output_dim + 1):outsize, (n_test * output_dim + 1):outsize] += I + + Γ[(outsize + 1):end, (outsize + 1):end] = tikhonov_opt_val .* cov(prior) + + #TODO the min complexity here is not the correct object in the non-diagonal case + min_complexity = + isa(regularization, UniformScaling) ? n_features_opt * log(regularization.λ) : + n_features_opt / output_dim * 2 * sum(log.(diag(cholesky(regularization).L))) + + data = vcat( + reshape(get_outputs(io_pairs_opt)[:, (n_train + 1):end], :, 1), + 0.0, + min_complexity, + zeros(size(Γ, 1) - outsize, 1), + ) #flatten data with additional zeros + else + throw( + ArgumentError( + "Tikhonov parameter must be non-negative, instead received tikhonov_opt_val=$tikhonov_opt_val", + ), + ) + end + + # [3.] set up EKP optimization + n_ensemble = optimizer_options["n_ensemble"] # minimal ensemble size n_hp, + n_iteration = optimizer_options["n_iteration"] + opt_verbose_flag = optimizer_options["verbose"] + + if !isposdef(Γ) + Γ = posdef_correct(Γ) + end + + initial_params = construct_initial_ensemble(rng, prior, n_ensemble) + + ekiobj = EKP.EnsembleKalmanProcess(initial_params, data[:], Γ, Inversion(), rng = rng, verbose = opt_verbose_flag) + err = zeros(n_iteration) + + # [4.] 
optimize with EKP + for i in 1:n_iteration + #get parameters: + lvec = get_ϕ_final(prior, ekiobj) + + g_ens, _ = calculate_ensemble_mean_and_coeffnorm( + vrfi, + rng, + lvec, + regularization, + n_features_opt, + n_train, + n_test, + batch_sizes, + io_pairs_opt, + decomp_type, + multithread_type, + ) + if tikhonov_opt_val > 0 + # augment with the computational parameters (u not ϕ) + uvecormat = get_u_final(ekiobj) + if isa(uvecormat, AbstractVector) + umat = reshape(uvecormat, 1, :) + else + umat = uvecormat + end + + g_ens = vcat(g_ens, umat) + end + inflation = optimizer_options["inflation"] + if inflation > 0 + EKP.update_ensemble!(ekiobj, g_ens, additive_inflation = true, use_prior_cov = true, s = inflation) # small regularizing inflation + else + EKP.update_ensemble!(ekiobj, g_ens) # small regularizing inflation + end + err[i] = get_error(ekiobj)[end] #mean((params_true - mean(params_i,dims=2)).^2) + + end + + # [5.] extract optimal hyperparameters + hp_optimal = get_ϕ_mean_final(prior, ekiobj)[:] + # Now, fit new RF model with the optimized hyperparameters + + rfm_tmp = RFM_from_hyperparameters( + vrfi, + rng, + hp_optimal, + regularization, + n_features, + batch_sizes, + input_dim, + output_dim, + multithread_type, + ) + fitted_features_tmp = fit(rfm_tmp, input_output_pairs, decomposition_type = decomp_type) + + + push!(rfms, rfm_tmp) + push!(fitted_features, fitted_features_tmp) + push!(get_regularization(vrfi), regularization) + +end + +""" +$(DocStringExtensions.TYPEDSIGNATURES) + +Empty method, as optimization takes place within the build_models stage +""" +function optimize_hyperparameters!(vrfi::VectorRandomFeatureInterface, args...; kwargs...) + @info("Random Features already trained. continuing...") +end + +""" +$(DocStringExtensions.TYPEDSIGNATURES) + +Prediction of data observation (not latent function) at new inputs (passed in as columns in a matrix). That is, we add the observational noise into predictions. +""" +function predict(vrfi::VectorRandomFeatureInterface, new_inputs::M) where {M <: AbstractMatrix} + input_dim = get_input_dim(vrfi) + output_dim = get_output_dim(vrfi) + rfm = get_rfms(vrfi)[1] + ff = get_fitted_features(vrfi)[1] + optimizer_options = get_optimizer_options(vrfi) + multithread = optimizer_options["multithread"] + if multithread == "ensemble" + tullio_threading = false + elseif multithread == "tullio" + tullio_threading = true + end + + N_samples = size(new_inputs, 2) + # Predicts columns of inputs: input_dim × N_samples + μ, σ2 = RF.Methods.predict(rfm, ff, DataContainer(new_inputs), tullio_threading = tullio_threading) + # sizes (output_dim x n_test), (output_dim x output_dim x n_test) + # add the noise contribution from the regularization + # note this is because we are predicting the data here, not the latent function. 
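+    # The stored regularization (a scalar multiple of I, or a matrix) is the estimate of the
+    # observational noise: each predicted covariance slice is symmetrized, the regularization
+    # is added, and the result is corrected to be positive definite if necessary.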
+    lambda = get_regularization(vrfi)[1]
+    for i in 1:N_samples
+        σ2[:, :, i] = 0.5 * (σ2[:, :, i] + permutedims(σ2[:, :, i], (2, 1))) + lambda
+
+        if !isposdef(σ2[:, :, i])
+            σ2[:, :, i] = posdef_correct(σ2[:, :, i])
+        end
+    end
+
+    return μ, σ2
+end
diff --git a/test/GaussianProcess/runtests.jl b/test/GaussianProcess/runtests.jl
index 53624cc56..5935f834a 100644
--- a/test/GaussianProcess/runtests.jl
+++ b/test/GaussianProcess/runtests.jl
@@ -214,7 +214,7 @@ using CalibrateEmulateSample.DataContainers
     @test μ4_noise_from_Σ[:, 3] ≈ [0.0, 0.0] atol = tol_mu
    @test μ4_noise_from_Σ[:, 4] ≈ [0.0, -2.0] atol = tol_mu

-    # check match between the means and variances (should be similar at least
+    # check match between the variances (should be similar at least)

     @test all(isapprox.(σ4²_noise_from_Σ, σ4²_noise_learnt, rtol = 2 * tol_mu))
diff --git a/test/RandomFeature/runtests.jl b/test/RandomFeature/runtests.jl
new file mode 100644
index 000000000..e1792514f
--- /dev/null
+++ b/test/RandomFeature/runtests.jl
@@ -0,0 +1,306 @@
+using Test
+using Random
+using RandomFeatures
+using LinearAlgebra, Distributions
+
+using CalibrateEmulateSample.Emulators
+using CalibrateEmulateSample.DataContainers
+using CalibrateEmulateSample.ParameterDistributions
+using RandomFeatures
+
+seed = 10101010
+rng = Random.MersenneTwister(seed)
+
+@testset "RandomFeatures" begin
+
+    @testset "ScalarRandomFeatureInterface" begin
+
+        input_dim = 2
+        n_features = 200
+        batch_sizes = Dict("train" => 100, "test" => 100, "feature" => 100)
+        diagonalize_input = false
+        # build interface
+        n_input_hp = Int(input_dim * (input_dim + 1) / 2) + 2 # see calculate_n_hyperparameters for details
+        n_output_hp = 1
+
+        # prior built from:
+        # input: γ₁*(Cholesky_factor + γ₂ * I)
+        # output: γ₃
+        # where γᵢ positive
+        prior = combine_distributions([
+            constrained_gaussian("input_prior_cholesky", 0.0, 1.0, -Inf, Inf, repeats = n_input_hp - 2),
+            constrained_gaussian("input_prior_scaling", 1.0, 1.0, 0.0, Inf, repeats = 2),
+            constrained_gaussian("output_prior", 1.0, 1.0, 0.0, Inf),
+        ])
+
+        optimizer_options = Dict(
+            "prior" => prior,
+            "n_ensemble" => max(ndims(prior) + 1, 10),
+            "n_iteration" => 5,
+            "cov_sample_multiplier" => 10.0,
+            "inflation" => 1e-4,
+            "train_fraction" => 0.8,
+            "multithread" => "ensemble",
+            "verbose" => false,
+        )
+
+        srfi = ScalarRandomFeatureInterface(
+            n_features,
+            input_dim,
+            batch_sizes = batch_sizes,
+            rng = rng,
+            diagonalize_input = diagonalize_input,
+            optimizer_options = optimizer_options,
+        )
+
+        @test isa(get_rfms(srfi), Vector{RandomFeatures.Methods.RandomFeatureMethod})
+        @test isa(get_fitted_features(srfi), Vector{RandomFeatures.Methods.Fit})
+        @test get_batch_sizes(srfi) == batch_sizes
+        @test get_n_features(srfi) == n_features
+        @test get_input_dim(srfi) == input_dim
+        @test get_rng(srfi) == rng
+        @test get_diagonalize_input(srfi) == diagonalize_input
+        @test get_optimizer_options(srfi) == optimizer_options
+
+        # check defaults
+        srfi2 = ScalarRandomFeatureInterface(n_features, input_dim)
+        @test get_batch_sizes(srfi2) === nothing
+        @test get_rng(srfi2) == Random.GLOBAL_RNG
+        @test get_diagonalize_input(srfi2) == false
+        @test get_optimizer_options(srfi2) == optimizer_options # we just set the defaults above
+
+    end
+
+    @testset "VectorRandomFeatureInterface" begin
+
+        rng = Random.MersenneTwister(seed)
+
+        input_dim = 2
+        output_dim = 3
+        n_features = 200
+        batch_sizes = Dict("train" => 100, "test" => 100, "feature" => 100)
+        diagonalize_input = true
+        diagonalize_output = false
+
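+        # hyperparameter counts: a diagonalized input covariance has input_dim entries (+ 2 scalings); a full output covariance has output_dim * (output_dim + 1) / 2 entries (+ 2 scalings)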
+        n_input_hp = Int(input_dim) + 2
+        n_output_hp = Int(output_dim * (output_dim + 1) / 2) + 2
+
+        # prior built from:
+        # input: γ₁*(Cholesky_factor_in + γ₂ * I)
+        # output: γ₃*(Cholesky_factor_out + γ₄ * I)
+        # where γᵢ positive
+        prior = combine_distributions([
+            constrained_gaussian("input_prior_diagonal", 1.0, 1.0, 0.0, Inf, repeats = n_input_hp - 2),
+            constrained_gaussian("input_prior_scaling", 1.0, 1.0, 0.0, Inf, repeats = 2),
+            constrained_gaussian("output_prior_cholesky", 0.0, 1.0, -Inf, Inf, repeats = n_output_hp - 2),
+            constrained_gaussian("output_prior_scaling", 1.0, 1.0, 0.0, Inf, repeats = 2),
+        ])
+
+        optimizer_options = Dict(
+            "prior" => prior,
+            "n_ensemble" => max(ndims(prior) + 1, 10),
+            "n_iteration" => 5,
+            "tikhonov" => 0,
+            "cov_sample_multiplier" => 10.0,
+            "inflation" => 1e-4,
+            "train_fraction" => 0.8,
+            "multithread" => "ensemble",
+            "verbose" => false,
+        )
+
+        # build interfaces
+        vrfi = VectorRandomFeatureInterface(
+            n_features,
+            input_dim,
+            output_dim,
+            rng = rng,
+            batch_sizes = batch_sizes,
+            diagonalize_input = diagonalize_input,
+            diagonalize_output = diagonalize_output,
+            optimizer_options = optimizer_options,
+        )
+
+        @test isa(get_rfms(vrfi), Vector{RandomFeatures.Methods.RandomFeatureMethod})
+        @test isa(get_fitted_features(vrfi), Vector{RandomFeatures.Methods.Fit})
+        @test get_batch_sizes(vrfi) == batch_sizes
+        @test get_n_features(vrfi) == n_features
+        @test get_input_dim(vrfi) == input_dim
+        @test get_output_dim(vrfi) == output_dim
+        @test get_rng(vrfi) == rng
+        @test get_diagonalize_input(vrfi) == diagonalize_input
+        @test get_diagonalize_output(vrfi) == diagonalize_output
+        @test get_optimizer_options(vrfi) == optimizer_options
+
+
+        # check defaults
+        vrfi2 = VectorRandomFeatureInterface(n_features, input_dim, output_dim, diagonalize_input = diagonalize_input)
+
+        @test get_batch_sizes(vrfi2) === nothing
+        @test get_rng(vrfi2) == Random.GLOBAL_RNG
+        @test get_diagonalize_input(vrfi2) == true
+        @test get_diagonalize_output(vrfi2) == false
+        @test get_optimizer_options(vrfi2) == optimizer_options # we just set the defaults above
+
+    end
+
+    @testset "RF within Emulator: 1D -> 1D" begin
+
+        # Training data
+        input_dim = 1
+        output_dim = 1
+        n = 40 # number of training points
+        x = reshape(2.0 * π * rand(n), 1, n) # unif(0,2π) predictors/features: 1 x n
+        obs_noise_cov = 0.05^2 * I
+        y = reshape(sin.(x) + 0.05 * randn(n)', 1, n) # predictands/targets: 1 x n
+        iopairs = PairedDataContainer(x, y, data_are_columns = true)
+
+        ntest = 40
+        new_inputs = reshape(2.0 * π * rand(ntest), 1, ntest)
+        new_outputs = sin.(new_inputs)
+
+        # RF parameters
+        n_features = 100
+        diagonalize_input = true
+        # Scalar RF options to mimic squared-exp ARD kernel
+        srfi = ScalarRandomFeatureInterface(n_features, input_dim, diagonalize_input = diagonalize_input, rng = rng)
+
+        # Vector RF options to mimic squared-exp ARD kernel (in 1D)
+        vrfi = VectorRandomFeatureInterface(
+            n_features,
+            input_dim,
+            output_dim,
+            diagonalize_input = diagonalize_input,
+            rng = rng,
+        )
+
+        # build emulators
+        em_srfi = Emulator(srfi, iopairs, obs_noise_cov = obs_noise_cov)
+        em_vrfi = Emulator(vrfi, iopairs, obs_noise_cov = obs_noise_cov)
+
+        # just see if it prints something
+        @test_logs (:info,) Emulators.optimize_hyperparameters!(em_srfi)
+        @test_logs (:info,) Emulators.optimize_hyperparameters!(em_vrfi)
+
+        # predict and test at the new inputs
+        tol_μ = 0.1 * ntest
+        μs, σs² = Emulators.predict(em_srfi, new_inputs, transform_to_real = true)
+        @test size(μs) == (1, ntest)
+        @test size(σs²) == (1, ntest)
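+        # the emulator mean should track sin(x), and the predicted (data) variance should be close to the added noise variance 0.05^2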
+        @test isapprox.(norm(μs - new_outputs), 0, atol = tol_μ)
+        @test all(isapprox.(vec(σs²), 0.05^2 * ones(ntest), atol = 1e-2))
+
+        μv, σv² = Emulators.predict(em_vrfi, new_inputs, transform_to_real = true)
+        @test size(μv) == (1, ntest)
+        @test size(σv²) == (1, ntest)
+        @test isapprox.(norm(μv - new_outputs), 0, atol = tol_μ)
+        @test all(isapprox.(vec(σv²), 0.05^2 * ones(ntest), atol = 1e-2))
+
+    end
+    @testset "RF within Emulator: 2D -> 2D" begin
+        # Generate training data
+        n = 100 # number of training points
+
+        input_dim = 2 # input dim
+        output_dim = 2 # output dim
+        X = 2.0 * π * rand(input_dim, n) # [0,2π]x[0,2π]
+
+        # G(x1, x2)
+        g1x = sin.(X[1, :]) .+ cos.(X[2, :])
+        g2x = sin.(X[1, :]) .- cos.(X[2, :])
+        gx = zeros(2, n)
+        gx[1, :] = g1x
+        gx[2, :] = g2x
+
+        # Add noise η
+        μ = zeros(output_dim)
+        Σ = 0.05 * [[0.5, 0.2] [0.2, 0.5]] # d x d
+        noise_samples = rand(MvNormal(μ, Σ), n)
+
+        # y = G(x) + η
+        Y = gx .+ noise_samples
+
+        iopairs = PairedDataContainer(X, Y, data_are_columns = true)
+
+
+        # RF parameters
+        n_features = 150
+
+        # Test a few options for RF
+        # 1) scalar + diag in
+        # 2) scalar
+        # 3) vector + diag out
+        # 4) vector
+
+        srfi_diagin = ScalarRandomFeatureInterface(n_features, input_dim, diagonalize_input = true, rng = rng)
+        srfi = ScalarRandomFeatureInterface(n_features, input_dim, diagonalize_input = false, rng = rng)
+
+        vrfi_diagout = VectorRandomFeatureInterface(
+            n_features,
+            input_dim,
+            output_dim,
+            diagonalize_input = false,
+            diagonalize_output = true,
+            optimizer_options = Dict("train_fraction" => 0.7),
+            rng = rng,
+        )
+
+        vrfi = VectorRandomFeatureInterface(
+            n_features,
+            input_dim,
+            output_dim,
+            diagonalize_input = false,
+            diagonalize_output = false,
+            optimizer_options = Dict("train_fraction" => 0.7),
+            rng = rng,
+        )
+
+        # build emulators
+        # svd: scalar + diag in, scalar, vector + diag out, vector
+        # no-svd: vector
+        em_srfi_svd_diagin = Emulator(srfi_diagin, iopairs, obs_noise_cov = Σ)
+        em_srfi_svd = Emulator(srfi, iopairs, obs_noise_cov = Σ)
+
+        em_vrfi_svd_diagout = Emulator(vrfi_diagout, iopairs, obs_noise_cov = Σ)
+        em_vrfi_svd = Emulator(vrfi, iopairs, obs_noise_cov = Σ)
+
+        em_vrfi = Emulator(vrfi, iopairs, obs_noise_cov = Σ, decorrelate = false)
+
+        # TODO: truncated SVD option for vector (involves resizing prior)
+
+        ntest = 100
+        new_inputs = 2.0 * π * rand(input_dim, ntest) # [0,2π]x[0,2π]
+
+        outx = zeros(2, ntest)
+        outx[1, :] = sin.(new_inputs[1, :]) .+ cos.(new_inputs[2, :])
+        outx[2, :] = sin.(new_inputs[1, :]) .- cos.(new_inputs[2, :])
+
+        μ_ssd, σ²_ssd = Emulators.predict(em_srfi_svd_diagin, new_inputs, transform_to_real = true)
+        μ_ss, σ²_ss = Emulators.predict(em_srfi_svd, new_inputs, transform_to_real = true)
+        μ_vsd, σ²_vsd = Emulators.predict(em_vrfi_svd_diagout, new_inputs, transform_to_real = true)
+        μ_vs, σ²_vs = Emulators.predict(em_vrfi_svd, new_inputs, transform_to_real = true)
+        μ_v, σ²_v = Emulators.predict(em_vrfi, new_inputs)
+
+        tol_μ = 0.1 * ntest * output_dim
+        @test isapprox.(norm(μ_ssd - outx), 0, atol = tol_μ)
+        @test isapprox.(norm(μ_ss - outx), 0, atol = tol_μ)
+        @test isapprox.(norm(μ_vsd - outx), 0, atol = tol_μ)
+        @test isapprox.(norm(μ_vs - outx), 0, atol = tol_μ)
+        @test isapprox.(norm(μ_v - outx), 0, atol = 4 * tol_μ) # decorrelate = false is a cruder approximation, so allow a looser tolerance
+
+        # An example with the other threading option
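+        # the "tullio" option enables threading inside the RandomFeatures tensor computations instead of over the ensemble evaluations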
Dict("train_fraction" => 0.7, "multithread" => "tullio"), + rng = rng, + ) + em_vrfi_svd_tul = Emulator(vrfi_tul, iopairs, obs_noise_cov = Σ) + μ_vs_tul, σ²_vs_tul = Emulators.predict(em_vrfi_svd_tul, new_inputs, transform_to_real = true) + @test isapprox.(norm(μ_vs_tul - outx), 0, atol = tol_μ) + + end + +end diff --git a/test/Utilities/runtests.jl b/test/Utilities/runtests.jl index bec9283ef..54e1ad7c9 100644 --- a/test/Utilities/runtests.jl +++ b/test/Utilities/runtests.jl @@ -60,4 +60,17 @@ using CalibrateEmulateSample.DataContainers @test get_inputs(training_points) ≈ initial_ensemble @test get_outputs(training_points) ≈ g_ens + #positive definiteness + mat = reshape(collect(-3:(-3 + 10^2 - 1)), 10, 10) + tol = 1e12 * eps() + @test !isposdef(mat) + + pdmat = posdef_correct(mat) + @test isposdef(pdmat) + @test minimum(eigvals(pdmat)) >= (1 - 1e-4) * 1e8 * eps() #eigvals is approximate so need a bit of give here + + pdmat2 = posdef_correct(mat, tol = tol) + @test isposdef(pdmat2) + @test minimum(eigvals(pdmat2)) >= (1 - 1e-4) * tol + end diff --git a/test/runtests.jl b/test/runtests.jl index c48b2aa9d..eeda04c7c 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -24,7 +24,7 @@ end end end - for submodule in ["Emulator", "GaussianProcess", "MarkovChainMonteCarlo", "Utilities"] + for submodule in ["Emulator", "GaussianProcess", "RandomFeature", "MarkovChainMonteCarlo", "Utilities"] if all_tests || has_submodule(submodule) || "CalibrateEmulateSample" in ARGS include_test(submodule) end