JuliaTrustworthyAI · pat-alt · Apr 5, 2024 · Apr 5, 2024 · Apr 5, 2024 · Apr 5, 2024
diff --git a/Project.toml b/Project.toml
@@ -20,6 +20,7 @@ ProgressMeter = "92933f4c-e287-5a05-a399-4b506db050ca"
 Random = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c"
 StatsBase = "2913bbd2-ae8a-5f71-8c99-4fb6c76f3a91"
 Tables = "bd369af6-aec1-5ad0-b16a-f7cc5008161c"
+TaijaBase = "10284c91-9f28-4c9a-abbf-ee43576dfff6"
 
 [compat]
 Aqua = "0.8"
@@ -39,6 +40,7 @@ ProgressMeter = "1"
 Random = "1.7, 1.8, 1.9, 1.10"
 StatsBase = "0.33, 0.34.0"
 Tables = "1"
+TaijaBase = "1"
 Test = "1.7, 1.8, 1.9, 1.10"
 julia = "1.7, 1.8, 1.9, 1.10"
 

diff --git a/src/ConformalPrediction.jl b/src/ConformalPrediction.jl
@@ -1,5 +1,7 @@
 module ConformalPrediction
 
+using TaijaBase
+
 # Conformal Models:
 include("conformal_models/conformal_models.jl")
 export ConformalModel

diff --git a/src/conformal_models/conformal_models.jl b/src/conformal_models/conformal_models.jl
@@ -50,26 +50,19 @@ function conformal_model(
     return conf_model
 end
 
+# Inductive Models:
+include("inductive/inductive_models.jl")
+
 # Regression Models:
-include("inductive_regression.jl")
 include("transductive_regression.jl")
 
 # Classification Models
-include("inductive_classification.jl")
 include("transductive_classification.jl")
 
 # Training:
 include("ConformalTraining/ConformalTraining.jl")
 using .ConformalTraining
 
-# Type unions:
-const InductiveModel = Union{
-    SimpleInductiveRegressor,
-    SimpleInductiveClassifier,
-    AdaptiveInductiveClassifier,
-    ConformalQuantileRegressor,
-}
-
 const TransductiveModel = Union{
     NaiveRegressor,
     JackknifeRegressor,

diff --git a/...formal_models/inductive_classification.jl → ...formal_models/inductive/classification.jl b/...formal_models/inductive_classification.jl → ...formal_models/inductive/classification.jl
@@ -1,50 +1,36 @@
-"""
-    score(conf_model::ConformalProbabilisticSet, fitresult, X, y=nothing)
-
-Generic score method for the [`ConformalProbabilisticSet`](@ref). It computes nonconformity scores using the heuristic function `h` and the softmax probabilities of the true class. Method is dispatched for different Conformal Probabilistic Sets and atomic models.
-"""
-function score(conf_model::ConformalProbabilisticSet, fitresult, X, y=nothing)
-    return score(conf_model, conf_model.model, fitresult, X, y)
-end
-
-"""
-    split_data(conf_model::ConformalProbabilisticSet, indices::Base.OneTo{Int})
-
-Splits the data into a proper training and calibration set.
-"""
-function split_data(conf_model::ConformalProbabilisticSet, X, y)
-    train, calibration = partition(eachindex(y), conf_model.train_ratio)
-    Xtrain = selectrows(X, train)
-    ytrain = y[train]
-    Xcal = selectrows(X, calibration)
-    ycal = y[calibration]
-
-    return Xtrain, ytrain, Xcal, ycal
-end
-
 # Simple
 "The `SimpleInductiveClassifier` is the simplest approach to Inductive Conformal Classification. Contrary to the [`NaiveClassifier`](@ref) it computes nonconformity scores using a designated calibration dataset."
 mutable struct SimpleInductiveClassifier{Model<:Supervised} <: ConformalProbabilisticSet
     model::Model
     coverage::AbstractFloat
     scores::Union{Nothing,Dict{Any,Any}}
     heuristic::Function
+    parallelizer::Union{Nothing,AbstractParallelizer}
     train_ratio::AbstractFloat
 end
 
 function SimpleInductiveClassifier(
     model::Supervised;
     coverage::AbstractFloat=0.95,
     heuristic::Function=minus_softmax,
+    parallelizer::Union{Nothing,AbstractParallelizer}=nothing,
     train_ratio::AbstractFloat=0.5,
 )
-    return SimpleInductiveClassifier(model, coverage, nothing, heuristic, train_ratio)
+    return SimpleInductiveClassifier(
+        model, coverage, nothing, heuristic, parallelizer, train_ratio
+    )
 end
 
-"""
+@doc raw"""
     score(conf_model::SimpleInductiveClassifier, ::Type{<:Supervised}, fitresult, X, y::Union{Nothing,AbstractArray}=nothing)
 
-Score method for the [`SimpleInductiveClassifier`](@ref) dispatched for any `<:Supervised` model.
+Score method for the [`SimpleInductiveClassifier`](@ref) dispatched for any `<:Supervised` model. For the [`SimpleInductiveClassifier`](@ref) nonconformity scores are computed as follows:
+
+``
+S_i^{\text{CAL}} = s(X_i, Y_i) = h(\hat\mu(X_i), Y_i), \ i \in \mathcal{D}_{\text{calibration}}
+``
+
+A typical choice for the heuristic function is ``h(\hat\mu(X_i), Y_i)=1-\hat\mu(X_i)_{Y_i}`` where ``\hat\mu(X_i)_{Y_i}`` denotes the softmax output of the true class and ``\hat\mu`` denotes the model fitted on training data ``\mathcal{D}_{\text{train}}``. The simple approach only takes the softmax probability of the true label into account.
 """
 function score(
     conf_model::SimpleInductiveClassifier, atomic::Supervised, fitresult, X, y=nothing
@@ -61,34 +47,6 @@ function score(
     end
 end
 
-@doc raw"""
-    MMI.fit(conf_model::SimpleInductiveClassifier, verbosity, X, y)
-
-For the [`SimpleInductiveClassifier`](@ref) nonconformity scores are computed as follows:
-
-``
-S_i^{\text{CAL}} = s(X_i, Y_i) = h(\hat\mu(X_i), Y_i), \ i \in \mathcal{D}_{\text{calibration}}
-``
-
-A typical choice for the heuristic function is ``h(\hat\mu(X_i), Y_i)=1-\hat\mu(X_i)_{Y_i}`` where ``\hat\mu(X_i)_{Y_i}`` denotes the softmax output of the true class and ``\hat\mu`` denotes the model fitted on training data ``\mathcal{D}_{\text{train}}``. The simple approach only takes the softmax probability of the true label into account.
-"""
-function MMI.fit(conf_model::SimpleInductiveClassifier, verbosity, X, y)
-
-    # Data Splitting:
-    Xtrain, ytrain, Xcal, ycal = split_data(conf_model, X, y)
-
-    # Training:
-    fitresult, cache, report = MMI.fit(
-        conf_model.model, verbosity, MMI.reformat(conf_model.model, Xtrain, ytrain)...
-    )
-
-    # Nonconformity Scores:
-    cal_scores, scores = score(conf_model, fitresult, Xcal, ycal)
-    conf_model.scores = Dict(:calibration => cal_scores, :all => scores)
-
-    return (fitresult, cache, report)
-end
-
 @doc raw"""
     MMI.predict(conf_model::SimpleInductiveClassifier, fitresult, Xnew)
 
@@ -127,42 +85,20 @@ mutable struct AdaptiveInductiveClassifier{Model<:Supervised} <: ConformalProbab
     coverage::AbstractFloat
     scores::Union{Nothing,Dict{Any,Any}}
     heuristic::Function
+    parallelizer::Union{Nothing,AbstractParallelizer}
     train_ratio::AbstractFloat
 end
 
 function AdaptiveInductiveClassifier(
     model::Supervised;
     coverage::AbstractFloat=0.95,
     heuristic::Function=minus_softmax,
+    parallelizer::Union{Nothing,AbstractParallelizer}=nothing,
     train_ratio::AbstractFloat=0.5,
 )
-    return AdaptiveInductiveClassifier(model, coverage, nothing, heuristic, train_ratio)
-end
-
-@doc raw"""
-    MMI.fit(conf_model::AdaptiveInductiveClassifier, verbosity, X, y)
-
-For the [`AdaptiveInductiveClassifier`](@ref) nonconformity scores are computed by cumulatively summing the ranked scores of each label in descending order until reaching the true label ``Y_i``:
-
-``
-S_i^{\text{CAL}} = s(X_i,Y_i) = \sum_{j=1}^k  \hat\mu(X_i)_{\pi_j} \ \text{where } \ Y_i=\pi_k,  i \in \mathcal{D}_{\text{calibration}}
-``
-"""
-function MMI.fit(conf_model::AdaptiveInductiveClassifier, verbosity, X, y)
-
-    # Data Splitting:
-    Xtrain, ytrain, Xcal, ycal = split_data(conf_model, X, y)
-
-    # Training:
-    fitresult, cache, report = MMI.fit(
-        conf_model.model, verbosity, MMI.reformat(conf_model.model, Xtrain, ytrain)...
+    return AdaptiveInductiveClassifier(
+        model, coverage, nothing, heuristic, parallelizer, train_ratio
     )
-
-    # Nonconformity Scores:
-    cal_scores, scores = score(conf_model, fitresult, Xcal, ycal)
-    conf_model.scores = Dict(:calibration => cal_scores, :all => scores)
-
-    return (fitresult, cache, report)
 end
 
 """

diff --git a/src/conformal_models/inductive/inductive_models.jl b/src/conformal_models/inductive/inductive_models.jl
@@ -0,0 +1,56 @@
+# Inductive model definitions (the type union below refers to these types):
+include("classification.jl")
+include("regression.jl")
+
+const InductiveModel = Union{
+    SimpleInductiveRegressor,
+    SimpleInductiveClassifier,
+    AdaptiveInductiveClassifier,
+    ConformalQuantileRegressor,
+}
+
+"""
+    split_data(conf_model::InductiveModel, X, y)
+
+Splits `X` and `y` into a proper training set and a calibration set according to `conf_model.train_ratio` and returns `Xtrain, ytrain, Xcal, ycal`.
+"""
+function split_data(conf_model::InductiveModel, X, y)
+    train, calibration = partition(eachindex(y), conf_model.train_ratio)
+    Xtrain = selectrows(X, train)
+    ytrain = y[train]
+    Xcal = selectrows(X, calibration)
+    ycal = y[calibration]
+
+    return Xtrain, ytrain, Xcal, ycal
+end
+
+"""
+    score(conf_model::InductiveModel, fitresult, X, y=nothing)
+
+Generic score method for any [`InductiveModel`](@ref). It forwards to the `score` method that is dispatched on both the conformal model and its atomic model (`conf_model.model`), which computes the nonconformity scores.
+"""
+function score(conf_model::InductiveModel, fitresult, X, y=nothing)
+    return score(conf_model, conf_model.model, fitresult, X, y)
+end
+
+@doc raw"""
+    MMI.fit(conf_model::InductiveModel, verbosity, X, y)
+
+Fits the [`InductiveModel`](@ref): the data is split into a proper training and a calibration set, the atomic model is trained on the former, and nonconformity scores are computed on the latter and stored in `conf_model.scores`.
+"""
+function MMI.fit(conf_model::InductiveModel, verbosity, X, y)
+
+    # Data Splitting (proper training set vs. calibration set):
+    Xtrain, ytrain, Xcal, ycal = split_data(conf_model, X, y)
+
+    # Training (atomic model, proper training set only):
+    fitresult, cache, report = MMI.fit(
+        conf_model.model, verbosity, MMI.reformat(conf_model.model, Xtrain, ytrain)...
+    )
+
+    # Nonconformity Scores (computed on the calibration set):
+    cal_scores, scores = score(conf_model, fitresult, Xcal, ycal)
+    conf_model.scores = Dict(:calibration => cal_scores, :all => scores)
+
+    return (fitresult, cache, report)
+end
diff --git a/src/conformal_models/inductive_regression.jl → src/conformal_models/inductive/regression.jl b/src/conformal_models/inductive_regression.jl → src/conformal_models/inductive/regression.jl
@@ -6,20 +6,25 @@ mutable struct SimpleInductiveRegressor{Model<:Supervised} <: ConformalInterval
     coverage::AbstractFloat
     scores::Union{Nothing,AbstractArray}
     heuristic::Function
+    parallelizer::Union{Nothing,AbstractParallelizer}
     train_ratio::AbstractFloat
 end
 
 function SimpleInductiveRegressor(
     model::Supervised;
     coverage::AbstractFloat=0.95,
     heuristic::Function=absolute_error,
+    parallelizer::Union{Nothing,AbstractParallelizer}=nothing,
     train_ratio::AbstractFloat=0.5,
 )
-    return SimpleInductiveRegressor(model, coverage, nothing, heuristic, train_ratio)
+    return SimpleInductiveRegressor(
+        model, coverage, nothing, heuristic, parallelizer, train_ratio
+    )
 end
 
+
 @doc raw"""
-    MMI.fit(conf_model::SimpleInductiveRegressor, verbosity, X, y)
+    score(conf_model::SimpleInductiveRegressor, atomic::Supervised, fitresult, X, y=nothing)
 
 For the [`SimpleInductiveRegressor`](@ref) nonconformity scores are computed as follows:
 
@@ -29,27 +34,19 @@ S_i^{\text{CAL}} = s(X_i, Y_i) = h(\hat\mu(X_i), Y_i), \ i \in \mathcal{D}_{\tex
 
 A typical choice for the heuristic function is ``h(\hat\mu(X_i),Y_i)=|Y_i-\hat\mu(X_i)|`` where ``\hat\mu`` denotes the model fitted on training data ``\mathcal{D}_{\text{train}}``.
 """
-function MMI.fit(conf_model::SimpleInductiveRegressor, verbosity, X, y)
-
-    # Data Splitting:
-    train, calibration = partition(eachindex(y), conf_model.train_ratio)
-    Xtrain = selectrows(X, train)
-    ytrain = y[train]
-    Xcal = selectrows(X, calibration)
-    ycal = y[calibration]
-
-    # Training:
-    fitresult, cache, report = MMI.fit(
-        conf_model.model, verbosity, MMI.reformat(conf_model.model, Xtrain, ytrain)...
-    )
-
-    # Nonconformity Scores:
+function score(
+    conf_model::SimpleInductiveRegressor, atomic::Supervised, fitresult, X, y=nothing
+)
     ŷ = reformat_mlj_prediction(
-        MMI.predict(conf_model.model, fitresult, MMI.reformat(conf_model.model, Xcal)...)
+        MMI.predict(atomic, fitresult, MMI.reformat(atomic, X)...)
     )
-    conf_model.scores = @.(conf_model.heuristic(ycal, ŷ))
-
-    return (fitresult, cache, report)
+    scores = @.(conf_model.heuristic(y, ŷ))
+    if isnothing(y)
+        return scores
+    else
+        # Scores are per-observation here: there is no class axis to index by `y`.
+        return scores, scores
+    end
 end
 
 # Prediction
@@ -84,6 +81,7 @@ mutable struct ConformalQuantileRegressor{Model<:QuantileModel} <: ConformalInte
     coverage::AbstractFloat
     scores::Union{Nothing,AbstractArray}
     heuristic::Function
+    parallelizer::Union{Nothing,AbstractParallelizer}
     train_ratio::AbstractFloat
 end
 
@@ -93,9 +91,12 @@ function ConformalQuantileRegressor(
     heuristic::Function=function f(y, ŷ_lb, ŷ_ub)
         return reduce((x, y) -> max.(x, y), [ŷ_lb - y, y - ŷ_ub])
     end,
+    parallelizer::Union{Nothing,AbstractParallelizer}=nothing,
     train_ratio::AbstractFloat=0.5,
 )
-    return ConformalQuantileRegressor(model, coverage, nothing, heuristic, train_ratio)
+    return ConformalQuantileRegressor(
+        model, coverage, nothing, heuristic, parallelizer, train_ratio
+    )
 end
 
 @doc raw"""
@@ -114,13 +115,7 @@ A typical choice for the heuristic function is ``h(\hat\mu_{\alpha_{lo}}(X_i), \
 function MMI.fit(conf_model::ConformalQuantileRegressor, verbosity, X, y)
 
     # Data Splitting:
-    train, calibration = partition(eachindex(y), conf_model.train_ratio)
-    Xtrain = selectrows(X, train)
-    ytrain = y[train]
-    Xtrain, ytrain = MMI.reformat(conf_model.model, Xtrain, ytrain)
-    Xcal = selectrows(X, calibration)
-    ycal = y[calibration]
-    Xcal, ycal = MMI.reformat(conf_model.model, Xcal, ycal)
+    Xtrain, ytrain, Xcal, ycal = split_data(conf_model, X, y)
 
     # Training:
     fitresult, cache, report, y_pred = ([], [], [], [])