JuliaSurv · lrnv · May 16, 2024 · May 16, 2024 · May 16, 2024 · May 16, 2024
diff --git a/Project.toml b/Project.toml
@@ -8,16 +8,19 @@ Distributions = "31c24e10-a181-5473-b8eb-7969acd0382f"
 SpecialFunctions = "276daf66-3868-5448-9aa4-cd146d93841b"
 
 [compat]
+Aqua = "0.8"
 Distributions = "0.25"
+HypothesisTests = "0.11"
 SpecialFunctions = "2"
 Test = "1.6"
 TestItemRunner = "0.2"
 julia = "1.6.7"
 
 [extras]
 Aqua = "4c88cf16-eb10-579e-8560-4a9242c79595"
+HypothesisTests = "09f84164-cd44-5f33-b23f-e6b0d136a0d5"
 Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40"
 TestItemRunner = "f8b46487-2199-4994-9208-9a1283c18c0a"
 
 [targets]
-test = ["Aqua", "Test", "TestItemRunner"]
+test = ["Aqua", "Test", "TestItemRunner", "HypothesisTests"]
diff --git a/docs/make.jl b/docs/make.jl
@@ -23,9 +23,11 @@ makedocs(;
         "index.md",
         "utils.md",
         "Implemented Distributions" => [
+            "distros/PCHD.md",
             "distros/GenGamma.md",
             "distros/PGW.md",
             "distros/EW.md",
+            "distros/LogLogistic.md",
         ],
         "references.md"
     ],

diff --git a/docs/src/distros/LogLogistic.md b/docs/src/distros/LogLogistic.md
@@ -0,0 +1,36 @@
+# LogLogistic
+
+## Definition
+
+The [LogLogistic](https://en.wikipedia.org/wiki/Log-logistic_distribution) distribution is the probability distribution of a random variable whose logarithm has a logistic distribution. It is similar in shape to the log-normal distribution but has heavier tails.
+
+It is used in survival analysis as a parametric model for events whose rate increases initially and decreases later, as, for example, mortality rate from cancer following diagnosis or treatment. It has also been used in hydrology to model stream flow and precipitation, in economics as a simple model of the distribution of wealth or income, and in networking to model the transmission times of data considering both the network and the software.
+
+## Examples
+
+Let us sample a dataset from an Exponentiated Weibull distribution: 
+
+```@example 1
+using SurvivalDistributions, Distributions, Random, Plots, StatsBase
+Random.seed!(123)
+D = LogLogistic(1,2)
+sim = rand(D,1000);
+```
+
+First, let's have a look at the hazard function: 
+```@example 1
+plot(t -> hazard(D,t), ylabel = "Hazard", xlims = (0,10))
+```
+
+Then, we can verify the coherence of our code by comparing the obtained sample and the true pdf: 
+```@example 1
+histogram(sim, normalize=:pdf, bins = range(0, 5, length=30))
+plot!(t -> pdf(D,t), ylabel = "Density", xlims = (0,5))
+```
+
+We could also compare the empirical and theroetical cdfs: 
+```@example 1
+ecdfsim = ecdf(sim)
+plot(x -> ecdfsim(x), 0, 5, label = "ECDF", linecolor = "gray", linewidth=3)
+plot!(t -> cdf(D,t), xlabel = "x", ylabel = "CDF vs. ECDF", xlims = (0,5))
+```
diff --git a/docs/src/distros/PCHD.md b/docs/src/distros/PCHD.md
@@ -0,0 +1,39 @@
+# Piecewise constant hazard distributions
+
+## Definition
+
+The `PiecewiseConstantHazardDistribution` is one of the most simple and yet most usefull distribution provided in this package. These distributions are defined by their hazard functions, which are assumed to be piecewise constant (hence their names). 
+
+While dealing with census data and rate tables, having a survival model defined by a piecewise constant hazard is very common. In particular, random lifes extracted from `RateTable`s from [`RateTables.jl`](https://github.com/JuliaSurv/RateTables.jl) follows this pattern. 
+
+
+## Examples
+
+```@example 1
+using SurvivalDistributions, Distributions, Random, Plots, StatsBase
+Random.seed!(123)
+∂t = rand(20)
+λ = rand(20)
+D = PiecewiseConstantHazardDistribution(∂t,λ)
+sim = rand(D,1000);
+```
+
+First, let's have a look at the hazard function: 
+```@example 1
+plot(t -> hazard(D,t), ylabel = "Hazard", xlims = (0,10))
+```
+
+As excepted, it is quite random. 
+
+Then, we can verify the coherence of our code by comparing the obtained sample and the true pdf: 
+```@example 1
+histogram(sim, normalize=:pdf, bins = range(0, 5, length=30))
+plot!(t -> pdf(D,t), ylabel = "Density", xlims = (0,5))
+```
+
+The comparison is not too bad ! We could also compare the empirical and theroetical cdfs: 
+```@example 1
+ecdfsim = ecdf(sim)
+plot(x -> ecdfsim(x), 0, 5, label = "ECDF", linecolor = "gray", linewidth=3)
+plot!(t -> cdf(D,t), xlabel = "x", ylabel = "CDF vs. ECDF", xlims = (0,5))
+```
diff --git a/src/SurvivalDistributions.jl b/src/SurvivalDistributions.jl
@@ -4,19 +4,23 @@ module SurvivalDistributions
 
     using SpecialFunctions: loggamma
 
-    using Distributions: logcdf, logccdf, ccdf, cdf, pdf, logpdf, quantile, rand, ContinuousUnivariateDistribution, UnivariateDistribution, @distr_support, AbstractRNG, Weibull, Gamma, Logistic
-    import Distributions: logcdf, logccdf, ccdf, cdf, pdf, logpdf, quantile, rand
+    using Distributions: logcdf, logccdf, ccdf, cdf, pdf, logpdf, quantile, rand, ContinuousUnivariateDistribution, UnivariateDistribution, @distr_support, AbstractRNG, Weibull, Gamma, Logistic, expectation
+    import Distributions: logcdf, logccdf, ccdf, cdf, pdf, logpdf, quantile, rand, expectation
 
     # Export a few utilities : 
     include("utilities.jl")
     export censored_loglikelihood, hazard, loghazard, cumhazard
 
     # Export other distributions: 
+    include("distros/AbstractHazardDistribution.jl")
+    include("distros/PiecewiseConstantHazardDistribution.jl")
     include("distros/ExpoDist.jl")
+
     include("distros/ExponentiatedWeibull.jl")
     include("distros/GeneralizedGamma.jl")
     include("distros/PowerGeneralizedWeibull.jl")
     include("distros/LogLogistic.jl")
-    export ExpoDist, ExponentiatedWeibull, GeneralizedGamma, PowerGeneralizedWeibull, LogLogistic
+
+    export ExpoDist, ExponentiatedWeibull, GeneralizedGamma, PowerGeneralizedWeibull, LogLogistic, PiecewiseConstantHazardDistribution
 
 end
diff --git a/src/distros/AbstractHazardDistribution.jl b/src/distros/AbstractHazardDistribution.jl
@@ -0,0 +1,16 @@
+abstract type AbstractHazardDistribution <: ContinuousUnivariateDistribution end
+@distr_support AbstractHazardDistribution 0.0 Inf
+loghazard(X::AbstractHazardDistribution, t::Real) = log(hazard(X,t))
+cumhazard(X::AbstractHazardDistribution, t::Real) = quadgk(u -> hazard(X,u), 0, t)[1]
+logccdf(  X::AbstractHazardDistribution, t::Real) = -cumhazard(X,t)
+ccdf(     X::AbstractHazardDistribution, t::Real) = exp(-cumhazard(X,t))
+cdf(      X::AbstractHazardDistribution, t::Real) = -expm1(-cumhazard(X,t))
+logcdf(   X::AbstractHazardDistribution, t::Real) = log1mexp(-cumhazard(X,t))
+pdf(      X::AbstractHazardDistribution, t::Real) = hazard(X,t)*ccdf(X,t)
+logpdf(   X::AbstractHazardDistribution, t::Real) = loghazard(X,t) - cumhazard(X,t)
+function quantile( X::AbstractHazardDistribution, t::Real) 
+    u = log(1-t)
+    return find_zero(x -> u + cumhazard(X,x), (0.0, Inf))
+end
+
+
diff --git a/src/distros/ExpoDist.jl b/src/distros/ExpoDist.jl
@@ -1,7 +1,7 @@
 """
-ExpoDist(γ, X)
+    ExpoDist(γ, X)
 
-    A power distribution with power γ and base distribution X<:ContinuousUnivariateDistribution is defined as the distribution that has cumulative distribution function ``F^γ`` where F was the distribution function of X. 
+A power distribution with power γ and base distribution X<:ContinuousUnivariateDistribution is defined as the distribution that has cumulative distribution function ``F^γ`` where F was the distribution function of X. 
 """
 struct ExpoDist{D} <: ContinuousUnivariateDistribution
     γ::Float64

diff --git a/src/distros/ExponentiatedWeibull.jl b/src/distros/ExponentiatedWeibull.jl
@@ -1,18 +1,13 @@
 """
-    ExponentiatedWeibull(sigma,nu,gamma)
+    ExponentiatedWeibull(α,θ,γ)
 
-The *ExponentiatedWeibull distribution* with scale `sigma`, shape `nu` and second shape `gamma` has probability density function 
+The [Exponentiated Weibull distribution](https://en.wikipedia.org/wiki/Exponentiated_Weibull_distribution) is obtain by exponentiating the cdf of the [Weibull distribution](https://en.wikipedia.org/wiki/Weibull_distribution). This simple transformation adds a second shape parameter that, interestingly, induces a lot of flexibility on the hazard function. The hazard function of the Exponentiated Weibull distribution can capture the basic shapes: constant, increasing, decreasing, bathtub, and unimodal, making it appealing for survival models.
 
-```math
-f(x; parameters) = ...
-```
-
-More details and examples of usage could be provided in this docstring.
-
-Maybe this distribution could simply be constructed from a transformation of the original Weibull ? 
+A random variable X follows an `ExponentiatedWeibull(α,θ,γ)` distribution when it has cumulative distribution function ``F_X = F_W^{γ}`` where ``F_W`` is the cumulative distribution function of a `Weibull(α,θ)`. 
 
 References: 
-* [Link to my reference so that people understand what it is](https://myref.com) 
+* [Exponentiated Weibull distribution](https://en.wikipedia.org/wiki/Exponentiated_Weibull_distribution)
+* [Weibull distribution](https://en.wikipedia.org/wiki/Weibull_distribution)
 """
 const ExponentiatedWeibull{T} = ExpoDist{Weibull{T}}
-ExponentiatedWeibull(sigma,nu,gamma) = ExpoDist(gamma, Weibull(nu,sigma))
+ExponentiatedWeibull(α,θ,γ) = ExpoDist(γ, Weibull(α,θ))
diff --git a/src/distros/GeneralizedGamma.jl b/src/distros/GeneralizedGamma.jl
@@ -1,18 +1,11 @@
 """
-    GeneralizedGamma(sigma,nu,gamma)
+    GeneralizedGamma(σ,nu,gamma)
 
-The *GeneralizedGamma distribution* with scale `sigma`, shape `nu` and second shape `gamma` has probability density function 
-
-```math
-f(x; parameters) = ...
-```
-
-More details and examples of usage could be provided in this docstring.
-
-Maybe this distribution could simply be constructed from a transformation of the original Weibull ? 
+The [Generalised Gamma](https://en.wikipedia.org/wiki/Generalized_gamma_distribution) (GG) distribution is a three-parameter distribution with support on ``{\\mathbb R}_+``. The corresponding hazard function can accommodate bathtub, unimodal and monotone (increasing and decreasing) hazard shapes. The GG distribution has become popular in survival analysis due to its flexibility. 
 
 References: 
-* [Link to my reference so that people understand what it is](https://myref.com) 
+* [Generalised Gamma](https://en.wikipedia.org/wiki/Generalized_gamma_distribution)
+* [stacy:1962](@cite) Stacy, E.W. A generalization of the gamma distribution, *The Annals of Mathematical Statistics*, 1962
 """
 struct GeneralizedGamma{T<:Real} <: ContinuousUnivariateDistribution
     sigma::T

diff --git a/src/distros/LogLogistic.jl b/src/distros/LogLogistic.jl
@@ -1,34 +1,38 @@
 """
-    LogLogistic(mu,sigma)
+    LogLogistic(μ,σ)
 
-To be described... 
+According to [its wikipedia page](https://en.wikipedia.org/wiki/Log-logistic_distribution), the the log-logistic distribution (known as the Fisk distribution in economics) is a continuous probability distribution for a non-negative random variable. It is used in survival analysis as a parametric model for events whose rate increases initially and decreases later, as, for example, mortality rate from cancer following diagnosis or treatment. It has also been used in hydrology to model stream flow and precipitation, in economics as a simple model of the distribution of wealth or income, and in networking to model the transmission times of data considering both the network and the software.
 
+The log-logistic distribution is the probability distribution of a random variable whose logarithm has a logistic distribution. It is similar in shape to the log-normal distribution but has heavier tails. Unlike the log-normal, its cumulative distribution function can be written in closed form.
+
+It is characterized by its density function as 
 
 ```math
-f(x; parameters) = ...
+f(x) = \\frac{(\\frac{β}{α})(\\frac{x}{α})^{β-1} }{(1 + (\\frac{x}{α})^{β})^2},
 ```
 
+where α = e^μ and β = 1/σ.
 """
 struct LogLogistic{T<:Real} <: ContinuousUnivariateDistribution
-    mu::T
-    sigma::T
-    function LogLogistic(mu,sigma)
-        T = promote_type(Float64, eltype.((mu,sigma))...)
-        return new{T}(T(mu), T(sigma))
+    X::Logistic{T}
+    function LogLogistic(μ,σ)
+        X = Logistic(μ, σ)
+        return new{eltype(X)}(X)
     end
 end
 LogLogistic() = LogLogistic(1,1)
-params(d::LogLogistic) = (d.mu,d.sigma)
+params(d::LogLogistic) = (d.X.μ,d.X.θ)
 @distr_support LogLogistic 0.0 Inf
 function loghazard(d::LogLogistic, t::Real)
-    lt = log.(t)
-    lpdf0 = logpdf.(Logistic(d.mu, d.sigma), lt) .- lt
-    ls0 = logccdf.(Logistic(d.mu, d.sigma), lt)
-    return lpdf0 .- ls0
+    lt = log(t)
+    lpdf0 = logpdf(Logistic(d.X.μ, d.X.θ), lt)
+    ls0 = logccdf(Logistic(d.X.μ, d.X.θ), lt)
+    return lpdf0 - ls0 - lt
 end
 function cumhazard(d::LogLogistic,t::Real)
     lt = log.(t)
-    return -logccdf.(Logistic(d.mu, d.sigma), lt)
+    return -logccdf.(Logistic(d.X.μ, d.X.θ), lt)
 end
 logpdf(d::LogLogistic, t::Real) = loghazard(d,t) - cumhazard(d,t)
-cdf(d::LogLogistic, t::Real) = -expm1(-cumhazard(d,t))
+cdf(d::LogLogistic, t::Real) = -expm1(-cumhazard(d,t))
+rand(rng::AbstractRNG, d::LogLogistic) = exp(rand(rng,d.X))
diff --git a/src/distros/PiecewiseConstantHazardDistribution.jl b/src/distros/PiecewiseConstantHazardDistribution.jl
@@ -0,0 +1,60 @@
+struct PiecewiseConstantHazardDistribution <: AbstractHazardDistribution
+    ∂t::Vector{Float64}
+    λ::Vector{Float64}
+end
+# the three folowing functions are actually enough i think to be able to sample eficiently for piecewise constant hazard distributions. 
+function hazard(D::PiecewiseConstantHazardDistribution, t::Real)
+    u = 0.0
+    for i in 1:length(D.∂t)
+        u += D.∂t[i]
+        if t < u
+            return D.λ[i]
+        end
+    end
+    return D.λ[end]
+end
+function cumhazard(D::PiecewiseConstantHazardDistribution, t::Real)
+    Λ = 0.0
+    u = 0.0
+    for j in eachindex(D.∂t)
+        u += D.∂t[j]
+        if t > u 
+            Λ += D.λ[j]*D.∂t[j]
+        else
+            Λ += D.λ[j]*(t-(u-D.∂t[j]))
+            return Λ
+        end
+    end
+    # We consider that the last box is in fact infinitely wide (exponential tail)
+    return Λ + (t-u)*L.λ[end] 
+end
+function quantile(D::PiecewiseConstantHazardDistribution, p::Real)
+    Λ_target = -log(1-p)
+    Λ = 0.0
+    u = 0.0
+    for j in eachindex(D.∂t)
+        Λ += D.λ[j]*D.∂t[j]
+        u += D.∂t[j]
+        if Λ_target < Λ
+            u -= (Λ - Λ_target) / D.λ[j]
+            return u
+        end
+    end
+    return u
+end
+function expectation(L::PiecewiseConstantHazardDistribution)
+    S = 1.0
+    E = 0.0
+    for j in eachindex(D.∂t)
+        if D.λ[j] > 0
+            S_inc = exp(-D.λ[j]*D.∂t[j])
+            E += S * (1 - S_inc) / D.λ[j]
+            S *= S_inc
+        else
+            E += S * D.∂t[j]
+        end
+    end
+    # This reminder assumes a exponential life time afer the maximuum age.
+    R = ifelse(D.λ[end] == 0.0, 0.0, S / D.λ[end])
+    return E + R
+end
diff --git a/src/distros/PowerGeneralizedWeibull.jl b/src/distros/PowerGeneralizedWeibull.jl
@@ -1,18 +1,17 @@
 """
-    PowerGeneralizedWeibull(sigma,nu,gamma)
+    PowerGeneralizedWeibull(σ,ν,γ)
 
-The *PowerGeneralizedWeibull distribution* with scale `sigma`, shape `nu` and second shape `gamma` has probability density function 
+The Power Generalised Weibull (PGW) distribution is a three-parameter distribution with support on ``{\\mathbb R}_+``. The corresponding hazard function can accommodate bathtub, unimodal and monotone (increasing and decreasing) hazard shapes. The PGW distribution has become popular in survival analysis given the tractability of its hazard and survival functions. 
+
+The `PowerGeneralizedWeibull(σ,ν,γ)` distribution, with scale `σ`, shape `ν` (nu) and second shape `γ` has probability density function 
 
 ```math
-f(x; parameters) = ...
+f(t;σ,ν,γ) = \\dfrac{ν}{γ σ^ν}t^{ν-1} \\left[ 1 + \\left(\\dfrac{t}{σ}\\right)^ν\\right]^{\\left(\\frac{1}{γ}-1\\right)} \\exp\\left\\{ 1- \\left[ 1 + \\left(\\dfrac{t}{σ}\\right)^ν\\right]^{\\frac{1}{γ}}
+\\right\\}.
 ```
 
-More details and examples of usage could be provided in this docstring.
-
-Maybe this distribution could simply be constructed from a transformation of the original Weibull ? 
-
 References: 
-* [Link to my reference so that people understand what it is](https://myref.com) 
+* [nikulin:2009](@cite) Nikulin, M. and Haghighi, F. On the power generalized Weibull family: model for cancer censored data. Metron -- International Journal of Statistics, 2009
 """
 struct PowerGeneralizedWeibull{T<:Real} <: ContinuousUnivariateDistribution
     sigma::T