From c1615d5adca6076eb50fdc80818398a533f22d07 Mon Sep 17 00:00:00 2001
From: aursiber
Date: Wed, 28 Aug 2024 14:30:27 +0000
Subject: [PATCH] Deploying to gh-pages from @ lbbe-software/fitdistrplus@2ba5f06401d0fb01f11b5709624c438f1a6cbf69 🚀
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
---
 articles/FAQ.html           |  7 +++--
 articles/Optimalgo.html     | 56 ++++++++++++++++++-------------------
 pkgdown.yml                 |  2 +-
 reference/bootdist.html     |  6 ++--
 reference/bootdistcens.html |  6 ++--
 reference/fitdist.html      |  4 +--
 reference/fitdistcens.html  |  4 +--
 reference/mmedist.html      |  4 +--
 search.json                 |  2 +-
 9 files changed, 46 insertions(+), 45 deletions(-)

diff --git a/articles/FAQ.html b/articles/FAQ.html
index 39bda12..161571d 100644
--- a/articles/FAQ.html
+++ b/articles/FAQ.html
@@ -186,9 +186,10 @@

 Is it possible to fit a distribution with at least 3 parameters?
 
 Why there are differences between MLE and MME for the lognormal distribution?
 
-We recall that the lognormal distribution function is given by
-
-f(x) = \dfrac{1}{\sqrt{2\pi\sigma^2}} e^{-\frac{(x-\mu^2)}{2\sigma^2}}
-
-F_X(x) = \Phi\left(\frac{\log(x)-\mu}{\sigma} \right),
+We recall that the lognormal distribution function is given by
+F_X(x) = \Phi\left(\frac{\log(x)-\mu}{\sigma} \right),
+
 where \Phi denotes the distribution function of the standard normal distribution.
diff --git a/articles/Optimalgo.html b/articles/Optimalgo.html
index 4e51b2d..e7aee38 100644
--- a/articles/Optimalgo.html
+++ b/articles/Optimalgo.html
@@ -414,9 +414,9 @@

time (sec) -0.005 +0.006 0.004 -0.022 +0.023 0.029 0.020 0.004 @@ -514,15 +514,15 @@

time (sec) -0.009 -0.075 -0.072 -0.045 -0.014 -0.013 -0.085 -0.076 -0.064 +0.011 +0.082 +0.078 +0.050 +0.017 +0.015 +0.095 +0.083 +0.071 @@ -580,10 +580,10 @@

time (sec) -0.015 -0.003 -0.013 +0.016 +0.004 0.013 +0.014 0.015 @@ -636,10 +636,10 @@

time (sec) -0.011 -0.044 -0.043 -0.037 +0.013 +0.062 +0.047 +0.042 @@ -875,8 +875,8 @@

time (sec) 0.002 0.002 -0.220 -0.209 +0.224 +0.208 0.205 0.001 0.003 @@ -983,15 +983,15 @@

time (sec) -0.008 +0.009 0.032 0.007 0.032 +0.001 0.002 -0.002 -0.031 -0.007 0.032 +0.007 +0.031 fitted prob @@ -1061,10 +1061,10 @@

time (sec) -0.004 +0.005 0.002 -0.256 -0.234 +0.262 +0.237 0.024 @@ -1117,7 +1117,7 @@

time (sec) -0.006 +0.007 0.002 0.002 0.002 diff --git a/pkgdown.yml b/pkgdown.yml index 95d1cea..74f6b6d 100644 --- a/pkgdown.yml +++ b/pkgdown.yml @@ -6,7 +6,7 @@ articles: fitdistrplus_vignette: fitdistrplus_vignette.html Optimalgo: Optimalgo.html starting-values: starting-values.html -last_built: 2024-08-28T14:19Z +last_built: 2024-08-28T14:28Z urls: reference: https://lbbe-software.github.io/fitdistrplus/reference article: https://lbbe-software.github.io/fitdistrplus/articles diff --git a/reference/bootdist.html b/reference/bootdist.html index 1dc24cd..fb46b71 100644 --- a/reference/bootdist.html +++ b/reference/bootdist.html @@ -379,7 +379,7 @@

Examples#> rate 0.05458836 0.04622389 0.06476728 proc.time() - ptm #> user system elapsed -#> 3.879 0.089 3.859 +#> 3.981 0.089 3.964 # parallel version using snow require(parallel) @@ -392,7 +392,7 @@

Examples#> rate 0.05450354 0.04632331 0.06524721 proc.time() - ptm #> user system elapsed -#> 0.037 0.001 3.675 +#> 0.036 0.004 3.859 # parallel version using multicore (not available on Windows) ptm <- proc.time() @@ -403,7 +403,7 @@

Examples#> rate 0.05496497 0.04672265 0.06498123 proc.time() - ptm #> user system elapsed -#> 0.030 0.020 2.022 +#> 0.026 0.023 2.059 # } diff --git a/reference/bootdistcens.html b/reference/bootdistcens.html index 32e3e5c..ca1012e 100644 --- a/reference/bootdistcens.html +++ b/reference/bootdistcens.html @@ -410,7 +410,7 @@

Examples#> sd 1.129426 0.6853478 1.709083 proc.time() - ptm #> user system elapsed -#> 4.177 0.089 4.156 +#> 4.422 0.101 4.414 # parallel version using snow require(parallel) @@ -422,7 +422,7 @@

Examples#> sd 1.108123 0.6912424 1.673702 proc.time() - ptm #> user system elapsed -#> 0.005 0.003 3.173 +#> 0.006 0.004 3.412 # parallel version using multicore (not available on Windows) ptm <- proc.time() @@ -433,7 +433,7 @@

Examples#> sd 1.119044 0.7072572 1.656059 proc.time() - ptm #> user system elapsed -#> 0.012 0.012 2.262 +#> 0.008 0.019 2.360 # } diff --git a/reference/fitdist.html b/reference/fitdist.html index 63ef384..334b0dc 100644 --- a/reference/fitdist.html +++ b/reference/fitdist.html @@ -703,7 +703,7 @@

Examples#> ## #> #> -#> Wed Aug 28 14:20:24 2024 +#> Wed Aug 28 14:28:45 2024 #> Domains: #> 0.000000e+00 <= X1 <= 1.000000e+01 #> 0.000000e+00 <= X2 <= 1.000000e+01 @@ -750,7 +750,7 @@

Examples#> Solution Found Generation 1 #> Number of Generations Run 12 #> -#> Wed Aug 28 14:20:25 2024 +#> Wed Aug 28 14:28:46 2024 #> Total run time : 0 hours 0 minutes and 1 seconds summary(fit2) diff --git a/reference/fitdistcens.html b/reference/fitdistcens.html index 9033749..0610989 100644 --- a/reference/fitdistcens.html +++ b/reference/fitdistcens.html @@ -439,7 +439,7 @@

Examples print.level=1, hessian=TRUE) #> #> -#> Wed Aug 28 14:20:32 2024 +#> Wed Aug 28 14:28:56 2024 #> Domains: #> 0.000000e+00 <= X1 <= 5.000000e+00 #> 0.000000e+00 <= X2 <= 5.000000e+00 @@ -486,7 +486,7 @@

Examples#> Solution Found Generation 1 #> Number of Generations Run 12 #> -#> Wed Aug 28 14:20:33 2024 +#> Wed Aug 28 14:28:57 2024 #> Total run time : 0 hours 0 minutes and 1 seconds summary(fit.with.genoud) diff --git a/reference/mmedist.html b/reference/mmedist.html index ad9e691..b0ed947 100644 --- a/reference/mmedist.html +++ b/reference/mmedist.html @@ -500,7 +500,7 @@

Examples#> #> $memp #> function(x, order) mean(x^order) -#> <environment: 0x560f817a3ed0> +#> <environment: 0x56524f2fd0a8> #> #> $vcov #> NULL @@ -584,7 +584,7 @@

Examples#> #> $memp #> function(x, order, weights) sum(x^order * weights)/sum(weights) -#> <environment: 0x560f817a3ed0> +#> <environment: 0x56524f2fd0a8> #> #> $vcov #> NULL diff --git a/search.json b/search.json index 3149662..4436974 100644 --- a/search.json +++ b/search.json @@ -1 +1 @@ -[{"path":[]},{"path":"https://lbbe-software.github.io/fitdistrplus/articles/FAQ.html","id":"how-do-i-know-the-root-name-of-a-distribution","dir":"Articles","previous_headings":"Questions regarding distributions","what":"How do I know the root name of a distribution?","title":"Frequently Asked Questions","text":"root name probability distribution name used d, p, q, r functions. base R distributions, root names given R-intro : https://cran.r-project.org/doc/manuals/R-intro.html#Probability-distributions. example, must use \"pois\" Poisson distribution \"poisson\".","code":""},{"path":"https://lbbe-software.github.io/fitdistrplus/articles/FAQ.html","id":"how-do-i-find-non-standard-distributions","dir":"Articles","previous_headings":"Questions regarding distributions","what":"How do I find “non standard” distributions?","title":"Frequently Asked Questions","text":"non-standard distributions, can either find package implementing define . comprehensive list non-standard distributions given Distributions task view https://CRAN.R-project.org/view=Distributions. two examples user-defined distributions. third example (shifted exponential) given FAQ 3.5.4. Gumbel distribution zero-modified geometric distribution","code":"dgumbel <- function(x, a, b) 1/b*exp((a-x)/b)*exp(-exp((a-x)/b)) pgumbel <- function(q, a, b) exp(-exp((a-q)/b)) qgumbel <- function(p, a, b) a-b*log(-log(p)) data(groundbeef) fitgumbel <- fitdist(groundbeef$serving, \"gumbel\", start=list(a=10, b=10)) dzmgeom <- function(x, p1, p2) p1 * (x == 0) + (1-p1)*dgeom(x-1, p2) pzmgeom <- function(q, p1, p2) p1 * (q >= 0) + (1-p1)*pgeom(q-1, p2) rzmgeom <- function(n, p1, p2) { u <- rbinom(n, 1, 1-p1) #prob to get zero is p1 u[u != 0] <- rgeom(sum(u != 0), p2)+1 u } x2 <- rzmgeom(1000, 1/2, 1/10) fitdist(x2, \"zmgeom\", start=list(p1=1/2, p2=1/2))"},{"path":"https://lbbe-software.github.io/fitdistrplus/articles/FAQ.html","id":"how-do-i-set-or-find-initial-values-for-non-standard-distributions","dir":"Articles","previous_headings":"Questions regarding distributions","what":"How do I set (or find) initial values for non standard distributions?","title":"Frequently Asked Questions","text":"documented, provide initial values following distributions: \"norm\", \"lnorm\", \"exp\", \"pois\", \"cauchy\", \"gamma“, \"logis\", \"nbinom\", \"geom\", \"beta\", \"weibull\" stats package; \"invgamma\", \"llogis\", \"invweibull\", \"pareto1\", \"pareto\", \"lgamma\", \"trgamma\", \"invtrgamma\" actuar package. Look first statistics probability books different volumes N. L. Johnson, S. Kotz N. Balakrishnan books, e.g. Continuous Univariate Distributions, Vol. 1, Thesaurus univariate discrete probability distributions G. Wimmer G. Altmann. Statistical Distributions M. Evans, N. Hastings, B. Peacock. Distributional Analysis L-moment Statistics using R Environment Statistical Computing W. Asquith. available, find initial values equalling theoretical empirical quartiles. graphical function plotdist() plotdistcens() can also used assess suitability starting values : iterative manual process can move parameter values obtain distribution roughly fits data take parameter values starting values real fit. 
may also consider prefit() function find initial values especially case parameters constrained.","code":""},{"path":"https://lbbe-software.github.io/fitdistrplus/articles/FAQ.html","id":"is-it-possible-to-fit-a-distribution-with-at-least-3-parameters","dir":"Articles","previous_headings":"Questions regarding distributions","what":"Is it possible to fit a distribution with at least 3 parameters?","title":"Frequently Asked Questions","text":"Yes, example Burr distribution detailed JSS paper. reproduce quickly .","code":"data(\"endosulfan\") library(\"actuar\") fendo.B <- fitdist(endosulfan$ATV, \"burr\", start = list(shape1 = 0.3, shape2 = 1, rate = 1)) summary(fendo.B) ## Fitting of the distribution ' burr ' by maximum likelihood ## Parameters : ## estimate Std. Error ## shape1 0.206 0.0561 ## shape2 1.540 0.3188 ## rate 1.497 0.4683 ## Loglikelihood: -520 AIC: 1046 BIC: 1054 ## Correlation matrix: ## shape1 shape2 rate ## shape1 1.000 -0.900 -0.727 ## shape2 -0.900 1.000 0.588 ## rate -0.727 0.588 1.000"},{"path":"https://lbbe-software.github.io/fitdistrplus/articles/FAQ.html","id":"why-there-are-differences-between-mle-and-mme-for-the-lognormal-distribution","dir":"Articles","previous_headings":"Questions regarding distributions","what":"Why there are differences between MLE and MME for the lognormal distribution?","title":"Frequently Asked Questions","text":"recall lognormal distribution function given f(x)=12πσ2e−(x−μ2)2σ2f(x) = \\dfrac{1}{\\sqrt{2\\pi\\sigma^2}} e^{-\\frac{(x-\\mu^2)}{2\\sigma^2}} FX(x)=Φ(log(x)−μσ),F_X(x) = \\Phi\\left(\\frac{\\log(x)-\\mu}{\\sigma} \\right), Φ\\Phi denotes distribution function standard normal distribution. know E(X)=exp(μ+12σ2)E(X) = \\exp\\left( \\mu+\\frac{1}{2} \\sigma^2 \\right) Var(X)=exp(2μ+σ2)(eσ2−1)Var(X) = \\exp\\left( 2\\mu+\\sigma^2\\right) (e^{\\sigma^2} -1). MME obtained inverting previous formulas, whereas MLE following explicit solution μ̂MLE=1n∑=1nlog(xi),σ̂MLE2=1n∑=1n(log(xi)−μ̂MLE)2. \\hat\\mu_{MLE} = \\frac{1}{n}\\sum_{=1}^n \\log(x_i),~~ \\hat\\sigma^2_{MLE} = \\frac{1}{n}\\sum_{=1}^n (\\log(x_i) - \\hat\\mu_{MLE})^2. Let us fit sample MLE MME. fit looks particularly good cases. Let us compare theoretical moments (mean variance) given fitted values (μ̂,σ̂\\hat\\mu,\\hat\\sigma), E(X)=exp(μ̂+12σ̂2),Var(X)=exp(2μ̂+σ̂2)(eσ̂2−1). E(X) = \\exp\\left( \\hat\\mu+\\frac{1}{2} \\hat\\sigma^2 \\right), Var(X) = \\exp\\left( 2\\hat\\mu+\\hat\\sigma^2\\right) (e^{\\hat\\sigma^2} -1). MLE point view, lognormal sample x1,…,xnx_1,\\dots,x_n equivalent handle normal sample log(x1),…,log(xn)\\log(x_1),\\dots,\\log(x_n). 
However, well know Jensen inequality E(X)=E(exp(log(X)))≥exp(E(log(X)))E(X) = E(\\exp(\\log(X))) \\geq \\exp(E(\\log(X))) implying MME estimates provides better moment estimates MLE.","code":"x3 <- rlnorm(1000) f1 <- fitdist(x3, \"lnorm\", method=\"mle\") f2 <- fitdist(x3, \"lnorm\", method=\"mme\") par(mfrow=1:2, mar=c(4,4,2,1)) cdfcomp(list(f1, f2), do.points=FALSE, xlogscale = TRUE, main = \"CDF plot\") denscomp(list(f1, f2), demp=TRUE, main = \"Density plot\") c(\"E(X) by MME\"=as.numeric(exp(f2$estimate[\"meanlog\"]+f2$estimate[\"sdlog\"]^2/2)), \"E(X) by MLE\"=as.numeric(exp(f1$estimate[\"meanlog\"]+f1$estimate[\"sdlog\"]^2/2)), \"empirical\"=mean(x3)) ## E(X) by MME E(X) by MLE empirical ## 1.61 1.60 1.61 c(\"Var(X) by MME\"=as.numeric(exp(2*f2$estimate[\"meanlog\"]+f2$estimate[\"sdlog\"]^2) * (exp(f2$estimate[\"sdlog\"]^2)-1)), \"Var(X) by MLE\"=as.numeric(exp(2*f1$estimate[\"meanlog\"]+f1$estimate[\"sdlog\"]^2) * (exp(f1$estimate[\"sdlog\"]^2)-1)), \"empirical\"=var(x3)) ## Var(X) by MME Var(X) by MLE empirical ## 4.30 4.36 4.30"},{"path":"https://lbbe-software.github.io/fitdistrplus/articles/FAQ.html","id":"can-i-fit-a-distribution-with-positive-support-when-data-contains-negative-values","dir":"Articles","previous_headings":"Questions regarding distributions","what":"Can I fit a distribution with positive support when data contains negative values?","title":"Frequently Asked Questions","text":"answer : fit distribution positive support (say gamma distribution) data contains negative values. irrelevant fit. really need use distribution, two options: either remove negative values (recommended) shift data.","code":"set.seed(1234) x <- rnorm(100, mean = 1, sd = 0.5) (try(fitdist(x, \"exp\"))) ## Error in computing default starting values. ## Error in manageparam(start.arg = start, fix.arg = fix.arg, obs = data, : ## Error in startarg_transgamma_family(x, distr) : ## values must be positive to fit an exponential distribution ## [1] \"Error in manageparam(start.arg = start, fix.arg = fix.arg, obs = data, : \\n Error in startarg_transgamma_family(x, distr) : \\n values must be positive to fit an exponential distribution\\n\\n\" ## attr(,\"class\") ## [1] \"try-error\" ## attr(,\"condition\") ## fitdist(x[x >= 0], \"exp\") ## Fitting of the distribution ' exp ' by maximum likelihood ## Parameters: ## estimate Std. Error ## rate 1.06 0.107 fitdist(x - min(x), \"exp\") ## Fitting of the distribution ' exp ' by maximum likelihood ## Parameters: ## estimate Std. Error ## rate 0.914 0.0914"},{"path":"https://lbbe-software.github.io/fitdistrplus/articles/FAQ.html","id":"can-i-fit-a-finite-support-distribution-when-data-is-outside-that-support","dir":"Articles","previous_headings":"Questions regarding distributions","what":"Can I fit a finite-support distribution when data is outside that support?","title":"Frequently Asked Questions","text":"answer : fit distribution finite-support (say beta distribution) data outside [0,1][0,1]. irrelevant fit. really need use distribution, two ways tackle issue: either remove impossible values (recommended) shift/scale data.","code":"set.seed(1234) x <- rnorm(100, mean = 0.5, sd = 0.25) (try(fitdist(x, \"beta\"))) ## Error in computing default starting values. 
## Error in manageparam(start.arg = start, fix.arg = fix.arg, obs = data, : ## Error in startargdefault(obs, distname) : ## values must be in [0-1] to fit a beta distribution ## [1] \"Error in manageparam(start.arg = start, fix.arg = fix.arg, obs = data, : \\n Error in startargdefault(obs, distname) : \\n values must be in [0-1] to fit a beta distribution\\n\\n\" ## attr(,\"class\") ## [1] \"try-error\" ## attr(,\"condition\") ## fitdist(x[x > 0 & x < 1], \"beta\") ## Fitting of the distribution ' beta ' by maximum likelihood ## Parameters: ## estimate Std. Error ## shape1 2.08 0.288 ## shape2 2.50 0.352 fitdist((x - min(x)*1.01) / (max(x) * 1.01 - min(x) * 1.01), \"beta\") ## Fitting of the distribution ' beta ' by maximum likelihood ## Parameters: ## estimate Std. Error ## shape1 1.77 0.236 ## shape2 2.17 0.296"},{"path":"https://lbbe-software.github.io/fitdistrplus/articles/FAQ.html","id":"can-i-fit-truncated-distributions","dir":"Articles","previous_headings":"Questions regarding distributions","what":"Can I fit truncated distributions?","title":"Frequently Asked Questions","text":"answer yes: fitting procedure must carried carefully. Let XX original untruncated random variable. truncated variable conditionnal random variable Y=X|l= low) * (x <= upp) } ptexp <- function(q, rate, low, upp) { PU <- pexp(upp, rate=rate) PL <- pexp(low, rate=rate) (pexp(q, rate)-PL) / (PU-PL) * (q >= low) * (q <= upp) + 1 * (q > upp) } n <- 200 x <- rexp(n); x <- x[x > .5 & x < 3] f1 <- fitdist(x, \"texp\", method=\"mle\", start=list(rate=3), fix.arg=list(low=min(x), upp=max(x))) f2 <- fitdist(x, \"texp\", method=\"mle\", start=list(rate=3), fix.arg=list(low=.5, upp=3)) gofstat(list(f1, f2)) ## Goodness-of-fit statistics ## 1-mle-texp 2-mle-texp ## Kolmogorov-Smirnov statistic 0.0952 0.084 ## Cramer-von Mises statistic 0.1343 0.104 ## Anderson-Darling statistic Inf 1.045 ## ## Goodness-of-fit criteria ## 1-mle-texp 2-mle-texp ## Akaike's Information Criterion 127 132 ## Bayesian Information Criterion 130 135 par(mfrow=c(1,1), mar=c(4,4,2,1)) cdfcomp(list(f1, f2), do.points = FALSE, xlim=c(0, 3.5))"},{"path":"https://lbbe-software.github.io/fitdistrplus/articles/FAQ.html","id":"can-i-fit-truncated-inflated-distributions","dir":"Articles","previous_headings":"Questions regarding distributions","what":"Can I fit truncated inflated distributions?","title":"Frequently Asked Questions","text":"answer yes: fitting procedure must carried carefully. Let XX original untruncated random variable. truncated variable Y=max(min(X,u),l)Y = \\max(\\min(X, u), l) ly>l+1y>uF_Y(y)=F_X(y)1_{u>y>l} + 1_{y>u}. density (w.r.t. Lebesgues measure) since two probability masses P(Y=l)=P(X≤l)>0P(Y=l)= P(X\\leq l)>0 P(Y=u)=P(X>u)>0P(Y=u)=P(X>u)>0. 
However, density function respect measure m(x)=δl(x)+δu(x)+λ(x)m(x)= \\delta_l(x)+\\delta_u(x)+\\lambda(x) fY(y)={FX(l)y=lfX(y)lminiyil>\\min_i y_i u= low) * (x <= upp) + PL * (x == low) + PU * (x == upp) } ptiexp <- function(q, rate, low, upp) pexp(q, rate) * (q >= low) * (q <= upp) + 1 * (q > upp) n <- 100; x <- pmax(pmin(rexp(n), 3), .5) # the loglikelihood has a discontinous point at the solution par(mar=c(4,4,2,1), mfrow=1:2) llcurve(x, \"tiexp\", plot.arg=\"low\", fix.arg = list(rate=2, upp=5), min.arg=0, max.arg=.5, lseq=200) llcurve(x, \"tiexp\", plot.arg=\"upp\", fix.arg = list(rate=2, low=0), min.arg=3, max.arg=4, lseq=200) (f1 <- fitdist(x, \"tiexp\", method=\"mle\", start=list(rate=3, low=0, upp=20))) ## Fitting of the distribution ' tiexp ' by maximum likelihood ## Parameters: ## estimate ## rate 0.949 ## low -0.502 ## upp 23.072 (f2 <- fitdist(x, \"tiexp\", method=\"mle\", start=list(rate=3), fix.arg=list(low=min(x), upp=max(x)))) ## Fitting of the distribution ' tiexp ' by maximum likelihood ## Parameters: ## estimate Std. Error ## rate 0.947 0.0982 ## Fixed parameters: ## value ## low 0.5 ## upp 3.0 gofstat(list(f1, f2)) ## Goodness-of-fit statistics ## 1-mle-tiexp 2-mle-tiexp ## Kolmogorov-Smirnov statistic 0.378 0.377 ## Cramer-von Mises statistic 1.890 1.882 ## Anderson-Darling statistic 10.222 10.193 ## ## Goodness-of-fit criteria ## 1-mle-tiexp 2-mle-tiexp ## Akaike's Information Criterion 216 162 ## Bayesian Information Criterion 224 165 par(mfrow=c(1,1), mar=c(4,4,2,1)) cdfcomp(list(f1, f2), do.points = FALSE, addlegend=FALSE, xlim=c(0, 3.5)) curve(ptiexp(x, 1, .5, 3), add=TRUE, col=\"blue\", lty=3) legend(\"bottomright\", lty=1:3, col=c(\"red\", \"green\", \"blue\", \"black\"), legend=c(\"full MLE\", \"MLE fixed arg\", \"true CDF\", \"emp. CDF\"))"},{"path":"https://lbbe-software.github.io/fitdistrplus/articles/FAQ.html","id":"can-i-fit-a-uniform-distribution","dir":"Articles","previous_headings":"Questions regarding distributions","what":"Can I fit a uniform distribution?","title":"Frequently Asked Questions","text":"uniform distribution 𝒰(,b)\\mathcal U(,b) support parameters since density scale shape parameter fU(u)=1b−a1[,b](u)f_U(u) = \\frac{1}{b-}1_{[,b]}(u). distribution, maximize log-likelihood likelihood. Let (xi)(x_i)_i ..d. observations 𝒰(,b)\\mathcal U(,b) distribution. likelihood L(,b)=∏=1n1b−a1[,b](xi)=1a≤xi≤b,=1,…,n1b−=1a≤minixi1maxixi≤b1b−L(,b) = \\prod_{=1}^n \\frac{1}{b-} 1_{[,b]}(x_i) = 1_{\\leq x_i \\leq b, =1,\\dots,n} \\frac{1}{b-}^n = 1_{\\leq \\min_i x_i} 1_{\\max_i x_i \\leq b} \\frac{1}{b-}^n Hence ↦L(,b)\\mapsto L(,b) fixed b∈]maxixi,+∞[b\\]\\max_i x_i, +\\infty[ increasing ]−∞,minixi]]-\\infty, \\min_i x_i], similarly b↦L(,b)b\\mapsto L(,b) decreasing fixed aa. leads minixi\\min_i x_i maxixi\\max_i x_i MLE uniform distribution. notice likelihood function LL defined ℝ2\\mathbb R^2 yet cancels outside S=]−∞,minixi]×]maxixi,+∞[S=]-\\infty, \\min_i x_i]\\times]\\max_i x_i, +\\infty[. Hence, log-likelihood undefined outside SS, issue maximizing log-likelihood. reasons, fitdist(data, dist=\"unif\", method=\"mle\") uses explicit form MLE distribution. example Maximizing log-likelihood harder can done defining new density function. Appropriate starting values parameters bound must supplied. 
Using closed-form expression (fitdist()) maximizing log-likelihood (unif2) lead similar results.","code":"trueval <- c(\"min\"=3, \"max\"=5) x <- runif(n=500, trueval[1], trueval[2]) f1 <- fitdist(x, \"unif\") delta <- .01 par(mfrow=c(1,1), mar=c(4,4,2,1)) llsurface(x, \"unif\", plot.arg = c(\"min\", \"max\"), min.arg=c(min(x)-2*delta, max(x)-delta), max.arg=c(min(x)+delta, max(x)+2*delta), main=\"likelihood surface for uniform\", loglik=FALSE) abline(v=min(x), h=max(x), col=\"grey\", lty=2) points(f1$estimate[1], f1$estimate[2], pch=\"x\", col=\"red\") points(trueval[1], trueval[2], pch=\"+\", col=\"blue\") legend(\"bottomright\", pch=c(\"+\",\"x\"), col=c(\"blue\",\"red\"), c(\"true\", \"fitted\")) delta <- .2 llsurface(x, \"unif\", plot.arg = c(\"min\", \"max\"), min.arg=c(3-2*delta, 5-delta), max.arg=c(3+delta, 5+2*delta), main=\"log-likelihood surface for uniform\") abline(v=min(x), h=max(x), col=\"grey\", lty=2) points(f1$estimate[1], f1$estimate[2], pch=\"x\", col=\"red\") points(trueval[1], trueval[2], pch=\"+\", col=\"blue\") legend(\"bottomright\", pch=c(\"+\",\"x\"), col=c(\"blue\",\"red\"), c(\"true\", \"fitted\")) dunif2 <- function(x, min, max) dunif(x, min, max) punif2 <- function(q, min, max) punif(q, min, max) f2 <- fitdist(x, \"unif2\", start=list(min=0, max=10), lower=c(-Inf, max(x)), upper=c(min(x), Inf)) print(c(logLik(f1), logLik(f2)), digits=7) ## [1] -346.0539 -346.0540 print(cbind(coef(f1), coef(f2)), digits=7) ## [,1] [,2] ## min 3.000684 3.000683 ## max 4.998606 4.998606"},{"path":"https://lbbe-software.github.io/fitdistrplus/articles/FAQ.html","id":"can-i-fit-a-beta-distribution-with-the-same-shape-parameter","dir":"Articles","previous_headings":"Questions regarding distributions","what":"Can I fit a beta distribution with the same shape parameter?","title":"Frequently Asked Questions","text":"Yes, can wrap density function beta distribution one shape parameter. example concave density. Another example U-shaped density.","code":"x <- rbeta(1000, 3, 3) dbeta2 <- function(x, shape, ...) dbeta(x, shape, shape, ...) pbeta2 <- function(q, shape, ...) pbeta(q, shape, shape, ...) fitdist(x, \"beta2\", start=list(shape=1/2)) ## Fitting of the distribution ' beta2 ' by maximum likelihood ## Parameters: ## estimate Std. Error ## shape 3.24 0.135 x <- rbeta(1000, .3, .3) fitdist(x, \"beta2\", start=list(shape=1/2), optim.method=\"L-BFGS-B\", lower=1e-2) ## Fitting of the distribution ' beta2 ' by maximum likelihood ## Parameters: ## estimate Std. Error ## shape 0.295 0.00986"},{"path":"https://lbbe-software.github.io/fitdistrplus/articles/FAQ.html","id":"how-to-estimate-support-parameter-the-case-of-the-four-parameter-beta","dir":"Articles","previous_headings":"Questions regarding distributions","what":"How to estimate support parameter? the case of the four-parameter beta","title":"Frequently Asked Questions","text":"Let us consider four-parameter beta distribution, also known PERT distribution, defined following density x∈[,c]x\\[,c]fX(x)=(x−)α−1(c−x)β−1/CNf_X(x) = (x-)^{\\alpha-1} (c-x)^{\\beta-1}/C_N CNC_N normalizing constant α=1+d(b−)/(c−)\\alpha=1+d(b-)/(c-), β=1+d(c−b)/(c−)\\beta=1+d(c-b)/(c-). ,ca,c support parameters, b∈],c[b\\],c[ mode dd shape parameter. uniform distribution, one can show MLE aa cc respectively sample minimum maximum. code illustrates strategy using partial closed formula fix.arg full numerical search MLE. 
NB: small sample size, latter generally better goodness--fit statistics; small positive number added subtracted fixing support parameters aa cc sample minimum maximum.","code":"require(mc2d) x2 <- rpert(n=2e2, min=0, mode=1, max=2, shape=3/4) eps <- sqrt(.Machine$double.eps) f1 <- fitdist(x2, \"pert\", start=list(min=-1, mode=0, max=10, shape=1), lower=c(-Inf, -Inf, -Inf, 0), upper=c(Inf, Inf, Inf, Inf)) ## Warning in checkparamlist(arg_startfix$start.arg, arg_startfix$fix.arg, : Some ## parameter names have no starting/fixed value but have a default value: mean. ## Warning in cov2cor(varcovar): diag(V) had non-positive or NA entries; the ## non-finite result may be dubious ## Warning in sqrt(diag(varcovar)): NaNs produced f2 <- fitdist(x2, \"pert\", start=list(mode=1, shape=1), fix.arg=list(min=min(x2)-eps, max=max(x2)+eps), lower=c(min(x2), 0), upper=c(max(x2), Inf)) ## Warning in checkparamlist(arg_startfix$start.arg, arg_startfix$fix.arg, : Some ## parameter names have no starting/fixed value but have a default value: mean. print(cbind(coef(f1), c(f2$fix.arg[\"min\"], coef(f2)[\"mode\"], f2$fix.arg[\"max\"], coef(f2)[\"shape\"])), digits=7) ## [,1] [,2] ## min 0.03106317 0.03395487 ## mode 1.120283 1.956149 ## max 1.9595 1.956234 ## shape 0.3056077 0.008646087 gofstat(list(f1,f2)) ## Goodness-of-fit statistics ## 1-mle-pert 2-mle-pert ## Kolmogorov-Smirnov statistic 0.0452 0.0584 ## Cramer-von Mises statistic 0.0823 0.1834 ## Anderson-Darling statistic 0.5325 1.2776 ## ## Goodness-of-fit criteria ## 1-mle-pert 2-mle-pert ## Akaike's Information Criterion 268 265 ## Bayesian Information Criterion 281 272 par(mfrow=c(1,1), mar=c(4,4,2,1)) cdfcomp(list(f1,f2))"},{"path":[]},{"path":"https://lbbe-software.github.io/fitdistrplus/articles/FAQ.html","id":"where-can-we-find-the-results-of-goodness-of-fit-tests","dir":"Articles","previous_headings":"Questions regarding goodness-of-fit tests and statistics, Cullen-Frey graph","what":"Where can we find the results of goodness-of-fit tests ?","title":"Frequently Asked Questions","text":"Results goodness--fit tests printed given object returned gofstat() can access described example . Nevertheless, p-values given every test. 
Anderson-Darling (ad), Cramer von Mises (cvm) Kolomogorov (ks), decision (rejection H0 ) given, available (see FAQ 2.3 details).","code":"set.seed(1234) x <- rgamma(n = 100, shape = 2, scale = 1) # fit of the good distribution fgamma <- fitdist(x, \"gamma\") # fit of a bad distribution fexp <- fitdist(x, \"exp\") g <- gofstat(list(fgamma, fexp), fitnames = c(\"gamma\", \"exp\")) par(mfrow=c(1,1), mar=c(4,4,2,1)) denscomp(list(fgamma, fexp), legendtext = c(\"gamma\", \"exp\")) # results of the tests ## chi square test (with corresponding table with theoretical and observed counts) g$chisqpvalue ## gamma exp ## 1.89e-01 7.73e-05 g$chisqtable ## obscounts theo gamma theo exp ## <= 0.5483 9 10.06 23.66 ## <= 0.8122 9 8.82 9.30 ## <= 0.9592 9 5.27 4.68 ## <= 1.368 9 14.64 11.37 ## <= 1.523 9 5.24 3.74 ## <= 1.701 9 5.73 3.97 ## <= 1.94 9 7.09 4.82 ## <= 2.381 9 11.08 7.50 ## <= 2.842 9 9.00 6.29 ## <= 3.801 9 11.93 9.28 ## > 3.801 10 11.15 15.40 ## Anderson-Darling test g$adtest ## gamma exp ## \"not rejected\" \"rejected\" ## Cramer von Mises test g$cvmtest ## gamma exp ## \"not rejected\" \"rejected\" ## Kolmogorov-Smirnov test g$kstest ## gamma exp ## \"not rejected\" \"rejected\""},{"path":"https://lbbe-software.github.io/fitdistrplus/articles/FAQ.html","id":"is-it-reasonable-to-use-goodness-of-fit-tests-to-validate-the-fit-of-a-distribution","dir":"Articles","previous_headings":"Questions regarding goodness-of-fit tests and statistics, Cullen-Frey graph","what":"Is it reasonable to use goodness-of-fit tests to validate the fit of a distribution ?","title":"Frequently Asked Questions","text":"first versions fitdistrplus, available, results GOF tests (AD, KS, CvM) automatically printed. decided suppress automatic printing realized users difficulties interpret results tests sometimes misused . Goodness--fit tests often appear objective tools decide wether fitted distribution well describes data set. ! reasonable reject distribution just goodness--fit test rejects (see FAQ 2.2.1). reasonable validate distribution goodness--fit tests reject (see FAQ 2.2.2). fitted distribution evaluated using graphical methods (goodness--fit graphs automatically provided package plotting result fit (output fitdist() fitdistcens() complementary graphs help compare different fits - see ?graphcomp). really think appropriate way evaluate adequacy fit ones recommend . can find type recommendations reference books : Probabilistic techniques exposure assessment - handbook dealing variability uncertainty models inputs .C. Cullen H.C. Frey. Application uncertainty analysis ecological risks pesticides W.J. Warren-Hicks . Hart. Statistical inference G. Casella R.L. Berger Loss models: data decision S.. Klugman H.H. Panjer G.E. Willmot Moreover, selection distribution also driven knowledge underlying processes available. 
example variable negative, one cautious fitting normal distribution, potentially gives negative values, even observed data variable seem well fitted normal distribution.","code":""},{"path":"https://lbbe-software.github.io/fitdistrplus/articles/FAQ.html","id":"should-i-reject-a-distribution-because-a-goodness-of-fit-test-rejects-it","dir":"Articles","previous_headings":"Questions regarding goodness-of-fit tests and statistics, Cullen-Frey graph > Is it reasonable to use goodness-of-fit tests to validate the fit of a distribution ?","what":"Should I reject a distribution because a goodness-of-fit test rejects it ?","title":"Frequently Asked Questions","text":"reasonable reject distribution just goodness--fit test rejects , especially case big samples. real life, soon sufficient amount data, reject fitted distribution. know model perfectly describe real data, generally true question find better distribution among pool simple parametric distributions describe data, compare different models (see FAQ 2.4 2.5 corresponding questions). illustre point let us comment example presented . drew two samples Poisson distribution mean parameter equal 100. many applications, value parameter, Poisson distribution considered well approximated normal distribution. Testing fit (using Kolmogorov-Smirnov test ) normal distribution sample 100 observations reject normal fit, testing sample 10000 observations reject , samples come distribution.","code":"set.seed(1234) x1 <- rpois(n = 100, lambda = 100) f1 <- fitdist(x1, \"norm\") g1 <- gofstat(f1) g1$kstest ## 1-mle-norm ## \"not rejected\" x2 <- rpois(n = 10000, lambda = 100) f2 <- fitdist(x2, \"norm\") g2 <- gofstat(f2) g2$kstest ## 1-mle-norm ## \"rejected\" par(mfrow=c(1,2), mar=c(4,4,2,1)) denscomp(f1, demp = TRUE, addlegend = FALSE, main = \"small sample\") denscomp(f2, demp = TRUE, addlegend = FALSE, main = \"big sample\")"},{"path":"https://lbbe-software.github.io/fitdistrplus/articles/FAQ.html","id":"should-i-accept-a-distribution-because-goodness-of-fit-tests-do-not-reject-it","dir":"Articles","previous_headings":"Questions regarding goodness-of-fit tests and statistics, Cullen-Frey graph > Is it reasonable to use goodness-of-fit tests to validate the fit of a distribution ?","what":"Should I accept a distribution because goodness-of-fit tests do not reject it ?","title":"Frequently Asked Questions","text":", reasonable validate distribution goodness--fit tests reject . Like hypothesis tests, goodness--fit tests lack statistical power sample size high. different goodness--fit tests equally sensitive different types deviation empirical fitted distributions. example Kolmogorov-Smirnov test sensitive distributions differ global fashion near centre distribution. Anderson-Darling test sensitive distributions differ tails, Cramer von Mises sensitive small repetitive differences empirical theoretical distribution functions. sensitivity chi square test depend definition classes, even propose default definition classes user provide classes, choice obvious impact results test. test appropriate data discrete, even modelled continuous distribution, following example. Two samples respective sizes 500 50 drawn Poisson distribution mean parameter equal 1 (sufficiently high value consider Poisson distribution approximated normal one). Using Kolmogorov-Smirnov test, small sample normal fit rejected bigger sample. rejected smaller sample even fit rejected simple visual confrontation distributions. 
particular case, chi square test classes defined default rejected te normal fit samples.","code":"set.seed(1234) x3 <- rpois(n = 500, lambda = 1) f3 <- fitdist(x3, \"norm\") g3 <- gofstat(f3) g3$kstest ## 1-mle-norm ## \"rejected\" x4 <- rpois(n = 50, lambda = 1) f4 <- fitdist(x4, \"norm\") g4 <- gofstat(f4) g4$kstest ## 1-mle-norm ## \"not rejected\" par(mfrow=c(1,2), mar=c(4,4,2,1)) denscomp(f3, addlegend = FALSE, main = \"big sample\") denscomp(f4, addlegend = FALSE, main = \"small sample\") g3$chisqtable ## obscounts theocounts ## <= 0 180.0 80.3 ## <= 1 187.0 163.5 ## <= 2 87.0 168.1 ## <= 3 32.0 73.4 ## > 3 14.0 14.7 g3$chisqpvalue ## [1] 7.11e-42 g4$chisqtable ## obscounts theocounts ## <= 0 14.00 5.46 ## <= 1 15.00 14.23 ## <= 2 15.00 18.09 ## > 2 6.00 12.22 g4$chisqpvalue ## [1] 3.57e-05"},{"path":"https://lbbe-software.github.io/fitdistrplus/articles/FAQ.html","id":"why-all-goodness-of-fit-tests-are-not-available-for-every-distribution","dir":"Articles","previous_headings":"Questions regarding goodness-of-fit tests and statistics, Cullen-Frey graph","what":"Why all goodness-of-fit tests are not available for every distribution ?","title":"Frequently Asked Questions","text":"Chi-squared test available distribution one must conscious result depends definition cells observed data grouped, correct definition possible small sample. Concerning Kolmogorov-Smirnov test, proposed continuous distribution, critical value corresponding comparison empirical distribution fully specified distribution. distribution fully known fitted distribution, result test subject caution, general asymptotic theory Kolmogorov-Smirnov statistics case fitted distribution. Nevertheless, one can use Monte Carlo methods conduct Kolmgorov-Smirnov goodness--fit tests cases sample used estimate model parameters. method implemented R package KScorrect variety continuous distributions. asymptotic theory proposed quadratic statistics distributions (Anderson-Darling, Cramer von Mises). reference book used subject (Tests based edf statistics Stephens MA Goodness--fit techniques D’Agostino RB Stephens MA) proposes critical values statistics classical distributions (exponential, gamma, Weibull, logistic, Cauchy, normal lognormal). asymptotic theory statistics also depends way parameters estimated. estimated maximum likelihood Cauchy, normal lognormal distributions results reported Stephens, propose results Anderson-Darling Cramer von Mises using results exponential, gamma, Weibull, logistic distributions. user can refer cited books use proposed formula estimate parameters Cauchy, normal lognormal distributions apply tests using critical values given book. R packages goftest ADGofTest also explored users like apply Anderson-Darling Cramer von Mises tests distributions. time sure case parameters unknown (estimated maximum likelihood) tackled two packages. 
Concerning development package, rather develoing goodness--fit tests made choice develop graphical tools help appreciate quality fit compare fits different distributions data set (see FAQ 2.2 argumentation).","code":""},{"path":"https://lbbe-software.github.io/fitdistrplus/articles/FAQ.html","id":"how-can-we-use-goodness-of-fit-statistics-to-compare-the-fit-of-different-distributions-on-a-same-data-set","dir":"Articles","previous_headings":"Questions regarding goodness-of-fit tests and statistics, Cullen-Frey graph","what":"How can we use goodness-of-fit statistics to compare the fit of different distributions on a same data set ?","title":"Frequently Asked Questions","text":"Goodness--fit statistics based empirical distribution function (Kolmogorov-Smirnov, Anderson-Darling Cramer von Mises) may used measure distance fitted distribution empirical distribution. one wants compare fit various distributions data set, smaller statistics better. Kolmogorov-Smirnov statistics sensitive distributions differ global fashion near centre distribution Anderson-Darling statistics sensitive distributions differ tails, Cramer von Mises statistics sensitive small repetitive differences empirical theoretical distribution functions. mentioned main vignette package, use Anderson-Darling compare fit different distributions subject caution due weighting quadratic distance fitted empirical distribution functions depends parametric distribution. Moreover, statistics based empirical distribution function penalize distributions greater number parameters generally flexible, induce -fitting. Goodness-fo-fit statistics based information criteria (AIC, BIC) correspond deviance penalized complexity model (number parameters distribution), smaller better. generic statistics, adapted focus part fitted distribution, take account complexity distribution thus help prevent overfitting.","code":""},{"path":"https://lbbe-software.github.io/fitdistrplus/articles/FAQ.html","id":"can-we-use-a-test-to-compare-the-fit-of-two-distributions-on-a-same-data-set","dir":"Articles","previous_headings":"Questions regarding goodness-of-fit tests and statistics, Cullen-Frey graph","what":"Can we use a test to compare the fit of two distributions on a same data set ?","title":"Frequently Asked Questions","text":"package implement test two nested distributions (one special case one, e.g. exponential gamma distributions) likelihood ratio test can easily implemented using loglikelihood provided fitdist fitdistcens. Denoting LL maximum likelihood obtained complete distribution L0L_0 one obtained simplified distribution, sample size increases, −2ln(L0L)=2ln(L)−2ln(L0)- 2 ln(\\frac{L_0}{L}) = 2 ln(L) - 2 ln(L_0) tends Chi squared distribution degrees freedom equal difference numbers parameters characterizing two nested distributions. find example test. test can also used fits censored data.","code":"set.seed(1234) g <- rgamma(100, shape = 2, rate = 1) (f <- fitdist(g, \"gamma\")) ## Fitting of the distribution ' gamma ' by maximum likelihood ## Parameters: ## estimate Std. Error ## shape 2.025 0.266 ## rate 0.997 0.149 (f0 <- fitdist(g, \"exp\")) ## Fitting of the distribution ' exp ' by maximum likelihood ## Parameters: ## estimate Std. 
Error ## rate 0.492 0.0492 L <- logLik(f) k <- length(f$estimate) # number of parameters of the complete distribution L0 <- logLik(f0) k0 <- length(f0$estimate) # number of parameters of the simplified distribution (stat <- 2*L - 2*L0) ## [1] 23.9 (critical_value <- qchisq(0.95, df = k - k0)) ## [1] 3.84 (rejected <- stat > critical_value) ## [1] TRUE"},{"path":"https://lbbe-software.github.io/fitdistrplus/articles/FAQ.html","id":"can-we-get-goodness-of-fit-statistics-for-a-fit-on-censored-data","dir":"Articles","previous_headings":"Questions regarding goodness-of-fit tests and statistics, Cullen-Frey graph","what":"Can we get goodness-of-fit statistics for a fit on censored data ?","title":"Frequently Asked Questions","text":"Function gofstat yet proposed package fits censored data develop one among one objectives future. Published works goodness--fit statistics based empirical distribution function censored data generally focused data containing one type censoring (e.g. right censored data survival data). Build statistics general case, data containing time (right, left interval censoring), remains tricky. Nevertheless, possible type censored data, use information criteria (AIC BIC given summary object class fitdistcens) compare fits various distributions data set.","code":""},{"path":"https://lbbe-software.github.io/fitdistrplus/articles/FAQ.html","id":"why-cullen-frey-graph-may-be-misleading","dir":"Articles","previous_headings":"Questions regarding goodness-of-fit tests and statistics, Cullen-Frey graph","what":"Why Cullen-Frey graph may be misleading?","title":"Frequently Asked Questions","text":"considering distribution large theoretical moments infinite moments, using Cullen-Frey may appropriate. typical log-normal distribution ℒ𝒩(μ,σ2)\\mathcal L\\mathcal N(\\mu,\\sigma^2). Indeed distribution, skewness kurtosis functions exponential σ2\\sigma^2. large values, even small σ\\sigma. sk(X)=(eσ2+2)eσ2−1,kr(X)=e4σ2+2e3σ2+3e2σ2−3. sk(X) = (e^{\\sigma^2}+2)\\sqrt{e^{\\sigma^2}-1}, kr(X) = e^{4\\sigma^2} + 2e^{3\\sigma^2} + 3e^{2\\sigma^2}-3. convergence theoretical standardized moments (skewness kurtosis) slow future, plan use trimmed linear moments deal issue. moments always exist even distribution infinite mean, e.g. Cauchy distribution.","code":"n <- 1e3 x <- rlnorm(n) descdist(x) ## summary statistics ## ------ ## min: 0.0436 max: 20.3 ## median: 1.02 ## mean: 1.61 ## estimated sd: 1.89 ## estimated skewness: 3.49 ## estimated kurtosis: 21.9"},{"path":[]},{"path":"https://lbbe-software.github.io/fitdistrplus/articles/FAQ.html","id":"how-to-choose-optimization-method","dir":"Articles","previous_headings":"Questions regarding optimization procedures","what":"How to choose optimization method?","title":"Frequently Asked Questions","text":"want perform optimization without bounds, optim() used. can try derivative-free method Nelder-Mead Hessian-free method BFGS. want perform optimization bounds, two methods available without providing gradient objective function: Nelder-Mead via constrOptim() bounded BFGS via optim(). cases, see help mledist() vignette optimization algorithms.","code":""},{"path":"https://lbbe-software.github.io/fitdistrplus/articles/FAQ.html","id":"the-optimization-algorithm-stops-with-error-code-100--what-shall-i-do","dir":"Articles","previous_headings":"Questions regarding optimization procedures","what":"The optimization algorithm stops with error code 100. 
What shall I do?","title":"Frequently Asked Questions","text":"First, add traces adding control=list(trace=1, REPORT=1). Second, try set bounds parameters. Third, find better starting values (see FAQ 1.3).","code":""},{"path":"https://lbbe-software.github.io/fitdistrplus/articles/FAQ.html","id":"why-distribution-with-a-log-argument-may-converge-better","dir":"Articles","previous_headings":"Questions regarding optimization procedures","what":"Why distribution with a log argument may converge better?","title":"Frequently Asked Questions","text":"Say, study shifted lognormal distribution defined following density f(x)=1xσ2πexp(−(ln(x+δ)−μ)22σ2) f(x) = \\frac{1}{x \\sigma \\sqrt{2 \\pi}} \\exp\\left(- \\frac{(\\ln (x+\\delta)- \\mu)^2}{2\\sigma^2}\\right) x>−δx>-\\delta μ\\mu location parameter, σ\\sigma scale parameter δ\\delta boundary parameter. Let us fit distribution dataset y MLE. define two functions densities without log argument. now optimize minus log-likelihood. don’t use log argument, algorithms stalls. Indeed algorithm stops following value, log-likelihood infinite. something wrong computation. R-base implementation using log argument seems reliable. happens C-base implementation dlnorm takes care log value. file ../src/nmath/dlnorm.c R sources, find C code dlnorm last four lines logical condtion give_log?, see log argument handled: log=TRUE, use −(log(2π)+y2/2+log(xσ))-(\\log(\\sqrt{2\\pi}) + y^2/2+\\log(x\\sigma)) log=FALSE, use 2π*exp(y2/2)/(xσ))\\sqrt{2\\pi} *\\exp( y^2/2)/(x\\sigma)) (logarithm outside dlnorm) Note constant log(2π)\\log(\\sqrt{2\\pi}) pre-computed C macro M_LN_SQRT_2PI. order sort problem, use constrOptim wrapping optim take account linear constraints. allows also use optimization methods L-BFGS-B (low-memory BFGS bounded) used optim. Another possible perform computations higher precision arithmetics implemented package Rmpfr using MPFR library.","code":"dshiftlnorm <- function(x, mean, sigma, shift, log = FALSE) dlnorm(x+shift, mean, sigma, log=log) pshiftlnorm <- function(q, mean, sigma, shift, log.p = FALSE) plnorm(q+shift, mean, sigma, log.p=log.p) qshiftlnorm <- function(p, mean, sigma, shift, log.p = FALSE) qlnorm(p, mean, sigma, log.p=log.p)-shift dshiftlnorm_no <- function(x, mean, sigma, shift) dshiftlnorm(x, mean, sigma, shift) pshiftlnorm_no <- function(q, mean, sigma, shift) pshiftlnorm(q, mean, sigma, shift) data(dataFAQlog1) y <- dataFAQlog1 D <- 1-min(y) f0 <- fitdist(y+D, \"lnorm\") start <- list(mean=as.numeric(f0$estimate[\"meanlog\"]), sigma=as.numeric(f0$estimate[\"sdlog\"]), shift=D) # works with BFGS, but not Nelder-Mead f <- fitdist(y, \"shiftlnorm\", start=start, optim.method=\"BFGS\") summary(f) ## Fitting of the distribution ' shiftlnorm ' by maximum likelihood ## Parameters : ## estimate Std. 
Error ## mean -1.386 0.02401 ## sigma 0.071 0.00192 ## shift 0.248 0.00598 ## Loglikelihood: 8299 AIC: -16591 BIC: -16573 ## Correlation matrix: ## mean sigma shift ## mean 1.000 -0.885 0.999 ## sigma -0.885 1.000 -0.886 ## shift 0.999 -0.886 1.000 f2 <- try(fitdist(y, \"shiftlnorm_no\", start=start, optim.method=\"BFGS\")) ## ## Error in fitdist(y, \"shiftlnorm_no\", start = start, optim.method = \"BFGS\") : ## the function mle failed to estimate the parameters, ## with the error code 100 print(attr(f2, \"condition\")) ## sum(log(dshiftlnorm_no(y, 0.16383978, 0.01679231, 1.17586600 ))) ## [1] -Inf log(prod(dshiftlnorm_no(y, 0.16383978, 0.01679231, 1.17586600 ))) ## [1] -Inf sum(dshiftlnorm(y, 0.16383978, 0.01679231, 1.17586600, TRUE )) ## [1] 7761 double dlnorm(double x, double meanlog, double sdlog, int give_log) { double y; #ifdef IEEE_754 if (ISNAN(x) || ISNAN(meanlog) || ISNAN(sdlog)) return x + meanlog + sdlog; #endif if(sdlog <= 0) { if(sdlog < 0) ML_ERR_return_NAN; // sdlog == 0 : return (log(x) == meanlog) ? ML_POSINF : R_D__0; } if(x <= 0) return R_D__0; y = (log(x) - meanlog) / sdlog; return (give_log ? -(M_LN_SQRT_2PI + 0.5 * y * y + log(x * sdlog)) : M_1_SQRT_2PI * exp(-0.5 * y * y) / (x * sdlog)); /* M_1_SQRT_2PI = 1 / sqrt(2 * pi) */ } -(M_LN_SQRT_2PI + 0.5 * y * y + log(x * sdlog)) M_1_SQRT_2PI * exp(-0.5 * y * y) / (x * sdlog)) f2 <- fitdist(y, \"shiftlnorm\", start=start, lower=c(-Inf, 0, -min(y)), optim.method=\"Nelder-Mead\") ## Warning in cov2cor(varcovar): diag(V) had non-positive or NA entries; the ## non-finite result may be dubious ## Warning in sqrt(diag(varcovar)): NaNs produced summary(f2) ## Fitting of the distribution ' shiftlnorm ' by maximum likelihood ## Parameters : ## estimate Std. Error ## mean -1.3873 NaN ## sigma 0.0711 NaN ## shift 0.2481 NaN ## Loglikelihood: 8299 AIC: -16591 BIC: -16573 ## Correlation matrix: ## mean sigma shift ## mean 1 NaN NaN ## sigma NaN 1 NaN ## shift NaN NaN 1 print(cbind(BFGS=f$estimate, NelderMead=f2$estimate)) ## BFGS NelderMead ## mean -1.386 -1.3873 ## sigma 0.071 0.0711 ## shift 0.248 0.2481"},{"path":"https://lbbe-software.github.io/fitdistrplus/articles/FAQ.html","id":"what-to-do-when-there-is-a-scaling-issue","dir":"Articles","previous_headings":"Questions regarding optimization procedures","what":"What to do when there is a scaling issue?","title":"Frequently Asked Questions","text":"Let us consider dataset particular small values. way sort multiply dataset large value. Let us consider dataset particular large values. way sort multiply dataset small value.","code":"data(dataFAQscale1) head(dataFAQscale1) ## [1] -0.007077 -0.000947 -0.001898 -0.000475 -0.001902 -0.000476 summary(dataFAQscale1) ## Min. 1st Qu. Median Mean 3rd Qu. Max. ## -0.00708 -0.00143 -0.00047 -0.00031 0.00096 0.00428 for(i in 6:0) cat(10^i, try(mledist(dataFAQscale1*10^i, \"cauchy\")$estimate), \"\\n\") ## 1e+06 -290 1194 ## 1e+05 -29 119 ## 10000 -2.9 11.9 ## 1000 -0.29 1.19 ## 100 -0.029 0.119 ## 10 -0.0029 0.0119 ## ## 1 NA NA data(dataFAQscale2) head(dataFAQscale2) ## [1] 1.40e+09 1.41e+09 1.43e+09 1.44e+09 1.49e+09 1.57e+09 summary(dataFAQscale2) ## Min. 1st Qu. Median Mean 3rd Qu. Max. 
## 1.40e+09 1.58e+09 2.24e+09 2.55e+09 3.39e+09 4.49e+09 for(i in 0:5) cat(10^(-2*i), try(mledist(dataFAQscale2*10^(-2*i), \"cauchy\")$estimate), \"\\n\") ## 1 2.03e+09 6.59e+08 ## 0.01 20283641 6594932 ## 1e-04 202836 65949 ## 1e-06 2028 659 ## 1e-08 20.3 6.59 ## 1e-10 0.203 0.0659"},{"path":[]},{"path":"https://lbbe-software.github.io/fitdistrplus/articles/FAQ.html","id":"setting-bounds-for-scale-parameters","dir":"Articles","previous_headings":"Questions regarding optimization procedures > How do I set bounds on parameters when optimizing?","what":"Setting bounds for scale parameters","title":"Frequently Asked Questions","text":"Consider normal distribution 𝒩(μ,σ2)\\mathcal{N}(\\mu, \\sigma^2) defined density f(x)=12πσ2exp(−(x−μ)22σ2),x∈ℝ, f(x) = \\frac{1}{\\sqrt{2\\pi\\sigma^2}}\\exp\\left(-\\frac{(x-\\mu)^2}{2\\sigma^2}\\right), x\\\\mathbb{R}, μ\\mu location parameter μ∈ℝ\\mu\\\\mathbb{R}, σ2\\sigma^2 scale parameter σ2>0\\sigma^2>0. Therefore optimizing log-likelihood squared differences GoF statistics. Setting lower bound scale parameter easy fitdist: just use lower argument.","code":"set.seed(1234) x <- rnorm(1000, 1, 2) fitdist(x, \"norm\", lower=c(-Inf, 0)) ## Fitting of the distribution ' norm ' by maximum likelihood ## Parameters: ## estimate Std. Error ## mean 0.947 0.0630 ## sd 1.994 0.0446"},{"path":"https://lbbe-software.github.io/fitdistrplus/articles/FAQ.html","id":"setting-bounds-for-shape-parameters","dir":"Articles","previous_headings":"Questions regarding optimization procedures > How do I set bounds on parameters when optimizing?","what":"Setting bounds for shape parameters","title":"Frequently Asked Questions","text":"Consider Burr distribution ℬ(μ,σ2)\\mathcal B(\\mu, \\sigma^2) defined density f(x)=ab(x/s)bx[1+(x/s)b]+1,x∈ℝ, f(x) = \\frac{b (x/s)^b}{x [1 + (x/s)^b]^{+ 1}}, x\\\\mathbb{R}, ,ba,b shape parameters ,b>0a,b>0, ss scale parameter s>0s>0.","code":"x <- rburr(1000, 1, 2, 3) fitdist(x, \"burr\", lower=c(0, 0, 0), start=list(shape1 = 1, shape2 = 1, rate = 1)) ## Fitting of the distribution ' burr ' by maximum likelihood ## Parameters: ## estimate Std. Error ## shape1 0.969 0.0334 ## shape2 2.051 0.0367 ## rate 3.180 0.0516"},{"path":"https://lbbe-software.github.io/fitdistrplus/articles/FAQ.html","id":"setting-bounds-for-probability-parameters","dir":"Articles","previous_headings":"Questions regarding optimization procedures > How do I set bounds on parameters when optimizing?","what":"Setting bounds for probability parameters","title":"Frequently Asked Questions","text":"Consider geometric distribution 𝒢(p)\\mathcal G(p) defined mass probability function f(x)=p(1−p)x,x∈ℕ, f(x) = p(1-p)^x, x\\\\mathbb{N}, pp probability parameter p∈[0,1]p\\[0,1].","code":"x <- rgeom(1000, 1/4) fitdist(x, \"geom\", lower=0, upper=1) ## Fitting of the distribution ' geom ' by maximum likelihood ## Parameters: ## estimate Std. Error ## prob 0.242 0.00666"},{"path":"https://lbbe-software.github.io/fitdistrplus/articles/FAQ.html","id":"setting-bounds-for-boundary-parameters","dir":"Articles","previous_headings":"Questions regarding optimization procedures > How do I set bounds on parameters when optimizing?","what":"Setting bounds for boundary parameters","title":"Frequently Asked Questions","text":"Consider shifted exponential distribution ℰ(μ,λ)\\mathcal E(\\mu,\\lambda) defined mass probability function f(x)=λexp(−λ(x−μ)),x>μ, f(x) = \\lambda \\exp(-\\lambda(x-\\mu)), x>\\mu, λ\\lambda scale parameter λ>0\\lambda>0, μ\\mu boundary (shift) parameter μ∈ℝ\\mu\\\\mathbb{R}. 
optimizing log-likelihood, boundary constraint ∀=1,…,n,xi>μ⇒mini=1,…,nxi>μ⇔μ>−mini=1,…,nxi. \\forall =1,\\dots,n, x_i>\\mu \\Rightarrow \\min_{=1,\\dots,n} x_i > \\mu \\Leftrightarrow \\mu > -\\min_{=1,\\dots,n} x_i. Note optimizing squared differences GoF statistics, constraint may necessary. Let us R.","code":"dsexp <- function(x, rate, shift) dexp(x-shift, rate=rate) psexp <- function(x, rate, shift) pexp(x-shift, rate=rate) rsexp <- function(n, rate, shift) rexp(n, rate=rate)+shift x <- rsexp(1000, 1/4, 1) fitdist(x, \"sexp\", start=list(rate=1, shift=0), lower= c(0, -min(x))) ## Fitting of the distribution ' sexp ' by maximum likelihood ## Parameters: ## estimate Std. Error ## rate 0.248 4.71e-10 ## shift 1.005 NaN"},{"path":"https://lbbe-software.github.io/fitdistrplus/articles/FAQ.html","id":"setting-linear-inequality-bounds","dir":"Articles","previous_headings":"Questions regarding optimization procedures > How do I set bounds on parameters when optimizing?","what":"Setting linear inequality bounds","title":"Frequently Asked Questions","text":"distributions, bounds parameters independent. instance, normal inverse Gaussian distribution (μ,δ,α,β\\mu, \\delta, \\alpha, \\beta parametrization) following parameter constraints, can reformulated linear inequality: {α>0δ>0α>|β|⇔(01000010001−10011)⏟ui(μδαβ)≥(0000)⏟ci. \\left\\{ \\begin{array}{l}\\alpha > 0\\\\ \\delta >0\\\\ \\alpha > |\\beta|\\end{array} \\right. \\Leftrightarrow \\underbrace{ \\left( \\begin{matrix} 0 & 1 & 0 & 0 \\\\ 0 & 0 & 1 & 0 \\\\ 0 & 0 & 1 & -1 \\\\ 0 & 0 & 1 & 1 \\\\ \\end{matrix} \\right) }_{ui} \\left( \\begin{matrix} \\mu\\\\ \\delta\\\\ \\alpha \\\\ \\beta \\\\ \\end{matrix} \\right) \\geq \\underbrace{ \\left( \\begin{matrix} 0\\\\ 0\\\\ 0 \\\\ 0 \\\\ \\end{matrix} \\right)}_{ci}. constraints can carried via constrOptim() arguments ci ui. example","code":"library(GeneralizedHyperbolic) myoptim <- function(fn, par, ui, ci, ...) { res <- constrOptim(f=fn, theta=par, method=\"Nelder-Mead\", ui=ui, ci=ci, ...) c(res, convergence=res$convergence, value=res$objective, par=res$minimum, hessian=res$hessian) } x <- rnig(1000, 3, 1/2, 1/2, 1/4) ui <- rbind(c(0,1,0,0), c(0,0,1,0), c(0,0,1,-1), c(0,0,1,1)) ci <- c(0,0,0,0) fitdist(x, \"nig\", custom.optim=myoptim, ui=ui, ci=ci, start=list(mu = 0, delta = 1, alpha = 1, beta = 0)) ## Warning in fitdist(x, \"nig\", custom.optim = myoptim, ui = ui, ci = ci, start = ## list(mu = 0, : The dnig function should return a vector of with NaN values when ## input has inconsistent parameters and not raise an error ## Warning in fitdist(x, \"nig\", custom.optim = myoptim, ui = ui, ci = ci, start = ## list(mu = 0, : The pnig function should return a vector of with NaN values when ## input has inconsistent values and not raise an error ## Fitting of the distribution ' nig ' by maximum likelihood ## Parameters: ## estimate ## mu 2.985 ## delta 0.457 ## alpha 0.466 ## beta 0.237"},{"path":"https://lbbe-software.github.io/fitdistrplus/articles/FAQ.html","id":"how-works-quantile-matching-estimation-for-discrete-distributions","dir":"Articles","previous_headings":"Questions regarding optimization procedures","what":"How works quantile matching estimation for discrete distributions?","title":"Frequently Asked Questions","text":"Let us consider geometric distribution values {0,1,2,3,…}\\{0,1,2,3,\\dots\\}. probability mass function, cumulative distribution function quantile function P(X=x)=p(1−p)⌊x⌋,FX(x)=1−(1−p)⌊x⌋,FX−1(q)=⌊log(1−q)log(1−p)⌋. 
P(X=x)= p (1-p)^{\\lfloor x\\rfloor}, F_X(x) = 1- (1-p)^{\\lfloor x\\rfloor}, F_X^{-1}(q) = \\left\\lfloor\\frac{\\log(1-q)}{\\log(1-p)}\\right\\rfloor. Due integer part (floor function), distribution function quantile function step functions. Now study QME geometric distribution. Since one parameter, choose one probabiliy, p=1/2p=1/2. theoretical median following integer FX−1(1/2)=⌊log(1/2)log(1−p)⌋. F_X^{-1}(1/2) = \\left\\lfloor\\frac{\\log(1/2)}{\\log(1-p)}\\right\\rfloor. Note theoretical median discrete distribution integer. Empirically, median may integer. Indeed even length dataset, empirical median qn,1/2=xn/2⋆+xn/2+1⋆2, q_{n,1/2} = \\frac{x_{n/2}^\\star + x_{n/2+1}^\\star}{2}, x1⋆<…= low) * (x <= upp) } ptgamma <- function(q, shape, rate, low, upp) { PU <- pgamma(upp, shape = shape, rate = rate) PL <- pgamma(low, shape = shape, rate = rate) (pgamma(q, shape, rate) - PL) / (PU - PL) * (q >= low) * (q <= upp) + 1 * (q > upp) } rtgamma <- function(n, shape, rate, low=0, upp=Inf, maxit=10) { stopifnot(n > 0) if(low > upp) return(rep(NaN, n)) PU <- pgamma(upp, shape = shape, rate = rate) PL <- pgamma(low, shape = shape, rate = rate) #simulate directly expected number of random variate n2 <- n/(PU-PL) x <- rgamma(n, shape=shape, rate=rate) x <- x[x >= low & x <= upp] i <- 0 while(length(x) < n && i < maxit) { n2 <- (n-length(x))/(PU-PL) y <- rgamma(n2, shape=shape, rate=rate) x <- c(x, y[y >= low & y <= upp]) i <- i+1 } x[1:n] } n <- 100 ; shape <- 11 ; rate <- 3 ; x0 <- 5 x <- rtgamma(n, shape = shape, rate = rate, low=x0) fit.NM.2P <- fitdist( data = x, distr = \"tgamma\", method = \"mle\", start = list(shape = 10, rate = 10), fix.arg = list(upp = Inf, low=x0), lower = c(0, 0), upper=c(Inf, Inf)) ## Warning in cov2cor(varcovar): diag(V) had non-positive or NA entries; the ## non-finite result may be dubious ## Warning in sqrt(diag(varcovar)): NaNs produced fit.NM.3P <- fitdist( data = x, distr = \"tgamma\", method = \"mle\", start = list(shape = 10, rate = 10, low=1), fix.arg = list(upp = Inf), lower = c(0, 0, -Inf), upper=c(Inf, Inf, min(x))) ## Warning in cov2cor(varcovar): diag(V) had non-positive or NA entries; the ## non-finite result may be dubious ## Warning in cov2cor(varcovar): NaNs produced ## fit3P fit2P true value ## shape 11.8503 57.13 11 ## rate 3.2982 10.92 3 ## low 5.0094 5.00 5 ## mean sq. error 0.2707 730.30 0 ## rel. error 0.0595 2.28 0 fit.gamma <- fitdist( data = x-x0, distr = \"gamma\", method = \"mle\") ## fit3P fit2P orig. data fit2P shift data true value ## shape 11.8503 57.13 1.498 11 ## rate 3.2982 10.92 2.289 3 ## low 5.0094 5.00 5.000 5 ## mean sq. error 0.2707 730.30 30.266 0 ## rel. error 0.0595 2.28 0.367 0 ## Warning in cov2cor(varcovar): diag(V) had non-positive or NA entries; the ## non-finite result may be dubious ## Warning in sqrt(diag(varcovar)): NaNs produced ## Warning in cov2cor(varcovar): diag(V) had non-positive or NA entries; the ## non-finite result may be dubious ## Warning in sqrt(diag(varcovar)): NaNs produced ## fit3P fit2P orig. data true value ## shape 11.7976 15.489 11 ## rate 3.0813 3.679 3 ## low 5.0001 5.000 5 ## mean sq. error 0.2143 6.871 0 ## rel. 
error 0.0332 0.212 0"},{"path":[]},{"path":"https://lbbe-software.github.io/fitdistrplus/articles/FAQ.html","id":"can-we-compute-marginal-confidence-intervals-on-parameter-estimates-from-their-reported-standard-error","dir":"Articles","previous_headings":"Questions regarding uncertainty","what":"Can we compute marginal confidence intervals on parameter estimates from their reported standard error ?","title":"Frequently Asked Questions","text":"statistics, deriving marginal confidence intervals MLE parameter estimates using approximation standard errors (calculated hessian) quite common procedure. based wald approximation stands sample size nn sufficiently high, marginal 95%95\\% confidence ith component θi\\theta_i model parameter θ\\theta estimated maximum likelihood (estimate denoted θ̂\\hat \\theta) can approximated : θ̂±1.96×SE(θ̂)\\hat \\theta_i \\pm 1.96 \\times SE(\\hat \\theta_i ) SE(θ̂)SE(\\hat \\theta_i ) ith term diagonal covariance matrix estimates (ViiV_{ii}). VV generally approximated inverse Fisher information matrix ((θ̂)(\\hat \\theta)). Fisher information matrix corresponds opposite hessian matrix evaluated MLE estimate. Let us recall hessian matrix defined Hij(y,θ)=∂2L(y,θ)∂θi∂θjH_{ij}(y, \\theta) = \\frac{\\partial^2 L(y, \\theta)}{\\partial \\theta_i \\partial \\theta_j} L(y,θ)L(y, \\theta) loglikelihod function data yy parameter θ\\theta. using approximation, one must keep mind validity depend sample size. also strongly depends data, distribution, also parameterization distribution. reason recommend potential users Wald approximation compare results ones obtained using bootstrap procedure (see ) using approximation. look loglikelihood contours also interesting Wald approximation assumes elliptical contours. general context, recommend use bootstrap compute confidence intervals parameters function parameters. find two examples, one Wald confidence intervals seem correct one give wrong results, parameter values even outside possible range (negative rate bound gamma distribution).","code":"set.seed(1234) n <- rnorm(30, mean = 10, sd = 2) fn <- fitdist(n, \"norm\") bn <- bootdist(fn) bn$CI ## Median 2.5% 97.5% ## mean 9.41 8.78 10.02 ## sd 1.73 1.33 2.15 fn$estimate + cbind(\"estimate\"= 0, \"2.5%\"= -1.96*fn$sd, \"97.5%\"= 1.96*fn$sd) ## estimate 2.5% 97.5% ## mean 9.41 8.77 10.04 ## sd 1.78 1.33 2.22 par(mfrow=c(1,1), mar=c(4,4,2,1)) llplot(fn, back.col = FALSE) set.seed(1234) g <- rgamma(30, shape = 0.1, rate = 10) fg <- fitdist(g, \"gamma\") bg <- bootdist(fg) bg$CI ## Median 2.5% 97.5% ## shape 0.0923 0.0636 0.145 ## rate 30.1018 9.6288 147.323 fg$estimate + cbind(\"estimate\"= 0, \"2.5%\"= -1.96*fg$sd, \"97.5%\"= 1.96*fg$sd) ## estimate 2.5% 97.5% ## shape 0.0882 0.0553 0.121 ## rate 24.2613 -6.3431 54.866 par(mfrow=c(1,1), mar=c(4,4,2,1)) llplot(fg, back.col = FALSE)"},{"path":"https://lbbe-software.github.io/fitdistrplus/articles/FAQ.html","id":"how-can-we-compute-confidence-intervals-on-quantiles-from-the-fit-of-a-distribution","dir":"Articles","previous_headings":"Questions regarding uncertainty","what":"How can we compute confidence intervals on quantiles from the fit of a distribution ?","title":"Frequently Asked Questions","text":"quantile() function can used calculate quantile fitted distribution called object class fitdist fitdistcens first argument. called object class bootdist bootdistcens first argument, quantiles returned accompanied confidence interval calculated using bootstraped sample parameters. 
Moreover, can use CIcdfplot() function plot fitted distribution CDF curve surrounded band corresponding pointwise intervals quantiles. See example censored data corresponding 72-hour acute salinity tolerance (LC50values) rivermarine invertebrates.","code":"data(salinity) log10LC50 <-log10(salinity) fit <- fitdistcens(log10LC50, \"norm\") # Bootstrap bootsample <- bootdistcens(fit, niter = 101) #### We used only 101 iterations in that example to limit the calculation time but #### in practice you should take at least 1001 bootstrap iterations # Calculation of the quantile of interest (here the 5 percent hazard concentration) (HC5 <- quantile(bootsample, probs = 0.05)) ## (original) estimated quantiles for each specified probability (censored data) ## p=0.05 ## estimate 1.12 ## Median of bootstrap estimates ## p=0.05 ## estimate 1.12 ## ## two-sided 95 % CI of each quantile ## p=0.05 ## 2.5 % 1.05 ## 97.5 % 1.20 # visualizing pointwise confidence intervals on other quantiles par(mfrow=c(1,1), mar=c(4,4,2,1)) CIcdfplot(bootsample, CI.output = \"quantile\", CI.fill = \"pink\", xlim = c(0.5,2), main = \"\")"},{"path":"https://lbbe-software.github.io/fitdistrplus/articles/FAQ.html","id":"how-can-we-compute-confidence-intervals-on-any-function-of-the-parameters-of-the-fitted-distribution","dir":"Articles","previous_headings":"Questions regarding uncertainty","what":"How can we compute confidence intervals on any function of the parameters of the fitted distribution ?","title":"Frequently Asked Questions","text":"bootstrap sample parameter estimates can used calculate bootstrap sample variable defined function parameters fitted distribution. bootstrap sample can easily compute conidence interval using percentiles. example uses bootstrap sample parameters previous example (FAQ 4.2) calculate 95 percent confidence interval Potentially Affected Portion (PAF) species given exposure salinity (fixed 1.2 log10 example). complex calculations especially tranfer uncertainty within quantitative risk assessment, recommend use package mc2d aims making calculations easy gives extensive examples use bootstrap samples parameters estimated using functions package fitdistrplus.","code":"exposure <- 1.2 # Bootstrap sample of the PAF at this exposure PAF <- pnorm(exposure, mean = bootsample$estim$mean, sd = bootsample$estim$sd) # confidence interval from 2.5 and 97.5 percentiles quantile(PAF, probs = c(0.025, 0.975)) ## 2.5% 97.5% ## 0.0487 0.1470"},{"path":"https://lbbe-software.github.io/fitdistrplus/articles/FAQ.html","id":"how-do-we-choose-the-bootstrap-number","dir":"Articles","previous_headings":"Questions regarding uncertainty","what":"How do we choose the bootstrap number?","title":"Frequently Asked Questions","text":"Generally, need choose number bootstrap values high original sample size. search number mean standard values become stable. 
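As a rough way to carry out this stability check, here is a minimal sketch (not part of the package): fit stands for any object returned by fitdist, and we use the estim component in which bootdist stores the bootstrapped parameter values; stop increasing niter once the reported mean and standard deviation no longer change noticeably.

# stability of the bootstrap mean and sd of the first parameter for increasing niter
niters <- c(50, 100, 200, 500)
stab <- sapply(niters, function(ni) {
  b <- bootdist(fit, niter = ni)                 # 'fit' is any fitdist object
  c(mean = mean(b$estim[, 1]), sd = sd(b$estim[, 1]))
})
colnames(stab) <- niters
stab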
log-normal example , enough 100 bootstrap values.","code":"f.ln.MME <- fitdist(rlnorm(1000), \"lnorm\", method = \"mme\", order = 1:2) # Bootstrap b.ln.50 <- bootdist(f.ln.MME, niter = 50) b.ln.100 <- bootdist(f.ln.MME, niter = 100) b.ln.200 <- bootdist(f.ln.MME, niter = 200) b.ln.500 <- bootdist(f.ln.MME, niter = 500) d1 <- density(b.ln.50, b.ln.100, b.ln.200, b.ln.500) plot(d1)"},{"path":[]},{"path":"https://lbbe-software.github.io/fitdistrplus/articles/FAQ.html","id":"can-i-personalize-the-default-plot-given-for-an-object-of-class-fitdist-or-fitdistcens","dir":"Articles","previous_headings":"How to personalize plots","what":"Can I personalize the default plot given for an object of class fitdist or fitdistcens?","title":"Frequently Asked Questions","text":"default plot given using plot() function object class fitdist fitdistcens hard personalize. Indeed plot designed give quick overview fit, used graph manuscript formal presentation. personalize () goodness--fit plots, rather use specific graphical functions, denscomp, cdfcomp, ppcomp, qqcomp cdfcompcens (see following paragraphs).","code":""},{"path":"https://lbbe-software.github.io/fitdistrplus/articles/FAQ.html","id":"how-to-personalize-goodness-of-fit-plots","dir":"Articles","previous_headings":"How to personalize plots","what":"How to personalize goodness-of-fit plots ?","title":"Frequently Asked Questions","text":"default plot object class fitdist can easily reproduced personalized using denscomp, cdfcomp, ppcomp qqcomp. similar way, default plot object class fitdistcens can easily personalized using cdfcompcens.","code":"data(groundbeef) serving <- groundbeef$serving fit <- fitdist(serving, \"gamma\") par(mfrow = c(2,2), mar = c(4, 4, 1, 1)) denscomp(fit, addlegend = FALSE, main = \"\", xlab = \"serving sizes (g)\", fitcol = \"orange\") qqcomp(fit, addlegend = FALSE, main = \"\", fitpch = 16, fitcol = \"grey\", line01lty = 2) cdfcomp(fit, addlegend = FALSE, main = \"\", xlab = \"serving sizes (g)\", fitcol = \"orange\", lines01 = TRUE) ppcomp(fit, addlegend = FALSE, main = \"\", fitpch = 16, fitcol = \"grey\", line01lty = 2)"},{"path":"https://lbbe-software.github.io/fitdistrplus/articles/FAQ.html","id":"is-it-possible-to-obtain-ggplot2-plots","dir":"Articles","previous_headings":"How to personalize plots","what":"Is it possible to obtain ggplot2 plots ?","title":"Frequently Asked Questions","text":"argument plotstyle added functions denscomp, cdfcomp, ppcomp, qqcompand cdfcompcens, ppcompcens, qqcompcens enable generation plots using ggplot2 package. argument default fixed graphics must simply fixed ggplot purpose, following example. 
latter case graphical functions return graphic object can personalized using ggplot2 functions.","code":"library(ggplot2) fitW <- fitdist(serving, \"weibull\") fitln <- fitdist(serving, \"lnorm\") fitg <- fitdist(serving, \"gamma\") dcomp <- denscomp(list(fitW, fitln, fitg), legendtext = c(\"Weibull\", \"lognormal\", \"gamma\"), xlab = \"serving sizes (g)\", xlim = c(0, 250), fitcol = c(\"red\", \"green\", \"orange\"), fitlty = 1, fitlwd = 1:3, xlegend = \"topright\", plotstyle = \"ggplot\", addlegend = FALSE) dcomp + ggplot2::theme_minimal() + ggplot2::ggtitle(\"Ground beef fits\")"},{"path":"https://lbbe-software.github.io/fitdistrplus/articles/FAQ.html","id":"is-it-possible-to-add-the-names-of-the-observations-in-a-goodness-of-fit-plot-e-g--the-names-of-the-species-in-the-plot-of-the-species-sensitivity-distribution-ssd-classically-used-in-ecotoxicology","dir":"Articles","previous_headings":"How to personalize plots","what":"Is it possible to add the names of the observations in a goodness-of-fit plot, e.g. the names of the species in the plot of the Species Sensitivity Distribution (SSD) classically used in ecotoxicology ?","title":"Frequently Asked Questions","text":"argument named name.points can used functions cdfcomp CIcdfcomp pass label vector observed points add names points left point. option available ECDF goodness--fit plots non censored data. option can used , example, name species classical plot Species Sensitivity Distributions (SSD) ecotoxicology.","code":"data(endosulfan) ATV <- subset(endosulfan, group == \"NonArthroInvert\")$ATV taxaATV <- subset(endosulfan, group == \"NonArthroInvert\")$taxa f <- fitdist(ATV, \"lnorm\") cdfcomp(f, xlogscale = TRUE, main = \"Species Sensitivty Distribution\", xlim = c(1, 100000), name.points = taxaATV, addlegend = FALSE, plotstyle = \"ggplot\")"},{"path":[]},{"path":"https://lbbe-software.github.io/fitdistrplus/articles/FAQ.html","id":"how-to-code-censored-data-in-fitdistrplus","dir":"Articles","previous_headings":"Questions regarding (left, right and/or interval) censored data","what":"How to code censored data in fitdistrplus ?","title":"Frequently Asked Questions","text":"Censored data must rpresented package dataframe two columns respectively named left right, describing observed value interval. left column contains either NA left censored observations, left bound interval interval censored observations, observed value non-censored observations. right column contains either NA right censored observations, right bound interval interval censored observations, observed value non-censored observations. type representation corresponds coding names \"interval2\" function Surv package survival. way represent censored data fitdistrplus function Surv2fitdistcens() can used help format data use fitdistcens() one format used survival package (see help page Surv2fitdistcens()). 
toy example .","code":"dtoy <- data.frame(left = c(NA, 2, 4, 6, 9.7, 10), right = c(1, 3, 7, 8, 9.7, NA)) dtoy ## left right ## 1 NA 1.0 ## 2 2.0 3.0 ## 3 4.0 7.0 ## 4 6.0 8.0 ## 5 9.7 9.7 ## 6 10.0 NA"},{"path":"https://lbbe-software.github.io/fitdistrplus/articles/FAQ.html","id":"how-do-i-prepare-the-input-of-fitdistcens-with-surv2fitdistcens","dir":"Articles","previous_headings":"Questions regarding (left, right and/or interval) censored data","what":"How do I prepare the input of fitdistcens() with Surv2fitdistcens()?","title":"Frequently Asked Questions","text":"Let us consider classical right-censored dataset human life: twenty values randomly chosen canlifins dataset CASdatasets package. refer help Surv2fitdistcens() censoring types. performing survival analysis, common use Surv() function package survival handle different types censoring. order ease use fitdistcens(), dedicated function Surv2fitdistcens() implemented arguments similar ones Surv(). Let us now fit two simple distributions.","code":"exitage <- c(81.1,78.9,72.6,67.9,60.1,78.3,83.4,66.9,74.8,80.5,75.6,67.1, 75.3,82.8,70.1,85.4,74,70,71.6,76.5) death <- c(0,0,1,0,0,0,0,1,0,0,0,0,0,0,1,1,0,0,0,0) svdata <- Surv2fitdistcens(exitage, event=death) flnormc <- fitdistcens(svdata, \"lnorm\") fweic <- fitdistcens(svdata, \"weibull\") par(mfrow=c(1,1), mar=c(4,4,2,1)) cdfcompcens(list(fweic, flnormc), xlim=range(exitage), xlegend = \"topleft\")"},{"path":"https://lbbe-software.github.io/fitdistrplus/articles/FAQ.html","id":"how-to-represent-an-empirical-distribution-from-censored-data","dir":"Articles","previous_headings":"Questions regarding (left, right and/or interval) censored data","what":"How to represent an empirical distribution from censored data ?","title":"Frequently Asked Questions","text":"representation empirical distribution censored data trivial problem. One can simply represent observation interval y-value defined rank observation done using function plotdistcens. representation can interesting visualize raw data, remains difficult correctly order observations case (see example right using data smokedfish). Many authors worked development algorithms non parametric maximum likelihood estimation (NPMLE) empirical cumulative distribution function (ECDF) interval censored data (including left right censored data can considered interval censored data one bound infinity). old versions fitdistrplus used Turnbull algorithm using calls functions package survival. Even Turnbull algorithm still available package, default plot now uses function npsurv package npsurv. package provides performant algorithms developped Yong Wang (see references cited help page plotdistcens). Due lack maintenance package forced rewrite main functions package, using another optimization function. ECDF plot also implemented using Turnbull algorithm survival (see ). can see example, new implementation NPMLE provides different type plot ECDF, representing filled rectangles zones non-uniqueness NPMLE ECDF. Indeed NPMLE algorithm generally proceeds two steps. first step aims identifying equivalence classes (also named litterture Turnbull intervals maximal intersection intervals innermost intervals maximal cliques data). Equivalences classess points/intervals NPMLE ECDF may change. Equivalence classes shown correspond regions left bound interval (named L following plot previous toy example) immediately followed right bound interval (named R following plot). equivalence class may null length (example non censored value). 
second step aims assigning probability mass equivalence class, may zero classes. NPMLE unique equivalence classes non uniqueness NPMLE ECDF represented filled rectangles. Various NPMLE algorithms implemented packages Icens, interval npsurv. less performant enable handling data survival data, especially left censored observations.","code":"par(mfrow = c(1,2), mar = c(3, 4, 3, 0.5)) plotdistcens(dtoy, NPMLE = FALSE) data(smokedfish) dsmo <- log10(smokedfish) plotdistcens(dsmo, NPMLE = FALSE) par(mfrow = c(2, 2), mar = c(3, 4, 3, 0.5)) # Turnbull algorithm with representation of middle points of equivalence classes plotdistcens(dsmo, NPMLE.method = \"Turnbull.middlepoints\", xlim = c(-1.8, 2.4)) # Turnbull algorithm with representation of equivalence classes as intervals plotdistcens(dsmo, NPMLE.method = \"Turnbull.intervals\") # Wang algorithm with representation of equivalence classes as intervals plotdistcens(dsmo, NPMLE.method = \"Wang\")"},{"path":"https://lbbe-software.github.io/fitdistrplus/articles/FAQ.html","id":"how-to-assess-the-goodness-of-fit-of-a-distribution-fitted-on-censored-data","dir":"Articles","previous_headings":"Questions regarding (left, right and/or interval) censored data","what":"How to assess the goodness-of-fit of a distribution fitted on censored data ?","title":"Frequently Asked Questions","text":"available method fitdistrplus fit distributions censored data maximum likelihood estimation (MLE). distribution fitted using fitdistcens, AIC BIC values can found summary object class fitdistcens returned function. values can used compare fit various distributions dataset. Function gofstat yet proposed package fits censored data plan develop future calculation goodness--fit statistics censored data. Considering goodness--fit plots, generic plot function object class fitdistcensprovides three plots, one CDF using NPMLE ECDF plot (default using Wang prepresentation, see previous part details), Q-Q plot P-P plot simply derived Wang plot ECDF, filled rectangles indicating non uniqueness NPMLE ECDF. Functions cdfcompcens(), qqcompens() ppcompcens() can used individualize personnalize CDF, Q-Q P-P goodness--fit plots /compare fit various distributions dataset. Considering Q-Q plots P-P plots, may easier compare various fits splitting plots done automatically using plotstyle ggplot qqcompens() ppcompcens() can also done manually plotstyle graphics.","code":"fnorm <- fitdistcens(dsmo,\"norm\") flogis <- fitdistcens(dsmo,\"logis\") # comparison of AIC values summary(fnorm)$aic ## [1] 178 summary(flogis)$aic ## [1] 177 par(mar = c(2, 4, 3, 0.5)) plot(fnorm) par(mfrow=c(1,1), mar=c(4,4,2,1)) cdfcompcens(list(fnorm, flogis), fitlty = 1) qqcompcens(list(fnorm, flogis)) ppcompcens(list(fnorm, flogis)) qqcompcens(list(fnorm, flogis), lwd = 2, plotstyle = \"ggplot\", fitcol = c(\"red\", \"green\"), fillrect = c(\"pink\", \"lightgreen\"), legendtext = c(\"normal distribution\", \"logistic distribution\"))"},{"path":"https://lbbe-software.github.io/fitdistrplus/articles/Optimalgo.html","id":"quick-overview-of-main-optimization-methods","dir":"Articles","previous_headings":"","what":"Quick overview of main optimization methods","title":"Which optimization algorithm to choose?","text":"present quickly main optimization methods. Please refer Numerical Optimization (Nocedal & Wright, 2006) Numerical Optimization: theoretical practical aspects (Bonnans, Gilbert, Lemarechal & Sagastizabal, 2006) good introduction. 
consider following problem \\(\\min_x f(x)\\) \\(x\\\\mathbb{R}^n\\).","code":""},{"path":"https://lbbe-software.github.io/fitdistrplus/articles/Optimalgo.html","id":"derivative-free-optimization-methods","dir":"Articles","previous_headings":"1 Quick overview of main optimization methods","what":"Derivative-free optimization methods","title":"Which optimization algorithm to choose?","text":"Nelder-Mead method one well known derivative-free methods use values \\(f\\) search minimum. consists building simplex \\(n+1\\) points moving/shrinking simplex good direction. set initial points \\(x_1, \\dots, x_{n+1}\\). order points \\(f(x_1)\\leq f(x_2)\\leq\\dots\\leq f(x_{n+1})\\). compute \\(x_o\\) centroid \\(x_1, \\dots, x_{n}\\). compute reflected point \\(x_r = x_o + \\alpha(x_o-x_{n+1})\\). \\(f(x_1)\\leq f(x_r) 1.2 Hessian-free optimization methods","what":"Computing the direction \\(d_k\\)","title":"Which optimization algorithm to choose?","text":"desirable property \\(d_k\\) \\(d_k\\) ensures descent \\(f(x_{k+1}) < f(x_{k})\\). Newton methods \\(d_k\\) minimizes local quadratic approximation \\(f\\) based Taylor expansion, \\(q_f(d) = f(x_k) + g(x_k)^Td +\\frac{1}{2} d^T H(x_k) d\\) \\(g\\) denotes gradient \\(H\\) denotes Hessian. consists using exact solution local minimization problem \\(d_k = - H(x_k)^{-1} g(x_k)\\). practice, methods preferred (least ensure positive definiteness). method approximates Hessian matrix \\(H_k\\) function \\(H_{k-1}\\), \\(x_k\\), \\(f(x_k)\\) \\(d_k\\) solves system \\(H_k d = - g(x_k)\\). implementation may also directly approximate inverse Hessian \\(W_k\\) order compute \\(d_k = -W_k g(x_k)\\). Using Sherman-Morrison-Woodbury formula, can switch \\(W_k\\) \\(H_k\\). determine \\(W_k\\), first must verify secant equation \\(H_k y_k =s_k\\) \\(y_k=W_k s_k\\) \\(y_k = g_{k+1}-g_k\\) \\(s_k=x_{k+1}-x_k\\). define \\(n(n-1)\\) terms, generally impose symmetry minimum distance conditions. say rank 2 update \\(H_k = H_{k-1} + u u^T + b v v^T\\) rank 1 update $H_k = H_{k-1} + u u^T $. Rank \\(n\\) update justified spectral decomposition theorem. two rank-2 updates symmetric preserve positive definiteness DFP minimizes \\(\\min || H - H_k ||_F\\) \\(H=H^T\\): \\[ H_{k+1} = \\left (-\\frac {y_k s_k^T} {y_k^T s_k} \\right ) H_k \\left (-\\frac {s_k y_k^T} {y_k^T s_k} \\right )+\\frac{y_k y_k^T} {y_k^T s_k} \\Leftrightarrow W_{k+1} = W_k + \\frac{s_k s_k^T}{y_k^{T} s_k} - \\frac {W_k y_k y_k^T W_k^T} {y_k^T W_k y_k} . \\] BFGS minimizes \\(\\min || W - W_k ||_F\\) \\(W=W^T\\): \\[ H_{k+1} = H_k - \\frac{ H_k y_k y_k^T H_k }{ y_k^T H_k y_k } + \\frac{ s_k s_k^T }{ y_k^T s_k } \\Leftrightarrow W_{k+1} = \\left (-\\frac {y_k s_k^T} {y_k^T s_k} \\right )^T W_k \\left (-\\frac { y_k s_k^T} {y_k^T s_k} \\right )+\\frac{s_k s_k^T} {y_k^T s_k} . \\] R, -called BFGS scheme implemented optim. Another possible method (initially arised quadratic problems) nonlinear conjugate gradients. consists computing directions \\((d_0, \\dots, d_k)\\) conjugate respect matrix close true Hessian \\(H(x_k)\\). Directions computed iteratively \\(d_k = -g(x_k) + \\beta_k d_{k-1}\\) \\(k>1\\), initiated \\(d_1 = -g(x_1)\\). \\(\\beta_k\\) updated according scheme: \\(\\beta_k = \\frac{ g_k^T g_k}{g_{k-1}^T g_{k-1} }\\): Fletcher-Reeves update, \\(\\beta_k = \\frac{ g_k^T (g_k-g_{k-1} )}{g_{k-1}^T g_{k-1}}\\): Polak-Ribiere update. exists also three-term formula computing direction \\(d_k = -g(x_k) + \\beta_k d_{k-1}+\\gamma_{k} d_t\\) \\(tt+1\\) otherwise \\(\\gamma_k=0\\) \\(k=t\\). 
See Yuan (2006) well-known schemes Hestenses-Stiefel, Dixon Conjugate-Descent. three updates (Fletcher-Reeves, Polak-Ribiere, Beale-Sorenson) (non-linear) conjugate gradient available optim.","code":""},{"path":"https://lbbe-software.github.io/fitdistrplus/articles/Optimalgo.html","id":"computing-the-stepsize-t_k","dir":"Articles","previous_headings":"1 Quick overview of main optimization methods > 1.2 Hessian-free optimization methods","what":"Computing the stepsize \\(t_k\\)","title":"Which optimization algorithm to choose?","text":"Let \\(\\phi_k(t) = f(x_k + t d_k)\\) given direction/iterate \\((d_k, x_k)\\). need find conditions find satisfactory stepsize \\(t_k\\). literature, consider descent condition: \\(\\phi_k'(0) < 0\\) Armijo condition: \\(\\phi_k(t) \\leq \\phi_k(0) + t c_1 \\phi_k'(0)\\) ensures decrease \\(f\\). Nocedal & Wright (2006) presents backtracking (geometric) approach satisfying Armijo condition minimal condition, .e. Goldstein Price condition. set \\(t_{k,0}\\) e.g. 1, \\(0 < \\alpha < 1\\), \\(t_{k,+1} = \\alpha \\times t_{k,}\\). end Repeat backtracking linesearch available optim.","code":""},{"path":"https://lbbe-software.github.io/fitdistrplus/articles/Optimalgo.html","id":"benchmark","dir":"Articles","previous_headings":"1 Quick overview of main optimization methods","what":"Benchmark","title":"Which optimization algorithm to choose?","text":"simplify benchmark optimization methods, create fitbench function computes desired estimation method optimization methods. function currently exported package.","code":"fitbench <- function(data, distr, method, grad = NULL, control = list(trace = 0, REPORT = 1, maxit = 1000), lower = -Inf, upper = +Inf, ...)"},{"path":[]},{"path":[]},{"path":"https://lbbe-software.github.io/fitdistrplus/articles/Optimalgo.html","id":"theoretical-value","dir":"Articles","previous_headings":"2 Numerical illustration with the beta distribution > 2.1 Log-likelihood function and its gradient for beta distribution","what":"Theoretical value","title":"Which optimization algorithm to choose?","text":"density beta distribution given \\[ f(x; \\delta_1,\\delta_2) = \\frac{x^{\\delta_1-1}(1-x)^{\\delta_2-1}}{\\beta(\\delta_1,\\delta_2)}, \\] \\(\\beta\\) denotes beta function, see NIST Handbook mathematical functions https://dlmf.nist.gov/. recall \\(\\beta(,b)=\\Gamma()\\Gamma(b)/\\Gamma(+b)\\). log-likelihood set observations \\((x_1,\\dots,x_n)\\) \\[ \\log L(\\delta_1,\\delta_2) = (\\delta_1-1)\\sum_{=1}^n\\log(x_i)+ (\\delta_2-1)\\sum_{=1}^n\\log(1-x_i)+ n \\log(\\beta(\\delta_1,\\delta_2)) \\] gradient respect \\(\\) \\(b\\) \\[ \\nabla \\log L(\\delta_1,\\delta_2) = \\left(\\begin{matrix} \\sum\\limits_{=1}^n\\ln(x_i) - n\\psi(\\delta_1)+n\\psi( \\delta_1+\\delta_2) \\\\ \\sum\\limits_{=1}^n\\ln(1-x_i)- n\\psi(\\delta_2)+n\\psi( \\delta_1+\\delta_2) \\end{matrix}\\right), \\] \\(\\psi(x)=\\Gamma'(x)/\\Gamma(x)\\) digamma function, see NIST Handbook mathematical functions https://dlmf.nist.gov/.","code":""},{"path":"https://lbbe-software.github.io/fitdistrplus/articles/Optimalgo.html","id":"r-implementation","dir":"Articles","previous_headings":"2 Numerical illustration with the beta distribution > 2.1 Log-likelihood function and its gradient for beta distribution","what":"R implementation","title":"Which optimization algorithm to choose?","text":"fitdistrplus package, minimize opposite log-likelihood: implement opposite gradient grlnL. 
log-likelihood gradient exported.","code":"lnL <- function(par, fix.arg, obs, ddistnam) fitdistrplus:::loglikelihood(par, fix.arg, obs, ddistnam) grlnlbeta <- fitdistrplus:::grlnlbeta"},{"path":"https://lbbe-software.github.io/fitdistrplus/articles/Optimalgo.html","id":"random-generation-of-a-sample","dir":"Articles","previous_headings":"2 Numerical illustration with the beta distribution","what":"Random generation of a sample","title":"Which optimization algorithm to choose?","text":"","code":"#(1) beta distribution n <- 200 x <- rbeta(n, 3, 3/4) grlnlbeta(c(3, 4), x) #test ## [1] -133 317 hist(x, prob=TRUE, xlim=0:1) lines(density(x), col=\"red\") curve(dbeta(x, 3, 3/4), col=\"green\", add=TRUE) legend(\"topleft\", lty=1, col=c(\"red\",\"green\"), legend=c(\"empirical\", \"theoretical\"), bty=\"n\")"},{"path":"https://lbbe-software.github.io/fitdistrplus/articles/Optimalgo.html","id":"fit-beta-distribution","dir":"Articles","previous_headings":"2 Numerical illustration with the beta distribution","what":"Fit Beta distribution","title":"Which optimization algorithm to choose?","text":"Define control parameters. Call mledist default optimization function (optim implemented stats package) without gradient different optimization methods. case constrained optimization, mledist permits direct use constrOptim function (still implemented stats package) allow linear inequality constraints using logarithmic barrier. Use exp/log transformation shape parameters \\(\\delta_1\\) \\(\\delta_2\\) ensure shape parameters strictly positive. extract values fitted parameters, value corresponding log-likelihood number counts function minimize gradient (whether theoretical gradient numerically approximated one).","code":"ctr <- list(trace=0, REPORT=1, maxit=1000) unconstropt <- fitbench(x, \"beta\", \"mle\", grad=grlnlbeta, lower=0) ## BFGS NM CGFR CGPR CGBS L-BFGS-B NM-B G-BFGS ## 14 14 14 14 14 14 14 14 ## G-CGFR G-CGPR G-CGBS G-BFGS-B G-NM-B G-CGFR-B G-CGPR-B G-CGBS-B ## 14 14 14 14 14 14 14 14 dbeta2 <- function(x, shape1, shape2, log) dbeta(x, exp(shape1), exp(shape2), log=log) #take the log of the starting values startarg <- lapply(fitdistrplus:::startargdefault(x, \"beta\"), log) #redefine the gradient for the new parametrization grbetaexp <- function(par, obs, ...) grlnlbeta(exp(par), obs) * exp(par) expopt <- fitbench(x, distr=\"beta2\", method=\"mle\", grad=grbetaexp, start=startarg) ## BFGS NM CGFR CGPR CGBS G-BFGS G-CGFR G-CGPR G-CGBS ## 14 14 14 14 14 14 14 14 14 #get back to original parametrization expopt[c(\"fitted shape1\", \"fitted shape2\"), ] <- exp(expopt[c(\"fitted shape1\", \"fitted shape2\"), ])"},{"path":"https://lbbe-software.github.io/fitdistrplus/articles/Optimalgo.html","id":"results-of-the-numerical-investigation","dir":"Articles","previous_headings":"2 Numerical illustration with the beta distribution","what":"Results of the numerical investigation","title":"Which optimization algorithm to choose?","text":"Results displayed following tables: (1) original parametrization without specifying gradient (-B stands bounded version), (2) original parametrization (true) gradient (-B stands bounded version -G gradient), (3) log-transformed parametrization without specifying gradient, (4) log-transformed parametrization (true) gradient (-G stands gradient). 
Table 2.1: Unconstrained optimization approximated gradient Table 2.2: Unconstrained optimization true gradient Table 2.3: Exponential trick optimization approximated gradient Table 2.4: Exponential trick optimization true gradient Using llsurface, plot log-likehood surface around true value (green) fitted parameters (red). can simulate bootstrap replicates using bootdist function.","code":"llsurface(min.arg=c(0.1, 0.1), max.arg=c(7, 3), xlim=c(.1,7), plot.arg=c(\"shape1\", \"shape2\"), nlev=25, lseq=50, data=x, distr=\"beta\", back.col = FALSE) points(unconstropt[1,\"BFGS\"], unconstropt[2,\"BFGS\"], pch=\"+\", col=\"red\") points(3, 3/4, pch=\"x\", col=\"green\") b1 <- bootdist(fitdist(x, \"beta\", method = \"mle\", optim.method = \"BFGS\"), niter = 100, parallel = \"snow\", ncpus = 2) summary(b1) ## Parametric bootstrap medians and 95% percentile CI ## Median 2.5% 97.5% ## shape1 2.73 2.272 3.283 ## shape2 0.75 0.652 0.888 plot(b1, trueval = c(3, 3/4))"},{"path":[]},{"path":[]},{"path":"https://lbbe-software.github.io/fitdistrplus/articles/Optimalgo.html","id":"theoretical-value-1","dir":"Articles","previous_headings":"3 Numerical illustration with the negative binomial distribution > 3.1 Log-likelihood function and its gradient for negative binomial distribution","what":"Theoretical value","title":"Which optimization algorithm to choose?","text":"p.m.f. Negative binomial distribution given \\[ f(x; m,p) = \\frac{\\Gamma(x+m)}{\\Gamma(m)x!} p^m (1-p)^x, \\] \\(\\Gamma\\) denotes beta function, see NIST Handbook mathematical functions https://dlmf.nist.gov/. exists alternative representation \\(\\mu=m (1-p)/p\\) equivalently \\(p=m/(m+\\mu)\\). Thus, log-likelihood set observations \\((x_1,\\dots,x_n)\\) \\[ \\log L(m,p) = \\sum_{=1}^{n} \\log\\Gamma(x_i+m) -n\\log\\Gamma(m) -\\sum_{=1}^{n} \\log(x_i!) + mn\\log(p) +\\sum_{=1}^{n} {x_i}\\log(1-p) \\] gradient respect \\(m\\) \\(p\\) \\[ \\nabla \\log L(m,p) = \\left(\\begin{matrix} \\sum_{=1}^{n} \\psi(x_i+m) -n \\psi(m) + n\\log(p) \\\\ mn/p -\\sum_{=1}^{n} {x_i}/(1-p) \\end{matrix}\\right), \\] \\(\\psi(x)=\\Gamma'(x)/\\Gamma(x)\\) digamma function, see NIST Handbook mathematical functions https://dlmf.nist.gov/.","code":""},{"path":"https://lbbe-software.github.io/fitdistrplus/articles/Optimalgo.html","id":"r-implementation-1","dir":"Articles","previous_headings":"3 Numerical illustration with the negative binomial distribution > 3.1 Log-likelihood function and its gradient for negative binomial distribution","what":"R implementation","title":"Which optimization algorithm to choose?","text":"fitdistrplus package, minimize opposite log-likelihood: implement opposite gradient grlnL.","code":"grlnlNB <- function(x, obs, ...) 
{ m <- x[1] p <- x[2] n <- length(obs) c(sum(psigamma(obs+m)) - n*psigamma(m) + n*log(p), m*n/p - sum(obs)/(1-p)) }"},{"path":"https://lbbe-software.github.io/fitdistrplus/articles/Optimalgo.html","id":"random-generation-of-a-sample-1","dir":"Articles","previous_headings":"3 Numerical illustration with the negative binomial distribution","what":"Random generation of a sample","title":"Which optimization algorithm to choose?","text":"","code":"#(2) negative binomial distribution n <- 200 trueval <- c(\"size\"=10, \"prob\"=3/4, \"mu\"=10/3) x <- rnbinom(n, trueval[\"size\"], trueval[\"prob\"]) hist(x, prob=TRUE, ylim=c(0, .3), xlim=c(0, 10)) lines(density(x), col=\"red\") points(min(x):max(x), dnbinom(min(x):max(x), trueval[\"size\"], trueval[\"prob\"]), col = \"green\") legend(\"topright\", lty = 1, col = c(\"red\", \"green\"), legend = c(\"empirical\", \"theoretical\"), bty=\"n\")"},{"path":"https://lbbe-software.github.io/fitdistrplus/articles/Optimalgo.html","id":"fit-a-negative-binomial-distribution","dir":"Articles","previous_headings":"3 Numerical illustration with the negative binomial distribution","what":"Fit a negative binomial distribution","title":"Which optimization algorithm to choose?","text":"Define control parameters make benchmark. case constrained optimization, mledist permits direct use constrOptim function (still implemented stats package) allow linear inequality constraints using logarithmic barrier. Use exp/log transformation shape parameters \\(\\delta_1\\) \\(\\delta_2\\) ensure shape parameters strictly positive. extract values fitted parameters, value corresponding log-likelihood number counts function minimize gradient (whether theoretical gradient numerically approximated one).","code":"ctr <- list(trace = 0, REPORT = 1, maxit = 1000) unconstropt <- fitbench(x, \"nbinom\", \"mle\", grad = grlnlNB, lower = 0) ## BFGS NM CGFR CGPR CGBS L-BFGS-B NM-B G-BFGS ## 14 14 14 14 14 14 14 14 ## G-CGFR G-CGPR G-CGBS G-BFGS-B G-NM-B G-CGFR-B G-CGPR-B G-CGBS-B ## 14 14 14 14 14 14 14 14 unconstropt <- rbind(unconstropt, \"fitted prob\" = unconstropt[\"fitted mu\", ] / (1 + unconstropt[\"fitted mu\", ])) dnbinom2 <- function(x, size, prob, log) dnbinom(x, exp(size), 1 / (1 + exp(-prob)), log = log) # transform starting values startarg <- fitdistrplus:::startargdefault(x, \"nbinom\") startarg$mu <- startarg$size / (startarg$size + startarg$mu) startarg <- list(size = log(startarg[[1]]), prob = log(startarg[[2]] / (1 - startarg[[2]]))) # redefine the gradient for the new parametrization Trans <- function(x) c(exp(x[1]), plogis(x[2])) grNBexp <- function(par, obs, ...) 
grlnlNB(Trans(par), obs) * c(exp(par[1]), plogis(x[2])*(1-plogis(x[2]))) expopt <- fitbench(x, distr=\"nbinom2\", method=\"mle\", grad=grNBexp, start=startarg) ## BFGS NM CGFR CGPR CGBS G-BFGS G-CGFR G-CGPR G-CGBS ## 14 14 14 14 14 14 14 14 14 # get back to original parametrization expopt[c(\"fitted size\", \"fitted prob\"), ] <- apply(expopt[c(\"fitted size\", \"fitted prob\"), ], 2, Trans)"},{"path":"https://lbbe-software.github.io/fitdistrplus/articles/Optimalgo.html","id":"results-of-the-numerical-investigation-1","dir":"Articles","previous_headings":"3 Numerical illustration with the negative binomial distribution","what":"Results of the numerical investigation","title":"Which optimization algorithm to choose?","text":"Results displayed following tables: (1) original parametrization without specifying gradient (-B stands bounded version), (2) original parametrization (true) gradient (-B stands bounded version -G gradient), (3) log-transformed parametrization without specifying gradient, (4) log-transformed parametrization (true) gradient (-G stands gradient). Table 3.1: Unconstrained optimization approximated gradient Table 3.2: Unconstrained optimization true gradient Table 3.3: Exponential trick optimization approximated gradient Table 3.4: Exponential trick optimization true gradient Using llsurface, plot log-likehood surface around true value (green) fitted parameters (red). can simulate bootstrap replicates using bootdist function.","code":"llsurface(min.arg = c(5, 0.3), max.arg = c(15, 1), xlim=c(5, 15), plot.arg = c(\"size\", \"prob\"), nlev = 25, lseq = 50, data = x, distr = \"nbinom\", back.col = FALSE) points(unconstropt[\"fitted size\", \"BFGS\"], unconstropt[\"fitted prob\", \"BFGS\"], pch = \"+\", col = \"red\") points(trueval[\"size\"], trueval[\"prob\"], pch = \"x\", col = \"green\") b1 <- bootdist(fitdist(x, \"nbinom\", method = \"mle\", optim.method = \"BFGS\"), niter = 100, parallel = \"snow\", ncpus = 2) summary(b1) ## Parametric bootstrap medians and 95% percentile CI ## Median 2.5% 97.5% ## size 61.95 11.05 118.32 ## mu 3.43 3.17 3.72 ## ## The estimation method converged only for 76 among 100 iterations plot(b1, trueval=trueval[c(\"size\", \"mu\")])"},{"path":"https://lbbe-software.github.io/fitdistrplus/articles/Optimalgo.html","id":"conclusion","dir":"Articles","previous_headings":"","what":"Conclusion","title":"Which optimization algorithm to choose?","text":"Based two previous examples, observe methods converge point. reassuring. However, number function evaluations (gradient evaluations) different method another. Furthermore, specifying true gradient log-likelihood help fitting procedure generally slows convergence. Generally, best method standard BFGS method BFGS method exponential transformation parameters. Since exponential function differentiable, asymptotic properties still preserved (Delta method) finite-sample may produce small bias.","code":""},{"path":"https://lbbe-software.github.io/fitdistrplus/articles/fitdistrplus_vignette.html","id":"Introduction","dir":"Articles","previous_headings":"","what":"Introduction","title":"Overview of the fitdistrplus package","text":"Fitting distributions data common task statistics consists choosing probability distribution modelling random variable, well finding parameter estimates distribution. requires judgment expertise generally needs iterative process distribution choice, parameter estimation, quality fit assessment. 
R (R Development Core Team 2013) package MASS (Venables Ripley 2010), maximum likelihood estimation available via fitdistr function; steps fitting process can done using R functions (Ricci 2005). paper, present R package fitdistrplus (Delignette-Muller et al. 2014) implementing several methods fitting univariate parametric distribution. first objective developing package provide R users set functions dedicated help overall process. fitdistr function estimates distribution parameters maximizing likelihood function using optim function. distinction parameters different roles (e.g., main parameter nuisance parameter) made, paper focuses parameter estimation general point--view. cases, estimation methods prefered, maximum goodness--fit estimation (also called minimum distance estimation), proposed R package actuar three different goodness--fit distances (Dutang, Goulet, Pigeon 2008). developping fitdistrplus package, second objective consider various estimation methods addition maximum likelihood estimation (MLE). Functions developped enable moment matching estimation (MME), quantile matching estimation (QME), maximum goodness--fit estimation (MGE) using eight different distances. Moreover, fitdistrplus package offers possibility specify user-supplied function optimization, useful cases classical optimization techniques, included optim, adequate. applied statistics, frequent fit distributions censored data Commeau et al. (2012). MASS fitdistr function enable maximum likelihood estimation type data. packages can used work censored data, especially survival data Jordan (2005), packages generally focus specific models, enabling fit restricted set distributions. third objective thus provide R users function estimate univariate distribution parameters right-, left- interval-censored data. packages CRAN provide estimation procedures user-supplied parametric distribution support different types data. distrMod package (Kohl Ruckdeschel 2010) provides object-oriented (S4) implementation probability models includes distribution fitting procedures given minimization criterion. criterion user-supplied function sufficiently flexible handle censored data, yet trivial way, see Example M4 distrMod vignette. fitting functions MLEstimator MDEstimator return S4 class coercion method class mle provided respective functionalities (e.g., confint logLik) package stats4 available, . fitdistrplus, chose use standard S3 class system understanding R users. designing fitdistrplus package, forget implement generic functions also available S3 classes. Finally, various packages provide functions estimate mode, moments L-moments distribution, see reference manuals modeest, lmomco Lmoments packages. package available Comprehensive R Archive Network . paper organized follows: Section 2 presents tools fitting continuous distributions classic non-censored data. Section 3 deals estimation methods types data, Section 4 concludes.","code":""},{"path":[]},{"path":"https://lbbe-software.github.io/fitdistrplus/articles/fitdistrplus_vignette.html","id":"Choice","dir":"Articles","previous_headings":"2 Fitting distributions to continuous non-censored data","what":"Choice of candidate distributions","title":"Overview of the fitdistrplus package","text":"illustrating use various functions fitdistrplus package continuous non-censored data, first use data set named groundbeef included package. data set contains pointwise values serving sizes grams, collected French survey, ground beef patties consumed children 5 years old. 
used quantitative risk assessment published Delignette-Muller Cornu (2008). fitting one distributions data set, generally necessary choose good candidates among predefined set distributions. choice may guided knowledge stochastic processes governing modeled variable, , absence knowledge regarding underlying process, observation empirical distribution. help user choice, developed functions plot characterize empirical distribution. First , common start plots empirical distribution function histogram (density plot), can obtained plotdist function fitdistrplus package. function provides two plots (see Figure 2.1): left-hand plot default histogram density scale (density plot , according values arguments histo demp) right-hand plot empirical cumulative distribution function (CDF). Figure 2.1: Histogram CDF plots empirical distribution continuous variable (serving size groundbeef data set) provided plotdist function. addition empirical plots, descriptive statistics may help choose candidates describe distribution among set parametric distributions. Especially skewness kurtosis, linked third fourth moments, useful purpose. non-zero skewness reveals lack symmetry empirical distribution, kurtosis value quantifies weight tails comparison normal distribution kurtosis equals 3. skewness kurtosis corresponding unbiased estimator (Casella Berger 2002) sample \\((X_i)_i \\stackrel{\\text{..d.}}{\\sim} X\\) observations \\((x_i)_i\\) given : \\[\\begin{equation} sk(X) = \\frac{E[(X-E(X))^3]}{Var(X)^{\\frac{3}{2}}}~,~\\widehat{sk}=\\frac{\\sqrt{n(n-1)}}{n-2}\\times\\frac{m_{3}}{m_{2}^{\\frac{3}{2}}},\\tag{2.1} \\end{equation}\\] \\[\\begin{equation} kr(X) = \\frac{E[(X-E(X))^4]}{Var(X)^{2}}~,~\\widehat{kr}=\\frac{n-1}{(n-2)(n-3)}((n+1) \\times \\frac{m_{4}}{m_{2}^{2}}-3(n-1)) + 3,\\tag{2.2} \\end{equation}\\] \\(m_{2}\\), \\(m_{3}\\), \\(m_{4}\\) denote empirical moments defined \\(m_{k}=\\frac{1}{n}\\sum_{=1}^n(x_{}-\\overline{x})^{k}\\), \\(x_{}\\) \\(n\\) observations variable \\(x\\) \\(\\overline{x}\\) mean value. descdist function provides classical descriptive statistics (minimum, maximum, median, mean, standard deviation), skewness kurtosis. default, unbiased estimations three last statistics provided. Nevertheless, argument method can changed \"unbiased\" (default) \"sample\" obtain without correction bias. skewness-kurtosis plot one proposed Cullen Frey (1999) provided descdist function empirical distribution (see Figure 2.2 groundbeef data set). plot, values common distributions displayed order help choice distributions fit data. distributions (normal, uniform, logistic, exponential), one possible value skewness kurtosis. Thus, distribution represented single point plot. distributions, areas possible values represented, consisting lines (gamma lognormal distributions), larger areas (beta distribution). Skewness kurtosis known robust. order take account uncertainty estimated values kurtosis skewness data, nonparametric bootstrap procedure (Efron Tibshirani 1994) can performed using argument boot. Values skewness kurtosis computed bootstrap samples (constructed random sampling replacement original data set) reported skewness-kurtosis plot. Nevertheless, user needs know skewness kurtosis, like higher moments, high variance. problem completely solved use bootstrap. skewness-kurtosis plot regarded indicative . properties random variable considered, notably expected value range, complement use plotdist descdist functions. 
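To make equations (2.1) and (2.2) concrete, here is a small sketch computing the unbiased estimators by hand (the helper name skewkurt is ours, not part of the package); it should agree with the skewness and kurtosis reported by descdist in the output below.

skewkurt <- function(x) {
  n  <- length(x)
  m2 <- mean((x - mean(x))^2)                    # empirical centered moments
  m3 <- mean((x - mean(x))^3)
  m4 <- mean((x - mean(x))^4)
  sk <- sqrt(n * (n - 1)) / (n - 2) * m3 / m2^(3/2)
  kr <- (n - 1) / ((n - 2) * (n - 3)) * ((n + 1) * m4 / m2^2 - 3 * (n - 1)) + 3
  c(skewness = sk, kurtosis = kr)
}
skewkurt(groundbeef$serving)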
call descdist function describe distribution serving size groundbeef data set draw corresponding skewness-kurtosis plot (see Figure 2.2). Looking results example positive skewness kurtosis far 3, fit three common right-skewed distributions considered, Weibull, gamma lognormal distributions. Figure 2.2: Skewness-kurtosis plot continuous variable (serving size groundbeef data set) provided descdist function.","code":"library(\"fitdistrplus\") ## Loading required package: MASS ## Loading required package: survival data(\"groundbeef\") str(groundbeef) ## 'data.frame': 254 obs. of 1 variable: ## $ serving: num 30 10 20 24 20 24 40 20 50 30 ... plotdist(groundbeef$serving, histo = TRUE, demp = TRUE) descdist(groundbeef$serving, boot = 1000) ## summary statistics ## ------ ## min: 10 max: 200 ## median: 79 ## mean: 73.65 ## estimated sd: 35.88 ## estimated skewness: 0.7353 ## estimated kurtosis: 3.551"},{"path":"https://lbbe-software.github.io/fitdistrplus/articles/fitdistrplus_vignette.html","id":"FIT","dir":"Articles","previous_headings":"2 Fitting distributions to continuous non-censored data","what":"Fit of distributions by maximum likelihood estimation","title":"Overview of the fitdistrplus package","text":"selected, one parametric distributions \\(f(.\\vert \\theta)\\) (parameter \\(\\theta\\\\mathbb{R}^d\\)) may fitted data set, one time, using fitdist function. ..d. sample assumption, distribution parameters \\(\\theta\\) default estimated maximizing likelihood function defined : \\[\\begin{equation} L(\\theta)=\\prod_{=1}^n f(x_{}\\vert \\theta)\\tag{2.3} \\end{equation}\\] \\(x_{}\\) \\(n\\) observations variable \\(X\\) \\(f(.\\vert \\theta)\\) density function parametric distribution. proposed estimation methods described Section 3.1. fitdist function returns S3 object class fitdist print, summary plot functions provided. fit distribution using fitdist assumes corresponding d, p, q functions (standing respectively density, distribution quantile functions) defined. Classical distributions already defined way stats package, e.g., dnorm, pnorm qnorm normal distribution (see ?Distributions). Others may found various packages (see CRAN task view: Probability Distributions ). Distributions found package must implemented user d, p, q functions. call fitdist, distribution specified via argument dist either character string corresponding common root name used names d, p, q functions (e.g., \"norm\" normal distribution) density function , root name extracted (e.g., dnorm normal distribution). Numerical results returned fitdist function (1) parameter estimates, (2) estimated standard errors (computed estimate Hessian matrix maximum likelihood solution), (3) loglikelihood, (4) Akaike Bayesian information criteria (-called AIC BIC), (5) correlation matrix parameter estimates. call fitdist function fit Weibull distribution serving size groundbeef data set. plot object class fitdist provides four classical goodness--fit plots (Cullen Frey 1999) presented Figure 2.3: density plot representing density function fitted distribution along histogram empirical distribution, CDF plot empirical distribution fitted distribution, Q-Q plot representing empirical quantiles (y-axis) theoretical quantiles (x-axis), P-P plot representing empirical distribution function evaluated data point (y-axis) fitted distribution function (x-axis). CDF, Q-Q P-P plots, probability plotting position defined default using Hazen’s rule, probability points empirical distribution calculated (1:n - 0.5)/n, recommended Blom (1959). 
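As an illustration of Hazen's rule, the points of the default P-P plot can be reconstructed by hand as sketched below, assuming fw denotes the Weibull fit of the serving sizes used in this section.

x.sorted <- sort(groundbeef$serving)
n <- length(x.sorted)
p.emp <- (1:n - 0.5) / n                                   # Hazen plotting positions
p.fit <- pweibull(x.sorted, shape = fw$estimate["shape"],
                  scale = fw$estimate["scale"])            # fitted CDF at each data point
# plotting p.fit (x-axis) against p.emp (y-axis) gives the P-P plot points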
plotting position can easily changed (see reference manual details (Delignette-Muller et al. 2014)). Unlike generic plot function, denscomp, cdfcomp, qqcomp ppcomp functions enable draw separately four plots, order compare empirical distribution multiple parametric distributions fitted data set. functions must called first argument corresponding list objects class fitdist, optionally arguments customize plot (see reference manual lists arguments may specific plot (Delignette-Muller et al. 2014)). following example, compare fit Weibull, lognormal gamma distributions groundbeef data set (Figure 2.3). Figure 2.3: Four Goodness--fit plots various distributions fitted continuous data (Weibull, gamma lognormal distributions fitted serving sizes groundbeef data set) provided functions denscomp, qqcomp, cdfcomp ppcomp. density plot CDF plot may considered basic classical goodness--fit plots. two plots complementary can informative cases. Q-Q plot emphasizes lack--fit distribution tails P-P plot emphasizes lack--fit distribution center. present example (Figure 2.3), none three fitted distributions correctly describes center distribution, Weibull gamma distributions prefered better description right tail empirical distribution, especially tail important use fitted distribution, context food risk assessment. data set named endosulfan now used illustrate features fitdistrplus package. data set contains acute toxicity values organochlorine pesticide endosulfan (geometric mean LC50 ou EC50 values \\(\\mu g.L^{-1}\\)), tested Australian non-Australian laboratory-species (Hose Van den Brink 2004). ecotoxicology, lognormal loglogistic distribution often fitted data set order characterize species sensitivity distribution (SSD) pollutant. low percentile fitted distribution, generally 5% percentile, calculated named hazardous concentration 5% (HC5). interpreted value pollutant concentration protecting 95% species (Posthuma, Suter, Traas 2010). fit lognormal loglogistic distribution whole endosulfan data set rather bad (Figure 2.4), especially due minority high values. two-parameter Pareto distribution three-parameter Burr distribution (extension loglogistic Pareto distributions) fitted. Pareto Burr distributions provided package actuar. , define starting values (optimization process) reasonable starting values implicity defined within fitdist function distributions defined R (see ?fitdist details). distributions like Pareto Burr distribution, initial values distribution parameters supplied argument start, named list initial values parameter (appear d, p, q functions). defined reasonable starting values1 various distributions can fitted graphically compared. example, function cdfcomp can used report CDF values logscale emphasize discrepancies tail interest defining HC5 value (Figure 2.4). Figure 2.4: CDF plot compare fit four distributions acute toxicity values various organisms organochlorine pesticide endosulfan (endosulfan data set) provided cdfcomp function, CDF values logscale emphasize discrepancies left tail. None fitted distribution correctly describes right tail observed data set, shown Figure 2.4, left-tail seems better described Burr distribution. use considered estimate HC5 value 5% quantile distribution. can easily done using quantile generic function defined object class fitdist. calculation together calculation empirical quantile comparison. addition ecotoxicology context, quantile generic function also attractive actuarial-financial context. 
fact, value--risk \\(VAR_\\alpha\\) defined \\(1-\\alpha\\)-quantile loss distribution can computed quantile fitdist object. computation different goodness--fit statistics proposed fitdistrplus package order compare fitted distributions. purpose goodness--fit statistics aims measure distance fitted parametric distribution empirical distribution: e.g., distance fitted cumulative distribution function \\(F\\) empirical distribution function \\(F_{n}\\). fitting continuous distributions, three goodness--fit statistics classicaly considered: Cramer-von Mises, Kolmogorov-Smirnov Anderson-Darling statistics (D’Agostino Stephens 1986). Naming \\(x_{}\\) \\(n\\) observations continuous variable \\(X\\) arranged ascending order, Table 2.1 gives definition empirical estimate three considered goodness--fit statistics. can computed using function gofstat defined Stephens (D’Agostino Stephens 1986). Table 2.1: Goodness--fit statistics defined Stephens (D’Agostino Stephens 1986). \\(F_i\\stackrel{\\triangle}{=} F(x_i)\\) giving weight distribution tails, Anderson-Darling statistic special interest matters equally emphasize tails well main body distribution. often case risk assessment Vose (2010). reason, statistics often used select best distribution among fitted. Nevertheless, statistics used cautiously comparing fits various distributions. Keeping mind weighting CDF quadratic difference depends parametric distribution definition (see Table 2.1), Anderson-Darling statistics computed several distributions fitted data set theoretically difficult compare. Moreover, statistic, Cramer-von Mises Kolmogorov-Smirnov ones, take account complexity model (.e., parameter number). problem compared distributions characterized number parameters, systematically promote selection complex distributions case. Looking classical penalized criteria based loglikehood (AIC, BIC) seems thus also interesting, especially discourage overfitting. previous example, goodness--fit statistics based CDF distance favor Burr distribution, one characterized three parameters, AIC BIC values respectively give preference Burr distribution Pareto distribution. choice two distributions seems thus less obvious discussed. Even specifically recommended discrete distributions, Chi-squared statistic may also used continuous distributions (see Section 3.3 reference manual examples (Delignette-Muller et al. 2014)).","code":"fw <- fitdist(groundbeef$serving, \"weibull\") summary(fw) ## Fitting of the distribution ' weibull ' by maximum likelihood ## Parameters : ## estimate Std. 
Error ## shape 2.186 0.1046 ## scale 83.348 2.5269 ## Loglikelihood: -1255 AIC: 2514 BIC: 2522 ## Correlation matrix: ## shape scale ## shape 1.0000 0.3218 ## scale 0.3218 1.0000 par(mfrow = c(2, 2), mar = c(4, 4, 2, 1)) fg <- fitdist(groundbeef$serving, \"gamma\") fln <- fitdist(groundbeef$serving, \"lnorm\") plot.legend <- c(\"Weibull\", \"lognormal\", \"gamma\") denscomp(list(fw, fln, fg), legendtext = plot.legend) qqcomp(list(fw, fln, fg), legendtext = plot.legend) cdfcomp(list(fw, fln, fg), legendtext = plot.legend) ppcomp(list(fw, fln, fg), legendtext = plot.legend) library(actuar) ## ## Attaching package: 'actuar' ## The following objects are masked from 'package:stats': ## ## sd, var ## The following object is masked from 'package:grDevices': ## ## cm data(\"endosulfan\") ATV <- endosulfan$ATV fendo.ln <- fitdist(ATV, \"lnorm\") fendo.ll <- fitdist(ATV, \"llogis\", start = list(shape = 1, scale = 500)) fendo.P <- fitdist(ATV, \"pareto\", start = list(shape = 1, scale = 500)) fendo.B <- fitdist(ATV, \"burr\", start = list(shape1 = 0.3, shape2 = 1, rate = 1)) cdfcomp(list(fendo.ln, fendo.ll, fendo.P, fendo.B), xlogscale = TRUE, ylogscale = TRUE, legendtext = c(\"lognormal\", \"loglogistic\", \"Pareto\", \"Burr\")) quantile(fendo.B, probs = 0.05) ## Estimated quantiles for each specified probability (non-censored data) ## p=0.05 ## estimate 0.2939 quantile(ATV, probs = 0.05) ## 5% ## 0.2 gofstat(list(fendo.ln, fendo.ll, fendo.P, fendo.B), fitnames = c(\"lnorm\", \"llogis\", \"Pareto\", \"Burr\")) ## Goodness-of-fit statistics ## lnorm llogis Pareto Burr ## Kolmogorov-Smirnov statistic 0.1672 0.1196 0.08488 0.06155 ## Cramer-von Mises statistic 0.6374 0.3827 0.13926 0.06803 ## Anderson-Darling statistic 3.4721 2.8316 0.89206 0.52393 ## ## Goodness-of-fit criteria ## lnorm llogis Pareto Burr ## Akaike's Information Criterion 1069 1069 1048 1046 ## Bayesian Information Criterion 1074 1075 1053 1054"},{"path":"https://lbbe-software.github.io/fitdistrplus/articles/fitdistrplus_vignette.html","id":"Uncertainty","dir":"Articles","previous_headings":"2 Fitting distributions to continuous non-censored data","what":"Uncertainty in parameter estimates","title":"Overview of the fitdistrplus package","text":"uncertainty parameters fitted distribution can estimated parametric nonparametric bootstraps using boodist function non-censored data (Efron Tibshirani 1994). function returns bootstrapped values parameters S3 class object can plotted visualize bootstrap region. medians 95% confidence intervals parameters (2.5 97.5 percentiles) printed summary. inferior whole number iterations (due lack convergence optimization algorithm bootstrapped data sets), number iterations estimation converges also printed summary. plot object class bootdist consists scatterplot matrix scatterplots bootstrapped values parameters providing representation joint uncertainty distribution fitted parameters. example use bootdist function previous fit Burr distribution endosulfan data set (Figure 2.5). Figure 2.5: Bootstrappped values parameters fit Burr distribution characterized three parameters (example endosulfan data set) provided plot object class bootdist. Bootstrap samples parameter estimates useful especially calculate confidence intervals parameter fitted distribution marginal distribution bootstraped values. also interesting look joint distribution bootstraped values scatterplot (matrix scatterplots number parameters exceeds two) order understand potential structural correlation parameters (see Figure 2.5). 
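A quick numerical counterpart of that visual check is the correlation matrix of the bootstrapped estimates, sketched below for the bendo.B object created in the code that follows.

round(cor(bendo.B$estim), 2)   # pairwise correlations between bootstrapped shape1, shape2 and rate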
use whole bootstrap sample also interest risk assessment field. use enables characterization uncertainty distribution parameters. can directly used within second-order Monte Carlo simulation framework, especially within package mc2d (Pouillot, Delignette-Muller, Denis 2011). One refer Pouillot Delignette-Muller (2010) introduction use mc2d fitdistrplus packages context quantitative risk assessment. bootstrap method can also used calculate confidence intervals quantiles fitted distribution. purpose, generic quantile function provided class bootdist. default, 95% percentiles bootstrap confidence intervals quantiles provided. Going back previous example ecotoxicolgy, function can used estimate uncertainty associated HC5 estimation, example previously fitted Burr distribution endosulfan data set.","code":"bendo.B <- bootdist(fendo.B, niter = 1001) summary(bendo.B) ## Parametric bootstrap medians and 95% percentile CI ## Median 2.5% 97.5% ## shape1 0.1983 0.09283 0.3606 ## shape2 1.5863 1.05306 3.0629 ## rate 1.4907 0.70828 2.7775 plot(bendo.B) quantile(bendo.B, probs = 0.05) ## (original) estimated quantiles for each specified probability (non-censored data) ## p=0.05 ## estimate 0.2939 ## Median of bootstrap estimates ## p=0.05 ## estimate 0.2994 ## ## two-sided 95 % CI of each quantile ## p=0.05 ## 2.5 % 0.1792 ## 97.5 % 0.4999"},{"path":[]},{"path":"https://lbbe-software.github.io/fitdistrplus/articles/fitdistrplus_vignette.html","id":"Alternatives","dir":"Articles","previous_headings":"3 Advanced topics","what":"Alternative methods for parameter estimation","title":"Overview of the fitdistrplus package","text":"subsection focuses alternative estimation methods. One alternative continuous distributions maximum goodness--fit estimation method also called minimum distance estimation method Dutang, Goulet, Pigeon (2008). package method proposed eight different distances: three classical distances defined Table 2.1, one variants Anderson-Darling distance proposed Luceno (2006) defined Table 3.1. right-tail AD gives weight right-tail, left-tail AD gives weight left tail. Either tails, , can receive even larger weights using second order Anderson-Darling Statistics. Table 3.1: Modified Anderson-Darling statistics defined Luceno (2006). \\(F_i\\stackrel{\\triangle}{=} F(x_{})\\) \\(\\overline F_i\\stackrel{\\triangle}{=}1-F(x_{})\\) fit distribution maximum goodness--fit estimation, one needs fix argument method mge call fitdist specify argument gof coding chosen goodness--fit distance. function intended used continuous non-censored data. Maximum goodness--fit estimation may useful give weight data one tail distribution. previous example ecotoxicology, used non classical distribution (Burr distribution) correctly fit empirical distribution especially left tail. order correctly estimate 5\\(\\%\\) percentile, also consider fit classical lognormal distribution, minimizing goodness--fit distance giving weight left tail empirical distribution. follows, left tail Anderson-Darling distances first second order used fit lognormal endosulfan data set (see Figure 3.1). Figure 3.1: Comparison lognormal distribution fitted MLE MGE using two different goodness--fit distances: left-tail Anderson-Darling left-tail Anderson Darling second order (example endosulfan data set) provided cdfcomp function, CDF values logscale emphasize discrepancies left tail. 
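The same call pattern works with any of the other distances of Table 3.1. A hedged sketch, assuming ATV, fendo.ln and fendo.ln.ADL exist as created in this section's code, using the Kolmogorov-Smirnov distance for contrast:
fendo.ln.KS <- fitdist(ATV, "lnorm", method = "mge", gof = "KS")
cdfcomp(list(fendo.ln, fendo.ln.KS, fendo.ln.ADL), xlogscale = TRUE, ylogscale = TRUE,
        legendtext = c("MLE", "MGE KS", "MGE left-tail AD"))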
Comparing 5% percentiles (HC5) calculated using three fits one calculated MLE fit Burr distribution, can observe, example, fitting lognormal distribution maximizing left tail Anderson-Darling distances first second order enables approach value obtained fitting Burr distribution MLE. moment matching estimation (MME) another method commonly used fit parametric distributions (Vose 2010). MME consists finding value parameter \\(\\theta\\) equalizes first theoretical raw moments parametric distribution corresponding empirical raw moments Equation (3.1): \\[\\begin{equation} E(X^{k}|\\theta)=\\frac{1}{n}\\sum_{=1}^{n}x_{}^{k},\\tag{3.1} \\end{equation}\\] \\(k=1,\\ldots,d\\), \\(d\\) number parameters estimate \\(x_{}\\) \\(n\\) observations variable \\(X\\). moments order greater equal 2, may also relevant match centered moments. Therefore, match moments given Equation (3.2): \\[\\begin{equation} E(X\\vert \\theta) = \\overline{x} ~,~E\\left((X-E(X))^{k}|\\theta\\right)=m_k, \\text{ } k=2,\\ldots,d,\\tag{3.2} \\end{equation}\\] \\(m_k\\) denotes empirical centered moments. method can performed setting argument method \"mme\" call fitdist. estimate computed closed-form formula following distributions: normal, lognormal, exponential, Poisson, gamma, logistic, negative binomial, geometric, beta uniform distributions. case, distributions characterized one parameter (geometric, Poisson exponential), parameter simply estimated matching theoretical observed means, distributions characterized two parameters, parameters estimated matching theoretical observed means variances (Vose 2010). distributions, equation moments solved numerically using optim function minimizing sum squared differences observed theoretical moments (see fitdistrplus reference manual technical details (Delignette-Muller et al. 2014)). classical data set Danish insurance industry published McNeil (1997) used illustrate method. fitdistrplus, data set stored danishuni univariate version contains loss amounts collected Copenhagen Reinsurance 1980 1990. actuarial science, standard consider positive heavy-tailed distributions special focus right-tail distributions. numerical experiment, choose classic actuarial distributions loss modelling: lognormal distribution Pareto type II distribution (Klugman, Panjer, Willmot 2009). lognormal distribution fitted danishuni data set matching moments implemented closed-form formula. left-hand graph Figure 3.2, fitted distribution functions obtained using moment matching estimation (MME) maximum likelihood estimation (MLE) methods compared. MME method provides cautious estimation insurance risk MME-fitted distribution function (resp. MLE-fitted) underestimates (overestimates) empirical distribution function large values claim amounts. Figure 3.2: Comparison MME MLE fitting lognormal Pareto distribution loss data danishuni data set. second time, Pareto distribution, gives weight right-tail distribution, fitted. lognormal distribution, Pareto two parameters, allows fair comparison. use implementation actuar package providing raw centered moments distribution (addition d, p, q r functions (Goulet 2012). Fitting heavy-tailed distribution first second moments exist certain values shape parameter requires cautiousness. carried providing, optimization process, lower upper bound parameter. code calls L-BFGS-B optimization method optim, since quasi-Newton allows box constraints 2. choose match moments defined Equation (3.1), function computing empirical raw moment (called memp example) passed fitdist. 
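For a distribution with a closed-form MME no memp function is needed. A minimal sketch, assuming the groundbeef data and the standard shape/rate parameterization of the gamma distribution, which checks the matched raw moments afterwards:
library(fitdistrplus)
data("groundbeef")
x <- groundbeef$serving
fg.mme <- fitdist(x, "gamma", method = "mme")
shape <- fg.mme$estimate["shape"]; rate <- fg.mme$estimate["rate"]
c(empirical = mean(x),   fitted = unname(shape / rate))                  # first raw moment
c(empirical = mean(x^2), fitted = unname(shape * (shape + 1) / rate^2))  # second raw moment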
two-parameter distributions (.e., \\(d=2\\)), Equations (3.1) (3.2) equivalent. shown Figure 3.2, MME MLE fits far less distant (looking right-tail) Pareto distribution lognormal distribution data set. Furthermore, two distributions, MME method better fits right-tail distribution visual point view. seems logical since empirical moments influenced large observed values. previous traces, gave values goodness--fit statistics. Whatever statistic considered, MLE-fitted lognormal always provides best fit observed data. Maximum likelihood moment matching estimations certainly commonly used method fitting distributions (Cullen Frey 1999). Keeping mind two methods may produce different results, user aware great sensitivity outliers choosing moment matching estimation. may seen advantage example objective better describe right tail distribution, may seen drawback objective different. Fitting parametric distribution may also done matching theoretical quantiles parametric distributions (specified probabilities) empirical quantiles (Tse 2009). equality theoretical empirical quantiles expressed Equation (3.3) , similar Equations (3.1) (3.2): \\[\\begin{equation} F^{-1}(p_{k}|\\theta)=Q_{n,p_{k}}\\tag{3.3} \\end{equation}\\] \\(k=1,\\ldots,d\\), \\(d\\) number parameters estimate (dimension \\(\\theta\\) fixed parameters) \\(Q_{n,p_{k}}\\) empirical quantiles calculated data specified probabilities \\(p_{k}\\). Quantile matching estimation (QME) performed setting argument method \"qme\" call fitdist adding argument probs defining probabilities quantile matching performed (see Figure 3.3). length vector must equal number parameters estimate (vector moment orders MME). Empirical quantiles computed using quantile function stats package using type=7 default (see ?quantile Hyndman Fan (1996)). type quantile can easily changed using qty argument call qme function. quantile matching carried numerically, minimizing sum squared differences observed theoretical quantiles. Figure 3.3: Comparison QME MLE fitting lognormal distribution loss data danishuni data set. example fitting lognormal distribution `danishuni} data set matching probabilities \\((p_1= 1/3, p_2=2/3)\\) \\((p_1= 8/10, p_2=9/10)\\). expected, second QME fit gives weight right-tail distribution. Compared maximum likelihood estimation, second QME fit best suits right-tail distribution, whereas first QME fit best models body distribution. quantile matching estimation particular interest need focus around particular quantiles, e.g., \\(p=99.5\\%\\) Solvency II insurance context \\(p=5\\%\\) HC5 estimation ecotoxicology context.","code":"fendo.ln.ADL <- fitdist(ATV, \"lnorm\", method = \"mge\", gof = \"ADL\") fendo.ln.AD2L <- fitdist(ATV, \"lnorm\", method = \"mge\", gof = \"AD2L\") cdfcomp(list(fendo.ln, fendo.ln.ADL, fendo.ln.AD2L), xlogscale = TRUE, ylogscale = TRUE, main = \"Fitting a lognormal distribution\", xlegend = \"bottomright\", legendtext = c(\"MLE\", \"Left-tail AD\", \"Left-tail AD 2nd order\")) (HC5.estimates <- c( empirical = as.numeric(quantile(ATV, probs = 0.05)), Burr = as.numeric(quantile(fendo.B, probs = 0.05)$quantiles), lognormal_MLE = as.numeric(quantile(fendo.ln, probs = 0.05)$quantiles), lognormal_AD2 = as.numeric(quantile(fendo.ln.ADL, probs = 0.05)$quantiles), lognormal_AD2L = as.numeric(quantile(fendo.ln.AD2L, probs = 0.05)$quantiles))) ## empirical Burr lognormal_MLE lognormal_AD2 lognormal_AD2L ## 0.20000 0.29393 0.07259 0.19591 0.25877 data(\"danishuni\") str(danishuni) ## 'data.frame': 2167 obs. 
of 2 variables: ## $ Date: Date, format: \"1980-01-03\" \"1980-01-04\" ... ## $ Loss: num 1.68 2.09 1.73 1.78 4.61 ... fdanish.ln.MLE <- fitdist(danishuni$Loss, \"lnorm\") fdanish.ln.MME <- fitdist(danishuni$Loss, \"lnorm\", method = \"mme\", order = 1:2) library(actuar) fdanish.P.MLE <- fitdist(danishuni$Loss, \"pareto\", start = list(shape = 10, scale = 10), lower = 2+1e-6, upper = Inf) ## Warning in cov2cor(varcovar): diag(V) had non-positive or NA entries; the ## non-finite result may be dubious ## Warning in sqrt(diag(varcovar)): NaNs produced memp <- function(x, order) mean(x^order) fdanish.P.MME <- fitdist(danishuni$Loss, \"pareto\", method = \"mme\", order = 1:2, memp = \"memp\", start = list(shape = 10, scale = 10), lower = c(2+1e-6, 2+1e-6), upper = c(Inf, Inf)) ## Warning in cov2cor(varcovar): diag(V) had non-positive or NA entries; the ## non-finite result may be dubious par(mfrow = c(1, 2)) cdfcomp(list(fdanish.ln.MLE, fdanish.ln.MME), legend = c(\"lognormal MLE\", \"lognormal MME\"), main = \"Fitting a lognormal distribution\", xlogscale = TRUE, datapch = 20) cdfcomp(list(fdanish.P.MLE, fdanish.P.MME), legend = c(\"Pareto MLE\", \"Pareto MME\"), main = \"Fitting a Pareto distribution\", xlogscale = TRUE, datapch = 20) gofstat(list(fdanish.ln.MLE, fdanish.P.MLE, fdanish.ln.MME, fdanish.P.MME), fitnames = c(\"lnorm.mle\", \"Pareto.mle\", \"lnorm.mme\", \"Pareto.mme\")) ## Goodness-of-fit statistics ## lnorm.mle Pareto.mle lnorm.mme Pareto.mme ## Kolmogorov-Smirnov statistic 0.1375 0.3124 0.4368 0.37 ## Cramer-von Mises statistic 14.7911 37.7227 88.9503 55.43 ## Anderson-Darling statistic 87.1933 208.3388 416.2567 281.58 ## ## Goodness-of-fit criteria ## lnorm.mle Pareto.mle lnorm.mme Pareto.mme ## Akaike's Information Criterion 8120 9250 9792 9409 ## Bayesian Information Criterion 8131 9261 9803 9420 fdanish.ln.QME1 <- fitdist(danishuni$Loss, \"lnorm\", method = \"qme\", probs = c(1/3, 2/3)) fdanish.ln.QME2 <- fitdist(danishuni$Loss, \"lnorm\", method = \"qme\", probs = c(8/10, 9/10)) cdfcomp(list(fdanish.ln.MLE, fdanish.ln.QME1, fdanish.ln.QME2), legend = c(\"MLE\", \"QME(1/3, 2/3)\", \"QME(8/10, 9/10)\"), main = \"Fitting a lognormal distribution\", xlogscale = TRUE, datapch = 20)"},{"path":"https://lbbe-software.github.io/fitdistrplus/articles/fitdistrplus_vignette.html","id":"Customization","dir":"Articles","previous_headings":"3 Advanced topics","what":"Customization of the optimization algorithm","title":"Overview of the fitdistrplus package","text":"time numerical minimization carried fitdistrplus package, optim function stats package used default Nelder-Mead method distributions characterized one parameter BFGS method distributions characterized one parameter. Sometimes default algorithm fails converge. interesting change options optim function use another optimization function optim minimize objective function. argument optim.method can used call fitdist fitdistcens. internally passed mledist, mmedist, mgedist qmedist, optim (see ?optim details different algorithms available). Even error raised computing optimization, changing algorithm particular interest enforce bounds parameters. instance, volatility parameter \\(\\sigma\\) strictly positive \\(\\sigma>0\\) probability parameter \\(p\\) lies \\(p\\[0,1]\\). possible using arguments lower /upper, use automatically forces optim.method=\"L-BFGS-B\". examples fits gamma distribution \\(\\mathcal{G}(\\alpha, \\lambda)\\) groundbeef data set various algorithms. 
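A hedged sketch of the bounded variant mentioned just above: a tiny positive lower bound keeps both gamma parameters strictly positive and switches the internal optimization to "L-BFGS-B".
library(fitdistrplus)
data("groundbeef")
fLBFGSB <- fitdist(groundbeef$serving, "gamma", lower = c(1e-6, 1e-6))
summary(fLBFGSB)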
Note conjugate gradient algorithm (CG) needs far iterations converge (around 2500 iterations) compared algorithms (converging less 100 iterations). also possible use another function optim minimize objective function specifying argument custom.optim call fitdist. may necessary customize optimization function meet following requirements. (1) custom.optim function must following arguments: fn function optimized par initialized parameters. (2) custom.optim carry MINIMIZATION must return following components: par estimate, convergence convergence code, value=fn(par) hessian. example code written wrap genoud function rgenoud package order respect optimization ``template’’. rgenoud package implements genetic (stochastic) algorithm. customized optimization function can passed argument custom.optim call fitdist fitdistcens. following code can example used fit gamma distribution groundbeef data set. Note example various arguments also passed fitdist genoud: nvars, Domains, boundary.enforcement, print.level hessian. code compares parameter estimates (\\(\\hat\\alpha\\), \\(\\hat\\lambda\\)) different algorithms: shape \\(\\alpha\\) rate \\(\\lambda\\) parameters relatively similar example, roughly 4.00 0.05, respectively.","code":"data(\"groundbeef\") fNM <- fitdist(groundbeef$serving, \"gamma\", optim.method = \"Nelder-Mead\") fBFGS <- fitdist(groundbeef$serving, \"gamma\", optim.method = \"BFGS\") fSANN <- fitdist(groundbeef$serving, \"gamma\", optim.method = \"SANN\") fCG <- try(fitdist(groundbeef$serving, \"gamma\", optim.method = \"CG\", control = list(maxit = 10000))) if(inherits(fCG, \"try-error\")) {fCG <- list(estimate = NA)} mygenoud <- function(fn, par, ...) { require(rgenoud) res <- genoud(fn, starting.values = par, ...) standardres <- c(res, convergence = 0) return(standardres) } fgenoud <- mledist(groundbeef$serving, \"gamma\", custom.optim = mygenoud, nvars = 2, max.generations = 10, Domains = cbind(c(0, 0), c(10, 10)), boundary.enforcement = 1, hessian = TRUE, print.level = 0, P9 = 10) ## Loading required package: rgenoud ## ## rgenoud (Version 5.9-0.10, Build Date: 2023-12-13) ## ## See http://sekhon.berkeley.edu/rgenoud for additional documentation. ## ## Please cite software as: ## ## Walter Mebane, Jr. and Jasjeet S. Sekhon. 2011. ## ## ``Genetic Optimization Using Derivatives: The rgenoud package for R.'' ## ## Journal of Statistical Software, 42(11): 1-26. ## ## cbind(NM = fNM$estimate, BFGS = fBFGS$estimate, SANN = fSANN$estimate, CG = fCG$estimate, fgenoud = fgenoud$estimate) ## NM BFGS SANN CG fgenoud ## shape 4.00956 4.2118 4.058 4.12783 4.00834 ## rate 0.05444 0.0572 0.055 0.05605 0.05443"},{"path":"https://lbbe-software.github.io/fitdistrplus/articles/fitdistrplus_vignette.html","id":"otherdata","dir":"Articles","previous_headings":"3 Advanced topics","what":"Fitting distributions to other types of data","title":"Overview of the fitdistrplus package","text":"section modified since publication vignette Journal Statistical Software order include new goodness--fit plots censored discrete data. Analytical methods often lead semi-quantitative results referred censored data. Observations known limit detection left-censored data. Observations known limit quantification right-censored data. Results known lie two bounds interval-censored data. two bounds may correspond limit detection limit quantification, generally uncertainty bounds around observation. Right-censored data also commonly encountered survival data (Klein Moeschberger 2003). 
data set may thus contain right-, left-, interval-censored data, may mixture categories, possibly different upper lower bounds. Censored data sometimes excluded data analysis replaced fixed value, cases may lead biased results. recommended approach correctly model data based upon maximum likelihood Helsel (2005). Censored data may thus contain left-censored, right-censored interval-censored values, several lower upper bounds. use package fitdistrplus, data must coded dataframe two columns, respectively named left right, describing observed value interval. left column contains either NA left censored observations, left bound interval interval censored observations, observed value non-censored observations. right column contains either NA right censored observations, right bound interval interval censored observations, observed value non-censored observations. illustrate use package fitdistrplus fit distributions censored continous data, use another data set ecotoxicology, included package named salinity. data set contains acute salinity tolerance (LC50 values electrical conductivity, \\(mS\\).\\(cm^{-1}\\)) riverine macro-invertebrates taxa southern Murray-Darling Basin Central Victoria, Australia (Kefford et al. 2007). Using censored data coded salinity} data set, empirical distribution can plotted using theplotdistcens} function. older versions package, default function used Expectation-Maximization approach Turnbull (1974) compute overall empirical cdf curve optional confidence intervals, calls survfit plot.survfit functions survival package. Even representation always available (fixing argument NPMLE.method \"Turnbull.middlepoints\"), now default plot empirical cumulative distribution function (ECDF) explicitly represents regions non uniqueness NPMLE ECDF. default computation regions non uniqueness associated masses uses non parametric maximum likelihood estimation (NPMLE) approach developped Wang Wang Fani (2018). Figure 3.5 shows top left new plot data together two fitted distributions. Grey filled rectangles plot represent regions non uniqueness NPMLE ECDF. less rigorous sometimes illustrative plot can obtained fixing argument NPMLE FALSE call plotdistcens (see Figure 3.4 example help page Function plotdistcens details). plot enables see real nature censored data, points intervals, difficulty building plot define relevant ordering observations. Figure 3.4: Simple plot censored raw data (72-hour acute salinity tolerance riverine macro-invertebrates salinity data set) ordered points intervals. non censored data, one parametric distributions can fitted censored data set, one time, using case fitdistcens function. function estimates vector distribution parameters \\(\\theta\\) maximizing likelihood censored data defined : \\[\\begin{equation} L(\\theta) = \\prod_{=1}^{N_{nonC}} f(x_{}|\\theta)\\times \\prod_{j=1}^{N_{leftC}} F(x^{upper}_{j}|\\theta) \\\\ \\times \\prod_{k=1}^{N_{rightC}} (1- F(x^{lower}_{k}|\\theta))\\times \\prod_{m=1}^{N_{intC}} (F(x^{upper}_{m}|\\theta)- F(x^{lower}_{j}|\\theta))\\tag{3.4} \\end{equation}\\] \\(x_{}\\) \\(N_{nonC}\\) non-censored observations, \\(x^{upper}_{j}\\) upper values defining \\(N_{leftC}\\) left-censored observations, \\(x^{lower}_{k}\\) lower values defining \\(N_{rightC}\\) right-censored observations, \\([x^{lower}_{m} ; x^{upper}_{m}]\\) intervals defining \\(N_{intC}\\) interval-censored observations, F cumulative distribution function parametric distribution Helsel (2005). 
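A minimal sketch with hypothetical toy values, only to illustrate the left/right coding and the call to fitdistcens; the salinity example below uses the same mechanism on real data:
library(fitdistrplus)
toy <- data.frame(
  left  = c(NA,  2.0, 5.1, 3.0, 4.2, NA),   # NA = left-censored (only an upper bound is known)
  right = c(1.5, 2.0,  NA, 4.0, 4.2, 0.8)   # NA = right-censored; left == right = exact; left < right = interval
)
ftoy <- fitdistcens(toy, "lnorm")   # maximises the censored likelihood of equation (3.4)
summary(ftoy)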
fitdist, fitdistcens returns results fit parametric distribution data set S3 class object can easily printed, summarized plotted. salinity data set, lognormal distribution loglogistic can fitted commonly done ecotoxicology data. fitdist, distributions (see Delignette-Muller et al. (2014) details), necessary specify initial values distribution parameters argument start. plotdistcens function can help find correct initial values distribution parameters non trivial cases, manual iterative use necessary. Computations goodness--fit statistics yet developed fits using censored data quality fit can judged using Akaike Schwarz’s Bayesian information criteria (AIC BIC) goodness--fit CDF plot, respectively provided summarizing plotting object class fitdistcens. Functions cdfcompcens, qqcompcens ppcompcens can also used compare fit various distributions censored data set. calls similar ones cdfcomp, qqcomp ppcomp. examples use functions two fitted distributions salinity data set (see Figure 3.5). qqcompcens ppcompcens used one fitted distribution, non uniqueness rectangles filled small noise added y-axis order help visualization various fits. rather recommend use plotstyle ggplot qqcompcens ppcompcens compare fits various distributions provides clearer plot splitted facets (see ?graphcompcens). Figure 3.5: goodness--fit plots fits lognormal loglogistic distribution censored data: LC50 values salinity data set. Function bootdistcens equivalent bootdist censored data, except proposes nonparametric bootstrap. Indeed, obvious simulate censoring within parametric bootstrap resampling procedure. generic function quantile can also applied object class fitdistcens bootdistcens, continuous non-censored data. addition fit distributions censored non censored continuous data, package can also accomodate discrete variables, count numbers, using functions developped continuous non-censored data. functions provide somewhat different graphs statistics, taking account discrete nature modeled variable. discrete nature variable automatically recognized classical distribution fitted data (binomial, negative binomial, geometric, hypergeometric Poisson distributions) must indicated fixing argument discrete TRUE call functions cases. toxocara data set included package corresponds observation discrete variable. Numbers Toxocara cati parasites present digestive tract reported random sampling feral cats living Kerguelen island (Fromont et al. 2001). use illustrate case discrete data. fit discrete distribution discrete data maximum likelihood estimation requires procedure continuous non-censored data. example, using toxocara data set, Poisson negative binomial distributions can easily fitted. discrete distributions, plot object class fitdist simply provides two goodness--fit plots comparing empirical theoretical distributions density CDF. Functions cdfcomp denscomp can also used compare several plots data set, follows previous fits (Figure 3.6). Figure 3.6: Comparison fits negative binomial Poisson distribution numbers Toxocara cati parasites toxocara data set. fitting discrete distributions, Chi-squared statistic computed gofstat function using cells defined argument chisqbreaks cells automatically defined data order reach roughly number observations per cell. number roughly equal argument meancount, sligthly greater ties. choice define cells empirical distribution (data), theoretical distribution, done enable comparison Chi-squared values obtained different distributions fitted data set. 
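A hedged sketch of the chisqbreaks argument on the toxocara fits of this section; the break points here are arbitrary illustrative choices, not recommended values:
library(fitdistrplus)
data("toxocara")
ftoxo.P  <- fitdist(toxocara$number, "pois")
ftoxo.nb <- fitdist(toxocara$number, "nbinom")
gofstat(list(ftoxo.P, ftoxo.nb), chisqbreaks = c(0, 1, 3, 5, 10, 20),
        fitnames = c("Poisson", "negative binomial"))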
arguments chisqbreaks meancount omitted, meancount fixed order obtain roughly \\((4n)^{2/5}\\) cells, \\(n\\) length data set (Vose 2010). Using default option two previous fits compared follows, giving preference negative binomial distribution, Chi-squared statistics information criteria:","code":"data(\"salinity\") str(salinity) ## 'data.frame': 108 obs. of 2 variables: ## $ left : num 20 20 20 20 20 21.5 15 20 23.7 25 ... ## $ right: num NA NA NA NA NA 21.5 30 25 23.7 NA ... plotdistcens(salinity, NPMLE = FALSE) fsal.ln <- fitdistcens(salinity, \"lnorm\") fsal.ll <- fitdistcens(salinity, \"llogis\", start = list(shape = 5, scale = 40)) summary(fsal.ln) ## Fitting of the distribution ' lnorm ' By maximum likelihood on censored data ## Parameters ## estimate Std. Error ## meanlog 3.3854 0.06487 ## sdlog 0.4961 0.05455 ## Loglikelihood: -139.1 AIC: 282.1 BIC: 287.5 ## Correlation matrix: ## meanlog sdlog ## meanlog 1.0000 0.2938 ## sdlog 0.2938 1.0000 summary(fsal.ll) ## Fitting of the distribution ' llogis ' By maximum likelihood on censored data ## Parameters ## estimate Std. Error ## shape 3.421 0.4158 ## scale 29.930 1.9447 ## Loglikelihood: -140.1 AIC: 284.1 BIC: 289.5 ## Correlation matrix: ## shape scale ## shape 1.0000 -0.2022 ## scale -0.2022 1.0000 par(mfrow = c(2, 2)) cdfcompcens(list(fsal.ln, fsal.ll), legendtext = c(\"lognormal\", \"loglogistic \")) qqcompcens(fsal.ln, legendtext = \"lognormal\") ppcompcens(fsal.ln, legendtext = \"lognormal\") qqcompcens(list(fsal.ln, fsal.ll), legendtext = c(\"lognormal\", \"loglogistic \"), main = \"Q-Q plot with 2 dist.\") data(\"toxocara\") str(toxocara) ## 'data.frame': 53 obs. of 1 variable: ## $ number: int 0 0 0 0 0 0 0 0 0 0 ... (ftoxo.P <- fitdist(toxocara$number, \"pois\")) ## Fitting of the distribution ' pois ' by maximum likelihood ## Parameters: ## estimate Std. Error ## lambda 8.679 0.4047 (ftoxo.nb <- fitdist(toxocara$number, \"nbinom\")) ## Fitting of the distribution ' nbinom ' by maximum likelihood ## Parameters: ## estimate Std. Error ## size 0.3971 0.08289 ## mu 8.6803 1.93501 par(mfrow = c(1, 2)) denscomp(list(ftoxo.P, ftoxo.nb), legendtext = c(\"Poisson\", \"negative binomial\"), fitlty = 1) cdfcomp(list(ftoxo.P, ftoxo.nb), legendtext = c(\"Poisson\", \"negative binomial\"), fitlty = 1) gofstat(list(ftoxo.P, ftoxo.nb), fitnames = c(\"Poisson\", \"negative binomial\")) ## Chi-squared statistic: 31257 7.486 ## Degree of freedom of the Chi-squared distribution: 5 4 ## Chi-squared p-value: 0 0.1123 ## the p-value may be wrong with some theoretical counts < 5 ## Chi-squared table: ## obscounts theo Poisson theo negative binomial ## <= 0 14 0.009014 15.295 ## <= 1 8 0.078237 5.809 ## <= 3 6 1.321767 6.845 ## <= 4 6 2.131298 2.408 ## <= 9 6 29.827829 7.835 ## <= 21 6 19.626224 8.271 ## > 21 7 0.005631 6.537 ## ## Goodness-of-fit criteria ## Poisson negative binomial ## Akaike's Information Criterion 1017 322.7 ## Bayesian Information Criterion 1019 326.6"},{"path":"https://lbbe-software.github.io/fitdistrplus/articles/fitdistrplus_vignette.html","id":"ccl","dir":"Articles","previous_headings":"","what":"Conclusion","title":"Overview of the fitdistrplus package","text":"R package fitdistrplus allows easily fit distributions. main objective developing package provide tools helping R users fit distributions data. 
encouraged pursue work feedbacks users package various areas food environmental risk assessment, epidemiology, ecology, molecular biology, genomics, bioinformatics, hydraulics, mechanics, financial actuarial mathematics operations research. Indeed, package already used lot practionners academics simple MLE fits Voigt et al. (2014), MLE fits goodness--fit statistics Vaninsky (2013), MLE fits bootstrap Rigaux et al. (2014), MLE fits, bootstrap goodness--fit statistics (Larras, Montuelle, Bouchez 2013), MME fit Sato et al. (2013), censored MLE bootstrap Contreras, Huerta, Arnold (2013), graphic analysing (Anand, Yeturu, Chandra 2012), grouped-data fitting methods (Fu, Steiner, Costafreda 2012) generally Drake, Chalabi, Coker (2014). fitdistrplus package complementary distrMod package (Kohl Ruckdeschel 2010). distrMod provides even flexible way estimate distribution parameters use requires greater initial investment learn manipulate S4 classes methods developed distr-family packages. Many extensions fitdistrplus package planned future: target extend censored data methods moment available non-censored data, especially concerning goodness--fit evaluation fitting methods. also enlarge choice fitting methods non-censored data, proposing new goodness--fit distances (e.g., distances based quantiles) maximum goodness--fit estimation new types moments (e.g., limited expected values) moment matching estimation. last, consider case multivariate distribution fitting.","code":""},{"path":"https://lbbe-software.github.io/fitdistrplus/articles/fitdistrplus_vignette.html","id":"acknowledgments","dir":"Articles","previous_headings":"","what":"Acknowledgments","title":"Overview of the fitdistrplus package","text":"package stage without stimulating contribution Régis Pouillot Jean-Baptiste Denis, especially conceptualization. also want thank Régis Pouillot valuable comments first version paper. authors gratefully acknowledges two anonymous referees Editor useful constructive comments. remaining errors, course, attributed authors alone.","code":""},{"path":[]},{"path":[]},{"path":[]},{"path":"https://lbbe-software.github.io/fitdistrplus/articles/starting-values.html","id":"geometric-distribution","dir":"Articles","previous_headings":"1 Discrete distributions > 1.1 Base R distribution","what":"Geometric distribution","title":"Starting values used in fitdistrplus","text":"MME used \\(\\hat p=1/(1+m_1)\\).","code":""},{"path":"https://lbbe-software.github.io/fitdistrplus/articles/starting-values.html","id":"negative-binomial-distribution","dir":"Articles","previous_headings":"1 Discrete distributions > 1.1 Base R distribution","what":"Negative binomial distribution","title":"Starting values used in fitdistrplus","text":"MME used \\(\\hat n = m_1^2/(\\mu_2-m_1)\\).","code":""},{"path":"https://lbbe-software.github.io/fitdistrplus/articles/starting-values.html","id":"poisson-distribution","dir":"Articles","previous_headings":"1 Discrete distributions > 1.1 Base R distribution","what":"Poisson distribution","title":"Starting values used in fitdistrplus","text":"MME MLE \\(\\hat \\lambda = m_1\\).","code":""},{"path":"https://lbbe-software.github.io/fitdistrplus/articles/starting-values.html","id":"binomial-distribution","dir":"Articles","previous_headings":"1 Discrete distributions > 1.1 Base R distribution","what":"Binomial distribution","title":"Starting values used in fitdistrplus","text":"MME used \\[ Var[X]/E[X] = 1-p \\Rightarrow \\hat p = 1- \\mu_2/m_1. 
\\] size parameter \\[ \\hat n = \\lceil\\max(\\max_i x_i, m_1/\\hat p)\\rceil. \\]","code":""},{"path":"https://lbbe-software.github.io/fitdistrplus/articles/starting-values.html","id":"logarithmic-distribution","dir":"Articles","previous_headings":"1 Discrete distributions","what":"logarithmic distribution","title":"Starting values used in fitdistrplus","text":"expectation simplifies small values \\(p\\) \\[ E[X] = -\\frac{1}{\\log(1-p)}\\frac{p}{1-p} \\approx -\\frac{1}{-p}\\frac{p}{1-p} =\\frac{1}{1-p}. \\] initial estimate \\[ \\hat p = 1-1/m_1. \\]","code":""},{"path":"https://lbbe-software.github.io/fitdistrplus/articles/starting-values.html","id":"zero-truncated-distributions","dir":"Articles","previous_headings":"1 Discrete distributions","what":"Zero truncated distributions","title":"Starting values used in fitdistrplus","text":"distribution distribution \\(X\\vert X>0\\) \\(X\\) follows particular discrete distributions. Hence initial estimate one used base R sample \\(x-1\\).","code":""},{"path":"https://lbbe-software.github.io/fitdistrplus/articles/starting-values.html","id":"zero-modified-distributions","dir":"Articles","previous_headings":"1 Discrete distributions","what":"Zero modified distributions","title":"Starting values used in fitdistrplus","text":"MLE probability parameter empirical mass 0 \\(\\hat p_0=\\frac1n \\sum_i 1_{x_i=0}\\). estimators use classical estimator probability parameter \\(1-\\hat p_0\\).","code":""},{"path":"https://lbbe-software.github.io/fitdistrplus/articles/starting-values.html","id":"poisson-inverse-gaussian-distribution","dir":"Articles","previous_headings":"1 Discrete distributions","what":"Poisson inverse Gaussian distribution","title":"Starting values used in fitdistrplus","text":"first two moments \\[ E[X]=\\mu, Var[X] = \\mu+\\phi\\mu^3. \\] initial estimate \\[ \\hat\\mu=m_1, \\hat\\phi = (\\mu_2 - m_1)/m_1^3. \\]","code":""},{"path":[]},{"path":"https://lbbe-software.github.io/fitdistrplus/articles/starting-values.html","id":"normal-distribution","dir":"Articles","previous_headings":"2 Continuous distributions","what":"Normal distribution","title":"Starting values used in fitdistrplus","text":"MLE MME use empirical mean variance.","code":""},{"path":"https://lbbe-software.github.io/fitdistrplus/articles/starting-values.html","id":"lognormal-distribution","dir":"Articles","previous_headings":"2 Continuous distributions","what":"Lognormal distribution","title":"Starting values used in fitdistrplus","text":"log sample follows normal distribution, normal log sample.","code":""},{"path":"https://lbbe-software.github.io/fitdistrplus/articles/starting-values.html","id":"beta-distribution-of-the-first-kind","dir":"Articles","previous_headings":"2 Continuous distributions","what":"Beta distribution (of the first kind)","title":"Starting values used in fitdistrplus","text":"density function beta \\(\\mathcal (,b)\\) \\[ f_X(x) = \\frac{\\Gamma()\\Gamma(b)}{\\Gamma(+b)} x^{-1}(1-x)^{b-1}. 
\\] initial estimate MME \\[\\begin{equation} \\hat = m_1 \\delta, \\hat b = (1-m_1)\\delta, \\delta = \\frac{m_1(1-m_1)}{\\mu_2}-1, \\tag{2.1} \\end{equation}\\]","code":""},{"path":[]},{"path":"https://lbbe-software.github.io/fitdistrplus/articles/starting-values.html","id":"log-gamma","dir":"Articles","previous_headings":"2 Continuous distributions > 2.4 Other continuous distribution in actuar","what":"Log-gamma","title":"Starting values used in fitdistrplus","text":"Use gamma initial values sample \\(\\log(x)\\)","code":""},{"path":"https://lbbe-software.github.io/fitdistrplus/articles/starting-values.html","id":"gumbel","dir":"Articles","previous_headings":"2 Continuous distributions > 2.4 Other continuous distribution in actuar","what":"Gumbel","title":"Starting values used in fitdistrplus","text":"distribution function \\[ F(x) = \\exp(-\\exp(-\\frac{x-\\alpha}{\\theta})). \\] Let \\(q_1\\) \\(q_3\\) first third quartiles. \\[ \\left\\{\\begin{array} -\\theta\\log(-\\log(p_1)) = q_1-\\alpha \\\\ -\\theta\\log(-\\log(p_3)) = q_3-\\alpha \\end{array}\\right. \\Leftrightarrow \\left\\{\\begin{array} -\\theta\\log(-\\log(p_1))+\\theta\\log(-\\log(p_3)) = q_1-q_3 \\\\ \\alpha= \\theta\\log(-\\log(p_3)) + q_3 \\end{array}\\right. \\Leftrightarrow \\left\\{\\begin{array} \\theta= \\frac{q_1-q_3}{\\log(-\\log(p_3)) - \\log(-\\log(p_1))} \\\\ \\alpha= \\theta\\log(-\\log(p_3)) + q_3 \\end{array}\\right.. \\] Using median location parameter \\(\\alpha\\) yields initial estimate \\[ \\hat\\theta= \\frac{q_1-q_3}{\\log(\\log(4/3)) - \\log(\\log(4))}, \\hat\\alpha = \\hat\\theta\\log(\\log(2)) + q_2. \\]","code":""},{"path":"https://lbbe-software.github.io/fitdistrplus/articles/starting-values.html","id":"inverse-gaussian-distribution","dir":"Articles","previous_headings":"2 Continuous distributions > 2.4 Other continuous distribution in actuar","what":"Inverse Gaussian distribution","title":"Starting values used in fitdistrplus","text":"moments distribution \\[ E[X] = \\mu, Var[X] = \\mu^3\\phi. \\] Hence initial estimate \\(\\hat\\mu=m_1\\), \\(\\hat\\phi=\\mu_2/m_1^3\\).","code":""},{"path":"https://lbbe-software.github.io/fitdistrplus/articles/starting-values.html","id":"generalized-beta","dir":"Articles","previous_headings":"2 Continuous distributions > 2.4 Other continuous distribution in actuar","what":"Generalized beta","title":"Starting values used in fitdistrplus","text":"distribution \\(\\theta X^{1/\\tau}\\) \\(X\\) beta distributed \\(\\mathcal (,b)\\) moments \\[ E[X] = \\theta \\beta(+1/\\tau, b)/\\beta(,b) = \\theta \\frac{\\Gamma(+1/\\tau)}{\\Gamma()}\\frac{\\Gamma(+b)}{\\Gamma(+b+1/\\tau)}, \\] \\[ E[X^2] = \\theta^2 \\frac{\\Gamma(+2/\\tau)}{\\Gamma()}\\frac{\\Gamma(+b)}{\\Gamma(+b+2/\\tau)}. \\] Hence large value \\(\\tau\\), \\[ E[X^2] /E[X] = \\theta \\frac{\\Gamma(+2/\\tau)}{\\Gamma(+b+2/\\tau)} \\frac{\\Gamma(+b+1/\\tau)}{\\Gamma(+1/\\tau)} \\approx \\theta. \\] Note MLE \\(\\theta\\) maximum use \\[ \\hat\\tau=3, \\hat\\theta = \\frac{m_2}{m_1}\\max_i x_i 1_{m_2>m_1} +\\frac{m_1}{m_2}\\max_i x_i 1_{m_2\\geq m_1}. 
\\] use beta initial estimate sample \\((\\frac{x_i}{\\hat\\theta})^{\\hat\\tau}\\).","code":""},{"path":"https://lbbe-software.github.io/fitdistrplus/articles/starting-values.html","id":"feller-pareto-family","dir":"Articles","previous_headings":"2 Continuous distributions","what":"Feller-Pareto family","title":"Starting values used in fitdistrplus","text":"Feller-Pareto distribution distribution \\(X=\\mu+\\theta(1/B-1)^{1/\\gamma}\\) \\(B\\) follows beta distribution shape parameters \\(\\alpha\\) \\(\\tau\\). See details https://doi.org/10.18637/jss.v103.i06 Hence let \\(Y = (X-\\mu)/\\theta\\), \\[ \\frac{Y}{1+Y} = \\frac{X-\\mu}{\\theta+X-\\mu} = (1-B)^{1/\\gamma}. \\] \\(\\gamma\\) close 1, \\(\\frac{Y}{1+Y}\\) approximately beta distributed \\(\\tau\\) \\(\\alpha\\). log-likelihood \\[\\begin{equation} \\mathcal L(\\mu, \\theta, \\alpha, \\gamma, \\tau) = (\\tau \\gamma - 1) \\sum_{} \\log(\\frac{x_i-\\mu}\\theta) - (\\alpha+\\tau)\\sum_i \\log(1+(\\frac{x_i-\\mu}\\theta)^\\gamma) + n\\log(\\gamma) - n\\log(\\theta) -n \\log(\\beta(\\alpha,\\tau)). \\tag{2.2}. \\end{equation}\\] MLE \\(\\mu\\) minimum. gradient respect \\(\\theta, \\alpha, \\gamma, \\tau\\) \\[\\begin{equation} \\nabla \\mathcal L(\\mu, \\theta, \\alpha, \\gamma, \\tau) = \\begin{pmatrix} -(\\tau \\gamma - 1) \\sum_{} \\frac{x_i}{\\theta(x_i-\\mu)} + (\\alpha+\\tau)\\sum_i \\frac{x_i\\gamma(\\frac{x_i-\\mu}\\theta)^{\\gamma-1}}{\\theta^2(1+(\\frac{x_i-\\mu}\\theta)^\\gamma)} - n/\\theta \\\\ - \\sum_i \\log(1+(\\frac{x_i-\\mu}\\theta)^\\gamma) -n(\\psi(\\tau) - \\psi(\\alpha+\\tau)) \\\\ (\\tau - 1) \\sum_{} \\log(\\frac{x_i-\\mu}\\theta) - (\\alpha+\\tau)\\sum_i \\frac{(\\frac{x_i-\\mu}\\theta)^\\gamma}{ 1+(\\frac{x_i-\\mu}\\theta)^\\gamma}\\log(\\frac{x_i-\\mu}\\theta) + n/\\gamma \\\\ (\\gamma - 1) \\sum_{} \\log(\\frac{x_i-\\mu}\\theta) - \\sum_i \\log(1+(\\frac{x_i-\\mu}\\theta)^\\gamma) -n (\\psi(\\tau) - \\psi(\\alpha+\\tau)) \\end{pmatrix}. \\tag{2.3} \\end{equation}\\] Cancelling first component score \\(\\gamma=\\alpha=2\\), get \\[ -(2\\tau - 1) \\sum_{} \\frac{x_i}{\\theta(x_i-\\mu)} + (2+\\tau)\\sum_i \\frac{x_i 2(x_i-\\mu)}{\\theta^3(1+(\\frac{x_i-\\mu}\\theta)^2)} = \\frac{n}{\\theta} \\Leftrightarrow -(2\\tau - 1)\\theta^2\\frac1n \\sum_{} \\frac{x_i}{x_i-\\mu} + (2+\\tau) \\frac1n\\sum_i \\frac{x_i 2(x_i-\\mu)}{(1+(\\frac{x_i-\\mu}\\theta)^2)} = \\theta^2 \\] \\[ \\Leftrightarrow (2+\\tau) \\frac1n\\sum_i \\frac{x_i 2(x_i-\\mu)}{1+(\\frac{x_i-\\mu}\\theta)^2} = (2\\tau - 1)\\theta^2\\left(\\frac1n \\sum_{} \\frac{x_i}{x_i-\\mu} -1\\right) \\Leftrightarrow \\sqrt{ \\frac{(2+\\tau) \\frac1n\\sum_i \\frac{x_i 2(x_i-\\mu)}{1+(\\frac{x_i-\\mu}\\theta)^2} }{(2\\tau - 1)\\left(\\frac1n \\sum_{} \\frac{x_i}{x_i-\\mu} -1\\right)} } = \\theta. \\] Neglecting unknown value \\(\\tau\\) denominator \\(\\theta\\), get \\(\\hat\\mu\\) set ((2.16)) \\[\\begin{equation} \\hat\\theta = \\sqrt{ \\frac{ \\frac1n\\sum_i \\frac{x_i 2(x_i-\\hat\\mu)}{1+(x_i-\\hat\\mu)^2} }{\\left(\\frac1n \\sum_{} \\frac{x_i}{x_i-\\hat\\mu} -1\\right)} }. \\tag{2.4} \\end{equation}\\] Initial value \\(\\tau,\\alpha\\) obtained sample \\((z_i)_i\\) \\[ z_i = y_i/(1+y_i), y_i = (x_i - \\hat\\mu)/\\hat\\theta, \\] initial values beta distribution based MME ((2.1)). 
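A hedged sketch of the beta MME initial values of equation (2.1), written as a hypothetical helper (not a function of the package) so the mapping from sample moments to (shape1, shape2) is explicit:
beta_mme_start <- function(x) {
  m1  <- mean(x)
  mu2 <- mean((x - m1)^2)                  # empirical centred second moment
  delta <- m1 * (1 - m1) / mu2 - 1
  c(shape1 = m1 * delta, shape2 = (1 - m1) * delta)
}
beta_mme_start(rbeta(1000, shape1 = 2, shape2 = 5))   # should land near c(2, 5)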
Cancelling last component gradient leads \\[ (\\gamma - 1) \\frac1n\\sum_{} \\log(\\frac{x_i-\\mu}\\theta) - \\frac1n\\sum_i \\log(1+(\\frac{x_i-\\mu}\\theta)^\\gamma) = \\psi(\\tau) - \\psi(\\alpha+\\tau) \\Leftrightarrow (\\gamma - 1) \\frac1n\\sum_{} \\log(\\frac{x_i-\\mu}\\theta) = \\psi(\\tau) - \\psi(\\alpha+\\tau) +\\frac1n\\sum_i \\log(1+(\\frac{x_i-\\mu}\\theta)^\\gamma) . \\] Neglecting value \\(\\gamma\\) right-hand side obtain \\[\\begin{equation} \\hat\\gamma = 1+ \\frac{ \\psi(\\tau) - \\psi(\\alpha+\\tau) +\\frac1n\\sum_i \\log(1+(\\frac{x_i-\\mu}\\theta)) }{ \\frac1n\\sum_{} \\log(\\frac{x_i-\\mu}\\theta) }. \\tag{2.5} \\end{equation}\\]","code":""},{"path":"https://lbbe-software.github.io/fitdistrplus/articles/starting-values.html","id":"transformed-beta","dir":"Articles","previous_headings":"2 Continuous distributions > 2.5 Feller-Pareto family","what":"Transformed beta","title":"Starting values used in fitdistrplus","text":"Feller-Pareto \\(\\mu=0\\). first component (2.3) simplifies \\(\\gamma=\\alpha=2\\) \\[ -(2\\tau - 1) \\sum_{} \\frac{x_i}{\\theta(x_i)} + (2+\\tau)\\sum_i \\frac{2x_i^2}{\\theta^3(1+(\\frac{x_i}\\theta)^2)} = \\frac{n}{\\theta} \\Leftrightarrow -(2\\tau - 1) \\theta^2 + (2+\\tau)\\frac1n\\sum_i \\frac{2x_i^2}{1+(\\frac{x_i}\\theta)^2} = \\theta^2 \\] \\[ \\theta^2=\\frac{2+\\tau}{2\\tau}\\frac1n\\sum_i \\frac{2x_i^2}{1+(\\frac{x_i}\\theta)^2}. \\] Neglecting unknown value \\(\\tau\\) denominator \\(\\theta\\), get \\[\\begin{equation} \\hat\\theta = \\sqrt{ \\frac1n\\sum_i \\frac{2x_i^2}{1+x_i^2} }. \\tag{2.6} \\end{equation}\\] Initial value \\(\\tau,\\alpha\\) obtained sample \\((z_i)_i\\) \\[ z_i = y_i/(1+y_i), y_i = x_i/\\hat\\theta, \\] initial values beta distribution based MME ((2.1)). Similar Feller-Pareto, set \\[\\begin{equation} \\hat\\gamma = 1+ \\frac{ \\psi(\\tau) - \\psi(\\alpha+\\tau) +\\frac1n\\sum_i \\log(1+\\frac{x_i}\\theta) }{ \\frac1n\\sum_{} \\log(\\frac{x_i}\\theta) }. \\tag{2.5} \\end{equation}\\]","code":""},{"path":"https://lbbe-software.github.io/fitdistrplus/articles/starting-values.html","id":"generalized-pareto","dir":"Articles","previous_headings":"2 Continuous distributions > 2.5 Feller-Pareto family","what":"Generalized Pareto","title":"Starting values used in fitdistrplus","text":"Feller-Pareto \\(\\mu=0\\) \\(\\gamma=1\\). first component (2.3) simplifies \\(\\gamma=2\\) \\[ -(\\tau - 1) \\frac{n}{\\theta} + (2+\\tau)\\sum_i \\frac{x_i}{\\theta^2(1+\\frac{x_i}\\theta} = n/\\theta \\Leftrightarrow -(\\tau - 1) \\theta + (2+\\tau)\\frac1n\\sum_i \\frac{x_i}{(1+\\frac{x_i}\\theta} = \\theta. \\] Neglecting unknown value \\(\\tau\\) leads \\[\\begin{equation} \\hat\\theta = \\frac1n\\sum_i \\frac{x_i}{1+x_i} \\tag{2.7} \\end{equation}\\] Initial value \\(\\tau,\\alpha\\) obtained sample \\((z_i)_i\\) \\[ z_i = y_i/(1+y_i), y_i = x_i/\\hat\\theta, \\] initial values beta distribution based MME ((2.1)).","code":""},{"path":"https://lbbe-software.github.io/fitdistrplus/articles/starting-values.html","id":"burr","dir":"Articles","previous_headings":"2 Continuous distributions > 2.5 Feller-Pareto family","what":"Burr","title":"Starting values used in fitdistrplus","text":"Burr Feller-Pareto distribution \\(\\mu=0\\), \\(\\tau=1\\). survival function \\[ 1-F(x) = (1+(x/\\theta)^\\gamma)^{-\\alpha}. \\] Using median \\(q_2\\), \\[ \\log(1/2) = - \\alpha \\log(1+(q_2/\\theta)^\\gamma). 
\\] initial value \\[\\begin{equation} \\alpha = \\frac{\\log(2)}{\\log(1+(q_2/\\theta)^\\gamma)}, \\tag{2.8} \\end{equation}\\] first component (2.3) simplifies \\(\\gamma=\\alpha=2\\), \\(\\tau=1\\), \\(\\mu=0\\). \\[ - n/\\theta + 3\\sum_i \\frac{2x_i(\\frac{x_i}\\theta)}{\\theta^2(1+(\\frac{x_i}\\theta)^2)} = n/\\theta \\Leftrightarrow \\theta^2\\frac1n\\sum_i \\frac{2x_i(\\frac{x_i}\\theta)}{(1+(\\frac{x_i}\\theta)^2)} = 2/3. \\] Neglecting unknown value denominator \\(\\theta\\), get \\[\\begin{equation} \\hat\\theta = \\sqrt{ \\frac{2}{3 \\frac1n\\sum_i \\frac{2x_i^2}{1+(x_i)^2} } }. \\tag{2.6} \\end{equation}\\] use \\(\\hat\\gamma\\) (2.5) \\(\\tau=1\\) \\(\\alpha=2\\) previous \\(\\hat\\theta\\).","code":""},{"path":"https://lbbe-software.github.io/fitdistrplus/articles/starting-values.html","id":"loglogistic","dir":"Articles","previous_headings":"2 Continuous distributions > 2.5 Feller-Pareto family","what":"Loglogistic","title":"Starting values used in fitdistrplus","text":"Loglogistic Feller-Pareto distribution \\(\\mu=0\\), \\(\\tau=1\\), \\(\\alpha=1\\). survival function \\[ 1-F(x) = (1+(x/\\theta)^\\gamma)^{-1}. \\] \\[ \\frac1{1-F(x)}-1 = (x/\\theta)^\\gamma \\Leftrightarrow \\log(\\frac{F(x)}{1-F(x)}) = \\gamma\\log(x/\\theta). \\] Let \\(q_1\\) \\(q_3\\) first third quartile. \\[ \\log(\\frac{1/3}{2/3})= \\gamma\\log(q_1/\\theta), \\log(\\frac{2/3}{1/3})= \\gamma\\log(q_3/\\theta) \\Leftrightarrow -\\log(2)= \\gamma\\log(q_1/\\theta), \\log(2)= \\gamma\\log(q_3/\\theta). \\] difference previous equations simplifies \\[ \\hat\\gamma=\\frac{2\\log(2)}{\\log(q_3/q_1)}. \\] sum previous equations \\[ 0 = \\gamma\\log(q_1)+\\gamma\\log(q_3) - 2\\gamma\\log(\\theta). \\] \\[\\begin{equation} \\hat\\theta = \\frac12 e^{\\log(q_1q_3)}. \\tag{2.9} \\end{equation}\\]","code":""},{"path":"https://lbbe-software.github.io/fitdistrplus/articles/starting-values.html","id":"paralogistic","dir":"Articles","previous_headings":"2 Continuous distributions > 2.5 Feller-Pareto family","what":"Paralogistic","title":"Starting values used in fitdistrplus","text":"Paralogistic Feller-Pareto distribution \\(\\mu=0\\), \\(\\tau=1\\), \\(\\alpha=\\gamma\\). survival function \\[ 1-F(x) = (1+(x/\\theta)^\\alpha)^{-\\alpha}. \\] \\[ \\log(1-F(x)) = -\\alpha \\log(1+(x/\\theta)^\\alpha). \\] log-likelihood \\[\\begin{equation} \\mathcal L(\\theta, \\alpha) = ( \\alpha - 1) \\sum_{} \\log(\\frac{x_i}\\theta) - (\\alpha+1)\\sum_i \\log(1+(\\frac{x_i}\\theta)^\\alpha) + 2n\\log(\\alpha) - n\\log(\\theta). \\tag{2.10} \\end{equation}\\] gradient respect \\(\\theta\\), \\(\\alpha\\) \\[ \\begin{pmatrix} ( \\alpha - 1)\\frac{-n}{\\theta} - (\\alpha+1)\\sum_i \\frac{-x_i\\alpha(x_i/\\theta)^{\\alpha-1}}{1+(\\frac{x_i}\\theta)^\\alpha} - n/\\theta \\\\ \\sum_{} \\log(\\frac{ \\frac{x_i}\\theta}{1+(\\frac{x_i}\\theta)^\\alpha }) - (\\alpha+1)\\sum_i \\frac{(\\frac{x_i}\\theta)^\\alpha \\log(x_i/\\theta)}{1+(\\frac{x_i}\\theta)^\\alpha} + 2n/\\alpha \\\\ \\end{pmatrix}. \\] first component cancels \\[ - (\\alpha+1)\\sum_i \\frac{-x_i\\alpha(x_i/\\theta)^{\\alpha-1}}{1+(\\frac{x_i}\\theta)^\\alpha} = \\alpha n/\\theta \\Leftrightarrow (\\alpha+1)\\frac1n\\sum_i \\frac{ (x_i)^{\\alpha+1}}{1+(\\frac{x_i}\\theta)^\\alpha} = \\theta^\\alpha. \\] second component cancels \\[ \\frac1n\\sum_{} \\log(\\frac{ \\frac{x_i}\\theta}{1+(\\frac{x_i}\\theta)^\\alpha }) = -2/\\alpha +(\\alpha+1)\\frac1n\\sum_i \\frac{(\\frac{x_i}\\theta)^\\alpha \\log(x_i/\\theta)}{1+(\\frac{x_i}\\theta)^\\alpha}. 
\\] Choosing \\(\\theta=1\\), \\(\\alpha=2\\) sums leads \\[ \\frac1n\\sum_{} \\log(\\frac{ \\frac{x_i}\\theta}{1+x_i^2 }) - \\frac1n\\sum_i \\frac{x_i^2\\log(x_i)}{1+x_i^2} = -2/\\alpha +(\\alpha)\\frac1n\\sum_i \\frac{x_i^2\\log(x_i)}{1+x_i^2}. \\] Initial estimators \\[\\begin{equation} \\hat\\alpha = \\frac{ \\frac1n\\sum_{} \\log(\\frac{ x_i}{1+x_i^2 }) - \\frac1n\\sum_i \\frac{x_i^2\\log(x_i)}{1+x_i^2} }{ \\frac1n\\sum_i \\frac{x_i^2\\log(x_i)}{1+x_i^2} - 2 }, \\tag{2.11} \\end{equation}\\] \\[\\begin{equation} \\hat\\theta = (\\hat\\alpha+1)\\frac1n\\sum_i \\frac{ (x_i)^{\\hat\\alpha+1}}{1+(x_i)^{\\hat\\alpha}}. \\tag{2.12} \\end{equation}\\]","code":""},{"path":"https://lbbe-software.github.io/fitdistrplus/articles/starting-values.html","id":"inverse-burr","dir":"Articles","previous_headings":"2 Continuous distributions > 2.5 Feller-Pareto family","what":"Inverse Burr","title":"Starting values used in fitdistrplus","text":"Use Burr estimate sample \\(1/x\\)","code":""},{"path":"https://lbbe-software.github.io/fitdistrplus/articles/starting-values.html","id":"inverse-paralogistic","dir":"Articles","previous_headings":"2 Continuous distributions > 2.5 Feller-Pareto family","what":"Inverse paralogistic","title":"Starting values used in fitdistrplus","text":"Use paralogistic estimate sample \\(1/x\\)","code":""},{"path":"https://lbbe-software.github.io/fitdistrplus/articles/starting-values.html","id":"inverse-pareto","dir":"Articles","previous_headings":"2 Continuous distributions > 2.5 Feller-Pareto family","what":"Inverse pareto","title":"Starting values used in fitdistrplus","text":"Use pareto estimate sample \\(1/x\\)","code":""},{"path":"https://lbbe-software.github.io/fitdistrplus/articles/starting-values.html","id":"pareto-iv","dir":"Articles","previous_headings":"2 Continuous distributions > 2.5 Feller-Pareto family","what":"Pareto IV","title":"Starting values used in fitdistrplus","text":"survival function \\[ 1-F(x) = \\left(1+ \\left(\\frac{x-\\mu}{\\theta}\\right)^{\\gamma} \\right)^{-\\alpha}, \\] see ?Pareto4 actuar. first third quartiles \\(q_1\\) \\(q_3\\) verify \\[ ((\\frac34)^{-1/\\alpha}-1)^{1/\\gamma} = \\frac{q_1-\\mu}{\\theta}, ((\\frac14)^{-1/\\alpha}-1)^{1/\\gamma} = \\frac{q_3-\\mu}{\\theta}. \\] Hence get two useful relations \\[\\begin{equation} \\gamma = \\frac{ \\log\\left( \\frac{ (\\frac43)^{1/\\alpha}-1 }{ (4)^{1/\\alpha}-1 } \\right) }{ \\log\\left(\\frac{q_1-\\mu}{q_3-\\mu}\\right) }, \\tag{2.13} \\end{equation}\\] \\[\\begin{equation} \\theta = \\frac{q_1- q_3 }{ ((\\frac43)^{1/\\alpha}-1)^{1/\\gamma} - ((4)^{1/\\alpha}-1)^{1/\\gamma} }. \\tag{2.14} \\end{equation}\\] log-likelihood Pareto 4 sample (see Equation (5.2.94) Arnold (2015) updated Goulet et al. notation) \\[ \\mathcal L(\\mu,\\theta,\\gamma,\\alpha) = (\\gamma -1) \\sum_i \\log(\\frac{x_i-\\mu}{\\theta}) -(\\alpha+1)\\sum_i \\log(1+ (\\frac{x_i-\\mu}{\\theta})^{\\gamma}) +n\\log(\\gamma) -n\\log(\\theta)+n\\log(\\alpha). \\] Cancelling derivate \\(\\mathcal L(\\mu,\\theta,\\gamma,\\alpha)\\) respect \\(\\alpha\\) leads \\[\\begin{equation} \\alpha =n/\\sum_i \\log(1+ (\\frac{x_i-\\mu}{\\theta})^{\\gamma}). \\tag{2.15} \\end{equation}\\] MLE threshold parameter \\(\\mu\\) minimum. initial estimate slightly minimum order observations strictly \\[\\begin{equation} \\hat\\mu = \\left\\{ \\begin{array}{ll} (1-\\epsilon) \\min_i x_i & \\text{} \\min_i x_i <0 \\\\ (1+\\epsilon)\\min_i x_i & \\text{} \\min_i x_i \\geq 0 \\\\ \\end{array} \\right. . 
\\tag{2.16} \\end{equation}\\] \\(\\epsilon=0.05\\). Initial parameter estimation \\(\\hat\\mu\\), \\(\\alpha^\\star = 2\\) , \\(\\hat\\gamma\\) (2.13) \\(\\alpha^\\star\\), \\(\\hat\\theta\\) (2.14) \\(\\alpha^\\star\\) \\(\\hat\\gamma\\), \\(\\hat\\alpha\\) (2.15) \\(\\hat\\mu\\), \\(\\hat\\theta\\) \\(\\hat\\gamma\\).","code":""},{"path":"https://lbbe-software.github.io/fitdistrplus/articles/starting-values.html","id":"pareto-iii","dir":"Articles","previous_headings":"2 Continuous distributions > 2.5 Feller-Pareto family","what":"Pareto III","title":"Starting values used in fitdistrplus","text":"Pareto III corresponds Pareto IV \\(\\alpha=1\\). \\[\\begin{equation} \\gamma = \\frac{ \\log\\left( \\frac{ \\frac43-1 }{ 4-1 } \\right) }{ \\log\\left(\\frac{q_1-\\mu}{q_3-\\mu}\\right) }, \\label{eq:pareto3:gamma:relation} \\end{equation}\\] \\[\\begin{equation} \\theta = \\frac{ (\\frac13)^{1/\\gamma} - (3)^{1/\\gamma} }{q_1- q_3 }. \\label{eq:pareto3:theta:relation} \\end{equation}\\] Initial parameter estimation \\(\\hat\\mu\\), \\(\\hat\\gamma\\) \\(\\eqref{eq:pareto3:gamma:relation}\\), \\(\\hat\\theta\\) \\(\\eqref{eq:pareto3:theta:relation}\\) \\(\\hat\\gamma\\).","code":""},{"path":"https://lbbe-software.github.io/fitdistrplus/articles/starting-values.html","id":"pareto-ii","dir":"Articles","previous_headings":"2 Continuous distributions > 2.5 Feller-Pareto family","what":"Pareto II","title":"Starting values used in fitdistrplus","text":"Pareto II corresponds Pareto IV \\(\\gamma=1\\). \\[\\begin{equation} \\theta = \\frac{ (\\frac43)^{1/\\alpha} - 4^{1/\\alpha} }{q_1- q_3 }. \\label{eq:pareto2:theta:relation} \\end{equation}\\] Initial parameter estimation \\(\\hat\\mu\\), \\(\\alpha^\\star = 2\\) , \\(\\hat\\theta\\) \\(\\eqref{eq:pareto4:theta:relation}\\) \\(\\alpha^\\star\\) \\(\\gamma=1\\), \\(\\hat\\alpha\\) \\(\\eqref{eq:pareto4:alpha:relation}\\) \\(\\hat\\mu\\), \\(\\hat\\theta\\) \\(\\gamma=1\\),","code":""},{"path":"https://lbbe-software.github.io/fitdistrplus/articles/starting-values.html","id":"pareto-i","dir":"Articles","previous_headings":"2 Continuous distributions > 2.5 Feller-Pareto family","what":"Pareto I","title":"Starting values used in fitdistrplus","text":"Pareto corresponds Pareto IV \\(\\gamma=1\\), \\(\\mu=\\theta\\). MLE \\[\\begin{equation} \\hat\\mu = \\min_i X_i, \\hat\\alpha = \\left(\\frac1n \\sum_{=1}^n \\log(X_i/\\hat\\mu) \\right)^{-1}. \\label{eq:pareto1:alpha:mu:relation} \\end{equation}\\] can rewritten geometric mean sample \\(G_n = (\\prod_{=1}^n X_i)^{1/n}\\) \\[ \\hat\\alpha = \\log(G_n/\\hat\\mu). \\] Initial parameter estimation \\(\\hat\\mu\\), \\(\\hat\\alpha\\) \\(\\eqref{eq:pareto1:alpha:mu:relation}\\).","code":""},{"path":"https://lbbe-software.github.io/fitdistrplus/articles/starting-values.html","id":"pareto","dir":"Articles","previous_headings":"2 Continuous distributions > 2.5 Feller-Pareto family","what":"Pareto","title":"Starting values used in fitdistrplus","text":"Pareto corresponds Pareto IV \\(\\gamma=1\\), \\(\\mu=0\\). \\[\\begin{equation} \\theta = \\frac{ (\\frac43)^{1/\\alpha} - 4^{1/\\alpha} }{q_1- q_3 }. 
\\label{eq:pareto:theta:relation} \\end{equation}\\] Initial parameter estimation \\[ \\alpha^\\star = \\max(2, 2(m_2-m_1^2)/(m_2-2m_1^2)), \\] \\(m_i\\) empirical raw moment order \\(\\), \\(\\hat\\theta\\) \\(\\eqref{eq:pareto4:theta:relation}\\) \\(\\alpha^\\star\\) \\(\\gamma=1\\), \\(\\hat\\alpha\\) \\(\\eqref{eq:pareto4:alpha:relation}\\) \\(\\mu=0\\), \\(\\hat\\theta\\) \\(\\gamma=1\\).","code":""},{"path":[]},{"path":"https://lbbe-software.github.io/fitdistrplus/articles/starting-values.html","id":"transformed-gamma-distribution","dir":"Articles","previous_headings":"2 Continuous distributions > 2.6 Transformed gamma family","what":"Transformed gamma distribution","title":"Starting values used in fitdistrplus","text":"log-likelihood given \\[ \\mathcal L(\\alpha,\\tau,\\theta) = n\\log(\\tau) + \\alpha\\tau\\sum_i \\log(x_i/\\theta) -\\sum_i (x_i/\\theta)^\\tau - \\sum_i\\log(x_i) - n\\log(Gamma(\\alpha)). \\] gradient respect \\(\\alpha,\\tau,\\theta\\) given \\[ \\begin{pmatrix} \\tau- n\\psi(\\alpha)) \\\\ n/\\tau + \\alpha\\sum_i \\log(x_i/\\theta) -\\sum_i (x_i/\\theta)^{\\tau} \\log(x_i/\\theta) \\\\ -\\alpha\\tau /\\theta +\\sum_i \\tau \\frac{x_i}{\\theta^2}(x_i/\\theta)^{\\tau-1} \\end{pmatrix}. \\] compute moment-estimator gamma \\(\\eqref{eq:gamma:relation}\\) \\[ \\hat\\alpha = m_2^2/\\mu_2, \\hat\\theta= \\mu_2/m_1. \\] cancelling first component gradient set \\[ \\hat\\tau = \\frac{\\psi(\\hat\\alpha)}{\\frac1n\\sum_i \\log(x_i/\\hat\\theta) }. \\]","code":""},{"path":"https://lbbe-software.github.io/fitdistrplus/articles/starting-values.html","id":"gamma-distribution","dir":"Articles","previous_headings":"2 Continuous distributions > 2.6 Transformed gamma family","what":"gamma distribution","title":"Starting values used in fitdistrplus","text":"Transformed gamma \\(\\tau=1\\) compute moment-estimator given \\[\\begin{equation} \\hat\\alpha = m_2^2/\\mu_2, \\hat\\theta= \\mu_2/m_1. \\label{eq:gamma:relation} \\end{equation}\\]","code":""},{"path":"https://lbbe-software.github.io/fitdistrplus/articles/starting-values.html","id":"weibull-distribution","dir":"Articles","previous_headings":"2 Continuous distributions > 2.6 Transformed gamma family","what":"Weibull distribution","title":"Starting values used in fitdistrplus","text":"Transformed gamma \\(\\alpha=1\\) Let \\(\\tilde m=\\frac1n\\sum_i \\log(x_i)\\) \\(\\tilde v=\\frac1n\\sum_i (\\log(x_i) - \\tilde m)^2\\). use approximate MME \\[ \\hat\\tau = 1.2/sqrt(\\tilde v), \\hat\\theta = exp(\\tilde m + 0.572/\\hat \\tau). \\] Alternatively, can use distribution function \\[ F(x) = 1 - e^{-(x/\\sigma)^\\tau} \\Rightarrow \\log(-\\log(1-F(x))) = \\tau\\log(x) - \\tau\\log(\\theta), \\] Hence QME Weibull \\[ \\tilde\\tau = \\frac{ \\log(-\\log(1-p_1)) - \\log(-\\log(1-p_2)) }{ \\log(x_1) - \\log(x_2) }, \\tilde\\tau = x_3/(-\\log(1-p_3))^{1/\\tilde\\tau} \\] \\(p_1=1/4\\), \\(p_2=3/4\\), \\(p_3=1/2\\), \\(x_i\\) corresponding empirical quantiles. 
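A hedged sketch of these quantile-based Weibull starting values as a hypothetical helper (not package code); the second returned value is the scale estimate, and the x1 = x2 fallback mentioned next is not handled here:
weibull_qme_start <- function(x) {
  p <- c(0.25, 0.75, 0.50)
  q <- quantile(x, probs = p, names = FALSE)
  shape <- (log(-log(1 - p[1])) - log(-log(1 - p[2]))) / (log(q[1]) - log(q[2]))
  scale <- q[3] / (-log(1 - p[3]))^(1 / shape)
  c(shape = shape, scale = scale)
}
weibull_qme_start(rweibull(1000, shape = 2, scale = 83))   # roughly c(2, 83)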
Initial parameters \\(\\tilde\\tau\\) \\(\\tilde\\theta\\) unless empirical quantiles \\(x_1=x_2\\), case use \\(\\hat\\tau\\), \\(\\hat\\theta\\).","code":""},{"path":"https://lbbe-software.github.io/fitdistrplus/articles/starting-values.html","id":"exponential-distribution","dir":"Articles","previous_headings":"2 Continuous distributions > 2.6 Transformed gamma family","what":"Exponential distribution","title":"Starting values used in fitdistrplus","text":"MLE MME \\(\\hat\\lambda = 1/m_1.\\)","code":""},{"path":[]},{"path":"https://lbbe-software.github.io/fitdistrplus/articles/starting-values.html","id":"inverse-transformed-gamma-distribution","dir":"Articles","previous_headings":"2 Continuous distributions > 2.7 Inverse transformed gamma family","what":"Inverse transformed gamma distribution","title":"Starting values used in fitdistrplus","text":"transformed gamma distribution \\((1/x_i)_i\\).","code":""},{"path":"https://lbbe-software.github.io/fitdistrplus/articles/starting-values.html","id":"inverse-gamma-distribution","dir":"Articles","previous_headings":"2 Continuous distributions > 2.7 Inverse transformed gamma family","what":"Inverse gamma distribution","title":"Starting values used in fitdistrplus","text":"compute moment-estimator \\[ \\hat\\alpha = (2m_2-m_1^2)/(m_2-m_1^2), \\hat\\theta= m_1m_2/(m_2-m_1^2). \\]","code":""},{"path":"https://lbbe-software.github.io/fitdistrplus/articles/starting-values.html","id":"inverse-weibull-distribution","dir":"Articles","previous_headings":"2 Continuous distributions > 2.7 Inverse transformed gamma family","what":"Inverse Weibull distribution","title":"Starting values used in fitdistrplus","text":"use QME.","code":""},{"path":"https://lbbe-software.github.io/fitdistrplus/articles/starting-values.html","id":"inverse-exponential","dir":"Articles","previous_headings":"2 Continuous distributions > 2.7 Inverse transformed gamma family","what":"Inverse exponential","title":"Starting values used in fitdistrplus","text":"transformed gamma distribution \\((1/x_i)_i\\).","code":""},{"path":[]},{"path":"https://lbbe-software.github.io/fitdistrplus/articles/starting-values.html","id":"general-books","dir":"Articles","previous_headings":"3 Bibliography","what":"General books","title":"Starting values used in fitdistrplus","text":"N. L. Johnson, S. Kotz, N. Balakrishnan (1994). Continuous univariate distributions, Volume 1, Wiley. N. L. Johnson, S. Kotz, N. Balakrishnan (1995). Continuous univariate distributions, Volume 2, Wiley. N. L. Johnson, . W. Kemp, S. Kotz (2008). Univariate discrete distributions, Wiley. G. Wimmer (1999), Thesaurus univariate discrete probability distributions.","code":""},{"path":"https://lbbe-software.github.io/fitdistrplus/articles/starting-values.html","id":"books-dedicated-to-a-distribution-family","dir":"Articles","previous_headings":"3 Bibliography","what":"Books dedicated to a distribution family","title":"Starting values used in fitdistrplus","text":"M. Ahsanullah, B.M. Golam Kibria, M. Shakil (2014). Normal Student’s t Distributions Applications, Springer. B. C. Arnold (2010). Pareto Distributions, Chapman Hall. . Azzalini (2013). Skew-Normal Related Families. N. Balakrishnan (2014). Handbook Logistic Distribution, CRC Press.","code":""},{"path":"https://lbbe-software.github.io/fitdistrplus/articles/starting-values.html","id":"books-with-applications","dir":"Articles","previous_headings":"3 Bibliography","what":"Books with applications","title":"Starting values used in fitdistrplus","text":"C. Forbes, M. Evans, N. 
Hastings, B. Peacock (2011). Statistical Distributions, Wiley. Z. . Karian, E. J. Dudewicz, K. Shimizu (2010). Handbook Fitting Statistical Distributions R, CRC Press. K. Krishnamoorthy (2015). Handbook Statistical Distributions Applications, Chapman Hall. Klugman, S., Panjer, H. & Willmot, G. (2019). Loss Models: Data Decisions, 5th ed., John Wiley & Sons.","code":""},{"path":"https://lbbe-software.github.io/fitdistrplus/authors.html","id":null,"dir":"","previous_headings":"","what":"Authors","title":"Authors and Citation","text":"Marie-Laure Delignette-Muller. Author. Christophe Dutang. Author. Regis Pouillot. Contributor. Jean-Baptiste Denis. Contributor. Aurélie Siberchicot. Author, maintainer.","code":""},{"path":"https://lbbe-software.github.io/fitdistrplus/authors.html","id":"citation","dir":"","previous_headings":"","what":"Citation","title":"Authors and Citation","text":"Marie Laure Delignette-Muller, Christophe Dutang (2015). fitdistrplus: R Package Fitting Distributions. Journal Statistical Software, 64(4), 1-34. DOI 10.18637/jss.v064.i04.","code":"@Article{, title = {{fitdistrplus}: An {R} Package for Fitting Distributions}, author = {Marie Laure Delignette-Muller and Christophe Dutang}, journal = {Journal of Statistical Software}, year = {2015}, volume = {64}, number = {4}, pages = {1--34}, doi = {10.18637/jss.v064.i04}, }"},{"path":"https://lbbe-software.github.io/fitdistrplus/index.html","id":"help-to-fit-of-a-parametric-distribution-to-non-censored-or-censored-data","dir":"","previous_headings":"","what":"Help to Fit of a Parametric Distribution to Non-Censored or Censored Data","title":"Help to Fit of a Parametric Distribution to Non-Censored or Censored Data","text":"Please note! Since January 2024, repository belonged lbbe-software organization. avoid confusion, strongly recommend updating existing local clones point new repository URL. can using git remote command line: git remote set-url origin git@github.com:lbbe-software/fitdistrplus.git git remote set-url origin https://github.com/lbbe-software/fitdistrplus.git fitdistrplus extends fitdistr() function (MASS package) several functions help fit parametric distribution non-censored censored data. Censored data may contain left censored, right censored interval censored values, several lower upper bounds. addition maximum likelihood estimation (MLE), package provides moment matching (MME), quantile matching (QME) maximum goodness--fit estimation (MGE) methods (available non-censored data). Weighted versions MLE, MME QME available. fitdistrplus allows fit probability distribution provided user restricted base R distributions (see ?Distributions). 
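As a quick editor-added illustration of the estimation methods listed above (not part of the original page), the following sketch fits the same simulated sample by maximum likelihood and by moment matching through the standard fitdist() interface; the documented estimate component holds the fitted parameters.

library(fitdistrplus)
set.seed(42)
x <- rgamma(200, shape = 3, rate = 0.5)
f_mle <- fitdist(x, "gamma", method = "mle")   # maximum likelihood estimation
f_mme <- fitdist(x, "gamma", method = "mme")   # moment matching estimation
rbind(MLE = f_mle$estimate, MME = f_mme$estimate)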
strongly encourage users visit CRAN task view Distributions proposed Dutang, Kiener & Swihart (2024).","code":""},{"path":"https://lbbe-software.github.io/fitdistrplus/index.html","id":"the-package","dir":"","previous_headings":"","what":"The package","title":"Help to Fit of a Parametric Distribution to Non-Censored or Censored Data","text":"stable version fitdistrplus can installed CRAN using: development version fitdistrplus can installed GitHub (remotes needed): Finally load package current R session following R command:","code":"install.packages(\"fitdistrplus\") if (!requireNamespace(\"remotes\", quietly = TRUE)) install.packages(\"remotes\") remotes::install_github(\"lbbe-software/fitdistrplus\") library(fitdistrplus)"},{"path":"https://lbbe-software.github.io/fitdistrplus/index.html","id":"documentation","dir":"","previous_headings":"","what":"Documentation","title":"Help to Fit of a Parametric Distribution to Non-Censored or Censored Data","text":"Four vignettes attached fitdistrplus package. Two beginners Overview fitdistrplus package Frequently Asked Questions last two vignettes deal advanced topics optimization algorithm choose? Starting values used fitdistrplus","code":""},{"path":"https://lbbe-software.github.io/fitdistrplus/index.html","id":"authors--contacts","dir":"","previous_headings":"","what":"Authors & Contacts","title":"Help to Fit of a Parametric Distribution to Non-Censored or Censored Data","text":"Please read FAQ contacting authors Marie-Laure Delignette-Muller: marielaure.delignettemuller<<@))vetagro-sup.fr Christophe Dutang: dutangc<<@))gmail.com Aurélie Siberchicot: aurelie.siberchicot<<@))univ-lyon1.fr Issues can reported fitdistrplus-issues.","code":""},{"path":"https://lbbe-software.github.io/fitdistrplus/index.html","id":"citation","dir":"","previous_headings":"","what":"Citation","title":"Help to Fit of a Parametric Distribution to Non-Censored or Censored Data","text":"use fitdistrplus, cite: Marie Laure Delignette-Muller, Christophe Dutang (2015). fitdistrplus: R Package Fitting Distributions. Journal Statistical Software. 
https://www.jstatsoft.org/article/view/v064i04 DOI 10.18637/jss.v064.i04.","code":""},{"path":"https://lbbe-software.github.io/fitdistrplus/reference/CIcdfplot.html","id":null,"dir":"Reference","previous_headings":"","what":"Empirical cumulative distribution function with pointwise confidence intervals on probabilities or on quantiles — CIcdfplot","title":"Empirical cumulative distribution function with pointwise confidence intervals on probabilities or on quantiles — CIcdfplot","text":"cdfband plots empirical cumulative distribution function bootstraped pointwise confidence intervals probabilities quantiles.","code":""},{"path":"https://lbbe-software.github.io/fitdistrplus/reference/CIcdfplot.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Empirical cumulative distribution function with pointwise confidence intervals on probabilities or on quantiles — CIcdfplot","text":"","code":"CIcdfplot(b, CI.output, CI.type = \"two.sided\", CI.level = 0.95, CI.col = \"red\", CI.lty = 2, CI.fill = NULL, CI.only = FALSE, xlim, ylim, xlogscale = FALSE, ylogscale = FALSE, main, xlab, ylab, datapch, datacol, fitlty, fitcol, fitlwd, horizontals = TRUE, verticals = FALSE, do.points = TRUE, use.ppoints = TRUE, a.ppoints = 0.5, name.points = NULL, lines01 = FALSE, plotstyle = \"graphics\", ...)"},{"path":"https://lbbe-software.github.io/fitdistrplus/reference/CIcdfplot.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Empirical cumulative distribution function with pointwise confidence intervals on probabilities or on quantiles — CIcdfplot","text":"b One \"bootdist\" object. CI.output quantity (bootstraped) bootstraped confidence intervals computed: either \"probability\" \"quantile\"). CI.type Type confidence intervals : either \"two.sided\" one-sided intervals (\"less\" \"greater\"). CI.level confidence level. CI.col color confidence intervals. CI.lty line type confidence intervals. CI.fill color fill confidence area. Default NULL corresponding filling. CI.logical whether plot empirical fitted distribution functions confidence intervals. Default FALSE. xlim \\(x\\)-limits plot. ylim \\(y\\)-limits plot. xlogscale TRUE, uses logarithmic scale \\(x\\)-axis. ylogscale TRUE, uses logarithmic scale \\(y\\)-axis. main main title plot, see also title. xlab label \\(x\\)-axis, defaults description x. ylab label \\(y\\)-axis, defaults description y. datapch integer specifying symbol used plotting data points, see also points (non censored data). datacol specification color used plotting data points. fitcol (vector ) color(s) plot fitted distributions. fewer colors fits recycled standard fashion. fitlty (vector ) line type(s) plot fitted distributions/densities. fewer values fits recycled standard fashion. See also par. fitlwd (vector ) line size(s) plot fitted distributions/densities. fewer values fits recycled standard fashion. See also par. horizontals TRUE, draws horizontal lines step empirical cdf function (non censored data). See also plot.stepfun. verticals TRUE, draws also vertical lines empirical cdf function. taken account horizontals=TRUE (non censored data). .points logical; TRUE, also draw points x-locations. Default TRUE (non censored data). use.ppoints TRUE, probability points empirical distribution defined using function ppoints (1:n - .ppoints)/(n - 2a.ppoints + 1) (non censored data). FALSE, probability points simply defined (1:n)/n. argument ignored discrete data. 
.ppoints use.ppoints=TRUE, passed function ppoints (non censored data). name.points Label vector points drawn .e. .points = TRUE (non censored data). lines01 logical plot two horizontal lines h=0 h=1 cdfcomp. plotstyle \"graphics\" \"ggplot\". \"graphics\", display built graphics functions. \"ggplot\", graphic object output created ggplot2 functions (ggplot2 package must installed). ... graphical arguments passed matlines polygon, respectively CI.fill=FALSE CI.fill=TRUE.","code":""},{"path":"https://lbbe-software.github.io/fitdistrplus/reference/CIcdfplot.html","id":"details","dir":"Reference","previous_headings":"","what":"Details","title":"Empirical cumulative distribution function with pointwise confidence intervals on probabilities or on quantiles — CIcdfplot","text":"CIcdfplot provides plot empirical distribution using cdfcomp cdfcompcens, bootstraped pointwise confidence intervals probabilities (y values) quantiles (x values). interval computed evaluating quantity interest (probability associated x value quantile associated y value) using bootstraped values parameters get bootstraped sample quantity interest calculating percentiles sample get confidence interval (classically 2.5 97.5 percentiles 95 percent confidence level). CI.fill != NULL, whole confidence area filled color CI.fill thanks function polygon, otherwise borders drawn thanks function matline. graphical arguments can passed functions using three dots arguments ....","code":""},{"path":[]},{"path":"https://lbbe-software.github.io/fitdistrplus/reference/CIcdfplot.html","id":"references","dir":"Reference","previous_headings":"","what":"References","title":"Empirical cumulative distribution function with pointwise confidence intervals on probabilities or on quantiles — CIcdfplot","text":"Delignette-Muller ML Dutang C (2015), fitdistrplus: R Package Fitting Distributions. Journal Statistical Software, 64(4), 1-34, doi:10.18637/jss.v064.i04 .","code":""},{"path":"https://lbbe-software.github.io/fitdistrplus/reference/CIcdfplot.html","id":"author","dir":"Reference","previous_headings":"","what":"Author","title":"Empirical cumulative distribution function with pointwise confidence intervals on probabilities or on quantiles — CIcdfplot","text":"Christophe Dutang Marie-Laure Delignette-Muller.","code":""},{"path":"https://lbbe-software.github.io/fitdistrplus/reference/CIcdfplot.html","id":"ref-examples","dir":"Reference","previous_headings":"","what":"Examples","title":"Empirical cumulative distribution function with pointwise confidence intervals on probabilities or on quantiles — CIcdfplot","text":"","code":"# We choose a low number of bootstrap replicates in order to satisfy CRAN running times # constraint. # For practical applications, we recommend to use at least niter=501 or niter=1001. 
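# --- Editor-added sketch (not part of the original examples) ---
# The Details section above describes the pointwise interval as: evaluate the
# quantity of interest with each bootstrapped parameter set, then take the
# 2.5 and 97.5 percentiles. The same band can be computed by hand from the
# documented estim component of a bootdist object:
set.seed(123)
s0 <- rexp(50, 1)
b0 <- bootdist(fitdist(s0, "exp"), niter = 101)
xgrid <- seq(0, 4, length.out = 101)
Fboot <- sapply(b0$estim$rate, function(r) pexp(xgrid, rate = r))  # one fitted CDF per replicate
CIband <- apply(Fboot, 1, quantile, probs = c(0.025, 0.975))       # pointwise 95% limits
# CIband approximates the band drawn by CIcdfplot(b0, CI.output = "probability")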
if (requireNamespace (\"ggplot2\", quietly = TRUE)) {ggplotEx <- TRUE} # (1) Fit of an exponential distribution # set.seed(123) s1 <- rexp(50, 1) f1 <- fitdist(s1, \"exp\") b1 <- bootdist(f1, niter= 11) #voluntarily low to decrease computation time # plot 95 percent bilateral confidence intervals on y values (probabilities) CIcdfplot(b1, CI.level= 95/100, CI.output = \"probability\") if (ggplotEx) CIcdfplot(b1, CI.level= 95/100, CI.output = \"probability\", plotstyle = \"ggplot\") # \\donttest{ # plot of the previous intervals as a band CIcdfplot(b1, CI.level= 95/100, CI.output = \"probability\", CI.fill = \"pink\", CI.col = \"red\") if (ggplotEx) CIcdfplot(b1, CI.level= 95/100, CI.output = \"probability\", CI.fill = \"pink\", CI.col = \"red\", plotstyle = \"ggplot\") # plot of the previous intervals as a band without empirical and fitted dist. functions CIcdfplot(b1, CI.level= 95/100, CI.output = \"probability\", CI.only = TRUE, CI.fill = \"pink\", CI.col = \"red\") if (ggplotEx) CIcdfplot(b1, CI.level= 95/100, CI.output = \"probability\", CI.only = TRUE, CI.fill = \"pink\", CI.col = \"red\", plotstyle = \"ggplot\") # same plot without contours CIcdfplot(b1, CI.level= 95/100, CI.output = \"probability\", CI.only = TRUE, CI.fill = \"pink\", CI.col = \"pink\") if (ggplotEx) CIcdfplot(b1, CI.level= 95/100, CI.output = \"probability\", CI.only = TRUE, CI.fill = \"pink\", CI.col = \"pink\", plotstyle = \"ggplot\") # plot 95 percent bilateral confidence intervals on x values (quantiles) CIcdfplot(b1, CI.level= 95/100, CI.output = \"quantile\") if (ggplotEx) CIcdfplot(b1, CI.level= 95/100, CI.output = \"quantile\", plotstyle = \"ggplot\") # plot 95 percent unilateral confidence intervals on quantiles CIcdfplot(b1, CI.level = 95/100, CI.output = \"quant\", CI.type = \"less\", CI.fill = \"grey80\", CI.col = \"black\", CI.lty = 1) if (ggplotEx) CIcdfplot(b1, CI.level = 95/100, CI.output = \"quant\", CI.type = \"less\", CI.fill = \"grey80\", CI.col = \"black\", CI.lty = 1, plotstyle = \"ggplot\") CIcdfplot(b1, CI.level= 95/100, CI.output = \"quant\", CI.type = \"greater\", CI.fill = \"grey80\", CI.col = \"black\", CI.lty = 1) if (ggplotEx) CIcdfplot(b1, CI.level= 95/100, CI.output = \"quant\", CI.type = \"greater\", CI.fill = \"grey80\", CI.col = \"black\", CI.lty = 1, plotstyle = \"ggplot\") # (2) Fit of a normal distribution on acute toxicity log-transformed values of # endosulfan for nonarthropod invertebrates, using maximum likelihood estimation # to estimate what is called a species sensitivity distribution # (SSD) in ecotoxicology, followed by estimation of the 5, 10 and 20 percent quantile # values of the fitted distribution, which are called the 5, 10, 20 percent hazardous # concentrations (HC5, HC10, HC20) in ecotoxicology, with their # confidence intervals, from a small number of bootstrap # iterations to satisfy CRAN running times constraint and plot of the band # representing pointwise confidence intervals on any quantiles (any HCx values) # For practical applications, we recommend to use at least niter=501 or niter=1001. 
# data(endosulfan) log10ATV <- log10(subset(endosulfan, group == \"NonArthroInvert\")$ATV) namesATV <- subset(endosulfan, group == \"NonArthroInvert\")$taxa fln <- fitdist(log10ATV, \"norm\") bln <- bootdist(fln, bootmethod =\"param\", niter=101) quantile(bln, probs = c(0.05, 0.1, 0.2)) #> (original) estimated quantiles for each specified probability (non-censored data) #> p=0.05 p=0.1 p=0.2 #> estimate 1.744227 2.080093 2.4868 #> Median of bootstrap estimates #> p=0.05 p=0.1 p=0.2 #> estimate 1.844443 2.190122 2.565053 #> #> two-sided 95 % CI of each quantile #> p=0.05 p=0.1 p=0.2 #> 2.5 % 1.334340 1.697255 2.099378 #> 97.5 % 2.531564 2.770455 3.053706 CIcdfplot(bln, CI.output = \"quantile\", CI.fill = \"lightblue\", CI.col = \"blue\", xlim = c(0,5), name.points=namesATV) if (ggplotEx) CIcdfplot(bln, CI.output = \"quantile\", CI.fill = \"lightblue\", CI.col = \"blue\", xlim = c(0,5), name.points=namesATV, plotstyle = \"ggplot\") # (3) Same type of example as example (2) from ecotoxicology # with censored data # data(salinity) log10LC50 <-log10(salinity) fln <- fitdistcens(log10LC50,\"norm\") bln <- bootdistcens(fln, niter=101) (HC5ln <- quantile(bln,probs = 0.05)) #> (original) estimated quantiles for each specified probability (censored data) #> p=0.05 #> estimate 1.11584 #> Median of bootstrap estimates #> p=0.05 #> estimate 1.120901 #> #> two-sided 95 % CI of each quantile #> p=0.05 #> 2.5 % 1.045539 #> 97.5 % 1.191979 CIcdfplot(bln, CI.output = \"quantile\", CI.fill = \"lightblue\", CI.col = \"blue\", xlab = \"log10(LC50)\",xlim=c(0.5,2),lines01 = TRUE) if (ggplotEx) CIcdfplot(bln, CI.output = \"quantile\", CI.fill = \"lightblue\", CI.col = \"blue\", xlab = \"log10(LC50)\",xlim=c(0.5,2),lines01 = TRUE, plotstyle = \"ggplot\") # zoom around the HC5 CIcdfplot(bln, CI.output = \"quantile\", CI.fill = \"lightblue\", CI.col = \"blue\", xlab = \"log10(LC50)\", lines01 = TRUE, xlim = c(0.8, 1.5), ylim = c(0, 0.1)) abline(h = 0.05, lty = 2) # line corresponding to a CDF of 5 percent if (ggplotEx) CIcdfplot(bln, CI.output = \"quantile\", CI.fill = \"lightblue\", CI.col = \"blue\", xlab = \"log10(LC50)\", lines01 = TRUE, xlim = c(0.8, 1.5), ylim = c(0, 0.1), plotstyle = \"ggplot\") + ggplot2::geom_hline(yintercept = 0.05, lty = 2) # line corresponding to a CDF of 5 percent # }"},{"path":"https://lbbe-software.github.io/fitdistrplus/reference/Surv2fitdistcens.html","id":null,"dir":"Reference","previous_headings":"","what":"Handling of data formated as in the survival package for use in fitdistcens() — Surv2fitdistcens","title":"Handling of data formated as in the survival package for use in fitdistcens() — Surv2fitdistcens","text":"Provide function prepare data frame needed fitdistcens() data classically coded using Surv() function survival package","code":""},{"path":"https://lbbe-software.github.io/fitdistrplus/reference/Surv2fitdistcens.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Handling of data formated as in the survival package for use in fitdistcens() — Surv2fitdistcens","text":"","code":"Surv2fitdistcens(time, time2, event, type = c('right', 'left', 'interval', 'interval2'))"},{"path":"https://lbbe-software.github.io/fitdistrplus/reference/Surv2fitdistcens.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Handling of data formated as in the survival package for use in fitdistcens() — Surv2fitdistcens","text":"time right censored data, follow time. interval data, first argument starting time interval. 
event status indicator, normally 0=alive, 1=dead. choices TRUE/FALSE (TRUE = death) 1/2 (2=death). interval censored data, status indicator 0=right censored, 1=event time, 2=left censored, 3=interval censored. factor data, assume two levels second level coding death. time2 ending time interval interval censored. Intervals assumed open left closed right, (start, end]. type character string specifying type censoring. Possible values \"right\", \"left\", \"interval\", \"interval2\".","code":""},{"path":"https://lbbe-software.github.io/fitdistrplus/reference/Surv2fitdistcens.html","id":"details","dir":"Reference","previous_headings":"","what":"Details","title":"Handling of data formated as in the survival package for use in fitdistcens() — Surv2fitdistcens","text":"Surv2fitdistcens makes data.frame two columns respectively named left right, describing observed value interval required fitdistcens(): left column contains either NA left-censored observations, left bound interval interval-censored observations, observed value non-censored observations. right column contains either NA right-censored observations, right bound interval interval censored observations, observed value non-censored observations.","code":""},{"path":"https://lbbe-software.github.io/fitdistrplus/reference/Surv2fitdistcens.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Handling of data formated as in the survival package for use in fitdistcens() — Surv2fitdistcens","text":"Surv2fitdistcens returns data.frame two columns respectively named left right.","code":""},{"path":[]},{"path":"https://lbbe-software.github.io/fitdistrplus/reference/Surv2fitdistcens.html","id":"references","dir":"Reference","previous_headings":"","what":"References","title":"Handling of data formated as in the survival package for use in fitdistcens() — Surv2fitdistcens","text":"Delignette-Muller ML Dutang C (2015), fitdistrplus: R Package Fitting Distributions. 
Journal Statistical Software, 64(4), 1-34, doi:10.18637/jss.v064.i04 .","code":""},{"path":"https://lbbe-software.github.io/fitdistrplus/reference/Surv2fitdistcens.html","id":"author","dir":"Reference","previous_headings":"","what":"Author","title":"Handling of data formated as in the survival package for use in fitdistcens() — Surv2fitdistcens","text":"Christophe Dutang Marie-Laure Delignette-Muller.","code":""},{"path":"https://lbbe-software.github.io/fitdistrplus/reference/Surv2fitdistcens.html","id":"ref-examples","dir":"Reference","previous_headings":"","what":"Examples","title":"Handling of data formated as in the survival package for use in fitdistcens() — Surv2fitdistcens","text":"","code":"# (1) randomized fictive survival data - right-censored # origdata <- data.frame(rbind( c( 43.01, 55.00, 0), c( 36.37, 47.17, 0), c( 33.10, 34.51, 0), c( 71.00, 81.15, 1), c( 80.89, 81.91, 1), c( 67.81, 78.48, 1), c( 73.98, 76.92, 1), c( 53.19, 54.80, 1))) colnames(origdata) <- c(\"AgeIn\", \"AgeOut\", \"Death\") # add of follow-up time (for type = \"right\" in Surv()) origdata$followuptime <- origdata$AgeOut - origdata$AgeIn origdata #> AgeIn AgeOut Death followuptime #> 1 43.01 55.00 0 11.99 #> 2 36.37 47.17 0 10.80 #> 3 33.10 34.51 0 1.41 #> 4 71.00 81.15 1 10.15 #> 5 80.89 81.91 1 1.02 #> 6 67.81 78.48 1 10.67 #> 7 73.98 76.92 1 2.94 #> 8 53.19 54.80 1 1.61 ### use of default survival type \"right\" # in Surv() survival::Surv(time = origdata$followuptime, event = origdata$Death, type = \"right\") #> [1] 11.99+ 10.80+ 1.41+ 10.15 1.02 10.67 2.94 1.61 # for fitdistcens() Surv2fitdistcens(origdata$followuptime, event = origdata$Death, type = \"right\") #> left right #> 1 11.99 NA #> 2 10.80 NA #> 3 1.41 NA #> 4 10.15 10.15 #> 5 1.02 1.02 #> 6 10.67 10.67 #> 7 2.94 2.94 #> 8 1.61 1.61 # use of survival type \"interval\" # in Surv() survival::Surv(time = origdata$followuptime, time2 = origdata$followuptime, event = origdata$Death, type = \"interval\") #> [1] 11.99+ 10.80+ 1.41+ 10.15 1.02 10.67 2.94 1.61 # for fitdistcens() Surv2fitdistcens(time = origdata$followuptime, time2 = origdata$followuptime, event = origdata$Death, type = \"interval\") #> left right #> 1 11.99 NA #> 2 10.80 NA #> 3 1.41 NA #> 4 10.15 10.15 #> 5 1.02 1.02 #> 6 10.67 10.67 #> 7 2.94 2.94 #> 8 1.61 1.61 # use of survival type \"interval2\" origdata$survivalt1 <- origdata$followuptime origdata$survivalt2 <- origdata$survivalt1 origdata$survivalt2[1:3] <- Inf origdata #> AgeIn AgeOut Death followuptime survivalt1 survivalt2 #> 1 43.01 55.00 0 11.99 11.99 Inf #> 2 36.37 47.17 0 10.80 10.80 Inf #> 3 33.10 34.51 0 1.41 1.41 Inf #> 4 71.00 81.15 1 10.15 10.15 10.15 #> 5 80.89 81.91 1 1.02 1.02 1.02 #> 6 67.81 78.48 1 10.67 10.67 10.67 #> 7 73.98 76.92 1 2.94 2.94 2.94 #> 8 53.19 54.80 1 1.61 1.61 1.61 survival::Surv(time = origdata$survivalt1, time2 = origdata$survivalt2, type = \"interval2\") #> [1] 11.99+ 10.80+ 1.41+ 10.15 1.02 10.67 2.94 1.61 Surv2fitdistcens(origdata$survivalt1, time2 = origdata$survivalt2, type = \"interval2\") #> left right #> 1 11.99 NA #> 2 10.80 NA #> 3 1.41 NA #> 4 10.15 10.15 #> 5 1.02 1.02 #> 6 10.67 10.67 #> 7 2.94 2.94 #> 8 1.61 1.61 # (2) Other examples with various left, right and interval censored values # # with left censored data (d1 <- data.frame(time = c(2, 5, 3, 7), ind = c(0, 1, 1, 1))) #> time ind #> 1 2 0 #> 2 5 1 #> 3 3 1 #> 4 7 1 survival::Surv(time = d1$time, event = d1$ind, type = \"left\") #> [1] 2- 5 3 7 Surv2fitdistcens(time = d1$time, event = d1$ind, type = \"left\") #> left 
right #> 1 NA 2 #> 2 5 5 #> 3 3 3 #> 4 7 7 (d1bis <- data.frame(t1 = c(2, 5, 3, 7), t2 = c(2, 5, 3, 7), censtype = c(2, 1, 1, 1))) #> t1 t2 censtype #> 1 2 2 2 #> 2 5 5 1 #> 3 3 3 1 #> 4 7 7 1 survival::Surv(time = d1bis$t1, time2 = d1bis$t2, event = d1bis$censtype, type = \"interval\") #> [1] 2- 5 3 7 Surv2fitdistcens(time = d1bis$t1, time2 = d1bis$t2, event = d1bis$censtype, type = \"interval\") #> left right #> 1 NA 2 #> 2 5 5 #> 3 3 3 #> 4 7 7 # with interval, left and right censored data (d2 <- data.frame(t1 = c(-Inf, 2, 3, 4, 3, 7), t2 = c(2, 5, 3, 7, 8, Inf))) #> t1 t2 #> 1 -Inf 2 #> 2 2 5 #> 3 3 3 #> 4 4 7 #> 5 3 8 #> 6 7 Inf survival::Surv(time = d2$t1, time2 = d2$t2, type = \"interval2\") #> [1] 2- [2, 5] 3 [4, 7] [3, 8] 7+ Surv2fitdistcens(time = d2$t1, time2 = d2$t2, type = \"interval2\") #> left right #> 1 NA 2 #> 2 2 5 #> 3 3 3 #> 4 4 7 #> 5 3 8 #> 6 7 NA (d2bis <- data.frame(t1 = c(2, 2, 3, 4, 3, 7), t2 = c(2, 5, 3, 7, 8, 7), censtype = c(2,3,1,3,3,0))) #> t1 t2 censtype #> 1 2 2 2 #> 2 2 5 3 #> 3 3 3 1 #> 4 4 7 3 #> 5 3 8 3 #> 6 7 7 0 survival::Surv(time = d2bis$t1, time2 = d2bis$t2, event = d2bis$censtype, type = \"interval\") #> [1] 2- [2, 5] 3 [4, 7] [3, 8] 7+ Surv2fitdistcens(time = d2bis$t1, time2 = d2bis$t2, event = d2bis$censtype, type = \"interval\") #> left right #> 1 NA 2 #> 2 2 5 #> 3 3 3 #> 4 4 7 #> 5 3 8 #> 6 7 NA"},{"path":"https://lbbe-software.github.io/fitdistrplus/reference/bootdist.html","id":null,"dir":"Reference","previous_headings":"","what":"Bootstrap simulation of uncertainty for non-censored data — bootdist","title":"Bootstrap simulation of uncertainty for non-censored data — bootdist","text":"Uses parametric nonparametric bootstrap resampling order simulate uncertainty parameters distribution fitted non-censored data.","code":""},{"path":"https://lbbe-software.github.io/fitdistrplus/reference/bootdist.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Bootstrap simulation of uncertainty for non-censored data — bootdist","text":"","code":"bootdist(f, bootmethod = \"param\", niter = 1001, silent = TRUE, parallel = c(\"no\", \"snow\", \"multicore\"), ncpus) # S3 method for class 'bootdist' print(x, ...) # S3 method for class 'bootdist' plot(x, main = \"Bootstrapped values of parameters\", enhance = FALSE, trueval = NULL, rampcol = NULL, nbgrid = 100, nbcol = 100, ...) # S3 method for class 'bootdist' summary(object, ...) # S3 method for class 'bootdist' density(..., bw = nrd0, adjust = 1, kernel = \"gaussian\") # S3 method for class 'density.bootdist' plot(x, mar=c(4,4,2,1), lty=NULL, col=NULL, lwd=NULL, ...) # S3 method for class 'density.bootdist' print(x, ...)"},{"path":"https://lbbe-software.github.io/fitdistrplus/reference/bootdist.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Bootstrap simulation of uncertainty for non-censored data — bootdist","text":"f object class \"fitdist\", output fitdist function. bootmethod character string coding type resampling : \"param\" parametric resampling \"nonparam\" nonparametric resampling data. niter number samples drawn bootstrap. silent logical remove show warnings errors bootstraping. parallel type parallel operation used, \"snow\" \"multicore\" (second one available Windows), \"\" parallel operation. ncpus Number processes used parallel operation : typically one fix number available CPUs. x object class \"bootdist\" \"density.bootdist\". object object class \"bootdist\". 
main overall title plot: see title, default \"Bootstrapped values parameters\". enhance logical get enhanced plot. trueval relevant, numeric vector true value parameters (backfitting purposes). rampcol colors interpolate; must valid argument colorRampPalette(). nbgrid Number grid points direction. Can scalar length-2 integer vector. nbcol integer argument, required number colors ... arguments passed generic methods \"bootdist\" objects density. bw, adjust, kernel resp. smoothing bandwidth, scaling factor, kernel used, see density. mar numerical vector form c(bottom, left, top, right), see par. lty, col, lwd resp. line type, color, line width, see par.","code":""},{"path":"https://lbbe-software.github.io/fitdistrplus/reference/bootdist.html","id":"details","dir":"Reference","previous_headings":"","what":"Details","title":"Bootstrap simulation of uncertainty for non-censored data — bootdist","text":"Samples drawn parametric bootstrap (resampling distribution fitted fitdist) nonparametric bootstrap (resampling replacement data set). bootstrap sample function mledist (mmedist, qmedist, mgedist according component f$method object class \"fitdist\") used estimate bootstrapped values parameters. function fails converge, NA values returned. Medians 2.5 97.5 percentiles computed removing NA values. medians 95 percent confidence intervals parameters (2.5 97.5 percentiles) printed summary. inferior whole number iterations, number iterations function converges also printed summary. default (enhance=FALSE), plot object class \"bootdist\" consists scatterplot matrix scatterplots bootstrapped values parameters. uses function stripchart fitted distribution characterized one parameter, function plot two paramters function pairs cases. last cases, provides representation joint uncertainty distribution fitted parameters. enhance=TRUE, personalized plot version pairs used upper graphs scatterplots lower graphs heatmap image using image based kernel based estimator 2D density function (using kde2d MASS package). Arguments rampcol, nbgrid, nbcol can used customize plots. Defautls values rampcol=c(\"green\", \"yellow\", \"orange\", \"red\"), nbcol=100 (see colorRampPalette()), nbgrid=100 (see kde2d). addition, fitting parameters simulated datasets backtesting purposes, additional argument trueval can used plot cross true value. possible accelerate bootstrap using parallelization. recommend use parallel = \"multicore\", parallel = \"snow\" work Windows, fix ncpus number available processors. density computes empirical density bootdist objects using density function (Gaussian kernel default). returns object class density.bootdist print plot methods provided.","code":""},{"path":"https://lbbe-software.github.io/fitdistrplus/reference/bootdist.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Bootstrap simulation of uncertainty for non-censored data — bootdist","text":"bootdist returns object class \"bootdist\", list 6 components, estim data frame containing bootstrapped values parameters. converg vector containing codes convergence obtained iterative method used estimate parameters bootstraped data set (0 closed formula used). method character string coding type resampling : \"param\" parametric resampling \"nonparam\" nonparametric resampling. nbboot number samples drawn bootstrap. CI bootstrap medians 95 percent confidence percentile intervals parameters. fitpart object class \"fitdist\" bootstrap procedure applied. 
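As an editor-added sketch (not part of the original page) of how the estim component can be exploited directly: each row of estim is one bootstrapped parameter vector, so a percentile interval for any derived quantity, here the mean shape/rate of a fitted gamma distribution, follows in one line.

library(fitdistrplus)
data(groundbeef)
f1 <- fitdist(groundbeef$serving, "gamma")
b1 <- bootdist(f1, niter = 101)
boot_mean <- b1$estim$shape / b1$estim$rate          # one fitted mean per bootstrap row
quantile(boot_mean, probs = c(0.025, 0.5, 0.975))    # 95% percentile interval of the mean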
Generic functions: print print \"bootdist\" object shows bootstrap parameter estimates. inferior whole number bootstrap iterations, number iterations estimation converges also printed. summary summary provides median 2.5 97.5 percentiles parameter. inferior whole number bootstrap iterations, number iterations estimation converges also printed summary. plot plot shows bootstrap estimates stripchart function univariate parameters plot function multivariate parameters. density density computes empirical densities return object class density.bootdist.","code":""},{"path":[]},{"path":"https://lbbe-software.github.io/fitdistrplus/reference/bootdist.html","id":"references","dir":"Reference","previous_headings":"","what":"References","title":"Bootstrap simulation of uncertainty for non-censored data — bootdist","text":"Cullen AC Frey HC (1999), Probabilistic techniques exposure assessment. Plenum Press, USA, pp. 181-241. Delignette-Muller ML Dutang C (2015), fitdistrplus: R Package Fitting Distributions. Journal Statistical Software, 64(4), 1-34, doi:10.18637/jss.v064.i04 .","code":""},{"path":"https://lbbe-software.github.io/fitdistrplus/reference/bootdist.html","id":"author","dir":"Reference","previous_headings":"","what":"Author","title":"Bootstrap simulation of uncertainty for non-censored data — bootdist","text":"Marie-Laure Delignette-Muller Christophe Dutang.","code":""},{"path":"https://lbbe-software.github.io/fitdistrplus/reference/bootdist.html","id":"ref-examples","dir":"Reference","previous_headings":"","what":"Examples","title":"Bootstrap simulation of uncertainty for non-censored data — bootdist","text":"","code":"# We choose a low number of bootstrap replicates in order to satisfy CRAN running times # constraint. # For practical applications, we recommend to use at least niter=501 or niter=1001. # (1) Fit of a gamma distribution to serving size data # using default method (maximum likelihood estimation) # followed by parametric bootstrap # data(groundbeef) x1 <- groundbeef$serving f1 <- fitdist(x1, \"gamma\") b1 <- bootdist(f1, niter=51) print(b1) #> Parameter values obtained with parametric bootstrap #> shape rate #> 1 4.015562 0.05365499 #> 2 4.214437 0.05762101 #> 3 4.176366 0.05807901 #> 4 4.119164 0.05944029 #> 5 5.013486 0.07194809 #> 6 4.461409 0.05807600 plot(b1) plot(b1, enhance=TRUE) summary(b1) #> Parametric bootstrap medians and 95% percentile CI #> Median 2.5% 97.5% #> shape 4.12112272 3.32325118 5.11745944 #> rate 0.05518452 0.04684843 0.07170367 quantile(b1) #> (original) estimated quantiles for each specified probability (non-censored data) #> p=0.1 p=0.2 p=0.3 p=0.4 p=0.5 p=0.6 p=0.7 #> estimate 32.16733 42.32692 50.91831 59.15298 67.62801 76.88308 87.67764 #> p=0.8 p=0.9 #> estimate 101.5208 122.9543 #> Median of bootstrap estimates #> p=0.1 p=0.2 p=0.3 p=0.4 p=0.5 p=0.6 p=0.7 p=0.8 #> estimate 32.71222 42.80078 50.98942 59.25093 67.5939 76.42124 87.17521 100.8405 #> p=0.9 #> estimate 121.5466 #> #> two-sided 95 % CI of each quantile #> p=0.1 p=0.2 p=0.3 p=0.4 p=0.5 p=0.6 p=0.7 p=0.8 #> 2.5 % 27.77396 37.42586 45.73489 53.96687 62.26638 71.30894 81.64618 93.9737 #> 97.5 % 35.67197 45.22459 53.97730 62.58326 71.31751 81.30652 92.96508 107.7329 #> p=0.9 #> 2.5 % 113.9634 #> 97.5 % 130.6715 CIcdfplot(b1, CI.output = \"quantile\") density(b1) #> #> Bootstrap values for: gamma for 1 object(s) with 51 bootstrap values (original sample size 254). 
plot(density(b1)) # (2) non parametric bootstrap on the same fit # b1b <- bootdist(f1, bootmethod=\"nonparam\", niter=51) summary(b1b) #> Nonparametric bootstrap medians and 95% percentile CI #> Median 2.5% 97.5% #> shape 4.08546931 3.47931694 4.71280030 #> rate 0.05561944 0.04797494 0.06302539 quantile(b1b) #> (original) estimated quantiles for each specified probability (non-censored data) #> p=0.1 p=0.2 p=0.3 p=0.4 p=0.5 p=0.6 p=0.7 #> estimate 32.16733 42.32692 50.91831 59.15298 67.62801 76.88308 87.67764 #> p=0.8 p=0.9 #> estimate 101.5208 122.9543 #> Median of bootstrap estimates #> p=0.1 p=0.2 p=0.3 p=0.4 p=0.5 p=0.6 p=0.7 #> estimate 32.25183 42.25577 51.01738 59.05788 67.47548 76.95389 87.65113 #> p=0.8 p=0.9 #> estimate 100.8612 121.7738 #> #> two-sided 95 % CI of each quantile #> p=0.1 p=0.2 p=0.3 p=0.4 p=0.5 p=0.6 p=0.7 p=0.8 #> 2.5 % 28.77577 38.76800 47.17181 55.16178 63.29618 72.15077 82.21068 95.20268 #> 97.5 % 36.49366 46.74605 55.27953 63.37110 71.62773 80.58611 91.32593 105.92939 #> p=0.9 #> 2.5 % 115.0083 #> 97.5 % 128.1651 # (3) Fit of a normal distribution on acute toxicity values of endosulfan in log10 for # nonarthropod invertebrates, using maximum likelihood estimation # to estimate what is called a species sensitivity distribution # (SSD) in ecotoxicology, followed by estimation of the 5 percent quantile value of # the fitted distribution, what is called the 5 percent hazardous concentration (HC5) # in ecotoxicology, with its two-sided 95 percent confidence interval calculated by # parametric bootstrap # data(endosulfan) ATV <- subset(endosulfan, group == \"NonArthroInvert\")$ATV log10ATV <- log10(subset(endosulfan, group == \"NonArthroInvert\")$ATV) fln <- fitdist(log10ATV, \"norm\") bln <- bootdist(fln, bootmethod = \"param\", niter=51) quantile(bln, probs = c(0.05, 0.1, 0.2)) #> (original) estimated quantiles for each specified probability (non-censored data) #> p=0.05 p=0.1 p=0.2 #> estimate 1.744227 2.080093 2.4868 #> Median of bootstrap estimates #> p=0.05 p=0.1 p=0.2 #> estimate 1.811067 2.156258 2.529461 #> #> two-sided 95 % CI of each quantile #> p=0.05 p=0.1 p=0.2 #> 2.5 % 1.187935 1.634263 2.095273 #> 97.5 % 2.276507 2.563692 2.917189 # (4) comparison of sequential and parallel versions of bootstrap # to be tried with a greater number of iterations (1001 or more) # # \\donttest{ niter <- 1001 data(groundbeef) x1 <- groundbeef$serving f1 <- fitdist(x1, \"gamma\") # sequential version ptm <- proc.time() summary(bootdist(f1, niter = niter)) #> Parametric bootstrap medians and 95% percentile CI #> Median 2.5% 97.5% #> shape 4.02609408 3.46463055 4.71706986 #> rate 0.05458836 0.04622389 0.06476728 proc.time() - ptm #> user system elapsed #> 3.879 0.089 3.859 # parallel version using snow require(parallel) #> Loading required package: parallel ptm <- proc.time() summary(bootdist(f1, niter = niter, parallel = \"snow\", ncpus = 2)) #> Parametric bootstrap medians and 95% percentile CI #> Median 2.5% 97.5% #> shape 4.02321963 3.45598967 4.80078519 #> rate 0.05450354 0.04632331 0.06524721 proc.time() - ptm #> user system elapsed #> 0.037 0.001 3.675 # parallel version using multicore (not available on Windows) ptm <- proc.time() summary(bootdist(f1, niter = niter, parallel = \"multicore\", ncpus = 2)) #> Parametric bootstrap medians and 95% percentile CI #> Median 2.5% 97.5% #> shape 4.04947721 3.47970416 4.71828189 #> rate 0.05496497 0.04672265 0.06498123 proc.time() - ptm #> user system elapsed #> 0.030 0.020 2.022 # 
}"},{"path":"https://lbbe-software.github.io/fitdistrplus/reference/bootdistcens.html","id":null,"dir":"Reference","previous_headings":"","what":"Bootstrap simulation of uncertainty for censored data — bootdistcens","title":"Bootstrap simulation of uncertainty for censored data — bootdistcens","text":"Uses nonparametric bootstrap resampling order simulate uncertainty parameters distribution fitted censored data.","code":""},{"path":"https://lbbe-software.github.io/fitdistrplus/reference/bootdistcens.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Bootstrap simulation of uncertainty for censored data — bootdistcens","text":"","code":"bootdistcens(f, niter = 1001, silent = TRUE, parallel = c(\"no\", \"snow\", \"multicore\"), ncpus) # S3 method for class 'bootdistcens' print(x, ...) # S3 method for class 'bootdistcens' plot(x, ...) # S3 method for class 'bootdistcens' summary(object, ...) # S3 method for class 'bootdistcens' density(..., bw = nrd0, adjust = 1, kernel = \"gaussian\") # S3 method for class 'density.bootdistcens' plot(x, mar=c(4,4,2,1), lty=NULL, col=NULL, lwd=NULL, ...) # S3 method for class 'density.bootdistcens' print(x, ...)"},{"path":"https://lbbe-software.github.io/fitdistrplus/reference/bootdistcens.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Bootstrap simulation of uncertainty for censored data — bootdistcens","text":"f object class \"fitdistcens\", output fitdistcens function. niter number samples drawn bootstrap. silent logical remove show warnings errors bootstraping. parallel type parallel operation used, \"snow\" \"multicore\" (second one available Windows), \"\" parallel operation. ncpus Number processes used parallel operation : typically one fix number available CPUs. x object class \"bootdistcens\". object object class \"bootdistcens\". ... arguments passed generic methods \"bootdistcens\" objects density. bw, adjust, kernel resp. smoothing bandwidth, scaling factor, kernel used, see density. mar numerical vector form c(bottom, left, top, right), see par. lty, col, lwd resp. line type, color, line width, see par.","code":""},{"path":"https://lbbe-software.github.io/fitdistrplus/reference/bootdistcens.html","id":"details","dir":"Reference","previous_headings":"","what":"Details","title":"Bootstrap simulation of uncertainty for censored data — bootdistcens","text":"Samples drawn nonparametric bootstrap (resampling replacement data set). bootstrap sample function mledist used estimate bootstrapped values parameters. mledist fails converge, NA values returned. Medians 2.5 97.5 percentiles computed removing NA values. medians 95 percent confidence intervals parameters (2.5 97.5 percentiles) printed summary. inferior whole number iterations, number iterations mledist converges also printed summary. plot object class \"bootdistcens\" consists scatterplot matrix scatterplots bootstrapped values parameters. uses function stripchart fitted distribution characterized one parameter, function plot cases. last cases, provides representation joint uncertainty distribution fitted parameters. possible accelerate bootstrap using parallelization. recommend use parallel = \"multicore\", parallel = \"snow\" work Windows, fix ncpus number available processors. density computes empirical density bootdistcens objects using density function (Gaussian kernel default). 
returns object class density.bootdistcens print plot methods provided.","code":""},{"path":"https://lbbe-software.github.io/fitdistrplus/reference/bootdistcens.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Bootstrap simulation of uncertainty for censored data — bootdistcens","text":"bootdistcens returns object class \"bootdistcens\", list 6 components, estim data frame containing bootstrapped values parameters. converg vector containing codes convergence iterative method used estimate parameters bootstraped data set. method character string coding type resampling : case \"nonparam\" available method censored data. nbboot number samples drawn bootstrap. CI bootstrap medians 95 percent confidence percentile intervals parameters. fitpart object class \"fitdistcens\" bootstrap procedure applied. Generic functions: print print \"bootdistcens\" object shows bootstrap parameter estimates. inferior whole number bootstrap iterations, number iterations estimation converges also printed. summary summary provides median 2.5 97.5 percentiles parameter. inferior whole number bootstrap iterations, number iterations estimation converges also printed summary. plot plot shows bootstrap estimates stripchart function univariate parameters plot function multivariate parameters. density density computes empirical densities return object class density.bootdistcens.","code":""},{"path":[]},{"path":"https://lbbe-software.github.io/fitdistrplus/reference/bootdistcens.html","id":"references","dir":"Reference","previous_headings":"","what":"References","title":"Bootstrap simulation of uncertainty for censored data — bootdistcens","text":"Cullen AC Frey HC (1999), Probabilistic techniques exposure assessment. Plenum Press, USA, pp. 181-241. Delignette-Muller ML Dutang C (2015), fitdistrplus: R Package Fitting Distributions. Journal Statistical Software, 64(4), 1-34, doi:10.18637/jss.v064.i04 .","code":""},{"path":"https://lbbe-software.github.io/fitdistrplus/reference/bootdistcens.html","id":"author","dir":"Reference","previous_headings":"","what":"Author","title":"Bootstrap simulation of uncertainty for censored data — bootdistcens","text":"Marie-Laure Delignette-Muller Christophe Dutang.","code":""},{"path":"https://lbbe-software.github.io/fitdistrplus/reference/bootdistcens.html","id":"ref-examples","dir":"Reference","previous_headings":"","what":"Examples","title":"Bootstrap simulation of uncertainty for censored data — bootdistcens","text":"","code":"# We choose a low number of bootstrap replicates in order to satisfy CRAN running times # constraint. # For practical applications, we recommend to use at least niter=501 or niter=1001. 
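# --- Editor-added sketch (not part of the original examples) ---
# The Details section describes the nonparametric bootstrap as resampling the
# censored data set with replacement and refitting. One replicate by hand:
data(fluazinam)
d0 <- log10(fluazinam)
f0 <- fitdistcens(d0, "norm")
idx <- sample(nrow(d0), replace = TRUE)        # resample rows (left/right bounds together)
f0b <- fitdistcens(d0[idx, ], "norm")          # refit on the bootstrap sample
rbind(original = f0$estimate, one_replicate = f0b$estimate)
# bootdistcens(f0, niter = 1001) repeats this and summarises the estimates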
# (1) Fit of a normal distribution to fluazinam data in log10 # followed by nonparametric bootstrap and calculation of quantiles # with 95 percent confidence intervals # data(fluazinam) (d1 <-log10(fluazinam)) #> left right #> 1 0.5797836 0.5797836 #> 2 1.5263393 1.5263393 #> 3 1.9395193 1.9395193 #> 4 3.2304489 NA #> 5 2.8061800 2.8061800 #> 6 3.0625820 NA #> 7 2.0530784 2.0530784 #> 8 2.1105897 2.1105897 #> 9 2.7678976 2.7678976 #> 10 3.2685780 NA #> 11 0.2041200 0.2041200 #> 12 0.6812412 0.6812412 #> 13 1.9138139 1.9138139 #> 14 2.1903317 2.1903317 f1 <- fitdistcens(d1, \"norm\") b1 <- bootdistcens(f1, niter = 51) b1 #> Parameter values obtained with nonparametric bootstrap #> mean sd #> 1 2.148176 1.2301856 #> 2 2.359487 1.1144722 #> 3 1.886811 0.7960468 #> 4 1.983487 0.9941790 #> 5 1.912052 0.9906398 #> 6 2.189226 0.9088450 #> 7 2.287131 1.2049569 #> 8 2.288832 0.7645444 #> 9 1.787691 1.0077846 #> 10 2.893830 1.2229467 #> 11 2.569893 0.9597859 #> 12 2.343772 1.2402711 #> 13 2.645568 1.2934746 #> 14 1.942141 0.5982854 #> 15 1.932680 1.0077309 #> 16 1.824771 1.0653955 #> 17 2.983895 1.8018944 #> 18 2.347785 1.3994097 #> 19 1.845464 0.8555560 #> 20 2.427059 1.5893095 #> 21 1.948223 0.8705864 #> 22 1.692356 1.0223265 #> 23 2.275639 0.8147514 #> 24 2.148972 1.0345423 #> 25 2.348520 1.1739100 #> 26 1.893396 1.1106869 #> 27 1.911591 1.1574565 #> 28 2.610027 1.0803468 #> 29 2.080525 1.3340362 #> 30 1.985938 0.9870137 #> 31 1.742953 1.0956522 #> 32 2.549440 1.0330325 #> 33 2.268481 0.4832085 #> 34 2.144250 1.3228431 #> 35 2.184267 1.2698264 #> 36 1.821893 1.5316162 #> 37 2.085662 1.1654912 #> 38 1.868720 1.0912928 #> 39 2.138497 1.1356628 #> 40 2.119477 0.9868753 #> 41 2.153767 1.1818298 #> 42 1.933517 0.5773863 #> 43 2.074073 0.7280150 #> 44 2.421981 1.1254148 #> 45 2.486787 0.6096348 #> 46 2.030623 1.0934793 #> 47 1.938514 1.0258803 #> 48 1.678181 1.2224439 #> 49 2.339840 1.3061770 #> 50 2.278660 0.7921537 #> 51 2.195027 1.1382020 summary(b1) #> Nonparametric bootstrap medians and 95% percentile CI #> Median 2.5% 97.5% #> mean 2.144250 1.7050054 2.831765 #> sd 1.091293 0.5826111 1.574886 plot(b1) quantile(b1) #> (original) estimated quantiles for each specified probability (censored data) #> p=0.1 p=0.2 p=0.3 p=0.4 p=0.5 p=0.6 p=0.7 #> estimate 0.6655064 1.179033 1.549321 1.86572 2.161449 2.457179 2.773577 #> p=0.8 p=0.9 #> estimate 3.143865 3.657392 #> Median of bootstrap estimates #> p=0.1 p=0.2 p=0.3 p=0.4 p=0.5 p=0.6 p=0.7 #> estimate 0.7210295 1.215519 1.593624 1.854354 2.14425 2.418499 2.691487 #> p=0.8 p=0.9 #> estimate 2.961351 3.394931 #> #> two-sided 95 % CI of each quantile #> p=0.1 p=0.2 p=0.3 p=0.4 p=0.5 p=0.6 p=0.7 #> 2.5 % 0.1683713 0.6922166 1.066910 1.433480 1.705005 1.996046 2.241195 #> 97.5 % 1.5718878 1.8638800 2.141966 2.479624 2.831765 3.146062 3.482325 #> p=0.8 p=0.9 #> 2.5 % 2.472445 2.753590 #> 97.5 % 3.883480 4.463155 CIcdfplot(b1, CI.output = \"quantile\") plot(density(b1)) #> List of 1 #> $ :List of 6 #> ..$ estim :'data.frame':\t51 obs. of 2 variables: #> .. ..$ mean: num [1:51] 2.15 2.36 1.89 1.98 1.91 ... #> .. ..$ sd : num [1:51] 1.23 1.114 0.796 0.994 0.991 ... #> ..$ converg: num [1:51] 0 0 0 0 0 0 0 0 0 0 ... #> ..$ method : chr \"nonparam\" #> ..$ nbboot : num 51 #> ..$ CI : num [1:2, 1:3] 2.144 1.091 1.705 0.583 2.832 ... #> .. ..- attr(*, \"dimnames\")=List of 2 #> .. .. ..$ : chr [1:2] \"mean\" \"sd\" #> .. .. ..$ : chr [1:3] \"Median\" \"2.5%\" \"97.5%\" #> ..$ fitpart:List of 17 #> .. ..$ estimate : Named num [1:2] 2.16 1.17 #> .. .. 
..- attr(*, \"names\")= chr [1:2] \"mean\" \"sd\" #> .. ..$ method : chr \"mle\" #> .. ..$ sd : Named num [1:2] 0.322 0.263 #> .. .. ..- attr(*, \"names\")= chr [1:2] \"mean\" \"sd\" #> .. ..$ cor : num [1:2, 1:2] 1 0.135 0.135 1 #> .. .. ..- attr(*, \"dimnames\")=List of 2 #> .. .. .. ..$ : chr [1:2] \"mean\" \"sd\" #> .. .. .. ..$ : chr [1:2] \"mean\" \"sd\" #> .. ..$ vcov : num [1:2, 1:2] 0.1039 0.0114 0.0114 0.0692 #> .. .. ..- attr(*, \"dimnames\")=List of 2 #> .. .. .. ..$ : chr [1:2] \"mean\" \"sd\" #> .. .. .. ..$ : chr [1:2] \"mean\" \"sd\" #> .. ..$ loglik : num -20.4 #> .. ..$ aic : num 44.8 #> .. ..$ bic : num 46.1 #> .. ..$ n : int 14 #> .. ..$ censdata :'data.frame':\t14 obs. of 2 variables: #> .. .. ..$ left : num [1:14] 0.58 1.53 1.94 3.23 2.81 ... #> .. .. ..$ right: num [1:14] 0.58 1.53 1.94 NA 2.81 ... #> .. ..$ distname : chr \"norm\" #> .. ..$ fix.arg : NULL #> .. ..$ fix.arg.fun: NULL #> .. ..$ dots : NULL #> .. ..$ convergence: int 0 #> .. ..$ discrete : logi FALSE #> .. ..$ weights : NULL #> .. ..- attr(*, \"class\")= chr \"fitdistcens\" #> ..- attr(*, \"class\")= chr \"bootdistcens\" #> NULL # (2) Estimation of the mean of the normal distribution # by maximum likelihood with the standard deviation fixed at 1 # using the argument fix.arg # followed by nonparametric bootstrap # and calculation of quantiles with 95 percent confidence intervals # f1b <- fitdistcens(d1, \"norm\", start = list(mean = 1),fix.arg = list(sd = 1)) b1b <- bootdistcens(f1b, niter = 51) summary(b1b) #> Nonparametric bootstrap medians and 95% percentile CI #> Median 2.5% 97.5% #> 2.175510 1.729164 2.788775 plot(b1b) quantile(b1b) #> (original) estimated quantiles for each specified probability (censored data) #> p=0.1 p=0.2 p=0.3 p=0.4 p=0.5 p=0.6 p=0.7 #> estimate 0.8527324 1.292663 1.609883 1.880937 2.134284 2.387631 2.658684 #> p=0.8 p=0.9 #> estimate 2.975905 3.415836 #> Median of bootstrap estimates #> p=0.1 p=0.2 p=0.3 p=0.4 p=0.5 p=0.6 p=0.7 #> estimate 0.8939584 1.333889 1.651109 1.922163 2.17551 2.428857 2.699911 #> p=0.8 p=0.9 #> estimate 3.017131 3.457062 #> #> two-sided 95 % CI of each quantile #> p=0.1 p=0.2 p=0.3 p=0.4 p=0.5 p=0.6 p=0.7 #> 2.5 % 0.4476124 0.8875427 1.204763 1.475817 1.729164 1.982511 2.253564 #> 97.5 % 1.5072239 1.9471543 2.264375 2.535428 2.788775 3.042123 3.313176 #> p=0.8 p=0.9 #> 2.5 % 2.570785 3.010716 #> 97.5 % 3.630397 4.070327 # (3) comparison of sequential and parallel versions of bootstrap # to be tried with a greater number of iterations (1001 or more) # # \\donttest{ niter <- 1001 data(fluazinam) d1 <-log10(fluazinam) f1 <- fitdistcens(d1, \"norm\") # sequential version ptm <- proc.time() summary(bootdistcens(f1, niter = niter)) #> Nonparametric bootstrap medians and 95% percentile CI #> Median 2.5% 97.5% #> mean 2.146743 1.5792689 2.877993 #> sd 1.129426 0.6853478 1.709083 proc.time() - ptm #> user system elapsed #> 4.177 0.089 4.156 # parallel version using snow require(parallel) ptm <- proc.time() summary(bootdistcens(f1, niter = niter, parallel = \"snow\", ncpus = 2)) #> Nonparametric bootstrap medians and 95% percentile CI #> Median 2.5% 97.5% #> mean 2.144793 1.5914352 2.899763 #> sd 1.108123 0.6912424 1.673702 proc.time() - ptm #> user system elapsed #> 0.005 0.003 3.173 # parallel version using multicore (not available on Windows) ptm <- proc.time() summary(bootdistcens(f1, niter = niter, parallel = \"multicore\", ncpus = 2)) #> Nonparametric bootstrap medians and 95% percentile CI #> Median 2.5% 97.5% #> mean 2.163302 1.5524788 2.874380 #> sd 
1.119044 0.7072572 1.656059 proc.time() - ptm #> user system elapsed #> 0.012 0.012 2.262 # }"},{"path":"https://lbbe-software.github.io/fitdistrplus/reference/danish.html","id":null,"dir":"Reference","previous_headings":"","what":"Danish reinsurance claim dataset — danish","title":"Danish reinsurance claim dataset — danish","text":"univariate dataset collected Copenhagen Reinsurance comprise 2167 fire losses period 1980 1990. adjusted inflation reflect 1985 values expressed millions Danish Krone. multivariate data set data total claim divided building loss, loss contents loss profits.","code":""},{"path":"https://lbbe-software.github.io/fitdistrplus/reference/danish.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Danish reinsurance claim dataset — danish","text":"","code":"data(danishuni) data(danishmulti)"},{"path":"https://lbbe-software.github.io/fitdistrplus/reference/danish.html","id":"format","dir":"Reference","previous_headings":"","what":"Format","title":"Danish reinsurance claim dataset — danish","text":"danishuni contains two columns: Date day claim occurence. Loss total loss amount millions Danish Krone (DKK). danishmulti contains five columns: Date day claim occurence. Building loss amount (mDKK) building coverage. Contents loss amount (mDKK) contents coverage. Profits loss amount (mDKK) profit coverage. Total total loss amount (mDKK). columns numeric except Date columns class Date.","code":""},{"path":"https://lbbe-software.github.io/fitdistrplus/reference/danish.html","id":"source","dir":"Reference","previous_headings":"","what":"Source","title":"Danish reinsurance claim dataset — danish","text":"Embrechts, P., Kluppelberg, C. Mikosch, T. (1997) Modelling Extremal Events Insurance Finance. Berlin: Springer.","code":""},{"path":"https://lbbe-software.github.io/fitdistrplus/reference/danish.html","id":"references","dir":"Reference","previous_headings":"","what":"References","title":"Danish reinsurance claim dataset — danish","text":"Dataset used McNeil (1996), Estimating Tails Loss Severity Distributions using Extreme Value Theory, ASTIN Bull. Davison, . C. (2003) Statistical Models. Cambridge University Press. 
Page 278.","code":""},{"path":"https://lbbe-software.github.io/fitdistrplus/reference/danish.html","id":"ref-examples","dir":"Reference","previous_headings":"","what":"Examples","title":"Danish reinsurance claim dataset — danish","text":"","code":"# (1) load of data # data(danishuni) # (2) plot and description of data # plotdist(danishuni$Loss) # (3) load of data # data(danishmulti) # (4) plot and description of data # idx <- sample(1:NROW(danishmulti), 10) barplot(danishmulti$Building[idx], col = \"grey25\", ylim = c(0, max(danishmulti$Total[idx])), main = \"Some claims of danish data set\") barplot(danishmulti$Content[idx], add = TRUE, col = \"grey50\", axes = FALSE) barplot(danishmulti$Profits[idx], add = TRUE, col = \"grey75\", axes = FALSE) legend(\"topleft\", legend = c(\"Building\", \"Content\", \"Profits\"), fill = c(\"grey25\", \"grey50\", \"grey75\"))"},{"path":"https://lbbe-software.github.io/fitdistrplus/reference/dataFAQ.html","id":null,"dir":"Reference","previous_headings":"","what":"Datasets for the FAQ — dataFAQ","title":"Datasets for the FAQ — dataFAQ","text":"Datasets used FAQ vignette.","code":""},{"path":"https://lbbe-software.github.io/fitdistrplus/reference/dataFAQ.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Datasets for the FAQ — dataFAQ","text":"","code":"data(dataFAQlog1) data(dataFAQscale1) data(dataFAQscale2)"},{"path":"https://lbbe-software.github.io/fitdistrplus/reference/dataFAQ.html","id":"format","dir":"Reference","previous_headings":"","what":"Format","title":"Datasets for the FAQ — dataFAQ","text":"dataFAQlog1 dataFAQscale1 dataFAQscale2 vectors numeric data.","code":""},{"path":"https://lbbe-software.github.io/fitdistrplus/reference/dataFAQ.html","id":"author","dir":"Reference","previous_headings":"","what":"Author","title":"Datasets for the FAQ — dataFAQ","text":"Marie-Laure Delignette-Muller Christophe Dutang.","code":""},{"path":"https://lbbe-software.github.io/fitdistrplus/reference/descdist.html","id":null,"dir":"Reference","previous_headings":"","what":"Description of an empirical distribution for non-censored data — descdist","title":"Description of an empirical distribution for non-censored data — descdist","text":"Computes descriptive parameters empirical distribution non-censored data provides skewness-kurtosis plot.","code":""},{"path":"https://lbbe-software.github.io/fitdistrplus/reference/descdist.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Description of an empirical distribution for non-censored data — descdist","text":"","code":"descdist(data, discrete = FALSE, boot = NULL, method = \"unbiased\", graph = TRUE, print = TRUE, obs.col = \"red\", obs.pch = 16, boot.col = \"orange\") # S3 method for class 'descdist' print(x, ...)"},{"path":"https://lbbe-software.github.io/fitdistrplus/reference/descdist.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Description of an empirical distribution for non-censored data — descdist","text":"data numeric vector. discrete TRUE, distribution considered discrete. boot NULL, boot values skewness kurtosis plotted bootstrap samples data. boot must fixed case integer 10. method \"unbiased\" unbiased estimated values statistics \"sample\" sample values. graph FALSE, skewness-kurtosis graph plotted. print FALSE, descriptive parameters computed printed. obs.col Color used observed point skewness-kurtosis graph. obs.pch plotting character used observed point skewness-kurtosis graph. 
boot.col Color used bootstrap sample points skewness-kurtosis graph. x object class \"descdist\". ... arguments passed generic functions","code":""},{"path":"https://lbbe-software.github.io/fitdistrplus/reference/descdist.html","id":"details","dir":"Reference","previous_headings":"","what":"Details","title":"Description of an empirical distribution for non-censored data — descdist","text":"Minimum, maximum, median, mean, sample sd, sample (method==\"sample\") default unbiased estimations skewness Pearsons's kurtosis values printed (Sokal Rohlf, 1995). skewness-kurtosis plot one proposed Cullen Frey (1999) given empirical distribution. plot, values common distributions also displayed tools help choice distributions fit data. distributions (normal, uniform, logistic, exponential example), one possible value skewness kurtosis (normal distribution example, skewness = 0 kurtosis = 3), distribution thus represented point plot. distributions, areas possible values represented, consisting lines (gamma lognormal distributions example), larger areas (beta distribution example). Weibull distribution represented graph indicated legend shapes close lognormal gamma distributions may obtained distribution. order take account uncertainty estimated values kurtosis skewness data, data set may bootstraped fixing argument boot integer 10. boot values skewness kurtosis corresponding boot bootstrap samples computed reported blue color skewness-kurtosis plot. discrete TRUE, represented distributions Poisson, negative binomial distributions, normal distribution previous discrete distributions may converge. discrete FALSE, uniform, normal, logistic, lognormal, beta gamma distributions.","code":""},{"path":"https://lbbe-software.github.io/fitdistrplus/reference/descdist.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Description of an empirical distribution for non-censored data — descdist","text":"descdist returns list 7 components, min minimum value max maximum value median median value mean mean value sd standard deviation sample estimated value skewness skewness sample estimated value kurtosis kurtosis sample estimated value method method specified input (\"unbiased\" unbiased estimated values statistics \"sample\" sample values.","code":""},{"path":[]},{"path":"https://lbbe-software.github.io/fitdistrplus/reference/descdist.html","id":"references","dir":"Reference","previous_headings":"","what":"References","title":"Description of an empirical distribution for non-censored data — descdist","text":"Cullen AC Frey HC (1999), Probabilistic techniques exposure assessment. Plenum Press, USA, pp. 81-159. Evans M, Hastings N Peacock B (2000), Statistical distributions. John Wiley Sons Inc, doi:10.1002/9780470627242 . Sokal RR Rohlf FJ (1995), Biometry. W.H. Freeman Company, USA, pp. 111-115. Delignette-Muller ML Dutang C (2015), fitdistrplus: R Package Fitting Distributions. 
Journal Statistical Software, 64(4), 1-34, doi:10.18637/jss.v064.i04 .","code":""},{"path":"https://lbbe-software.github.io/fitdistrplus/reference/descdist.html","id":"author","dir":"Reference","previous_headings":"","what":"Author","title":"Description of an empirical distribution for non-censored data — descdist","text":"Marie-Laure Delignette-Muller Christophe Dutang.","code":""},{"path":"https://lbbe-software.github.io/fitdistrplus/reference/descdist.html","id":"ref-examples","dir":"Reference","previous_headings":"","what":"Examples","title":"Description of an empirical distribution for non-censored data — descdist","text":"","code":"# (1) Description of a sample from a normal distribution # with and without uncertainty on skewness and kurtosis estimated by bootstrap # set.seed(1234) x1 <- rnorm(100) descdist(x1) #> summary statistics #> ------ #> min: -2.345698 max: 2.548991 #> median: -0.384628 #> mean: -0.1567617 #> estimated sd: 1.004405 #> estimated skewness: 0.6052442 #> estimated kurtosis: 3.102441 descdist(x1,boot=11) #> summary statistics #> ------ #> min: -2.345698 max: 2.548991 #> median: -0.384628 #> mean: -0.1567617 #> estimated sd: 1.004405 #> estimated skewness: 0.6052442 #> estimated kurtosis: 3.102441 # (2) Description of a sample from a beta distribution # with uncertainty on skewness and kurtosis estimated by bootstrap # with changing of default colors and plotting character for observed point # descdist(rbeta(100,shape1=0.05,shape2=1),boot=11, obs.col=\"blue\", obs.pch = 15, boot.col=\"darkgreen\") #> summary statistics #> ------ #> min: 3.937372e-36 max: 0.8890347 #> median: 5.660314e-06 #> mean: 0.04094397 #> estimated sd: 0.1281058 #> estimated skewness: 4.368522 #> estimated kurtosis: 25.02241 # (3) Description of a sample from a gamma distribution # with uncertainty on skewness and kurtosis estimated by bootstrap # without plotting # descdist(rgamma(100,shape=2,rate=1),boot=11,graph=FALSE) #> summary statistics #> ------ #> min: 0.0753002 max: 8.631328 #> median: 1.627968 #> mean: 1.989657 #> estimated sd: 1.443636 #> estimated skewness: 1.509842 #> estimated kurtosis: 6.691933 # (4) Description of a sample from a Poisson distribution # with uncertainty on skewness and kurtosis estimated by bootstrap # descdist(rpois(100,lambda=2),discrete=TRUE,boot=11) #> summary statistics #> ------ #> min: 0 max: 6 #> median: 2 #> mean: 1.98 #> estimated sd: 1.377892 #> estimated skewness: 0.5802731 #> estimated kurtosis: 3.037067 # (5) Description of serving size data # with uncertainty on skewness and kurtosis estimated by bootstrap # data(groundbeef) serving <- groundbeef$serving descdist(serving, boot=11) #> summary statistics #> ------ #> min: 10 max: 200 #> median: 79 #> mean: 73.64567 #> estimated sd: 35.88487 #> estimated skewness: 0.7352745 #> estimated kurtosis: 3.551384"},{"path":"https://lbbe-software.github.io/fitdistrplus/reference/detectbound.html","id":null,"dir":"Reference","previous_headings":"","what":"Detect bounds for density function — detectbound","title":"Detect bounds for density function — detectbound","text":"Manual detection bounds parameter density function/","code":""},{"path":"https://lbbe-software.github.io/fitdistrplus/reference/detectbound.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Detect bounds for density function — detectbound","text":"","code":"detectbound(distname, vstart, obs, fix.arg=NULL, 
echo=FALSE)"},{"path":"https://lbbe-software.github.io/fitdistrplus/reference/detectbound.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Detect bounds for density function — detectbound","text":"distname character string \"name\" naming distribution corresponding density function dname must classically defined. vstart named vector giving initial values parameters named distribution. obs numeric vector non censored data. fix.arg optional named vector giving values fixed parameters named distribution. Default NULL. echo logical show traces.","code":""},{"path":"https://lbbe-software.github.io/fitdistrplus/reference/detectbound.html","id":"details","dir":"Reference","previous_headings":"","what":"Details","title":"Detect bounds for density function — detectbound","text":"function manually tests following bounds : -1, 0, 1.","code":""},{"path":"https://lbbe-software.github.io/fitdistrplus/reference/detectbound.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Detect bounds for density function — detectbound","text":"detectbound returns 2-row matrix lower bounds first row upper bounds second row.","code":""},{"path":[]},{"path":"https://lbbe-software.github.io/fitdistrplus/reference/detectbound.html","id":"references","dir":"Reference","previous_headings":"","what":"References","title":"Detect bounds for density function — detectbound","text":"Delignette-Muller ML Dutang C (2015), fitdistrplus: R Package Fitting Distributions. Journal Statistical Software, 64(4), 1-34, doi:10.18637/jss.v064.i04 .","code":""},{"path":"https://lbbe-software.github.io/fitdistrplus/reference/detectbound.html","id":"author","dir":"Reference","previous_headings":"","what":"Author","title":"Detect bounds for density function — detectbound","text":"Christophe Dutang Marie-Laure Delignette-Muller.","code":""},{"path":"https://lbbe-software.github.io/fitdistrplus/reference/detectbound.html","id":"ref-examples","dir":"Reference","previous_headings":"","what":"Examples","title":"Detect bounds for density function — detectbound","text":"","code":"# case where the density returns a Not-an-Numeric value. 
detectbound(\"exp\", c(rate=3), 1:10) #> rate #> lowb 0 #> uppb Inf detectbound(\"binom\", c(size=3, prob=1/2), 1:10) #> size prob #> lowb -Inf 0 #> uppb Inf 1 detectbound(\"nbinom\", c(size=3, prob=1/2), 1:10) #> size prob mu #> lowb 0 0 -Inf #> uppb Inf 1 Inf"},{"path":"https://lbbe-software.github.io/fitdistrplus/reference/endosulfan.html","id":null,"dir":"Reference","previous_headings":"","what":"Species Sensitivity Distribution (SSD) for endosulfan — endosulfan","title":"Species Sensitivity Distribution (SSD) for endosulfan — endosulfan","text":"Summary 48- 96-hour acute toxicity values (LC50 EC50 values) exposure Australian Non-Australian taxa endosulfan.","code":""},{"path":"https://lbbe-software.github.io/fitdistrplus/reference/endosulfan.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Species Sensitivity Distribution (SSD) for endosulfan — endosulfan","text":"","code":"data(endosulfan)"},{"path":"https://lbbe-software.github.io/fitdistrplus/reference/endosulfan.html","id":"format","dir":"Reference","previous_headings":"","what":"Format","title":"Species Sensitivity Distribution (SSD) for endosulfan — endosulfan","text":"endosulfan data frame 4 columns, named ATV Acute Toxicity Value (geometric mean LC50 ou EC50 values micrograms per liter), Australian (coding Australian another origin), group (arthropods, fish non-arthropod invertebrates) taxa.","code":""},{"path":"https://lbbe-software.github.io/fitdistrplus/reference/endosulfan.html","id":"source","dir":"Reference","previous_headings":"","what":"Source","title":"Species Sensitivity Distribution (SSD) for endosulfan — endosulfan","text":"Hose, G.C., Van den Brink, P.J. 2004. Confirming Species-Sensitivity Distribution Concept Endosulfan Using Laboratory, Mesocosms, Field Data. Archives Environmental Contamination Toxicology, 47, 511-520.","code":""},{"path":"https://lbbe-software.github.io/fitdistrplus/reference/endosulfan.html","id":"ref-examples","dir":"Reference","previous_headings":"","what":"Examples","title":"Species Sensitivity Distribution (SSD) for endosulfan — endosulfan","text":"","code":"# (1) load of data # data(endosulfan) # (2) plot and description of data for non Australian fish in decimal logarithm # log10ATV <-log10(subset(endosulfan,(Australian == \"no\") & (group == \"Fish\"))$ATV) plotdist(log10ATV) descdist(log10ATV,boot=11) #> summary statistics #> ------ #> min: -0.69897 max: 3.60206 #> median: 0.4911356 #> mean: 0.5657595 #> estimated sd: 0.7034928 #> estimated skewness: 1.764601 #> estimated kurtosis: 9.759505 # (3) fit of a normal and a logistic distribution to data in log10 # (classical distributions used for SSD) # and visual comparison of the fits # fln <- fitdist(log10ATV,\"norm\") summary(fln) #> Fitting of the distribution ' norm ' by maximum likelihood #> Parameters : #> estimate Std. Error #> mean 0.5657595 0.10259072 #> sd 0.6958041 0.07254192 #> Loglikelihood: -48.58757 AIC: 101.1751 BIC: 104.8324 #> Correlation matrix: #> mean sd #> mean 1 0 #> sd 0 1 #> fll <- fitdist(log10ATV,\"logis\") summary(fll) #> Fitting of the distribution ' logis ' by maximum likelihood #> Parameters : #> estimate Std. 
Error #> location 0.5082818 0.08701594 #> scale 0.3457256 0.04301025 #> Loglikelihood: -44.31825 AIC: 92.6365 BIC: 96.29378 #> Correlation matrix: #> location scale #> location 1.00000000 0.04028287 #> scale 0.04028287 1.00000000 #> cdfcomp(list(fln,fll),legendtext=c(\"normal\",\"logistic\"), xlab=\"log10ATV\") denscomp(list(fln,fll),legendtext=c(\"normal\",\"logistic\"), xlab=\"log10ATV\") qqcomp(list(fln,fll),legendtext=c(\"normal\",\"logistic\")) ppcomp(list(fln,fll),legendtext=c(\"normal\",\"logistic\")) gofstat(list(fln,fll), fitnames = c(\"lognormal\", \"loglogistic\")) #> Goodness-of-fit statistics #> lognormal loglogistic #> Kolmogorov-Smirnov statistic 0.1267649 0.08457997 #> Cramer-von Mises statistic 0.1555576 0.04058514 #> Anderson-Darling statistic 1.0408045 0.37407465 #> #> Goodness-of-fit criteria #> lognormal loglogistic #> Akaike's Information Criterion 101.1751 92.63650 #> Bayesian Information Criterion 104.8324 96.29378 # (4) estimation of the 5 percent quantile value of # logistic fitted distribution (5 percent hazardous concentration : HC5) # with its two-sided 95 percent confidence interval calculated by # parametric bootstrap # with a small number of iterations to satisfy CRAN running times constraint. # For practical applications, we recommend to use at least niter=501 or niter=1001. # # in log10(ATV) bll <- bootdist(fll,niter=51) HC5ll <- quantile(bll,probs = 0.05) # in ATV 10^(HC5ll$quantiles) #> p=0.05 #> estimate 0.309253 10^(HC5ll$quantCI) #> p=0.05 #> 2.5 % 0.1891451 #> 97.5 % 0.5457214 # (5) estimation of the 5 percent quantile value of # the fitted logistic distribution (5 percent hazardous concentration : HC5) # with its one-sided 95 percent confidence interval (type \"greater\") # calculated by # nonparametric bootstrap # with a small number of iterations to satisfy CRAN running times constraint. # For practical applications, we recommend to use at least niter=501 or niter=1001. # # in log10(ATV) bllnonpar <- bootdist(fll,niter=51,bootmethod = \"nonparam\") HC5llgreater <- quantile(bllnonpar,probs = 0.05, CI.type=\"greater\") # in ATV 10^(HC5llgreater$quantiles) #> p=0.05 #> estimate 0.309253 10^(HC5llgreater$quantCI) #> p=0.05 #> 5 % 0.1860103 # (6) fit of a logistic distribution # by minimizing the modified Anderson-Darling AD2L distance # cf. ?mgedist for definition of this distance # fllAD2L <- fitdist(log10ATV,\"logis\",method=\"mge\",gof=\"AD2L\") summary(fllAD2L) #> Fitting of the distribution ' logis ' by maximum goodness-of-fit #> Parameters : #> estimate #> location 0.4965288 #> scale 0.3013154 #> Loglikelihood: -44.96884 AIC: 93.93767 BIC: 97.59496 plot(fllAD2L)"},{"path":"https://lbbe-software.github.io/fitdistrplus/reference/fitdist.html","id":null,"dir":"Reference","previous_headings":"","what":"Fit of univariate distributions to non-censored data — fitdist","title":"Fit of univariate distributions to non-censored data — fitdist","text":"Fit univariate distributions non-censored data maximum likelihood (mle), moment matching (mme), quantile matching (qme) maximizing goodness--fit estimation (mge). latter also known minimizing distance estimation. 
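As a minimal sketch of these five methods (using the groundbeef serving data that appears in the examples further below; qme and mge additionally require the probs and gof arguments described in the Details section):

library(fitdistrplus)
data(groundbeef)
serving <- groundbeef$serving
fitdist(serving, "gamma", method = "mle")                        # maximum likelihood
fitdist(serving, "gamma", method = "mme")                        # moment matching (closed-form for the gamma)
fitdist(serving, "gamma", method = "qme", probs = c(0.25, 0.75)) # quantile matching
fitdist(serving, "gamma", method = "mge", gof = "CvM")           # maximum goodness-of-fit
fitdist(serving, "gamma", method = "mse")                        # maximum spacing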
Generic methods print, plot, summary, quantile, logLik, AIC, BIC, vcov coef.","code":""},{"path":"https://lbbe-software.github.io/fitdistrplus/reference/fitdist.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Fit of univariate distributions to non-censored data — fitdist","text":"","code":"fitdist(data, distr, method = c(\"mle\", \"mme\", \"qme\", \"mge\", \"mse\"), start=NULL, fix.arg=NULL, discrete, keepdata = TRUE, keepdata.nb=100, calcvcov=TRUE, ...) # S3 method for class 'fitdist' print(x, ...) # S3 method for class 'fitdist' plot(x, breaks=\"default\", ...) # S3 method for class 'fitdist' summary(object, ...) # S3 method for class 'fitdist' logLik(object, ...) # S3 method for class 'fitdist' AIC(object, ..., k = 2) # S3 method for class 'fitdist' BIC(object, ...) # S3 method for class 'fitdist' vcov(object, ...) # S3 method for class 'fitdist' coef(object, ...)"},{"path":"https://lbbe-software.github.io/fitdistrplus/reference/fitdist.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Fit of univariate distributions to non-censored data — fitdist","text":"data numeric vector. distr character string \"name\" naming distribution corresponding density function dname, corresponding distribution function pname corresponding quantile function qname must defined, directly density function. method character string coding fitting method: \"mle\" 'maximum likelihood estimation', \"mme\" 'moment matching estimation', \"qme\" 'quantile matching estimation', \"mge\" 'maximum goodness--fit estimation' \"mse\" 'maximum spacing estimation'. start named list giving initial values parameters named distribution function data computing initial values returning named list. argument may omitted (default) distributions reasonable starting values computed (see 'details' section mledist). may account closed-form formulas. fix.arg optional named list giving values fixed parameters named distribution function data computing (fixed) parameter values returning named list. Parameters fixed value thus estimated maximum likelihood procedure. use argument possible method=\"mme\" closed-form formula used. keepdata logical. TRUE, dataset returned, otherwise sample subset returned. keepdata.nb keepdata=FALSE, length (>1) subset returned. calcvcov logical indicating (asymptotic) covariance matrix required. discrete TRUE, distribution considered discrete. discrete missing, \t discrete automaticaly set TRUE distr belongs \t \"binom\", \"nbinom\", \"geom\", \"hyper\" \"pois\" FALSE cases. thus recommended enter argument using another discrete distribution. argument directly affect results fit passed functions gofstat, plotdist cdfcomp. x object class \"fitdist\". object object class \"fitdist\". breaks \"default\" histogram plotted function hist default breaks definition. Else breaks passed function hist. argument taken account discrete distributions: \"binom\", \"nbinom\", \"geom\", \"hyper\" \"pois\". k penalty per parameter passed AIC generic function (2 default). ... arguments passed generic functions, one functions \"mledist\", \"mmedist\", \"qmedist\" \"mgedist\" depending chosen method. 
See mledist, mmedist, qmedist, mgedist details parameter estimation.","code":""},{"path":"https://lbbe-software.github.io/fitdistrplus/reference/fitdist.html","id":"details","dir":"Reference","previous_headings":"","what":"Details","title":"Fit of univariate distributions to non-censored data — fitdist","text":"assumed distr argument specifies distribution probability density function, cumulative distribution function quantile function (d, p, q). four possible fitting methods described : method=\"mle\" Maximum likelihood estimation consists maximizing log-likelihood. numerical optimization carried mledist via optim find best values (see mledist details). method=\"mme\" Moment matching estimation consists equalizing theoretical empirical moments. Estimated values distribution parameters computed closed-form formula following distributions : \"norm\", \"lnorm\", \"pois\", \"exp\", \"gamma\", \"nbinom\", \"geom\", \"beta\", \"unif\" \"logis\". Otherwise theoretical empirical moments matched numerically, minimization sum squared differences observed theoretical moments. last case, arguments needed call fitdist: order memp (see mmedist details). Since Version 1.2-0, mmedist automatically computes asymptotic covariance matrix, hence theoretical moments mdist defined order equals twice maximal order given order. method = \"qme\" Quantile matching estimation consists equalizing theoretical empirical quantile. numerical optimization carried qmedist via optim minimize sum squared differences observed theoretical quantiles. use method requires additional argument probs, defined numeric vector probabilities quantile(s) () matched (see qmedist details). method = \"mge\" Maximum goodness--fit estimation consists maximizing goodness--fit statistics. numerical optimization carried mgedist via optim minimize goodness--fit distance. use method requires additional argument gof coding goodness--fit distance chosen. One can use classical Cramer-von Mises distance (\"CvM\"), classical Kolmogorov-Smirnov distance (\"KS\"), classical Anderson-Darling distance (\"AD\") gives weight tails distribution, one variants last distance proposed Luceno (2006) (see mgedist details). method suitable discrete distributions. method = \"mse\" Maximum goodness--fit estimation consists maximizing average log spacing. numerical optimization carried msedist via optim. default, direct optimization log-likelihood (criteria depending chosen method) performed using optim, \"Nelder-Mead\" method distributions characterized one parameter \"BFGS\" method distributions characterized one parameter. optimization algorithm used optim can chosen another optimization function can specified using ... argument (see mledist details). start may omitted (.e. NULL) classic distributions (see 'details' section mledist). Note errors raised optim, good idea start adding traces optimization process adding control=list(trace=1, REPORT=1) ... argument. parameter(s) () estimated, fitdist computes log-likelihood every estimation method maximum likelihood estimation standard errors estimates calculated Hessian solution found optim user-supplied function passed mledist. default (keepdata = TRUE), object returned fitdist contains data vector given input. dealing large datasets, can remove original dataset output setting keepdata = FALSE. case, keepdata.nb points () kept random subsampling keepdata.nb-2 points dataset adding minimum maximum. combined bootdist, use non-parametric bootstrap aware bootstrap performed subset randomly selected fitdist. 
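A hedged sketch of the keepdata mechanism described above, on a hypothetical large simulated sample; the fitted object then stores only a keepdata.nb-point subset (random points plus the minimum and maximum) in its data component:

set.seed(123)
bigsample <- rgamma(1e5, shape = 4, rate = 0.05)
fbig <- fitdist(bigsample, "gamma", keepdata = FALSE, keepdata.nb = 100)
length(fbig$data)   # keepdata.nb values instead of the full 1e5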
Currently, graphical comparisons multiple fits available framework. Weighted version estimation process available method = \"mle\", \"mme\", \"qme\" using weights=.... See corresponding man page details. Weighted maximum GOF estimation (method = \"mge\") allowed. yet possible take account weighths functions plotdist, plot.fitdist, cdfcomp, denscomp, ppcomp, qqcomp, gofstat descdist (developments planned future). parameter(s) () estimated, gofstat allows compute goodness--fit statistics. NB: data values particularly small large, scaling may needed optimization process. See example (14) man page examples (14,15) test file package. Please also take look Rmpfr package available CRAN numerical accuracy issues.","code":""},{"path":"https://lbbe-software.github.io/fitdistrplus/reference/fitdist.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Fit of univariate distributions to non-censored data — fitdist","text":"fitdist returns object class \"fitdist\", list following components: estimate parameter estimates. method character string coding fitting method : \"mle\" 'maximum likelihood estimation', \"mme\" 'matching moment estimation', \"qme\" 'matching quantile estimation' \"mge\" 'maximum goodness--fit estimation' \"mse\" 'maximum spacing estimation'. sd estimated standard errors, NA numerically computable NULL available. cor estimated correlation matrix, NA numerically computable NULL available. vcov estimated variance-covariance matrix, NULL available estimation method considered. loglik log-likelihood. aic Akaike information criterion. bic -called BIC SBC (Schwarz Bayesian criterion). n length data set. data data set. distname name distribution. fix.arg named list giving values parameters named distribution must kept fixed rather estimated maximum likelihood NULL parameters. fix.arg.fun function used set value fix.arg NULL. dots list arguments passed ... used bootdist iterative calls mledist, mmedist, qmedist, mgedist NULL arguments. convergence integer code convergence optim/constrOptim defined defined user user-supplied optimization function. 0 indicates successful convergence. 1 indicates iteration limit optim reached. 10 indicates degeneracy Nealder-Mead simplex. 100 indicates optim encountered internal error. discrete input argument automatic definition function passed functions gofstat, plotdist cdfcomp. weights vector weigths used estimation process NULL. Generic functions: print print \"fitdist\" object shows traces fitting method fitted distribution. summary summary provides parameter estimates fitted distribution, log-likelihood, AIC BIC statistics maximum likelihood used, standard errors parameter estimates correlation matrix parameter estimates. plot plot object class \"fitdist\" returned fitdist uses function plotdist. object class \"fitdist\" list objects class \"fitdist\" corresponding various fits using data set may also plotted using cdf plot (function cdfcomp), density plot(function denscomp), density Q-Q plot (function qqcomp), P-P plot (function ppcomp). logLik Extracts estimated log-likelihood \"fitdist\" object. AIC Extracts AIC \"fitdist\" object. BIC Extracts estimated BIC \"fitdist\" object. vcov Extracts estimated var-covariance matrix \"fitdist\" object (available method = \"mle\"). 
coef Extracts fitted coefficients \"fitdist\" object.","code":""},{"path":[]},{"path":"https://lbbe-software.github.io/fitdistrplus/reference/fitdist.html","id":"references","dir":"Reference","previous_headings":"","what":"References","title":"Fit of univariate distributions to non-censored data — fitdist","text":". Ibragimov R. 'minskii (1981), Statistical Estimation - Asymptotic Theory, Springer-Verlag, doi:10.1007/978-1-4899-0027-2 Cullen AC Frey HC (1999), Probabilistic techniques exposure assessment. Plenum Press, USA, pp. 81-155. Venables WN Ripley BD (2002), Modern applied statistics S. Springer, New York, pp. 435-446, doi:10.1007/978-0-387-21706-2 . Vose D (2000), Risk analysis, quantitative guide. John Wiley & Sons Ltd, Chischester, England, pp. 99-143. Delignette-Muller ML Dutang C (2015), fitdistrplus: R Package Fitting Distributions. Journal Statistical Software, 64(4), 1-34, doi:10.18637/jss.v064.i04 .","code":""},{"path":"https://lbbe-software.github.io/fitdistrplus/reference/fitdist.html","id":"author","dir":"Reference","previous_headings":"","what":"Author","title":"Fit of univariate distributions to non-censored data — fitdist","text":"Marie-Laure Delignette-Muller Christophe Dutang.","code":""},{"path":"https://lbbe-software.github.io/fitdistrplus/reference/fitdist.html","id":"ref-examples","dir":"Reference","previous_headings":"","what":"Examples","title":"Fit of univariate distributions to non-censored data — fitdist","text":"","code":"# (1) fit of a gamma distribution by maximum likelihood estimation # data(groundbeef) serving <- groundbeef$serving fitg <- fitdist(serving, \"gamma\") summary(fitg) #> Fitting of the distribution ' gamma ' by maximum likelihood #> Parameters : #> estimate Std. Error #> shape 4.00955898 0.341451641 #> rate 0.05443907 0.004937239 #> Loglikelihood: -1253.625 AIC: 2511.25 BIC: 2518.325 #> Correlation matrix: #> shape rate #> shape 1.0000000 0.9384578 #> rate 0.9384578 1.0000000 #> plot(fitg) plot(fitg, demp = TRUE) plot(fitg, histo = FALSE, demp = TRUE) cdfcomp(fitg, addlegend=FALSE) denscomp(fitg, addlegend=FALSE) ppcomp(fitg, addlegend=FALSE) qqcomp(fitg, addlegend=FALSE) # (2) use the moment matching estimation (using a closed formula) # fitgmme <- fitdist(serving, \"gamma\", method=\"mme\") summary(fitgmme) #> Fitting of the distribution ' gamma ' by matching moments #> Parameters : #> estimate Std. Error #> shape 4.22848617 6.64959843 #> rate 0.05741663 0.09451052 #> Loglikelihood: -1253.825 AIC: 2511.65 BIC: 2518.724 #> Correlation matrix: #> shape rate #> shape 1.0000000 0.9553622 #> rate 0.9553622 1.0000000 #> # (3) Comparison of various fits # fitW <- fitdist(serving, \"weibull\") fitg <- fitdist(serving, \"gamma\") fitln <- fitdist(serving, \"lnorm\") summary(fitW) #> Fitting of the distribution ' weibull ' by maximum likelihood #> Parameters : #> estimate Std. Error #> shape 2.185885 0.1045755 #> scale 83.347679 2.5268626 #> Loglikelihood: -1255.225 AIC: 2514.449 BIC: 2521.524 #> Correlation matrix: #> shape scale #> shape 1.000000 0.321821 #> scale 0.321821 1.000000 #> summary(fitg) #> Fitting of the distribution ' gamma ' by maximum likelihood #> Parameters : #> estimate Std. Error #> shape 4.00955898 0.341451641 #> rate 0.05443907 0.004937239 #> Loglikelihood: -1253.625 AIC: 2511.25 BIC: 2518.325 #> Correlation matrix: #> shape rate #> shape 1.0000000 0.9384578 #> rate 0.9384578 1.0000000 #> summary(fitln) #> Fitting of the distribution ' lnorm ' by maximum likelihood #> Parameters : #> estimate Std. 
Error #> meanlog 4.1693701 0.03366988 #> sdlog 0.5366095 0.02380783 #> Loglikelihood: -1261.319 AIC: 2526.639 BIC: 2533.713 #> Correlation matrix: #> meanlog sdlog #> meanlog 1 0 #> sdlog 0 1 #> cdfcomp(list(fitW, fitg, fitln), legendtext=c(\"Weibull\", \"gamma\", \"lognormal\")) denscomp(list(fitW, fitg, fitln), legendtext=c(\"Weibull\", \"gamma\", \"lognormal\")) qqcomp(list(fitW, fitg, fitln), legendtext=c(\"Weibull\", \"gamma\", \"lognormal\")) ppcomp(list(fitW, fitg, fitln), legendtext=c(\"Weibull\", \"gamma\", \"lognormal\")) gofstat(list(fitW, fitg, fitln), fitnames=c(\"Weibull\", \"gamma\", \"lognormal\")) #> Goodness-of-fit statistics #> Weibull gamma lognormal #> Kolmogorov-Smirnov statistic 0.1396646 0.1281486 0.1493090 #> Cramer-von Mises statistic 0.6840994 0.6936274 0.8277358 #> Anderson-Darling statistic 3.5736460 3.5672625 4.5436542 #> #> Goodness-of-fit criteria #> Weibull gamma lognormal #> Akaike's Information Criterion 2514.449 2511.250 2526.639 #> Bayesian Information Criterion 2521.524 2518.325 2533.713 # (4) defining your own distribution functions, here for the Gumbel distribution # for other distributions, see the CRAN task view # dedicated to probability distributions # dgumbel <- function(x, a, b) 1/b*exp((a-x)/b)*exp(-exp((a-x)/b)) pgumbel <- function(q, a, b) exp(-exp((a-q)/b)) qgumbel <- function(p, a, b) a-b*log(-log(p)) fitgumbel <- fitdist(serving, \"gumbel\", start=list(a=10, b=10)) #> Error in fitdist(serving, \"gumbel\", start = list(a = 10, b = 10)): The dgumbel function must be defined summary(fitgumbel) #> Error in eval(expr, envir, enclos): object 'fitgumbel' not found plot(fitgumbel) #> Error in eval(expr, envir, enclos): object 'fitgumbel' not found # (5) fit discrete distributions (Poisson and negative binomial) # data(toxocara) number <- toxocara$number fitp <- fitdist(number,\"pois\") summary(fitp) #> Fitting of the distribution ' pois ' by maximum likelihood #> Parameters : #> estimate Std. Error #> lambda 8.679245 0.4046719 #> Loglikelihood: -507.5334 AIC: 1017.067 BIC: 1019.037 plot(fitp) fitnb <- fitdist(number,\"nbinom\") summary(fitnb) #> Fitting of the distribution ' nbinom ' by maximum likelihood #> Parameters : #> estimate Std. Error #> size 0.3971457 0.08289027 #> mu 8.6802520 1.93501003 #> Loglikelihood: -159.3441 AIC: 322.6882 BIC: 326.6288 #> Correlation matrix: #> size mu #> size 1.0000000000 -0.0001038553 #> mu -0.0001038553 1.0000000000 #> plot(fitnb) cdfcomp(list(fitp,fitnb)) gofstat(list(fitp,fitnb)) #> Chi-squared statistic: 31256.96 7.48606 #> Degree of freedom of the Chi-squared distribution: 5 4 #> Chi-squared p-value: 0 0.1123255 #> the p-value may be wrong with some theoretical counts < 5 #> Chi-squared table: #> obscounts theo 1-mle-pois theo 2-mle-nbinom #> <= 0 14 0.009014207 15.295027 #> <= 1 8 0.078236512 5.808596 #> <= 3 6 1.321767215 6.845015 #> <= 4 6 2.131297776 2.407815 #> <= 9 6 29.827829221 7.835196 #> <= 21 6 19.626223732 8.271110 #> > 21 7 0.005631339 6.537242 #> #> Goodness-of-fit criteria #> 1-mle-pois 2-mle-nbinom #> Akaike's Information Criterion 1017.067 322.6882 #> Bayesian Information Criterion 1019.037 326.6288 # (6) how to change the optimisation method? # data(groundbeef) serving <- groundbeef$serving fitdist(serving, \"gamma\", optim.method=\"Nelder-Mead\") #> Fitting of the distribution ' gamma ' by maximum likelihood #> Parameters: #> estimate Std. 
Error #> shape 4.00955898 0.341451641 #> rate 0.05443907 0.004937239 fitdist(serving, \"gamma\", optim.method=\"BFGS\") #> Fitting of the distribution ' gamma ' by maximum likelihood #> Parameters: #> estimate Std. Error #> shape 4.21183435 0.359345536 #> rate 0.05719546 0.005181142 fitdist(serving, \"gamma\", optim.method=\"SANN\") #> Fitting of the distribution ' gamma ' by maximum likelihood #> Parameters: #> estimate Std. Error #> shape 4.02694892 0.342992129 #> rate 0.05478925 0.004968512 # (7) custom optimization function # # \\donttest{ #create the sample set.seed(1234) mysample <- rexp(100, 5) mystart <- list(rate=8) res1 <- fitdist(mysample, dexp, start= mystart, optim.method=\"Nelder-Mead\") #show the result summary(res1) #> Fitting of the distribution ' exp ' by maximum likelihood #> Parameters : #> estimate Std. Error #> rate 5.120312 0.5120312 #> Loglikelihood: 63.32596 AIC: -124.6519 BIC: -122.0467 #the warning tell us to use optimise, because the Nelder-Mead is not adequate. #to meet the standard 'fn' argument and specific name arguments, we wrap optimize, myoptimize <- function(fn, par, ...) { res <- optimize(f=fn, ..., maximum=FALSE) #assume the optimization function minimize standardres <- c(res, convergence=0, value=res$objective, par=res$minimum, hessian=NA) return(standardres) } #call fitdist with a 'custom' optimization function res2 <- fitdist(mysample, \"exp\", start=mystart, custom.optim=myoptimize, interval=c(0, 100)) #show the result summary(res2) #> Fitting of the distribution ' exp ' by maximum likelihood #> Parameters : #> estimate #> rate 5.120531 #> Loglikelihood: 63.32596 AIC: -124.6519 BIC: -122.0467 # } # (8) custom optimization function - another example with the genetic algorithm # # \\donttest{ #set a sample fit1 <- fitdist(serving, \"gamma\") summary(fit1) #> Fitting of the distribution ' gamma ' by maximum likelihood #> Parameters : #> estimate Std. Error #> shape 4.00955898 0.341451641 #> rate 0.05443907 0.004937239 #> Loglikelihood: -1253.625 AIC: 2511.25 BIC: 2518.325 #> Correlation matrix: #> shape rate #> shape 1.0000000 0.9384578 #> rate 0.9384578 1.0000000 #> #wrap genoud function rgenoud package mygenoud <- function(fn, par, ...) { require(rgenoud) res <- genoud(fn, starting.values=par, ...) standardres <- c(res, convergence=0) return(standardres) } #call fitdist with a 'custom' optimization function fit2 <- fitdist(serving, \"gamma\", custom.optim=mygenoud, nvars=2, Domains=cbind(c(0, 0), c(10, 10)), boundary.enforcement=1, print.level=1, hessian=TRUE) #> Loading required package: rgenoud #> ## rgenoud (Version 5.9-0.10, Build Date: 2023-12-13) #> ## See http://sekhon.berkeley.edu/rgenoud for additional documentation. #> ## Please cite software as: #> ## Walter Mebane, Jr. and Jasjeet S. Sekhon. 2011. #> ## ``Genetic Optimization Using Derivatives: The rgenoud package for R.'' #> ## Journal of Statistical Software, 42(11): 1-26. #> ## #> #> #> Wed Aug 28 14:20:24 2024 #> Domains: #> 0.000000e+00 <= X1 <= 1.000000e+01 #> 0.000000e+00 <= X2 <= 1.000000e+01 #> #> Data Type: Floating Point #> Operators (code number, name, population) #> \t(1) Cloning........................... \t122 #> \t(2) Uniform Mutation.................. \t125 #> \t(3) Boundary Mutation................. \t125 #> \t(4) Non-Uniform Mutation.............. \t125 #> \t(5) Polytope Crossover................ \t125 #> \t(6) Simple Crossover.................. \t126 #> \t(7) Whole Non-Uniform Mutation........ \t125 #> \t(8) Heuristic Crossover............... 
\t126 #> \t(9) Local-Minimum Crossover........... \t0 #> #> HARD Maximum Number of Generations: 100 #> Maximum Nonchanging Generations: 10 #> Population size : 1000 #> Convergence Tolerance: 1.000000e-03 #> #> Using the BFGS Derivative Based Optimizer on the Best Individual Each Generation. #> Checking Gradients before Stopping. #> Not Using Out of Bounds Individuals But Allowing Trespassing. #> #> Minimization Problem. #> #> #> Generation#\t Solution Value #> #> 0 \t1.253796e+03 #> 1 \t1.253625e+03 #> #> 'wait.generations' limit reached. #> No significant improvement in 10 generations. #> #> Solution Fitness Value: 1.253625e+03 #> #> Parameters at the Solution (parameter, gradient): #> #> X[ 1] :\t4.008339e+00\tG[ 1] :\t-2.378827e-07 #> X[ 2] :\t5.442736e-02\tG[ 2] :\t6.872187e-05 #> #> Solution Found Generation 1 #> Number of Generations Run 12 #> #> Wed Aug 28 14:20:25 2024 #> Total run time : 0 hours 0 minutes and 1 seconds summary(fit2) #> Fitting of the distribution ' gamma ' by maximum likelihood #> Parameters : #> estimate Std. Error #> shape 4.00833912 0.341343848 #> rate 0.05442736 0.004936215 #> Loglikelihood: -1253.625 AIC: 2511.25 BIC: 2518.325 #> Correlation matrix: #> shape rate #> shape 1.0000000 0.9384395 #> rate 0.9384395 1.0000000 #> # } # (9) estimation of the standard deviation of a gamma distribution # by maximum likelihood with the shape fixed at 4 using the argument fix.arg # data(groundbeef) serving <- groundbeef$serving f1c <- fitdist(serving,\"gamma\",start=list(rate=0.1),fix.arg=list(shape=4)) summary(f1c) #> Fitting of the distribution ' gamma ' by maximum likelihood #> Parameters : #> estimate Std. Error #> rate 0.05431772 0.001703521 #> Fixed parameters: #> value #> shape 4 #> Loglikelihood: -1253.625 AIC: 2509.251 BIC: 2512.788 plot(f1c) # (10) fit of a Weibull distribution to serving size data # by maximum likelihood estimation # or by quantile matching estimation (in this example # matching first and third quartiles) # data(groundbeef) serving <- groundbeef$serving fWmle <- fitdist(serving, \"weibull\") summary(fWmle) #> Fitting of the distribution ' weibull ' by maximum likelihood #> Parameters : #> estimate Std. 
Error #> shape 2.185885 0.1045755 #> scale 83.347679 2.5268626 #> Loglikelihood: -1255.225 AIC: 2514.449 BIC: 2521.524 #> Correlation matrix: #> shape scale #> shape 1.000000 0.321821 #> scale 0.321821 1.000000 #> plot(fWmle) gofstat(fWmle) #> Goodness-of-fit statistics #> 1-mle-weibull #> Kolmogorov-Smirnov statistic 0.1396646 #> Cramer-von Mises statistic 0.6840994 #> Anderson-Darling statistic 3.5736460 #> #> Goodness-of-fit criteria #> 1-mle-weibull #> Akaike's Information Criterion 2514.449 #> Bayesian Information Criterion 2521.524 fWqme <- fitdist(serving, \"weibull\", method=\"qme\", probs=c(0.25, 0.75)) summary(fWqme) #> Fitting of the distribution ' weibull ' by matching quantiles #> Parameters : #> estimate #> shape 2.268699 #> scale 86.590853 #> Loglikelihood: -1256.129 AIC: 2516.258 BIC: 2523.332 plot(fWqme) gofstat(fWqme) #> Goodness-of-fit statistics #> 1-qme-weibull #> Kolmogorov-Smirnov statistic 0.1692858 #> Cramer-von Mises statistic 0.9664709 #> Anderson-Darling statistic 4.8479858 #> #> Goodness-of-fit criteria #> 1-qme-weibull #> Akaike's Information Criterion 2516.258 #> Bayesian Information Criterion 2523.332 # (11) Fit of a Pareto distribution by numerical moment matching estimation # # \\donttest{ require(actuar) #> Loading required package: actuar #> #> Attaching package: ‘actuar’ #> The following objects are masked from ‘package:stats’: #> #> sd, var #> The following object is masked from ‘package:grDevices’: #> #> cm #simulate a sample x4 <- rpareto(1000, 6, 2) #empirical raw moment memp <- function(x, order) mean(x^order) #fit fP <- fitdist(x4, \"pareto\", method=\"mme\", order=c(1, 2), memp=\"memp\", start=list(shape=10, scale=10), lower=1, upper=Inf) #> Error in mmedist(data, distname, start = arg_startfix$start.arg, fix.arg = arg_startfix$fix.arg, checkstartfix = TRUE, calcvcov = calcvcov, ...): the empirical moment must be defined as a function summary(fP) #> Error in eval(expr, envir, enclos): object 'fP' not found plot(fP) #> Error in eval(expr, envir, enclos): object 'fP' not found # } # (12) Fit of a Weibull distribution to serving size data by maximum # goodness-of-fit estimation using all the distances available # # \\donttest{ data(groundbeef) serving <- groundbeef$serving (f1 <- fitdist(serving, \"weibull\", method=\"mge\", gof=\"CvM\")) #> Fitting of the distribution ' weibull ' by maximum goodness-of-fit #> Parameters: #> estimate #> shape 2.093204 #> scale 82.660014 (f2 <- fitdist(serving, \"weibull\", method=\"mge\", gof=\"KS\")) #> Fitting of the distribution ' weibull ' by maximum goodness-of-fit #> Parameters: #> estimate #> shape 2.065634 #> scale 81.450487 (f3 <- fitdist(serving, \"weibull\", method=\"mge\", gof=\"AD\")) #> Fitting of the distribution ' weibull ' by maximum goodness-of-fit #> Parameters: #> estimate #> shape 2.125473 #> scale 82.890260 (f4 <- fitdist(serving, \"weibull\", method=\"mge\", gof=\"ADR\")) #> Fitting of the distribution ' weibull ' by maximum goodness-of-fit #> Parameters: #> estimate #> shape 2.072087 #> scale 82.761868 (f5 <- fitdist(serving, \"weibull\", method=\"mge\", gof=\"ADL\")) #> Fitting of the distribution ' weibull ' by maximum goodness-of-fit #> Parameters: #> estimate #> shape 2.197498 #> scale 82.016005 (f6 <- fitdist(serving, \"weibull\", method=\"mge\", gof=\"AD2R\")) #> Fitting of the distribution ' weibull ' by maximum goodness-of-fit #> Parameters: #> estimate #> shape 1.90328 #> scale 81.33464 (f7 <- fitdist(serving, \"weibull\", method=\"mge\", gof=\"AD2L\")) #> Fitting of the distribution 
' weibull ' by maximum goodness-of-fit #> Parameters: #> estimate #> shape 2.483836 #> scale 78.252113 (f8 <- fitdist(serving, \"weibull\", method=\"mge\", gof=\"AD2\")) #> Fitting of the distribution ' weibull ' by maximum goodness-of-fit #> Parameters: #> estimate #> shape 2.081168 #> scale 85.281194 cdfcomp(list(f1, f2, f3, f4, f5, f6, f7, f8)) cdfcomp(list(f1, f2, f3, f4, f5, f6, f7, f8), xlogscale=TRUE, xlim=c(8, 250), verticals=TRUE) denscomp(list(f1, f2, f3, f4, f5, f6, f7, f8)) # } # (13) Fit of a uniform distribution using maximum likelihood # (a closed formula is used in this special case where the loglikelihood is not defined), # or maximum goodness-of-fit with Cramer-von Mises or Kolmogorov-Smirnov distance # set.seed(1234) u <- runif(50, min=5, max=10) fumle <- fitdist(u, \"unif\", method=\"mle\") summary(fumle) #> Fitting of the distribution ' unif ' by maximum likelihood #> Parameters : #> estimate #> min 5.047479 #> max 9.960752 #> Loglikelihood: -79.59702 AIC: 163.194 BIC: 167.0181 plot(fumle) gofstat(fumle) #> Goodness-of-fit statistics #> 1-mle-unif #> Kolmogorov-Smirnov statistic 0.1340723 #> Cramer-von Mises statistic 0.1566892 #> Anderson-Darling statistic Inf #> #> Goodness-of-fit criteria #> 1-mle-unif #> Akaike's Information Criterion 163.1940 #> Bayesian Information Criterion 167.0181 fuCvM <- fitdist(u, \"unif\", method=\"mge\", gof=\"CvM\") summary(fuCvM) #> Fitting of the distribution ' unif ' by maximum goodness-of-fit #> Parameters : #> estimate #> min 5.110497 #> max 9.552878 #> Loglikelihood: -Inf AIC: Inf BIC: Inf plot(fuCvM) gofstat(fuCvM) #> Goodness-of-fit statistics #> 1-mge-unif #> Kolmogorov-Smirnov statistic 0.11370966 #> Cramer-von Mises statistic 0.07791651 #> Anderson-Darling statistic Inf #> #> Goodness-of-fit criteria #> 1-mge-unif #> Akaike's Information Criterion Inf #> Bayesian Information Criterion Inf fuKS <- fitdist(u, \"unif\", method=\"mge\", gof=\"KS\") summary(fuKS) #> Fitting of the distribution ' unif ' by maximum goodness-of-fit #> Parameters : #> estimate #> min 5.092357 #> max 9.323818 #> Loglikelihood: -Inf AIC: Inf BIC: Inf plot(fuKS) gofstat(fuKS) #> Goodness-of-fit statistics #> 1-mge-unif #> Kolmogorov-Smirnov statistic 0.09216159 #> Cramer-von Mises statistic 0.12241830 #> Anderson-Darling statistic Inf #> #> Goodness-of-fit criteria #> 1-mge-unif #> Akaike's Information Criterion Inf #> Bayesian Information Criterion Inf # (14) scaling problem # the simulated dataset (below) has particularly small values, hence without scaling (10^0), # the optimization raises an error. The for loop shows how scaling by 10^i # for i=1,...,6 makes the fitting procedure work correctly. 
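# Note that the estimates are then expressed on the scaled data: in the output below the
# Cauchy location and scale estimates grow by a factor of 10 with each increase of i, so
# divide them by 10^i to recover estimates on the original scale of x2.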
set.seed(1234) x2 <- rnorm(100, 1e-4, 2e-4) for(i in 0:6) cat(i, try(fitdist(x2*10^i, \"cauchy\", method=\"mle\")$estimate, silent=TRUE), \"\\n\") #> #> 0 Error in fitdist(x2 * 10^i, \"cauchy\", method = \"mle\") : #> the function mle failed to estimate the parameters, #> with the error code 100 #> #> #> #> 1 Error in fitdist(x2 * 10^i, \"cauchy\", method = \"mle\") : #> the function mle failed to estimate the parameters, #> with the error code 100 #> #> #> 2 0.001870693 0.01100646 #> 3 0.01871473 0.1100713 #> 4 0.1870693 1.100646 #> 5 1.876032 11.0131 #> 6 18.76032 110.131 # (15) Fit of a normal distribution on acute toxicity values of endosulfan in log10 for # nonarthropod invertebrates, using maximum likelihood estimation # to estimate what is called a species sensitivity distribution # (SSD) in ecotoxicology, followed by estimation of the 5 percent quantile value of # the fitted distribution (which is called the 5 percent hazardous concentration, HC5, # in ecotoxicology) and estimation of other quantiles. # data(endosulfan) ATV <- subset(endosulfan, group == \"NonArthroInvert\")$ATV log10ATV <- log10(subset(endosulfan, group == \"NonArthroInvert\")$ATV) fln <- fitdist(log10ATV, \"norm\") quantile(fln, probs = 0.05) #> Estimated quantiles for each specified probability (non-censored data) #> p=0.05 #> estimate 1.744227 quantile(fln, probs = c(0.05, 0.1, 0.2)) #> Estimated quantiles for each specified probability (non-censored data) #> p=0.05 p=0.1 p=0.2 #> estimate 1.744227 2.080093 2.4868 # (16) Fit of a triangular distribution using Cramer-von Mises or # Kolmogorov-Smirnov distance # # \\donttest{ set.seed(1234) require(mc2d) #> Loading required package: mc2d #> Loading required package: mvtnorm #> #> Attaching package: ‘mc2d’ #> The following objects are masked from ‘package:base’: #> #> pmax, pmin t <- rtriang(100, min=5, mode=6, max=10) fCvM <- fitdist(t, \"triang\", method=\"mge\", start = list(min=4, mode=6,max=9), gof=\"CvM\") #> Warning: Some parameter names have no starting/fixed value but have a default value: mean. fKS <- fitdist(t, \"triang\", method=\"mge\", start = list(min=4, mode=6,max=9), gof=\"KS\") #> Warning: Some parameter names have no starting/fixed value but have a default value: mean. cdfcomp(list(fCvM,fKS)) # } # (17) fit a non classical discrete distribution (the zero inflated Poisson distribution) # # \\donttest{ require(gamlss.dist) #> Loading required package: gamlss.dist set.seed(1234) x <- rZIP(n = 30, mu = 5, sigma = 0.2) plotdist(x, discrete = TRUE) fitzip <- fitdist(x, \"ZIP\", start = list(mu = 4, sigma = 0.15), discrete = TRUE, optim.method = \"L-BFGS-B\", lower = c(0, 0), upper = c(Inf, 1)) #> Warning: The dZIP function should return a zero-length vector when input has length zero #> Warning: The pZIP function should return a zero-length vector when input has length zero summary(fitzip) #> Fitting of the distribution ' ZIP ' by maximum likelihood #> Parameters : #> estimate Std. 
Error #> mu 4.3166139 0.4341218 #> sigma 0.1891806 0.0741692 #> Loglikelihood: -67.13886 AIC: 138.2777 BIC: 141.0801 #> Correlation matrix: #> mu sigma #> mu 1.00000000 0.06418863 #> sigma 0.06418863 1.00000000 #> plot(fitzip) fitp <- fitdist(x, \"pois\") cdfcomp(list(fitzip, fitp)) gofstat(list(fitzip, fitp)) #> Chi-squared statistic: 3.579721 35.91516 #> Degree of freedom of the Chi-squared distribution: 3 4 #> Chi-squared p-value: 0.3105689 3.012341e-07 #> the p-value may be wrong with some theoretical counts < 5 #> Chi-squared table: #> obscounts theo 1-mle-ZIP theo 2-mle-pois #> <= 0 6 6.000030 0.9059215 #> <= 2 7 4.425490 8.7194943 #> <= 4 5 9.047502 12.1379326 #> <= 5 5 4.054138 3.9650580 #> <= 7 5 4.715296 3.4694258 #> > 7 2 1.757544 0.8021677 #> #> Goodness-of-fit criteria #> 1-mle-ZIP 2-mle-pois #> Akaike's Information Criterion 138.2777 153.7397 #> Bayesian Information Criterion 141.0801 155.1409 # } # (18) examples with distributions in actuar (predefined starting values) # # \\donttest{ require(actuar) x <- c(2.3,0.1,2.7,2.2,0.4,2.6,0.2,1.,7.3,3.2,0.8,1.2,33.7,14., 21.4,7.7,1.,1.9,0.7,12.6,3.2,7.3,4.9,4000.,2.5,6.7,3.,63., 6.,1.6,10.1,1.2,1.5,1.2,30.,3.2,3.5,1.2,0.2,1.9,0.7,17., 2.8,4.8,1.3,3.7,0.2,1.8,2.6,5.9,2.6,6.3,1.4,0.8) #log logistic ft_llogis <- fitdist(x,'llogis') x <- c(0.3837053, 0.8576858, 0.3552237, 0.6226119, 0.4783756, 0.3139799, 0.4051403, 0.4537631, 0.4711057, 0.5647414, 0.6479617, 0.7134207, 0.5259464, 0.5949068, 0.3509200, 0.3783077, 0.5226465, 1.0241043, 0.4384580, 1.3341520) #inverse weibull ft_iw <- fitdist(x,'invweibull') # }"},{"path":"https://lbbe-software.github.io/fitdistrplus/reference/fitdistcens.html","id":null,"dir":"Reference","previous_headings":"","what":"Fitting of univariate distributions to censored data — fitdistcens","title":"Fitting of univariate distributions to censored data — fitdistcens","text":"Fits univariate distribution censored data maximum likelihood.","code":""},{"path":"https://lbbe-software.github.io/fitdistrplus/reference/fitdistcens.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Fitting of univariate distributions to censored data — fitdistcens","text":"","code":"fitdistcens(censdata, distr, start=NULL, fix.arg=NULL, keepdata = TRUE, keepdata.nb=100, calcvcov=TRUE, ...) # S3 method for class 'fitdistcens' print(x, ...) # S3 method for class 'fitdistcens' plot(x, ...) # S3 method for class 'fitdistcens' summary(object, ...) # S3 method for class 'fitdistcens' logLik(object, ...) # S3 method for class 'fitdistcens' AIC(object, ..., k = 2) # S3 method for class 'fitdistcens' BIC(object, ...) # S3 method for class 'fitdistcens' vcov(object, ...) # S3 method for class 'fitdistcens' coef(object, ...)"},{"path":"https://lbbe-software.github.io/fitdistrplus/reference/fitdistcens.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Fitting of univariate distributions to censored data — fitdistcens","text":"censdata dataframe two columns respectively named left right, describing observed value interval. left column contains either NA left censored observations, left bound interval interval censored observations, observed value non-censored observations. right column contains either NA right censored observations, right bound interval interval censored observations, observed value non-censored observations. 
distr character string \"name\" naming distribution, corresponding density function dname corresponding distribution function pname must defined, directly density function. start named list giving initial values parameters named distribution. argument may omitted distributions reasonable starting values computed (see 'details' section mledist). fix.arg optional named list giving values parameters named distribution must kept fixed rather estimated maximum likelihood. x object class \"fitdistcens\". object object class \"fitdistcens\". keepdata logical. TRUE, dataset returned, otherwise sample subset returned. keepdata.nb keepdata=FALSE, length subset returned. calcvcov logical indicating (asymptotic) covariance matrix required. k penalty per parameter passed AIC generic function (2 default). ... arguments passed generic functions, function plotdistcens order control type ecdf-plot used censored data, function mledist order control optimization method.","code":""},{"path":"https://lbbe-software.github.io/fitdistrplus/reference/fitdistcens.html","id":"details","dir":"Reference","previous_headings":"","what":"Details","title":"Fitting of univariate distributions to censored data — fitdistcens","text":"Maximum likelihood estimations distribution parameters computed using function mledist. default direct optimization log-likelihood performed using optim, \"Nelder-Mead\" method distributions characterized one parameter \"BFGS\" method distributions characterized one parameter. algorithm used optim can chosen another optimization function can specified using ... argument (see mledist details). start may omitted (.e. NULL) classic distributions (see 'details' section mledist). Note errors raised optim, good idea start adding traces optimization process adding control=list(trace=1, REPORT=1) ... argument. function able fit uniform distribution. parameter estimates, function returns log-likelihood standard errors estimates calculated Hessian solution found optim user-supplied function passed mledist. default (keepdata = TRUE), object returned fitdist contains data vector given input. dealing large datasets, can remove original dataset output setting keepdata = FALSE. case, keepdata.nb points () kept random subsampling keepdata.nb-4 points dataset adding component-wise minimum maximum. combined bootdistcens, aware bootstrap performed subset randomly selected fitdistcens. Currently, graphical comparisons multiple fits available framework. Weighted version estimation process available method = \"mle\" using weights=.... See corresponding man page details. yet possible take account weighths functions plotdistcens, plot.fitdistcens cdfcompcens (developments planned future). parameter(s) () estimated, gofstat allows compute goodness--fit statistics.","code":""},{"path":"https://lbbe-software.github.io/fitdistrplus/reference/fitdistcens.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Fitting of univariate distributions to censored data — fitdistcens","text":"fitdistcens returns object class \"fitdistcens\", list following components: estimate parameter estimates. method character string coding fitting method : \"mle\" 'maximum likelihood estimation'. sd estimated standard errors. cor estimated correlation matrix, NA numerically computable NULL available. vcov estimated variance-covariance matrix, NULL available. loglik log-likelihood. aic Akaike information criterion. bic -called BIC SBC (Schwarz Bayesian criterion). censdata censored data set. 
distname name distribution. fix.arg named list giving values parameters named distribution must kept fixed rather estimated maximum likelihood NULL parameters. fix.arg.fun function used set value fix.arg NULL. dots list arguments passed ... used bootdistcens control optimization method used iterative calls mledist NULL arguments. convergence integer code convergence optim/constrOptim defined defined user user-supplied optimization function. 0 indicates successful convergence. 1 indicates iteration limit optim reached. 10 indicates degeneracy Nealder-Mead simplex. 100 indicates optim encountered internal error. discrete always FALSE. weights vector weigths used estimation process NULL. Generic functions: print print \"fitdist\" object shows traces fitting method fitted distribution. summary summary provides parameter estimates fitted distribution, log-likelihood, AIC BIC statistics, standard errors parameter estimates correlation matrix parameter estimates. plot plot object class \"fitdistcens\" returned fitdistcens uses function plotdistcens. logLik Extracts estimated log-likelihood \"fitdistcens\" object. AIC Extracts AIC \"fitdistcens\" object. BIC Extracts BIC \"fitdistcens\" object. vcov Extracts estimated var-covariance matrix \"fitdistcens\" object (available method = \"mle\"). coef Extracts fitted coefficients \"fitdistcens\" object.","code":""},{"path":[]},{"path":"https://lbbe-software.github.io/fitdistrplus/reference/fitdistcens.html","id":"references","dir":"Reference","previous_headings":"","what":"References","title":"Fitting of univariate distributions to censored data — fitdistcens","text":"Venables WN Ripley BD (2002), Modern applied statistics S. Springer, New York, pp. 435-446, doi:10.1007/978-0-387-21706-2 . Delignette-Muller ML Dutang C (2015), fitdistrplus: R Package Fitting Distributions. Journal Statistical Software, 64(4), 1-34, doi:10.18637/jss.v064.i04 .","code":""},{"path":"https://lbbe-software.github.io/fitdistrplus/reference/fitdistcens.html","id":"author","dir":"Reference","previous_headings":"","what":"Author","title":"Fitting of univariate distributions to censored data — fitdistcens","text":"Marie-Laure Delignette-Muller Christophe Dutang.","code":""},{"path":"https://lbbe-software.github.io/fitdistrplus/reference/fitdistcens.html","id":"ref-examples","dir":"Reference","previous_headings":"","what":"Examples","title":"Fitting of univariate distributions to censored data — fitdistcens","text":"","code":"# (1) Fit of a lognormal distribution to bacterial contamination data # data(smokedfish) fitsf <- fitdistcens(smokedfish,\"lnorm\") summary(fitsf) #> Fitting of the distribution ' lnorm ' By maximum likelihood on censored data #> Parameters #> estimate Std. Error #> meanlog -3.627606 0.4637122 #> sdlog 3.544570 0.4876610 #> Loglikelihood: -90.65154 AIC: 185.3031 BIC: 190.5725 #> Correlation matrix: #> meanlog sdlog #> meanlog 1.0000000 -0.4325873 #> sdlog -0.4325873 1.0000000 #> # default plot using the Wang technique (see ?plotdiscens for details) plot(fitsf) # plot using the Turnbull algorithm (see ?plotdiscens for details) # with confidence intervals for the empirical distribution plot(fitsf, NPMLE = TRUE, NPMLE.method = \"Turnbull\", Turnbull.confint = TRUE) #> Warning: Turnbull is now a deprecated option for NPMLE.method. You should use Turnbull.middlepoints #> of Turnbull.intervals. It was here fixed as Turnbull.middlepoints, equivalent to former Turnbull. 
#> Warning: Q-Q plot and P-P plot are available only #> with the arguments NPMLE.method at Wang (default value) or Turnbull.intervals. # basic plot using intervals and points (see ?plotdiscens for details) plot(fitsf, NPMLE = FALSE) #> Warning: When NPMLE is FALSE the nonparametric maximum likelihood estimation #> of the cumulative distribution function is not computed. #> Q-Q plot and P-P plot are available only with the arguments NPMLE.method at Wang #> (default value) or Turnbull.intervals. # plot of the same fit using the Turnbull algorithm in logscale cdfcompcens(fitsf,main=\"bacterial contamination fits\", xlab=\"bacterial concentration (CFU/g)\",ylab=\"F\", addlegend = FALSE,lines01 = TRUE, xlogscale = TRUE, xlim = c(1e-2,1e2)) # zoom on large values of F cdfcompcens(fitsf,main=\"bacterial contamination fits\", xlab=\"bacterial concentration (CFU/g)\",ylab=\"F\", addlegend = FALSE,lines01 = TRUE, xlogscale = TRUE, xlim = c(1e-2,1e2),ylim=c(0.4,1)) # (2) Fit of a normal distribution on acute toxicity values # of fluazinam (in decimal logarithm) for # macroinvertebrates and zooplancton, using maximum likelihood estimation # to estimate what is called a species sensitivity distribution # (SSD) in ecotoxicology # data(fluazinam) log10EC50 <-log10(fluazinam) fln <- fitdistcens(log10EC50,\"norm\") fln #> Fitting of the distribution ' norm ' on censored data by maximum likelihood #> Parameters: #> estimate #> mean 2.161449 #> sd 1.167290 summary(fln) #> Fitting of the distribution ' norm ' By maximum likelihood on censored data #> Parameters #> estimate Std. Error #> mean 2.161449 0.3223366 #> sd 1.167290 0.2630390 #> Loglikelihood: -20.41212 AIC: 44.82424 BIC: 46.10235 #> Correlation matrix: #> mean sd #> mean 1.0000000 0.1350237 #> sd 0.1350237 1.0000000 #> plot(fln) # (3) defining your own distribution functions, here for the Gumbel distribution # for other distributions, see the CRAN task view dedicated to # probability distributions # dgumbel <- function(x,a,b) 1/b*exp((a-x)/b)*exp(-exp((a-x)/b)) pgumbel <- function(q,a,b) exp(-exp((a-q)/b)) qgumbel <- function(p,a,b) a-b*log(-log(p)) fg <- fitdistcens(log10EC50,\"gumbel\",start=list(a=1,b=1)) #> Error in checkparamlist(arg_startfix$start.arg, arg_startfix$fix.arg, argddistname, hasnodefaultval): 'start' must specify names which are arguments to 'distr'. summary(fg) #> Error in eval(expr, envir, enclos): object 'fg' not found plot(fg) #> Error in eval(expr, envir, enclos): object 'fg' not found # (4) comparison of fits of various distributions # fll <- fitdistcens(log10EC50,\"logis\") summary(fll) #> Fitting of the distribution ' logis ' By maximum likelihood on censored data #> Parameters #> estimate Std. Error #> location 2.1518291 0.3222830 #> scale 0.6910423 0.1745231 #> Loglikelihood: -20.55391 AIC: 45.10781 BIC: 46.38593 #> Correlation matrix: #> location scale #> location 1.00000000 0.05097494 #> scale 0.05097494 1.00000000 #> cdfcompcens(list(fln,fll,fg),legendtext=c(\"normal\",\"logistic\",\"gumbel\"), xlab = \"log10(EC50)\") #> Error in eval(expr, envir, enclos): object 'fg' not found # (5) how to change the optimisation method? 
# fitdistcens(log10EC50,\"logis\",optim.method=\"Nelder-Mead\") #> Fitting of the distribution ' logis ' on censored data by maximum likelihood #> Parameters: #> estimate #> location 2.1518291 #> scale 0.6910423 fitdistcens(log10EC50,\"logis\",optim.method=\"BFGS\") #> Fitting of the distribution ' logis ' on censored data by maximum likelihood #> Parameters: #> estimate #> location 2.1519103 #> scale 0.6909664 fitdistcens(log10EC50,\"logis\",optim.method=\"SANN\") #> Fitting of the distribution ' logis ' on censored data by maximum likelihood #> Parameters: #> estimate #> location 2.1569256 #> scale 0.6925307 # (6) custom optimisation function - example with the genetic algorithm # # \\donttest{ #wrap genoud function rgenoud package mygenoud <- function(fn, par, ...) { require(rgenoud) res <- genoud(fn, starting.values=par, ...) standardres <- c(res, convergence=0) return(standardres) } # call fitdistcens with a 'custom' optimization function fit.with.genoud <- fitdistcens(log10EC50,\"logis\", custom.optim=mygenoud, nvars=2, Domains=cbind(c(0,0), c(5, 5)), boundary.enforcement=1, print.level=1, hessian=TRUE) #> #> #> Wed Aug 28 14:20:32 2024 #> Domains: #> 0.000000e+00 <= X1 <= 5.000000e+00 #> 0.000000e+00 <= X2 <= 5.000000e+00 #> #> Data Type: Floating Point #> Operators (code number, name, population) #> \t(1) Cloning........................... \t122 #> \t(2) Uniform Mutation.................. \t125 #> \t(3) Boundary Mutation................. \t125 #> \t(4) Non-Uniform Mutation.............. \t125 #> \t(5) Polytope Crossover................ \t125 #> \t(6) Simple Crossover.................. \t126 #> \t(7) Whole Non-Uniform Mutation........ \t125 #> \t(8) Heuristic Crossover............... \t126 #> \t(9) Local-Minimum Crossover........... \t0 #> #> HARD Maximum Number of Generations: 100 #> Maximum Nonchanging Generations: 10 #> Population size : 1000 #> Convergence Tolerance: 1.000000e-03 #> #> Using the BFGS Derivative Based Optimizer on the Best Individual Each Generation. #> Checking Gradients before Stopping. #> Not Using Out of Bounds Individuals But Allowing Trespassing. #> #> Minimization Problem. #> #> #> Generation#\t Solution Value #> #> 0 \t2.076816e+01 #> 1 \t2.055391e+01 #> #> 'wait.generations' limit reached. #> No significant improvement in 10 generations. #> #> Solution Fitness Value: 2.055391e+01 #> #> Parameters at the Solution (parameter, gradient): #> #> X[ 1] :\t2.151910e+00\tG[ 1] :\t-4.027682e-06 #> X[ 2] :\t6.909667e-01\tG[ 2] :\t1.633468e-05 #> #> Solution Found Generation 1 #> Number of Generations Run 12 #> #> Wed Aug 28 14:20:33 2024 #> Total run time : 0 hours 0 minutes and 1 seconds summary(fit.with.genoud) #> Fitting of the distribution ' logis ' By maximum likelihood on censored data #> Parameters #> estimate Std. 
Error #> location 2.1519100 0.3222568 #> scale 0.6909667 0.1744837 #> Loglikelihood: -20.55391 AIC: 45.10781 BIC: 46.38593 #> Correlation matrix: #> location scale #> location 1.00000000 0.05106485 #> scale 0.05106485 1.00000000 #> # } # (7) estimation of the mean of a normal distribution # by maximum likelihood with the standard deviation fixed at 1 using the argument fix.arg # flnb <- fitdistcens(log10EC50, \"norm\", start = list(mean = 1),fix.arg = list(sd = 1)) # (8) Fit of a lognormal distribution on acute toxicity values of fluazinam for # macroinvertebrates and zooplancton, using maximum likelihood estimation # to estimate what is called a species sensitivity distribution # (SSD) in ecotoxicology, followed by estimation of the 5 percent quantile value of # the fitted distribution (which is called the 5 percent hazardous concentration, HC5, # in ecotoxicology) and estimation of other quantiles. data(fluazinam) log10EC50 <-log10(fluazinam) fln <- fitdistcens(log10EC50,\"norm\") quantile(fln, probs = 0.05) #> Estimated quantiles for each specified probability (censored data) #> p=0.05 #> estimate 0.2414275 quantile(fln, probs = c(0.05, 0.1, 0.2)) #> Estimated quantiles for each specified probability (censored data) #> p=0.05 p=0.1 p=0.2 #> estimate 0.2414275 0.6655064 1.179033 # (9) Fit of a lognormal distribution on 72-hour acute salinity tolerance (LC50 values) # of riverine macro-invertebrates using maximum likelihood estimation data(salinity) log10LC50 <-log10(salinity) fln <- fitdistcens(log10LC50,\"norm\") plot(fln)"},{"path":"https://lbbe-software.github.io/fitdistrplus/reference/fitdistrplus.html","id":null,"dir":"Reference","previous_headings":"","what":"Overview of the fitdistrplus package — fitdistrplus-package","title":"Overview of the fitdistrplus package — fitdistrplus-package","text":"idea package emerged 2008 collaboration JB Denis, R Pouillot ML Delignette time worked area quantitative risk assessment. implementation package part general project named \"Risk assessment R\" gathering different packages hosted R-forge. fitdistrplus package first written ML Delignette-Muller made available CRAN 2009 presented 2009 useR conference Rennes. months , C Dutang joined project starting participate implementation fitdistrplus package. package also presented 2011 useR conference 2eme rencontres R 2013 (https://r2013-lyon.sciencesconf.org/). Four vignettes available within package: general overview package published Journal Statistical Software (doi:10.18637/jss.v064.i04 ), document answering Frequently Asked Questions, document presenting benchmark optimization algorithms finding parameters, document starting values. fitdistrplus package general package aims helping fit univariate parametric distributions censored non-censored data. two main functions fitdist fit non-censored data fitdistcens fit censored data. choice candidate distributions fit may helped using functions descdist plotdist non-censored data plotdistcens censored data). Using functions fitdist fitdistcens, different methods can used estimate distribution parameters: maximum likelihood estimation default (mledist), moment matching estimation (mmedist), quantile matching estimation (qmedist), maximum goodness--fit estimation (mgedist). classical distributions initial values automatically calculated provided user. Graphical functions plotdist plotdistcens can used help manual calibration initial values parameters non-classical distributions. 
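As a hedged illustration of the workflow sketched in this overview, a minimal non-censored example could chain the functions named above; the simulated gamma data and the starting values are assumptions chosen only for the sketch.

set.seed(123)
x <- rgamma(100, shape = 4, rate = 0.05)   # simulated data (assumption of the sketch)
descdist(x)                                # skewness-kurtosis plot to suggest candidate distributions
plotdist(x)                                # empirical density and cdf, useful to calibrate start values
fit <- fitdist(x, "gamma", method = "mle",
               start = list(shape = 2, rate = 0.1))   # manually chosen initial values
gofstat(fit)                               # goodness-of-fit statistics
b <- bootdist(fit, niter = 101)            # bootstrap uncertainty on the parameter estimates
summary(b)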
Function prefit proposed help definition good starting values special case constrained parameters. case maximum likelihood chosen estimation method, function llplot enables visualize loglikelihood contours. goodness--fit fitted distributions (single fit multiple fits) can explored using different graphical functions (cdfcomp, denscomp, qqcomp ppcomp non-censored data cdfcompcens censored data). Goodness--fit statistics also provided non-censored data using function gofstat. Bootstrap proposed quantify uncertainty parameter estimates (functions bootdist bootdistcens) also quantify uncertainty CDF quantiles estimated fitted distribution (quantile CIcdfplot).","code":""},{"path":"https://lbbe-software.github.io/fitdistrplus/reference/fitdistrplus.html","id":"author","dir":"Reference","previous_headings":"","what":"Author","title":"Overview of the fitdistrplus package — fitdistrplus-package","text":"Marie-Laure Delignette-Muller Christophe Dutang.","code":""},{"path":"https://lbbe-software.github.io/fitdistrplus/reference/fluazinam.html","id":null,"dir":"Reference","previous_headings":"","what":"Species-Sensitivity Distribution (SSD) for Fluazinam — fluazinam","title":"Species-Sensitivity Distribution (SSD) for Fluazinam — fluazinam","text":"48-hour acute toxicity values (EC50 values) exposure macroinvertebrates zooplancton fluazinam.","code":""},{"path":"https://lbbe-software.github.io/fitdistrplus/reference/fluazinam.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Species-Sensitivity Distribution (SSD) for Fluazinam — fluazinam","text":"","code":"data(fluazinam)"},{"path":"https://lbbe-software.github.io/fitdistrplus/reference/fluazinam.html","id":"format","dir":"Reference","previous_headings":"","what":"Format","title":"Species-Sensitivity Distribution (SSD) for Fluazinam — fluazinam","text":"fluazinam data frame 2 columns named left right, describing observed EC50 value (micrograms per liter) interval. left column contains either NA left censored observations, left bound interval interval censored observations, observed value non-censored observations. right column contains either NA right censored observations, right bound interval interval censored observations, observed value noncensored observations.","code":""},{"path":"https://lbbe-software.github.io/fitdistrplus/reference/fluazinam.html","id":"source","dir":"Reference","previous_headings":"","what":"Source","title":"Species-Sensitivity Distribution (SSD) for Fluazinam — fluazinam","text":"Hose, G.C., Van den Brink, P.J. 2004. species sensitivity distribution approach compared microcosm study: case study fungicide fluazinam. Ecotoxicology Environmental Safety, 73, 109-122.","code":""},{"path":"https://lbbe-software.github.io/fitdistrplus/reference/fluazinam.html","id":"ref-examples","dir":"Reference","previous_headings":"","what":"Examples","title":"Species-Sensitivity Distribution (SSD) for Fluazinam — fluazinam","text":"","code":"# (1) load of data # data(fluazinam) # (2) plot of data using Turnbull cdf plot # log10EC50 <- log10(fluazinam) plotdistcens(log10EC50) # (3) fit of a lognormal and a logistic distribution to data # (classical distributions used for species sensitivity # distributions, SSD, in ecotoxicology) # and visual comparison of the fits using Turnbull cdf plot # fln <- fitdistcens(log10EC50, \"norm\") summary(fln) #> Fitting of the distribution ' norm ' By maximum likelihood on censored data #> Parameters #> estimate Std. 
Error #> mean 2.161449 0.3223366 #> sd 1.167290 0.2630390 #> Loglikelihood: -20.41212 AIC: 44.82424 BIC: 46.10235 #> Correlation matrix: #> mean sd #> mean 1.0000000 0.1350237 #> sd 0.1350237 1.0000000 #> fll <- fitdistcens(log10EC50, \"logis\") summary(fll) #> Fitting of the distribution ' logis ' By maximum likelihood on censored data #> Parameters #> estimate Std. Error #> location 2.1518291 0.3222830 #> scale 0.6910423 0.1745231 #> Loglikelihood: -20.55391 AIC: 45.10781 BIC: 46.38593 #> Correlation matrix: #> location scale #> location 1.00000000 0.05097494 #> scale 0.05097494 1.00000000 #> cdfcompcens(list(fln,fll), legendtext = c(\"normal\", \"logistic\"), xlab = \"log10(EC50)\") # (4) estimation of the 5 percent quantile value of # the normal fitted distribution (5 percent hazardous concentration : HC5) # with its two-sided 95 percent confidence interval calculated by # non parametric bootstrap # with a small number of iterations to satisfy CRAN running times constraint. # For practical applications, we recommend to use at least niter=501 or niter=1001. # # in log10(EC50) bln <- bootdistcens(fln, niter = 101) HC5ln <- quantile(bln, probs = 0.05) # in EC50 10^(HC5ln$quantiles) #> p=0.05 #> estimate 1.743522 10^(HC5ln$quantCI) #> p=0.05 #> 2.5 % 0.2358685 #> 97.5 % 11.5133957 # (5) estimation of the HC5 value # with its one-sided 95 percent confidence interval (type \"greater\") # # in log10(EC50) HC5lnb <- quantile(bln, probs = 0.05, CI.type = \"greater\") # in LC50 10^(HC5lnb$quantiles) #> p=0.05 #> estimate 1.743522 10^(HC5lnb$quantCI) #> p=0.05 #> 5 % 0.3474539"},{"path":"https://lbbe-software.github.io/fitdistrplus/reference/fremale.html","id":null,"dir":"Reference","previous_headings":"","what":"Fictive survival dataset of a french Male population — fremale","title":"Fictive survival dataset of a french Male population — fremale","text":"100 male individuals randomly taken frefictivetable CASdatasets package","code":""},{"path":"https://lbbe-software.github.io/fitdistrplus/reference/fremale.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Fictive survival dataset of a french Male population — fremale","text":"","code":"data(fremale)"},{"path":"https://lbbe-software.github.io/fitdistrplus/reference/fremale.html","id":"format","dir":"Reference","previous_headings":"","what":"Format","title":"Fictive survival dataset of a french Male population — fremale","text":"fremale data frame 3 columns names AgeIn, AgeOut respectively entry age exit age; Death binary dummy: 1 indicating death individual; 0 censored observation.","code":""},{"path":"https://lbbe-software.github.io/fitdistrplus/reference/fremale.html","id":"references","dir":"Reference","previous_headings":"","what":"References","title":"Fictive survival dataset of a french Male population — fremale","text":"See full dataset frefictivetable CASdatasets http://dutangc.perso.math.cnrs.fr/RRepository/","code":""},{"path":"https://lbbe-software.github.io/fitdistrplus/reference/fremale.html","id":"ref-examples","dir":"Reference","previous_headings":"","what":"Examples","title":"Fictive survival dataset of a french Male population — fremale","text":"","code":"# (1) load of data # data(fremale) summary(fremale) #> AgeIn AgeOut Death #> Min. :23.87 Min. :30.20 Min. :0.0 #> 1st Qu.:47.29 1st Qu.:53.82 1st Qu.:1.0 #> Median :63.95 Median :69.49 Median :1.0 #> Mean :60.34 Mean :67.00 Mean :0.8 #> 3rd Qu.:72.00 3rd Qu.:80.23 3rd Qu.:1.0 #> Max. :89.17 Max. :97.11 Max. 
:1.0"},{"path":"https://lbbe-software.github.io/fitdistrplus/reference/gofstat.html","id":null,"dir":"Reference","previous_headings":"","what":"Goodness-of-fit statistics — gofstat","title":"Goodness-of-fit statistics — gofstat","text":"Computes goodness--fit statistics parametric distributions fitted censored non-censored data set.","code":""},{"path":"https://lbbe-software.github.io/fitdistrplus/reference/gofstat.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Goodness-of-fit statistics — gofstat","text":"","code":"gofstat(f, chisqbreaks, meancount, discrete, fitnames=NULL) # S3 method for class 'gofstat.fitdist' print(x, ...) # S3 method for class 'gofstat.fitdistcens' print(x, ...)"},{"path":"https://lbbe-software.github.io/fitdistrplus/reference/gofstat.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Goodness-of-fit statistics — gofstat","text":"f object class \"fitdist\" (\"fitdistcens\" ), output function fitdist() (resp. \"fitdist()\"), \tlist \"fitdist\" objects, list \"fitdistcens\" objects. chisqbreaks usable non censored data, numeric vector defining breaks cells used compute chi-squared statistic. omitted, breaks automatically computed data order reach roughly number observations per cell, roughly equal argument meancount, sligthly ties. meancount usable non censored data, mean number observations per cell expected definition breaks cells used compute chi-squared statistic. argument taken account breaks directly defined argument chisqbreaks. chisqbreaks meancount omitted, meancount fixed order obtain roughly \\((4n)^{2/5}\\) cells \\(n\\) length dataset. discrete TRUE, Chi-squared statistic information criteria computed. \tmissing, discrete passed first object class \"fitdist\" list f. \tcensored data argument ignored, censored data considered continuous. fitnames vector defining names fits. x object class \"gofstat.fitdist\" \"gofstat.fitdistcens\". ... arguments passed generic functions.","code":""},{"path":"https://lbbe-software.github.io/fitdistrplus/reference/gofstat.html","id":"details","dir":"Reference","previous_headings":"","what":"Details","title":"Goodness-of-fit statistics — gofstat","text":"type data (censored ), information criteria calculated. non censored data, added Goodness--fit statistics computed described . Chi-squared statistic computed using cells defined argument chisqbreaks cells automatically defined data, order reach roughly number observations per cell, roughly equal argument meancount, sligthly ties. choice define cells empirical distribution (data), theoretical distribution, done enable comparison Chi-squared values obtained different distributions fitted data set. chisqbreaks meancount omitted, meancount fixed order obtain roughly \\((4n)^{2/5}\\) cells, \\(n\\) length data set (Vose, 2000). Chi-squared statistic computed program fails define enough cells due small dataset. Chi-squared statistic computed, degree freedom (nb cells - nb parameters - 1) corresponding distribution strictly positive, p-value Chi-squared test returned. continuous distributions, Kolmogorov-Smirnov, Cramer-von Mises \tAnderson-Darling statistics also computed, defined Stephens (1986). approximate Kolmogorov-Smirnov test performed assuming distribution parameters known. critical value defined Stephens (1986) completely specified distribution used reject distribution significance level 0.05. approximation, result test (decision rejection distribution ) returned data sets 30 observations. 
Note approximate test may conservative. data sets 5 observations distributions test described Stephens (1986) maximum likelihood estimations (\"exp\", \"cauchy\", \"gamma\" \"weibull\"), Cramer-von Mises Anderson-darling tests performed described Stephens (1986). tests take account fact parameters known estimated data maximum likelihood. result decision reject distribution significance level 0.05. tests available maximum likelihood estimations. recommended statistics automatically printed, .e. Cramer-von Mises, Anderson-Darling Kolmogorov statistics continuous distributions Chi-squared statistics discrete ones ( \"binom\", \"nbinom\", \"geom\", \"hyper\" \"pois\" ). Results tests printed stored output function.","code":""},{"path":"https://lbbe-software.github.io/fitdistrplus/reference/gofstat.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Goodness-of-fit statistics — gofstat","text":"gofstat() returns object class \"gofstat.fitdist\" \"gofstat.fitdistcens\" following components sublist (aic, bic nbfit censored data) , chisq named vector Chi-squared statistics NULL computed chisqbreaks common breaks used define cells Chi-squared statistic chisqpvalue named vector p-values Chi-squared statistic NULL computed chisqdf named vector degrees freedom Chi-squared distribution NULL computed chisqtable table observed theoretical counts used Chi-squared calculations cvm named vector Cramer-von Mises statistics \"computed\" computed cvmtest named vector decisions Cramer-von Mises test \"computed\" computed ad named vector Anderson-Darling statistics \"computed\" computed adtest named vector decisions Anderson-Darling test \"computed\" computed ks named vector Kolmogorov-Smirnov statistic \"computed\" computed kstest named vector decisions Kolmogorov-Smirnov test \"computed\" computed aic named vector values Akaike's Information Criterion. bic named vector values Bayesian Information Criterion. discrete input argument automatic definition function first object class \"fitdist\" list input. nbfit Number fits argument.","code":""},{"path":[]},{"path":"https://lbbe-software.github.io/fitdistrplus/reference/gofstat.html","id":"references","dir":"Reference","previous_headings":"","what":"References","title":"Goodness-of-fit statistics — gofstat","text":"Cullen AC Frey HC (1999), Probabilistic techniques exposure assessment. Plenum Press, USA, pp. 81-155. Stephens MA (1986), Tests based edf statistics. Goodness--fit techniques (D'Agostino RB Stephens MA, eds), Marcel Dekker, New York, pp. 97-194. Venables WN Ripley BD (2002), Modern applied statistics S. Springer, New York, pp. 435-446, doi:10.1007/978-0-387-21706-2 . Vose D (2000), Risk analysis, quantitative guide. John Wiley & Sons Ltd, Chischester, England, pp. 99-143. Delignette-Muller ML Dutang C (2015), fitdistrplus: R Package Fitting Distributions. 
Journal Statistical Software, 64(4), 1-34, doi:10.18637/jss.v064.i04 .","code":""},{"path":"https://lbbe-software.github.io/fitdistrplus/reference/gofstat.html","id":"author","dir":"Reference","previous_headings":"","what":"Author","title":"Goodness-of-fit statistics — gofstat","text":"Marie-Laure Delignette-Muller Christophe Dutang.","code":""},{"path":"https://lbbe-software.github.io/fitdistrplus/reference/gofstat.html","id":"ref-examples","dir":"Reference","previous_headings":"","what":"Examples","title":"Goodness-of-fit statistics — gofstat","text":"","code":"# (1) fit of two distributions to the serving size data # by maximum likelihood estimation # and comparison of goodness-of-fit statistics # data(groundbeef) serving <- groundbeef$serving (fitg <- fitdist(serving, \"gamma\")) #> Fitting of the distribution ' gamma ' by maximum likelihood #> Parameters: #> estimate Std. Error #> shape 4.00955898 0.341451641 #> rate 0.05443907 0.004937239 gofstat(fitg) #> Goodness-of-fit statistics #> 1-mle-gamma #> Kolmogorov-Smirnov statistic 0.1281486 #> Cramer-von Mises statistic 0.6936274 #> Anderson-Darling statistic 3.5672625 #> #> Goodness-of-fit criteria #> 1-mle-gamma #> Akaike's Information Criterion 2511.250 #> Bayesian Information Criterion 2518.325 (fitln <- fitdist(serving, \"lnorm\")) #> Fitting of the distribution ' lnorm ' by maximum likelihood #> Parameters: #> estimate Std. Error #> meanlog 4.1693701 0.03366988 #> sdlog 0.5366095 0.02380783 gofstat(fitln) #> Goodness-of-fit statistics #> 1-mle-lnorm #> Kolmogorov-Smirnov statistic 0.1493090 #> Cramer-von Mises statistic 0.8277358 #> Anderson-Darling statistic 4.5436542 #> #> Goodness-of-fit criteria #> 1-mle-lnorm #> Akaike's Information Criterion 2526.639 #> Bayesian Information Criterion 2533.713 gofstat(list(fitg, fitln)) #> Goodness-of-fit statistics #> 1-mle-gamma 2-mle-lnorm #> Kolmogorov-Smirnov statistic 0.1281486 0.1493090 #> Cramer-von Mises statistic 0.6936274 0.8277358 #> Anderson-Darling statistic 3.5672625 4.5436542 #> #> Goodness-of-fit criteria #> 1-mle-gamma 2-mle-lnorm #> Akaike's Information Criterion 2511.250 2526.639 #> Bayesian Information Criterion 2518.325 2533.713 # (2) fit of two discrete distributions to toxocara data # and comparison of goodness-of-fit statistics # data(toxocara) number <- toxocara$number fitp <- fitdist(number,\"pois\") summary(fitp) #> Fitting of the distribution ' pois ' by maximum likelihood #> Parameters : #> estimate Std. Error #> lambda 8.679245 0.4046719 #> Loglikelihood: -507.5334 AIC: 1017.067 BIC: 1019.037 plot(fitp) fitnb <- fitdist(number,\"nbinom\") summary(fitnb) #> Fitting of the distribution ' nbinom ' by maximum likelihood #> Parameters : #> estimate Std. 
Error #> size 0.3971457 0.08289027 #> mu 8.6802520 1.93501003 #> Loglikelihood: -159.3441 AIC: 322.6882 BIC: 326.6288 #> Correlation matrix: #> size mu #> size 1.0000000000 -0.0001038553 #> mu -0.0001038553 1.0000000000 #> plot(fitnb) gofstat(list(fitp, fitnb),fitnames = c(\"Poisson\",\"negbin\")) #> Chi-squared statistic: 31256.96 7.48606 #> Degree of freedom of the Chi-squared distribution: 5 4 #> Chi-squared p-value: 0 0.1123255 #> the p-value may be wrong with some theoretical counts < 5 #> Chi-squared table: #> obscounts theo Poisson theo negbin #> <= 0 14 0.009014207 15.295027 #> <= 1 8 0.078236512 5.808596 #> <= 3 6 1.321767215 6.845015 #> <= 4 6 2.131297776 2.407815 #> <= 9 6 29.827829221 7.835196 #> <= 21 6 19.626223732 8.271110 #> > 21 7 0.005631339 6.537242 #> #> Goodness-of-fit criteria #> Poisson negbin #> Akaike's Information Criterion 1017.067 322.6882 #> Bayesian Information Criterion 1019.037 326.6288 # (3) Get Chi-squared results in addition to # recommended statistics for continuous distributions # set.seed(1234) x4 <- rweibull(n=1000,shape=2,scale=1) # fit of the good distribution f4 <- fitdist(x4,\"weibull\") plot(f4) # fit of a bad distribution f4b <- fitdist(x4,\"cauchy\") plot(f4b) (g <- gofstat(list(f4,f4b),fitnames=c(\"Weibull\", \"Cauchy\"))) #> Goodness-of-fit statistics #> Weibull Cauchy #> Kolmogorov-Smirnov statistic 0.02129364 0.114565 #> Cramer-von Mises statistic 0.06261917 1.854791 #> Anderson-Darling statistic 0.43120643 17.929123 #> #> Goodness-of-fit criteria #> Weibull Cauchy #> Akaike's Information Criterion 1225.734 1679.028 #> Bayesian Information Criterion 1235.549 1688.843 g$chisq #> Weibull Cauchy #> 35.76927 306.99824 g$chisqdf #> Weibull Cauchy #> 25 25 g$chisqpvalue #> Weibull Cauchy #> 7.517453e-02 2.364550e-50 g$chisqtable #> obscounts theo Weibull theo Cauchy #> <= 0.1547 36 27.86449 131.86592 #> <= 0.2381 36 34.87234 16.94381 #> <= 0.2952 36 30.58611 14.10775 #> <= 0.3745 36 50.14472 24.12899 #> <= 0.4323 36 41.16340 21.90706 #> <= 0.4764 36 33.55410 19.88887 #> <= 0.5263 36 39.57636 26.45041 #> <= 0.5771 36 41.67095 32.12597 #> <= 0.6276 36 42.36588 37.99145 #> <= 0.669 36 35.03524 35.92961 #> <= 0.7046 36 30.15737 34.26649 #> <= 0.7447 36 33.82481 41.80511 #> <= 0.7779 36 27.74805 36.41317 #> <= 0.8215 36 35.88169 48.69182 #> <= 0.8582 36 29.58833 40.27626 #> <= 0.9194 36 47.80044 62.45332 #> <= 0.9662 36 35.04387 42.03891 #> <= 1.017 36 36.19084 39.23047 #> <= 1.08 36 42.46698 40.45810 #> <= 1.119 36 24.49715 20.76625 #> <= 1.169 36 29.68482 22.91028 #> <= 1.237 36 36.49226 25.22891 #> <= 1.294 36 27.94301 17.49247 #> <= 1.418 36 51.25543 29.00440 #> <= 1.5 36 27.82405 14.64740 #> <= 1.65 36 38.72011 20.11799 #> <= 1.892 36 37.73807 21.69844 #> > 1.892 28 30.30916 81.16036 # and by defining the breaks (g <- gofstat(list(f4,f4b), chisqbreaks = seq(from = min(x4), to = max(x4), length.out = 10), fitnames=c(\"Weibull\", \"Cauchy\"))) #> Goodness-of-fit statistics #> Weibull Cauchy #> Kolmogorov-Smirnov statistic 0.02129364 0.114565 #> Cramer-von Mises statistic 0.06261917 1.854791 #> Anderson-Darling statistic 0.43120643 17.929123 #> #> Goodness-of-fit criteria #> Weibull Cauchy #> Akaike's Information Criterion 1225.734 1679.028 #> Bayesian Information Criterion 1235.549 1688.843 g$chisq #> Weibull Cauchy #> 6.532102 303.031817 g$chisqdf #> Weibull Cauchy #> 8 8 g$chisqpvalue #> Weibull Cauchy #> 5.878491e-01 9.318101e-61 g$chisqtable #> obscounts theo Weibull theo Cauchy #> <= 0.0264 1 0.9414531 111.941831 #> <= 0.3374 123 118.0587149 
63.070591 #> <= 0.6483 222 240.3305518 167.852511 #> <= 0.9593 261 252.4491129 318.542341 #> <= 1.27 204 191.1128355 165.083876 #> <= 1.581 111 112.9380271 62.221846 #> <= 1.892 49 53.8525607 30.121634 #> <= 2.203 19 21.0847217 17.463676 #> <= 2.514 6 6.8505892 11.335604 #> <= 2.825 4 1.8602036 7.933114 #> > 2.825 0 0.5212296 44.432977 # (4) fit of two distributions on acute toxicity values # of fluazinam (in decimal logarithm) for # macroinvertebrates and zooplancton # and comparison of goodness-of-fit statistics # data(fluazinam) log10EC50 <-log10(fluazinam) (fln <- fitdistcens(log10EC50,\"norm\")) #> Fitting of the distribution ' norm ' on censored data by maximum likelihood #> Parameters: #> estimate #> mean 2.161449 #> sd 1.167290 plot(fln) gofstat(fln) #> #> Goodness-of-fit criteria #> 1-mle-norm #> Akaike's Information Criterion 44.82424 #> Bayesian Information Criterion 46.10235 (fll <- fitdistcens(log10EC50,\"logis\")) #> Fitting of the distribution ' logis ' on censored data by maximum likelihood #> Parameters: #> estimate #> location 2.1518291 #> scale 0.6910423 plot(fll) gofstat(fll) #> #> Goodness-of-fit criteria #> 1-mle-logis #> Akaike's Information Criterion 45.10781 #> Bayesian Information Criterion 46.38593 gofstat(list(fll, fln), fitnames = c(\"loglogistic\", \"lognormal\")) #> #> Goodness-of-fit criteria #> loglogistic lognormal #> Akaike's Information Criterion 45.10781 44.82424 #> Bayesian Information Criterion 46.38593 46.10235"},{"path":"https://lbbe-software.github.io/fitdistrplus/reference/graphcomp.html","id":null,"dir":"Reference","previous_headings":"","what":"Graphical comparison of multiple fitted distributions (for non-censored data) — graphcomp","title":"Graphical comparison of multiple fitted distributions (for non-censored data) — graphcomp","text":"cdfcomp plots empirical cumulative distribution fitted distribution functions, denscomp plots histogram fitted density functions, qqcomp plots theoretical quantiles empirical ones, ppcomp plots theoretical probabilities empirical ones. cdfcomp able plot fits discrete distribution.","code":""},{"path":"https://lbbe-software.github.io/fitdistrplus/reference/graphcomp.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Graphical comparison of multiple fitted distributions (for non-censored data) — graphcomp","text":"","code":"cdfcomp(ft, xlim, ylim, xlogscale = FALSE, ylogscale = FALSE, main, xlab, ylab, datapch, datacol, fitlty, fitcol, fitlwd, addlegend = TRUE, legendtext, xlegend = \"bottomright\", ylegend = NULL, horizontals = TRUE, verticals = FALSE, do.points = TRUE, use.ppoints = TRUE, a.ppoints = 0.5, name.points = NULL, lines01 = FALSE, discrete, add = FALSE, plotstyle = \"graphics\", fitnbpts = 101, ...) denscomp(ft, xlim, ylim, probability = TRUE, main, xlab, ylab, datacol, fitlty, fitcol, fitlwd, addlegend = TRUE, legendtext, xlegend = \"topright\", ylegend = NULL, demp = FALSE, dempcol = \"black\", plotstyle = \"graphics\", discrete, fitnbpts = 101, fittype=\"l\", ...) qqcomp(ft, xlim, ylim, xlogscale = FALSE, ylogscale = FALSE, main, xlab, ylab, fitpch, fitcol, fitlwd, addlegend = TRUE, legendtext, xlegend = \"bottomright\", ylegend = NULL, use.ppoints = TRUE, a.ppoints = 0.5, line01 = TRUE, line01col = \"black\", line01lty = 1, ynoise = TRUE, plotstyle = \"graphics\", ...) 
ppcomp(ft, xlim, ylim, xlogscale = FALSE, ylogscale = FALSE, main, xlab, ylab, fitpch, fitcol, fitlwd, addlegend = TRUE, legendtext, xlegend = \"bottomright\", ylegend = NULL, use.ppoints = TRUE, a.ppoints = 0.5, line01 = TRUE, line01col = \"black\", line01lty = 1, ynoise = TRUE, plotstyle = \"graphics\", ...)"},{"path":"https://lbbe-software.github.io/fitdistrplus/reference/graphcomp.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Graphical comparison of multiple fitted distributions (for non-censored data) — graphcomp","text":"ft One \"fitdist\" object list objects class \"fitdist\". xlim \\(x\\)-limits plot. ylim \\(y\\)-limits plot. xlogscale TRUE, uses logarithmic scale \\(x\\)-axis. ylogscale TRUE, uses logarithmic scale \\(y\\)-axis. main main title plot. See also title. xlab label \\(x\\)-axis, defaults description x. ylab label \\(y\\)-axis, defaults description y. datapch integer specifying symbol used plotting data points. See also par. datacol specification color used plotting data points. See also par. fitcol (vector ) color(s) plot fitted distributions. fewer colors fits recycled standard fashion. See also par. fitlty (vector ) line type(s) plot fitted distributions/densities. fewer values fits recycled standard fashion. See also par. fitlwd (vector ) line size(s) plot fitted distributions/densities. fewer values fits recycled standard fashion. See also par. fitpch (vector ) line type(s) plot fitted quantiles/probabilities. fewer values fits recycled standard fashion. See also par. fittype type plot fitted probabilities case discrete distributions: possible types \"p\" points, \"l\" lines \"o\" overplotted (plot.default). fittype used non-discrete distributions. fitnbpts numeric number points compute fitted probabilities cumulative probabilities. Default 101. addlegend TRUE, legend added plot. legendtext character expression vector length \\(\\ge 1\\) appear legend. See also legend. xlegend, ylegend \\(x\\) \\(y\\) coordinates used position legend. can specified keyword. plotstyle = \"graphics\", see xy.coords legend. plotstyle = \"ggplot\", xlegend keyword must one top, bottom, left, right. See also guide_legend ggplot2 horizontals TRUE, draws horizontal lines step empirical cumulative distribution function (ecdf). See also plot.stepfun. verticals TRUE, draws vertical lines empirical cumulative distribution function (ecdf). taken account horizontals=TRUE. .points TRUE (default), draws points x-locations. large dataset (n > 1e4), .points ignored point drawn. use.ppoints TRUE, probability points empirical distribution defined using function ppoints (1:n - .ppoints)/(n - 2a.ppoints + 1). FALSE, probability points simply defined (1:n)/n. argument ignored discrete data. .ppoints use.ppoints=TRUE, passed ppoints function. name.points Label vector points drawn .e. .points = TRUE (non censored data). lines01 logical plot two horizontal lines h=0 h=1 cdfcomp. line01 logical plot horizontal line \\(y=x\\) qqcomp ppcomp. line01col, line01lty Color line type line01. See also par. demp logical add empirical density plot, using density function. dempcol color empirical density case added plot (demp=TRUE). ynoise logical add small noise plotting empirical quantiles/probabilities qqcomp ppcomp. probability logical use probability scale denscomp. See also hist. discrete TRUE, distributions considered discrete. missing, discrete set TRUE least one object list ft discrete. add TRUE, adds already existing plot. FALSE, starts new plot. 
parameter available plotstyle = \"ggplot\". plotstyle \"graphics\" \"ggplot\". \"graphics\", display built graphics functions. \"ggplot\", graphic object output created ggplot2 functions (ggplot2 package must installed). ... graphical arguments passed graphical functions used cdfcomp, denscomp, ppcomp qqcomp plotstyle = \"graphics\". plotstyle = \"ggplot\", arguments used histogram plot (hist) denscomp function. plotstyle = \"ggplot\", graphical output can customized relevant ggplot2 functions store output.","code":""},{"path":"https://lbbe-software.github.io/fitdistrplus/reference/graphcomp.html","id":"details","dir":"Reference","previous_headings":"","what":"Details","title":"Graphical comparison of multiple fitted distributions (for non-censored data) — graphcomp","text":"cdfcomp provides plot empirical distribution fitted distribution cdf, default using Hazen's rule empirical distribution, probability points defined (1:n - 0.5)/n. discrete TRUE, probability points always defined (1:n)/n. large dataset (n > 1e4), point drawn line ecdf drawn instead. Note horizontals, verticals .points FALSE, empirical point drawn, fitted cdf shown. denscomp provides density plot fitted distribution histogram data conyinuous data. discrete=TRUE, distributions considered discrete, histogram plotted demp forced TRUE fitted empirical probabilities plotted either vertical lines fittype=\"l\", single points fittype=\"p\" lines points fittype=\"o\". ppcomp provides plot probabilities fitted distribution (\\(x\\)-axis) empirical probabilities (\\(y\\)-axis) default defined (1:n - 0.5)/n (data assumed continuous). large dataset (n > 1e4), lines drawn instead pointss customized fitpch parameter. qqcomp provides plot quantiles theoretical distribution (\\(x\\)-axis) empirical quantiles data (\\(y\\)-axis), default defining probability points (1:n - 0.5)/n theoretical quantile calculation (data assumed continuous). large dataset (n > 1e4), lines drawn instead points customized fitpch parameter. default legend added plots. Many graphical arguments optional, dedicated personalize plots, fixed default values omitted.","code":""},{"path":"https://lbbe-software.github.io/fitdistrplus/reference/graphcomp.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Graphical comparison of multiple fitted distributions (for non-censored data) — graphcomp","text":"*comp returns list drawn points /lines plotstyle == \"graphics\" object class \"ggplot\" plotstyle == \"ggplot\".","code":""},{"path":[]},{"path":"https://lbbe-software.github.io/fitdistrplus/reference/graphcomp.html","id":"references","dir":"Reference","previous_headings":"","what":"References","title":"Graphical comparison of multiple fitted distributions (for non-censored data) — graphcomp","text":"Delignette-Muller ML Dutang C (2015), fitdistrplus: R Package Fitting Distributions. 
Journal Statistical Software, 64(4), 1-34, doi:10.18637/jss.v064.i04 .","code":""},{"path":"https://lbbe-software.github.io/fitdistrplus/reference/graphcomp.html","id":"author","dir":"Reference","previous_headings":"","what":"Author","title":"Graphical comparison of multiple fitted distributions (for non-censored data) — graphcomp","text":"Christophe Dutang, Marie-Laure Delignette-Muller Aurelie Siberchicot.","code":""},{"path":"https://lbbe-software.github.io/fitdistrplus/reference/graphcomp.html","id":"ref-examples","dir":"Reference","previous_headings":"","what":"Examples","title":"Graphical comparison of multiple fitted distributions (for non-censored data) — graphcomp","text":"","code":"# (1) Plot various distributions fitted to serving size data # data(groundbeef) serving <- groundbeef$serving fitW <- fitdist(serving, \"weibull\") fitln <- fitdist(serving, \"lnorm\") fitg <- fitdist(serving, \"gamma\") cdfcomp(list(fitW, fitln, fitg), horizontals = FALSE) cdfcomp(list(fitW, fitln, fitg), horizontals = TRUE) cdfcomp(list(fitW, fitln, fitg), horizontals = TRUE, verticals = TRUE, datacol = \"purple\") cdfcomp(list(fitW, fitln, fitg), legendtext = c(\"Weibull\", \"lognormal\", \"gamma\"), main = \"ground beef fits\", xlab = \"serving sizes (g)\", ylab = \"F\", xlim = c(0, 250), xlegend = \"center\", lines01 = TRUE) denscomp(list(fitW, fitln, fitg), legendtext = c(\"Weibull\", \"lognormal\", \"gamma\"), main = \"ground beef fits\", xlab = \"serving sizes (g)\", xlim = c(0, 250), xlegend = \"topright\") ppcomp(list(fitW, fitln, fitg), legendtext = c(\"Weibull\", \"lognormal\", \"gamma\"), main = \"ground beef fits\", xlegend = \"bottomright\", line01 = TRUE) qqcomp(list(fitW, fitln, fitg), legendtext = c(\"Weibull\", \"lognormal\", \"gamma\"), main = \"ground beef fits\", xlegend = \"bottomright\", line01 = TRUE, xlim = c(0, 300), ylim = c(0, 300), fitpch = 16) # (2) Plot lognormal distributions fitted by # maximum goodness-of-fit estimation # using various distances (data plotted in log scale) # data(endosulfan) ATV <- subset(endosulfan, group == \"NonArthroInvert\")$ATV taxaATV <- subset(endosulfan, group == \"NonArthroInvert\")$taxa flnMGEKS <- fitdist(ATV, \"lnorm\", method = \"mge\", gof = \"KS\") flnMGEAD <- fitdist(ATV, \"lnorm\", method = \"mge\", gof = \"AD\") flnMGEADL <- fitdist(ATV, \"lnorm\", method = \"mge\", gof = \"ADL\") flnMGEAD2L <- fitdist(ATV, \"lnorm\", method = \"mge\", gof = \"AD2L\") cdfcomp(list(flnMGEKS, flnMGEAD, flnMGEADL, flnMGEAD2L), xlogscale = TRUE, main = \"fits of a lognormal dist. using various GOF dist.\", legendtext = c(\"MGE KS\", \"MGE AD\", \"MGE ADL\", \"MGE AD2L\"), verticals = TRUE, xlim = c(1, 100000), name.points=taxaATV) qqcomp(list(flnMGEKS, flnMGEAD, flnMGEADL, flnMGEAD2L), main = \"fits of a lognormal dist. using various GOF dist.\", legendtext = c(\"MGE KS\", \"MGE AD\", \"MGE ADL\", \"MGE AD2L\"), xlogscale = TRUE, ylogscale = TRUE) ppcomp(list(flnMGEKS, flnMGEAD, flnMGEADL, flnMGEAD2L), main = \"fits of a lognormal dist. 
using various GOF dist.\", legendtext = c(\"MGE KS\", \"MGE AD\", \"MGE ADL\", \"MGE AD2L\")) # (3) Plot normal and logistic distributions fitted by # maximum likelihood estimation # using various plotting positions in cdf plots # data(endosulfan) log10ATV <-log10(subset(endosulfan, group == \"NonArthroInvert\")$ATV) fln <- fitdist(log10ATV, \"norm\") fll <- fitdist(log10ATV, \"logis\") # default plot using Hazen plotting position: (1:n - 0.5)/n cdfcomp(list(fln, fll), legendtext = c(\"normal\", \"logistic\"), xlab = \"log10ATV\") # plot using mean plotting position (named also Gumbel plotting position) # (1:n)/(n + 1) cdfcomp(list(fln, fll),legendtext = c(\"normal\", \"logistic\"), xlab = \"log10ATV\", use.ppoints = TRUE, a.ppoints = 0) # plot using basic plotting position: (1:n)/n cdfcomp(list(fln, fll),legendtext = c(\"normal\", \"logistic\"), xlab = \"log10ATV\", use.ppoints = FALSE) # (4) Comparison of fits of two distributions fitted to discrete data # data(toxocara) number <- toxocara$number fitp <- fitdist(number, \"pois\") fitnb <- fitdist(number, \"nbinom\") cdfcomp(list(fitp, fitnb), legendtext = c(\"Poisson\", \"negative binomial\")) denscomp(list(fitp, fitnb),demp = TRUE, legendtext = c(\"Poisson\", \"negative binomial\")) denscomp(list(fitp, fitnb),demp = TRUE, fittype = \"l\", dempcol = \"black\", legendtext = c(\"Poisson\", \"negative binomial\")) denscomp(list(fitp, fitnb),demp = TRUE, fittype = \"p\", dempcol = \"black\", legendtext = c(\"Poisson\", \"negative binomial\")) denscomp(list(fitp, fitnb),demp = TRUE, fittype = \"o\", dempcol = \"black\", legendtext = c(\"Poisson\", \"negative binomial\")) # (5) Customizing of graphical output and use of ggplot2 # data(groundbeef) serving <- groundbeef$serving fitW <- fitdist(serving, \"weibull\") fitln <- fitdist(serving, \"lnorm\") fitg <- fitdist(serving, \"gamma\") if (requireNamespace (\"ggplot2\", quietly = TRUE)) { denscomp(list(fitW, fitln, fitg), plotstyle = \"ggplot\") cdfcomp(list(fitW, fitln, fitg), plotstyle = \"ggplot\") qqcomp(list(fitW, fitln, fitg), plotstyle = \"ggplot\") ppcomp(list(fitW, fitln, fitg), plotstyle = \"ggplot\") } # customizing graphical output with graphics denscomp(list(fitW, fitln, fitg), legendtext = c(\"Weibull\", \"lognormal\", \"gamma\"), main = \"ground beef fits\", xlab = \"serving sizes (g)\", xlim = c(0, 250), xlegend = \"topright\", addlegend = FALSE) # customizing graphical output with ggplot2 if (requireNamespace (\"ggplot2\", quietly = TRUE)) { dcomp <- denscomp(list(fitW, fitln, fitg), legendtext = c(\"Weibull\", \"lognormal\", \"gamma\"), xlab = \"serving sizes (g)\", xlim = c(0, 250), xlegend = \"topright\", plotstyle = \"ggplot\", breaks = 20, addlegend = FALSE) dcomp + ggplot2::theme_minimal() + ggplot2::ggtitle(\"Ground beef fits\") }"},{"path":"https://lbbe-software.github.io/fitdistrplus/reference/graphcompcens.html","id":null,"dir":"Reference","previous_headings":"","what":"Graphical comparison of multiple fitted distributions for censored data — graphcompcens","title":"Graphical comparison of multiple fitted distributions for censored data — graphcompcens","text":"cdfcompcens plots empirical cumulative distribution fitted distribution functions, qqcompcens plots theoretical quantiles empirical ones, ppcompcens plots theoretical probabilities empirical ones.","code":""},{"path":"https://lbbe-software.github.io/fitdistrplus/reference/graphcompcens.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Graphical comparison of multiple fitted 
distributions for censored data — graphcompcens","text":"","code":"cdfcompcens(ft, xlim, ylim, xlogscale = FALSE, ylogscale = FALSE, main, xlab, ylab, datacol, fillrect, fitlty, fitcol, fitlwd, addlegend = TRUE, legendtext, xlegend = \"bottomright\", ylegend = NULL, lines01 = FALSE, Turnbull.confint = FALSE, NPMLE.method = \"Wang\", add = FALSE, plotstyle = \"graphics\", ...) qqcompcens(ft, xlim, ylim, xlogscale = FALSE, ylogscale = FALSE, main, xlab, ylab, fillrect, fitcol, fitlwd, addlegend = TRUE, legendtext, xlegend = \"bottomright\", ylegend = NULL, line01 = TRUE, line01col = \"black\", line01lty = 1, ynoise = TRUE, NPMLE.method = \"Wang\", plotstyle = \"graphics\", ...) ppcompcens(ft, xlim, ylim, xlogscale = FALSE, ylogscale = FALSE, main, xlab, ylab, fillrect, fitcol, fitlwd, addlegend = TRUE, legendtext, xlegend = \"bottomright\", ylegend = NULL, line01 = TRUE, line01col = \"black\", line01lty = 1, ynoise = TRUE, NPMLE.method = \"Wang\", plotstyle = \"graphics\", ...)"},{"path":"https://lbbe-software.github.io/fitdistrplus/reference/graphcompcens.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Graphical comparison of multiple fitted distributions for censored data — graphcompcens","text":"ft One \"fitdistcens\" object list objects class \"fitdistcens\". xlim \\(x\\)-limits plot. ylim \\(y\\)-limits plot. xlogscale TRUE, uses logarithmic scale \\(x\\)-axis. ylogscale TRUE, uses logarithmic scale \\(y\\)-axis. main main title plot, see also title. xlab label \\(x\\)-axis, defaults description x. ylab label \\(y\\)-axis, defaults description y. datacol specification color used plotting data points. fillrect specification color used filling rectanges non uniqueness empirical cumulative distribution (used NPMLE.method equal \"Wang\" cdfcompcens). Fix NA want fill rectangles. fitcol (vector ) color(s) plot fitted distributions. fewer colors fits recycled standard fashion. fitlty (vector ) line type(s) plot fitted distributions. fewer values fits recycled standard fashion. See also par. fitlwd (vector ) line size(s) plot fitted distributions. fewer values fits recycled standard fashion. See also par. addlegend TRUE, legend added plot. legendtext character expression vector length \\(\\geq 1\\) appear legend, see also legend. xlegend, ylegend \\(x\\) \\(y\\) coordinates used position legend. can specified keyword. plotstyle = \"graphics\", see xy.coords legend. plotstyle = \"ggplot\", xlegend keyword must one top, bottom, left, right. See also guide_legend ggplot2 lines01 logical plot two horizontal lines h=0 h=1 cdfcompcens. Turnbull.confint TRUE confidence intervals added Turnbull plot. case NPMLE.method forced \"Turnbull\" NPMLE.method Three NPMLE techniques provided, \"Wang\", default one, rewritten package npsurv using function constrOptim package stats optimisation, \"Turnbull.middlepoints\", older one implemented package survival \"Turnbull.intervals\" uses Turnbull algorithm package survival associates interval equivalence class instead middlepoint interval (see details). \"Wang\" \"Turnbull.intervals\" enable derivation Q-Q plot P-P plot. add TRUE, adds already existing plot. FALSE, starts new plot. parameter available plotstyle = \"ggplot\". line01 logical plot horizontal line \\(y=x\\) qqcompcens ppcompcens. line01col, line01lty Color line type line01. See also par. ynoise logical add small noise plotting empirical quantiles/probabilities qqcompcens ppcompcens. ynoise used various fits plotted \"graphics\" plotstyle. 
Facets used instead \"ggplot\" plotstyle. plotstyle \"graphics\" \"ggplot\". \"graphics\", display built graphics functions. \"ggplot\", graphic object output created ggplot2 functions (ggplot2 package must installed). \"cdfcompcens\", \"ggplot\" graphics available \"Wang\" NPMLE technique. ... graphical arguments passed graphical functions used cdfcompcens, ppcompcens qqcompcens.","code":""},{"path":"https://lbbe-software.github.io/fitdistrplus/reference/graphcompcens.html","id":"details","dir":"Reference","previous_headings":"","what":"Details","title":"Graphical comparison of multiple fitted distributions for censored data — graphcompcens","text":"See details plotdistcens detailed description provided goddness--fit plots.","code":""},{"path":[]},{"path":"https://lbbe-software.github.io/fitdistrplus/reference/graphcompcens.html","id":"references","dir":"Reference","previous_headings":"","what":"References","title":"Graphical comparison of multiple fitted distributions for censored data — graphcompcens","text":"Turnbull BW (1974), Nonparametric estimation survivorship function doubly censored data. Journal American Statistical Association, 69, 169-173. Wang Y (2008), Dimension-reduced nonparametric maximum likelihood computation interval-censored data. Computational Statistics & Data Analysis, 52, 2388-2402. Wang Y Taylor SM (2013), Efficient computation nonparametric survival functions via hierarchical mixture formulation. Statistics Computing, 23, 713-725. Delignette-Muller ML Dutang C (2015), fitdistrplus: R Package Fitting Distributions. Journal Statistical Software, 64(4), 1-34.","code":""},{"path":"https://lbbe-software.github.io/fitdistrplus/reference/graphcompcens.html","id":"author","dir":"Reference","previous_headings":"","what":"Author","title":"Graphical comparison of multiple fitted distributions for censored data — graphcompcens","text":"Marie-Laure Delignette-Muller Christophe Dutang.","code":""},{"path":"https://lbbe-software.github.io/fitdistrplus/reference/graphcompcens.html","id":"ref-examples","dir":"Reference","previous_headings":"","what":"Examples","title":"Graphical comparison of multiple fitted distributions for censored data — graphcompcens","text":"","code":"# (1) Plot various distributions fitted to bacterial contamination data # data(smokedfish) Clog10 <- log10(smokedfish) fitsfn <- fitdistcens(Clog10,\"norm\") summary(fitsfn) #> Fitting of the distribution ' norm ' By maximum likelihood on censored data #> Parameters #> estimate Std. Error #> mean -1.575392 0.2013872 #> sd 1.539446 0.2118026 #> Loglikelihood: -87.10945 AIC: 178.2189 BIC: 183.4884 #> Correlation matrix: #> mean sd #> mean 1.0000000 -0.4325228 #> sd -0.4325228 1.0000000 #> fitsfl <- fitdistcens(Clog10,\"logis\") summary(fitsfl) #> Fitting of the distribution ' logis ' By maximum likelihood on censored data #> Parameters #> estimate Std. Error #> location -1.5394230 0.1681236 #> scale 0.8121862 0.1332863 #> Loglikelihood: -86.45499 AIC: 176.91 BIC: 182.1794 #> Correlation matrix: #> location scale #> location 1.0000000 -0.3189915 #> scale -0.3189915 1.0000000 #> dgumbel <- function(x,a,b) 1/b*exp((a-x)/b)*exp(-exp((a-x)/b)) pgumbel <- function(q,a,b) exp(-exp((a-q)/b)) qgumbel <- function(p,a,b) a-b*log(-log(p)) fitsfg<-fitdistcens(Clog10,\"gumbel\",start=list(a=-3,b=3)) #> Error in checkparamlist(arg_startfix$start.arg, arg_startfix$fix.arg, argddistname, hasnodefaultval): 'start' must specify names which are arguments to 'distr'. 
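The error just above (and the "object 'fitsfg' not found" errors that follow from it) looks like an artefact of the rendering session: a different dgumbel with other argument names (for instance the one exported by the actuar package, attached by an earlier example) is probably being found instead of the one defined here. Assuming that is the cause, a sketch using names that cannot clash would be:

# Gumbel functions under names that cannot collide with other packages
dgumbel2 <- function(x, a, b) 1/b * exp((a - x)/b) * exp(-exp((a - x)/b))
pgumbel2 <- function(q, a, b) exp(-exp((a - q)/b))
qgumbel2 <- function(p, a, b) a - b * log(-log(p))
fitsfg2 <- fitdistcens(Clog10, "gumbel2", start = list(a = -3, b = 3))
summary(fitsfg2)

With distinct names, the start list (a, b) matches the formal arguments of dgumbel2 and the fit proceeds.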
summary(fitsfg) #> Error in eval(expr, envir, enclos): object 'fitsfg' not found # CDF plot cdfcompcens(list(fitsfn,fitsfl,fitsfg)) #> Error in eval(expr, envir, enclos): object 'fitsfg' not found cdfcompcens(list(fitsfn,fitsfl,fitsfg),datacol=\"orange\",fillrect = NA, legendtext=c(\"normal\",\"logistic\",\"Gumbel\"), main=\"bacterial contamination fits\", xlab=\"bacterial concentration (CFU/g)\",ylab=\"F\", xlegend = \"bottom\",lines01 = TRUE) #> Error in eval(expr, envir, enclos): object 'fitsfg' not found # alternative Turnbull plot for the empirical cumulative distribution # (default plot of the previous versions of the package) cdfcompcens(list(fitsfn,fitsfl,fitsfg), NPMLE.method = \"Turnbull.middlepoints\") #> Error in eval(expr, envir, enclos): object 'fitsfg' not found # customizing graphical output with ggplot2 if (requireNamespace (\"ggplot2\", quietly = TRUE)) { cdfcompcens <- cdfcompcens(list(fitsfn,fitsfl,fitsfg),datacol=\"orange\",fillrect = NA, legendtext=c(\"normal\",\"logistic\",\"Gumbel\"), xlab=\"bacterial concentration (CFU/g)\",ylab=\"F\", xlegend = \"bottom\",lines01 = TRUE, plotstyle = \"ggplot\") cdfcompcens + ggplot2::theme_minimal() + ggplot2::ggtitle(\"Bacterial contamination fits\") } #> Error in eval(expr, envir, enclos): object 'fitsfg' not found # PP plot ppcompcens(list(fitsfn,fitsfl,fitsfg)) #> Error in eval(expr, envir, enclos): object 'fitsfg' not found ppcompcens(list(fitsfn,fitsfl,fitsfg), ynoise = FALSE) #> Error in eval(expr, envir, enclos): object 'fitsfg' not found par(mfrow = c(2,2)) ppcompcens(fitsfn) ppcompcens(fitsfl) ppcompcens(fitsfg) #> Error in eval(expr, envir, enclos): object 'fitsfg' not found par(mfrow = c(1,1)) if (requireNamespace (\"ggplot2\", quietly = TRUE)) { ppcompcens(list(fitsfn,fitsfl,fitsfg), plotstyle = \"ggplot\") ppcompcens(list(fitsfn,fitsfl,fitsfg), plotstyle = \"ggplot\", fillrect = c(\"lightpink\", \"lightblue\", \"lightgreen\"), fitcol = c(\"red\", \"blue\", \"green\")) } #> Error in eval(expr, envir, enclos): object 'fitsfg' not found # QQ plot qqcompcens(list(fitsfn,fitsfl,fitsfg)) #> Error in eval(expr, envir, enclos): object 'fitsfg' not found qqcompcens(list(fitsfn,fitsfl,fitsfg), ynoise = FALSE) #> Error in eval(expr, envir, enclos): object 'fitsfg' not found par(mfrow = c(2,2)) qqcompcens(fitsfn) qqcompcens(fitsfl) qqcompcens(fitsfg) #> Error in eval(expr, envir, enclos): object 'fitsfg' not found par(mfrow = c(1,1)) if (requireNamespace (\"ggplot2\", quietly = TRUE)) { qqcompcens(list(fitsfn,fitsfl,fitsfg), ynoise = FALSE, plotstyle = \"ggplot\") qqcompcens(list(fitsfn,fitsfl,fitsfg), ynoise = FALSE, plotstyle = \"ggplot\", fillrect = c(\"lightpink\", \"lightblue\", \"lightgreen\"), fitcol = c(\"red\", \"blue\", \"green\")) } #> Error in eval(expr, envir, enclos): object 'fitsfg' not found"},{"path":"https://lbbe-software.github.io/fitdistrplus/reference/groundbeef.html","id":null,"dir":"Reference","previous_headings":"","what":"Ground beef serving size data set — groundbeef","title":"Ground beef serving size data set — groundbeef","text":"Serving sizes collected French survey, ground beef patties consumed children 5 years old.","code":""},{"path":"https://lbbe-software.github.io/fitdistrplus/reference/groundbeef.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Ground beef serving size data set — 
groundbeef","text":"","code":"data(groundbeef)"},{"path":"https://lbbe-software.github.io/fitdistrplus/reference/groundbeef.html","id":"format","dir":"Reference","previous_headings":"","what":"Format","title":"Ground beef serving size data set — groundbeef","text":"groundbeef data frame 1 column (serving: serving sizes grams)","code":""},{"path":"https://lbbe-software.github.io/fitdistrplus/reference/groundbeef.html","id":"source","dir":"Reference","previous_headings":"","what":"Source","title":"Ground beef serving size data set — groundbeef","text":"Delignette-Muller, M.L., Cornu, M. 2008. Quantitative risk assessment Escherichia coli O157:H7 frozen ground beef patties consumed young children French households. International Journal Food Microbiology, 128, 158-164.","code":""},{"path":"https://lbbe-software.github.io/fitdistrplus/reference/groundbeef.html","id":"ref-examples","dir":"Reference","previous_headings":"","what":"Examples","title":"Ground beef serving size data set — groundbeef","text":"","code":"# (1) load of data # data(groundbeef) # (2) description and plot of data # serving <- groundbeef$serving descdist(serving) #> summary statistics #> ------ #> min: 10 max: 200 #> median: 79 #> mean: 73.64567 #> estimated sd: 35.88487 #> estimated skewness: 0.7352745 #> estimated kurtosis: 3.551384 plotdist(serving) # (3) fit of a Weibull distribution to data # fitW <- fitdist(serving, \"weibull\") summary(fitW) #> Fitting of the distribution ' weibull ' by maximum likelihood #> Parameters : #> estimate Std. Error #> shape 2.185885 0.1045755 #> scale 83.347679 2.5268626 #> Loglikelihood: -1255.225 AIC: 2514.449 BIC: 2521.524 #> Correlation matrix: #> shape scale #> shape 1.000000 0.321821 #> scale 0.321821 1.000000 #> plot(fitW) gofstat(fitW) #> Goodness-of-fit statistics #> 1-mle-weibull #> Kolmogorov-Smirnov statistic 0.1396646 #> Cramer-von Mises statistic 0.6840994 #> Anderson-Darling statistic 3.5736460 #> #> Goodness-of-fit criteria #> 1-mle-weibull #> Akaike's Information Criterion 2514.449 #> Bayesian Information Criterion 2521.524"},{"path":"https://lbbe-software.github.io/fitdistrplus/reference/logLik-plot.html","id":null,"dir":"Reference","previous_headings":"","what":"(Log)likelihood plot for a fit using maximum likelihood — logLikplot","title":"(Log)likelihood plot for a fit using maximum likelihood — logLikplot","text":"llplot plots (log)likelihood around estimation distributions fitted maximum likelihood.","code":""},{"path":"https://lbbe-software.github.io/fitdistrplus/reference/logLik-plot.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"(Log)likelihood plot for a fit using maximum likelihood — logLikplot","text":"","code":"llplot(mlefit, loglik = TRUE, expansion = 1, lseq = 50, back.col = TRUE, nlev = 10, pal.col = terrain.colors(100), fit.show = FALSE, fit.pch = 4, ...)"},{"path":"https://lbbe-software.github.io/fitdistrplus/reference/logLik-plot.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"(Log)likelihood plot for a fit using maximum likelihood — logLikplot","text":"mlefit object class \"fitdist\" \"fitdistcens\" obtained maximum likelihood (method = \"mle\") loglik logical plot log-likelihood likelihood function. expansion expansion factor enlarge default range values explored parameter. lseq length sequences parameters. back.col logical (llsurface ). Contours plotted background gradient colors TRUE. nlev number contour levels plot. pal.col Palette colors. 
Colors used back (llsurface ). fit.show logical plot mle estimate. fit.pch type point used plot mle estimate. ... graphical arguments passed graphical functions.","code":""},{"path":"https://lbbe-software.github.io/fitdistrplus/reference/logLik-plot.html","id":"details","dir":"Reference","previous_headings":"","what":"Details","title":"(Log)likelihood plot for a fit using maximum likelihood — logLikplot","text":"llplot plots (log)likelihood surface(s) (curve one estimated parameter) around maximum likelihood estimation. internally calls function llsurface llcurve. two estimated parameters, (log)likehood surface plotted combination two parameters, fixing ones estimated value. (log)likelihood surface, back.col image (2D-plot) used nlev > 0 contour (2D-plot) used add nlev contours. default range values explored estimated parameter 2 standard error around mle estimate range can expanded (contracted) using argument expansion.","code":""},{"path":[]},{"path":"https://lbbe-software.github.io/fitdistrplus/reference/logLik-plot.html","id":"references","dir":"Reference","previous_headings":"","what":"References","title":"(Log)likelihood plot for a fit using maximum likelihood — logLikplot","text":"Delignette-Muller ML Dutang C (2015), fitdistrplus: R Package Fitting Distributions. Journal Statistical Software, 64(4), 1-34, doi:10.18637/jss.v064.i04 .","code":""},{"path":"https://lbbe-software.github.io/fitdistrplus/reference/logLik-plot.html","id":"author","dir":"Reference","previous_headings":"","what":"Author","title":"(Log)likelihood plot for a fit using maximum likelihood — logLikplot","text":"Marie-Laure Delignette-Muller Christophe Dutang.","code":""},{"path":"https://lbbe-software.github.io/fitdistrplus/reference/logLik-plot.html","id":"ref-examples","dir":"Reference","previous_headings":"","what":"Examples","title":"(Log)likelihood plot for a fit using maximum likelihood — logLikplot","text":"","code":"# (1) a distribution with one parameter # x <- rexp(50) fite <- fitdist(x, \"exp\") llplot(fite) llplot(fite, col = \"red\", fit.show = TRUE) llplot(fite, col = \"red\", fit.show = TRUE, loglik = FALSE) # (2) a distribution with two parameters # data(groundbeef) serving <- groundbeef$serving fitg <- fitdist(serving, \"gamma\") llplot(fitg) # \\donttest{ llplot(fitg, expansion = 2) llplot(fitg, pal.col = heat.colors(100), fit.show = TRUE) llplot(fitg, back.col = FALSE, nlev = 25, fit.show = TRUE) # } # (3) a distribution with two parameters with one fixed # fitg2 <- fitdist(serving, \"gamma\", fix.arg = list(rate = 0.5)) llplot(fitg2, fit.show = TRUE) # (4) a distribution with three parameters # # \\donttest{ data(endosulfan) ATV <-endosulfan$ATV library(\"actuar\") fBurr <- fitdist(ATV, \"burr\", start = list(shape1 = 0.3, shape2 = 1, rate = 1)) llplot(fBurr) llplot(fBurr, back.col = FALSE, fit.show = TRUE, fit.pch = 16) llplot(fBurr, nlev = 0, pal.col = rainbow(100), lseq = 100) # } # (5) a distribution with two parameters fitted on censored data # data(salinity) fsal <- fitdistcens(salinity, \"lnorm\") llplot(fsal, fit.show = TRUE) llplot(fsal, fit.show = TRUE, loglik = FALSE)"},{"path":"https://lbbe-software.github.io/fitdistrplus/reference/logLik-surface.html","id":null,"dir":"Reference","previous_headings":"","what":"(Log)likelihood surfaces or (log)likelihood curves — logLiksurface","title":"(Log)likelihood surfaces or (log)likelihood curves — logLiksurface","text":"llsurface plots likelihood surface distributions two parameters, llcurve plots likelihood curve distributions one 
parameters.","code":""},{"path":"https://lbbe-software.github.io/fitdistrplus/reference/logLik-surface.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"(Log)likelihood surfaces or (log)likelihood curves — logLiksurface","text":"","code":"llsurface(data, distr, plot.arg, min.arg, max.arg, lseq = 50, fix.arg = NULL, loglik = TRUE, back.col = TRUE, nlev = 10, pal.col = terrain.colors(100), weights = NULL, ...) llcurve(data, distr, plot.arg, min.arg, max.arg, lseq = 50, fix.arg = NULL, loglik = TRUE, weights = NULL, ...)"},{"path":"https://lbbe-software.github.io/fitdistrplus/reference/logLik-surface.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"(Log)likelihood surfaces or (log)likelihood curves — logLiksurface","text":"data numeric vector non censored data dataframe two columns respectively named left right, describing observed value interval censored data. case left column contains either NA left censored observations, left bound interval interval censored observations, observed value non-censored observations. right column contains either NA right censored observations, right bound interval interval censored observations, observed value non-censored observations. distr character string \"name\" naming distribution corresponding density function dname corresponding distribution function pname must classically defined. plot.arg two-element vector names two parameters vary llsurface, one element llcurve. min.arg two-element vector lower plotting bounds llsurface, one element llcurve. max.arg two-element vector upper plotting bounds llsurface, one element llcurve. lseq length sequences parameters. fix.arg named list fixed value parameters. loglik logical plot log-likelihood likelihood function. back.col logical (llsurface ). Contours plotted background gradient colors TRUE. nlev number contour levels plot (llsurface ). pal.col Palette colors. Colors used back (llsurface ). weights optional vector weights used fitting process. NULL numeric vector strictly positive values (classically number occurences observation). ... graphical arguments passed graphical functions.","code":""},{"path":"https://lbbe-software.github.io/fitdistrplus/reference/logLik-surface.html","id":"details","dir":"Reference","previous_headings":"","what":"Details","title":"(Log)likelihood surfaces or (log)likelihood curves — logLiksurface","text":"two function intended called directly internally called llplot. llsurface plots likelihood surface distributions two varying parameters parameters fixed. back.col, image (2D-plot) used. nlev > 0, contour (2D-plot) used add nlev contours. llcurve plots likelihood curve distributions one varying parameter parameters fixed.","code":""},{"path":[]},{"path":"https://lbbe-software.github.io/fitdistrplus/reference/logLik-surface.html","id":"references","dir":"Reference","previous_headings":"","what":"References","title":"(Log)likelihood surfaces or (log)likelihood curves — logLiksurface","text":"Delignette-Muller ML Dutang C (2015), fitdistrplus: R Package Fitting Distributions. 
Journal Statistical Software, 64(4), 1-34, doi:10.18637/jss.v064.i04 .","code":""},{"path":"https://lbbe-software.github.io/fitdistrplus/reference/logLik-surface.html","id":"author","dir":"Reference","previous_headings":"","what":"Author","title":"(Log)likelihood surfaces or (log)likelihood curves — logLiksurface","text":"Marie-Laure Delignette-Muller Christophe Dutang.","code":""},{"path":"https://lbbe-software.github.io/fitdistrplus/reference/logLik-surface.html","id":"ref-examples","dir":"Reference","previous_headings":"","what":"Examples","title":"(Log)likelihood surfaces or (log)likelihood curves — logLiksurface","text":"","code":"# (1) loglikelihood or likelihood curve # n <- 100 set.seed(1234) x <- rexp(n) llcurve(data = x, distr = \"exp\", plot.arg = \"rate\", min.arg = 0, max.arg = 4) llcurve(data = x, distr = \"exp\", plot.arg = \"rate\", min.arg = 0, max.arg = 4, loglik = FALSE) llcurve(data = x, distr = \"exp\", plot.arg = \"rate\", min.arg = 0, max.arg = 4, main = \"log-likelihood for exponential distribution\", col = \"red\") abline(v = 1, lty = 2) # (2) loglikelihood surface # x <- rnorm(n, 0, 1) llsurface(data =x, distr=\"norm\", plot.arg=c(\"mean\", \"sd\"), min.arg=c(-1, 0.5), max.arg=c(1, 3/2), back.col = FALSE, main=\"log-likelihood for normal distribution\") llsurface(data =x, distr=\"norm\", plot.arg=c(\"mean\", \"sd\"), min.arg=c(-1, 0.5), max.arg=c(1, 3/2), main=\"log-likelihood for normal distribution\", nlev = 20, pal.col = heat.colors(100),) points(0, 1, pch=\"+\", col=\"red\") llsurface(data =x, distr=\"norm\", plot.arg=c(\"mean\", \"sd\"), min.arg=c(-1, 0.5), max.arg=c(1, 3/2), main=\"log-likelihood for normal distribution\", nlev = 0, back.col = TRUE, pal.col = rainbow(100, s = 0.5, end = 0.8)) points(0, 1, pch=\"+\", col=\"black\")"},{"path":"https://lbbe-software.github.io/fitdistrplus/reference/mgedist.html","id":null,"dir":"Reference","previous_headings":"","what":"Maximum goodness-of-fit fit of univariate continuous distributions — mgedist","title":"Maximum goodness-of-fit fit of univariate continuous distributions — mgedist","text":"Fit univariate continuous distribution maximizing goodness--fit (minimizing distance) non censored data.","code":""},{"path":"https://lbbe-software.github.io/fitdistrplus/reference/mgedist.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Maximum goodness-of-fit fit of univariate continuous distributions — mgedist","text":"","code":"mgedist(data, distr, gof = \"CvM\", start = NULL, fix.arg = NULL, optim.method = \"default\", lower = -Inf, upper = Inf, custom.optim = NULL, silent = TRUE, gradient = NULL, checkstartfix=FALSE, calcvcov=FALSE, ...)"},{"path":"https://lbbe-software.github.io/fitdistrplus/reference/mgedist.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Maximum goodness-of-fit fit of univariate continuous distributions — mgedist","text":"data numeric vector non censored data. distr character string \"name\" naming distribution corresponding quantile function qname corresponding density distribution dname must classically defined. gof character string coding name goodness--fit distance used : \"CvM\" Cramer-von Mises distance, \"KS\" Kolmogorov-Smirnov distance, \"AD\" Anderson-Darling distance, \"ADR\", \"ADL\", \"AD2R\", \"AD2L\" \"AD2\" variants Anderson-Darling distance described Luceno (2006). start named list giving initial values parameters named distribution function data computing initial values returning named list. 
argument may omitted (default) distributions reasonable starting values computed (see 'details' section mledist). fix.arg optional named list giving values fixed parameters named distribution function data computing (fixed) parameter values returning named list. Parameters fixed value thus estimated. optim.method \"default\" optimization method pass optim. lower Left bounds parameters \"L-BFGS-B\" method (see optim). upper Right bounds parameters \"L-BFGS-B\" method (see optim). custom.optim function carrying optimization. silent logical remove show warnings bootstraping. gradient function return gradient gof distance \"BFGS\", \"CG\" \"L-BFGS-B\" methods. NULL, finite-difference approximation used. checkstartfix logical test starting fixed values. change . calcvcov logical indicating (asymptotic) covariance matrix required. (currently ignored) ... arguments passed optim, constrOptim custom.optim function.","code":""},{"path":"https://lbbe-software.github.io/fitdistrplus/reference/mgedist.html","id":"details","dir":"Reference","previous_headings":"","what":"Details","title":"Maximum goodness-of-fit fit of univariate continuous distributions — mgedist","text":"mgedist function numerically maximizes goodness--fit, minimizes goodness--fit distance coded argument gof. One may use one classical distances defined Stephens (1986), Cramer-von Mises distance (\"CvM\"), Kolmogorov-Smirnov distance (\"KS\") Anderson-Darling distance (\"AD\") gives weight tails distribution, one variants last distance proposed Luceno (2006). right-tail AD (\"ADR\") gives weight right tail, left-tail AD (\"ADL\") gives weight left tail. Either tails, , can receive even larger weights using second order Anderson-Darling Statistics (using \"AD2R\", \"AD2L\" \"AD2\"). optimization process mledist, see 'details' section function. function intended called directly internally called fitdist bootdist. function intended used continuous distributions weighted maximum goodness--fit estimation allowed. NB: data values particularly small large, scaling may needed optimization process. See example (4).","code":""},{"path":"https://lbbe-software.github.io/fitdistrplus/reference/mgedist.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Maximum goodness-of-fit fit of univariate continuous distributions — mgedist","text":"mgedist returns list following components, estimate parameter estimates. convergence integer code convergence optim defined defined user user-supplied optimization function. 0 indicates successful convergence. 1 indicates iteration limit optim reached. 10 indicates degeneracy Nealder-Mead simplex. 100 indicates optim encountered internal error. value minimal value reached criterion minimize. hessian symmetric matrix computed optim estimate Hessian solution found computed user-supplied optimization function. optim.function name optimization function used maximum likelihood. optim.method optim used, name algorithm used, field method custom.optim function otherwise. fix.arg named list giving values parameters named distribution must kept fixed rather estimated maximum likelihood NULL parameters. fix.arg.fun function used set value fix.arg NULL. weights vector weigths used estimation process NULL. counts two-element integer vector giving number calls log-likelihood function gradient respectively. excludes calls needed compute Hessian, requested, calls log-likelihood function compute finite-difference approximation gradient. counts returned optim user-supplied function set NULL. 
optim.message character string giving additional information returned optimizer, NULL. understand exactly message, see source code. loglik log-likelihood value. gof code goodness--fit distance maximized.","code":""},{"path":[]},{"path":"https://lbbe-software.github.io/fitdistrplus/reference/mgedist.html","id":"references","dir":"Reference","previous_headings":"","what":"References","title":"Maximum goodness-of-fit fit of univariate continuous distributions — mgedist","text":"Luceno (2006), Fitting generalized Pareto distribution data using maximum goodness--fit estimators. Computational Statistics Data Analysis, 51, 904-917, doi:10.1016/j.csda.2005.09.011 . Stephens MA (1986), Tests based edf statistics. Goodness--fit techniques (D'Agostino RB Stephens MA, eds), Marcel Dekker, New York, pp. 97-194. Delignette-Muller ML Dutang C (2015), fitdistrplus: R Package Fitting Distributions. Journal Statistical Software, 64(4), 1-34, doi:10.18637/jss.v064.i04 .","code":""},{"path":"https://lbbe-software.github.io/fitdistrplus/reference/mgedist.html","id":"author","dir":"Reference","previous_headings":"","what":"Author","title":"Maximum goodness-of-fit fit of univariate continuous distributions — mgedist","text":"Marie-Laure Delignette-Muller Christophe Dutang.","code":""},{"path":"https://lbbe-software.github.io/fitdistrplus/reference/mgedist.html","id":"ref-examples","dir":"Reference","previous_headings":"","what":"Examples","title":"Maximum goodness-of-fit fit of univariate continuous distributions — mgedist","text":"","code":"# (1) Fit of a Weibull distribution to serving size data by maximum # goodness-of-fit estimation using all the distances available # data(groundbeef) serving <- groundbeef$serving mgedist(serving, \"weibull\", gof=\"CvM\") #> $estimate #> shape scale #> 2.093204 82.660014 #> #> $convergence #> [1] 0 #> #> $value #> [1] 0.6556672 #> #> $hessian #> shape scale #> shape 4.05295367 0.09244476 #> scale 0.09244476 0.02418777 #> #> $optim.function #> [1] \"optim\" #> #> $optim.method #> [1] \"Nelder-Mead\" #> #> $fix.arg #> NULL #> #> $fix.arg.fun #> NULL #> #> $weights #> NULL #> #> $counts #> function gradient #> 65 NA #> #> $optim.message #> NULL #> #> $loglik #> [1] -1255.623 #> #> $gof #> [1] \"CvM\" #> mgedist(serving, \"weibull\", gof=\"KS\") #> $estimate #> shape scale #> 2.065634 81.450487 #> #> $convergence #> [1] 0 #> #> $value #> [1] 0.112861 #> #> $hessian #> shape scale #> shape 122.668263 6.509057 #> scale 6.509057 7.599584 #> #> $optim.function #> [1] \"optim\" #> #> $optim.method #> [1] \"Nelder-Mead\" #> #> $fix.arg #> NULL #> #> $fix.arg.fun #> NULL #> #> $weights #> NULL #> #> $counts #> function gradient #> 127 NA #> #> $optim.message #> NULL #> #> $loglik #> [1] -1255.975 #> #> $gof #> [1] \"KS\" #> mgedist(serving, \"weibull\", gof=\"AD\") #> $estimate #> shape scale #> 2.125473 82.890260 #> #> $convergence #> [1] 0 #> #> $value #> [1] 3.501035 #> #> $hessian #> shape scale #> shape 29.4165108 0.1823375 #> scale 0.1823375 0.1354409 #> #> $optim.function #> [1] \"optim\" #> #> $optim.method #> [1] \"Nelder-Mead\" #> #> $fix.arg #> NULL #> #> $fix.arg.fun #> NULL #> #> $weights #> NULL #> #> $counts #> function gradient #> 63 NA #> #> $optim.message #> NULL #> #> $loglik #> [1] -1255.392 #> #> $gof #> [1] \"AD\" #> mgedist(serving, \"weibull\", gof=\"ADR\") #> $estimate #> shape scale #> 2.072087 82.761868 #> #> $convergence #> [1] 0 #> #> $value #> [1] 1.610479 #> #> $hessian #> shape scale #> shape 13.5240921 -0.33242262 #> scale -0.3324226 0.07977375 #> #> 
$optim.function #> [1] \"optim\" #> #> $optim.method #> [1] \"Nelder-Mead\" #> #> $fix.arg #> NULL #> #> $fix.arg.fun #> NULL #> #> $weights #> NULL #> #> $counts #> function gradient #> 65 NA #> #> $optim.message #> NULL #> #> $loglik #> [1] -1255.836 #> #> $gof #> [1] \"ADR\" #> mgedist(serving, \"weibull\", gof=\"ADL\") #> $estimate #> shape scale #> 2.197498 82.016005 #> #> $convergence #> [1] 0 #> #> $value #> [1] 1.845939 #> #> $hessian #> shape scale #> shape 15.3272022 0.54407116 #> scale 0.5440712 0.05549883 #> #> $optim.function #> [1] \"optim\" #> #> $optim.method #> [1] \"Nelder-Mead\" #> #> $fix.arg #> NULL #> #> $fix.arg.fun #> NULL #> #> $weights #> NULL #> #> $counts #> function gradient #> 65 NA #> #> $optim.message #> NULL #> #> $loglik #> [1] -1255.415 #> #> $gof #> [1] \"ADL\" #> mgedist(serving, \"weibull\", gof=\"AD2R\") #> $estimate #> shape scale #> 1.90328 81.33464 #> #> $convergence #> [1] 0 #> #> $value #> [1] 11.56415 #> #> $hessian #> shape scale #> shape 334.61081 -10.4227495 #> scale -10.42275 0.5223167 #> #> $optim.function #> [1] \"optim\" #> #> $optim.method #> [1] \"Nelder-Mead\" #> #> $fix.arg #> NULL #> #> $fix.arg.fun #> NULL #> #> $weights #> NULL #> #> $counts #> function gradient #> 67 NA #> #> $optim.message #> NULL #> #> $loglik #> [1] -1259.112 #> #> $gof #> [1] \"AD2R\" #> mgedist(serving, \"weibull\", gof=\"AD2L\") #> $estimate #> shape scale #> 2.483836 78.252113 #> #> $convergence #> [1] 0 #> #> $value #> [1] 9.786977 #> #> $hessian #> shape scale #> shape 113.511932 4.1108355 #> scale 4.110836 0.2341312 #> #> $optim.function #> [1] \"optim\" #> #> $optim.method #> [1] \"Nelder-Mead\" #> #> $fix.arg #> NULL #> #> $fix.arg.fun #> NULL #> #> $weights #> NULL #> #> $counts #> function gradient #> 69 NA #> #> $optim.message #> NULL #> #> $loglik #> [1] -1265.933 #> #> $gof #> [1] \"AD2L\" #> mgedist(serving, \"weibull\", gof=\"AD2\") #> $estimate #> shape scale #> 2.081168 85.281194 #> #> $convergence #> [1] 0 #> #> $value #> [1] 26.95166 #> #> $hessian #> shape scale #> shape 534.9606 -10.5940982 #> scale -10.5941 0.7606462 #> #> $optim.function #> [1] \"optim\" #> #> $optim.method #> [1] \"Nelder-Mead\" #> #> $fix.arg #> NULL #> #> $fix.arg.fun #> NULL #> #> $weights #> NULL #> #> $counts #> function gradient #> 67 NA #> #> $optim.message #> NULL #> #> $loglik #> [1] -1256.313 #> #> $gof #> [1] \"AD2\" #> # (2) Fit of a uniform distribution using Cramer-von Mises or # Kolmogorov-Smirnov distance # set.seed(1234) u <- runif(100,min=5,max=10) mgedist(u,\"unif\",gof=\"CvM\") #> $estimate #> min max #> 4.788260 9.568912 #> #> $convergence #> [1] 0 #> #> $value #> [1] 0.1142423 #> #> $hessian #> min max #> min 2.906956 1.461523 #> max 1.461523 2.570923 #> #> $optim.function #> [1] \"optim\" #> #> $optim.method #> [1] \"Nelder-Mead\" #> #> $fix.arg #> NULL #> #> $fix.arg.fun #> NULL #> #> $weights #> NULL #> #> $counts #> function gradient #> 59 NA #> #> $optim.message #> NULL #> #> $loglik #> [1] -Inf #> #> $gof #> [1] \"CvM\" #> mgedist(u,\"unif\",gof=\"KS\") #> $estimate #> min max #> 4.664535 9.463995 #> #> $convergence #> [1] 0 #> #> $value #> [1] 0.08 #> #> $hessian #> min max #> min 43.06566 -33.35097 #> max -33.35097 -61.06933 #> #> $optim.function #> [1] \"optim\" #> #> $optim.method #> [1] \"Nelder-Mead\" #> #> $fix.arg #> NULL #> #> $fix.arg.fun #> NULL #> #> $weights #> NULL #> #> $counts #> function gradient #> 29 NA #> #> $optim.message #> NULL #> #> $loglik #> [1] -Inf #> #> $gof #> [1] \"KS\" #> # (3) Fit of a triangular distribution 
using Cramer-von Mises or # Kolmogorov-Smirnov distance # # \\donttest{ require(mc2d) set.seed(1234) t <- rtriang(100,min=5,mode=6,max=10) mgedist(t,\"triang\",start = list(min=4, mode=6,max=9),gof=\"CvM\") #> Warning: Some parameter names have no starting/fixed value but have a default value: mean. #> $estimate #> min mode max #> 5.051036 5.796428 9.391579 #> #> $convergence #> [1] 0 #> #> $value #> [1] 0.06428299 #> #> $hessian #> min mode max #> min 3.051858 3.248860 1.522501 #> mode 3.248860 3.821007 1.800899 #> max 1.522501 1.800899 1.593900 #> #> $optim.function #> [1] \"optim\" #> #> $optim.method #> [1] \"Nelder-Mead\" #> #> $fix.arg #> NULL #> #> $fix.arg.fun #> NULL #> #> $weights #> NULL #> #> $counts #> function gradient #> 106 NA #> #> $optim.message #> NULL #> #> $loglik #> [1] -Inf #> #> $gof #> [1] \"CvM\" #> mgedist(t,\"triang\",start = list(min=4, mode=6,max=9),gof=\"KS\") #> Warning: Some parameter names have no starting/fixed value but have a default value: mean. #> $estimate #> min mode max #> 4.939094 5.813200 9.248592 #> #> $convergence #> [1] 0 #> #> $value #> [1] 0.06191245 #> #> $hessian #> min mode max #> min 158.93759 158.9436 70.39038 #> mode 158.94358 199.0473 70.39510 #> max 70.39038 70.3951 106.08995 #> #> $optim.function #> [1] \"optim\" #> #> $optim.method #> [1] \"Nelder-Mead\" #> #> $fix.arg #> NULL #> #> $fix.arg.fun #> NULL #> #> $weights #> NULL #> #> $counts #> function gradient #> 268 NA #> #> $optim.message #> NULL #> #> $loglik #> [1] -Inf #> #> $gof #> [1] \"KS\" #> # } # (4) scaling problem # the simulated dataset (below) has particularly small values, hence without scaling (10^0), # the optimization raises an error. The for loop shows how scaling by 10^i # for i=1,...,6 makes the fitting procedure work correctly. 
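# (editorial aside, not part of the original example) as printed, the loop below scales
# an object named x whereas the simulated vector is named x2, which is presumably why
# every iteration reports "object 'x' not found". A self-contained sketch of the
# intended scaling study, assuming x2 was meant (fitdistrplus assumed loaded):
set.seed(1234)
x2 <- rnorm(100, 1e-4, 2e-4)
for(i in 6:0)
  cat(i, try(mgedist(x2*10^i, "cauchy")$estimate, silent = TRUE), "\n")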
set.seed(1234) x2 <- rnorm(100, 1e-4, 2e-4) for(i in 6:0) cat(i, try(mgedist(x*10^i,\"cauchy\")$estimate, silent=TRUE), \"\\n\") #> 6 Error in eval(expr, envir, enclos) : object 'x' not found #> #> 5 Error in eval(expr, envir, enclos) : object 'x' not found #> #> 4 Error in eval(expr, envir, enclos) : object 'x' not found #> #> 3 Error in eval(expr, envir, enclos) : object 'x' not found #> #> 2 Error in eval(expr, envir, enclos) : object 'x' not found #> #> 1 Error in eval(expr, envir, enclos) : object 'x' not found #> #> 0 Error in eval(expr, envir, enclos) : object 'x' not found #>"},{"path":"https://lbbe-software.github.io/fitdistrplus/reference/mledist.html","id":null,"dir":"Reference","previous_headings":"","what":"Maximum likelihood fit of univariate distributions — mledist","title":"Maximum likelihood fit of univariate distributions — mledist","text":"Fit univariate distributions using maximum likelihood censored non censored data.","code":""},{"path":"https://lbbe-software.github.io/fitdistrplus/reference/mledist.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Maximum likelihood fit of univariate distributions — mledist","text":"","code":"mledist(data, distr, start = NULL, fix.arg = NULL, optim.method = \"default\", lower = -Inf, upper = Inf, custom.optim = NULL, weights = NULL, silent = TRUE, gradient = NULL, checkstartfix=FALSE, calcvcov=FALSE, ...)"},{"path":"https://lbbe-software.github.io/fitdistrplus/reference/mledist.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Maximum likelihood fit of univariate distributions — mledist","text":"data numeric vector non censored data dataframe two columns respectively named left right, describing observed value interval censored data. case left column contains either NA left censored observations, left bound interval interval censored observations, observed value non-censored observations. right column contains either NA right censored observations, right bound interval interval censored observations, observed value non-censored observations. distr character string \"name\" naming distribution corresponding density function dname corresponding distribution function pname must classically defined. start named list giving initial values parameters named distribution function data computing initial values returning named list. argument may omitted (default) distributions reasonable starting values computed (see details). fix.arg optional named list giving values fixed parameters named distribution function data computing (fixed) parameter values returning named list. Parameters fixed value thus estimated maximum likelihood procedure. optim.method \"default\" (see details) optimization method pass optim. lower Left bounds parameters \"L-BFGS-B\" method (see optim). upper Right bounds parameters \"L-BFGS-B\" method (see optim). custom.optim function carrying MLE optimisation (see details). weights optional vector weights used fitting process. NULL numeric vector strictly positive integers (typically number occurences observation). non-NULL, weighted MLE used, otherwise ordinary MLE. silent logical remove show warnings bootstraping. gradient function return gradient log-likelihood \"BFGS\", \"CG\" \"L-BFGS-B\" methods. NULL, finite-difference approximation used, see details. checkstartfix logical test starting fixed values. change . calcvcov logical indicating (asymptotic) covariance matrix required. ... 
arguments passed optim, constrOptim custom.optim function.","code":""},{"path":"https://lbbe-software.github.io/fitdistrplus/reference/mledist.html","id":"details","dir":"Reference","previous_headings":"","what":"Details","title":"Maximum likelihood fit of univariate distributions — mledist","text":"function intended called directly internally called fitdist bootdist used maximum likelihood method fitdistcens bootdistcens. assumed distr argument specifies distribution probability density function cumulative distribution function (d, p). quantile function random generator function (q, r) may needed function mmedist, qmedist, mgedist, fitdist,fitdistcens, bootdistcens bootdist. following named distributions, reasonable starting values computed start omitted (.e. NULL) : \"norm\", \"lnorm\", \"exp\" \"pois\", \"cauchy\", \"gamma\", \"logis\", \"nbinom\" (parametrized mu size), \"geom\", \"beta\", \"weibull\" stats package; \"invgamma\", \"llogis\", \"invweibull\", \"pareto1\", \"pareto\", \"lgamma\", \"trgamma\", \"invtrgamma\" actuar package. Note starting values may good enough fit poor. function uses closed-form formula fit uniform distribution. start list, named list names d,p,q,r functions chosen distribution. start function data, function return named list names d,p,q,r functions chosen distribution. mledist function allows user set fixed values parameters. start, fix.arg list, named list names d,p,q,r functions chosen distribution. fix.arg function data, function return named list names d,p,q,r functions chosen distribution. custom.optim=NULL (default), maximum likelihood estimations distribution parameters computed R base optim constrOptim. finite bounds (lower=-Inf upper=Inf) supplied, optim used method specified optim.method. Note optim.method=\"default\" means optim.method=\"Nelder-Mead\" distributions least two parameters optim.method=\"BFGS\" distributions one parameter. finite bounds supplied (among lower upper) gradient != NULL, constrOptim used. finite bounds supplied (among lower upper) gradient == NULL, constrOptim used optim.method=\"Nelder-Mead\"; optim used optim.method=\"L-BFGS-B\" \"Brent\"; case, error raised (behavior constrOptim). errors raised optim, good idea start adding traces optimization process adding control=list(trace=1, REPORT=1). custom.optim NULL, user-supplied function used instead R base optim. custom.optim must (least) following arguments fn function optimized, par initialized parameters. Internally function optimized also arguments, obs observations ddistname distribution name non censored data (Beware potential conflicts optional arguments custom.optim). assumed custom.optim carry MINIMIZATION. Finally, return least following components par estimate, convergence convergence code, value fn(par), hessian, counts number calls (function gradient) message (default NULL) error message custom.optim raises error, see returned value optim. See examples fitdist fitdistcens. Optionally, vector weights can used fitting process. default (weigths=NULL), ordinary MLE carried , otherwise specified weights used balance log-likelihood contributions. yet possible take account weights functions plotdist, plotdistcens, plot.fitdist, plot.fitdistcens, cdfcomp, cdfcompcens, denscomp, ppcomp, qqcomp, gofstat, descdist, bootdist, bootdistcens mgedist. (developments planned future). NB: data values particularly small large, scaling may needed optimization process. 
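As a brief aside before the pointer to Example (7): the start/fix.arg mechanism described above, where either argument may be a function of the data returning a named list, can be sketched as follows. This is a minimal illustration, not taken from the package documentation; the helper name startgamma and the simulated data are assumptions.
library(fitdistrplus)
# moment-based starting values for a gamma fit: shape = m^2/v, rate = m/v
startgamma <- function(x) list(shape = mean(x)^2/var(x), rate = mean(x)/var(x))
set.seed(1234)
y <- rgamma(100, shape = 2, rate = 1)
mledist(y, "gamma", start = startgamma)$estimate
# fixing one parameter: only the shape is estimated here
mledist(y, "gamma", fix.arg = list(rate = 1), start = list(shape = 1))$estimate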
See Example (7).","code":""},{"path":"https://lbbe-software.github.io/fitdistrplus/reference/mledist.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Maximum likelihood fit of univariate distributions — mledist","text":"mledist returns list following components, estimate parameter estimates. convergence integer code convergence optim/constrOptim defined defined user user-supplied optimization function. 0 indicates successful convergence. 1 indicates iteration limit optim reached. 10 indicates degeneracy Nealder-Mead simplex. 100 indicates optim encountered internal error. value minimal value reached criterion minimize. hessian symmetric matrix computed optim estimate Hessian solution found computed user-supplied optimization function. used fitdist estimate standard errors. optim.function name optimization function used maximum likelihood. optim.method optim used, name algorithm used, field method custom.optim function otherwise. fix.arg named list giving values parameters named distribution must kept fixed rather estimated maximum likelihood NULL parameters. fix.arg.fun function used set value fix.arg NULL. weights vector weigths used estimation process NULL. counts two-element integer vector giving number calls log-likelihood function gradient respectively. excludes calls needed compute Hessian, requested, calls log-likelihood function compute finite-difference approximation gradient. counts returned optim user-supplied function set NULL. optim.message character string giving additional information returned optimizer, NULL. understand exactly message, see source code. loglik log-likelihood value. method \"closed formula\" appropriate otherwise NULL.","code":""},{"path":[]},{"path":"https://lbbe-software.github.io/fitdistrplus/reference/mledist.html","id":"references","dir":"Reference","previous_headings":"","what":"References","title":"Maximum likelihood fit of univariate distributions — mledist","text":"Venables WN Ripley BD (2002), Modern applied statistics S. Springer, New York, pp. 435-446, doi:10.1007/978-0-387-21706-2 . Delignette-Muller ML Dutang C (2015), fitdistrplus: R Package Fitting Distributions. 
Journal Statistical Software, 64(4), 1-34, doi:10.18637/jss.v064.i04 .","code":""},{"path":"https://lbbe-software.github.io/fitdistrplus/reference/mledist.html","id":"author","dir":"Reference","previous_headings":"","what":"Author","title":"Maximum likelihood fit of univariate distributions — mledist","text":"Marie-Laure Delignette-Muller Christophe Dutang.","code":""},{"path":"https://lbbe-software.github.io/fitdistrplus/reference/mledist.html","id":"ref-examples","dir":"Reference","previous_headings":"","what":"Examples","title":"Maximum likelihood fit of univariate distributions — mledist","text":"","code":"# (1) basic fit of a normal distribution with maximum likelihood estimation # set.seed(1234) x1 <- rnorm(n=100) mledist(x1,\"norm\") #> $estimate #> mean sd #> -0.1567617 0.9993707 #> #> $convergence #> [1] 0 #> #> $value #> [1] 141.8309 #> #> $hessian #> mean sd #> mean 100.126 0.0000 #> sd 0.000 200.2538 #> #> $optim.function #> [1] \"optim\" #> #> $optim.method #> [1] \"Nelder-Mead\" #> #> $fix.arg #> NULL #> #> $fix.arg.fun #> NULL #> #> $weights #> NULL #> #> $counts #> function gradient #> 43 NA #> #> $optim.message #> NULL #> #> $loglik #> [1] -141.8309 #> #> $vcov #> NULL #> # (2) defining your own distribution functions, here for the Gumbel distribution # for other distributions, see the CRAN task view dedicated to probability distributions dgumbel <- function(x,a,b) 1/b*exp((a-x)/b)*exp(-exp((a-x)/b)) mledist(x1,\"gumbel\",start=list(a=10,b=5)) #> Error in checkparamlist(arg_startfix$start.arg, arg_startfix$fix.arg, argddistname, hasnodefaultval): 'start' must specify names which are arguments to 'distr'. # (3) fit of a discrete distribution (Poisson) # set.seed(1234) x2 <- rpois(n=30,lambda = 2) mledist(x2,\"pois\") #> $estimate #> lambda #> 1.7 #> #> $convergence #> [1] 0 #> #> $value #> [1] 46.18434 #> #> $hessian #> lambda #> lambda 17.64707 #> #> $optim.function #> [1] \"optim\" #> #> $optim.method #> [1] \"BFGS\" #> #> $fix.arg #> NULL #> #> $fix.arg.fun #> NULL #> #> $weights #> NULL #> #> $counts #> function gradient #> 6 1 #> #> $optim.message #> NULL #> #> $loglik #> [1] -46.18434 #> #> $vcov #> NULL #> # (4) fit a finite-support distribution (beta) # set.seed(1234) x3 <- rbeta(n=100,shape1=5, shape2=10) mledist(x3,\"beta\") #> $estimate #> shape1 shape2 #> 4.859798 10.918841 #> #> $convergence #> [1] 0 #> #> $value #> [1] -78.33052 #> #> $hessian #> shape1 shape2 #> shape1 16.295311 -6.542753 #> shape2 -6.542753 3.047900 #> #> $optim.function #> [1] \"optim\" #> #> $optim.method #> [1] \"Nelder-Mead\" #> #> $fix.arg #> NULL #> #> $fix.arg.fun #> NULL #> #> $weights #> NULL #> #> $counts #> function gradient #> 47 NA #> #> $optim.message #> NULL #> #> $loglik #> [1] 78.33052 #> #> $vcov #> NULL #> # (5) fit frequency distributions on USArrests dataset. 
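# (illustrative aside, not part of the original example) the two fits below can also be
# compared through fitdist() and gofstat(); given the overdispersion of the Assault
# counts, the AIC/BIC reported by gofstat() can be expected to favour the negative
# binomial by a wide margin:
fit_pois <- fitdist(USArrests$Assault, "pois")
fit_nb <- fitdist(USArrests$Assault, "nbinom")
gofstat(list(fit_pois, fit_nb), fitnames = c("Poisson", "negative binomial"))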
# x4 <- USArrests$Assault mledist(x4, \"pois\") #> $estimate #> lambda #> 170.76 #> #> $convergence #> [1] 0 #> #> $value #> [1] 1211.705 #> #> $hessian #> lambda #> lambda 0.2928087 #> #> $optim.function #> [1] \"optim\" #> #> $optim.method #> [1] \"BFGS\" #> #> $fix.arg #> NULL #> #> $fix.arg.fun #> NULL #> #> $weights #> NULL #> #> $counts #> function gradient #> 3 1 #> #> $optim.message #> NULL #> #> $loglik #> [1] -1211.705 #> #> $vcov #> NULL #> mledist(x4, \"nbinom\") #> $estimate #> size mu #> 3.822579 170.747853 #> #> $convergence #> [1] 0 #> #> $value #> [1] 290.3297 #> #> $hessian #> size mu #> size 1.759308e+00 -1.993783e-05 #> mu -1.993783e-05 6.413003e-03 #> #> $optim.function #> [1] \"optim\" #> #> $optim.method #> [1] \"Nelder-Mead\" #> #> $fix.arg #> NULL #> #> $fix.arg.fun #> NULL #> #> $weights #> NULL #> #> $counts #> function gradient #> 47 NA #> #> $optim.message #> NULL #> #> $loglik #> [1] -290.3297 #> #> $vcov #> NULL #> # (6) fit a continuous distribution (Gumbel) to censored data. # data(fluazinam) log10EC50 <-log10(fluazinam) # definition of the Gumbel distribution dgumbel <- function(x,a,b) 1/b*exp((a-x)/b)*exp(-exp((a-x)/b)) pgumbel <- function(q,a,b) exp(-exp((a-q)/b)) qgumbel <- function(p,a,b) a-b*log(-log(p)) mledist(log10EC50,\"gumbel\",start=list(a=0,b=2),optim.method=\"Nelder-Mead\") #> Error in checkparamlist(arg_startfix$start.arg, arg_startfix$fix.arg, argddistname, hasnodefaultval): 'start' must specify names which are arguments to 'distr'. # (7) scaling problem # the simulated dataset (below) has particularly small values, # hence without scaling (10^0), # the optimization raises an error. The for loop shows how scaling by 10^i # for i=1,...,6 makes the fitting procedure work correctly. set.seed(1234) x2 <- rnorm(100, 1e-4, 2e-4) for(i in 6:0) cat(i, try(mledist(x*10^i, \"cauchy\")$estimate, silent=TRUE), \"\\n\") #> 6 Error in eval(expr, envir, enclos) : object 'x' not found #> #> 5 Error in eval(expr, envir, enclos) : object 'x' not found #> #> 4 Error in eval(expr, envir, enclos) : object 'x' not found #> #> 3 Error in eval(expr, envir, enclos) : object 'x' not found #> #> 2 Error in eval(expr, envir, enclos) : object 'x' not found #> #> 1 Error in eval(expr, envir, enclos) : object 'x' not found #> #> 0 Error in eval(expr, envir, enclos) : object 'x' not found #> # (17) small example for the zero-modified geometric distribution # dzmgeom <- function(x, p1, p2) p1 * (x == 0) + (1-p1)*dgeom(x-1, p2) #pdf x2 <- c(2, 4, 0, 40, 4, 21, 0, 0, 0, 2, 5, 0, 0, 13, 2) #simulated dataset initp1 <- function(x) list(p1=mean(x == 0)) #init as MLE mledist(x2, \"zmgeom\", fix.arg=initp1, start=list(p2=1/2)) #> Error in checkparamlist(arg_startfix$start.arg, arg_startfix$fix.arg, argddistname, hasnodefaultval): 'start' must specify names which are arguments to 'distr'."},{"path":"https://lbbe-software.github.io/fitdistrplus/reference/mmedist.html","id":null,"dir":"Reference","previous_headings":"","what":"Matching moment fit of univariate distributions — mmedist","title":"Matching moment fit of univariate distributions — mmedist","text":"Fit univariate distributions matching moments (raw centered) non censored data.","code":""},{"path":"https://lbbe-software.github.io/fitdistrplus/reference/mmedist.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Matching moment fit of univariate distributions — mmedist","text":"","code":"mmedist(data, distr, order, memp, start = NULL, fix.arg = NULL, optim.method = \"default\", lower = 
-Inf, upper = Inf, custom.optim = NULL, weights = NULL, silent = TRUE, gradient = NULL, checkstartfix=FALSE, calcvcov=FALSE, ...)"},{"path":"https://lbbe-software.github.io/fitdistrplus/reference/mmedist.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Matching moment fit of univariate distributions — mmedist","text":"data numeric vector non censored data. distr character string \"name\" naming distribution (see 'details'). order numeric vector moment order(s). length vector must equal number parameters estimate. memp function implementing empirical moments, raw centered consistent distr argument (weights argument). See details . start named list giving initial values parameters named distribution function data computing initial values returning named list. argument may omitted (default) distributions reasonable starting values computed (see 'details' section mledist). fix.arg optional named list giving values fixed parameters named distribution function data computing (fixed) parameter values returning named list. Parameters fixed value thus estimated. optim.method \"default\" optimization method pass optim. lower Left bounds parameters \"L-BFGS-B\" method (see optim). upper Right bounds parameters \"L-BFGS-B\" method (see optim). custom.optim function carrying optimization . weights optional vector weights used fitting process. NULL numeric vector strictly positive integers (typically number occurences observation). non-NULL, weighted MME used, otherwise ordinary MME. silent logical remove show warnings bootstraping. gradient function return gradient squared difference \"BFGS\", \"CG\" \"L-BFGS-B\" methods. NULL, finite-difference approximation used, see details. checkstartfix logical test starting fixed values. change . calcvcov logical indicating (asymptotic) covariance matrix required. ... arguments passed optim, constrOptim custom.optim function.","code":""},{"path":"https://lbbe-software.github.io/fitdistrplus/reference/mmedist.html","id":"details","dir":"Reference","previous_headings":"","what":"Details","title":"Matching moment fit of univariate distributions — mmedist","text":"argument distr can one base R distributions: \"norm\", \"lnorm\", \"exp\" \"pois\", \"gamma\", \"logis\", \"nbinom\" , \"geom\", \"beta\" \"unif\". case, arguments data distr required, estimate computed closed-form formula. distributions characterized one parameter (\"geom\", \"pois\" \"exp\"), parameter simply estimated matching theoretical observed means, distributions characterized two parameters, parameters estimated matching theoretical observed means variances (Vose, 2000). Note closed-form formula, fix.arg used start ignored. argument distr can also distribution name long corresponding mdistr function exists, e.g. \"pareto\" \"mpareto\" exists. case arguments arguments order memp supplied order carry matching numerically, minimization sum squared differences observed theoretical moments. Optionnally arguments can supplied control optimization (see 'details' section mledist details arguments control optimization). case, fix.arg can used start taken account. non closed-form estimators, memp must provided compute empirical moments. weights=NULL, function must two arguments x, order: x numeric vector data order order moment. weights!=NULL, function must three arguments x, order, weights: x numeric vector data, order order moment, weights numeric vector weights. See examples . Optionally, vector weights can used fitting process. 
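Before the weighting details, a small numeric check of the closed-form case described above (an illustrative aside; the simulated sample is an assumption, not from the package documentation): for the two-parameter gamma distribution, matching the sample mean m and variance v gives shape = m^2/v and rate = m/v.
library(fitdistrplus)
set.seed(1234)
y <- rgamma(200, shape = 3, rate = 2)
m <- mean(y); v <- mean((y - m)^2)   # empirical (1/n) variance
c(shape = m^2/v, rate = m/v)         # hand-computed moment estimates
mmedist(y, "gamma")$estimate         # should agree up to the n vs n-1 variance convention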
default (weigths=NULL), ordinary MME carried , otherwise specified weights used compute (raw centered) weighted moments. closed-form estimators, weighted mean variance computed wtdmean wtdvar Hmisc package. numerical minimization used, weighted expected computed memp function. yet possible take account weighths functions plotdist, plotdistcens, plot.fitdist, plot.fitdistcens, cdfcomp, cdfcompcens, denscomp, ppcomp, qqcomp, gofstat descdist (developments planned future). function intended called directly internally called fitdist bootdist used matching moments method. Since Version 1.2-0, mmedist automatically computes asymptotic covariance matrix using . Ibragimov R. 'minskii (1981), hence theoretical moments mdist defined order equals twice maximal order given order. instance, normal distribution, fit expectation variance need mnorm order \\(2\\times2=4\\).","code":""},{"path":"https://lbbe-software.github.io/fitdistrplus/reference/mmedist.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Matching moment fit of univariate distributions — mmedist","text":"mmedist returns list following components, estimate parameter estimates. convergence integer code convergence optim defined defined user user-supplied optimization function. 0 indicates successful convergence. 1 indicates iteration limit optim reached. 10 indicates degeneracy Nealder-Mead simplex. 100 indicates optim encountered internal error. value minimal value reached criterion minimize. hessian symmetric matrix computed optim estimate Hessian solution found computed user-supplied optimization function. optim.function (appropriate) name optimization function used maximum likelihood. optim.method (appropriate) optim used, name algorithm used, field method custom.optim function otherwise. fix.arg named list giving values parameters named distribution must kept fixed rather estimated maximum likelihood NULL parameters. fix.arg.fun function used set value fix.arg NULL. weights vector weigths used estimation process NULL. counts two-element integer vector giving number calls log-likelihood function gradient respectively. excludes calls needed compute Hessian, requested, calls log-likelihood function compute finite-difference approximation gradient. counts returned optim user-supplied function set NULL. optim.message character string giving additional information returned optimizer, NULL. understand exactly message, see source code. loglik log-likelihood value. method either \"closed formula\" name optimization method. order order moment(s) matched. memp empirical moment function.","code":""},{"path":[]},{"path":"https://lbbe-software.github.io/fitdistrplus/reference/mmedist.html","id":"references","dir":"Reference","previous_headings":"","what":"References","title":"Matching moment fit of univariate distributions — mmedist","text":". Ibragimov R. 'minskii (1981), Statistical Estimation - Asymptotic Theory, Springer-Verlag, doi:10.1007/978-1-4899-0027-2 Evans M, Hastings N Peacock B (2000), Statistical distributions. John Wiley Sons Inc, doi:10.1002/9780470627242 . Vose D (2000), Risk analysis, quantitative guide. John Wiley & Sons Ltd, Chischester, England, pp. 99-143. Delignette-Muller ML Dutang C (2015), fitdistrplus: R Package Fitting Distributions. 
Journal Statistical Software, 64(4), 1-34, doi:10.18637/jss.v064.i04 .","code":""},{"path":"https://lbbe-software.github.io/fitdistrplus/reference/mmedist.html","id":"author","dir":"Reference","previous_headings":"","what":"Author","title":"Matching moment fit of univariate distributions — mmedist","text":"Marie-Laure Delignette-Muller Christophe Dutang.","code":""},{"path":"https://lbbe-software.github.io/fitdistrplus/reference/mmedist.html","id":"ref-examples","dir":"Reference","previous_headings":"","what":"Examples","title":"Matching moment fit of univariate distributions — mmedist","text":"","code":"# (1) basic fit of a normal distribution with moment matching estimation # set.seed(1234) n <- 100 x1 <- rnorm(n=n) mmedist(x1, \"norm\") #> $estimate #> mean sd #> -0.1567617 0.9993707 #> #> $convergence #> [1] 0 #> #> $value #> NULL #> #> $hessian #> NULL #> #> $optim.function #> NULL #> #> $opt.meth #> NULL #> #> $fix.arg #> NULL #> #> $fix.arg.fun #> NULL #> #> $weights #> NULL #> #> $counts #> NULL #> #> $optim.message #> NULL #> #> $loglik #> [1] -141.8309 #> #> $method #> [1] \"closed formula\" #> #> $order #> [1] 1 2 #> #> $memp #> NULL #> #> $vcov #> NULL #> #weighted w <- c(rep(1, n/2), rep(10, n/2)) mmedist(x1, \"norm\", weights=w)$estimate #> Warning: weights are not taken into account in the default initial values #> mean sd #> 0.08565839 1.02915474 # (2) fit a discrete distribution (Poisson) # set.seed(1234) x2 <- rpois(n=30,lambda = 2) mmedist(x2, \"pois\") #> $estimate #> lambda #> 1.7 #> #> $convergence #> [1] 0 #> #> $value #> NULL #> #> $hessian #> NULL #> #> $optim.function #> NULL #> #> $opt.meth #> NULL #> #> $fix.arg #> NULL #> #> $fix.arg.fun #> NULL #> #> $weights #> NULL #> #> $counts #> NULL #> #> $optim.message #> NULL #> #> $loglik #> [1] -46.18434 #> #> $method #> [1] \"closed formula\" #> #> $order #> [1] 1 #> #> $memp #> NULL #> #> $vcov #> NULL #> # (3) fit a finite-support distribution (beta) # set.seed(1234) x3 <- rbeta(n=100,shape1=5, shape2=10) mmedist(x3, \"beta\") #> $estimate #> shape1 shape2 #> 4.522734 10.219685 #> #> $convergence #> [1] 0 #> #> $value #> NULL #> #> $hessian #> NULL #> #> $optim.function #> NULL #> #> $opt.meth #> NULL #> #> $fix.arg #> NULL #> #> $fix.arg.fun #> NULL #> #> $weights #> NULL #> #> $counts #> NULL #> #> $optim.message #> NULL #> #> $loglik #> [1] 78.19503 #> #> $method #> [1] \"closed formula\" #> #> $order #> [1] 1 2 #> #> $memp #> NULL #> #> $vcov #> NULL #> # (4) fit a Pareto distribution # # \\donttest{ require(actuar) #simulate a sample x4 <- rpareto(1000, 6, 2) #empirical raw moment memp <- function(x, order) mean(x^order) memp2 <- function(x, order, weights) sum(x^order * weights)/sum(weights) #fit by MME mmedist(x4, \"pareto\", order=c(1, 2), memp=memp, start=list(shape=10, scale=10), lower=1, upper=Inf) #> $estimate #> shape scale #> 4.560423 1.464764 #> #> $convergence #> [1] 0 #> #> $value #> [1] 6.740714e-13 #> #> $hessian #> NULL #> #> $optim.function #> [1] \"constrOptim\" #> #> $optim.method #> [1] \"Nelder-Mead\" #> #> $fix.arg #> NULL #> #> $fix.arg.fun #> NULL #> #> $weights #> NULL #> #> $counts #> function gradient #> 404 NA #> #> $optim.message #> NULL #> #> $loglik #> [1] -80.49091 #> #> $method #> [1] \"default\" #> #> $order #> [1] 1 2 #> #> $memp #> function(x, order) mean(x^order) #> #> #> $vcov #> NULL #> #fit by weighted MME w <- rep(1, length(x4)) w[x4 < 1] <- 2 mmedist(x4, \"pareto\", order=c(1, 2), memp=memp2, weights=w, start=list(shape=10, scale=10), lower=1, upper=Inf) #> Warning: 
weights are not taken into account in the default initial values #> $estimate #> shape scale #> 5.656694 1.630806 #> #> $convergence #> [1] 0 #> #> $value #> [1] 7.09812e-14 #> #> $hessian #> NULL #> #> $optim.function #> [1] \"constrOptim\" #> #> $optim.method #> [1] \"Nelder-Mead\" #> #> $fix.arg #> NULL #> #> $fix.arg.fun #> NULL #> #> $weights #> [1] 2 2 2 2 2 2 2 1 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 #> [38] 2 2 2 1 2 2 1 2 2 2 1 2 2 2 2 2 1 1 2 2 2 2 2 1 2 2 2 2 2 2 2 2 2 2 2 2 2 #> [75] 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 #> [112] 2 2 2 2 2 2 2 2 2 2 2 2 1 2 2 2 2 2 2 2 2 1 2 2 1 2 2 2 2 2 2 2 2 2 2 1 2 #> [149] 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 1 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 1 #> [186] 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 1 2 2 2 2 2 2 2 2 2 #> [223] 2 2 2 2 2 2 2 2 2 2 2 2 2 1 2 2 2 2 2 2 2 2 2 1 2 2 2 2 2 2 2 2 2 2 2 2 1 #> [260] 2 2 2 2 2 2 2 2 2 2 2 2 1 2 2 2 2 2 2 2 2 2 2 2 1 2 2 2 2 2 1 2 2 2 2 2 2 #> [297] 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 1 1 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 #> [334] 2 2 2 2 2 2 2 2 2 2 1 2 2 2 2 2 2 2 2 2 2 2 1 2 2 2 2 2 2 1 2 2 2 2 1 2 2 #> [371] 1 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 1 2 2 2 2 2 2 2 #> [408] 1 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 1 2 2 2 2 2 2 2 2 2 2 1 #> [445] 2 2 2 2 2 1 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 1 2 2 2 2 2 2 1 #> [482] 2 2 2 2 2 1 2 2 2 2 2 2 2 1 2 2 2 2 2 2 2 2 2 2 2 2 2 2 1 2 2 2 2 2 2 2 1 #> [519] 2 2 2 2 2 2 2 2 2 2 1 2 2 1 2 2 2 2 2 2 2 2 2 2 2 2 2 2 1 2 1 2 2 2 2 2 2 #> [556] 2 2 2 2 1 2 2 1 2 2 2 2 1 1 2 2 2 2 1 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 #> [593] 2 2 2 2 2 2 2 2 2 2 2 2 2 2 1 2 2 2 2 2 1 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 #> [630] 2 2 2 2 2 2 2 2 2 2 2 2 2 2 1 2 2 2 2 1 2 2 2 2 2 2 2 2 2 2 2 2 2 2 1 2 2 #> [667] 2 2 2 2 2 2 2 1 2 1 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 1 2 2 2 2 2 2 1 2 2 1 2 #> [704] 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 1 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 #> [741] 2 1 2 2 2 2 2 2 2 2 1 1 2 2 2 2 2 2 1 2 2 2 2 2 2 2 2 2 1 2 2 2 1 2 2 2 2 #> [778] 2 2 2 2 1 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 1 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 #> [815] 2 2 2 2 1 2 2 2 2 2 2 2 2 2 2 2 2 2 2 1 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 #> [852] 2 2 1 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 1 2 2 2 2 1 2 2 1 2 2 2 2 1 2 #> [889] 2 1 2 2 2 2 2 2 2 2 2 2 1 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 #> [926] 2 2 2 1 2 2 2 2 2 2 2 2 1 2 1 1 2 2 1 2 2 2 2 2 1 2 2 2 2 2 2 2 1 1 2 2 2 #> [963] 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 1 2 2 2 2 2 2 2 2 #> [1000] 2 #> #> $counts #> function gradient #> 767 NA #> #> $optim.message #> NULL #> #> $loglik #> [1] 119.7361 #> #> $method #> [1] \"default\" #> #> $order #> [1] 1 2 #> #> $memp #> function(x, order, weights) sum(x^order * weights)/sum(weights) #> #> #> $vcov #> NULL #> # }"},{"path":"https://lbbe-software.github.io/fitdistrplus/reference/msedist.html","id":null,"dir":"Reference","previous_headings":"","what":"Maximum spacing estimation of univariate distributions — msedist","title":"Maximum spacing estimation of univariate distributions — msedist","text":"Fit univariate distribution maximizing (log) spacings non censored data.","code":""},{"path":"https://lbbe-software.github.io/fitdistrplus/reference/msedist.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Maximum spacing estimation of univariate distributions — msedist","text":"","code":"msedist(data, distr, phidiv=\"KL\", power.phidiv=NULL, start = NULL, 
fix.arg = NULL, optim.method = \"default\", lower = -Inf, upper = Inf, custom.optim = NULL, weights=NULL, silent = TRUE, gradient = NULL, checkstartfix=FALSE, calcvcov=FALSE, ...)"},{"path":"https://lbbe-software.github.io/fitdistrplus/reference/msedist.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Maximum spacing estimation of univariate distributions — msedist","text":"data numeric vector non censored data. distr character string \"name\" naming distribution corresponding quantile function qname corresponding density distribution dname must classically defined. phidiv character string coding name phi-divergence used : \"KL\" Kullback-Leibler information (corresponds classic maximum spacing estimation), \"J\" Jeffreys' divergence, \"R\" Renyi's divergence, \"H\" Hellinger distance, \"V\" Vajda's measure information, see details. power.phidiv relevant, numeric power used phi-divergence : NULL phidiv=\"KL\" phidiv=\"J\" , positive different 1 phidiv=\"R\", greater equal 1 phidiv=\"H\" phidiv=\"V\", see details. start named list giving initial values parameters named distribution function data computing initial values returning named list. argument may omitted (default) distributions reasonable starting values computed (see 'details' section mledist). fix.arg optional named list giving values fixed parameters named distribution function data computing (fixed) parameter values returning named list. Parameters fixed value thus estimated. optim.method \"default\" optimization method pass optim. lower Left bounds parameters \"L-BFGS-B\" method (see optim). upper Right bounds parameters \"L-BFGS-B\" method (see optim). custom.optim function carrying optimization. weights optional vector weights used fitting process. NULL numeric vector strictly positive integers (typically number occurences observation). non-NULL, weighted MSE used, otherwise ordinary MSE. silent logical remove show warnings bootstraping. gradient function return gradient gof distance \"BFGS\", \"CG\" \"L-BFGS-B\" methods. NULL, finite-difference approximation used. checkstartfix logical test starting fixed values. change . calcvcov logical indicating (asymptotic) covariance matrix required. (currently ignored) ... arguments passed optim, constrOptim custom.optim function.","code":""},{"path":"https://lbbe-software.github.io/fitdistrplus/reference/msedist.html","id":"details","dir":"Reference","previous_headings":"","what":"Details","title":"Maximum spacing estimation of univariate distributions — msedist","text":"msedist function numerically maximizes phi-divergence function spacings, spacings differences cumulative distribution function evaluated sorted dataset. classical maximum spacing estimation (MSE) introduced Cheng Amin (1986) Ranneby (1984) independently phi-diverence logarithm, see Anatolyev Kosenok (2005) link MSE maximum likelihood estimation. MSE generalized Ranneby Ekstrom (1997) allowing different phi-divergence function. Generalized MSE maximizes $$ S_n(\\theta)=\\frac{1}{n+1}\\sum_{=1}^{n+1} \\phi\\left(F(x_{()}; \\theta)-F(x_{(-1)}; \\theta) \\right), $$ \\(F(;\\theta)\\) parametric distribution function fitted, \\(\\phi\\) phi-divergence function, \\(x_{(1)}<\\dots0, \\alpha\\neq 1 $$ Hellinger distance (phidiv=\"H\" power.phidiv=p) $$\\phi(x)=-|1-x^{1/p}|^p \\textrm{ } p\\ge 1 $$ Vajda's measure information (phidiv=\"V\" power.phidiv=beta) $$\\phi(x)=-|1-x|^\\beta \\textrm{ } \\beta\\ge 1 $$ optimization process mledist, see 'details' section function. 
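To make the choice of phi-divergence concrete, here is a short sketch (the simulated lognormal sample is an illustrative assumption, not taken from the package documentation) comparing the classical Kullback-Leibler spacings with the Hellinger and Renyi variants, respecting the power.phidiv constraints listed above:
library(fitdistrplus)
set.seed(123)
y <- rlnorm(200, meanlog = 0, sdlog = 0.5)
msedist(y, "lnorm")$estimate                                   # default phidiv = "KL"
msedist(y, "lnorm", phidiv = "H", power.phidiv = 2)$estimate   # Hellinger, p >= 1
msedist(y, "lnorm", phidiv = "R", power.phidiv = 0.5)$estimate # Renyi, alpha > 0, alpha != 1
# estimates are typically close to one another (and to the MLE) for a well-specified model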
function intended called directly internally called fitdist bootdist. function intended used non-censored data. NB: data values particularly small large, scaling may needed optimization process, see mledist's examples.","code":""},{"path":"https://lbbe-software.github.io/fitdistrplus/reference/msedist.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Maximum spacing estimation of univariate distributions — msedist","text":"msedist returns list following components, estimate parameter estimates. convergence integer code convergence optim defined defined user user-supplied optimization function. 0 indicates successful convergence. 1 indicates iteration limit optim reached. 10 indicates degeneracy Nealder-Mead simplex. 100 indicates optim encountered internal error. value minimal value reached criterion minimize. hessian symmetric matrix computed optim estimate Hessian solution found computed user-supplied optimization function. optim.function name optimization function used maximum likelihood. optim.method optim used, name algorithm used, field method custom.optim function otherwise. fix.arg named list giving values parameters named distribution must kept fixed rather estimated maximum likelihood NULL parameters. fix.arg.fun function used set value fix.arg NULL. weights vector weigths used estimation process NULL. counts two-element integer vector giving number calls log-likelihood function gradient respectively. excludes calls needed compute Hessian, requested, calls log-likelihood function compute finite-difference approximation gradient. counts returned optim user-supplied function set NULL. optim.message character string giving additional information returned optimizer, NULL. understand exactly message, see source code. loglik log-likelihood value. phidiv character string coding name phi-divergence used either \"KL\", \"J\", \"R\", \"H\" \"V\". power.phidiv Either NULL numeric power used phi-divergence.","code":""},{"path":[]},{"path":"https://lbbe-software.github.io/fitdistrplus/reference/msedist.html","id":"references","dir":"Reference","previous_headings":"","what":"References","title":"Maximum spacing estimation of univariate distributions — msedist","text":"Anatolyev, S., Kosenok, G. (2005). alternative maximum likelihood based spacings. Econometric Theory, 21(2), 472-476, doi:10.1017/S0266466605050255 . Cheng, R.C.H. N..K. Amin (1983) Estimating parameters continuous univariate distributions shifted origin. Journal Royal Statistical Society Series B 45, 394-403, doi:10.1111/j.2517-6161.1983.tb01268.x . Ranneby, B. (1984) maximum spacing method: estimation method related maximum likelihood method. Scandinavian Journal Statistics 11, 93-112. Ranneby, B. Ekstroem, M. (1997). Maximum spacing estimates based different metrics. 
Umea universitet.","code":""},{"path":"https://lbbe-software.github.io/fitdistrplus/reference/msedist.html","id":"author","dir":"Reference","previous_headings":"","what":"Author","title":"Maximum spacing estimation of univariate distributions — msedist","text":"Marie-Laure Delignette-Muller Christophe Dutang.","code":""},{"path":"https://lbbe-software.github.io/fitdistrplus/reference/msedist.html","id":"ref-examples","dir":"Reference","previous_headings":"","what":"Examples","title":"Maximum spacing estimation of univariate distributions — msedist","text":"","code":"# (1) Fit of a Weibull distribution to serving size data by maximum # spacing estimation # data(groundbeef) serving <- groundbeef$serving msedist(serving, \"weibull\") #> $estimate #> shape scale #> 1.423799 80.894950 #> #> $convergence #> [1] 0 #> #> $value #> [1] 3.789824 #> #> $hessian #> shape scale #> shape 0.792656647 -0.0043440632 #> scale -0.004344063 0.0002995895 #> #> $optim.function #> [1] \"optim\" #> #> $optim.method #> [1] \"Nelder-Mead\" #> #> $fix.arg #> NULL #> #> $fix.arg.fun #> NULL #> #> $weights #> NULL #> #> $counts #> function gradient #> 59 NA #> #> $optim.message #> NULL #> #> $loglik #> [1] -1287.97 #> #> $phidiv #> [1] \"KL\" #> #> $power.phidiv #> NULL #> # (2) Fit of an exponential distribution # set.seed(123) x1 <- rexp(1e3) #the convergence is quick msedist(x1, \"exp\", control=list(trace=0, REPORT=1)) #> $estimate #> rate #> 0.967625 #> #> $convergence #> [1] 0 #> #> $value #> [1] 7.516802 #> #> $hessian #> rate #> rate 1.066843 #> #> $optim.function #> [1] \"optim\" #> #> $optim.method #> [1] \"BFGS\" #> #> $fix.arg #> NULL #> #> $fix.arg.fun #> NULL #> #> $weights #> NULL #> #> $counts #> function gradient #> 12 2 #> #> $optim.message #> NULL #> #> $loglik #> [1] -1029.544 #> #> $phidiv #> [1] \"KL\" #> #> $power.phidiv #> NULL #>"},{"path":"https://lbbe-software.github.io/fitdistrplus/reference/plotdist.html","id":null,"dir":"Reference","previous_headings":"","what":"Plot of empirical and theoretical distributions for non-censored data — plotdist","title":"Plot of empirical and theoretical distributions for non-censored data — plotdist","text":"Plots empirical distribution (non-censored data) theoretical one specified.","code":""},{"path":"https://lbbe-software.github.io/fitdistrplus/reference/plotdist.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Plot of empirical and theoretical distributions for non-censored data — plotdist","text":"","code":"plotdist(data, distr, para, histo = TRUE, breaks = \"default\", demp = FALSE, discrete, ...)"},{"path":"https://lbbe-software.github.io/fitdistrplus/reference/plotdist.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Plot of empirical and theoretical distributions for non-censored data — plotdist","text":"data numeric vector. distr character string \"name\" naming distribution corresponding density function dname, corresponding distribution function pname corresponding quantile function qname must defined, directly density function. argument may omitted para omitted. para named list giving parameters named distribution. argument may omitted distr omitted. histo logical plot histogram using hist function. breaks \"default\" histogram plotted function hist default breaks definition. Else breaks passed function hist. argument taken account discrete TRUE. 
demp logical plot empirical density first plot (alone superimposed histogram depending value argument histo) using density function. discrete TRUE, distribution considered discrete. \tdistr discrete missing, discrete set \tFALSE. discrete missing distr, \tdiscrete set TRUE distr belongs \t\"binom\", \"nbinom\",\"geom\", \"hyper\" \"pois\". ... graphical arguments passed graphical functions used plotdist.","code":""},{"path":"https://lbbe-software.github.io/fitdistrplus/reference/plotdist.html","id":"details","dir":"Reference","previous_headings":"","what":"Details","title":"Plot of empirical and theoretical distributions for non-censored data — plotdist","text":"Empirical , specified, theoretical distributions plotted density cdf. plot density, user can use arguments histo demp specify wants histogram using function hist, density plot using function density, (least one two arguments must put \"TRUE\"). continuous distributions, function hist used default breaks definition breaks \"default\" passing breaks argument differs \"default\". continuous distribution theoretical distribution specified arguments distname para, Q-Q plot (plot quantiles theoretical fitted distribution (x-axis) empirical quantiles data) P-P plot (.e. value data set, plot cumulative density function fitted distribution (x-axis) empirical cumulative density function (y-axis)) also given (Cullen Frey, 1999). function ppoints (default parameter argument ) used Q-Q plot, generate set probabilities evaluate inverse distribution. NOTE VERSION 0.4-3, ppoints also used P-P plot cdf plot continuous data. personalize four plots proposed continuous data, example change plotting position, recommend use functions cdfcomp, denscomp, qqcomp ppcomp.","code":""},{"path":[]},{"path":"https://lbbe-software.github.io/fitdistrplus/reference/plotdist.html","id":"references","dir":"Reference","previous_headings":"","what":"References","title":"Plot of empirical and theoretical distributions for non-censored data — plotdist","text":"Cullen AC Frey HC (1999), Probabilistic techniques exposure assessment. Plenum Press, USA, pp. 81-155. Delignette-Muller ML Dutang C (2015), fitdistrplus: R Package Fitting Distributions. 
Journal Statistical Software, 64(4), 1-34, doi:10.18637/jss.v064.i04 .","code":""},{"path":"https://lbbe-software.github.io/fitdistrplus/reference/plotdist.html","id":"author","dir":"Reference","previous_headings":"","what":"Author","title":"Plot of empirical and theoretical distributions for non-censored data — plotdist","text":"Marie-Laure Delignette-Muller Christophe Dutang.","code":""},{"path":"https://lbbe-software.github.io/fitdistrplus/reference/plotdist.html","id":"ref-examples","dir":"Reference","previous_headings":"","what":"Examples","title":"Plot of empirical and theoretical distributions for non-censored data — plotdist","text":"","code":"# (1) Plot of an empirical distribution with changing # of default line types for CDF and colors # and optionally adding a density line # set.seed(1234) x1 <- rnorm(n=30) plotdist(x1) plotdist(x1,demp = TRUE) plotdist(x1,histo = FALSE, demp = TRUE) #> Warning: arguments ‘freq’, ‘main’, ‘xlab’ are not made use of plotdist(x1, col=\"blue\", type=\"b\", pch=16) #> Warning: graphical parameter \"type\" is obsolete #> Warning: graphical parameter \"type\" is obsolete #> Warning: graphical parameter \"type\" is obsolete #> Warning: graphical parameter \"type\" is obsolete plotdist(x1, type=\"s\") #> Warning: graphical parameter \"type\" is obsolete #> Warning: graphical parameter \"type\" is obsolete #> Warning: graphical parameter \"type\" is obsolete #> Warning: graphical parameter \"type\" is obsolete # (2) Plot of a discrete distribution against data # set.seed(1234) x2 <- rpois(n=30, lambda = 2) plotdist(x2, discrete=TRUE) plotdist(x2, \"pois\", para=list(lambda = mean(x2))) plotdist(x2, \"pois\", para=list(lambda = mean(x2)), lwd=\"2\") # (3) Plot of a continuous distribution against data # xn <- rnorm(n=100, mean=10, sd=5) plotdist(xn, \"norm\", para=list(mean=mean(xn), sd=sd(xn))) plotdist(xn, \"norm\", para=list(mean=mean(xn), sd=sd(xn)), pch=16) plotdist(xn, \"norm\", para=list(mean=mean(xn), sd=sd(xn)), demp = TRUE) plotdist(xn, \"norm\", para=list(mean=mean(xn), sd=sd(xn)), histo = FALSE, demp = TRUE) # (4) Plot of serving size data # data(groundbeef) plotdist(groundbeef$serving, type=\"s\") #> Warning: graphical parameter \"type\" is obsolete #> Warning: graphical parameter \"type\" is obsolete #> Warning: graphical parameter \"type\" is obsolete #> Warning: graphical parameter \"type\" is obsolete # (5) Plot of numbers of parasites with a Poisson distribution data(toxocara) number <- toxocara$number plotdist(number, discrete = TRUE) plotdist(number,\"pois\",para=list(lambda=mean(number)))"},{"path":"https://lbbe-software.github.io/fitdistrplus/reference/plotdistcens.html","id":null,"dir":"Reference","previous_headings":"","what":"Plot of empirical and theoretical distributions for censored data — plotdistcens","title":"Plot of empirical and theoretical distributions for censored data — plotdistcens","text":"Plots empirical distribution censored data theoretical one specified.","code":""},{"path":"https://lbbe-software.github.io/fitdistrplus/reference/plotdistcens.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Plot of empirical and theoretical distributions for censored data — plotdistcens","text":"","code":"plotdistcens(censdata, distr, para, leftNA = -Inf, rightNA = Inf, NPMLE = TRUE, Turnbull.confint = FALSE, NPMLE.method = \"Wang\", 
...)"},{"path":"https://lbbe-software.github.io/fitdistrplus/reference/plotdistcens.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Plot of empirical and theoretical distributions for censored data — plotdistcens","text":"censdata dataframe two columns respectively named left right, describing observed value interval. left column contains either NA left censored observations, left bound interval interval censored observations, observed value non-censored observations. right column contains either NA right censored observations, right bound interval interval censored observations, observed value non-censored observations. distr character string \"name\" naming distribution, corresponding density function dname corresponding distribution function pname must defined, directly density function. para named list giving parameters named distribution. argument may omitted distr omitted. leftNA real value left bound left censored observations : -Inf finite value 0 positive data example. rightNA real value right bound right censored observations : Inf finite value realistic maximum value. NPMLE TRUE NPMLE (nonparametric maximum likelihood estimate) technique used estimate cdf curve censored data previous arguments leftNA rightNA used (see details) Turnbull.confint TRUE confidence intervals added Turnbull plot. case NPMLE.method forced \"Turnbull.middlepoints\" NPMLE.method Three NPMLE techniques provided, \"Wang\", default one, rewritten package npsurv using function constrOptim package stats optimisation, \"Turnbull.middlepoints\", older one implemented package survival \"Turnbull.intervals\" uses Turnbull algorithm package survival associates interval equivalence class instead middlepoint interval (see details). \"Wang\" \"Turnbull.intervals\" enable derivation Q-Q plot P-P plot. ... graphical arguments passed methods. title plot can modified using argument main CDF plot.","code":""},{"path":"https://lbbe-software.github.io/fitdistrplus/reference/plotdistcens.html","id":"details","dir":"Reference","previous_headings":"","what":"Details","title":"Plot of empirical and theoretical distributions for censored data — plotdistcens","text":"NPMLE TRUE, NPMLE.method \"Wang\" , empirical distributions plotted cdf using either constrained Newton method (Wang, 2008) hierarchical constrained Newton method (Wang, 2013) compute overall empirical cdf curve. NPMLE TRUE, NPMLE.method \"Turnbull.intervals\" , empirical plotted cdf using EM approach Turnbull (Turnbull, 1974). two cases, grey rectangles represent areas empirical distribution function unique. cases theoretical distribution specified, two goodness--fit plots also provided, Q-Q plot (plot quantiles theoretical fitted distribution (x-axis) empirical quantiles data) P-P plot (.e. value data set, plot cumulative density function fitted distribution (x-axis) empirical cumulative density function (y-axis)). Grey rectangles Q-Q plot P-P plot also represent areas non uniqueness empirical quantiles probabilities, directly derived non uniqueness areas empirical cumulative distribution. NPMLE TRUE, NPMLE.method \"Turnbull.middlepoints\", empirical , specified, theoretical distributions plotted cdf using EM approach Turnbull (Turnbull, 1974) compute overall empirical cdf curve, confidence intervals Turnbull.confint TRUE, calls functions survfit plot.survfit survival package. 
NPMLE FALSE empirical , specified, theoretical distributions plotted cdf, data directly reported segments interval, left right censored data, points non-censored data. plotting, observations ordered rank r associated . Left censored observations ordered first, right bounds. Interval censored non censored observations ordered mid-points , last, right censored observations ordered left bounds. leftNA (resp. rightNA) finite, left censored (resp. right censored) observations considered interval censored observations ordered mid-points non-censored interval censored data. sometimes necessary fix rightNA leftNA realistic extreme value, even exactly known, obtain reasonable global ranking observations. ranking, n observations plotted point (one x-value) segment (interval possible x-values), y-value equal r/n, r rank observation global ordering previously described. second method may interesting certainly less rigorous methods prefered.","code":""},{"path":[]},{"path":"https://lbbe-software.github.io/fitdistrplus/reference/plotdistcens.html","id":"references","dir":"Reference","previous_headings":"","what":"References","title":"Plot of empirical and theoretical distributions for censored data — plotdistcens","text":"Turnbull BW (1974), Nonparametric estimation survivorship function doubly censored data. Journal American Statistical Association, 69, 169-173, doi:10.2307/2285518 . Wang Y (2008), Dimension-reduced nonparametric maximum likelihood computation interval-censored data. Computational Statistics & Data Analysis, 52, 2388-2402, doi:10.1016/j.csda.2007.10.018 . Wang Y Taylor SM (2013), Efficient computation nonparametric survival functions via hierarchical mixture formulation. Statistics Computing, 23, 713-725, doi:10.1007/s11222-012-9341-9 . Wang, Y., & Fani, S. (2018), Nonparametric maximum likelihood computation U-shaped hazard function. Statistics Computing, 28(1), 187-200, doi:10.1007/s11222-017-9724-z . Delignette-Muller ML Dutang C (2015), fitdistrplus: R Package Fitting Distributions. 
Journal Statistical Software, 64(4), 1-34, doi:10.18637/jss.v064.i04 .","code":""},{"path":"https://lbbe-software.github.io/fitdistrplus/reference/plotdistcens.html","id":"author","dir":"Reference","previous_headings":"","what":"Author","title":"Plot of empirical and theoretical distributions for censored data — plotdistcens","text":"Marie-Laure Delignette-Muller Christophe Dutang.","code":""},{"path":"https://lbbe-software.github.io/fitdistrplus/reference/plotdistcens.html","id":"ref-examples","dir":"Reference","previous_headings":"","what":"Examples","title":"Plot of empirical and theoretical distributions for censored data — plotdistcens","text":"","code":"# (1) Plot of an empirical censored distribution (censored data) as a CDF # using the default Wang method # data(smokedfish) d1 <- as.data.frame(log10(smokedfish)) plotdistcens(d1) # (2) Add the CDF of a normal distribution # plotdistcens(d1, \"norm\", para=list(mean = -1.6, sd = 1.5)) # (3) Various plots of the same empirical distribution # # default Wang plot with representation of equivalence classess plotdistcens(d1, NPMLE = TRUE, NPMLE.method = \"Wang\") # same plot but using the Turnbull alorithm from the package survival plotdistcens(d1, NPMLE = TRUE, NPMLE.method = \"Wang\") # Turnbull plot with middlepoints (as in the package survival) plotdistcens(d1, NPMLE = TRUE, NPMLE.method = \"Turnbull.middlepoints\") # Turnbull plot with middlepoints and confidence intervals plotdistcens(d1, NPMLE = TRUE, NPMLE.method = \"Turnbull.middlepoints\", Turnbull.confint = TRUE) # with intervals and points plotdistcens(d1,rightNA=3, NPMLE = FALSE) # with intervals and points # defining a minimum value for left censored values plotdistcens(d1,leftNA=-3, NPMLE = FALSE)"},{"path":"https://lbbe-software.github.io/fitdistrplus/reference/prefit.html","id":null,"dir":"Reference","previous_headings":"","what":"Pre-fitting procedure — prefit","title":"Pre-fitting procedure — prefit","text":"Search good starting values","code":""},{"path":"https://lbbe-software.github.io/fitdistrplus/reference/prefit.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Pre-fitting procedure — prefit","text":"","code":"prefit(data, distr, method = c(\"mle\", \"mme\", \"qme\", \"mge\"), feasible.par, memp=NULL, order=NULL, probs=NULL, qtype=7, gof=NULL, fix.arg=NULL, lower, upper, weights=NULL, silent=TRUE, ...)"},{"path":"https://lbbe-software.github.io/fitdistrplus/reference/prefit.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Pre-fitting procedure — prefit","text":"data numeric vector. distr character string \"name\" naming distribution corresponding density function dname, corresponding distribution function pname corresponding quantile function qname must defined, directly density function. method character string coding fitting method: \"mle\" 'maximum likelihood estimation', \"mme\" 'moment matching estimation', \"qme\" 'quantile matching estimation' \"mge\" 'maximum goodness--fit estimation'. feasible.par named list giving initial values parameters named distribution function data computing initial values returning named list. argument may omitted (default) distributions reasonable starting values computed (see 'details' section mledist). may account closed-form formulas. order numeric vector moment order(s). length vector must equal number parameters estimate. memp function implementing empirical moments, raw centered consistent distr argument (weights argument). 
probs numeric vector probabilities quantile matching done. length vector must equal number parameters estimate. qtype quantile type used R quantile function compute empirical quantiles, (default 7 corresponds default quantile method R). gof character string coding name goodness--fit distance used : \"CvM\" Cramer-von Mises distance,\"KS\" Kolmogorov-Smirnov distance, \"AD\" Anderson-Darling distance, \"ADR\", \"ADL\", \"AD2R\", \"AD2L\" \"AD2\" variants Anderson-Darling distance described Luceno (2006). fix.arg optional named list giving values fixed parameters named distribution function data computing (fixed) parameter values returning named list. Parameters fixed value thus estimated maximum likelihood procedure. use argument possible method=\"mme\" closed-form formula used. weights optional vector weights used fitting process. NULL numeric vector. non-NULL, weighted MLE used, otherwise ordinary MLE. silent logical remove show warnings. lower Lower bounds parameters. upper Upper bounds parameters. ... arguments passed generic functions, one functions \"mledist\", \"mmedist\", \"qmedist\" \"mgedist\" depending chosen method. See mledist, mmedist, qmedist, mgedist details parameter estimation.","code":""},{"path":"https://lbbe-software.github.io/fitdistrplus/reference/prefit.html","id":"details","dir":"Reference","previous_headings":"","what":"Details","title":"Pre-fitting procedure — prefit","text":"Searching good starting values achieved transforming parameters (constraint interval real line) probability distribution. Indeed, positive parameters \\((0,Inf)\\) transformed using logarithm (typically scale parameter sd normal distribution, see Normal), parameters \\((1,Inf)\\) transformed using function \\(log(x-1)\\), probability parameters \\((0,1)\\) transformed using logit function \\(log(x/(1-x))\\) (typically parameter prob geometric distribution, see Geometric), negative probability parameters \\((-1,0)\\) transformed using function \\(log(-x/(1+x))\\), real parameters course transformed , typically mean normal distribution, see Normal. parameters transformed, optimization carried quasi-Newton algorithm (typically BFGS) transform back original parameter value.","code":""},{"path":"https://lbbe-software.github.io/fitdistrplus/reference/prefit.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Pre-fitting procedure — prefit","text":"named list.","code":""},{"path":[]},{"path":"https://lbbe-software.github.io/fitdistrplus/reference/prefit.html","id":"references","dir":"Reference","previous_headings":"","what":"References","title":"Pre-fitting procedure — prefit","text":"Delignette-Muller ML Dutang C (2015), fitdistrplus: R Package Fitting Distributions. 
Journal Statistical Software, 64(4), 1-34, doi:10.18637/jss.v064.i04 .","code":""},{"path":"https://lbbe-software.github.io/fitdistrplus/reference/prefit.html","id":"author","dir":"Reference","previous_headings":"","what":"Author","title":"Pre-fitting procedure — prefit","text":"Christophe Dutang Marie-Laure Delignette-Muller.","code":""},{"path":"https://lbbe-software.github.io/fitdistrplus/reference/prefit.html","id":"ref-examples","dir":"Reference","previous_headings":"","what":"Examples","title":"Pre-fitting procedure — prefit","text":"","code":"# (1) fit of a gamma distribution by maximum likelihood estimation # x <- rgamma(1e3, 5/2, 7/2) prefit(x, \"gamma\", \"mle\", list(shape=3, scale=3), lower=-Inf, upper=Inf) #> $shape #> [1] 2.57829 #> #> $scale #> [1] 3.559245 #>"},{"path":"https://lbbe-software.github.io/fitdistrplus/reference/qmedist.html","id":null,"dir":"Reference","previous_headings":"","what":"Quantile matching fit of univariate distributions — qmedist","title":"Quantile matching fit of univariate distributions — qmedist","text":"Fit univariate distribution matching quantiles non censored data.","code":""},{"path":"https://lbbe-software.github.io/fitdistrplus/reference/qmedist.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Quantile matching fit of univariate distributions — qmedist","text":"","code":"qmedist(data, distr, probs, start = NULL, fix.arg = NULL, qtype = 7, optim.method = \"default\", lower = -Inf, upper = Inf, custom.optim = NULL, weights = NULL, silent = TRUE, gradient = NULL, checkstartfix=FALSE, calcvcov=FALSE, ...)"},{"path":"https://lbbe-software.github.io/fitdistrplus/reference/qmedist.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Quantile matching fit of univariate distributions — qmedist","text":"data numeric vector non censored data. distr character string \"name\" naming distribution corresponding quantile function qname corresponding density distribution dname must classically defined. probs numeric vector probabilities quantile matching done. length vector must equal number parameters estimate. start named list giving initial values parameters named distribution function data computing initial values returning named list. argument may omitted (default) distributions reasonable starting values computed (see 'details' section mledist). fix.arg optional named list giving values fixed parameters named distribution function data computing (fixed) parameter values returning named list. Parameters fixed value thus estimated. qtype quantile type used R quantile function compute empirical quantiles, (default 7 corresponds default quantile method R). optim.method \"default\" optimization method pass optim. lower Left bounds parameters \"L-BFGS-B\" method (see optim). upper Right bounds parameters \"L-BFGS-B\" method (see optim). custom.optim function carrying optimization. weights optional vector weights used fitting process. NULL numeric vector strictly positive integers (typically number occurences observation). non-NULL, weighted QME used, otherwise ordinary QME. silent logical remove show warnings bootstraping. gradient function return gradient squared difference \"BFGS\", \"CG\" \"L-BFGS-B\" methods. NULL, finite-difference approximation used, see details. checkstartfix logical test starting fixed values. change . calcvcov logical indicating (asymptotic) covariance matrix required. (currently ignored) ... 
arguments passed optim, constrOptim custom.optim function.","code":""},{"path":"https://lbbe-software.github.io/fitdistrplus/reference/qmedist.html","id":"details","dir":"Reference","previous_headings":"","what":"Details","title":"Quantile matching fit of univariate distributions — qmedist","text":"qmedist function carries quantile matching numerically, minimization sum squared differences observed theoretical quantiles. Note discrete distribution, sum squared differences step function consequently, optimum unique, see FAQ. optimization process mledist, see 'details' section function. Optionally, vector weights can used fitting process. default (weigths=NULL), ordinary QME carried , otherwise specified weights used compute weighted quantiles used squared differences. Weigthed quantiles computed wtdquantile Hmisc package. yet possible take account weighths functions plotdist, plotdistcens, plot.fitdist, plot.fitdistcens, cdfcomp, cdfcompcens, denscomp, ppcomp, qqcomp, gofstat descdist (developments planned future). function intended called directly internally called fitdist bootdist.","code":""},{"path":"https://lbbe-software.github.io/fitdistrplus/reference/qmedist.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Quantile matching fit of univariate distributions — qmedist","text":"qmedist returns list following components, estimate parameter estimates. convergence integer code convergence optim defined defined user user-supplied optimization function. 0 indicates successful convergence. 1 indicates iteration limit optim reached. 10 indicates degeneracy Nealder-Mead simplex. 100 indicates optim encountered internal error. value minimal value reached criterion minimize. hessian symmetric matrix computed optim estimate Hessian solution found computed user-supplied optimization function. optim.function name optimization function used maximum likelihood. optim.method optim used, name algorithm used, field method custom.optim function otherwise. fix.arg named list giving values parameters named distribution must kept fixed rather estimated maximum likelihood NULL parameters. fix.arg.fun function used set value fix.arg NULL. weights vector weigths used estimation process NULL. counts two-element integer vector giving number calls log-likelihood function gradient respectively. excludes calls needed compute Hessian, requested, calls log-likelihood function compute finite-difference approximation gradient. counts returned optim user-supplied function set NULL. optim.message character string giving additional information returned optimizer, NULL. understand exactly message, see source code. loglik log-likelihood value. probs probability vector quantiles matched.","code":""},{"path":[]},{"path":"https://lbbe-software.github.io/fitdistrplus/reference/qmedist.html","id":"references","dir":"Reference","previous_headings":"","what":"References","title":"Quantile matching fit of univariate distributions — qmedist","text":"Klugman SA, Panjer HH Willmot GE (2012), Loss Models: Data Decissions, 4th edition. Wiley Series Statistics Finance, Business Economics, p. 253, doi:10.1198/tech.2006.s409 . Delignette-Muller ML Dutang C (2015), fitdistrplus: R Package Fitting Distributions. 
Journal Statistical Software, 64(4), 1-34, doi:10.18637/jss.v064.i04 .","code":""},{"path":"https://lbbe-software.github.io/fitdistrplus/reference/qmedist.html","id":"author","dir":"Reference","previous_headings":"","what":"Author","title":"Quantile matching fit of univariate distributions — qmedist","text":"Christophe Dutang Marie Laure Delignette-Muller.","code":""},{"path":"https://lbbe-software.github.io/fitdistrplus/reference/qmedist.html","id":"ref-examples","dir":"Reference","previous_headings":"","what":"Examples","title":"Quantile matching fit of univariate distributions — qmedist","text":"","code":"# (1) basic fit of a normal distribution # set.seed(1234) x1 <- rnorm(n=100) qmedist(x1, \"norm\", probs=c(1/3, 2/3)) #> $estimate #> mean sd #> -0.3025734 0.8521385 #> #> $convergence #> [1] 0 #> #> $value #> [1] 4.855518e-10 #> #> $hessian #> mean sd #> mean 4.000000e+00 -5.569326e-14 #> sd -5.569326e-14 7.421040e-01 #> #> $optim.function #> [1] \"optim\" #> #> $optim.method #> [1] \"Nelder-Mead\" #> #> $fix.arg #> NULL #> #> $fix.arg.fun #> NULL #> #> $weights #> NULL #> #> $counts #> function gradient #> 57 NA #> #> $optim.message #> NULL #> #> $loglik #> [1] -146.1278 #> #> $probs #> [1] 0.3333333 0.6666667 #> # (2) defining your own distribution functions, here for the Gumbel # distribution for other distributions, see the CRAN task view dedicated # to probability distributions dgumbel <- function(x, a, b) 1/b*exp((a-x)/b)*exp(-exp((a-x)/b)) qgumbel <- function(p, a, b) a - b*log(-log(p)) qmedist(x1, \"gumbel\", probs=c(1/3, 2/3), start=list(a=10,b=5)) #> Error in checkparamlist(arg_startfix$start.arg, arg_startfix$fix.arg, argddistname, hasnodefaultval): 'start' must specify names which are arguments to 'distr'. # (3) fit a discrete distribution (Poisson) # set.seed(1234) x2 <- rpois(n=30,lambda = 2) qmedist(x2, \"pois\", probs=1/2) #> $estimate #> lambda #> 1.7 #> #> $convergence #> [1] 0 #> #> $value #> [1] 0.25 #> #> $hessian #> lambda #> lambda 0 #> #> $optim.function #> [1] \"optim\" #> #> $optim.method #> [1] \"BFGS\" #> #> $fix.arg #> NULL #> #> $fix.arg.fun #> NULL #> #> $weights #> NULL #> #> $counts #> function gradient #> 1 1 #> #> $optim.message #> NULL #> #> $loglik #> [1] -46.18434 #> #> $probs #> [1] 0.5 #> # (4) fit a finite-support distribution (beta) # set.seed(1234) x3 <- rbeta(n=100,shape1=5, shape2=10) qmedist(x3, \"beta\", probs=c(1/3, 2/3)) #> $estimate #> shape1 shape2 #> 5.820826 14.053655 #> #> $convergence #> [1] 0 #> #> $value #> [1] 7.779463e-12 #> #> $hessian #> shape1 shape2 #> shape1 0.005429533 -0.0021926587 #> shape2 -0.002192659 0.0008954389 #> #> $optim.function #> [1] \"optim\" #> #> $optim.method #> [1] \"Nelder-Mead\" #> #> $fix.arg #> NULL #> #> $fix.arg.fun #> NULL #> #> $weights #> NULL #> #> $counts #> function gradient #> 89 NA #> #> $optim.message #> NULL #> #> $loglik #> [1] 76.02016 #> #> $probs #> [1] 0.3333333 0.6666667 #> # (5) fit frequency distributions on USArrests dataset. 
# x4 <- USArrests$Assault qmedist(x4, \"pois\", probs=1/2) #> $estimate #> lambda #> 170.76 #> #> $convergence #> [1] 0 #> #> $value #> [1] 144 #> #> $hessian #> lambda #> lambda 0 #> #> $optim.function #> [1] \"optim\" #> #> $optim.method #> [1] \"BFGS\" #> #> $fix.arg #> NULL #> #> $fix.arg.fun #> NULL #> #> $weights #> NULL #> #> $counts #> function gradient #> 1 1 #> #> $optim.message #> NULL #> #> $loglik #> [1] -1211.705 #> #> $probs #> [1] 0.5 #> qmedist(x4, \"nbinom\", probs=c(1/3, 2/3)) #> $estimate #> size mu #> 2.518966 182.313344 #> #> $convergence #> [1] 0 #> #> $value #> [1] 0.2222222 #> #> $hessian #> size mu #> size 0 0 #> mu 0 0 #> #> $optim.function #> [1] \"optim\" #> #> $optim.method #> [1] \"Nelder-Mead\" #> #> $fix.arg #> NULL #> #> $fix.arg.fun #> NULL #> #> $weights #> NULL #> #> $counts #> function gradient #> 37 NA #> #> $optim.message #> NULL #> #> $loglik #> [1] -292.5969 #> #> $probs #> [1] 0.3333333 0.6666667 #>"},{"path":"https://lbbe-software.github.io/fitdistrplus/reference/quantile.html","id":null,"dir":"Reference","previous_headings":"","what":"Quantile estimation from a fitted distribution — quantile","title":"Quantile estimation from a fitted distribution — quantile","text":"Quantile estimation fitted distribution, optionally confidence intervals calculated bootstrap result.","code":""},{"path":"https://lbbe-software.github.io/fitdistrplus/reference/quantile.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Quantile estimation from a fitted distribution — quantile","text":"","code":"# S3 method for class 'fitdist' quantile(x, probs = seq(0.1, 0.9, by=0.1), ...) # S3 method for class 'fitdistcens' quantile(x, probs = seq(0.1, 0.9, by=0.1), ...) # S3 method for class 'bootdist' quantile(x, probs = seq(0.1, 0.9, by=0.1),CI.type = \"two.sided\", CI.level = 0.95, ...) # S3 method for class 'bootdistcens' quantile(x, probs = seq(0.1, 0.9, by=0.1),CI.type = \"two.sided\", CI.level = 0.95, ...) # S3 method for class 'quantile.fitdist' print(x, ...) # S3 method for class 'quantile.fitdistcens' print(x, ...) # S3 method for class 'quantile.bootdist' print(x, ...) # S3 method for class 'quantile.bootdistcens' print(x, ...)"},{"path":"https://lbbe-software.github.io/fitdistrplus/reference/quantile.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Quantile estimation from a fitted distribution — quantile","text":"x object class \"fitdist\", \"fitdistcens\", \"bootdist\", \"bootdistcens\" \"quantile.fitdist\", \"quantile.fitdistcens\", \"quantile.bootdist\", \"quantile.bootdistcens\" print generic function. probs numeric vector probabilities values [0, 1] quantiles must calculated. CI.type Type confidence intervals : either \"two.sided\" one-sided intervals (\"less\" \"greater\"). CI.level confidence level. ... arguments passed generic functions.","code":""},{"path":"https://lbbe-software.github.io/fitdistrplus/reference/quantile.html","id":"details","dir":"Reference","previous_headings":"","what":"Details","title":"Quantile estimation from a fitted distribution — quantile","text":"Quantiles parametric distribution calculated probability specified probs, using estimated parameters. used object class \"bootdist\" \"bootdistcens\", percentile confidence intervals medians etimates also calculated bootstrap result. CI.type two.sided, CI.level two-sided confidence intervals quantiles calculated. CI.type less greater, CI.level one-sided confidence intervals quantiles calculated. 
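As a complement, a short sketch on simulated data (arbitrary lognormal parameters, small niter only to keep it fast) showing the one-sided "less" case, i.e. an upper confidence bound for each quantile; the examples below cover the two-sided and "greater" cases.
library(fitdistrplus)
set.seed(1234)
x <- rlnorm(100, meanlog = 1, sdlog = 0.5)   # simulated toy data
fit  <- fitdist(x, "lnorm")
boot <- bootdist(fit, niter = 101)
quantile(fit,  probs = c(0.05, 0.5, 0.95))   # point estimates from the fitted distribution
quantile(boot, probs = c(0.05, 0.5, 0.95),   # upper bounds of one-sided 95% CIs
         CI.type = "less", CI.level = 0.95)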
print functions show estimated quantiles percentile confidence intervals median estimates bootstrap resampling done previously, number bootstrap iterations estimation converges inferior whole number bootstrap iterations.","code":""},{"path":"https://lbbe-software.github.io/fitdistrplus/reference/quantile.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Quantile estimation from a fitted distribution — quantile","text":"quantile returns list 2 components (first two described ) called object class \"fitdist\" \"fitdistcens\" 8 components (described ) called object class \"bootdist\" \"bootdistcens\" : quantiles dataframe containing estimated quantiles probability value specified argument probs (one row, many columns values probs). probs numeric vector probabilities quantiles calculated. bootquant data frame containing bootstraped values quantile (many rows, specified call bootdist argument niter, many columns values probs) quantCI CI.type two.sided, two bounds CI.level percent two.sided confidence interval quantile (two rows many columns values probs). CI.type less, right bound CI.level percent one.sided confidence interval quantile (one row). CI.type greater, left bound CI.level percent one.sided confidence interval quantile (one row). quantmedian Median bootstrap estimates (per probability). CI.type Type confidence interval: either \"two.sided\" one-sided intervals (\"less\" \"greater\"). CI.level confidence level. nbboot number samples drawn bootstrap. nbconverg number iterations optimization algorithm converges.","code":""},{"path":[]},{"path":"https://lbbe-software.github.io/fitdistrplus/reference/quantile.html","id":"references","dir":"Reference","previous_headings":"","what":"References","title":"Quantile estimation from a fitted distribution — quantile","text":"Delignette-Muller ML Dutang C (2015), fitdistrplus: R Package Fitting Distributions. Journal Statistical Software, 64(4), 1-34, doi:10.18637/jss.v064.i04 .","code":""},{"path":"https://lbbe-software.github.io/fitdistrplus/reference/quantile.html","id":"author","dir":"Reference","previous_headings":"","what":"Author","title":"Quantile estimation from a fitted distribution — quantile","text":"Marie-Laure Delignette-Muller Christophe Dutang.","code":""},{"path":"https://lbbe-software.github.io/fitdistrplus/reference/quantile.html","id":"ref-examples","dir":"Reference","previous_headings":"","what":"Examples","title":"Quantile estimation from a fitted distribution — quantile","text":"","code":"# (1) Fit of a normal distribution on acute toxicity log-transformed values of # endosulfan for nonarthropod invertebrates, using maximum likelihood estimation # to estimate what is called a species sensitivity distribution # (SSD) in ecotoxicology, followed by estimation of the 5, 10 and 20 percent quantile # values of the fitted distribution, which are called the 5, 10, 20 percent hazardous # concentrations (HC5, HC10, HC20) in ecotoxicology, followed with calculations of their # confidence intervals with various definitions, from a small number of bootstrap # iterations to satisfy CRAN running times constraint. # For practical applications, we recommend to use at least niter=501 or niter=1001. 
# data(endosulfan) ATV <- subset(endosulfan, group == \"NonArthroInvert\")$ATV log10ATV <- log10(subset(endosulfan, group == \"NonArthroInvert\")$ATV) fln <- fitdist(log10ATV, \"norm\") quantile(fln, probs = c(0.05, 0.1, 0.2)) #> Estimated quantiles for each specified probability (non-censored data) #> p=0.05 p=0.1 p=0.2 #> estimate 1.744227 2.080093 2.4868 bln <- bootdist(fln, bootmethod=\"param\", niter=101) quantile(bln, probs = c(0.05, 0.1, 0.2)) #> (original) estimated quantiles for each specified probability (non-censored data) #> p=0.05 p=0.1 p=0.2 #> estimate 1.744227 2.080093 2.4868 #> Median of bootstrap estimates #> p=0.05 p=0.1 p=0.2 #> estimate 1.831458 2.128334 2.515952 #> #> two-sided 95 % CI of each quantile #> p=0.05 p=0.1 p=0.2 #> 2.5 % 1.206058 1.615810 2.040136 #> 97.5 % 2.372660 2.617113 2.937556 quantile(bln, probs = c(0.05, 0.1, 0.2), CI.type = \"greater\") #> (original) estimated quantiles for each specified probability (non-censored data) #> p=0.05 p=0.1 p=0.2 #> estimate 1.744227 2.080093 2.4868 #> Median of bootstrap estimates #> p=0.05 p=0.1 p=0.2 #> estimate 1.831458 2.128334 2.515952 #> #> left bound of one-sided 95 % CI of each quantile #> p=0.05 p=0.1 p=0.2 #> 5 % 1.299871 1.64396 2.126053 quantile(bln, probs = c(0.05, 0.1, 0.2), CI.level = 0.9) #> (original) estimated quantiles for each specified probability (non-censored data) #> p=0.05 p=0.1 p=0.2 #> estimate 1.744227 2.080093 2.4868 #> Median of bootstrap estimates #> p=0.05 p=0.1 p=0.2 #> estimate 1.831458 2.128334 2.515952 #> #> two-sided 90 % CI of each quantile #> p=0.05 p=0.1 p=0.2 #> 5 % 1.299871 1.643960 2.126053 #> 95 % 2.297746 2.565286 2.894080 # (2) Draw of 95 percent confidence intervals on quantiles of the # previously fitted distribution # cdfcomp(fln) q1 <- quantile(bln, probs = seq(0,1,length=101)) points(q1$quantCI[1,],q1$probs,type=\"l\") points(q1$quantCI[2,],q1$probs,type=\"l\") # (2b) Draw of 95 percent confidence intervals on quantiles of the # previously fitted distribution # using the NEW function CIcdfplot # CIcdfplot(bln, CI.output = \"quantile\", CI.fill = \"pink\") # (3) Fit of a distribution on acute salinity log-transformed tolerance # for riverine macro-invertebrates, using maximum likelihood estimation # to estimate what is called a species sensitivity distribution # (SSD) in ecotoxicology, followed by estimation of the 5, 10 and 20 percent quantile # values of the fitted distribution, which are called the 5, 10, 20 percent hazardous # concentrations (HC5, HC10, HC20) in ecotoxicology, followed with calculations of # their confidence intervals with various definitions. # from a small number of bootstrap iterations to satisfy CRAN running times constraint. # For practical applications, we recommend to use at least niter=501 or niter=1001. 
# data(salinity) log10LC50 <-log10(salinity) flncens <- fitdistcens(log10LC50,\"norm\") quantile(flncens, probs = c(0.05, 0.1, 0.2)) #> Estimated quantiles for each specified probability (censored data) #> p=0.05 p=0.1 p=0.2 #> estimate 1.11584 1.194121 1.288913 blncens <- bootdistcens(flncens, niter = 101) quantile(blncens, probs = c(0.05, 0.1, 0.2)) #> (original) estimated quantiles for each specified probability (censored data) #> p=0.05 p=0.1 p=0.2 #> estimate 1.11584 1.194121 1.288913 #> Median of bootstrap estimates #> p=0.05 p=0.1 p=0.2 #> estimate 1.127552 1.204485 1.299218 #> #> two-sided 95 % CI of each quantile #> p=0.05 p=0.1 p=0.2 #> 2.5 % 1.057448 1.138889 1.239646 #> 97.5 % 1.203538 1.270419 1.355852 quantile(blncens, probs = c(0.05, 0.1, 0.2), CI.type = \"greater\") #> (original) estimated quantiles for each specified probability (censored data) #> p=0.05 p=0.1 p=0.2 #> estimate 1.11584 1.194121 1.288913 #> Median of bootstrap estimates #> p=0.05 p=0.1 p=0.2 #> estimate 1.127552 1.204485 1.299218 #> #> left bound of one-sided 95 % CI of each quantile #> p=0.05 p=0.1 p=0.2 #> 5 % 1.062249 1.145186 1.245616 quantile(blncens, probs = c(0.05, 0.1, 0.2), CI.level = 0.9) #> (original) estimated quantiles for each specified probability (censored data) #> p=0.05 p=0.1 p=0.2 #> estimate 1.11584 1.194121 1.288913 #> Median of bootstrap estimates #> p=0.05 p=0.1 p=0.2 #> estimate 1.127552 1.204485 1.299218 #> #> two-sided 90 % CI of each quantile #> p=0.05 p=0.1 p=0.2 #> 5 % 1.062249 1.145186 1.245616 #> 95 % 1.195896 1.266786 1.346183"},{"path":"https://lbbe-software.github.io/fitdistrplus/reference/salinity.html","id":null,"dir":"Reference","previous_headings":"","what":"Species-Sensitivity Distribution (SSD) for salinity tolerance — salinity","title":"Species-Sensitivity Distribution (SSD) for salinity tolerance — salinity","text":"72-hour acute salinity tolerance (LC50 values) riverine macro-invertebrates.","code":""},{"path":"https://lbbe-software.github.io/fitdistrplus/reference/salinity.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Species-Sensitivity Distribution (SSD) for salinity tolerance — salinity","text":"","code":"data(salinity)"},{"path":"https://lbbe-software.github.io/fitdistrplus/reference/salinity.html","id":"format","dir":"Reference","previous_headings":"","what":"Format","title":"Species-Sensitivity Distribution (SSD) for salinity tolerance — salinity","text":"salinity data frame 2 columns named left right, describing observed LC50 value (electrical condutivity, millisiemens per centimeter) interval. left column contains either NA left censored observations, left bound interval interval censored observations, observed value non-censored observations. right column contains either NA right censored observations, right bound interval interval censored observations, observed value noncensored observations.","code":""},{"path":"https://lbbe-software.github.io/fitdistrplus/reference/salinity.html","id":"source","dir":"Reference","previous_headings":"","what":"Source","title":"Species-Sensitivity Distribution (SSD) for salinity tolerance — salinity","text":"Kefford, B.J., Nugegoda, D., Metzeling, L., Fields, E. 2006. Validating species sensitivity distributions using salinity tolerance riverine macroinvertebrates southern Murray-darling Basin (Vitoria, Australia). 
Canadian Journal Fisheries Aquatic Science, 63, 1865-1877.","code":""},{"path":"https://lbbe-software.github.io/fitdistrplus/reference/salinity.html","id":"ref-examples","dir":"Reference","previous_headings":"","what":"Examples","title":"Species-Sensitivity Distribution (SSD) for salinity tolerance — salinity","text":"","code":"# (1) load of data # data(salinity) # (2) plot of data using Turnbull cdf plot # log10LC50 <- log10(salinity) plotdistcens(log10LC50) # (3) fit of a normal and a logistic distribution to data in log10 # (classical distributions used for species sensitivity # distributions, SSD, in ecotoxicology)) # and visual comparison of the fits using Turnbull cdf plot # fln <- fitdistcens(log10LC50, \"norm\") summary(fln) #> Fitting of the distribution ' norm ' By maximum likelihood on censored data #> Parameters #> estimate Std. Error #> mean 1.4702582 0.02817044 #> sd 0.2154709 0.02369006 #> Loglikelihood: -61.79623 AIC: 127.5925 BIC: 132.9567 #> Correlation matrix: #> mean sd #> mean 1.0000000 0.2937484 #> sd 0.2937484 1.0000000 #> fll <- fitdistcens(log10LC50, \"logis\") summary(fll) #> Fitting of the distribution ' logis ' By maximum likelihood on censored data #> Parameters #> estimate Std. Error #> location 1.4761562 0.02822706 #> scale 0.1269994 0.01543956 #> Loglikelihood: -62.81293 AIC: 129.6259 BIC: 134.9901 #> Correlation matrix: #> location scale #> location 1.0000000 0.2024688 #> scale 0.2024688 1.0000000 #> cdfcompcens(list(fln, fll),legendtext = c(\"normal\", \"logistic\"), xlab = \"log10(LC50)\", xlim = c(0.5, 2), lines01 = TRUE) # (4) estimation of the 5 percent quantile value of # the normal fitted distribution (5 percent hazardous concentration : HC5) # with its two-sided 95 percent confidence interval calculated by # non parametric bootstrap # from a small number of bootstrap iterations to satisfy CRAN running times constraint. # For practical applications, we recommend to use at least niter=501 or niter=1001. # # in log10(LC50) bln <- bootdistcens(fln, niter = 101) HC5ln <- quantile(bln, probs = 0.05) # in LC50 10^(HC5ln$quantiles) #> p=0.05 #> estimate 13.0569 10^(HC5ln$quantCI) #> p=0.05 #> 2.5 % 11.08712 #> 97.5 % 15.50325 # (5) estimation of the HC5 value # with its one-sided 95 percent confidence interval (type \"greater\") # # in log10(LC50) HC5lnb <- quantile(bln, probs = 0.05, CI.type = \"greater\") # in LC50 10^(HC5lnb$quantiles) #> p=0.05 #> estimate 13.0569 10^(HC5lnb$quantCI) #> p=0.05 #> 5 % 11.31157"},{"path":"https://lbbe-software.github.io/fitdistrplus/reference/smokedfish.html","id":null,"dir":"Reference","previous_headings":"","what":"Contamination data of Listeria monocytogenes in smoked fish — smokedfish","title":"Contamination data of Listeria monocytogenes in smoked fish — smokedfish","text":"Contamination data Listeria monocytogenes smoked fish Belgian market period 2005 2007.","code":""},{"path":"https://lbbe-software.github.io/fitdistrplus/reference/smokedfish.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Contamination data of Listeria monocytogenes in smoked fish — smokedfish","text":"","code":"data(smokedfish)"},{"path":"https://lbbe-software.github.io/fitdistrplus/reference/smokedfish.html","id":"format","dir":"Reference","previous_headings":"","what":"Format","title":"Contamination data of Listeria monocytogenes in smoked fish — smokedfish","text":"smokedfish data frame 2 columns named left right, describing observed value Listeria monocytogenes concentration (CFU/g) interval. 
left column contains either NA left censored observations, left bound interval interval censored observations, observed value non-censored observations. right column contains either NA right censored observations, right bound interval interval censored observations, observed value non-censored observations.","code":""},{"path":"https://lbbe-software.github.io/fitdistrplus/reference/smokedfish.html","id":"source","dir":"Reference","previous_headings":"","what":"Source","title":"Contamination data of Listeria monocytogenes in smoked fish — smokedfish","text":"Busschaert, P., Geereard, .H., Uyttendaele, M., Van Impe, J.F., 2010. Estimating distributions qualitative (semi) quantitative microbiological contamination data use risk assessment. International Journal Food Microbiology. 138, 260-269.","code":""},{"path":"https://lbbe-software.github.io/fitdistrplus/reference/smokedfish.html","id":"ref-examples","dir":"Reference","previous_headings":"","what":"Examples","title":"Contamination data of Listeria monocytogenes in smoked fish — smokedfish","text":"","code":"# (1) load of data # data(smokedfish) # (2) plot of data in CFU/g # plotdistcens(smokedfish) # (3) plot of transformed data in log10[CFU/g] # Clog10 <- log10(smokedfish) plotdistcens(Clog10) # (4) Fit of a normal distribution to data in log10[CFU/g] # fitlog10 <- fitdistcens(Clog10, \"norm\") summary(fitlog10) #> Fitting of the distribution ' norm ' By maximum likelihood on censored data #> Parameters #> estimate Std. Error #> mean -1.575392 0.2013872 #> sd 1.539446 0.2118026 #> Loglikelihood: -87.10945 AIC: 178.2189 BIC: 183.4884 #> Correlation matrix: #> mean sd #> mean 1.0000000 -0.4325228 #> sd -0.4325228 1.0000000 #> plot(fitlog10)"},{"path":"https://lbbe-software.github.io/fitdistrplus/reference/toxocara.html","id":null,"dir":"Reference","previous_headings":"","what":"Parasite abundance in insular feral cats — toxocara","title":"Parasite abundance in insular feral cats — toxocara","text":"Toxocara cati abundance feral cats living Kerguelen island.","code":""},{"path":"https://lbbe-software.github.io/fitdistrplus/reference/toxocara.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Parasite abundance in insular feral cats — toxocara","text":"","code":"data(toxocara)"},{"path":"https://lbbe-software.github.io/fitdistrplus/reference/toxocara.html","id":"format","dir":"Reference","previous_headings":"","what":"Format","title":"Parasite abundance in insular feral cats — toxocara","text":"toxocara data frame 1 column (number: number parasites digestive tract)","code":""},{"path":"https://lbbe-software.github.io/fitdistrplus/reference/toxocara.html","id":"source","dir":"Reference","previous_headings":"","what":"Source","title":"Parasite abundance in insular feral cats — toxocara","text":"Fromont, E., Morvilliers, L., Artois, M., Pontier, D. 2001. Parasite richness abundance insular mainland feral cats. 
Parasitology, 123, 143-151.","code":""},{"path":"https://lbbe-software.github.io/fitdistrplus/reference/toxocara.html","id":"ref-examples","dir":"Reference","previous_headings":"","what":"Examples","title":"Parasite abundance in insular feral cats — toxocara","text":"","code":"# (1) load of data # data(toxocara) # (2) description and plot of data # number <- toxocara$number descdist(number, discrete = TRUE, boot = 11) #> summary statistics #> ------ #> min: 0 max: 75 #> median: 2 #> mean: 8.679245 #> estimated sd: 14.29332 #> estimated skewness: 2.630609 #> estimated kurtosis: 11.4078 plotdist(number, discrete = TRUE) # (3) fit of a Poisson distribution to data # fitp <- fitdist(number, \"pois\") summary(fitp) #> Fitting of the distribution ' pois ' by maximum likelihood #> Parameters : #> estimate Std. Error #> lambda 8.679245 0.4046719 #> Loglikelihood: -507.5334 AIC: 1017.067 BIC: 1019.037 plot(fitp) # (4) fit of a negative binomial distribution to data # fitnb <- fitdist(number, \"nbinom\") summary(fitnb) #> Fitting of the distribution ' nbinom ' by maximum likelihood #> Parameters : #> estimate Std. Error #> size 0.3971457 0.08289027 #> mu 8.6802520 1.93501003 #> Loglikelihood: -159.3441 AIC: 322.6882 BIC: 326.6288 #> Correlation matrix: #> size mu #> size 1.0000000000 -0.0001038553 #> mu -0.0001038553 1.0000000000 #> plot(fitnb)"},{"path":"https://lbbe-software.github.io/fitdistrplus/news/index.html","id":"fitdistrplus-12-2","dir":"Changelog","previous_headings":"","what":"fitdistrplus 1.2-2","title":"fitdistrplus 1.2-2","text":"NEW FEATURES website bringing together resources related fitdistrplus package now exists github.io following URL: https://lbbe-software.github.io/fitdistrplus/ BUG FIX default starting value gamma distribution wrongly computed rate parameter.","code":""},{"path":"https://lbbe-software.github.io/fitdistrplus/news/index.html","id":"fitdistrplus-12-1","dir":"Changelog","previous_headings":"","what":"fitdistrplus 1.2-1","title":"fitdistrplus 1.2-1","text":"CRAN release: 2024-07-12 NEW FEATURES fitdistrplus git repo now belongs lbbe-software organization modify add initial value univariate distributions provided actuar. create new vignette regarding default initial values. add generic functions AIC() BIC() fitdist fitdistcens objects. make gofstat() work fitdistcens objects (giving AIC BIC values). add calculation hessian using optimHess within fitdist given optim. compute asymptotic covariance matrix MME : Now theoretical moments m defined order equals twice maximal order given order. add new argument calcvcov order (dis)able computation covariance matrix method. graphics function *comp() now return list drawn points /lines plotstyle == \"graphics\". add density function bootdist(cens) objects. add DOIs man pages. BUG FIXES scale parameter fixed, startarg function also set rate parameter. leads error calling density. add sanity check plotdistcens: following code plotdistcens(data.frame(right=smokedfish$right, left=smokedfish$left)) raised error via npsurv(), thanks R. Pouillot. bug fixed using breaks plotdist. solve extremely long time taking lines descdist. add defensive programming input data (check NA, NaN, Inf values). correct links man pages URL DOI. 
remove use plot.np vignettes.","code":""},{"path":"https://lbbe-software.github.io/fitdistrplus/news/index.html","id":"fitdistrplus-11-11","dir":"Changelog","previous_headings":"","what":"fitdistrplus 1.1-11","title":"fitdistrplus 1.1-11","text":"CRAN release: 2023-04-25 NEW FEATURES add print argument descdist function allow plot skewness-kurtosis graph, without printing computed parameters BUG FIX use deprecated ggplot2 functions updated use deprecated BibTeX entries updated bug fixed drawing CI lines CIcdfcplot ggplot2 called bug fixed drawing horizontal lines cdfcompcens","code":""},{"path":"https://lbbe-software.github.io/fitdistrplus/news/index.html","id":"fitdistrplus-11-8","dir":"Changelog","previous_headings":"","what":"fitdistrplus 1.1-8","title":"fitdistrplus 1.1-8","text":"CRAN release: 2022-03-10 WARNING FIX update URL fitdistrplus.Rd replace (class(x) == XX) (inherits(x, XX)) replace dontrun tags donttest examples rd files BUG FIX fix error t-detectbound.R producing “failure: length > 1 coercion logical” reported Brian Ripley","code":""},{"path":"https://lbbe-software.github.io/fitdistrplus/news/index.html","id":"fitdistrplus-11-6","dir":"Changelog","previous_headings":"","what":"fitdistrplus 1.1-6","title":"fitdistrplus 1.1-6","text":"CRAN release: 2021-09-28 NEW FEATURES new function Surv2fitdistcens() format data use fitdistcens() format used survival package new dataset fremale order illustrate Surv2fitdistcens() support use ggplot2 CIcdfplot add taxon names endosulfan dataset new argument name.points cdfcomp CIcdfplot add labels next points","code":""},{"path":"https://lbbe-software.github.io/fitdistrplus/news/index.html","id":"fitdistrplus-11-5","dir":"Changelog","previous_headings":"","what":"fitdistrplus 1.1-5","title":"fitdistrplus 1.1-5","text":"CRAN release: 2021-05-28 WARNING FIX reduce testing times test files","code":""},{"path":"https://lbbe-software.github.io/fitdistrplus/news/index.html","id":"fitdistrplus-11-3","dir":"Changelog","previous_headings":"","what":"fitdistrplus 1.1-3","title":"fitdistrplus 1.1-3","text":"CRAN release: 2020-12-05 NEW FEATURE take account fix.arg uniform distribution BUG FIXES add loglikelihood value uniform distribution (mledist()) correct usage triple dots argument llsurface() fix error ppcomp() qqcomp() raised large dataset","code":""},{"path":"https://lbbe-software.github.io/fitdistrplus/news/index.html","id":"fitdistrplus-11-1","dir":"Changelog","previous_headings":"","what":"fitdistrplus 1.1-1","title":"fitdistrplus 1.1-1","text":"CRAN release: 2020-05-19 NEW FEATURES add internal functions cope problems lack maintenance package npsurv remove dependence package remove deprecated argument Turnbull plotdistcens()","code":""},{"path":"https://lbbe-software.github.io/fitdistrplus/news/index.html","id":"fitdistrplus-10-14","dir":"Changelog","previous_headings":"","what":"fitdistrplus 1.0-14","title":"fitdistrplus 1.0-14","text":"CRAN release: 2019-01-23 NEW FEATURES add new estimation method called maximum spacing estimation via msedist()","code":""},{"path":"https://lbbe-software.github.io/fitdistrplus/news/index.html","id":"fitdistrplus-10-13","dir":"Changelog","previous_headings":"","what":"fitdistrplus 1.0-13","title":"fitdistrplus 1.0-13","text":"BUG FIXES fix issues coming noLD (–disable-long-double) configuration R","code":""},{"path":"https://lbbe-software.github.io/fitdistrplus/news/index.html","id":"fitdistrplus-10-12","dir":"Changelog","previous_headings":"","what":"fitdistrplus 1.0-12","title":"fitdistrplus 
1.0-12","text":"BUG FIXES bug fixed qmedist() fitdistcens() raised error checkparamlist(). bug fixed testdpqfun() assumes first argument d,p,q,r functions exactly base R.","code":""},{"path":"https://lbbe-software.github.io/fitdistrplus/news/index.html","id":"fitdistrplus-10-11","dir":"Changelog","previous_headings":"","what":"fitdistrplus 1.0-11","title":"fitdistrplus 1.0-11","text":"CRAN release: 2018-09-10 NEW FEATURES update FAQ beta(,). improve graphics discrete distributions denscomp(). improve automatic naming legends xxxcomp(). harmonize outputs mledist(), qmedist(), mmedist(), mgedist(), fitdist() fitdistcens(). automatic test d, p, q functions fitdist() raise warnings. improve test starting fixed values. add new default starting values distributions actuar. change default CDF plot censored data, using Wang NPMLE algorithm provided package npsurv (plotdistcens() cdfcompcens()) add two new goodness--fit plots (QQ-plot PP-plot) censored data (cf. plotdistcens, qqcompcens ppcompcens). add part dedicated censored datain FAQ vignette. homogeneization xlim ylim default definition plotdistcens. Removing name first argument calls dpq functions order make package compatible distributions defined non classical name first argument (resp. x, q, p d, p, q functions). add possibility change title CDF plot plotdistcens() using argument main. support use ggplot2 cdfcompcens, qqcompcens, ppcompcens. BUG FIXES bug fixed concerning use gofstat chi squared df <=0 (error message blocking functions) bug fix mledist() bounds set (NULL) censored MLE enable correct use non-equidistant breaks denscomp histogram plotstyle = “ggplot”, prohibit use non-equidistant breaks probability = FALSE (adding stop case).","code":""},{"path":"https://lbbe-software.github.io/fitdistrplus/news/index.html","id":"fitdistrplus-10-9","dir":"Changelog","previous_headings":"","what":"fitdistrplus 1.0-9","title":"fitdistrplus 1.0-9","text":"CRAN release: 2017-03-24 update FAQ linear inequality constraints.","code":""},{"path":"https://lbbe-software.github.io/fitdistrplus/news/index.html","id":"fitdistrplus-10-8","dir":"Changelog","previous_headings":"","what":"fitdistrplus 1.0-8","title":"fitdistrplus 1.0-8","text":"CRAN release: 2017-02-01 NEW FEATURES support use ggplot2 cdfcomp, denscomp, qqcomp, ppcomp. BUG FIXES correct legend qqcomp ppomp large data. correct weights mmedist. correct name Akaike gofstat. correct use trueval plot.bootdist. correct vignette truncate (inflated) distributions.","code":""},{"path":"https://lbbe-software.github.io/fitdistrplus/news/index.html","id":"fitdistrplus-10-7","dir":"Changelog","previous_headings":"","what":"fitdistrplus 1.0-7","title":"fitdistrplus 1.0-7","text":"CRAN release: 2016-07-02 NEW FEATURES keep JSS vignette pdf. start FAQ vignette add datasets (?dataFAQ) . provide likelihood plot/surface/curve: llplot, llcurve, llsurface. provide parallelization bootstrap bootdist bootdistcens. provide graphic (e)cdf bootstraped confidence interval/area: CIcdfplot. allow use constrOptim() mledist, mmedist, mgedist, qmedist functions. add possible pre-fitting procedure: prefit. BUG FIXES add invisible() graphical functions. 
bug fixed concerning use weights censored data.","code":""},{"path":"https://lbbe-software.github.io/fitdistrplus/news/index.html","id":"fitdistrplus-10-6","dir":"Changelog","previous_headings":"","what":"fitdistrplus 1.0-6","title":"fitdistrplus 1.0-6","text":"CRAN release: 2015-11-30 BUG FIXES automatic definition starting values distributions llogis invweibull now working.","code":""},{"path":"https://lbbe-software.github.io/fitdistrplus/news/index.html","id":"fitdistrplus-10-5","dir":"Changelog","previous_headings":"","what":"fitdistrplus 1.0-5","title":"fitdistrplus 1.0-5","text":"CRAN release: 2015-09-21 NEW FEATURES update starting/fixing values mledist, mmedist, mgedist, qmedist functions. update graphics bootstrap procedure. add argument .points cdfcomp. add argument weights mledist, qmedist, mmedist, fitdist, fitdistcens. add argument keepdata fitdist, fitdistcens. suppress warnings/errors fitdist(cens), bootdist(cens). BUG FIXES defensive programming plotdist, cdfcomp,… simplify plotting curves cdfcomp seq(xmin, xmax, =1) used.","code":""},{"path":"https://lbbe-software.github.io/fitdistrplus/news/index.html","id":"fitdistrplus-10-4","dir":"Changelog","previous_headings":"","what":"fitdistrplus 1.0-4","title":"fitdistrplus 1.0-4","text":"CRAN release: 2015-02-23 release JSS publication.","code":""},{"path":"https://lbbe-software.github.io/fitdistrplus/news/index.html","id":"fitdistrplus-10-3","dir":"Changelog","previous_headings":"","what":"fitdistrplus 1.0-3","title":"fitdistrplus 1.0-3","text":"CRAN release: 2014-12-13 NEW FEATURES new generic functions fitdist(cens): loglik, vcov coef. vignette updated version paper accepted Journal Statistical Software. add argument discrete fitdist order able take account non classical discrete distributions plotting fit plot.fitdist cdfcomp calculating goodness--fit statistics gofstat (add example : fit zero inflate Poisson distribution). add S3 class descdist print method. BUG FIXES fitdist can handle non invertible Hessian matrices.","code":""},{"path":"https://lbbe-software.github.io/fitdistrplus/news/index.html","id":"fitdistrplus-10-2","dir":"Changelog","previous_headings":"","what":"fitdistrplus 1.0-2","title":"fitdistrplus 1.0-2","text":"CRAN release: 2014-02-12 NEW FEATURES plotdist can plot empirical density histogram, density plot superimposed. strong warning added documentation function descdist problematic high variance skewness kurtosis. BUG FIXES bug fixed bootdistcens : argument fix.arg now correctly passed mle.","code":""},{"path":"https://lbbe-software.github.io/fitdistrplus/news/index.html","id":"fitdistrplus-10-1","dir":"Changelog","previous_headings":"","what":"fitdistrplus 1.0-1","title":"fitdistrplus 1.0-1","text":"CRAN release: 2013-04-10 NEW FEATURES gofstat can handle multiple fitdist objects. plotdist discrete data slightly enhanced.","code":""},{"path":"https://lbbe-software.github.io/fitdistrplus/news/index.html","id":"fitdistrplus-10-0","dir":"Changelog","previous_headings":"","what":"fitdistrplus 1.0-0","title":"fitdistrplus 1.0-0","text":"CRAN release: 2012-12-27 NEW FEATURES update cdfcomp add denscomp, ppcomp qqcomp functions. add argument Turnbull.confint functions plotdistcens cdfcompcens order draw confidence intervals empirical distribution requested. 
ppoints now used “fitdist” QQ plot, PP plot cdf plot continuous data (used QQ plot previous versions) enable Blom type plotting position (using default Hazen plotting position can chanfge using arguments use.ppoints .ppoints) many changes examples given reference manual. vignette removed, transformed paper soon submit journal. add four data sets : fluazinam, salinity, danishuni danishmulti. add functions calculate quantiles fitted distribution, 95 percent CI calculated bootstrap : quantile generic function available fitdist bootdist objects quantile generic function available fitdistcens bootdistcens objects. BUG FIXES correction formula CvM test Weibull distribution. elimination CvM AD tests normal, lognormal logistic distributions : formulas previously used (given Stephens 1986) use exactly MLE estimates thus results approximates. make arguments xlim ylim functional cdfcompcens. bug fix closed formula mmedist lognormal distributions.","code":""},{"path":"https://lbbe-software.github.io/fitdistrplus/news/index.html","id":"fitdistrplus-03-4","dir":"Changelog","previous_headings":"","what":"fitdistrplus 0.3-4","title":"fitdistrplus 0.3-4","text":"CRAN release: 2012-03-22 NEW FEATURES posibility fix xlegend keyword (e.g. bottomright) cdfcomp cdfcompdens. improvement new vignette. BUG FIXES correction NAMESPACE file order enable correct print summary fitdistcens object (correlation matrix, loglikelihood AIC BIC statistics).","code":""},{"path":"https://lbbe-software.github.io/fitdistrplus/news/index.html","id":"fitdistrplus-03-3","dir":"Changelog","previous_headings":"","what":"fitdistrplus 0.3-3","title":"fitdistrplus 0.3-3","text":"NEW FEATURES new function (cdfcompcens) plot cumulative distributions corresponding various fits using censored data set. add example scaling problem man pages.","code":""},{"path":"https://lbbe-software.github.io/fitdistrplus/news/index.html","id":"fitdistrplus-03-2","dir":"Changelog","previous_headings":"","what":"fitdistrplus 0.3-2","title":"fitdistrplus 0.3-2","text":"NEW FEATURES new plot empirical cdf curve plotdistcens, using Turnbull algorithm call function survfit{survival}. new arguments function cdfcomp : verticals, horizontals xlim.","code":""},{"path":"https://lbbe-software.github.io/fitdistrplus/news/index.html","id":"fitdistrplus-03-1","dir":"Changelog","previous_headings":"","what":"fitdistrplus 0.3-1","title":"fitdistrplus 0.3-1","text":"NEW FEATURES add draft new version vignette.","code":""},{"path":"https://lbbe-software.github.io/fitdistrplus/news/index.html","id":"fitdistrplus-03-0","dir":"Changelog","previous_headings":"","what":"fitdistrplus 0.3-0","title":"fitdistrplus 0.3-0","text":"NEW FEATURES new function (cdfcomp) plot cumulative distributions corresponding various fits using non censored data set. 
add two data sets : endosulfan toxocara.","code":""},{"path":"https://lbbe-software.github.io/fitdistrplus/news/index.html","id":"fitdistrplus-02-2","dir":"Changelog","previous_headings":"","what":"fitdistrplus 0.2-2","title":"fitdistrplus 0.2-2","text":"CRAN release: 2011-04-27 BUG FIXES elimination NON-ASCII characters vignette.","code":""},{"path":"https://lbbe-software.github.io/fitdistrplus/news/index.html","id":"fitdistrplus-02-1","dir":"Changelog","previous_headings":"","what":"fitdistrplus 0.2-1","title":"fitdistrplus 0.2-1","text":"CRAN release: 2011-03-18 NEW FEATURES new fitting method implemented continuous distributions : maximum goodness--fit estimation (function mgedist) (moment available non censored data).","code":""},{"path":"https://lbbe-software.github.io/fitdistrplus/news/index.html","id":"fitdistrplus-01-5","dir":"Changelog","previous_headings":"","what":"fitdistrplus 0.1-5","title":"fitdistrplus 0.1-5","text":"NEW FEATURES new goodness--fit statistic added gofstat, corresponding test : Cramer-von Mises distance. new fitting method implemented : quantile matching estimation (function qmedist). moment, available non censored data. moment matching estimation extended (function mmedist) enable numerical matching closed formula available. BUG FIXES correction bug inserted adding argument fix.arg prevent print results goodness--fit tests.","code":""},{"path":"https://lbbe-software.github.io/fitdistrplus/news/index.html","id":"fitdistrplus-01-4","dir":"Changelog","previous_headings":"","what":"fitdistrplus 0.1-4","title":"fitdistrplus 0.1-4","text":"CRAN release: 2010-09-16 NEW FEATURES component named dots added list returned fitdist fitdistcens order pass optional arguments control optimization mledist bootdist bootdistcens. bootdist bootdistcens changed take account optional arguments defined call fitdist fitdistcens. argument added fitdist, fitdistcens mledist, named fix.arg, giving possibility fix distribution parameters maximizing likelihood. Functions bootdist, bootdistcens gofstat also changed order take new argument account. new data file bacterial contamination censored data extracted Busschaert et al. 2000 examples corresponding analysis dataset. BUG FIXES correction bug print plot bootstraped samples using bootdist bootdistcens one parameter estimated maximum likelihood.","code":""},{"path":"https://lbbe-software.github.io/fitdistrplus/news/index.html","id":"fitdistrplus-01-3","dir":"Changelog","previous_headings":"","what":"fitdistrplus 0.1-3","title":"fitdistrplus 0.1-3","text":"CRAN release: 2010-06-02 NEW FEATURES new data file groundbeef (groundbeef.rda groundbeef.Rd) new use dataset examples. new function gofstat. Goodness--fit statistics computed fitdist may computed printed use function gofstat. new function, whole results computed printed : results tests printed argument print.test==TRUE continuous distributions Anderson-Darling Kolomogorov-Smirnov statistics printed default (complete results returned gofstat). modifications descdist : three arguments added descdist 1/ method, choose unbiased estimations standard deviation, skewness kurtosis (default choice) sample values. 2/ obs.col choose color used plot observed point graph. 3/ boot.col choose color used plot bootstrap sample points graph. 
modifications plotfit : minor changes performed order facilitate use argument … personnalize plots (see examples plotdist.Rd) modication vignette BUG FIXES correction bug plotdist due redefinition previous version parameter “ylim” plot histogram theoretical density function (problem infinite values theoretical density function).","code":""},{"path":"https://lbbe-software.github.io/fitdistrplus/news/index.html","id":"fitdistrplus-01-2","dir":"Changelog","previous_headings":"","what":"fitdistrplus 0.1-2","title":"fitdistrplus 0.1-2","text":"CRAN release: 2009-12-29 NEW FEATURES deletion mledistcens modification mledist order maximize likelihood censored non censored data. possibility choose optimization method used maximum likelihood estimation (MLE) distribution parameters using new argument “optim.method” mledist. possibility specify contraints distribution parameters using new arguments “lower” “upper” mledist. possibility use custom optimization function MLE using new argument “custom.optim”. moment matching estimation longer done argument method set “mom” set “mme” fitdist. renaming momdist mmedist. calculation AIC BIC criterion maximum likelihood estimation distribution parameters change default number iterations 999 1001 bootstrap order avoid interpolation using quantile function use argument “log” (resp. “log.p”) density (resp. distribution) available compute loglikelihood. BUG FIXES omitting name first argument calls density function maximization likelihood order enable use density function defined first parameter (vector quantiles) name differing “x” (classical name density distributions defined R), density function dexGAUS package gamlss.dist.","code":""},{"path":"https://lbbe-software.github.io/fitdistrplus/news/index.html","id":"fitdistrplus-01-1","dir":"Changelog","previous_headings":"","what":"fitdistrplus 0.1-1","title":"fitdistrplus 0.1-1","text":"CRAN release: 2009-02-16 Initial release.","code":""}] +[{"path":[]},{"path":"https://lbbe-software.github.io/fitdistrplus/articles/FAQ.html","id":"how-do-i-know-the-root-name-of-a-distribution","dir":"Articles","previous_headings":"Questions regarding distributions","what":"How do I know the root name of a distribution?","title":"Frequently Asked Questions","text":"root name probability distribution name used d, p, q, r functions. base R distributions, root names given R-intro : https://cran.r-project.org/doc/manuals/R-intro.html#Probability-distributions. example, must use \"pois\" Poisson distribution \"poisson\".","code":""},{"path":"https://lbbe-software.github.io/fitdistrplus/articles/FAQ.html","id":"how-do-i-find-non-standard-distributions","dir":"Articles","previous_headings":"Questions regarding distributions","what":"How do I find “non standard” distributions?","title":"Frequently Asked Questions","text":"non-standard distributions, can either find package implementing define . comprehensive list non-standard distributions given Distributions task view https://CRAN.R-project.org/view=Distributions. two examples user-defined distributions. third example (shifted exponential) given FAQ 3.5.4. 
Gumbel distribution zero-modified geometric distribution","code":"dgumbel <- function(x, a, b) 1/b*exp((a-x)/b)*exp(-exp((a-x)/b)) pgumbel <- function(q, a, b) exp(-exp((a-q)/b)) qgumbel <- function(p, a, b) a-b*log(-log(p)) data(groundbeef) fitgumbel <- fitdist(groundbeef$serving, \"gumbel\", start=list(a=10, b=10)) dzmgeom <- function(x, p1, p2) p1 * (x == 0) + (1-p1)*dgeom(x-1, p2) pzmgeom <- function(q, p1, p2) p1 * (q >= 0) + (1-p1)*pgeom(q-1, p2) rzmgeom <- function(n, p1, p2) { u <- rbinom(n, 1, 1-p1) #prob to get zero is p1 u[u != 0] <- rgeom(sum(u != 0), p2)+1 u } x2 <- rzmgeom(1000, 1/2, 1/10) fitdist(x2, \"zmgeom\", start=list(p1=1/2, p2=1/2))"},{"path":"https://lbbe-software.github.io/fitdistrplus/articles/FAQ.html","id":"how-do-i-set-or-find-initial-values-for-non-standard-distributions","dir":"Articles","previous_headings":"Questions regarding distributions","what":"How do I set (or find) initial values for non standard distributions?","title":"Frequently Asked Questions","text":"documented, provide initial values following distributions: \"norm\", \"lnorm\", \"exp\", \"pois\", \"cauchy\", \"gamma“, \"logis\", \"nbinom\", \"geom\", \"beta\", \"weibull\" stats package; \"invgamma\", \"llogis\", \"invweibull\", \"pareto1\", \"pareto\", \"lgamma\", \"trgamma\", \"invtrgamma\" actuar package. Look first statistics probability books different volumes N. L. Johnson, S. Kotz N. Balakrishnan books, e.g. Continuous Univariate Distributions, Vol. 1, Thesaurus univariate discrete probability distributions G. Wimmer G. Altmann. Statistical Distributions M. Evans, N. Hastings, B. Peacock. Distributional Analysis L-moment Statistics using R Environment Statistical Computing W. Asquith. available, find initial values equalling theoretical empirical quartiles. graphical function plotdist() plotdistcens() can also used assess suitability starting values : iterative manual process can move parameter values obtain distribution roughly fits data take parameter values starting values real fit. may also consider prefit() function find initial values especially case parameters constrained.","code":""},{"path":"https://lbbe-software.github.io/fitdistrplus/articles/FAQ.html","id":"is-it-possible-to-fit-a-distribution-with-at-least-3-parameters","dir":"Articles","previous_headings":"Questions regarding distributions","what":"Is it possible to fit a distribution with at least 3 parameters?","title":"Frequently Asked Questions","text":"Yes, example Burr distribution detailed JSS paper. reproduce quickly .","code":"data(\"endosulfan\") library(\"actuar\") fendo.B <- fitdist(endosulfan$ATV, \"burr\", start = list(shape1 = 0.3, shape2 = 1, rate = 1)) summary(fendo.B) ## Fitting of the distribution ' burr ' by maximum likelihood ## Parameters : ## estimate Std. 
Error ## shape1 0.206 0.0561 ## shape2 1.540 0.3188 ## rate 1.497 0.4683 ## Loglikelihood: -520 AIC: 1046 BIC: 1054 ## Correlation matrix: ## shape1 shape2 rate ## shape1 1.000 -0.900 -0.727 ## shape2 -0.900 1.000 0.588 ## rate -0.727 0.588 1.000"},{"path":"https://lbbe-software.github.io/fitdistrplus/articles/FAQ.html","id":"why-there-are-differences-between-mle-and-mme-for-the-lognormal-distribution","dir":"Articles","previous_headings":"Questions regarding distributions","what":"Why there are differences between MLE and MME for the lognormal distribution?","title":"Frequently Asked Questions","text":"recall lognormal distribution function given FX(x)=Φ(log(x)−μσ), F_X(x) = \\Phi\\left(\\frac{\\log(x)-\\mu}{\\sigma} \\right), Φ\\Phi denotes distribution function standard normal distribution. know E(X)=exp(μ+12σ2)E(X) = \\exp\\left( \\mu+\\frac{1}{2} \\sigma^2 \\right) Var(X)=exp(2μ+σ2)(eσ2−1)Var(X) = \\exp\\left( 2\\mu+\\sigma^2\\right) (e^{\\sigma^2} -1). MME obtained inverting previous formulas, whereas MLE following explicit solution μ̂MLE=1n∑=1nlog(xi),σ̂MLE2=1n∑=1n(log(xi)−μ̂MLE)2. \\hat\\mu_{MLE} = \\frac{1}{n}\\sum_{=1}^n \\log(x_i),~~ \\hat\\sigma^2_{MLE} = \\frac{1}{n}\\sum_{=1}^n (\\log(x_i) - \\hat\\mu_{MLE})^2. Let us fit sample MLE MME. fit looks particularly good cases. Let us compare theoretical moments (mean variance) given fitted values (μ̂,σ̂\\hat\\mu,\\hat\\sigma), E(X)=exp(μ̂+12σ̂2),Var(X)=exp(2μ̂+σ̂2)(eσ̂2−1). E(X) = \\exp\\left( \\hat\\mu+\\frac{1}{2} \\hat\\sigma^2 \\right), Var(X) = \\exp\\left( 2\\hat\\mu+\\hat\\sigma^2\\right) (e^{\\hat\\sigma^2} -1). MLE point view, lognormal sample x1,…,xnx_1,\\dots,x_n equivalent handle normal sample log(x1),…,log(xn)\\log(x_1),\\dots,\\log(x_n). However, well know Jensen inequality E(X)=E(exp(log(X)))≥exp(E(log(X)))E(X) = E(\\exp(\\log(X))) \\geq \\exp(E(\\log(X))) implying MME estimates provides better moment estimates MLE.","code":"x3 <- rlnorm(1000) f1 <- fitdist(x3, \"lnorm\", method=\"mle\") f2 <- fitdist(x3, \"lnorm\", method=\"mme\") par(mfrow=1:2, mar=c(4,4,2,1)) cdfcomp(list(f1, f2), do.points=FALSE, xlogscale = TRUE, main = \"CDF plot\") denscomp(list(f1, f2), demp=TRUE, main = \"Density plot\") c(\"E(X) by MME\"=as.numeric(exp(f2$estimate[\"meanlog\"]+f2$estimate[\"sdlog\"]^2/2)), \"E(X) by MLE\"=as.numeric(exp(f1$estimate[\"meanlog\"]+f1$estimate[\"sdlog\"]^2/2)), \"empirical\"=mean(x3)) ## E(X) by MME E(X) by MLE empirical ## 1.61 1.60 1.61 c(\"Var(X) by MME\"=as.numeric(exp(2*f2$estimate[\"meanlog\"]+f2$estimate[\"sdlog\"]^2) * (exp(f2$estimate[\"sdlog\"]^2)-1)), \"Var(X) by MLE\"=as.numeric(exp(2*f1$estimate[\"meanlog\"]+f1$estimate[\"sdlog\"]^2) * (exp(f1$estimate[\"sdlog\"]^2)-1)), \"empirical\"=var(x3)) ## Var(X) by MME Var(X) by MLE empirical ## 4.30 4.36 4.30"},{"path":"https://lbbe-software.github.io/fitdistrplus/articles/FAQ.html","id":"can-i-fit-a-distribution-with-positive-support-when-data-contains-negative-values","dir":"Articles","previous_headings":"Questions regarding distributions","what":"Can I fit a distribution with positive support when data contains negative values?","title":"Frequently Asked Questions","text":"answer : fit distribution positive support (say gamma distribution) data contains negative values. irrelevant fit. really need use distribution, two options: either remove negative values (recommended) shift data.","code":"set.seed(1234) x <- rnorm(100, mean = 1, sd = 0.5) (try(fitdist(x, \"exp\"))) ## Error in computing default starting values. 
## Error in manageparam(start.arg = start, fix.arg = fix.arg, obs = data, : ## Error in startarg_transgamma_family(x, distr) : ## values must be positive to fit an exponential distribution ## [1] \"Error in manageparam(start.arg = start, fix.arg = fix.arg, obs = data, : \\n Error in startarg_transgamma_family(x, distr) : \\n values must be positive to fit an exponential distribution\\n\\n\" ## attr(,\"class\") ## [1] \"try-error\" ## attr(,\"condition\") ## fitdist(x[x >= 0], \"exp\") ## Fitting of the distribution ' exp ' by maximum likelihood ## Parameters: ## estimate Std. Error ## rate 1.06 0.107 fitdist(x - min(x), \"exp\") ## Fitting of the distribution ' exp ' by maximum likelihood ## Parameters: ## estimate Std. Error ## rate 0.914 0.0914"},{"path":"https://lbbe-software.github.io/fitdistrplus/articles/FAQ.html","id":"can-i-fit-a-finite-support-distribution-when-data-is-outside-that-support","dir":"Articles","previous_headings":"Questions regarding distributions","what":"Can I fit a finite-support distribution when data is outside that support?","title":"Frequently Asked Questions","text":"answer : fit distribution finite-support (say beta distribution) data outside [0,1][0,1]. irrelevant fit. really need use distribution, two ways tackle issue: either remove impossible values (recommended) shift/scale data.","code":"set.seed(1234) x <- rnorm(100, mean = 0.5, sd = 0.25) (try(fitdist(x, \"beta\"))) ## Error in computing default starting values. ## Error in manageparam(start.arg = start, fix.arg = fix.arg, obs = data, : ## Error in startargdefault(obs, distname) : ## values must be in [0-1] to fit a beta distribution ## [1] \"Error in manageparam(start.arg = start, fix.arg = fix.arg, obs = data, : \\n Error in startargdefault(obs, distname) : \\n values must be in [0-1] to fit a beta distribution\\n\\n\" ## attr(,\"class\") ## [1] \"try-error\" ## attr(,\"condition\") ## fitdist(x[x > 0 & x < 1], \"beta\") ## Fitting of the distribution ' beta ' by maximum likelihood ## Parameters: ## estimate Std. Error ## shape1 2.08 0.288 ## shape2 2.50 0.352 fitdist((x - min(x)*1.01) / (max(x) * 1.01 - min(x) * 1.01), \"beta\") ## Fitting of the distribution ' beta ' by maximum likelihood ## Parameters: ## estimate Std. Error ## shape1 1.77 0.236 ## shape2 2.17 0.296"},{"path":"https://lbbe-software.github.io/fitdistrplus/articles/FAQ.html","id":"can-i-fit-truncated-distributions","dir":"Articles","previous_headings":"Questions regarding distributions","what":"Can I fit truncated distributions?","title":"Frequently Asked Questions","text":"answer yes: fitting procedure must carried carefully. Let XX original untruncated random variable. 
truncated variable conditionnal random variable Y=X|l= low) * (x <= upp) } ptexp <- function(q, rate, low, upp) { PU <- pexp(upp, rate=rate) PL <- pexp(low, rate=rate) (pexp(q, rate)-PL) / (PU-PL) * (q >= low) * (q <= upp) + 1 * (q > upp) } n <- 200 x <- rexp(n); x <- x[x > .5 & x < 3] f1 <- fitdist(x, \"texp\", method=\"mle\", start=list(rate=3), fix.arg=list(low=min(x), upp=max(x))) f2 <- fitdist(x, \"texp\", method=\"mle\", start=list(rate=3), fix.arg=list(low=.5, upp=3)) gofstat(list(f1, f2)) ## Goodness-of-fit statistics ## 1-mle-texp 2-mle-texp ## Kolmogorov-Smirnov statistic 0.0952 0.084 ## Cramer-von Mises statistic 0.1343 0.104 ## Anderson-Darling statistic Inf 1.045 ## ## Goodness-of-fit criteria ## 1-mle-texp 2-mle-texp ## Akaike's Information Criterion 127 132 ## Bayesian Information Criterion 130 135 par(mfrow=c(1,1), mar=c(4,4,2,1)) cdfcomp(list(f1, f2), do.points = FALSE, xlim=c(0, 3.5))"},{"path":"https://lbbe-software.github.io/fitdistrplus/articles/FAQ.html","id":"can-i-fit-truncated-inflated-distributions","dir":"Articles","previous_headings":"Questions regarding distributions","what":"Can I fit truncated inflated distributions?","title":"Frequently Asked Questions","text":"answer yes: fitting procedure must carried carefully. Let XX original untruncated random variable. truncated variable Y=max(min(X,u),l)Y = \\max(\\min(X, u), l) ly>l+1y>uF_Y(y)=F_X(y)1_{u>y>l} + 1_{y>u}. density (w.r.t. Lebesgues measure) since two probability masses P(Y=l)=P(X≤l)>0P(Y=l)= P(X\\leq l)>0 P(Y=u)=P(X>u)>0P(Y=u)=P(X>u)>0. However, density function respect measure m(x)=δl(x)+δu(x)+λ(x)m(x)= \\delta_l(x)+\\delta_u(x)+\\lambda(x) fY(y)={FX(l)y=lfX(y)lminiyil>\\min_i y_i u= low) * (x <= upp) + PL * (x == low) + PU * (x == upp) } ptiexp <- function(q, rate, low, upp) pexp(q, rate) * (q >= low) * (q <= upp) + 1 * (q > upp) n <- 100; x <- pmax(pmin(rexp(n), 3), .5) # the loglikelihood has a discontinous point at the solution par(mar=c(4,4,2,1), mfrow=1:2) llcurve(x, \"tiexp\", plot.arg=\"low\", fix.arg = list(rate=2, upp=5), min.arg=0, max.arg=.5, lseq=200) llcurve(x, \"tiexp\", plot.arg=\"upp\", fix.arg = list(rate=2, low=0), min.arg=3, max.arg=4, lseq=200) (f1 <- fitdist(x, \"tiexp\", method=\"mle\", start=list(rate=3, low=0, upp=20))) ## Fitting of the distribution ' tiexp ' by maximum likelihood ## Parameters: ## estimate ## rate 0.949 ## low -0.502 ## upp 23.072 (f2 <- fitdist(x, \"tiexp\", method=\"mle\", start=list(rate=3), fix.arg=list(low=min(x), upp=max(x)))) ## Fitting of the distribution ' tiexp ' by maximum likelihood ## Parameters: ## estimate Std. Error ## rate 0.947 0.0982 ## Fixed parameters: ## value ## low 0.5 ## upp 3.0 gofstat(list(f1, f2)) ## Goodness-of-fit statistics ## 1-mle-tiexp 2-mle-tiexp ## Kolmogorov-Smirnov statistic 0.378 0.377 ## Cramer-von Mises statistic 1.890 1.882 ## Anderson-Darling statistic 10.222 10.193 ## ## Goodness-of-fit criteria ## 1-mle-tiexp 2-mle-tiexp ## Akaike's Information Criterion 216 162 ## Bayesian Information Criterion 224 165 par(mfrow=c(1,1), mar=c(4,4,2,1)) cdfcomp(list(f1, f2), do.points = FALSE, addlegend=FALSE, xlim=c(0, 3.5)) curve(ptiexp(x, 1, .5, 3), add=TRUE, col=\"blue\", lty=3) legend(\"bottomright\", lty=1:3, col=c(\"red\", \"green\", \"blue\", \"black\"), legend=c(\"full MLE\", \"MLE fixed arg\", \"true CDF\", \"emp. 
CDF\"))"},{"path":"https://lbbe-software.github.io/fitdistrplus/articles/FAQ.html","id":"can-i-fit-a-uniform-distribution","dir":"Articles","previous_headings":"Questions regarding distributions","what":"Can I fit a uniform distribution?","title":"Frequently Asked Questions","text":"uniform distribution 𝒰(,b)\\mathcal U(,b) support parameters since density scale shape parameter fU(u)=1b−a1[,b](u)f_U(u) = \\frac{1}{b-}1_{[,b]}(u). distribution, maximize log-likelihood likelihood. Let (xi)(x_i)_i ..d. observations 𝒰(,b)\\mathcal U(,b) distribution. likelihood L(,b)=∏=1n1b−a1[,b](xi)=1a≤xi≤b,=1,…,n1b−=1a≤minixi1maxixi≤b1b−L(,b) = \\prod_{=1}^n \\frac{1}{b-} 1_{[,b]}(x_i) = 1_{\\leq x_i \\leq b, =1,\\dots,n} \\frac{1}{b-}^n = 1_{\\leq \\min_i x_i} 1_{\\max_i x_i \\leq b} \\frac{1}{b-}^n Hence ↦L(,b)\\mapsto L(,b) fixed b∈]maxixi,+∞[b\\]\\max_i x_i, +\\infty[ increasing ]−∞,minixi]]-\\infty, \\min_i x_i], similarly b↦L(,b)b\\mapsto L(,b) decreasing fixed aa. leads minixi\\min_i x_i maxixi\\max_i x_i MLE uniform distribution. notice likelihood function LL defined ℝ2\\mathbb R^2 yet cancels outside S=]−∞,minixi]×]maxixi,+∞[S=]-\\infty, \\min_i x_i]\\times]\\max_i x_i, +\\infty[. Hence, log-likelihood undefined outside SS, issue maximizing log-likelihood. reasons, fitdist(data, dist=\"unif\", method=\"mle\") uses explicit form MLE distribution. example Maximizing log-likelihood harder can done defining new density function. Appropriate starting values parameters bound must supplied. Using closed-form expression (fitdist()) maximizing log-likelihood (unif2) lead similar results.","code":"trueval <- c(\"min\"=3, \"max\"=5) x <- runif(n=500, trueval[1], trueval[2]) f1 <- fitdist(x, \"unif\") delta <- .01 par(mfrow=c(1,1), mar=c(4,4,2,1)) llsurface(x, \"unif\", plot.arg = c(\"min\", \"max\"), min.arg=c(min(x)-2*delta, max(x)-delta), max.arg=c(min(x)+delta, max(x)+2*delta), main=\"likelihood surface for uniform\", loglik=FALSE) abline(v=min(x), h=max(x), col=\"grey\", lty=2) points(f1$estimate[1], f1$estimate[2], pch=\"x\", col=\"red\") points(trueval[1], trueval[2], pch=\"+\", col=\"blue\") legend(\"bottomright\", pch=c(\"+\",\"x\"), col=c(\"blue\",\"red\"), c(\"true\", \"fitted\")) delta <- .2 llsurface(x, \"unif\", plot.arg = c(\"min\", \"max\"), min.arg=c(3-2*delta, 5-delta), max.arg=c(3+delta, 5+2*delta), main=\"log-likelihood surface for uniform\") abline(v=min(x), h=max(x), col=\"grey\", lty=2) points(f1$estimate[1], f1$estimate[2], pch=\"x\", col=\"red\") points(trueval[1], trueval[2], pch=\"+\", col=\"blue\") legend(\"bottomright\", pch=c(\"+\",\"x\"), col=c(\"blue\",\"red\"), c(\"true\", \"fitted\")) dunif2 <- function(x, min, max) dunif(x, min, max) punif2 <- function(q, min, max) punif(q, min, max) f2 <- fitdist(x, \"unif2\", start=list(min=0, max=10), lower=c(-Inf, max(x)), upper=c(min(x), Inf)) print(c(logLik(f1), logLik(f2)), digits=7) ## [1] -346.0539 -346.0540 print(cbind(coef(f1), coef(f2)), digits=7) ## [,1] [,2] ## min 3.000684 3.000683 ## max 4.998606 4.998606"},{"path":"https://lbbe-software.github.io/fitdistrplus/articles/FAQ.html","id":"can-i-fit-a-beta-distribution-with-the-same-shape-parameter","dir":"Articles","previous_headings":"Questions regarding distributions","what":"Can I fit a beta distribution with the same shape parameter?","title":"Frequently Asked Questions","text":"Yes, can wrap density function beta distribution one shape parameter. example concave density. Another example U-shaped density.","code":"x <- rbeta(1000, 3, 3) dbeta2 <- function(x, shape, ...) 
dbeta(x, shape, shape, ...) pbeta2 <- function(q, shape, ...) pbeta(q, shape, shape, ...) fitdist(x, \"beta2\", start=list(shape=1/2)) ## Fitting of the distribution ' beta2 ' by maximum likelihood ## Parameters: ## estimate Std. Error ## shape 3.24 0.135 x <- rbeta(1000, .3, .3) fitdist(x, \"beta2\", start=list(shape=1/2), optim.method=\"L-BFGS-B\", lower=1e-2) ## Fitting of the distribution ' beta2 ' by maximum likelihood ## Parameters: ## estimate Std. Error ## shape 0.295 0.00986"},{"path":"https://lbbe-software.github.io/fitdistrplus/articles/FAQ.html","id":"how-to-estimate-support-parameter-the-case-of-the-four-parameter-beta","dir":"Articles","previous_headings":"Questions regarding distributions","what":"How to estimate support parameter? the case of the four-parameter beta","title":"Frequently Asked Questions","text":"Let us consider four-parameter beta distribution, also known PERT distribution, defined following density x∈[,c]x\\[,c]fX(x)=(x−)α−1(c−x)β−1/CNf_X(x) = (x-)^{\\alpha-1} (c-x)^{\\beta-1}/C_N CNC_N normalizing constant α=1+d(b−)/(c−)\\alpha=1+d(b-)/(c-), β=1+d(c−b)/(c−)\\beta=1+d(c-b)/(c-). ,ca,c support parameters, b∈],c[b\\],c[ mode dd shape parameter. uniform distribution, one can show MLE aa cc respectively sample minimum maximum. code illustrates strategy using partial closed formula fix.arg full numerical search MLE. NB: small sample size, latter generally better goodness--fit statistics; small positive number added subtracted fixing support parameters aa cc sample minimum maximum.","code":"require(mc2d) x2 <- rpert(n=2e2, min=0, mode=1, max=2, shape=3/4) eps <- sqrt(.Machine$double.eps) f1 <- fitdist(x2, \"pert\", start=list(min=-1, mode=0, max=10, shape=1), lower=c(-Inf, -Inf, -Inf, 0), upper=c(Inf, Inf, Inf, Inf)) ## Warning in checkparamlist(arg_startfix$start.arg, arg_startfix$fix.arg, : Some ## parameter names have no starting/fixed value but have a default value: mean. ## Warning in cov2cor(varcovar): diag(V) had non-positive or NA entries; the ## non-finite result may be dubious ## Warning in sqrt(diag(varcovar)): NaNs produced f2 <- fitdist(x2, \"pert\", start=list(mode=1, shape=1), fix.arg=list(min=min(x2)-eps, max=max(x2)+eps), lower=c(min(x2), 0), upper=c(max(x2), Inf)) ## Warning in checkparamlist(arg_startfix$start.arg, arg_startfix$fix.arg, : Some ## parameter names have no starting/fixed value but have a default value: mean. print(cbind(coef(f1), c(f2$fix.arg[\"min\"], coef(f2)[\"mode\"], f2$fix.arg[\"max\"], coef(f2)[\"shape\"])), digits=7) ## [,1] [,2] ## min 0.03106317 0.03395487 ## mode 1.120283 1.956149 ## max 1.9595 1.956234 ## shape 0.3056077 0.008646087 gofstat(list(f1,f2)) ## Goodness-of-fit statistics ## 1-mle-pert 2-mle-pert ## Kolmogorov-Smirnov statistic 0.0452 0.0584 ## Cramer-von Mises statistic 0.0823 0.1834 ## Anderson-Darling statistic 0.5325 1.2776 ## ## Goodness-of-fit criteria ## 1-mle-pert 2-mle-pert ## Akaike's Information Criterion 268 265 ## Bayesian Information Criterion 281 272 par(mfrow=c(1,1), mar=c(4,4,2,1)) cdfcomp(list(f1,f2))"},{"path":[]},{"path":"https://lbbe-software.github.io/fitdistrplus/articles/FAQ.html","id":"where-can-we-find-the-results-of-goodness-of-fit-tests","dir":"Articles","previous_headings":"Questions regarding goodness-of-fit tests and statistics, Cullen-Frey graph","what":"Where can we find the results of goodness-of-fit tests ?","title":"Frequently Asked Questions","text":"Results goodness--fit tests printed given object returned gofstat() can access described example . 
Nevertheless, p-values given every test. Anderson-Darling (ad), Cramer von Mises (cvm) Kolomogorov (ks), decision (rejection H0 ) given, available (see FAQ 2.3 details).","code":"set.seed(1234) x <- rgamma(n = 100, shape = 2, scale = 1) # fit of the good distribution fgamma <- fitdist(x, \"gamma\") # fit of a bad distribution fexp <- fitdist(x, \"exp\") g <- gofstat(list(fgamma, fexp), fitnames = c(\"gamma\", \"exp\")) par(mfrow=c(1,1), mar=c(4,4,2,1)) denscomp(list(fgamma, fexp), legendtext = c(\"gamma\", \"exp\")) # results of the tests ## chi square test (with corresponding table with theoretical and observed counts) g$chisqpvalue ## gamma exp ## 1.89e-01 7.73e-05 g$chisqtable ## obscounts theo gamma theo exp ## <= 0.5483 9 10.06 23.66 ## <= 0.8122 9 8.82 9.30 ## <= 0.9592 9 5.27 4.68 ## <= 1.368 9 14.64 11.37 ## <= 1.523 9 5.24 3.74 ## <= 1.701 9 5.73 3.97 ## <= 1.94 9 7.09 4.82 ## <= 2.381 9 11.08 7.50 ## <= 2.842 9 9.00 6.29 ## <= 3.801 9 11.93 9.28 ## > 3.801 10 11.15 15.40 ## Anderson-Darling test g$adtest ## gamma exp ## \"not rejected\" \"rejected\" ## Cramer von Mises test g$cvmtest ## gamma exp ## \"not rejected\" \"rejected\" ## Kolmogorov-Smirnov test g$kstest ## gamma exp ## \"not rejected\" \"rejected\""},{"path":"https://lbbe-software.github.io/fitdistrplus/articles/FAQ.html","id":"is-it-reasonable-to-use-goodness-of-fit-tests-to-validate-the-fit-of-a-distribution","dir":"Articles","previous_headings":"Questions regarding goodness-of-fit tests and statistics, Cullen-Frey graph","what":"Is it reasonable to use goodness-of-fit tests to validate the fit of a distribution ?","title":"Frequently Asked Questions","text":"first versions fitdistrplus, available, results GOF tests (AD, KS, CvM) automatically printed. decided suppress automatic printing realized users difficulties interpret results tests sometimes misused . Goodness--fit tests often appear objective tools decide wether fitted distribution well describes data set. ! reasonable reject distribution just goodness--fit test rejects (see FAQ 2.2.1). reasonable validate distribution goodness--fit tests reject (see FAQ 2.2.2). fitted distribution evaluated using graphical methods (goodness--fit graphs automatically provided package plotting result fit (output fitdist() fitdistcens() complementary graphs help compare different fits - see ?graphcomp). really think appropriate way evaluate adequacy fit ones recommend . can find type recommendations reference books : Probabilistic techniques exposure assessment - handbook dealing variability uncertainty models inputs .C. Cullen H.C. Frey. Application uncertainty analysis ecological risks pesticides W.J. Warren-Hicks . Hart. Statistical inference G. Casella R.L. Berger Loss models: data decision S.. Klugman H.H. Panjer G.E. Willmot Moreover, selection distribution also driven knowledge underlying processes available. 
example variable negative, one cautious fitting normal distribution, potentially gives negative values, even observed data variable seem well fitted normal distribution.","code":""},{"path":"https://lbbe-software.github.io/fitdistrplus/articles/FAQ.html","id":"should-i-reject-a-distribution-because-a-goodness-of-fit-test-rejects-it","dir":"Articles","previous_headings":"Questions regarding goodness-of-fit tests and statistics, Cullen-Frey graph > Is it reasonable to use goodness-of-fit tests to validate the fit of a distribution ?","what":"Should I reject a distribution because a goodness-of-fit test rejects it ?","title":"Frequently Asked Questions","text":"reasonable reject distribution just goodness--fit test rejects , especially case big samples. real life, soon sufficient amount data, reject fitted distribution. know model perfectly describe real data, generally true question find better distribution among pool simple parametric distributions describe data, compare different models (see FAQ 2.4 2.5 corresponding questions). illustre point let us comment example presented . drew two samples Poisson distribution mean parameter equal 100. many applications, value parameter, Poisson distribution considered well approximated normal distribution. Testing fit (using Kolmogorov-Smirnov test ) normal distribution sample 100 observations reject normal fit, testing sample 10000 observations reject , samples come distribution.","code":"set.seed(1234) x1 <- rpois(n = 100, lambda = 100) f1 <- fitdist(x1, \"norm\") g1 <- gofstat(f1) g1$kstest ## 1-mle-norm ## \"not rejected\" x2 <- rpois(n = 10000, lambda = 100) f2 <- fitdist(x2, \"norm\") g2 <- gofstat(f2) g2$kstest ## 1-mle-norm ## \"rejected\" par(mfrow=c(1,2), mar=c(4,4,2,1)) denscomp(f1, demp = TRUE, addlegend = FALSE, main = \"small sample\") denscomp(f2, demp = TRUE, addlegend = FALSE, main = \"big sample\")"},{"path":"https://lbbe-software.github.io/fitdistrplus/articles/FAQ.html","id":"should-i-accept-a-distribution-because-goodness-of-fit-tests-do-not-reject-it","dir":"Articles","previous_headings":"Questions regarding goodness-of-fit tests and statistics, Cullen-Frey graph > Is it reasonable to use goodness-of-fit tests to validate the fit of a distribution ?","what":"Should I accept a distribution because goodness-of-fit tests do not reject it ?","title":"Frequently Asked Questions","text":", reasonable validate distribution goodness--fit tests reject . Like hypothesis tests, goodness--fit tests lack statistical power sample size high. different goodness--fit tests equally sensitive different types deviation empirical fitted distributions. example Kolmogorov-Smirnov test sensitive distributions differ global fashion near centre distribution. Anderson-Darling test sensitive distributions differ tails, Cramer von Mises sensitive small repetitive differences empirical theoretical distribution functions. sensitivity chi square test depend definition classes, even propose default definition classes user provide classes, choice obvious impact results test. test appropriate data discrete, even modelled continuous distribution, following example. Two samples respective sizes 500 50 drawn Poisson distribution mean parameter equal 1 (sufficiently high value consider Poisson distribution approximated normal one). Using Kolmogorov-Smirnov test, small sample normal fit rejected bigger sample. rejected smaller sample even fit rejected simple visual confrontation distributions. 
particular case, chi square test classes defined default rejected te normal fit samples.","code":"set.seed(1234) x3 <- rpois(n = 500, lambda = 1) f3 <- fitdist(x3, \"norm\") g3 <- gofstat(f3) g3$kstest ## 1-mle-norm ## \"rejected\" x4 <- rpois(n = 50, lambda = 1) f4 <- fitdist(x4, \"norm\") g4 <- gofstat(f4) g4$kstest ## 1-mle-norm ## \"not rejected\" par(mfrow=c(1,2), mar=c(4,4,2,1)) denscomp(f3, addlegend = FALSE, main = \"big sample\") denscomp(f4, addlegend = FALSE, main = \"small sample\") g3$chisqtable ## obscounts theocounts ## <= 0 180.0 80.3 ## <= 1 187.0 163.5 ## <= 2 87.0 168.1 ## <= 3 32.0 73.4 ## > 3 14.0 14.7 g3$chisqpvalue ## [1] 7.11e-42 g4$chisqtable ## obscounts theocounts ## <= 0 14.00 5.46 ## <= 1 15.00 14.23 ## <= 2 15.00 18.09 ## > 2 6.00 12.22 g4$chisqpvalue ## [1] 3.57e-05"},{"path":"https://lbbe-software.github.io/fitdistrplus/articles/FAQ.html","id":"why-all-goodness-of-fit-tests-are-not-available-for-every-distribution","dir":"Articles","previous_headings":"Questions regarding goodness-of-fit tests and statistics, Cullen-Frey graph","what":"Why all goodness-of-fit tests are not available for every distribution ?","title":"Frequently Asked Questions","text":"Chi-squared test available distribution one must conscious result depends definition cells observed data grouped, correct definition possible small sample. Concerning Kolmogorov-Smirnov test, proposed continuous distribution, critical value corresponding comparison empirical distribution fully specified distribution. distribution fully known fitted distribution, result test subject caution, general asymptotic theory Kolmogorov-Smirnov statistics case fitted distribution. Nevertheless, one can use Monte Carlo methods conduct Kolmgorov-Smirnov goodness--fit tests cases sample used estimate model parameters. method implemented R package KScorrect variety continuous distributions. asymptotic theory proposed quadratic statistics distributions (Anderson-Darling, Cramer von Mises). reference book used subject (Tests based edf statistics Stephens MA Goodness--fit techniques D’Agostino RB Stephens MA) proposes critical values statistics classical distributions (exponential, gamma, Weibull, logistic, Cauchy, normal lognormal). asymptotic theory statistics also depends way parameters estimated. estimated maximum likelihood Cauchy, normal lognormal distributions results reported Stephens, propose results Anderson-Darling Cramer von Mises using results exponential, gamma, Weibull, logistic distributions. user can refer cited books use proposed formula estimate parameters Cauchy, normal lognormal distributions apply tests using critical values given book. R packages goftest ADGofTest also explored users like apply Anderson-Darling Cramer von Mises tests distributions. time sure case parameters unknown (estimated maximum likelihood) tackled two packages. 
Concerning development package, rather develoing goodness--fit tests made choice develop graphical tools help appreciate quality fit compare fits different distributions data set (see FAQ 2.2 argumentation).","code":""},{"path":"https://lbbe-software.github.io/fitdistrplus/articles/FAQ.html","id":"how-can-we-use-goodness-of-fit-statistics-to-compare-the-fit-of-different-distributions-on-a-same-data-set","dir":"Articles","previous_headings":"Questions regarding goodness-of-fit tests and statistics, Cullen-Frey graph","what":"How can we use goodness-of-fit statistics to compare the fit of different distributions on a same data set ?","title":"Frequently Asked Questions","text":"Goodness--fit statistics based empirical distribution function (Kolmogorov-Smirnov, Anderson-Darling Cramer von Mises) may used measure distance fitted distribution empirical distribution. one wants compare fit various distributions data set, smaller statistics better. Kolmogorov-Smirnov statistics sensitive distributions differ global fashion near centre distribution Anderson-Darling statistics sensitive distributions differ tails, Cramer von Mises statistics sensitive small repetitive differences empirical theoretical distribution functions. mentioned main vignette package, use Anderson-Darling compare fit different distributions subject caution due weighting quadratic distance fitted empirical distribution functions depends parametric distribution. Moreover, statistics based empirical distribution function penalize distributions greater number parameters generally flexible, induce -fitting. Goodness-fo-fit statistics based information criteria (AIC, BIC) correspond deviance penalized complexity model (number parameters distribution), smaller better. generic statistics, adapted focus part fitted distribution, take account complexity distribution thus help prevent overfitting.","code":""},{"path":"https://lbbe-software.github.io/fitdistrplus/articles/FAQ.html","id":"can-we-use-a-test-to-compare-the-fit-of-two-distributions-on-a-same-data-set","dir":"Articles","previous_headings":"Questions regarding goodness-of-fit tests and statistics, Cullen-Frey graph","what":"Can we use a test to compare the fit of two distributions on a same data set ?","title":"Frequently Asked Questions","text":"package implement test two nested distributions (one special case one, e.g. exponential gamma distributions) likelihood ratio test can easily implemented using loglikelihood provided fitdist fitdistcens. Denoting LL maximum likelihood obtained complete distribution L0L_0 one obtained simplified distribution, sample size increases, −2ln(L0L)=2ln(L)−2ln(L0)- 2 ln(\\frac{L_0}{L}) = 2 ln(L) - 2 ln(L_0) tends Chi squared distribution degrees freedom equal difference numbers parameters characterizing two nested distributions. find example test. test can also used fits censored data.","code":"set.seed(1234) g <- rgamma(100, shape = 2, rate = 1) (f <- fitdist(g, \"gamma\")) ## Fitting of the distribution ' gamma ' by maximum likelihood ## Parameters: ## estimate Std. Error ## shape 2.025 0.266 ## rate 0.997 0.149 (f0 <- fitdist(g, \"exp\")) ## Fitting of the distribution ' exp ' by maximum likelihood ## Parameters: ## estimate Std. 
Error ## rate 0.492 0.0492 L <- logLik(f) k <- length(f$estimate) # number of parameters of the complete distribution L0 <- logLik(f0) k0 <- length(f0$estimate) # number of parameters of the simplified distribution (stat <- 2*L - 2*L0) ## [1] 23.9 (critical_value <- qchisq(0.95, df = k - k0)) ## [1] 3.84 (rejected <- stat > critical_value) ## [1] TRUE"},{"path":"https://lbbe-software.github.io/fitdistrplus/articles/FAQ.html","id":"can-we-get-goodness-of-fit-statistics-for-a-fit-on-censored-data","dir":"Articles","previous_headings":"Questions regarding goodness-of-fit tests and statistics, Cullen-Frey graph","what":"Can we get goodness-of-fit statistics for a fit on censored data ?","title":"Frequently Asked Questions","text":"Function gofstat yet proposed package fits censored data develop one among one objectives future. Published works goodness--fit statistics based empirical distribution function censored data generally focused data containing one type censoring (e.g. right censored data survival data). Build statistics general case, data containing time (right, left interval censoring), remains tricky. Nevertheless, possible type censored data, use information criteria (AIC BIC given summary object class fitdistcens) compare fits various distributions data set.","code":""},{"path":"https://lbbe-software.github.io/fitdistrplus/articles/FAQ.html","id":"why-cullen-frey-graph-may-be-misleading","dir":"Articles","previous_headings":"Questions regarding goodness-of-fit tests and statistics, Cullen-Frey graph","what":"Why Cullen-Frey graph may be misleading?","title":"Frequently Asked Questions","text":"considering distribution large theoretical moments infinite moments, using Cullen-Frey may appropriate. typical log-normal distribution ℒ𝒩(μ,σ2)\\mathcal L\\mathcal N(\\mu,\\sigma^2). Indeed distribution, skewness kurtosis functions exponential σ2\\sigma^2. large values, even small σ\\sigma. sk(X)=(eσ2+2)eσ2−1,kr(X)=e4σ2+2e3σ2+3e2σ2−3. sk(X) = (e^{\\sigma^2}+2)\\sqrt{e^{\\sigma^2}-1}, kr(X) = e^{4\\sigma^2} + 2e^{3\\sigma^2} + 3e^{2\\sigma^2}-3. convergence theoretical standardized moments (skewness kurtosis) slow future, plan use trimmed linear moments deal issue. moments always exist even distribution infinite mean, e.g. Cauchy distribution.","code":"n <- 1e3 x <- rlnorm(n) descdist(x) ## summary statistics ## ------ ## min: 0.0436 max: 20.3 ## median: 1.02 ## mean: 1.61 ## estimated sd: 1.89 ## estimated skewness: 3.49 ## estimated kurtosis: 21.9"},{"path":[]},{"path":"https://lbbe-software.github.io/fitdistrplus/articles/FAQ.html","id":"how-to-choose-optimization-method","dir":"Articles","previous_headings":"Questions regarding optimization procedures","what":"How to choose optimization method?","title":"Frequently Asked Questions","text":"want perform optimization without bounds, optim() used. can try derivative-free method Nelder-Mead Hessian-free method BFGS. want perform optimization bounds, two methods available without providing gradient objective function: Nelder-Mead via constrOptim() bounded BFGS via optim(). cases, see help mledist() vignette optimization algorithms.","code":""},{"path":"https://lbbe-software.github.io/fitdistrplus/articles/FAQ.html","id":"the-optimization-algorithm-stops-with-error-code-100--what-shall-i-do","dir":"Articles","previous_headings":"Questions regarding optimization procedures","what":"The optimization algorithm stops with error code 100. 
What shall I do?","title":"Frequently Asked Questions","text":"First, add traces adding control=list(trace=1, REPORT=1). Second, try set bounds parameters. Third, find better starting values (see FAQ 1.3).","code":""},{"path":"https://lbbe-software.github.io/fitdistrplus/articles/FAQ.html","id":"why-distribution-with-a-log-argument-may-converge-better","dir":"Articles","previous_headings":"Questions regarding optimization procedures","what":"Why distribution with a log argument may converge better?","title":"Frequently Asked Questions","text":"Say, study shifted lognormal distribution defined following density f(x)=1xσ2πexp(−(ln(x+δ)−μ)22σ2) f(x) = \\frac{1}{x \\sigma \\sqrt{2 \\pi}} \\exp\\left(- \\frac{(\\ln (x+\\delta)- \\mu)^2}{2\\sigma^2}\\right) x>−δx>-\\delta μ\\mu location parameter, σ\\sigma scale parameter δ\\delta boundary parameter. Let us fit distribution dataset y MLE. define two functions densities without log argument. now optimize minus log-likelihood. don’t use log argument, algorithms stalls. Indeed algorithm stops following value, log-likelihood infinite. something wrong computation. R-base implementation using log argument seems reliable. happens C-base implementation dlnorm takes care log value. file ../src/nmath/dlnorm.c R sources, find C code dlnorm last four lines logical condtion give_log?, see log argument handled: log=TRUE, use −(log(2π)+y2/2+log(xσ))-(\\log(\\sqrt{2\\pi}) + y^2/2+\\log(x\\sigma)) log=FALSE, use 2π*exp(y2/2)/(xσ))\\sqrt{2\\pi} *\\exp( y^2/2)/(x\\sigma)) (logarithm outside dlnorm) Note constant log(2π)\\log(\\sqrt{2\\pi}) pre-computed C macro M_LN_SQRT_2PI. order sort problem, use constrOptim wrapping optim take account linear constraints. allows also use optimization methods L-BFGS-B (low-memory BFGS bounded) used optim. Another possible perform computations higher precision arithmetics implemented package Rmpfr using MPFR library.","code":"dshiftlnorm <- function(x, mean, sigma, shift, log = FALSE) dlnorm(x+shift, mean, sigma, log=log) pshiftlnorm <- function(q, mean, sigma, shift, log.p = FALSE) plnorm(q+shift, mean, sigma, log.p=log.p) qshiftlnorm <- function(p, mean, sigma, shift, log.p = FALSE) qlnorm(p, mean, sigma, log.p=log.p)-shift dshiftlnorm_no <- function(x, mean, sigma, shift) dshiftlnorm(x, mean, sigma, shift) pshiftlnorm_no <- function(q, mean, sigma, shift) pshiftlnorm(q, mean, sigma, shift) data(dataFAQlog1) y <- dataFAQlog1 D <- 1-min(y) f0 <- fitdist(y+D, \"lnorm\") start <- list(mean=as.numeric(f0$estimate[\"meanlog\"]), sigma=as.numeric(f0$estimate[\"sdlog\"]), shift=D) # works with BFGS, but not Nelder-Mead f <- fitdist(y, \"shiftlnorm\", start=start, optim.method=\"BFGS\") summary(f) ## Fitting of the distribution ' shiftlnorm ' by maximum likelihood ## Parameters : ## estimate Std. 
Error ## mean -1.386 0.02401 ## sigma 0.071 0.00192 ## shift 0.248 0.00598 ## Loglikelihood: 8299 AIC: -16591 BIC: -16573 ## Correlation matrix: ## mean sigma shift ## mean 1.000 -0.885 0.999 ## sigma -0.885 1.000 -0.886 ## shift 0.999 -0.886 1.000 f2 <- try(fitdist(y, \"shiftlnorm_no\", start=start, optim.method=\"BFGS\")) ## ## Error in fitdist(y, \"shiftlnorm_no\", start = start, optim.method = \"BFGS\") : ## the function mle failed to estimate the parameters, ## with the error code 100 print(attr(f2, \"condition\")) ## sum(log(dshiftlnorm_no(y, 0.16383978, 0.01679231, 1.17586600 ))) ## [1] -Inf log(prod(dshiftlnorm_no(y, 0.16383978, 0.01679231, 1.17586600 ))) ## [1] -Inf sum(dshiftlnorm(y, 0.16383978, 0.01679231, 1.17586600, TRUE )) ## [1] 7761 double dlnorm(double x, double meanlog, double sdlog, int give_log) { double y; #ifdef IEEE_754 if (ISNAN(x) || ISNAN(meanlog) || ISNAN(sdlog)) return x + meanlog + sdlog; #endif if(sdlog <= 0) { if(sdlog < 0) ML_ERR_return_NAN; // sdlog == 0 : return (log(x) == meanlog) ? ML_POSINF : R_D__0; } if(x <= 0) return R_D__0; y = (log(x) - meanlog) / sdlog; return (give_log ? -(M_LN_SQRT_2PI + 0.5 * y * y + log(x * sdlog)) : M_1_SQRT_2PI * exp(-0.5 * y * y) / (x * sdlog)); /* M_1_SQRT_2PI = 1 / sqrt(2 * pi) */ } -(M_LN_SQRT_2PI + 0.5 * y * y + log(x * sdlog)) M_1_SQRT_2PI * exp(-0.5 * y * y) / (x * sdlog)) f2 <- fitdist(y, \"shiftlnorm\", start=start, lower=c(-Inf, 0, -min(y)), optim.method=\"Nelder-Mead\") ## Warning in cov2cor(varcovar): diag(V) had non-positive or NA entries; the ## non-finite result may be dubious ## Warning in sqrt(diag(varcovar)): NaNs produced summary(f2) ## Fitting of the distribution ' shiftlnorm ' by maximum likelihood ## Parameters : ## estimate Std. Error ## mean -1.3873 NaN ## sigma 0.0711 NaN ## shift 0.2481 NaN ## Loglikelihood: 8299 AIC: -16591 BIC: -16573 ## Correlation matrix: ## mean sigma shift ## mean 1 NaN NaN ## sigma NaN 1 NaN ## shift NaN NaN 1 print(cbind(BFGS=f$estimate, NelderMead=f2$estimate)) ## BFGS NelderMead ## mean -1.386 -1.3873 ## sigma 0.071 0.0711 ## shift 0.248 0.2481"},{"path":"https://lbbe-software.github.io/fitdistrplus/articles/FAQ.html","id":"what-to-do-when-there-is-a-scaling-issue","dir":"Articles","previous_headings":"Questions regarding optimization procedures","what":"What to do when there is a scaling issue?","title":"Frequently Asked Questions","text":"Let us consider dataset particular small values. way sort multiply dataset large value. Let us consider dataset particular large values. way sort multiply dataset small value.","code":"data(dataFAQscale1) head(dataFAQscale1) ## [1] -0.007077 -0.000947 -0.001898 -0.000475 -0.001902 -0.000476 summary(dataFAQscale1) ## Min. 1st Qu. Median Mean 3rd Qu. Max. ## -0.00708 -0.00143 -0.00047 -0.00031 0.00096 0.00428 for(i in 6:0) cat(10^i, try(mledist(dataFAQscale1*10^i, \"cauchy\")$estimate), \"\\n\") ## 1e+06 -290 1194 ## 1e+05 -29 119 ## 10000 -2.9 11.9 ## 1000 -0.29 1.19 ## 100 -0.029 0.119 ## 10 -0.0029 0.0119 ## ## 1 NA NA data(dataFAQscale2) head(dataFAQscale2) ## [1] 1.40e+09 1.41e+09 1.43e+09 1.44e+09 1.49e+09 1.57e+09 summary(dataFAQscale2) ## Min. 1st Qu. Median Mean 3rd Qu. Max. 
## 1.40e+09 1.58e+09 2.24e+09 2.55e+09 3.39e+09 4.49e+09 for(i in 0:5) cat(10^(-2*i), try(mledist(dataFAQscale2*10^(-2*i), \"cauchy\")$estimate), \"\\n\") ## 1 2.03e+09 6.59e+08 ## 0.01 20283641 6594932 ## 1e-04 202836 65949 ## 1e-06 2028 659 ## 1e-08 20.3 6.59 ## 1e-10 0.203 0.0659"},{"path":[]},{"path":"https://lbbe-software.github.io/fitdistrplus/articles/FAQ.html","id":"setting-bounds-for-scale-parameters","dir":"Articles","previous_headings":"Questions regarding optimization procedures > How do I set bounds on parameters when optimizing?","what":"Setting bounds for scale parameters","title":"Frequently Asked Questions","text":"Consider normal distribution 𝒩(μ,σ2)\\mathcal{N}(\\mu, \\sigma^2) defined density f(x)=12πσ2exp(−(x−μ)22σ2),x∈ℝ, f(x) = \\frac{1}{\\sqrt{2\\pi\\sigma^2}}\\exp\\left(-\\frac{(x-\\mu)^2}{2\\sigma^2}\\right), x\\\\mathbb{R}, μ\\mu location parameter μ∈ℝ\\mu\\\\mathbb{R}, σ2\\sigma^2 scale parameter σ2>0\\sigma^2>0. Therefore optimizing log-likelihood squared differences GoF statistics. Setting lower bound scale parameter easy fitdist: just use lower argument.","code":"set.seed(1234) x <- rnorm(1000, 1, 2) fitdist(x, \"norm\", lower=c(-Inf, 0)) ## Fitting of the distribution ' norm ' by maximum likelihood ## Parameters: ## estimate Std. Error ## mean 0.947 0.0630 ## sd 1.994 0.0446"},{"path":"https://lbbe-software.github.io/fitdistrplus/articles/FAQ.html","id":"setting-bounds-for-shape-parameters","dir":"Articles","previous_headings":"Questions regarding optimization procedures > How do I set bounds on parameters when optimizing?","what":"Setting bounds for shape parameters","title":"Frequently Asked Questions","text":"Consider Burr distribution ℬ(μ,σ2)\\mathcal B(\\mu, \\sigma^2) defined density f(x)=ab(x/s)bx[1+(x/s)b]+1,x∈ℝ, f(x) = \\frac{b (x/s)^b}{x [1 + (x/s)^b]^{+ 1}}, x\\\\mathbb{R}, ,ba,b shape parameters ,b>0a,b>0, ss scale parameter s>0s>0.","code":"x <- rburr(1000, 1, 2, 3) fitdist(x, \"burr\", lower=c(0, 0, 0), start=list(shape1 = 1, shape2 = 1, rate = 1)) ## Fitting of the distribution ' burr ' by maximum likelihood ## Parameters: ## estimate Std. Error ## shape1 0.969 0.0334 ## shape2 2.051 0.0367 ## rate 3.180 0.0516"},{"path":"https://lbbe-software.github.io/fitdistrplus/articles/FAQ.html","id":"setting-bounds-for-probability-parameters","dir":"Articles","previous_headings":"Questions regarding optimization procedures > How do I set bounds on parameters when optimizing?","what":"Setting bounds for probability parameters","title":"Frequently Asked Questions","text":"Consider geometric distribution 𝒢(p)\\mathcal G(p) defined mass probability function f(x)=p(1−p)x,x∈ℕ, f(x) = p(1-p)^x, x\\\\mathbb{N}, pp probability parameter p∈[0,1]p\\[0,1].","code":"x <- rgeom(1000, 1/4) fitdist(x, \"geom\", lower=0, upper=1) ## Fitting of the distribution ' geom ' by maximum likelihood ## Parameters: ## estimate Std. Error ## prob 0.242 0.00666"},{"path":"https://lbbe-software.github.io/fitdistrplus/articles/FAQ.html","id":"setting-bounds-for-boundary-parameters","dir":"Articles","previous_headings":"Questions regarding optimization procedures > How do I set bounds on parameters when optimizing?","what":"Setting bounds for boundary parameters","title":"Frequently Asked Questions","text":"Consider shifted exponential distribution ℰ(μ,λ)\\mathcal E(\\mu,\\lambda) defined mass probability function f(x)=λexp(−λ(x−μ)),x>μ, f(x) = \\lambda \\exp(-\\lambda(x-\\mu)), x>\\mu, λ\\lambda scale parameter λ>0\\lambda>0, μ\\mu boundary (shift) parameter μ∈ℝ\\mu\\\\mathbb{R}. 
optimizing log-likelihood, boundary constraint ∀=1,…,n,xi>μ⇒mini=1,…,nxi>μ⇔μ>−mini=1,…,nxi. \\forall =1,\\dots,n, x_i>\\mu \\Rightarrow \\min_{=1,\\dots,n} x_i > \\mu \\Leftrightarrow \\mu > -\\min_{=1,\\dots,n} x_i. Note optimizing squared differences GoF statistics, constraint may necessary. Let us R.","code":"dsexp <- function(x, rate, shift) dexp(x-shift, rate=rate) psexp <- function(x, rate, shift) pexp(x-shift, rate=rate) rsexp <- function(n, rate, shift) rexp(n, rate=rate)+shift x <- rsexp(1000, 1/4, 1) fitdist(x, \"sexp\", start=list(rate=1, shift=0), lower= c(0, -min(x))) ## Fitting of the distribution ' sexp ' by maximum likelihood ## Parameters: ## estimate Std. Error ## rate 0.248 4.71e-10 ## shift 1.005 NaN"},{"path":"https://lbbe-software.github.io/fitdistrplus/articles/FAQ.html","id":"setting-linear-inequality-bounds","dir":"Articles","previous_headings":"Questions regarding optimization procedures > How do I set bounds on parameters when optimizing?","what":"Setting linear inequality bounds","title":"Frequently Asked Questions","text":"distributions, bounds parameters independent. instance, normal inverse Gaussian distribution (μ,δ,α,β\\mu, \\delta, \\alpha, \\beta parametrization) following parameter constraints, can reformulated linear inequality: {α>0δ>0α>|β|⇔(01000010001−10011)⏟ui(μδαβ)≥(0000)⏟ci. \\left\\{ \\begin{array}{l}\\alpha > 0\\\\ \\delta >0\\\\ \\alpha > |\\beta|\\end{array} \\right. \\Leftrightarrow \\underbrace{ \\left( \\begin{matrix} 0 & 1 & 0 & 0 \\\\ 0 & 0 & 1 & 0 \\\\ 0 & 0 & 1 & -1 \\\\ 0 & 0 & 1 & 1 \\\\ \\end{matrix} \\right) }_{ui} \\left( \\begin{matrix} \\mu\\\\ \\delta\\\\ \\alpha \\\\ \\beta \\\\ \\end{matrix} \\right) \\geq \\underbrace{ \\left( \\begin{matrix} 0\\\\ 0\\\\ 0 \\\\ 0 \\\\ \\end{matrix} \\right)}_{ci}. constraints can carried via constrOptim() arguments ci ui. example","code":"library(GeneralizedHyperbolic) myoptim <- function(fn, par, ui, ci, ...) { res <- constrOptim(f=fn, theta=par, method=\"Nelder-Mead\", ui=ui, ci=ci, ...) c(res, convergence=res$convergence, value=res$objective, par=res$minimum, hessian=res$hessian) } x <- rnig(1000, 3, 1/2, 1/2, 1/4) ui <- rbind(c(0,1,0,0), c(0,0,1,0), c(0,0,1,-1), c(0,0,1,1)) ci <- c(0,0,0,0) fitdist(x, \"nig\", custom.optim=myoptim, ui=ui, ci=ci, start=list(mu = 0, delta = 1, alpha = 1, beta = 0)) ## Warning in fitdist(x, \"nig\", custom.optim = myoptim, ui = ui, ci = ci, start = ## list(mu = 0, : The dnig function should return a vector of with NaN values when ## input has inconsistent parameters and not raise an error ## Warning in fitdist(x, \"nig\", custom.optim = myoptim, ui = ui, ci = ci, start = ## list(mu = 0, : The pnig function should return a vector of with NaN values when ## input has inconsistent values and not raise an error ## Fitting of the distribution ' nig ' by maximum likelihood ## Parameters: ## estimate ## mu 2.985 ## delta 0.457 ## alpha 0.466 ## beta 0.237"},{"path":"https://lbbe-software.github.io/fitdistrplus/articles/FAQ.html","id":"how-works-quantile-matching-estimation-for-discrete-distributions","dir":"Articles","previous_headings":"Questions regarding optimization procedures","what":"How works quantile matching estimation for discrete distributions?","title":"Frequently Asked Questions","text":"Let us consider geometric distribution values {0,1,2,3,…}\\{0,1,2,3,\\dots\\}. probability mass function, cumulative distribution function quantile function P(X=x)=p(1−p)⌊x⌋,FX(x)=1−(1−p)⌊x⌋,FX−1(q)=⌊log(1−q)log(1−p)⌋. 
P(X=x)= p (1-p)^{\\lfloor x\\rfloor}, F_X(x) = 1- (1-p)^{\\lfloor x\\rfloor}, F_X^{-1}(q) = \\left\\lfloor\\frac{\\log(1-q)}{\\log(1-p)}\\right\\rfloor. Due integer part (floor function), distribution function quantile function step functions. Now study QME geometric distribution. Since one parameter, choose one probabiliy, p=1/2p=1/2. theoretical median following integer FX−1(1/2)=⌊log(1/2)log(1−p)⌋. F_X^{-1}(1/2) = \\left\\lfloor\\frac{\\log(1/2)}{\\log(1-p)}\\right\\rfloor. Note theoretical median discrete distribution integer. Empirically, median may integer. Indeed even length dataset, empirical median qn,1/2=xn/2⋆+xn/2+1⋆2, q_{n,1/2} = \\frac{x_{n/2}^\\star + x_{n/2+1}^\\star}{2}, x1⋆<…= low) * (x <= upp) } ptgamma <- function(q, shape, rate, low, upp) { PU <- pgamma(upp, shape = shape, rate = rate) PL <- pgamma(low, shape = shape, rate = rate) (pgamma(q, shape, rate) - PL) / (PU - PL) * (q >= low) * (q <= upp) + 1 * (q > upp) } rtgamma <- function(n, shape, rate, low=0, upp=Inf, maxit=10) { stopifnot(n > 0) if(low > upp) return(rep(NaN, n)) PU <- pgamma(upp, shape = shape, rate = rate) PL <- pgamma(low, shape = shape, rate = rate) #simulate directly expected number of random variate n2 <- n/(PU-PL) x <- rgamma(n, shape=shape, rate=rate) x <- x[x >= low & x <= upp] i <- 0 while(length(x) < n && i < maxit) { n2 <- (n-length(x))/(PU-PL) y <- rgamma(n2, shape=shape, rate=rate) x <- c(x, y[y >= low & y <= upp]) i <- i+1 } x[1:n] } n <- 100 ; shape <- 11 ; rate <- 3 ; x0 <- 5 x <- rtgamma(n, shape = shape, rate = rate, low=x0) fit.NM.2P <- fitdist( data = x, distr = \"tgamma\", method = \"mle\", start = list(shape = 10, rate = 10), fix.arg = list(upp = Inf, low=x0), lower = c(0, 0), upper=c(Inf, Inf)) ## Warning in cov2cor(varcovar): diag(V) had non-positive or NA entries; the ## non-finite result may be dubious ## Warning in sqrt(diag(varcovar)): NaNs produced fit.NM.3P <- fitdist( data = x, distr = \"tgamma\", method = \"mle\", start = list(shape = 10, rate = 10, low=1), fix.arg = list(upp = Inf), lower = c(0, 0, -Inf), upper=c(Inf, Inf, min(x))) ## Warning in cov2cor(varcovar): diag(V) had non-positive or NA entries; the ## non-finite result may be dubious ## Warning in cov2cor(varcovar): NaNs produced ## fit3P fit2P true value ## shape 11.8503 57.13 11 ## rate 3.2982 10.92 3 ## low 5.0094 5.00 5 ## mean sq. error 0.2707 730.30 0 ## rel. error 0.0595 2.28 0 fit.gamma <- fitdist( data = x-x0, distr = \"gamma\", method = \"mle\") ## fit3P fit2P orig. data fit2P shift data true value ## shape 11.8503 57.13 1.498 11 ## rate 3.2982 10.92 2.289 3 ## low 5.0094 5.00 5.000 5 ## mean sq. error 0.2707 730.30 30.266 0 ## rel. error 0.0595 2.28 0.367 0 ## Warning in cov2cor(varcovar): diag(V) had non-positive or NA entries; the ## non-finite result may be dubious ## Warning in sqrt(diag(varcovar)): NaNs produced ## Warning in cov2cor(varcovar): diag(V) had non-positive or NA entries; the ## non-finite result may be dubious ## Warning in sqrt(diag(varcovar)): NaNs produced ## fit3P fit2P orig. data true value ## shape 11.7976 15.489 11 ## rate 3.0813 3.679 3 ## low 5.0001 5.000 5 ## mean sq. error 0.2143 6.871 0 ## rel. 
error 0.0332 0.212 0"},{"path":[]},{"path":"https://lbbe-software.github.io/fitdistrplus/articles/FAQ.html","id":"can-we-compute-marginal-confidence-intervals-on-parameter-estimates-from-their-reported-standard-error","dir":"Articles","previous_headings":"Questions regarding uncertainty","what":"Can we compute marginal confidence intervals on parameter estimates from their reported standard error ?","title":"Frequently Asked Questions","text":"statistics, deriving marginal confidence intervals MLE parameter estimates using approximation standard errors (calculated hessian) quite common procedure. based wald approximation stands sample size nn sufficiently high, marginal 95%95\\% confidence ith component θi\\theta_i model parameter θ\\theta estimated maximum likelihood (estimate denoted θ̂\\hat \\theta) can approximated : θ̂±1.96×SE(θ̂)\\hat \\theta_i \\pm 1.96 \\times SE(\\hat \\theta_i ) SE(θ̂)SE(\\hat \\theta_i ) ith term diagonal covariance matrix estimates (ViiV_{ii}). VV generally approximated inverse Fisher information matrix ((θ̂)(\\hat \\theta)). Fisher information matrix corresponds opposite hessian matrix evaluated MLE estimate. Let us recall hessian matrix defined Hij(y,θ)=∂2L(y,θ)∂θi∂θjH_{ij}(y, \\theta) = \\frac{\\partial^2 L(y, \\theta)}{\\partial \\theta_i \\partial \\theta_j} L(y,θ)L(y, \\theta) loglikelihod function data yy parameter θ\\theta. using approximation, one must keep mind validity depend sample size. also strongly depends data, distribution, also parameterization distribution. reason recommend potential users Wald approximation compare results ones obtained using bootstrap procedure (see ) using approximation. look loglikelihood contours also interesting Wald approximation assumes elliptical contours. general context, recommend use bootstrap compute confidence intervals parameters function parameters. find two examples, one Wald confidence intervals seem correct one give wrong results, parameter values even outside possible range (negative rate bound gamma distribution).","code":"set.seed(1234) n <- rnorm(30, mean = 10, sd = 2) fn <- fitdist(n, \"norm\") bn <- bootdist(fn) bn$CI ## Median 2.5% 97.5% ## mean 9.41 8.78 10.02 ## sd 1.73 1.33 2.15 fn$estimate + cbind(\"estimate\"= 0, \"2.5%\"= -1.96*fn$sd, \"97.5%\"= 1.96*fn$sd) ## estimate 2.5% 97.5% ## mean 9.41 8.77 10.04 ## sd 1.78 1.33 2.22 par(mfrow=c(1,1), mar=c(4,4,2,1)) llplot(fn, back.col = FALSE) set.seed(1234) g <- rgamma(30, shape = 0.1, rate = 10) fg <- fitdist(g, \"gamma\") bg <- bootdist(fg) bg$CI ## Median 2.5% 97.5% ## shape 0.0923 0.0636 0.145 ## rate 30.1018 9.6288 147.323 fg$estimate + cbind(\"estimate\"= 0, \"2.5%\"= -1.96*fg$sd, \"97.5%\"= 1.96*fg$sd) ## estimate 2.5% 97.5% ## shape 0.0882 0.0553 0.121 ## rate 24.2613 -6.3431 54.866 par(mfrow=c(1,1), mar=c(4,4,2,1)) llplot(fg, back.col = FALSE)"},{"path":"https://lbbe-software.github.io/fitdistrplus/articles/FAQ.html","id":"how-can-we-compute-confidence-intervals-on-quantiles-from-the-fit-of-a-distribution","dir":"Articles","previous_headings":"Questions regarding uncertainty","what":"How can we compute confidence intervals on quantiles from the fit of a distribution ?","title":"Frequently Asked Questions","text":"quantile() function can used calculate quantile fitted distribution called object class fitdist fitdistcens first argument. called object class bootdist bootdistcens first argument, quantiles returned accompanied confidence interval calculated using bootstraped sample parameters. 
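Before turning to the censored example below, here is a minimal sketch on the uncensored normal fit of the previous entry, reusing the fn and bn objects created above (the probabilities are chosen purely for illustration):
# pointwise bootstrap confidence intervals for several quantiles of the fitted normal distribution
quantile(bn, probs = c(0.05, 0.5, 0.95))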
Moreover, can use CIcdfplot() function plot fitted distribution CDF curve surrounded band corresponding pointwise intervals quantiles. See example censored data corresponding 72-hour acute salinity tolerance (LC50values) rivermarine invertebrates.","code":"data(salinity) log10LC50 <-log10(salinity) fit <- fitdistcens(log10LC50, \"norm\") # Bootstrap bootsample <- bootdistcens(fit, niter = 101) #### We used only 101 iterations in that example to limit the calculation time but #### in practice you should take at least 1001 bootstrap iterations # Calculation of the quantile of interest (here the 5 percent hazard concentration) (HC5 <- quantile(bootsample, probs = 0.05)) ## (original) estimated quantiles for each specified probability (censored data) ## p=0.05 ## estimate 1.12 ## Median of bootstrap estimates ## p=0.05 ## estimate 1.12 ## ## two-sided 95 % CI of each quantile ## p=0.05 ## 2.5 % 1.05 ## 97.5 % 1.20 # visualizing pointwise confidence intervals on other quantiles par(mfrow=c(1,1), mar=c(4,4,2,1)) CIcdfplot(bootsample, CI.output = \"quantile\", CI.fill = \"pink\", xlim = c(0.5,2), main = \"\")"},{"path":"https://lbbe-software.github.io/fitdistrplus/articles/FAQ.html","id":"how-can-we-compute-confidence-intervals-on-any-function-of-the-parameters-of-the-fitted-distribution","dir":"Articles","previous_headings":"Questions regarding uncertainty","what":"How can we compute confidence intervals on any function of the parameters of the fitted distribution ?","title":"Frequently Asked Questions","text":"bootstrap sample parameter estimates can used calculate bootstrap sample variable defined function parameters fitted distribution. bootstrap sample can easily compute conidence interval using percentiles. example uses bootstrap sample parameters previous example (FAQ 4.2) calculate 95 percent confidence interval Potentially Affected Portion (PAF) species given exposure salinity (fixed 1.2 log10 example). complex calculations especially tranfer uncertainty within quantitative risk assessment, recommend use package mc2d aims making calculations easy gives extensive examples use bootstrap samples parameters estimated using functions package fitdistrplus.","code":"exposure <- 1.2 # Bootstrap sample of the PAF at this exposure PAF <- pnorm(exposure, mean = bootsample$estim$mean, sd = bootsample$estim$sd) # confidence interval from 2.5 and 97.5 percentiles quantile(PAF, probs = c(0.025, 0.975)) ## 2.5% 97.5% ## 0.0487 0.1470"},{"path":"https://lbbe-software.github.io/fitdistrplus/articles/FAQ.html","id":"how-do-we-choose-the-bootstrap-number","dir":"Articles","previous_headings":"Questions regarding uncertainty","what":"How do we choose the bootstrap number?","title":"Frequently Asked Questions","text":"Generally, need choose number bootstrap values high original sample size. search number mean standard values become stable. 
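One way to check this stability is to track the bootstrap means and standard deviations of the parameter estimates over increasing values of niter; a minimal sketch, assuming a fitted object such as the f.ln.MME object created in the example just below:
# compare bootstrap means and standard deviations of the estimates for increasing niter
stab <- sapply(c(50, 100, 200, 500), function(nb) {
  b <- bootdist(f.ln.MME, niter = nb)
  c(colMeans(b$estim), apply(b$estim, 2, sd))
})
colnames(stab) <- c(50, 100, 200, 500)
stab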
log-normal example , enough 100 bootstrap values.","code":"f.ln.MME <- fitdist(rlnorm(1000), \"lnorm\", method = \"mme\", order = 1:2) # Bootstrap b.ln.50 <- bootdist(f.ln.MME, niter = 50) b.ln.100 <- bootdist(f.ln.MME, niter = 100) b.ln.200 <- bootdist(f.ln.MME, niter = 200) b.ln.500 <- bootdist(f.ln.MME, niter = 500) d1 <- density(b.ln.50, b.ln.100, b.ln.200, b.ln.500) plot(d1)"},{"path":[]},{"path":"https://lbbe-software.github.io/fitdistrplus/articles/FAQ.html","id":"can-i-personalize-the-default-plot-given-for-an-object-of-class-fitdist-or-fitdistcens","dir":"Articles","previous_headings":"How to personalize plots","what":"Can I personalize the default plot given for an object of class fitdist or fitdistcens?","title":"Frequently Asked Questions","text":"default plot given using plot() function object class fitdist fitdistcens hard personalize. Indeed plot designed give quick overview fit, used graph manuscript formal presentation. personalize () goodness--fit plots, rather use specific graphical functions, denscomp, cdfcomp, ppcomp, qqcomp cdfcompcens (see following paragraphs).","code":""},{"path":"https://lbbe-software.github.io/fitdistrplus/articles/FAQ.html","id":"how-to-personalize-goodness-of-fit-plots","dir":"Articles","previous_headings":"How to personalize plots","what":"How to personalize goodness-of-fit plots ?","title":"Frequently Asked Questions","text":"default plot object class fitdist can easily reproduced personalized using denscomp, cdfcomp, ppcomp qqcomp. similar way, default plot object class fitdistcens can easily personalized using cdfcompcens.","code":"data(groundbeef) serving <- groundbeef$serving fit <- fitdist(serving, \"gamma\") par(mfrow = c(2,2), mar = c(4, 4, 1, 1)) denscomp(fit, addlegend = FALSE, main = \"\", xlab = \"serving sizes (g)\", fitcol = \"orange\") qqcomp(fit, addlegend = FALSE, main = \"\", fitpch = 16, fitcol = \"grey\", line01lty = 2) cdfcomp(fit, addlegend = FALSE, main = \"\", xlab = \"serving sizes (g)\", fitcol = \"orange\", lines01 = TRUE) ppcomp(fit, addlegend = FALSE, main = \"\", fitpch = 16, fitcol = \"grey\", line01lty = 2)"},{"path":"https://lbbe-software.github.io/fitdistrplus/articles/FAQ.html","id":"is-it-possible-to-obtain-ggplot2-plots","dir":"Articles","previous_headings":"How to personalize plots","what":"Is it possible to obtain ggplot2 plots ?","title":"Frequently Asked Questions","text":"argument plotstyle added functions denscomp, cdfcomp, ppcomp, qqcompand cdfcompcens, ppcompcens, qqcompcens enable generation plots using ggplot2 package. argument default fixed graphics must simply fixed ggplot purpose, following example. 
latter case graphical functions return graphic object can personalized using ggplot2 functions.","code":"library(ggplot2) fitW <- fitdist(serving, \"weibull\") fitln <- fitdist(serving, \"lnorm\") fitg <- fitdist(serving, \"gamma\") dcomp <- denscomp(list(fitW, fitln, fitg), legendtext = c(\"Weibull\", \"lognormal\", \"gamma\"), xlab = \"serving sizes (g)\", xlim = c(0, 250), fitcol = c(\"red\", \"green\", \"orange\"), fitlty = 1, fitlwd = 1:3, xlegend = \"topright\", plotstyle = \"ggplot\", addlegend = FALSE) dcomp + ggplot2::theme_minimal() + ggplot2::ggtitle(\"Ground beef fits\")"},{"path":"https://lbbe-software.github.io/fitdistrplus/articles/FAQ.html","id":"is-it-possible-to-add-the-names-of-the-observations-in-a-goodness-of-fit-plot-e-g--the-names-of-the-species-in-the-plot-of-the-species-sensitivity-distribution-ssd-classically-used-in-ecotoxicology","dir":"Articles","previous_headings":"How to personalize plots","what":"Is it possible to add the names of the observations in a goodness-of-fit plot, e.g. the names of the species in the plot of the Species Sensitivity Distribution (SSD) classically used in ecotoxicology ?","title":"Frequently Asked Questions","text":"argument named name.points can used functions cdfcomp CIcdfcomp pass label vector observed points add names points left point. option available ECDF goodness--fit plots non censored data. option can used , example, name species classical plot Species Sensitivity Distributions (SSD) ecotoxicology.","code":"data(endosulfan) ATV <- subset(endosulfan, group == \"NonArthroInvert\")$ATV taxaATV <- subset(endosulfan, group == \"NonArthroInvert\")$taxa f <- fitdist(ATV, \"lnorm\") cdfcomp(f, xlogscale = TRUE, main = \"Species Sensitivty Distribution\", xlim = c(1, 100000), name.points = taxaATV, addlegend = FALSE, plotstyle = \"ggplot\")"},{"path":[]},{"path":"https://lbbe-software.github.io/fitdistrplus/articles/FAQ.html","id":"how-to-code-censored-data-in-fitdistrplus","dir":"Articles","previous_headings":"Questions regarding (left, right and/or interval) censored data","what":"How to code censored data in fitdistrplus ?","title":"Frequently Asked Questions","text":"Censored data must rpresented package dataframe two columns respectively named left right, describing observed value interval. left column contains either NA left censored observations, left bound interval interval censored observations, observed value non-censored observations. right column contains either NA right censored observations, right bound interval interval censored observations, observed value non-censored observations. type representation corresponds coding names \"interval2\" function Surv package survival. way represent censored data fitdistrplus function Surv2fitdistcens() can used help format data use fitdistcens() one format used survival package (see help page Surv2fitdistcens()). 
toy example .","code":"dtoy <- data.frame(left = c(NA, 2, 4, 6, 9.7, 10), right = c(1, 3, 7, 8, 9.7, NA)) dtoy ## left right ## 1 NA 1.0 ## 2 2.0 3.0 ## 3 4.0 7.0 ## 4 6.0 8.0 ## 5 9.7 9.7 ## 6 10.0 NA"},{"path":"https://lbbe-software.github.io/fitdistrplus/articles/FAQ.html","id":"how-do-i-prepare-the-input-of-fitdistcens-with-surv2fitdistcens","dir":"Articles","previous_headings":"Questions regarding (left, right and/or interval) censored data","what":"How do I prepare the input of fitdistcens() with Surv2fitdistcens()?","title":"Frequently Asked Questions","text":"Let us consider classical right-censored dataset human life: twenty values randomly chosen canlifins dataset CASdatasets package. refer help Surv2fitdistcens() censoring types. performing survival analysis, common use Surv() function package survival handle different types censoring. order ease use fitdistcens(), dedicated function Surv2fitdistcens() implemented arguments similar ones Surv(). Let us now fit two simple distributions.","code":"exitage <- c(81.1,78.9,72.6,67.9,60.1,78.3,83.4,66.9,74.8,80.5,75.6,67.1, 75.3,82.8,70.1,85.4,74,70,71.6,76.5) death <- c(0,0,1,0,0,0,0,1,0,0,0,0,0,0,1,1,0,0,0,0) svdata <- Surv2fitdistcens(exitage, event=death) flnormc <- fitdistcens(svdata, \"lnorm\") fweic <- fitdistcens(svdata, \"weibull\") par(mfrow=c(1,1), mar=c(4,4,2,1)) cdfcompcens(list(fweic, flnormc), xlim=range(exitage), xlegend = \"topleft\")"},{"path":"https://lbbe-software.github.io/fitdistrplus/articles/FAQ.html","id":"how-to-represent-an-empirical-distribution-from-censored-data","dir":"Articles","previous_headings":"Questions regarding (left, right and/or interval) censored data","what":"How to represent an empirical distribution from censored data ?","title":"Frequently Asked Questions","text":"representation empirical distribution censored data trivial problem. One can simply represent observation interval y-value defined rank observation done using function plotdistcens. representation can interesting visualize raw data, remains difficult correctly order observations case (see example right using data smokedfish). Many authors worked development algorithms non parametric maximum likelihood estimation (NPMLE) empirical cumulative distribution function (ECDF) interval censored data (including left right censored data can considered interval censored data one bound infinity). old versions fitdistrplus used Turnbull algorithm using calls functions package survival. Even Turnbull algorithm still available package, default plot now uses function npsurv package npsurv. package provides performant algorithms developped Yong Wang (see references cited help page plotdistcens). Due lack maintenance package forced rewrite main functions package, using another optimization function. ECDF plot also implemented using Turnbull algorithm survival (see ). can see example, new implementation NPMLE provides different type plot ECDF, representing filled rectangles zones non-uniqueness NPMLE ECDF. Indeed NPMLE algorithm generally proceeds two steps. first step aims identifying equivalence classes (also named litterture Turnbull intervals maximal intersection intervals innermost intervals maximal cliques data). Equivalences classess points/intervals NPMLE ECDF may change. Equivalence classes shown correspond regions left bound interval (named L following plot previous toy example) immediately followed right bound interval (named R following plot). equivalence class may null length (example non censored value). 
second step aims assigning probability mass equivalence class, may zero classes. NPMLE unique equivalence classes non uniqueness NPMLE ECDF represented filled rectangles. Various NPMLE algorithms implemented packages Icens, interval npsurv. less performant enable handling data survival data, especially left censored observations.","code":"par(mfrow = c(1,2), mar = c(3, 4, 3, 0.5)) plotdistcens(dtoy, NPMLE = FALSE) data(smokedfish) dsmo <- log10(smokedfish) plotdistcens(dsmo, NPMLE = FALSE) par(mfrow = c(2, 2), mar = c(3, 4, 3, 0.5)) # Turnbull algorithm with representation of middle points of equivalence classes plotdistcens(dsmo, NPMLE.method = \"Turnbull.middlepoints\", xlim = c(-1.8, 2.4)) # Turnbull algorithm with representation of equivalence classes as intervals plotdistcens(dsmo, NPMLE.method = \"Turnbull.intervals\") # Wang algorithm with representation of equivalence classes as intervals plotdistcens(dsmo, NPMLE.method = \"Wang\")"},{"path":"https://lbbe-software.github.io/fitdistrplus/articles/FAQ.html","id":"how-to-assess-the-goodness-of-fit-of-a-distribution-fitted-on-censored-data","dir":"Articles","previous_headings":"Questions regarding (left, right and/or interval) censored data","what":"How to assess the goodness-of-fit of a distribution fitted on censored data ?","title":"Frequently Asked Questions","text":"available method fitdistrplus fit distributions censored data maximum likelihood estimation (MLE). distribution fitted using fitdistcens, AIC BIC values can found summary object class fitdistcens returned function. values can used compare fit various distributions dataset. Function gofstat yet proposed package fits censored data plan develop future calculation goodness--fit statistics censored data. Considering goodness--fit plots, generic plot function object class fitdistcensprovides three plots, one CDF using NPMLE ECDF plot (default using Wang prepresentation, see previous part details), Q-Q plot P-P plot simply derived Wang plot ECDF, filled rectangles indicating non uniqueness NPMLE ECDF. Functions cdfcompcens(), qqcompens() ppcompcens() can used individualize personnalize CDF, Q-Q P-P goodness--fit plots /compare fit various distributions dataset. Considering Q-Q plots P-P plots, may easier compare various fits splitting plots done automatically using plotstyle ggplot qqcompens() ppcompcens() can also done manually plotstyle graphics.","code":"fnorm <- fitdistcens(dsmo,\"norm\") flogis <- fitdistcens(dsmo,\"logis\") # comparison of AIC values summary(fnorm)$aic ## [1] 178 summary(flogis)$aic ## [1] 177 par(mar = c(2, 4, 3, 0.5)) plot(fnorm) par(mfrow=c(1,1), mar=c(4,4,2,1)) cdfcompcens(list(fnorm, flogis), fitlty = 1) qqcompcens(list(fnorm, flogis)) ppcompcens(list(fnorm, flogis)) qqcompcens(list(fnorm, flogis), lwd = 2, plotstyle = \"ggplot\", fitcol = c(\"red\", \"green\"), fillrect = c(\"pink\", \"lightgreen\"), legendtext = c(\"normal distribution\", \"logistic distribution\"))"},{"path":"https://lbbe-software.github.io/fitdistrplus/articles/Optimalgo.html","id":"quick-overview-of-main-optimization-methods","dir":"Articles","previous_headings":"","what":"Quick overview of main optimization methods","title":"Which optimization algorithm to choose?","text":"present quickly main optimization methods. Please refer Numerical Optimization (Nocedal & Wright, 2006) Numerical Optimization: theoretical practical aspects (Bonnans, Gilbert, Lemarechal & Sagastizabal, 2006) good introduction. 
consider following problem \\(\\min_x f(x)\\) \\(x\\\\mathbb{R}^n\\).","code":""},{"path":"https://lbbe-software.github.io/fitdistrplus/articles/Optimalgo.html","id":"derivative-free-optimization-methods","dir":"Articles","previous_headings":"1 Quick overview of main optimization methods","what":"Derivative-free optimization methods","title":"Which optimization algorithm to choose?","text":"Nelder-Mead method one well known derivative-free methods use values \\(f\\) search minimum. consists building simplex \\(n+1\\) points moving/shrinking simplex good direction. set initial points \\(x_1, \\dots, x_{n+1}\\). order points \\(f(x_1)\\leq f(x_2)\\leq\\dots\\leq f(x_{n+1})\\). compute \\(x_o\\) centroid \\(x_1, \\dots, x_{n}\\). compute reflected point \\(x_r = x_o + \\alpha(x_o-x_{n+1})\\). \\(f(x_1)\\leq f(x_r) 1.2 Hessian-free optimization methods","what":"Computing the direction \\(d_k\\)","title":"Which optimization algorithm to choose?","text":"desirable property \\(d_k\\) \\(d_k\\) ensures descent \\(f(x_{k+1}) < f(x_{k})\\). Newton methods \\(d_k\\) minimizes local quadratic approximation \\(f\\) based Taylor expansion, \\(q_f(d) = f(x_k) + g(x_k)^Td +\\frac{1}{2} d^T H(x_k) d\\) \\(g\\) denotes gradient \\(H\\) denotes Hessian. consists using exact solution local minimization problem \\(d_k = - H(x_k)^{-1} g(x_k)\\). practice, methods preferred (least ensure positive definiteness). method approximates Hessian matrix \\(H_k\\) function \\(H_{k-1}\\), \\(x_k\\), \\(f(x_k)\\) \\(d_k\\) solves system \\(H_k d = - g(x_k)\\). implementation may also directly approximate inverse Hessian \\(W_k\\) order compute \\(d_k = -W_k g(x_k)\\). Using Sherman-Morrison-Woodbury formula, can switch \\(W_k\\) \\(H_k\\). determine \\(W_k\\), first must verify secant equation \\(H_k y_k =s_k\\) \\(y_k=W_k s_k\\) \\(y_k = g_{k+1}-g_k\\) \\(s_k=x_{k+1}-x_k\\). define \\(n(n-1)\\) terms, generally impose symmetry minimum distance conditions. say rank 2 update \\(H_k = H_{k-1} + u u^T + b v v^T\\) rank 1 update $H_k = H_{k-1} + u u^T $. Rank \\(n\\) update justified spectral decomposition theorem. two rank-2 updates symmetric preserve positive definiteness DFP minimizes \\(\\min || H - H_k ||_F\\) \\(H=H^T\\): \\[ H_{k+1} = \\left (-\\frac {y_k s_k^T} {y_k^T s_k} \\right ) H_k \\left (-\\frac {s_k y_k^T} {y_k^T s_k} \\right )+\\frac{y_k y_k^T} {y_k^T s_k} \\Leftrightarrow W_{k+1} = W_k + \\frac{s_k s_k^T}{y_k^{T} s_k} - \\frac {W_k y_k y_k^T W_k^T} {y_k^T W_k y_k} . \\] BFGS minimizes \\(\\min || W - W_k ||_F\\) \\(W=W^T\\): \\[ H_{k+1} = H_k - \\frac{ H_k y_k y_k^T H_k }{ y_k^T H_k y_k } + \\frac{ s_k s_k^T }{ y_k^T s_k } \\Leftrightarrow W_{k+1} = \\left (-\\frac {y_k s_k^T} {y_k^T s_k} \\right )^T W_k \\left (-\\frac { y_k s_k^T} {y_k^T s_k} \\right )+\\frac{s_k s_k^T} {y_k^T s_k} . \\] R, -called BFGS scheme implemented optim. Another possible method (initially arised quadratic problems) nonlinear conjugate gradients. consists computing directions \\((d_0, \\dots, d_k)\\) conjugate respect matrix close true Hessian \\(H(x_k)\\). Directions computed iteratively \\(d_k = -g(x_k) + \\beta_k d_{k-1}\\) \\(k>1\\), initiated \\(d_1 = -g(x_1)\\). \\(\\beta_k\\) updated according scheme: \\(\\beta_k = \\frac{ g_k^T g_k}{g_{k-1}^T g_{k-1} }\\): Fletcher-Reeves update, \\(\\beta_k = \\frac{ g_k^T (g_k-g_{k-1} )}{g_{k-1}^T g_{k-1}}\\): Polak-Ribiere update. exists also three-term formula computing direction \\(d_k = -g(x_k) + \\beta_k d_{k-1}+\\gamma_{k} d_t\\) \\(tt+1\\) otherwise \\(\\gamma_k=0\\) \\(k=t\\). 
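In R, these conjugate gradient updates can be tried directly with optim, where control$type selects the Fletcher-Reeves (1), Polak-Ribiere (2) or Beale-Sorenson (3) update for method "CG"; a minimal sketch on a simple quadratic (the objective and starting point are illustrative only):
# minimize f(x) = sum(x^2), whose gradient is 2*x
f  <- function(x) sum(x^2)
gr <- function(x) 2 * x
x0 <- c(2, -3)
optim(x0, f, gr, method = "CG", control = list(type = 1))  # Fletcher-Reeves
optim(x0, f, gr, method = "CG", control = list(type = 2))  # Polak-Ribiere
optim(x0, f, gr, method = "CG", control = list(type = 3))  # Beale-Sorenson
optim(x0, f, gr, method = "BFGS")                          # quasi-Newton, for comparison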
See Yuan (2006) well-known schemes Hestenses-Stiefel, Dixon Conjugate-Descent. three updates (Fletcher-Reeves, Polak-Ribiere, Beale-Sorenson) (non-linear) conjugate gradient available optim.","code":""},{"path":"https://lbbe-software.github.io/fitdistrplus/articles/Optimalgo.html","id":"computing-the-stepsize-t_k","dir":"Articles","previous_headings":"1 Quick overview of main optimization methods > 1.2 Hessian-free optimization methods","what":"Computing the stepsize \\(t_k\\)","title":"Which optimization algorithm to choose?","text":"Let \\(\\phi_k(t) = f(x_k + t d_k)\\) given direction/iterate \\((d_k, x_k)\\). need find conditions find satisfactory stepsize \\(t_k\\). literature, consider descent condition: \\(\\phi_k'(0) < 0\\) Armijo condition: \\(\\phi_k(t) \\leq \\phi_k(0) + t c_1 \\phi_k'(0)\\) ensures decrease \\(f\\). Nocedal & Wright (2006) presents backtracking (geometric) approach satisfying Armijo condition minimal condition, .e. Goldstein Price condition. set \\(t_{k,0}\\) e.g. 1, \\(0 < \\alpha < 1\\), \\(t_{k,+1} = \\alpha \\times t_{k,}\\). end Repeat backtracking linesearch available optim.","code":""},{"path":"https://lbbe-software.github.io/fitdistrplus/articles/Optimalgo.html","id":"benchmark","dir":"Articles","previous_headings":"1 Quick overview of main optimization methods","what":"Benchmark","title":"Which optimization algorithm to choose?","text":"simplify benchmark optimization methods, create fitbench function computes desired estimation method optimization methods. function currently exported package.","code":"fitbench <- function(data, distr, method, grad = NULL, control = list(trace = 0, REPORT = 1, maxit = 1000), lower = -Inf, upper = +Inf, ...)"},{"path":[]},{"path":[]},{"path":"https://lbbe-software.github.io/fitdistrplus/articles/Optimalgo.html","id":"theoretical-value","dir":"Articles","previous_headings":"2 Numerical illustration with the beta distribution > 2.1 Log-likelihood function and its gradient for beta distribution","what":"Theoretical value","title":"Which optimization algorithm to choose?","text":"density beta distribution given \\[ f(x; \\delta_1,\\delta_2) = \\frac{x^{\\delta_1-1}(1-x)^{\\delta_2-1}}{\\beta(\\delta_1,\\delta_2)}, \\] \\(\\beta\\) denotes beta function, see NIST Handbook mathematical functions https://dlmf.nist.gov/. recall \\(\\beta(,b)=\\Gamma()\\Gamma(b)/\\Gamma(+b)\\). log-likelihood set observations \\((x_1,\\dots,x_n)\\) \\[ \\log L(\\delta_1,\\delta_2) = (\\delta_1-1)\\sum_{=1}^n\\log(x_i)+ (\\delta_2-1)\\sum_{=1}^n\\log(1-x_i)+ n \\log(\\beta(\\delta_1,\\delta_2)) \\] gradient respect \\(\\) \\(b\\) \\[ \\nabla \\log L(\\delta_1,\\delta_2) = \\left(\\begin{matrix} \\sum\\limits_{=1}^n\\ln(x_i) - n\\psi(\\delta_1)+n\\psi( \\delta_1+\\delta_2) \\\\ \\sum\\limits_{=1}^n\\ln(1-x_i)- n\\psi(\\delta_2)+n\\psi( \\delta_1+\\delta_2) \\end{matrix}\\right), \\] \\(\\psi(x)=\\Gamma'(x)/\\Gamma(x)\\) digamma function, see NIST Handbook mathematical functions https://dlmf.nist.gov/.","code":""},{"path":"https://lbbe-software.github.io/fitdistrplus/articles/Optimalgo.html","id":"r-implementation","dir":"Articles","previous_headings":"2 Numerical illustration with the beta distribution > 2.1 Log-likelihood function and its gradient for beta distribution","what":"R implementation","title":"Which optimization algorithm to choose?","text":"fitdistrplus package, minimize opposite log-likelihood: implement opposite gradient grlnL. 
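Because these helpers are kept internal (as noted just below), a self-contained sketch of the gradient formula above can also be written directly; the function name is illustrative, and the result should be negated if the optimizer minimizes the opposite of the log-likelihood:
# gradient of the beta log-likelihood in (shape1, shape2) = (delta1, delta2)
grlnlbeta_sketch <- function(par, obs)
{
  a <- par[1]; b <- par[2]; n <- length(obs)
  c(sum(log(obs))     - n * digamma(a) + n * digamma(a + b),
    sum(log(1 - obs)) - n * digamma(b) + n * digamma(a + b))
}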
log-likelihood gradient exported.","code":"lnL <- function(par, fix.arg, obs, ddistnam) fitdistrplus:::loglikelihood(par, fix.arg, obs, ddistnam) grlnlbeta <- fitdistrplus:::grlnlbeta"},{"path":"https://lbbe-software.github.io/fitdistrplus/articles/Optimalgo.html","id":"random-generation-of-a-sample","dir":"Articles","previous_headings":"2 Numerical illustration with the beta distribution","what":"Random generation of a sample","title":"Which optimization algorithm to choose?","text":"","code":"#(1) beta distribution n <- 200 x <- rbeta(n, 3, 3/4) grlnlbeta(c(3, 4), x) #test ## [1] -133 317 hist(x, prob=TRUE, xlim=0:1) lines(density(x), col=\"red\") curve(dbeta(x, 3, 3/4), col=\"green\", add=TRUE) legend(\"topleft\", lty=1, col=c(\"red\",\"green\"), legend=c(\"empirical\", \"theoretical\"), bty=\"n\")"},{"path":"https://lbbe-software.github.io/fitdistrplus/articles/Optimalgo.html","id":"fit-beta-distribution","dir":"Articles","previous_headings":"2 Numerical illustration with the beta distribution","what":"Fit Beta distribution","title":"Which optimization algorithm to choose?","text":"Define control parameters. Call mledist default optimization function (optim implemented stats package) without gradient different optimization methods. case constrained optimization, mledist permits direct use constrOptim function (still implemented stats package) allow linear inequality constraints using logarithmic barrier. Use exp/log transformation shape parameters \\(\\delta_1\\) \\(\\delta_2\\) ensure shape parameters strictly positive. extract values fitted parameters, value corresponding log-likelihood number counts function minimize gradient (whether theoretical gradient numerically approximated one).","code":"ctr <- list(trace=0, REPORT=1, maxit=1000) unconstropt <- fitbench(x, \"beta\", \"mle\", grad=grlnlbeta, lower=0) ## BFGS NM CGFR CGPR CGBS L-BFGS-B NM-B G-BFGS ## 14 14 14 14 14 14 14 14 ## G-CGFR G-CGPR G-CGBS G-BFGS-B G-NM-B G-CGFR-B G-CGPR-B G-CGBS-B ## 14 14 14 14 14 14 14 14 dbeta2 <- function(x, shape1, shape2, log) dbeta(x, exp(shape1), exp(shape2), log=log) #take the log of the starting values startarg <- lapply(fitdistrplus:::startargdefault(x, \"beta\"), log) #redefine the gradient for the new parametrization grbetaexp <- function(par, obs, ...) grlnlbeta(exp(par), obs) * exp(par) expopt <- fitbench(x, distr=\"beta2\", method=\"mle\", grad=grbetaexp, start=startarg) ## BFGS NM CGFR CGPR CGBS G-BFGS G-CGFR G-CGPR G-CGBS ## 14 14 14 14 14 14 14 14 14 #get back to original parametrization expopt[c(\"fitted shape1\", \"fitted shape2\"), ] <- exp(expopt[c(\"fitted shape1\", \"fitted shape2\"), ])"},{"path":"https://lbbe-software.github.io/fitdistrplus/articles/Optimalgo.html","id":"results-of-the-numerical-investigation","dir":"Articles","previous_headings":"2 Numerical illustration with the beta distribution","what":"Results of the numerical investigation","title":"Which optimization algorithm to choose?","text":"Results displayed following tables: (1) original parametrization without specifying gradient (-B stands bounded version), (2) original parametrization (true) gradient (-B stands bounded version -G gradient), (3) log-transformed parametrization without specifying gradient, (4) log-transformed parametrization (true) gradient (-G stands gradient). 
Table 2.1: Unconstrained optimization approximated gradient Table 2.2: Unconstrained optimization true gradient Table 2.3: Exponential trick optimization approximated gradient Table 2.4: Exponential trick optimization true gradient Using llsurface, plot log-likehood surface around true value (green) fitted parameters (red). can simulate bootstrap replicates using bootdist function.","code":"llsurface(min.arg=c(0.1, 0.1), max.arg=c(7, 3), xlim=c(.1,7), plot.arg=c(\"shape1\", \"shape2\"), nlev=25, lseq=50, data=x, distr=\"beta\", back.col = FALSE) points(unconstropt[1,\"BFGS\"], unconstropt[2,\"BFGS\"], pch=\"+\", col=\"red\") points(3, 3/4, pch=\"x\", col=\"green\") b1 <- bootdist(fitdist(x, \"beta\", method = \"mle\", optim.method = \"BFGS\"), niter = 100, parallel = \"snow\", ncpus = 2) summary(b1) ## Parametric bootstrap medians and 95% percentile CI ## Median 2.5% 97.5% ## shape1 2.73 2.272 3.283 ## shape2 0.75 0.652 0.888 plot(b1, trueval = c(3, 3/4))"},{"path":[]},{"path":[]},{"path":"https://lbbe-software.github.io/fitdistrplus/articles/Optimalgo.html","id":"theoretical-value-1","dir":"Articles","previous_headings":"3 Numerical illustration with the negative binomial distribution > 3.1 Log-likelihood function and its gradient for negative binomial distribution","what":"Theoretical value","title":"Which optimization algorithm to choose?","text":"p.m.f. Negative binomial distribution given \\[ f(x; m,p) = \\frac{\\Gamma(x+m)}{\\Gamma(m)x!} p^m (1-p)^x, \\] \\(\\Gamma\\) denotes beta function, see NIST Handbook mathematical functions https://dlmf.nist.gov/. exists alternative representation \\(\\mu=m (1-p)/p\\) equivalently \\(p=m/(m+\\mu)\\). Thus, log-likelihood set observations \\((x_1,\\dots,x_n)\\) \\[ \\log L(m,p) = \\sum_{=1}^{n} \\log\\Gamma(x_i+m) -n\\log\\Gamma(m) -\\sum_{=1}^{n} \\log(x_i!) + mn\\log(p) +\\sum_{=1}^{n} {x_i}\\log(1-p) \\] gradient respect \\(m\\) \\(p\\) \\[ \\nabla \\log L(m,p) = \\left(\\begin{matrix} \\sum_{=1}^{n} \\psi(x_i+m) -n \\psi(m) + n\\log(p) \\\\ mn/p -\\sum_{=1}^{n} {x_i}/(1-p) \\end{matrix}\\right), \\] \\(\\psi(x)=\\Gamma'(x)/\\Gamma(x)\\) digamma function, see NIST Handbook mathematical functions https://dlmf.nist.gov/.","code":""},{"path":"https://lbbe-software.github.io/fitdistrplus/articles/Optimalgo.html","id":"r-implementation-1","dir":"Articles","previous_headings":"3 Numerical illustration with the negative binomial distribution > 3.1 Log-likelihood function and its gradient for negative binomial distribution","what":"R implementation","title":"Which optimization algorithm to choose?","text":"fitdistrplus package, minimize opposite log-likelihood: implement opposite gradient grlnL.","code":"grlnlNB <- function(x, obs, ...) 
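# gradient of the negative binomial log-likelihood in (m, p), matching the formula above:
#   d logL / dm = sum_i psi(x_i + m) - n * psi(m) + n * log(p)
#   d logL / dp = m * n / p - sum_i x_i / (1 - p)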
{ m <- x[1] p <- x[2] n <- length(obs) c(sum(psigamma(obs+m)) - n*psigamma(m) + n*log(p), m*n/p - sum(obs)/(1-p)) }"},{"path":"https://lbbe-software.github.io/fitdistrplus/articles/Optimalgo.html","id":"random-generation-of-a-sample-1","dir":"Articles","previous_headings":"3 Numerical illustration with the negative binomial distribution","what":"Random generation of a sample","title":"Which optimization algorithm to choose?","text":"","code":"#(2) negative binomial distribution n <- 200 trueval <- c(\"size\"=10, \"prob\"=3/4, \"mu\"=10/3) x <- rnbinom(n, trueval[\"size\"], trueval[\"prob\"]) hist(x, prob=TRUE, ylim=c(0, .3), xlim=c(0, 10)) lines(density(x), col=\"red\") points(min(x):max(x), dnbinom(min(x):max(x), trueval[\"size\"], trueval[\"prob\"]), col = \"green\") legend(\"topright\", lty = 1, col = c(\"red\", \"green\"), legend = c(\"empirical\", \"theoretical\"), bty=\"n\")"},{"path":"https://lbbe-software.github.io/fitdistrplus/articles/Optimalgo.html","id":"fit-a-negative-binomial-distribution","dir":"Articles","previous_headings":"3 Numerical illustration with the negative binomial distribution","what":"Fit a negative binomial distribution","title":"Which optimization algorithm to choose?","text":"Define control parameters make benchmark. case constrained optimization, mledist permits direct use constrOptim function (still implemented stats package) allow linear inequality constraints using logarithmic barrier. Use exp/log transformation shape parameters \\(\\delta_1\\) \\(\\delta_2\\) ensure shape parameters strictly positive. extract values fitted parameters, value corresponding log-likelihood number counts function minimize gradient (whether theoretical gradient numerically approximated one).","code":"ctr <- list(trace = 0, REPORT = 1, maxit = 1000) unconstropt <- fitbench(x, \"nbinom\", \"mle\", grad = grlnlNB, lower = 0) ## BFGS NM CGFR CGPR CGBS L-BFGS-B NM-B G-BFGS ## 14 14 14 14 14 14 14 14 ## G-CGFR G-CGPR G-CGBS G-BFGS-B G-NM-B G-CGFR-B G-CGPR-B G-CGBS-B ## 14 14 14 14 14 14 14 14 unconstropt <- rbind(unconstropt, \"fitted prob\" = unconstropt[\"fitted mu\", ] / (1 + unconstropt[\"fitted mu\", ])) dnbinom2 <- function(x, size, prob, log) dnbinom(x, exp(size), 1 / (1 + exp(-prob)), log = log) # transform starting values startarg <- fitdistrplus:::startargdefault(x, \"nbinom\") startarg$mu <- startarg$size / (startarg$size + startarg$mu) startarg <- list(size = log(startarg[[1]]), prob = log(startarg[[2]] / (1 - startarg[[2]]))) # redefine the gradient for the new parametrization Trans <- function(x) c(exp(x[1]), plogis(x[2])) grNBexp <- function(par, obs, ...) 
grlnlNB(Trans(par), obs) * c(exp(par[1]), plogis(x[2])*(1-plogis(x[2]))) expopt <- fitbench(x, distr=\"nbinom2\", method=\"mle\", grad=grNBexp, start=startarg) ## BFGS NM CGFR CGPR CGBS G-BFGS G-CGFR G-CGPR G-CGBS ## 14 14 14 14 14 14 14 14 14 # get back to original parametrization expopt[c(\"fitted size\", \"fitted prob\"), ] <- apply(expopt[c(\"fitted size\", \"fitted prob\"), ], 2, Trans)"},{"path":"https://lbbe-software.github.io/fitdistrplus/articles/Optimalgo.html","id":"results-of-the-numerical-investigation-1","dir":"Articles","previous_headings":"3 Numerical illustration with the negative binomial distribution","what":"Results of the numerical investigation","title":"Which optimization algorithm to choose?","text":"Results displayed following tables: (1) original parametrization without specifying gradient (-B stands bounded version), (2) original parametrization (true) gradient (-B stands bounded version -G gradient), (3) log-transformed parametrization without specifying gradient, (4) log-transformed parametrization (true) gradient (-G stands gradient). Table 3.1: Unconstrained optimization approximated gradient Table 3.2: Unconstrained optimization true gradient Table 3.3: Exponential trick optimization approximated gradient Table 3.4: Exponential trick optimization true gradient Using llsurface, plot log-likehood surface around true value (green) fitted parameters (red). can simulate bootstrap replicates using bootdist function.","code":"llsurface(min.arg = c(5, 0.3), max.arg = c(15, 1), xlim=c(5, 15), plot.arg = c(\"size\", \"prob\"), nlev = 25, lseq = 50, data = x, distr = \"nbinom\", back.col = FALSE) points(unconstropt[\"fitted size\", \"BFGS\"], unconstropt[\"fitted prob\", \"BFGS\"], pch = \"+\", col = \"red\") points(trueval[\"size\"], trueval[\"prob\"], pch = \"x\", col = \"green\") b1 <- bootdist(fitdist(x, \"nbinom\", method = \"mle\", optim.method = \"BFGS\"), niter = 100, parallel = \"snow\", ncpus = 2) summary(b1) ## Parametric bootstrap medians and 95% percentile CI ## Median 2.5% 97.5% ## size 61.95 11.05 118.32 ## mu 3.43 3.17 3.72 ## ## The estimation method converged only for 76 among 100 iterations plot(b1, trueval=trueval[c(\"size\", \"mu\")])"},{"path":"https://lbbe-software.github.io/fitdistrplus/articles/Optimalgo.html","id":"conclusion","dir":"Articles","previous_headings":"","what":"Conclusion","title":"Which optimization algorithm to choose?","text":"Based two previous examples, observe methods converge point. reassuring. However, number function evaluations (gradient evaluations) different method another. Furthermore, specifying true gradient log-likelihood help fitting procedure generally slows convergence. Generally, best method standard BFGS method BFGS method exponential transformation parameters. Since exponential function differentiable, asymptotic properties still preserved (Delta method) finite-sample may produce small bias.","code":""},{"path":"https://lbbe-software.github.io/fitdistrplus/articles/fitdistrplus_vignette.html","id":"Introduction","dir":"Articles","previous_headings":"","what":"Introduction","title":"Overview of the fitdistrplus package","text":"Fitting distributions data common task statistics consists choosing probability distribution modelling random variable, well finding parameter estimates distribution. requires judgment expertise generally needs iterative process distribution choice, parameter estimation, quality fit assessment. 
R (R Development Core Team 2013) package MASS (Venables Ripley 2010), maximum likelihood estimation available via fitdistr function; steps fitting process can done using R functions (Ricci 2005). paper, present R package fitdistrplus (Delignette-Muller et al. 2014) implementing several methods fitting univariate parametric distribution. first objective developing package provide R users set functions dedicated help overall process. fitdistr function estimates distribution parameters maximizing likelihood function using optim function. distinction parameters different roles (e.g., main parameter nuisance parameter) made, paper focuses parameter estimation general point--view. cases, estimation methods prefered, maximum goodness--fit estimation (also called minimum distance estimation), proposed R package actuar three different goodness--fit distances (Dutang, Goulet, Pigeon 2008). developping fitdistrplus package, second objective consider various estimation methods addition maximum likelihood estimation (MLE). Functions developped enable moment matching estimation (MME), quantile matching estimation (QME), maximum goodness--fit estimation (MGE) using eight different distances. Moreover, fitdistrplus package offers possibility specify user-supplied function optimization, useful cases classical optimization techniques, included optim, adequate. applied statistics, frequent fit distributions censored data Commeau et al. (2012). MASS fitdistr function enable maximum likelihood estimation type data. packages can used work censored data, especially survival data Jordan (2005), packages generally focus specific models, enabling fit restricted set distributions. third objective thus provide R users function estimate univariate distribution parameters right-, left- interval-censored data. packages CRAN provide estimation procedures user-supplied parametric distribution support different types data. distrMod package (Kohl Ruckdeschel 2010) provides object-oriented (S4) implementation probability models includes distribution fitting procedures given minimization criterion. criterion user-supplied function sufficiently flexible handle censored data, yet trivial way, see Example M4 distrMod vignette. fitting functions MLEstimator MDEstimator return S4 class coercion method class mle provided respective functionalities (e.g., confint logLik) package stats4 available, . fitdistrplus, chose use standard S3 class system understanding R users. designing fitdistrplus package, forget implement generic functions also available S3 classes. Finally, various packages provide functions estimate mode, moments L-moments distribution, see reference manuals modeest, lmomco Lmoments packages. package available Comprehensive R Archive Network . paper organized follows: Section 2 presents tools fitting continuous distributions classic non-censored data. Section 3 deals estimation methods types data, Section 4 concludes.","code":""},{"path":[]},{"path":"https://lbbe-software.github.io/fitdistrplus/articles/fitdistrplus_vignette.html","id":"Choice","dir":"Articles","previous_headings":"2 Fitting distributions to continuous non-censored data","what":"Choice of candidate distributions","title":"Overview of the fitdistrplus package","text":"illustrating use various functions fitdistrplus package continuous non-censored data, first use data set named groundbeef included package. data set contains pointwise values serving sizes grams, collected French survey, ground beef patties consumed children 5 years old. 
used quantitative risk assessment published Delignette-Muller Cornu (2008). fitting one distributions data set, generally necessary choose good candidates among predefined set distributions. choice may guided knowledge stochastic processes governing modeled variable, , absence knowledge regarding underlying process, observation empirical distribution. help user choice, developed functions plot characterize empirical distribution. First , common start plots empirical distribution function histogram (density plot), can obtained plotdist function fitdistrplus package. function provides two plots (see Figure 2.1): left-hand plot default histogram density scale (density plot , according values arguments histo demp) right-hand plot empirical cumulative distribution function (CDF). Figure 2.1: Histogram CDF plots empirical distribution continuous variable (serving size groundbeef data set) provided plotdist function. addition empirical plots, descriptive statistics may help choose candidates describe distribution among set parametric distributions. Especially skewness kurtosis, linked third fourth moments, useful purpose. non-zero skewness reveals lack symmetry empirical distribution, kurtosis value quantifies weight tails comparison normal distribution kurtosis equals 3. skewness kurtosis corresponding unbiased estimator (Casella Berger 2002) sample \\((X_i)_i \\stackrel{\\text{..d.}}{\\sim} X\\) observations \\((x_i)_i\\) given : \\[\\begin{equation} sk(X) = \\frac{E[(X-E(X))^3]}{Var(X)^{\\frac{3}{2}}}~,~\\widehat{sk}=\\frac{\\sqrt{n(n-1)}}{n-2}\\times\\frac{m_{3}}{m_{2}^{\\frac{3}{2}}},\\tag{2.1} \\end{equation}\\] \\[\\begin{equation} kr(X) = \\frac{E[(X-E(X))^4]}{Var(X)^{2}}~,~\\widehat{kr}=\\frac{n-1}{(n-2)(n-3)}((n+1) \\times \\frac{m_{4}}{m_{2}^{2}}-3(n-1)) + 3,\\tag{2.2} \\end{equation}\\] \\(m_{2}\\), \\(m_{3}\\), \\(m_{4}\\) denote empirical moments defined \\(m_{k}=\\frac{1}{n}\\sum_{=1}^n(x_{}-\\overline{x})^{k}\\), \\(x_{}\\) \\(n\\) observations variable \\(x\\) \\(\\overline{x}\\) mean value. descdist function provides classical descriptive statistics (minimum, maximum, median, mean, standard deviation), skewness kurtosis. default, unbiased estimations three last statistics provided. Nevertheless, argument method can changed \"unbiased\" (default) \"sample\" obtain without correction bias. skewness-kurtosis plot one proposed Cullen Frey (1999) provided descdist function empirical distribution (see Figure 2.2 groundbeef data set). plot, values common distributions displayed order help choice distributions fit data. distributions (normal, uniform, logistic, exponential), one possible value skewness kurtosis. Thus, distribution represented single point plot. distributions, areas possible values represented, consisting lines (gamma lognormal distributions), larger areas (beta distribution). Skewness kurtosis known robust. order take account uncertainty estimated values kurtosis skewness data, nonparametric bootstrap procedure (Efron Tibshirani 1994) can performed using argument boot. Values skewness kurtosis computed bootstrap samples (constructed random sampling replacement original data set) reported skewness-kurtosis plot. Nevertheless, user needs know skewness kurtosis, like higher moments, high variance. problem completely solved use bootstrap. skewness-kurtosis plot regarded indicative . properties random variable considered, notably expected value range, complement use plotdist descdist functions. 
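As a worked check of the estimators (2.1) and (2.2), they can be computed directly from the empirical moments; a sketch on the groundbeef data shipped with the package (descdist, used below, reports the same unbiased values by default):
# unbiased skewness and kurtosis estimators from the empirical moments m2, m3, m4
data("groundbeef", package = "fitdistrplus")
x <- groundbeef$serving
n <- length(x)
m2 <- mean((x - mean(x))^2)
m3 <- mean((x - mean(x))^3)
m4 <- mean((x - mean(x))^4)
sk.hat <- sqrt(n * (n - 1)) / (n - 2) * m3 / m2^(3/2)
kr.hat <- (n - 1) / ((n - 2) * (n - 3)) * ((n + 1) * m4 / m2^2 - 3 * (n - 1)) + 3
c(skewness = sk.hat, kurtosis = kr.hat)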
call descdist function describe distribution serving size groundbeef data set draw corresponding skewness-kurtosis plot (see Figure 2.2). Looking results example positive skewness kurtosis far 3, fit three common right-skewed distributions considered, Weibull, gamma lognormal distributions. Figure 2.2: Skewness-kurtosis plot continuous variable (serving size groundbeef data set) provided descdist function.","code":"library(\"fitdistrplus\") ## Loading required package: MASS ## Loading required package: survival data(\"groundbeef\") str(groundbeef) ## 'data.frame': 254 obs. of 1 variable: ## $ serving: num 30 10 20 24 20 24 40 20 50 30 ... plotdist(groundbeef$serving, histo = TRUE, demp = TRUE) descdist(groundbeef$serving, boot = 1000) ## summary statistics ## ------ ## min: 10 max: 200 ## median: 79 ## mean: 73.65 ## estimated sd: 35.88 ## estimated skewness: 0.7353 ## estimated kurtosis: 3.551"},{"path":"https://lbbe-software.github.io/fitdistrplus/articles/fitdistrplus_vignette.html","id":"FIT","dir":"Articles","previous_headings":"2 Fitting distributions to continuous non-censored data","what":"Fit of distributions by maximum likelihood estimation","title":"Overview of the fitdistrplus package","text":"selected, one parametric distributions \\(f(.\\vert \\theta)\\) (parameter \\(\\theta\\\\mathbb{R}^d\\)) may fitted data set, one time, using fitdist function. ..d. sample assumption, distribution parameters \\(\\theta\\) default estimated maximizing likelihood function defined : \\[\\begin{equation} L(\\theta)=\\prod_{=1}^n f(x_{}\\vert \\theta)\\tag{2.3} \\end{equation}\\] \\(x_{}\\) \\(n\\) observations variable \\(X\\) \\(f(.\\vert \\theta)\\) density function parametric distribution. proposed estimation methods described Section 3.1. fitdist function returns S3 object class fitdist print, summary plot functions provided. fit distribution using fitdist assumes corresponding d, p, q functions (standing respectively density, distribution quantile functions) defined. Classical distributions already defined way stats package, e.g., dnorm, pnorm qnorm normal distribution (see ?Distributions). Others may found various packages (see CRAN task view: Probability Distributions ). Distributions found package must implemented user d, p, q functions. call fitdist, distribution specified via argument dist either character string corresponding common root name used names d, p, q functions (e.g., \"norm\" normal distribution) density function , root name extracted (e.g., dnorm normal distribution). Numerical results returned fitdist function (1) parameter estimates, (2) estimated standard errors (computed estimate Hessian matrix maximum likelihood solution), (3) loglikelihood, (4) Akaike Bayesian information criteria (-called AIC BIC), (5) correlation matrix parameter estimates. call fitdist function fit Weibull distribution serving size groundbeef data set. plot object class fitdist provides four classical goodness--fit plots (Cullen Frey 1999) presented Figure 2.3: density plot representing density function fitted distribution along histogram empirical distribution, CDF plot empirical distribution fitted distribution, Q-Q plot representing empirical quantiles (y-axis) theoretical quantiles (x-axis), P-P plot representing empirical distribution function evaluated data point (y-axis) fitted distribution function (x-axis). CDF, Q-Q P-P plots, probability plotting position defined default using Hazen’s rule, probability points empirical distribution calculated (1:n - 0.5)/n, recommended Blom (1959). 
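For reference, these Hazen plotting positions correspond in base R to ppoints(n, a = 0.5), that is (1:n - 0.5)/n; a one-line check:
# Hazen probability plotting positions
n <- 10
all.equal(ppoints(n, a = 0.5), (1:n - 0.5)/n)  # TRUE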
plotting position can easily changed (see reference manual details (Delignette-Muller et al. 2014)). Unlike generic plot function, denscomp, cdfcomp, qqcomp ppcomp functions enable draw separately four plots, order compare empirical distribution multiple parametric distributions fitted data set. functions must called first argument corresponding list objects class fitdist, optionally arguments customize plot (see reference manual lists arguments may specific plot (Delignette-Muller et al. 2014)). following example, compare fit Weibull, lognormal gamma distributions groundbeef data set (Figure 2.3). Figure 2.3: Four Goodness--fit plots various distributions fitted continuous data (Weibull, gamma lognormal distributions fitted serving sizes groundbeef data set) provided functions denscomp, qqcomp, cdfcomp ppcomp. density plot CDF plot may considered basic classical goodness--fit plots. two plots complementary can informative cases. Q-Q plot emphasizes lack--fit distribution tails P-P plot emphasizes lack--fit distribution center. present example (Figure 2.3), none three fitted distributions correctly describes center distribution, Weibull gamma distributions prefered better description right tail empirical distribution, especially tail important use fitted distribution, context food risk assessment. data set named endosulfan now used illustrate features fitdistrplus package. data set contains acute toxicity values organochlorine pesticide endosulfan (geometric mean LC50 ou EC50 values \\(\\mu g.L^{-1}\\)), tested Australian non-Australian laboratory-species (Hose Van den Brink 2004). ecotoxicology, lognormal loglogistic distribution often fitted data set order characterize species sensitivity distribution (SSD) pollutant. low percentile fitted distribution, generally 5% percentile, calculated named hazardous concentration 5% (HC5). interpreted value pollutant concentration protecting 95% species (Posthuma, Suter, Traas 2010). fit lognormal loglogistic distribution whole endosulfan data set rather bad (Figure 2.4), especially due minority high values. two-parameter Pareto distribution three-parameter Burr distribution (extension loglogistic Pareto distributions) fitted. Pareto Burr distributions provided package actuar. , define starting values (optimization process) reasonable starting values implicity defined within fitdist function distributions defined R (see ?fitdist details). distributions like Pareto Burr distribution, initial values distribution parameters supplied argument start, named list initial values parameter (appear d, p, q functions). defined reasonable starting values1 various distributions can fitted graphically compared. example, function cdfcomp can used report CDF values logscale emphasize discrepancies tail interest defining HC5 value (Figure 2.4). Figure 2.4: CDF plot compare fit four distributions acute toxicity values various organisms organochlorine pesticide endosulfan (endosulfan data set) provided cdfcomp function, CDF values logscale emphasize discrepancies left tail. None fitted distribution correctly describes right tail observed data set, shown Figure 2.4, left-tail seems better described Burr distribution. use considered estimate HC5 value 5% quantile distribution. can easily done using quantile generic function defined object class fitdist. calculation together calculation empirical quantile comparison. addition ecotoxicology context, quantile generic function also attractive actuarial-financial context. 
fact, value--risk \\(VAR_\\alpha\\) defined \\(1-\\alpha\\)-quantile loss distribution can computed quantile fitdist object. computation different goodness--fit statistics proposed fitdistrplus package order compare fitted distributions. purpose goodness--fit statistics aims measure distance fitted parametric distribution empirical distribution: e.g., distance fitted cumulative distribution function \\(F\\) empirical distribution function \\(F_{n}\\). fitting continuous distributions, three goodness--fit statistics classicaly considered: Cramer-von Mises, Kolmogorov-Smirnov Anderson-Darling statistics (D’Agostino Stephens 1986). Naming \\(x_{}\\) \\(n\\) observations continuous variable \\(X\\) arranged ascending order, Table 2.1 gives definition empirical estimate three considered goodness--fit statistics. can computed using function gofstat defined Stephens (D’Agostino Stephens 1986). Table 2.1: Goodness--fit statistics defined Stephens (D’Agostino Stephens 1986). \\(F_i\\stackrel{\\triangle}{=} F(x_i)\\) giving weight distribution tails, Anderson-Darling statistic special interest matters equally emphasize tails well main body distribution. often case risk assessment Vose (2010). reason, statistics often used select best distribution among fitted. Nevertheless, statistics used cautiously comparing fits various distributions. Keeping mind weighting CDF quadratic difference depends parametric distribution definition (see Table 2.1), Anderson-Darling statistics computed several distributions fitted data set theoretically difficult compare. Moreover, statistic, Cramer-von Mises Kolmogorov-Smirnov ones, take account complexity model (.e., parameter number). problem compared distributions characterized number parameters, systematically promote selection complex distributions case. Looking classical penalized criteria based loglikehood (AIC, BIC) seems thus also interesting, especially discourage overfitting. previous example, goodness--fit statistics based CDF distance favor Burr distribution, one characterized three parameters, AIC BIC values respectively give preference Burr distribution Pareto distribution. choice two distributions seems thus less obvious discussed. Even specifically recommended discrete distributions, Chi-squared statistic may also used continuous distributions (see Section 3.3 reference manual examples (Delignette-Muller et al. 2014)).","code":"fw <- fitdist(groundbeef$serving, \"weibull\") summary(fw) ## Fitting of the distribution ' weibull ' by maximum likelihood ## Parameters : ## estimate Std. 
Error ## shape 2.186 0.1046 ## scale 83.348 2.5269 ## Loglikelihood: -1255 AIC: 2514 BIC: 2522 ## Correlation matrix: ## shape scale ## shape 1.0000 0.3218 ## scale 0.3218 1.0000 par(mfrow = c(2, 2), mar = c(4, 4, 2, 1)) fg <- fitdist(groundbeef$serving, \"gamma\") fln <- fitdist(groundbeef$serving, \"lnorm\") plot.legend <- c(\"Weibull\", \"lognormal\", \"gamma\") denscomp(list(fw, fln, fg), legendtext = plot.legend) qqcomp(list(fw, fln, fg), legendtext = plot.legend) cdfcomp(list(fw, fln, fg), legendtext = plot.legend) ppcomp(list(fw, fln, fg), legendtext = plot.legend) library(actuar) ## ## Attaching package: 'actuar' ## The following objects are masked from 'package:stats': ## ## sd, var ## The following object is masked from 'package:grDevices': ## ## cm data(\"endosulfan\") ATV <- endosulfan$ATV fendo.ln <- fitdist(ATV, \"lnorm\") fendo.ll <- fitdist(ATV, \"llogis\", start = list(shape = 1, scale = 500)) fendo.P <- fitdist(ATV, \"pareto\", start = list(shape = 1, scale = 500)) fendo.B <- fitdist(ATV, \"burr\", start = list(shape1 = 0.3, shape2 = 1, rate = 1)) cdfcomp(list(fendo.ln, fendo.ll, fendo.P, fendo.B), xlogscale = TRUE, ylogscale = TRUE, legendtext = c(\"lognormal\", \"loglogistic\", \"Pareto\", \"Burr\")) quantile(fendo.B, probs = 0.05) ## Estimated quantiles for each specified probability (non-censored data) ## p=0.05 ## estimate 0.2939 quantile(ATV, probs = 0.05) ## 5% ## 0.2 gofstat(list(fendo.ln, fendo.ll, fendo.P, fendo.B), fitnames = c(\"lnorm\", \"llogis\", \"Pareto\", \"Burr\")) ## Goodness-of-fit statistics ## lnorm llogis Pareto Burr ## Kolmogorov-Smirnov statistic 0.1672 0.1196 0.08488 0.06155 ## Cramer-von Mises statistic 0.6374 0.3827 0.13926 0.06803 ## Anderson-Darling statistic 3.4721 2.8316 0.89206 0.52393 ## ## Goodness-of-fit criteria ## lnorm llogis Pareto Burr ## Akaike's Information Criterion 1069 1069 1048 1046 ## Bayesian Information Criterion 1074 1075 1053 1054"},{"path":"https://lbbe-software.github.io/fitdistrplus/articles/fitdistrplus_vignette.html","id":"Uncertainty","dir":"Articles","previous_headings":"2 Fitting distributions to continuous non-censored data","what":"Uncertainty in parameter estimates","title":"Overview of the fitdistrplus package","text":"uncertainty parameters fitted distribution can estimated parametric nonparametric bootstraps using boodist function non-censored data (Efron Tibshirani 1994). function returns bootstrapped values parameters S3 class object can plotted visualize bootstrap region. medians 95% confidence intervals parameters (2.5 97.5 percentiles) printed summary. inferior whole number iterations (due lack convergence optimization algorithm bootstrapped data sets), number iterations estimation converges also printed summary. plot object class bootdist consists scatterplot matrix scatterplots bootstrapped values parameters providing representation joint uncertainty distribution fitted parameters. example use bootdist function previous fit Burr distribution endosulfan data set (Figure 2.5). Figure 2.5: Bootstrappped values parameters fit Burr distribution characterized three parameters (example endosulfan data set) provided plot object class bootdist. Bootstrap samples parameter estimates useful especially calculate confidence intervals parameter fitted distribution marginal distribution bootstraped values. also interesting look joint distribution bootstraped values scatterplot (matrix scatterplots number parameters exceeds two) order understand potential structural correlation parameters (see Figure 2.5). 
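The nonparametric variant of bootdist is only mentioned in the text, so a hedged sketch follows; bootmethod = "nonparam" resamples the data, and niter is kept small here only to keep the example fast.

```r
library(fitdistrplus)
library(actuar)   # provides the Burr distribution
data("endosulfan")
fendo.B <- fitdist(endosulfan$ATV, "burr",
                   start = list(shape1 = 0.3, shape2 = 1, rate = 1))
# nonparametric bootstrap: the data are resampled instead of simulated
bendo.np <- bootdist(fendo.B, bootmethod = "nonparam", niter = 101)
summary(bendo.np)
```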
use whole bootstrap sample also interest risk assessment field. use enables characterization uncertainty distribution parameters. can directly used within second-order Monte Carlo simulation framework, especially within package mc2d (Pouillot, Delignette-Muller, Denis 2011). One refer Pouillot Delignette-Muller (2010) introduction use mc2d fitdistrplus packages context quantitative risk assessment. bootstrap method can also used calculate confidence intervals quantiles fitted distribution. purpose, generic quantile function provided class bootdist. default, 95% percentiles bootstrap confidence intervals quantiles provided. Going back previous example ecotoxicolgy, function can used estimate uncertainty associated HC5 estimation, example previously fitted Burr distribution endosulfan data set.","code":"bendo.B <- bootdist(fendo.B, niter = 1001) summary(bendo.B) ## Parametric bootstrap medians and 95% percentile CI ## Median 2.5% 97.5% ## shape1 0.1983 0.09283 0.3606 ## shape2 1.5863 1.05306 3.0629 ## rate 1.4907 0.70828 2.7775 plot(bendo.B) quantile(bendo.B, probs = 0.05) ## (original) estimated quantiles for each specified probability (non-censored data) ## p=0.05 ## estimate 0.2939 ## Median of bootstrap estimates ## p=0.05 ## estimate 0.2994 ## ## two-sided 95 % CI of each quantile ## p=0.05 ## 2.5 % 0.1792 ## 97.5 % 0.4999"},{"path":[]},{"path":"https://lbbe-software.github.io/fitdistrplus/articles/fitdistrplus_vignette.html","id":"Alternatives","dir":"Articles","previous_headings":"3 Advanced topics","what":"Alternative methods for parameter estimation","title":"Overview of the fitdistrplus package","text":"subsection focuses alternative estimation methods. One alternative continuous distributions maximum goodness--fit estimation method also called minimum distance estimation method Dutang, Goulet, Pigeon (2008). package method proposed eight different distances: three classical distances defined Table 2.1, one variants Anderson-Darling distance proposed Luceno (2006) defined Table 3.1. right-tail AD gives weight right-tail, left-tail AD gives weight left tail. Either tails, , can receive even larger weights using second order Anderson-Darling Statistics. Table 3.1: Modified Anderson-Darling statistics defined Luceno (2006). \\(F_i\\stackrel{\\triangle}{=} F(x_{})\\) \\(\\overline F_i\\stackrel{\\triangle}{=}1-F(x_{})\\) fit distribution maximum goodness--fit estimation, one needs fix argument method mge call fitdist specify argument gof coding chosen goodness--fit distance. function intended used continuous non-censored data. Maximum goodness--fit estimation may useful give weight data one tail distribution. previous example ecotoxicology, used non classical distribution (Burr distribution) correctly fit empirical distribution especially left tail. order correctly estimate 5\\(\\%\\) percentile, also consider fit classical lognormal distribution, minimizing goodness--fit distance giving weight left tail empirical distribution. follows, left tail Anderson-Darling distances first second order used fit lognormal endosulfan data set (see Figure 3.1). Figure 3.1: Comparison lognormal distribution fitted MLE MGE using two different goodness--fit distances: left-tail Anderson-Darling left-tail Anderson Darling second order (example endosulfan data set) provided cdfcomp function, CDF values logscale emphasize discrepancies left tail. 
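Other distances accepted by the gof argument can be tried in the same way; the short sketch below (not in the vignette) uses the Kolmogorov-Smirnov and right-tail Anderson-Darling distances purely to illustrate how the choice of distance shifts the estimates.

```r
library(fitdistrplus)
data("endosulfan")
ATV <- endosulfan$ATV
fendo.ln.KS  <- fitdist(ATV, "lnorm", method = "mge", gof = "KS")
fendo.ln.ADR <- fitdist(ATV, "lnorm", method = "mge", gof = "ADR")
# compare the parameter estimates obtained with the two distances
cbind(KS = fendo.ln.KS$estimate, ADR = fendo.ln.ADR$estimate)
```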
Comparing 5% percentiles (HC5) calculated using three fits one calculated MLE fit Burr distribution, can observe, example, fitting lognormal distribution maximizing left tail Anderson-Darling distances first second order enables approach value obtained fitting Burr distribution MLE. moment matching estimation (MME) another method commonly used fit parametric distributions (Vose 2010). MME consists finding value parameter \\(\\theta\\) equalizes first theoretical raw moments parametric distribution corresponding empirical raw moments Equation (3.1): \\[\\begin{equation} E(X^{k}|\\theta)=\\frac{1}{n}\\sum_{=1}^{n}x_{}^{k},\\tag{3.1} \\end{equation}\\] \\(k=1,\\ldots,d\\), \\(d\\) number parameters estimate \\(x_{}\\) \\(n\\) observations variable \\(X\\). moments order greater equal 2, may also relevant match centered moments. Therefore, match moments given Equation (3.2): \\[\\begin{equation} E(X\\vert \\theta) = \\overline{x} ~,~E\\left((X-E(X))^{k}|\\theta\\right)=m_k, \\text{ } k=2,\\ldots,d,\\tag{3.2} \\end{equation}\\] \\(m_k\\) denotes empirical centered moments. method can performed setting argument method \"mme\" call fitdist. estimate computed closed-form formula following distributions: normal, lognormal, exponential, Poisson, gamma, logistic, negative binomial, geometric, beta uniform distributions. case, distributions characterized one parameter (geometric, Poisson exponential), parameter simply estimated matching theoretical observed means, distributions characterized two parameters, parameters estimated matching theoretical observed means variances (Vose 2010). distributions, equation moments solved numerically using optim function minimizing sum squared differences observed theoretical moments (see fitdistrplus reference manual technical details (Delignette-Muller et al. 2014)). classical data set Danish insurance industry published McNeil (1997) used illustrate method. fitdistrplus, data set stored danishuni univariate version contains loss amounts collected Copenhagen Reinsurance 1980 1990. actuarial science, standard consider positive heavy-tailed distributions special focus right-tail distributions. numerical experiment, choose classic actuarial distributions loss modelling: lognormal distribution Pareto type II distribution (Klugman, Panjer, Willmot 2009). lognormal distribution fitted danishuni data set matching moments implemented closed-form formula. left-hand graph Figure 3.2, fitted distribution functions obtained using moment matching estimation (MME) maximum likelihood estimation (MLE) methods compared. MME method provides cautious estimation insurance risk MME-fitted distribution function (resp. MLE-fitted) underestimates (overestimates) empirical distribution function large values claim amounts. Figure 3.2: Comparison MME MLE fitting lognormal Pareto distribution loss data danishuni data set. second time, Pareto distribution, gives weight right-tail distribution, fitted. lognormal distribution, Pareto two parameters, allows fair comparison. use implementation actuar package providing raw centered moments distribution (addition d, p, q r functions (Goulet 2012). Fitting heavy-tailed distribution first second moments exist certain values shape parameter requires cautiousness. carried providing, optimization process, lower upper bound parameter. code calls L-BFGS-B optimization method optim, since quasi-Newton allows box constraints 2. choose match moments defined Equation (3.1), function computing empirical raw moment (called memp example) passed fitdist. 
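Before moving on, a quick check of the closed-form case mentioned above: for a one-parameter distribution the MME simply matches the theoretical and empirical means. The sketch below verifies this by hand on simulated exponential data; the simulated sample is illustrative only.

```r
library(fitdistrplus)
set.seed(123)
x <- rexp(200, rate = 0.1)            # simulated data, illustration only
fexp.mme <- fitdist(x, "exp", method = "mme")
# closed-form MME of the exponential: rate = 1/mean
c(by_hand = 1/mean(x), fitdist = unname(fexp.mme$estimate))
```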
two-parameter distributions (.e., \\(d=2\\)), Equations (3.1) (3.2) equivalent. shown Figure 3.2, MME MLE fits far less distant (looking right-tail) Pareto distribution lognormal distribution data set. Furthermore, two distributions, MME method better fits right-tail distribution visual point view. seems logical since empirical moments influenced large observed values. previous traces, gave values goodness--fit statistics. Whatever statistic considered, MLE-fitted lognormal always provides best fit observed data. Maximum likelihood moment matching estimations certainly commonly used method fitting distributions (Cullen Frey 1999). Keeping mind two methods may produce different results, user aware great sensitivity outliers choosing moment matching estimation. may seen advantage example objective better describe right tail distribution, may seen drawback objective different. Fitting parametric distribution may also done matching theoretical quantiles parametric distributions (specified probabilities) empirical quantiles (Tse 2009). equality theoretical empirical quantiles expressed Equation (3.3) , similar Equations (3.1) (3.2): \\[\\begin{equation} F^{-1}(p_{k}|\\theta)=Q_{n,p_{k}}\\tag{3.3} \\end{equation}\\] \\(k=1,\\ldots,d\\), \\(d\\) number parameters estimate (dimension \\(\\theta\\) fixed parameters) \\(Q_{n,p_{k}}\\) empirical quantiles calculated data specified probabilities \\(p_{k}\\). Quantile matching estimation (QME) performed setting argument method \"qme\" call fitdist adding argument probs defining probabilities quantile matching performed (see Figure 3.3). length vector must equal number parameters estimate (vector moment orders MME). Empirical quantiles computed using quantile function stats package using type=7 default (see ?quantile Hyndman Fan (1996)). type quantile can easily changed using qty argument call qme function. quantile matching carried numerically, minimizing sum squared differences observed theoretical quantiles. Figure 3.3: Comparison QME MLE fitting lognormal distribution loss data danishuni data set. example fitting lognormal distribution `danishuni} data set matching probabilities \\((p_1= 1/3, p_2=2/3)\\) \\((p_1= 8/10, p_2=9/10)\\). expected, second QME fit gives weight right-tail distribution. Compared maximum likelihood estimation, second QME fit best suits right-tail distribution, whereas first QME fit best models body distribution. quantile matching estimation particular interest need focus around particular quantiles, e.g., \\(p=99.5\\%\\) Solvency II insurance context \\(p=5\\%\\) HC5 estimation ecotoxicology context.","code":"fendo.ln.ADL <- fitdist(ATV, \"lnorm\", method = \"mge\", gof = \"ADL\") fendo.ln.AD2L <- fitdist(ATV, \"lnorm\", method = \"mge\", gof = \"AD2L\") cdfcomp(list(fendo.ln, fendo.ln.ADL, fendo.ln.AD2L), xlogscale = TRUE, ylogscale = TRUE, main = \"Fitting a lognormal distribution\", xlegend = \"bottomright\", legendtext = c(\"MLE\", \"Left-tail AD\", \"Left-tail AD 2nd order\")) (HC5.estimates <- c( empirical = as.numeric(quantile(ATV, probs = 0.05)), Burr = as.numeric(quantile(fendo.B, probs = 0.05)$quantiles), lognormal_MLE = as.numeric(quantile(fendo.ln, probs = 0.05)$quantiles), lognormal_AD2 = as.numeric(quantile(fendo.ln.ADL, probs = 0.05)$quantiles), lognormal_AD2L = as.numeric(quantile(fendo.ln.AD2L, probs = 0.05)$quantiles))) ## empirical Burr lognormal_MLE lognormal_AD2 lognormal_AD2L ## 0.20000 0.29393 0.07259 0.19591 0.25877 data(\"danishuni\") str(danishuni) ## 'data.frame': 2167 obs. 
of 2 variables: ## $ Date: Date, format: \"1980-01-03\" \"1980-01-04\" ... ## $ Loss: num 1.68 2.09 1.73 1.78 4.61 ... fdanish.ln.MLE <- fitdist(danishuni$Loss, \"lnorm\") fdanish.ln.MME <- fitdist(danishuni$Loss, \"lnorm\", method = \"mme\", order = 1:2) library(actuar) fdanish.P.MLE <- fitdist(danishuni$Loss, \"pareto\", start = list(shape = 10, scale = 10), lower = 2+1e-6, upper = Inf) ## Warning in cov2cor(varcovar): diag(V) had non-positive or NA entries; the ## non-finite result may be dubious ## Warning in sqrt(diag(varcovar)): NaNs produced memp <- function(x, order) mean(x^order) fdanish.P.MME <- fitdist(danishuni$Loss, \"pareto\", method = \"mme\", order = 1:2, memp = \"memp\", start = list(shape = 10, scale = 10), lower = c(2+1e-6, 2+1e-6), upper = c(Inf, Inf)) ## Warning in cov2cor(varcovar): diag(V) had non-positive or NA entries; the ## non-finite result may be dubious par(mfrow = c(1, 2)) cdfcomp(list(fdanish.ln.MLE, fdanish.ln.MME), legend = c(\"lognormal MLE\", \"lognormal MME\"), main = \"Fitting a lognormal distribution\", xlogscale = TRUE, datapch = 20) cdfcomp(list(fdanish.P.MLE, fdanish.P.MME), legend = c(\"Pareto MLE\", \"Pareto MME\"), main = \"Fitting a Pareto distribution\", xlogscale = TRUE, datapch = 20) gofstat(list(fdanish.ln.MLE, fdanish.P.MLE, fdanish.ln.MME, fdanish.P.MME), fitnames = c(\"lnorm.mle\", \"Pareto.mle\", \"lnorm.mme\", \"Pareto.mme\")) ## Goodness-of-fit statistics ## lnorm.mle Pareto.mle lnorm.mme Pareto.mme ## Kolmogorov-Smirnov statistic 0.1375 0.3124 0.4368 0.37 ## Cramer-von Mises statistic 14.7911 37.7227 88.9503 55.43 ## Anderson-Darling statistic 87.1933 208.3388 416.2567 281.58 ## ## Goodness-of-fit criteria ## lnorm.mle Pareto.mle lnorm.mme Pareto.mme ## Akaike's Information Criterion 8120 9250 9792 9409 ## Bayesian Information Criterion 8131 9261 9803 9420 fdanish.ln.QME1 <- fitdist(danishuni$Loss, \"lnorm\", method = \"qme\", probs = c(1/3, 2/3)) fdanish.ln.QME2 <- fitdist(danishuni$Loss, \"lnorm\", method = \"qme\", probs = c(8/10, 9/10)) cdfcomp(list(fdanish.ln.MLE, fdanish.ln.QME1, fdanish.ln.QME2), legend = c(\"MLE\", \"QME(1/3, 2/3)\", \"QME(8/10, 9/10)\"), main = \"Fitting a lognormal distribution\", xlogscale = TRUE, datapch = 20)"},{"path":"https://lbbe-software.github.io/fitdistrplus/articles/fitdistrplus_vignette.html","id":"Customization","dir":"Articles","previous_headings":"3 Advanced topics","what":"Customization of the optimization algorithm","title":"Overview of the fitdistrplus package","text":"time numerical minimization carried fitdistrplus package, optim function stats package used default Nelder-Mead method distributions characterized one parameter BFGS method distributions characterized one parameter. Sometimes default algorithm fails converge. interesting change options optim function use another optimization function optim minimize objective function. argument optim.method can used call fitdist fitdistcens. internally passed mledist, mmedist, mgedist qmedist, optim (see ?optim details different algorithms available). Even error raised computing optimization, changing algorithm particular interest enforce bounds parameters. instance, volatility parameter \\(\\sigma\\) strictly positive \\(\\sigma>0\\) probability parameter \\(p\\) lies \\(p\\[0,1]\\). possible using arguments lower /upper, use automatically forces optim.method=\"L-BFGS-B\". examples fits gamma distribution \\(\\mathcal{G}(\\alpha, \\lambda)\\) groundbeef data set various algorithms. 
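Before those examples, a hedged sketch of the bound-constrained case just mentioned: passing lower/upper to fitdist switches the optimiser to "L-BFGS-B". The bounds chosen below for the lognormal parameters are illustrative.

```r
library(fitdistrplus)
data("groundbeef")
# order of the bounds follows the parameter order (meanlog, sdlog);
# sdlog is kept non-negative, meanlog is left unbounded
fln.box <- fitdist(groundbeef$serving, "lnorm",
                   lower = c(-Inf, 0), upper = c(Inf, Inf))
summary(fln.box)
```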
Note conjugate gradient algorithm (CG) needs far iterations converge (around 2500 iterations) compared algorithms (converging less 100 iterations). also possible use another function optim minimize objective function specifying argument custom.optim call fitdist. may necessary customize optimization function meet following requirements. (1) custom.optim function must following arguments: fn function optimized par initialized parameters. (2) custom.optim carry MINIMIZATION must return following components: par estimate, convergence convergence code, value=fn(par) hessian. example code written wrap genoud function rgenoud package order respect optimization ``template’’. rgenoud package implements genetic (stochastic) algorithm. customized optimization function can passed argument custom.optim call fitdist fitdistcens. following code can example used fit gamma distribution groundbeef data set. Note example various arguments also passed fitdist genoud: nvars, Domains, boundary.enforcement, print.level hessian. code compares parameter estimates (\\(\\hat\\alpha\\), \\(\\hat\\lambda\\)) different algorithms: shape \\(\\alpha\\) rate \\(\\lambda\\) parameters relatively similar example, roughly 4.00 0.05, respectively.","code":"data(\"groundbeef\") fNM <- fitdist(groundbeef$serving, \"gamma\", optim.method = \"Nelder-Mead\") fBFGS <- fitdist(groundbeef$serving, \"gamma\", optim.method = \"BFGS\") fSANN <- fitdist(groundbeef$serving, \"gamma\", optim.method = \"SANN\") fCG <- try(fitdist(groundbeef$serving, \"gamma\", optim.method = \"CG\", control = list(maxit = 10000))) if(inherits(fCG, \"try-error\")) {fCG <- list(estimate = NA)} mygenoud <- function(fn, par, ...) { require(rgenoud) res <- genoud(fn, starting.values = par, ...) standardres <- c(res, convergence = 0) return(standardres) } fgenoud <- mledist(groundbeef$serving, \"gamma\", custom.optim = mygenoud, nvars = 2, max.generations = 10, Domains = cbind(c(0, 0), c(10, 10)), boundary.enforcement = 1, hessian = TRUE, print.level = 0, P9 = 10) ## Loading required package: rgenoud ## ## rgenoud (Version 5.9-0.10, Build Date: 2023-12-13) ## ## See http://sekhon.berkeley.edu/rgenoud for additional documentation. ## ## Please cite software as: ## ## Walter Mebane, Jr. and Jasjeet S. Sekhon. 2011. ## ## ``Genetic Optimization Using Derivatives: The rgenoud package for R.'' ## ## Journal of Statistical Software, 42(11): 1-26. ## ## cbind(NM = fNM$estimate, BFGS = fBFGS$estimate, SANN = fSANN$estimate, CG = fCG$estimate, fgenoud = fgenoud$estimate) ## NM BFGS SANN CG fgenoud ## shape 4.00956 4.2118 4.058 4.12783 4.00834 ## rate 0.05444 0.0572 0.055 0.05605 0.05443"},{"path":"https://lbbe-software.github.io/fitdistrplus/articles/fitdistrplus_vignette.html","id":"otherdata","dir":"Articles","previous_headings":"3 Advanced topics","what":"Fitting distributions to other types of data","title":"Overview of the fitdistrplus package","text":"section modified since publication vignette Journal Statistical Software order include new goodness--fit plots censored discrete data. Analytical methods often lead semi-quantitative results referred censored data. Observations known limit detection left-censored data. Observations known limit quantification right-censored data. Results known lie two bounds interval-censored data. two bounds may correspond limit detection limit quantification, generally uncertainty bounds around observation. Right-censored data also commonly encountered survival data (Klein Moeschberger 2003). 
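Returning to the custom.optim interface described just above, here is a hedged sketch wrapping stats::nlminb in the same spirit as the genoud wrapper; it assumes, as in that example, that the extra arguments of the likelihood arrive through ... and must be forwarded to fn, and it uses optimHess only to provide the required hessian component.

```r
library(fitdistrplus)
myoptim_nlminb <- function(fn, par, ...) {
  # nlminb carries out the required minimisation; the extra arguments of the
  # objective (data, fixed arguments) arrive in ... and are forwarded to fn
  res <- nlminb(start = par, objective = fn, ...)
  list(par = res$par, value = res$objective,
       convergence = res$convergence,
       hessian = optimHess(res$par, fn, ...))  # numerical Hessian at the optimum
}
data("groundbeef")
fit.nlminb <- mledist(groundbeef$serving, "gamma", custom.optim = myoptim_nlminb)
fit.nlminb$estimate
```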
data set may thus contain right-, left-, interval-censored data, may mixture categories, possibly different upper lower bounds. Censored data sometimes excluded data analysis replaced fixed value, cases may lead biased results. recommended approach correctly model data based upon maximum likelihood Helsel (2005). Censored data may thus contain left-censored, right-censored interval-censored values, several lower upper bounds. use package fitdistrplus, data must coded dataframe two columns, respectively named left right, describing observed value interval. left column contains either NA left censored observations, left bound interval interval censored observations, observed value non-censored observations. right column contains either NA right censored observations, right bound interval interval censored observations, observed value non-censored observations. illustrate use package fitdistrplus fit distributions censored continous data, use another data set ecotoxicology, included package named salinity. data set contains acute salinity tolerance (LC50 values electrical conductivity, \\(mS\\).\\(cm^{-1}\\)) riverine macro-invertebrates taxa southern Murray-Darling Basin Central Victoria, Australia (Kefford et al. 2007). Using censored data coded salinity} data set, empirical distribution can plotted using theplotdistcens} function. older versions package, default function used Expectation-Maximization approach Turnbull (1974) compute overall empirical cdf curve optional confidence intervals, calls survfit plot.survfit functions survival package. Even representation always available (fixing argument NPMLE.method \"Turnbull.middlepoints\"), now default plot empirical cumulative distribution function (ECDF) explicitly represents regions non uniqueness NPMLE ECDF. default computation regions non uniqueness associated masses uses non parametric maximum likelihood estimation (NPMLE) approach developped Wang Wang Fani (2018). Figure 3.5 shows top left new plot data together two fitted distributions. Grey filled rectangles plot represent regions non uniqueness NPMLE ECDF. less rigorous sometimes illustrative plot can obtained fixing argument NPMLE FALSE call plotdistcens (see Figure 3.4 example help page Function plotdistcens details). plot enables see real nature censored data, points intervals, difficulty building plot define relevant ordering observations. Figure 3.4: Simple plot censored raw data (72-hour acute salinity tolerance riverine macro-invertebrates salinity data set) ordered points intervals. non censored data, one parametric distributions can fitted censored data set, one time, using case fitdistcens function. function estimates vector distribution parameters \\(\\theta\\) maximizing likelihood censored data defined : \\[\\begin{equation} L(\\theta) = \\prod_{=1}^{N_{nonC}} f(x_{}|\\theta)\\times \\prod_{j=1}^{N_{leftC}} F(x^{upper}_{j}|\\theta) \\\\ \\times \\prod_{k=1}^{N_{rightC}} (1- F(x^{lower}_{k}|\\theta))\\times \\prod_{m=1}^{N_{intC}} (F(x^{upper}_{m}|\\theta)- F(x^{lower}_{j}|\\theta))\\tag{3.4} \\end{equation}\\] \\(x_{}\\) \\(N_{nonC}\\) non-censored observations, \\(x^{upper}_{j}\\) upper values defining \\(N_{leftC}\\) left-censored observations, \\(x^{lower}_{k}\\) lower values defining \\(N_{rightC}\\) right-censored observations, \\([x^{lower}_{m} ; x^{upper}_{m}]\\) intervals defining \\(N_{intC}\\) interval-censored observations, F cumulative distribution function parametric distribution Helsel (2005). 
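To make the left/right coding described above concrete, a hedged sketch with a small, entirely made-up censored data set mixing left-censored, interval-censored, exact and right-censored values, fitted with fitdistcens.

```r
library(fitdistrplus)
# made-up values: NA in 'left' = left-censored, NA in 'right' = right-censored,
# equal bounds = exact observation, different bounds = interval-censored
dcens <- data.frame(
  left  = c(NA,  2.0, 3.5, 5.1, NA,  7.0, 1.2, 4.4),
  right = c(1.0, 2.0, 4.0, 5.1, 2.5, NA,  1.2, 6.0))
fcens.ln <- fitdistcens(dcens, "lnorm")
summary(fcens.ln)
```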
fitdist, fitdistcens returns results fit parametric distribution data set S3 class object can easily printed, summarized plotted. salinity data set, lognormal distribution loglogistic can fitted commonly done ecotoxicology data. fitdist, distributions (see Delignette-Muller et al. (2014) details), necessary specify initial values distribution parameters argument start. plotdistcens function can help find correct initial values distribution parameters non trivial cases, manual iterative use necessary. Computations goodness--fit statistics yet developed fits using censored data quality fit can judged using Akaike Schwarz’s Bayesian information criteria (AIC BIC) goodness--fit CDF plot, respectively provided summarizing plotting object class fitdistcens. Functions cdfcompcens, qqcompcens ppcompcens can also used compare fit various distributions censored data set. calls similar ones cdfcomp, qqcomp ppcomp. examples use functions two fitted distributions salinity data set (see Figure 3.5). qqcompcens ppcompcens used one fitted distribution, non uniqueness rectangles filled small noise added y-axis order help visualization various fits. rather recommend use plotstyle ggplot qqcompcens ppcompcens compare fits various distributions provides clearer plot splitted facets (see ?graphcompcens). Figure 3.5: goodness--fit plots fits lognormal loglogistic distribution censored data: LC50 values salinity data set. Function bootdistcens equivalent bootdist censored data, except proposes nonparametric bootstrap. Indeed, obvious simulate censoring within parametric bootstrap resampling procedure. generic function quantile can also applied object class fitdistcens bootdistcens, continuous non-censored data. addition fit distributions censored non censored continuous data, package can also accomodate discrete variables, count numbers, using functions developped continuous non-censored data. functions provide somewhat different graphs statistics, taking account discrete nature modeled variable. discrete nature variable automatically recognized classical distribution fitted data (binomial, negative binomial, geometric, hypergeometric Poisson distributions) must indicated fixing argument discrete TRUE call functions cases. toxocara data set included package corresponds observation discrete variable. Numbers Toxocara cati parasites present digestive tract reported random sampling feral cats living Kerguelen island (Fromont et al. 2001). use illustrate case discrete data. fit discrete distribution discrete data maximum likelihood estimation requires procedure continuous non-censored data. example, using toxocara data set, Poisson negative binomial distributions can easily fitted. discrete distributions, plot object class fitdist simply provides two goodness--fit plots comparing empirical theoretical distributions density CDF. Functions cdfcomp denscomp can also used compare several plots data set, follows previous fits (Figure 3.6). Figure 3.6: Comparison fits negative binomial Poisson distribution numbers Toxocara cati parasites toxocara data set. fitting discrete distributions, Chi-squared statistic computed gofstat function using cells defined argument chisqbreaks cells automatically defined data order reach roughly number observations per cell. number roughly equal argument meancount, sligthly greater ties. choice define cells empirical distribution (data), theoretical distribution, done enable comparison Chi-squared values obtained different distributions fitted data set. 
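Returning to the bootstrap for censored fits mentioned above, a hedged sketch of bootdistcens on the salinity fit follows, with a confidence interval on a low quantile; niter is kept small purely to make the example quick.

```r
library(fitdistrplus)
data("salinity")
fsal.ln <- fitdistcens(salinity, "lnorm")
# nonparametric bootstrap of the censored fit
bsal.ln <- bootdistcens(fsal.ln, niter = 101)
summary(bsal.ln)
quantile(bsal.ln, probs = 0.05)   # uncertainty on a low quantile
```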
arguments chisqbreaks meancount omitted, meancount fixed order obtain roughly \\((4n)^{2/5}\\) cells, \\(n\\) length data set (Vose 2010). Using default option two previous fits compared follows, giving preference negative binomial distribution, Chi-squared statistics information criteria:","code":"data(\"salinity\") str(salinity) ## 'data.frame': 108 obs. of 2 variables: ## $ left : num 20 20 20 20 20 21.5 15 20 23.7 25 ... ## $ right: num NA NA NA NA NA 21.5 30 25 23.7 NA ... plotdistcens(salinity, NPMLE = FALSE) fsal.ln <- fitdistcens(salinity, \"lnorm\") fsal.ll <- fitdistcens(salinity, \"llogis\", start = list(shape = 5, scale = 40)) summary(fsal.ln) ## Fitting of the distribution ' lnorm ' By maximum likelihood on censored data ## Parameters ## estimate Std. Error ## meanlog 3.3854 0.06487 ## sdlog 0.4961 0.05455 ## Loglikelihood: -139.1 AIC: 282.1 BIC: 287.5 ## Correlation matrix: ## meanlog sdlog ## meanlog 1.0000 0.2938 ## sdlog 0.2938 1.0000 summary(fsal.ll) ## Fitting of the distribution ' llogis ' By maximum likelihood on censored data ## Parameters ## estimate Std. Error ## shape 3.421 0.4158 ## scale 29.930 1.9447 ## Loglikelihood: -140.1 AIC: 284.1 BIC: 289.5 ## Correlation matrix: ## shape scale ## shape 1.0000 -0.2022 ## scale -0.2022 1.0000 par(mfrow = c(2, 2)) cdfcompcens(list(fsal.ln, fsal.ll), legendtext = c(\"lognormal\", \"loglogistic \")) qqcompcens(fsal.ln, legendtext = \"lognormal\") ppcompcens(fsal.ln, legendtext = \"lognormal\") qqcompcens(list(fsal.ln, fsal.ll), legendtext = c(\"lognormal\", \"loglogistic \"), main = \"Q-Q plot with 2 dist.\") data(\"toxocara\") str(toxocara) ## 'data.frame': 53 obs. of 1 variable: ## $ number: int 0 0 0 0 0 0 0 0 0 0 ... (ftoxo.P <- fitdist(toxocara$number, \"pois\")) ## Fitting of the distribution ' pois ' by maximum likelihood ## Parameters: ## estimate Std. Error ## lambda 8.679 0.4047 (ftoxo.nb <- fitdist(toxocara$number, \"nbinom\")) ## Fitting of the distribution ' nbinom ' by maximum likelihood ## Parameters: ## estimate Std. Error ## size 0.3971 0.08289 ## mu 8.6803 1.93501 par(mfrow = c(1, 2)) denscomp(list(ftoxo.P, ftoxo.nb), legendtext = c(\"Poisson\", \"negative binomial\"), fitlty = 1) cdfcomp(list(ftoxo.P, ftoxo.nb), legendtext = c(\"Poisson\", \"negative binomial\"), fitlty = 1) gofstat(list(ftoxo.P, ftoxo.nb), fitnames = c(\"Poisson\", \"negative binomial\")) ## Chi-squared statistic: 31257 7.486 ## Degree of freedom of the Chi-squared distribution: 5 4 ## Chi-squared p-value: 0 0.1123 ## the p-value may be wrong with some theoretical counts < 5 ## Chi-squared table: ## obscounts theo Poisson theo negative binomial ## <= 0 14 0.009014 15.295 ## <= 1 8 0.078237 5.809 ## <= 3 6 1.321767 6.845 ## <= 4 6 2.131298 2.408 ## <= 9 6 29.827829 7.835 ## <= 21 6 19.626224 8.271 ## > 21 7 0.005631 6.537 ## ## Goodness-of-fit criteria ## Poisson negative binomial ## Akaike's Information Criterion 1017 322.7 ## Bayesian Information Criterion 1019 326.6"},{"path":"https://lbbe-software.github.io/fitdistrplus/articles/fitdistrplus_vignette.html","id":"ccl","dir":"Articles","previous_headings":"","what":"Conclusion","title":"Overview of the fitdistrplus package","text":"R package fitdistrplus allows easily fit distributions. main objective developing package provide tools helping R users fit distributions data. 
encouraged pursue work feedbacks users package various areas food environmental risk assessment, epidemiology, ecology, molecular biology, genomics, bioinformatics, hydraulics, mechanics, financial actuarial mathematics operations research. Indeed, package already used lot practionners academics simple MLE fits Voigt et al. (2014), MLE fits goodness--fit statistics Vaninsky (2013), MLE fits bootstrap Rigaux et al. (2014), MLE fits, bootstrap goodness--fit statistics (Larras, Montuelle, Bouchez 2013), MME fit Sato et al. (2013), censored MLE bootstrap Contreras, Huerta, Arnold (2013), graphic analysing (Anand, Yeturu, Chandra 2012), grouped-data fitting methods (Fu, Steiner, Costafreda 2012) generally Drake, Chalabi, Coker (2014). fitdistrplus package complementary distrMod package (Kohl Ruckdeschel 2010). distrMod provides even flexible way estimate distribution parameters use requires greater initial investment learn manipulate S4 classes methods developed distr-family packages. Many extensions fitdistrplus package planned future: target extend censored data methods moment available non-censored data, especially concerning goodness--fit evaluation fitting methods. also enlarge choice fitting methods non-censored data, proposing new goodness--fit distances (e.g., distances based quantiles) maximum goodness--fit estimation new types moments (e.g., limited expected values) moment matching estimation. last, consider case multivariate distribution fitting.","code":""},{"path":"https://lbbe-software.github.io/fitdistrplus/articles/fitdistrplus_vignette.html","id":"acknowledgments","dir":"Articles","previous_headings":"","what":"Acknowledgments","title":"Overview of the fitdistrplus package","text":"package stage without stimulating contribution Régis Pouillot Jean-Baptiste Denis, especially conceptualization. also want thank Régis Pouillot valuable comments first version paper. authors gratefully acknowledges two anonymous referees Editor useful constructive comments. remaining errors, course, attributed authors alone.","code":""},{"path":[]},{"path":[]},{"path":[]},{"path":"https://lbbe-software.github.io/fitdistrplus/articles/starting-values.html","id":"geometric-distribution","dir":"Articles","previous_headings":"1 Discrete distributions > 1.1 Base R distribution","what":"Geometric distribution","title":"Starting values used in fitdistrplus","text":"MME used \\(\\hat p=1/(1+m_1)\\).","code":""},{"path":"https://lbbe-software.github.io/fitdistrplus/articles/starting-values.html","id":"negative-binomial-distribution","dir":"Articles","previous_headings":"1 Discrete distributions > 1.1 Base R distribution","what":"Negative binomial distribution","title":"Starting values used in fitdistrplus","text":"MME used \\(\\hat n = m_1^2/(\\mu_2-m_1)\\).","code":""},{"path":"https://lbbe-software.github.io/fitdistrplus/articles/starting-values.html","id":"poisson-distribution","dir":"Articles","previous_headings":"1 Discrete distributions > 1.1 Base R distribution","what":"Poisson distribution","title":"Starting values used in fitdistrplus","text":"MME MLE \\(\\hat \\lambda = m_1\\).","code":""},{"path":"https://lbbe-software.github.io/fitdistrplus/articles/starting-values.html","id":"binomial-distribution","dir":"Articles","previous_headings":"1 Discrete distributions > 1.1 Base R distribution","what":"Binomial distribution","title":"Starting values used in fitdistrplus","text":"MME used \\[ Var[X]/E[X] = 1-p \\Rightarrow \\hat p = 1- \\mu_2/m_1. 
\\] size parameter \\[ \\hat n = \\lceil\\max(\\max_i x_i, m_1/\\hat p)\\rceil. \\]","code":""},{"path":"https://lbbe-software.github.io/fitdistrplus/articles/starting-values.html","id":"logarithmic-distribution","dir":"Articles","previous_headings":"1 Discrete distributions","what":"logarithmic distribution","title":"Starting values used in fitdistrplus","text":"expectation simplifies small values \\(p\\) \\[ E[X] = -\\frac{1}{\\log(1-p)}\\frac{p}{1-p} \\approx -\\frac{1}{-p}\\frac{p}{1-p} =\\frac{1}{1-p}. \\] initial estimate \\[ \\hat p = 1-1/m_1. \\]","code":""},{"path":"https://lbbe-software.github.io/fitdistrplus/articles/starting-values.html","id":"zero-truncated-distributions","dir":"Articles","previous_headings":"1 Discrete distributions","what":"Zero truncated distributions","title":"Starting values used in fitdistrplus","text":"distribution distribution \\(X\\vert X>0\\) \\(X\\) follows particular discrete distributions. Hence initial estimate one used base R sample \\(x-1\\).","code":""},{"path":"https://lbbe-software.github.io/fitdistrplus/articles/starting-values.html","id":"zero-modified-distributions","dir":"Articles","previous_headings":"1 Discrete distributions","what":"Zero modified distributions","title":"Starting values used in fitdistrplus","text":"MLE probability parameter empirical mass 0 \\(\\hat p_0=\\frac1n \\sum_i 1_{x_i=0}\\). estimators use classical estimator probability parameter \\(1-\\hat p_0\\).","code":""},{"path":"https://lbbe-software.github.io/fitdistrplus/articles/starting-values.html","id":"poisson-inverse-gaussian-distribution","dir":"Articles","previous_headings":"1 Discrete distributions","what":"Poisson inverse Gaussian distribution","title":"Starting values used in fitdistrplus","text":"first two moments \\[ E[X]=\\mu, Var[X] = \\mu+\\phi\\mu^3. \\] initial estimate \\[ \\hat\\mu=m_1, \\hat\\phi = (\\mu_2 - m_1)/m_1^3. \\]","code":""},{"path":[]},{"path":"https://lbbe-software.github.io/fitdistrplus/articles/starting-values.html","id":"normal-distribution","dir":"Articles","previous_headings":"2 Continuous distributions","what":"Normal distribution","title":"Starting values used in fitdistrplus","text":"MLE MME use empirical mean variance.","code":""},{"path":"https://lbbe-software.github.io/fitdistrplus/articles/starting-values.html","id":"lognormal-distribution","dir":"Articles","previous_headings":"2 Continuous distributions","what":"Lognormal distribution","title":"Starting values used in fitdistrplus","text":"log sample follows normal distribution, normal log sample.","code":""},{"path":"https://lbbe-software.github.io/fitdistrplus/articles/starting-values.html","id":"beta-distribution-of-the-first-kind","dir":"Articles","previous_headings":"2 Continuous distributions","what":"Beta distribution (of the first kind)","title":"Starting values used in fitdistrplus","text":"density function beta \\(\\mathcal (,b)\\) \\[ f_X(x) = \\frac{\\Gamma()\\Gamma(b)}{\\Gamma(+b)} x^{-1}(1-x)^{b-1}. 
\\] The initial estimate is computed from the MME \\[\\begin{equation} \\hat a = m_1 \\delta, \\hat b = (1-m_1)\\delta, \\delta = \\frac{m_1(1-m_1)}{\\mu_2}-1. \\tag{2.1} \\end{equation}\\]","code":""},{"path":[]},{"path":"https://lbbe-software.github.io/fitdistrplus/articles/starting-values.html","id":"log-gamma","dir":"Articles","previous_headings":"2 Continuous distributions > 2.4 Other continuous distribution in actuar","what":"Log-gamma","title":"Starting values used in fitdistrplus","text":"Use the gamma initial values on the sample \\(\\log(x)\\).","code":""},{"path":"https://lbbe-software.github.io/fitdistrplus/articles/starting-values.html","id":"gumbel","dir":"Articles","previous_headings":"2 Continuous distributions > 2.4 Other continuous distribution in actuar","what":"Gumbel","title":"Starting values used in fitdistrplus","text":"The distribution function is \\[ F(x) = \\exp(-\\exp(-\\frac{x-\\alpha}{\\theta})). \\] Let \\(q_1\\) and \\(q_3\\) be the first and third quartiles. \\[ \\left\\{\\begin{array}{l} -\\theta\\log(-\\log(p_1)) = q_1-\\alpha \\\\ -\\theta\\log(-\\log(p_3)) = q_3-\\alpha \\end{array}\\right. \\Leftrightarrow \\left\\{\\begin{array}{l} -\\theta\\log(-\\log(p_1))+\\theta\\log(-\\log(p_3)) = q_1-q_3 \\\\ \\alpha= \\theta\\log(-\\log(p_3)) + q_3 \\end{array}\\right. \\Leftrightarrow \\left\\{\\begin{array}{l} \\theta= \\frac{q_1-q_3}{\\log(-\\log(p_3)) - \\log(-\\log(p_1))} \\\\ \\alpha= \\theta\\log(-\\log(p_3)) + q_3 \\end{array}\\right.. \\] Using the median for the location parameter \\(\\alpha\\) yields the initial estimates \\[ \\hat\\theta= \\frac{q_1-q_3}{\\log(\\log(4/3)) - \\log(\\log(4))}, \\hat\\alpha = \\hat\\theta\\log(\\log(2)) + q_2. \\]","code":""},{"path":"https://lbbe-software.github.io/fitdistrplus/articles/starting-values.html","id":"inverse-gaussian-distribution","dir":"Articles","previous_headings":"2 Continuous distributions > 2.4 Other continuous distribution in actuar","what":"Inverse Gaussian distribution","title":"Starting values used in fitdistrplus","text":"The moments of this distribution are \\[ E[X] = \\mu, Var[X] = \\mu^3\\phi. \\] Hence the initial estimates are \\(\\hat\\mu=m_1\\), \\(\\hat\\phi=\\mu_2/m_1^3\\).","code":""},{"path":"https://lbbe-software.github.io/fitdistrplus/articles/starting-values.html","id":"generalized-beta","dir":"Articles","previous_headings":"2 Continuous distributions > 2.4 Other continuous distribution in actuar","what":"Generalized beta","title":"Starting values used in fitdistrplus","text":"This is the distribution of \\(\\theta X^{1/\\tau}\\) where \\(X\\) is beta distributed \\(\\mathcal B(a,b)\\). Its moments are \\[ E[X] = \\theta \\beta(a+1/\\tau, b)/\\beta(a,b) = \\theta \\frac{\\Gamma(a+1/\\tau)}{\\Gamma(a)}\\frac{\\Gamma(a+b)}{\\Gamma(a+b+1/\\tau)}, \\] \\[ E[X^2] = \\theta^2 \\frac{\\Gamma(a+2/\\tau)}{\\Gamma(a)}\\frac{\\Gamma(a+b)}{\\Gamma(a+b+2/\\tau)}. \\] Hence for a large value of \\(\\tau\\), \\[ E[X^2] /E[X] = \\theta \\frac{\\Gamma(a+2/\\tau)}{\\Gamma(a+b+2/\\tau)} \\frac{\\Gamma(a+b+1/\\tau)}{\\Gamma(a+1/\\tau)} \\approx \\theta. \\] Note that the MLE of \\(\\theta\\) would be the sample maximum. We use \\[ \\hat\\tau=3, \\hat\\theta = \\frac{m_2}{m_1}\\max_i x_i 1_{m_2>m_1} +\\frac{m_1}{m_2}\\max_i x_i 1_{m_2\\leq m_1}. 
\\] use beta initial estimate sample \\((\\frac{x_i}{\\hat\\theta})^{\\hat\\tau}\\).","code":""},{"path":"https://lbbe-software.github.io/fitdistrplus/articles/starting-values.html","id":"feller-pareto-family","dir":"Articles","previous_headings":"2 Continuous distributions","what":"Feller-Pareto family","title":"Starting values used in fitdistrplus","text":"Feller-Pareto distribution distribution \\(X=\\mu+\\theta(1/B-1)^{1/\\gamma}\\) \\(B\\) follows beta distribution shape parameters \\(\\alpha\\) \\(\\tau\\). See details https://doi.org/10.18637/jss.v103.i06 Hence let \\(Y = (X-\\mu)/\\theta\\), \\[ \\frac{Y}{1+Y} = \\frac{X-\\mu}{\\theta+X-\\mu} = (1-B)^{1/\\gamma}. \\] \\(\\gamma\\) close 1, \\(\\frac{Y}{1+Y}\\) approximately beta distributed \\(\\tau\\) \\(\\alpha\\). log-likelihood \\[\\begin{equation} \\mathcal L(\\mu, \\theta, \\alpha, \\gamma, \\tau) = (\\tau \\gamma - 1) \\sum_{} \\log(\\frac{x_i-\\mu}\\theta) - (\\alpha+\\tau)\\sum_i \\log(1+(\\frac{x_i-\\mu}\\theta)^\\gamma) + n\\log(\\gamma) - n\\log(\\theta) -n \\log(\\beta(\\alpha,\\tau)). \\tag{2.2}. \\end{equation}\\] MLE \\(\\mu\\) minimum. gradient respect \\(\\theta, \\alpha, \\gamma, \\tau\\) \\[\\begin{equation} \\nabla \\mathcal L(\\mu, \\theta, \\alpha, \\gamma, \\tau) = \\begin{pmatrix} -(\\tau \\gamma - 1) \\sum_{} \\frac{x_i}{\\theta(x_i-\\mu)} + (\\alpha+\\tau)\\sum_i \\frac{x_i\\gamma(\\frac{x_i-\\mu}\\theta)^{\\gamma-1}}{\\theta^2(1+(\\frac{x_i-\\mu}\\theta)^\\gamma)} - n/\\theta \\\\ - \\sum_i \\log(1+(\\frac{x_i-\\mu}\\theta)^\\gamma) -n(\\psi(\\tau) - \\psi(\\alpha+\\tau)) \\\\ (\\tau - 1) \\sum_{} \\log(\\frac{x_i-\\mu}\\theta) - (\\alpha+\\tau)\\sum_i \\frac{(\\frac{x_i-\\mu}\\theta)^\\gamma}{ 1+(\\frac{x_i-\\mu}\\theta)^\\gamma}\\log(\\frac{x_i-\\mu}\\theta) + n/\\gamma \\\\ (\\gamma - 1) \\sum_{} \\log(\\frac{x_i-\\mu}\\theta) - \\sum_i \\log(1+(\\frac{x_i-\\mu}\\theta)^\\gamma) -n (\\psi(\\tau) - \\psi(\\alpha+\\tau)) \\end{pmatrix}. \\tag{2.3} \\end{equation}\\] Cancelling first component score \\(\\gamma=\\alpha=2\\), get \\[ -(2\\tau - 1) \\sum_{} \\frac{x_i}{\\theta(x_i-\\mu)} + (2+\\tau)\\sum_i \\frac{x_i 2(x_i-\\mu)}{\\theta^3(1+(\\frac{x_i-\\mu}\\theta)^2)} = \\frac{n}{\\theta} \\Leftrightarrow -(2\\tau - 1)\\theta^2\\frac1n \\sum_{} \\frac{x_i}{x_i-\\mu} + (2+\\tau) \\frac1n\\sum_i \\frac{x_i 2(x_i-\\mu)}{(1+(\\frac{x_i-\\mu}\\theta)^2)} = \\theta^2 \\] \\[ \\Leftrightarrow (2+\\tau) \\frac1n\\sum_i \\frac{x_i 2(x_i-\\mu)}{1+(\\frac{x_i-\\mu}\\theta)^2} = (2\\tau - 1)\\theta^2\\left(\\frac1n \\sum_{} \\frac{x_i}{x_i-\\mu} -1\\right) \\Leftrightarrow \\sqrt{ \\frac{(2+\\tau) \\frac1n\\sum_i \\frac{x_i 2(x_i-\\mu)}{1+(\\frac{x_i-\\mu}\\theta)^2} }{(2\\tau - 1)\\left(\\frac1n \\sum_{} \\frac{x_i}{x_i-\\mu} -1\\right)} } = \\theta. \\] Neglecting unknown value \\(\\tau\\) denominator \\(\\theta\\), get \\(\\hat\\mu\\) set ((2.16)) \\[\\begin{equation} \\hat\\theta = \\sqrt{ \\frac{ \\frac1n\\sum_i \\frac{x_i 2(x_i-\\hat\\mu)}{1+(x_i-\\hat\\mu)^2} }{\\left(\\frac1n \\sum_{} \\frac{x_i}{x_i-\\hat\\mu} -1\\right)} }. \\tag{2.4} \\end{equation}\\] Initial value \\(\\tau,\\alpha\\) obtained sample \\((z_i)_i\\) \\[ z_i = y_i/(1+y_i), y_i = (x_i - \\hat\\mu)/\\hat\\theta, \\] initial values beta distribution based MME ((2.1)). 
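Since this recipe, like several later ones, falls back on the beta moment-matching formula (2.1), here is a hedged sketch of that formula written as a small helper; the function name and the simulated sample are illustrative, this is not the internal fitdistrplus code.

```r
# illustrative re-implementation of the beta MME starting values of (2.1)
beta_start_mme <- function(x) {
  m1  <- mean(x)
  mu2 <- mean((x - m1)^2)               # empirical central moment of order 2
  delta <- m1 * (1 - m1) / mu2 - 1
  list(shape1 = m1 * delta, shape2 = (1 - m1) * delta)
}
set.seed(2)
beta_start_mme(rbeta(300, shape1 = 2, shape2 = 5))   # simulated sample
```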
Cancelling last component gradient leads \\[ (\\gamma - 1) \\frac1n\\sum_{} \\log(\\frac{x_i-\\mu}\\theta) - \\frac1n\\sum_i \\log(1+(\\frac{x_i-\\mu}\\theta)^\\gamma) = \\psi(\\tau) - \\psi(\\alpha+\\tau) \\Leftrightarrow (\\gamma - 1) \\frac1n\\sum_{} \\log(\\frac{x_i-\\mu}\\theta) = \\psi(\\tau) - \\psi(\\alpha+\\tau) +\\frac1n\\sum_i \\log(1+(\\frac{x_i-\\mu}\\theta)^\\gamma) . \\] Neglecting value \\(\\gamma\\) right-hand side obtain \\[\\begin{equation} \\hat\\gamma = 1+ \\frac{ \\psi(\\tau) - \\psi(\\alpha+\\tau) +\\frac1n\\sum_i \\log(1+(\\frac{x_i-\\mu}\\theta)) }{ \\frac1n\\sum_{} \\log(\\frac{x_i-\\mu}\\theta) }. \\tag{2.5} \\end{equation}\\]","code":""},{"path":"https://lbbe-software.github.io/fitdistrplus/articles/starting-values.html","id":"transformed-beta","dir":"Articles","previous_headings":"2 Continuous distributions > 2.5 Feller-Pareto family","what":"Transformed beta","title":"Starting values used in fitdistrplus","text":"Feller-Pareto \\(\\mu=0\\). first component (2.3) simplifies \\(\\gamma=\\alpha=2\\) \\[ -(2\\tau - 1) \\sum_{} \\frac{x_i}{\\theta(x_i)} + (2+\\tau)\\sum_i \\frac{2x_i^2}{\\theta^3(1+(\\frac{x_i}\\theta)^2)} = \\frac{n}{\\theta} \\Leftrightarrow -(2\\tau - 1) \\theta^2 + (2+\\tau)\\frac1n\\sum_i \\frac{2x_i^2}{1+(\\frac{x_i}\\theta)^2} = \\theta^2 \\] \\[ \\theta^2=\\frac{2+\\tau}{2\\tau}\\frac1n\\sum_i \\frac{2x_i^2}{1+(\\frac{x_i}\\theta)^2}. \\] Neglecting unknown value \\(\\tau\\) denominator \\(\\theta\\), get \\[\\begin{equation} \\hat\\theta = \\sqrt{ \\frac1n\\sum_i \\frac{2x_i^2}{1+x_i^2} }. \\tag{2.6} \\end{equation}\\] Initial value \\(\\tau,\\alpha\\) obtained sample \\((z_i)_i\\) \\[ z_i = y_i/(1+y_i), y_i = x_i/\\hat\\theta, \\] initial values beta distribution based MME ((2.1)). Similar Feller-Pareto, set \\[\\begin{equation} \\hat\\gamma = 1+ \\frac{ \\psi(\\tau) - \\psi(\\alpha+\\tau) +\\frac1n\\sum_i \\log(1+\\frac{x_i}\\theta) }{ \\frac1n\\sum_{} \\log(\\frac{x_i}\\theta) }. \\tag{2.5} \\end{equation}\\]","code":""},{"path":"https://lbbe-software.github.io/fitdistrplus/articles/starting-values.html","id":"generalized-pareto","dir":"Articles","previous_headings":"2 Continuous distributions > 2.5 Feller-Pareto family","what":"Generalized Pareto","title":"Starting values used in fitdistrplus","text":"Feller-Pareto \\(\\mu=0\\) \\(\\gamma=1\\). first component (2.3) simplifies \\(\\gamma=2\\) \\[ -(\\tau - 1) \\frac{n}{\\theta} + (2+\\tau)\\sum_i \\frac{x_i}{\\theta^2(1+\\frac{x_i}\\theta} = n/\\theta \\Leftrightarrow -(\\tau - 1) \\theta + (2+\\tau)\\frac1n\\sum_i \\frac{x_i}{(1+\\frac{x_i}\\theta} = \\theta. \\] Neglecting unknown value \\(\\tau\\) leads \\[\\begin{equation} \\hat\\theta = \\frac1n\\sum_i \\frac{x_i}{1+x_i} \\tag{2.7} \\end{equation}\\] Initial value \\(\\tau,\\alpha\\) obtained sample \\((z_i)_i\\) \\[ z_i = y_i/(1+y_i), y_i = x_i/\\hat\\theta, \\] initial values beta distribution based MME ((2.1)).","code":""},{"path":"https://lbbe-software.github.io/fitdistrplus/articles/starting-values.html","id":"burr","dir":"Articles","previous_headings":"2 Continuous distributions > 2.5 Feller-Pareto family","what":"Burr","title":"Starting values used in fitdistrplus","text":"Burr Feller-Pareto distribution \\(\\mu=0\\), \\(\\tau=1\\). survival function \\[ 1-F(x) = (1+(x/\\theta)^\\gamma)^{-\\alpha}. \\] Using median \\(q_2\\), \\[ \\log(1/2) = - \\alpha \\log(1+(q_2/\\theta)^\\gamma). 
\\] initial value \\[\\begin{equation} \\alpha = \\frac{\\log(2)}{\\log(1+(q_2/\\theta)^\\gamma)}, \\tag{2.8} \\end{equation}\\] first component (2.3) simplifies \\(\\gamma=\\alpha=2\\), \\(\\tau=1\\), \\(\\mu=0\\). \\[ - n/\\theta + 3\\sum_i \\frac{2x_i(\\frac{x_i}\\theta)}{\\theta^2(1+(\\frac{x_i}\\theta)^2)} = n/\\theta \\Leftrightarrow \\theta^2\\frac1n\\sum_i \\frac{2x_i(\\frac{x_i}\\theta)}{(1+(\\frac{x_i}\\theta)^2)} = 2/3. \\] Neglecting unknown value denominator \\(\\theta\\), get \\[\\begin{equation} \\hat\\theta = \\sqrt{ \\frac{2}{3 \\frac1n\\sum_i \\frac{2x_i^2}{1+(x_i)^2} } }. \\tag{2.6} \\end{equation}\\] use \\(\\hat\\gamma\\) (2.5) \\(\\tau=1\\) \\(\\alpha=2\\) previous \\(\\hat\\theta\\).","code":""},{"path":"https://lbbe-software.github.io/fitdistrplus/articles/starting-values.html","id":"loglogistic","dir":"Articles","previous_headings":"2 Continuous distributions > 2.5 Feller-Pareto family","what":"Loglogistic","title":"Starting values used in fitdistrplus","text":"Loglogistic Feller-Pareto distribution \\(\\mu=0\\), \\(\\tau=1\\), \\(\\alpha=1\\). survival function \\[ 1-F(x) = (1+(x/\\theta)^\\gamma)^{-1}. \\] \\[ \\frac1{1-F(x)}-1 = (x/\\theta)^\\gamma \\Leftrightarrow \\log(\\frac{F(x)}{1-F(x)}) = \\gamma\\log(x/\\theta). \\] Let \\(q_1\\) \\(q_3\\) first third quartile. \\[ \\log(\\frac{1/3}{2/3})= \\gamma\\log(q_1/\\theta), \\log(\\frac{2/3}{1/3})= \\gamma\\log(q_3/\\theta) \\Leftrightarrow -\\log(2)= \\gamma\\log(q_1/\\theta), \\log(2)= \\gamma\\log(q_3/\\theta). \\] difference previous equations simplifies \\[ \\hat\\gamma=\\frac{2\\log(2)}{\\log(q_3/q_1)}. \\] sum previous equations \\[ 0 = \\gamma\\log(q_1)+\\gamma\\log(q_3) - 2\\gamma\\log(\\theta). \\] \\[\\begin{equation} \\hat\\theta = \\frac12 e^{\\log(q_1q_3)}. \\tag{2.9} \\end{equation}\\]","code":""},{"path":"https://lbbe-software.github.io/fitdistrplus/articles/starting-values.html","id":"paralogistic","dir":"Articles","previous_headings":"2 Continuous distributions > 2.5 Feller-Pareto family","what":"Paralogistic","title":"Starting values used in fitdistrplus","text":"Paralogistic Feller-Pareto distribution \\(\\mu=0\\), \\(\\tau=1\\), \\(\\alpha=\\gamma\\). survival function \\[ 1-F(x) = (1+(x/\\theta)^\\alpha)^{-\\alpha}. \\] \\[ \\log(1-F(x)) = -\\alpha \\log(1+(x/\\theta)^\\alpha). \\] log-likelihood \\[\\begin{equation} \\mathcal L(\\theta, \\alpha) = ( \\alpha - 1) \\sum_{} \\log(\\frac{x_i}\\theta) - (\\alpha+1)\\sum_i \\log(1+(\\frac{x_i}\\theta)^\\alpha) + 2n\\log(\\alpha) - n\\log(\\theta). \\tag{2.10} \\end{equation}\\] gradient respect \\(\\theta\\), \\(\\alpha\\) \\[ \\begin{pmatrix} ( \\alpha - 1)\\frac{-n}{\\theta} - (\\alpha+1)\\sum_i \\frac{-x_i\\alpha(x_i/\\theta)^{\\alpha-1}}{1+(\\frac{x_i}\\theta)^\\alpha} - n/\\theta \\\\ \\sum_{} \\log(\\frac{ \\frac{x_i}\\theta}{1+(\\frac{x_i}\\theta)^\\alpha }) - (\\alpha+1)\\sum_i \\frac{(\\frac{x_i}\\theta)^\\alpha \\log(x_i/\\theta)}{1+(\\frac{x_i}\\theta)^\\alpha} + 2n/\\alpha \\\\ \\end{pmatrix}. \\] first component cancels \\[ - (\\alpha+1)\\sum_i \\frac{-x_i\\alpha(x_i/\\theta)^{\\alpha-1}}{1+(\\frac{x_i}\\theta)^\\alpha} = \\alpha n/\\theta \\Leftrightarrow (\\alpha+1)\\frac1n\\sum_i \\frac{ (x_i)^{\\alpha+1}}{1+(\\frac{x_i}\\theta)^\\alpha} = \\theta^\\alpha. \\] second component cancels \\[ \\frac1n\\sum_{} \\log(\\frac{ \\frac{x_i}\\theta}{1+(\\frac{x_i}\\theta)^\\alpha }) = -2/\\alpha +(\\alpha+1)\\frac1n\\sum_i \\frac{(\\frac{x_i}\\theta)^\\alpha \\log(x_i/\\theta)}{1+(\\frac{x_i}\\theta)^\\alpha}. 
\\] Choosing \\(\\theta=1\\), \\(\\alpha=2\\) sums leads \\[ \\frac1n\\sum_{} \\log(\\frac{ \\frac{x_i}\\theta}{1+x_i^2 }) - \\frac1n\\sum_i \\frac{x_i^2\\log(x_i)}{1+x_i^2} = -2/\\alpha +(\\alpha)\\frac1n\\sum_i \\frac{x_i^2\\log(x_i)}{1+x_i^2}. \\] Initial estimators \\[\\begin{equation} \\hat\\alpha = \\frac{ \\frac1n\\sum_{} \\log(\\frac{ x_i}{1+x_i^2 }) - \\frac1n\\sum_i \\frac{x_i^2\\log(x_i)}{1+x_i^2} }{ \\frac1n\\sum_i \\frac{x_i^2\\log(x_i)}{1+x_i^2} - 2 }, \\tag{2.11} \\end{equation}\\] \\[\\begin{equation} \\hat\\theta = (\\hat\\alpha+1)\\frac1n\\sum_i \\frac{ (x_i)^{\\hat\\alpha+1}}{1+(x_i)^{\\hat\\alpha}}. \\tag{2.12} \\end{equation}\\]","code":""},{"path":"https://lbbe-software.github.io/fitdistrplus/articles/starting-values.html","id":"inverse-burr","dir":"Articles","previous_headings":"2 Continuous distributions > 2.5 Feller-Pareto family","what":"Inverse Burr","title":"Starting values used in fitdistrplus","text":"Use Burr estimate sample \\(1/x\\)","code":""},{"path":"https://lbbe-software.github.io/fitdistrplus/articles/starting-values.html","id":"inverse-paralogistic","dir":"Articles","previous_headings":"2 Continuous distributions > 2.5 Feller-Pareto family","what":"Inverse paralogistic","title":"Starting values used in fitdistrplus","text":"Use paralogistic estimate sample \\(1/x\\)","code":""},{"path":"https://lbbe-software.github.io/fitdistrplus/articles/starting-values.html","id":"inverse-pareto","dir":"Articles","previous_headings":"2 Continuous distributions > 2.5 Feller-Pareto family","what":"Inverse pareto","title":"Starting values used in fitdistrplus","text":"Use pareto estimate sample \\(1/x\\)","code":""},{"path":"https://lbbe-software.github.io/fitdistrplus/articles/starting-values.html","id":"pareto-iv","dir":"Articles","previous_headings":"2 Continuous distributions > 2.5 Feller-Pareto family","what":"Pareto IV","title":"Starting values used in fitdistrplus","text":"survival function \\[ 1-F(x) = \\left(1+ \\left(\\frac{x-\\mu}{\\theta}\\right)^{\\gamma} \\right)^{-\\alpha}, \\] see ?Pareto4 actuar. first third quartiles \\(q_1\\) \\(q_3\\) verify \\[ ((\\frac34)^{-1/\\alpha}-1)^{1/\\gamma} = \\frac{q_1-\\mu}{\\theta}, ((\\frac14)^{-1/\\alpha}-1)^{1/\\gamma} = \\frac{q_3-\\mu}{\\theta}. \\] Hence get two useful relations \\[\\begin{equation} \\gamma = \\frac{ \\log\\left( \\frac{ (\\frac43)^{1/\\alpha}-1 }{ (4)^{1/\\alpha}-1 } \\right) }{ \\log\\left(\\frac{q_1-\\mu}{q_3-\\mu}\\right) }, \\tag{2.13} \\end{equation}\\] \\[\\begin{equation} \\theta = \\frac{q_1- q_3 }{ ((\\frac43)^{1/\\alpha}-1)^{1/\\gamma} - ((4)^{1/\\alpha}-1)^{1/\\gamma} }. \\tag{2.14} \\end{equation}\\] log-likelihood Pareto 4 sample (see Equation (5.2.94) Arnold (2015) updated Goulet et al. notation) \\[ \\mathcal L(\\mu,\\theta,\\gamma,\\alpha) = (\\gamma -1) \\sum_i \\log(\\frac{x_i-\\mu}{\\theta}) -(\\alpha+1)\\sum_i \\log(1+ (\\frac{x_i-\\mu}{\\theta})^{\\gamma}) +n\\log(\\gamma) -n\\log(\\theta)+n\\log(\\alpha). \\] Cancelling derivate \\(\\mathcal L(\\mu,\\theta,\\gamma,\\alpha)\\) respect \\(\\alpha\\) leads \\[\\begin{equation} \\alpha =n/\\sum_i \\log(1+ (\\frac{x_i-\\mu}{\\theta})^{\\gamma}). \\tag{2.15} \\end{equation}\\] MLE threshold parameter \\(\\mu\\) minimum. initial estimate slightly minimum order observations strictly \\[\\begin{equation} \\hat\\mu = \\left\\{ \\begin{array}{ll} (1-\\epsilon) \\min_i x_i & \\text{} \\min_i x_i <0 \\\\ (1+\\epsilon)\\min_i x_i & \\text{} \\min_i x_i \\geq 0 \\\\ \\end{array} \\right. . 
\\tag{2.16} \\end{equation}\\] \\(\\epsilon=0.05\\). Initial parameter estimation \\(\\hat\\mu\\), \\(\\alpha^\\star = 2\\) , \\(\\hat\\gamma\\) (2.13) \\(\\alpha^\\star\\), \\(\\hat\\theta\\) (2.14) \\(\\alpha^\\star\\) \\(\\hat\\gamma\\), \\(\\hat\\alpha\\) (2.15) \\(\\hat\\mu\\), \\(\\hat\\theta\\) \\(\\hat\\gamma\\).","code":""},{"path":"https://lbbe-software.github.io/fitdistrplus/articles/starting-values.html","id":"pareto-iii","dir":"Articles","previous_headings":"2 Continuous distributions > 2.5 Feller-Pareto family","what":"Pareto III","title":"Starting values used in fitdistrplus","text":"Pareto III corresponds Pareto IV \\(\\alpha=1\\). \\[\\begin{equation} \\gamma = \\frac{ \\log\\left( \\frac{ \\frac43-1 }{ 4-1 } \\right) }{ \\log\\left(\\frac{q_1-\\mu}{q_3-\\mu}\\right) }, \\label{eq:pareto3:gamma:relation} \\end{equation}\\] \\[\\begin{equation} \\theta = \\frac{ (\\frac13)^{1/\\gamma} - (3)^{1/\\gamma} }{q_1- q_3 }. \\label{eq:pareto3:theta:relation} \\end{equation}\\] Initial parameter estimation \\(\\hat\\mu\\), \\(\\hat\\gamma\\) \\(\\eqref{eq:pareto3:gamma:relation}\\), \\(\\hat\\theta\\) \\(\\eqref{eq:pareto3:theta:relation}\\) \\(\\hat\\gamma\\).","code":""},{"path":"https://lbbe-software.github.io/fitdistrplus/articles/starting-values.html","id":"pareto-ii","dir":"Articles","previous_headings":"2 Continuous distributions > 2.5 Feller-Pareto family","what":"Pareto II","title":"Starting values used in fitdistrplus","text":"Pareto II corresponds Pareto IV \\(\\gamma=1\\). \\[\\begin{equation} \\theta = \\frac{ (\\frac43)^{1/\\alpha} - 4^{1/\\alpha} }{q_1- q_3 }. \\label{eq:pareto2:theta:relation} \\end{equation}\\] Initial parameter estimation \\(\\hat\\mu\\), \\(\\alpha^\\star = 2\\) , \\(\\hat\\theta\\) \\(\\eqref{eq:pareto4:theta:relation}\\) \\(\\alpha^\\star\\) \\(\\gamma=1\\), \\(\\hat\\alpha\\) \\(\\eqref{eq:pareto4:alpha:relation}\\) \\(\\hat\\mu\\), \\(\\hat\\theta\\) \\(\\gamma=1\\),","code":""},{"path":"https://lbbe-software.github.io/fitdistrplus/articles/starting-values.html","id":"pareto-i","dir":"Articles","previous_headings":"2 Continuous distributions > 2.5 Feller-Pareto family","what":"Pareto I","title":"Starting values used in fitdistrplus","text":"Pareto corresponds Pareto IV \\(\\gamma=1\\), \\(\\mu=\\theta\\). MLE \\[\\begin{equation} \\hat\\mu = \\min_i X_i, \\hat\\alpha = \\left(\\frac1n \\sum_{=1}^n \\log(X_i/\\hat\\mu) \\right)^{-1}. \\label{eq:pareto1:alpha:mu:relation} \\end{equation}\\] can rewritten geometric mean sample \\(G_n = (\\prod_{=1}^n X_i)^{1/n}\\) \\[ \\hat\\alpha = \\log(G_n/\\hat\\mu). \\] Initial parameter estimation \\(\\hat\\mu\\), \\(\\hat\\alpha\\) \\(\\eqref{eq:pareto1:alpha:mu:relation}\\).","code":""},{"path":"https://lbbe-software.github.io/fitdistrplus/articles/starting-values.html","id":"pareto","dir":"Articles","previous_headings":"2 Continuous distributions > 2.5 Feller-Pareto family","what":"Pareto","title":"Starting values used in fitdistrplus","text":"Pareto corresponds Pareto IV \\(\\gamma=1\\), \\(\\mu=0\\). \\[\\begin{equation} \\theta = \\frac{ (\\frac43)^{1/\\alpha} - 4^{1/\\alpha} }{q_1- q_3 }. 
\\label{eq:pareto:theta:relation} \\end{equation}\\] Initial parameter estimation \\[ \\alpha^\\star = \\max(2, 2(m_2-m_1^2)/(m_2-2m_1^2)), \\] \\(m_i\\) empirical raw moment order \\(\\), \\(\\hat\\theta\\) \\(\\eqref{eq:pareto4:theta:relation}\\) \\(\\alpha^\\star\\) \\(\\gamma=1\\), \\(\\hat\\alpha\\) \\(\\eqref{eq:pareto4:alpha:relation}\\) \\(\\mu=0\\), \\(\\hat\\theta\\) \\(\\gamma=1\\).","code":""},{"path":[]},{"path":"https://lbbe-software.github.io/fitdistrplus/articles/starting-values.html","id":"transformed-gamma-distribution","dir":"Articles","previous_headings":"2 Continuous distributions > 2.6 Transformed gamma family","what":"Transformed gamma distribution","title":"Starting values used in fitdistrplus","text":"log-likelihood given \\[ \\mathcal L(\\alpha,\\tau,\\theta) = n\\log(\\tau) + \\alpha\\tau\\sum_i \\log(x_i/\\theta) -\\sum_i (x_i/\\theta)^\\tau - \\sum_i\\log(x_i) - n\\log(Gamma(\\alpha)). \\] gradient respect \\(\\alpha,\\tau,\\theta\\) given \\[ \\begin{pmatrix} \\tau- n\\psi(\\alpha)) \\\\ n/\\tau + \\alpha\\sum_i \\log(x_i/\\theta) -\\sum_i (x_i/\\theta)^{\\tau} \\log(x_i/\\theta) \\\\ -\\alpha\\tau /\\theta +\\sum_i \\tau \\frac{x_i}{\\theta^2}(x_i/\\theta)^{\\tau-1} \\end{pmatrix}. \\] compute moment-estimator gamma \\(\\eqref{eq:gamma:relation}\\) \\[ \\hat\\alpha = m_2^2/\\mu_2, \\hat\\theta= \\mu_2/m_1. \\] cancelling first component gradient set \\[ \\hat\\tau = \\frac{\\psi(\\hat\\alpha)}{\\frac1n\\sum_i \\log(x_i/\\hat\\theta) }. \\]","code":""},{"path":"https://lbbe-software.github.io/fitdistrplus/articles/starting-values.html","id":"gamma-distribution","dir":"Articles","previous_headings":"2 Continuous distributions > 2.6 Transformed gamma family","what":"gamma distribution","title":"Starting values used in fitdistrplus","text":"Transformed gamma \\(\\tau=1\\) compute moment-estimator given \\[\\begin{equation} \\hat\\alpha = m_2^2/\\mu_2, \\hat\\theta= \\mu_2/m_1. \\label{eq:gamma:relation} \\end{equation}\\]","code":""},{"path":"https://lbbe-software.github.io/fitdistrplus/articles/starting-values.html","id":"weibull-distribution","dir":"Articles","previous_headings":"2 Continuous distributions > 2.6 Transformed gamma family","what":"Weibull distribution","title":"Starting values used in fitdistrplus","text":"Transformed gamma \\(\\alpha=1\\) Let \\(\\tilde m=\\frac1n\\sum_i \\log(x_i)\\) \\(\\tilde v=\\frac1n\\sum_i (\\log(x_i) - \\tilde m)^2\\). use approximate MME \\[ \\hat\\tau = 1.2/sqrt(\\tilde v), \\hat\\theta = exp(\\tilde m + 0.572/\\hat \\tau). \\] Alternatively, can use distribution function \\[ F(x) = 1 - e^{-(x/\\sigma)^\\tau} \\Rightarrow \\log(-\\log(1-F(x))) = \\tau\\log(x) - \\tau\\log(\\theta), \\] Hence QME Weibull \\[ \\tilde\\tau = \\frac{ \\log(-\\log(1-p_1)) - \\log(-\\log(1-p_2)) }{ \\log(x_1) - \\log(x_2) }, \\tilde\\tau = x_3/(-\\log(1-p_3))^{1/\\tilde\\tau} \\] \\(p_1=1/4\\), \\(p_2=3/4\\), \\(p_3=1/2\\), \\(x_i\\) corresponding empirical quantiles. 
Initial parameters \\(\\tilde\\tau\\) \\(\\tilde\\theta\\) unless empirical quantiles \\(x_1=x_2\\), case use \\(\\hat\\tau\\), \\(\\hat\\theta\\).","code":""},{"path":"https://lbbe-software.github.io/fitdistrplus/articles/starting-values.html","id":"exponential-distribution","dir":"Articles","previous_headings":"2 Continuous distributions > 2.6 Transformed gamma family","what":"Exponential distribution","title":"Starting values used in fitdistrplus","text":"MLE MME \\(\\hat\\lambda = 1/m_1.\\)","code":""},{"path":[]},{"path":"https://lbbe-software.github.io/fitdistrplus/articles/starting-values.html","id":"inverse-transformed-gamma-distribution","dir":"Articles","previous_headings":"2 Continuous distributions > 2.7 Inverse transformed gamma family","what":"Inverse transformed gamma distribution","title":"Starting values used in fitdistrplus","text":"transformed gamma distribution \\((1/x_i)_i\\).","code":""},{"path":"https://lbbe-software.github.io/fitdistrplus/articles/starting-values.html","id":"inverse-gamma-distribution","dir":"Articles","previous_headings":"2 Continuous distributions > 2.7 Inverse transformed gamma family","what":"Inverse gamma distribution","title":"Starting values used in fitdistrplus","text":"compute moment-estimator \\[ \\hat\\alpha = (2m_2-m_1^2)/(m_2-m_1^2), \\hat\\theta= m_1m_2/(m_2-m_1^2). \\]","code":""},{"path":"https://lbbe-software.github.io/fitdistrplus/articles/starting-values.html","id":"inverse-weibull-distribution","dir":"Articles","previous_headings":"2 Continuous distributions > 2.7 Inverse transformed gamma family","what":"Inverse Weibull distribution","title":"Starting values used in fitdistrplus","text":"use QME.","code":""},{"path":"https://lbbe-software.github.io/fitdistrplus/articles/starting-values.html","id":"inverse-exponential","dir":"Articles","previous_headings":"2 Continuous distributions > 2.7 Inverse transformed gamma family","what":"Inverse exponential","title":"Starting values used in fitdistrplus","text":"transformed gamma distribution \\((1/x_i)_i\\).","code":""},{"path":[]},{"path":"https://lbbe-software.github.io/fitdistrplus/articles/starting-values.html","id":"general-books","dir":"Articles","previous_headings":"3 Bibliography","what":"General books","title":"Starting values used in fitdistrplus","text":"N. L. Johnson, S. Kotz, N. Balakrishnan (1994). Continuous univariate distributions, Volume 1, Wiley. N. L. Johnson, S. Kotz, N. Balakrishnan (1995). Continuous univariate distributions, Volume 2, Wiley. N. L. Johnson, . W. Kemp, S. Kotz (2008). Univariate discrete distributions, Wiley. G. Wimmer (1999), Thesaurus univariate discrete probability distributions.","code":""},{"path":"https://lbbe-software.github.io/fitdistrplus/articles/starting-values.html","id":"books-dedicated-to-a-distribution-family","dir":"Articles","previous_headings":"3 Bibliography","what":"Books dedicated to a distribution family","title":"Starting values used in fitdistrplus","text":"M. Ahsanullah, B.M. Golam Kibria, M. Shakil (2014). Normal Student’s t Distributions Applications, Springer. B. C. Arnold (2010). Pareto Distributions, Chapman Hall. . Azzalini (2013). Skew-Normal Related Families. N. Balakrishnan (2014). Handbook Logistic Distribution, CRC Press.","code":""},{"path":"https://lbbe-software.github.io/fitdistrplus/articles/starting-values.html","id":"books-with-applications","dir":"Articles","previous_headings":"3 Bibliography","what":"Books with applications","title":"Starting values used in fitdistrplus","text":"C. Forbes, M. Evans, N. 
Hastings, B. Peacock (2011). Statistical Distributions, Wiley. Z. . Karian, E. J. Dudewicz, K. Shimizu (2010). Handbook Fitting Statistical Distributions R, CRC Press. K. Krishnamoorthy (2015). Handbook Statistical Distributions Applications, Chapman Hall. Klugman, S., Panjer, H. & Willmot, G. (2019). Loss Models: Data Decisions, 5th ed., John Wiley & Sons.","code":""},{"path":"https://lbbe-software.github.io/fitdistrplus/authors.html","id":null,"dir":"","previous_headings":"","what":"Authors","title":"Authors and Citation","text":"Marie-Laure Delignette-Muller. Author. Christophe Dutang. Author. Regis Pouillot. Contributor. Jean-Baptiste Denis. Contributor. Aurélie Siberchicot. Author, maintainer.","code":""},{"path":"https://lbbe-software.github.io/fitdistrplus/authors.html","id":"citation","dir":"","previous_headings":"","what":"Citation","title":"Authors and Citation","text":"Marie Laure Delignette-Muller, Christophe Dutang (2015). fitdistrplus: R Package Fitting Distributions. Journal Statistical Software, 64(4), 1-34. DOI 10.18637/jss.v064.i04.","code":"@Article{, title = {{fitdistrplus}: An {R} Package for Fitting Distributions}, author = {Marie Laure Delignette-Muller and Christophe Dutang}, journal = {Journal of Statistical Software}, year = {2015}, volume = {64}, number = {4}, pages = {1--34}, doi = {10.18637/jss.v064.i04}, }"},{"path":"https://lbbe-software.github.io/fitdistrplus/index.html","id":"help-to-fit-of-a-parametric-distribution-to-non-censored-or-censored-data","dir":"","previous_headings":"","what":"Help to Fit of a Parametric Distribution to Non-Censored or Censored Data","title":"Help to Fit of a Parametric Distribution to Non-Censored or Censored Data","text":"Please note! Since January 2024, repository belonged lbbe-software organization. avoid confusion, strongly recommend updating existing local clones point new repository URL. can using git remote command line: git remote set-url origin git@github.com:lbbe-software/fitdistrplus.git git remote set-url origin https://github.com/lbbe-software/fitdistrplus.git fitdistrplus extends fitdistr() function (MASS package) several functions help fit parametric distribution non-censored censored data. Censored data may contain left censored, right censored interval censored values, several lower upper bounds. addition maximum likelihood estimation (MLE), package provides moment matching (MME), quantile matching (QME) maximum goodness--fit estimation (MGE) methods (available non-censored data). Weighted versions MLE, MME QME available. fitdistrplus allows fit probability distribution provided user restricted base R distributions (see ?Distributions). 
strongly encourage users visit CRAN task view Distributions proposed Dutang, Kiener & Swihart (2024).","code":""},{"path":"https://lbbe-software.github.io/fitdistrplus/index.html","id":"the-package","dir":"","previous_headings":"","what":"The package","title":"Help to Fit of a Parametric Distribution to Non-Censored or Censored Data","text":"stable version fitdistrplus can installed CRAN using: development version fitdistrplus can installed GitHub (remotes needed): Finally load package current R session following R command:","code":"install.packages(\"fitdistrplus\") if (!requireNamespace(\"remotes\", quietly = TRUE)) install.packages(\"remotes\") remotes::install_github(\"lbbe-software/fitdistrplus\") library(fitdistrplus)"},{"path":"https://lbbe-software.github.io/fitdistrplus/index.html","id":"documentation","dir":"","previous_headings":"","what":"Documentation","title":"Help to Fit of a Parametric Distribution to Non-Censored or Censored Data","text":"Four vignettes attached fitdistrplus package. Two beginners Overview fitdistrplus package Frequently Asked Questions last two vignettes deal advanced topics optimization algorithm choose? Starting values used fitdistrplus","code":""},{"path":"https://lbbe-software.github.io/fitdistrplus/index.html","id":"authors--contacts","dir":"","previous_headings":"","what":"Authors & Contacts","title":"Help to Fit of a Parametric Distribution to Non-Censored or Censored Data","text":"Please read FAQ contacting authors Marie-Laure Delignette-Muller: marielaure.delignettemuller<<@))vetagro-sup.fr Christophe Dutang: dutangc<<@))gmail.com Aurélie Siberchicot: aurelie.siberchicot<<@))univ-lyon1.fr Issues can reported fitdistrplus-issues.","code":""},{"path":"https://lbbe-software.github.io/fitdistrplus/index.html","id":"citation","dir":"","previous_headings":"","what":"Citation","title":"Help to Fit of a Parametric Distribution to Non-Censored or Censored Data","text":"use fitdistrplus, cite: Marie Laure Delignette-Muller, Christophe Dutang (2015). fitdistrplus: R Package Fitting Distributions. Journal Statistical Software. 
https://www.jstatsoft.org/article/view/v064i04 DOI 10.18637/jss.v064.i04.","code":""},{"path":"https://lbbe-software.github.io/fitdistrplus/reference/CIcdfplot.html","id":null,"dir":"Reference","previous_headings":"","what":"Empirical cumulative distribution function with pointwise confidence intervals on probabilities or on quantiles — CIcdfplot","title":"Empirical cumulative distribution function with pointwise confidence intervals on probabilities or on quantiles — CIcdfplot","text":"cdfband plots empirical cumulative distribution function bootstraped pointwise confidence intervals probabilities quantiles.","code":""},{"path":"https://lbbe-software.github.io/fitdistrplus/reference/CIcdfplot.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Empirical cumulative distribution function with pointwise confidence intervals on probabilities or on quantiles — CIcdfplot","text":"","code":"CIcdfplot(b, CI.output, CI.type = \"two.sided\", CI.level = 0.95, CI.col = \"red\", CI.lty = 2, CI.fill = NULL, CI.only = FALSE, xlim, ylim, xlogscale = FALSE, ylogscale = FALSE, main, xlab, ylab, datapch, datacol, fitlty, fitcol, fitlwd, horizontals = TRUE, verticals = FALSE, do.points = TRUE, use.ppoints = TRUE, a.ppoints = 0.5, name.points = NULL, lines01 = FALSE, plotstyle = \"graphics\", ...)"},{"path":"https://lbbe-software.github.io/fitdistrplus/reference/CIcdfplot.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Empirical cumulative distribution function with pointwise confidence intervals on probabilities or on quantiles — CIcdfplot","text":"b One \"bootdist\" object. CI.output quantity (bootstraped) bootstraped confidence intervals computed: either \"probability\" \"quantile\"). CI.type Type confidence intervals : either \"two.sided\" one-sided intervals (\"less\" \"greater\"). CI.level confidence level. CI.col color confidence intervals. CI.lty line type confidence intervals. CI.fill color fill confidence area. Default NULL corresponding filling. CI.logical whether plot empirical fitted distribution functions confidence intervals. Default FALSE. xlim \\(x\\)-limits plot. ylim \\(y\\)-limits plot. xlogscale TRUE, uses logarithmic scale \\(x\\)-axis. ylogscale TRUE, uses logarithmic scale \\(y\\)-axis. main main title plot, see also title. xlab label \\(x\\)-axis, defaults description x. ylab label \\(y\\)-axis, defaults description y. datapch integer specifying symbol used plotting data points, see also points (non censored data). datacol specification color used plotting data points. fitcol (vector ) color(s) plot fitted distributions. fewer colors fits recycled standard fashion. fitlty (vector ) line type(s) plot fitted distributions/densities. fewer values fits recycled standard fashion. See also par. fitlwd (vector ) line size(s) plot fitted distributions/densities. fewer values fits recycled standard fashion. See also par. horizontals TRUE, draws horizontal lines step empirical cdf function (non censored data). See also plot.stepfun. verticals TRUE, draws also vertical lines empirical cdf function. taken account horizontals=TRUE (non censored data). .points logical; TRUE, also draw points x-locations. Default TRUE (non censored data). use.ppoints TRUE, probability points empirical distribution defined using function ppoints (1:n - .ppoints)/(n - 2a.ppoints + 1) (non censored data). FALSE, probability points simply defined (1:n)/n. argument ignored discrete data. 
.ppoints use.ppoints=TRUE, passed function ppoints (non censored data). name.points Label vector points drawn .e. .points = TRUE (non censored data). lines01 logical plot two horizontal lines h=0 h=1 cdfcomp. plotstyle \"graphics\" \"ggplot\". \"graphics\", display built graphics functions. \"ggplot\", graphic object output created ggplot2 functions (ggplot2 package must installed). ... graphical arguments passed matlines polygon, respectively CI.fill=FALSE CI.fill=TRUE.","code":""},{"path":"https://lbbe-software.github.io/fitdistrplus/reference/CIcdfplot.html","id":"details","dir":"Reference","previous_headings":"","what":"Details","title":"Empirical cumulative distribution function with pointwise confidence intervals on probabilities or on quantiles — CIcdfplot","text":"CIcdfplot provides plot empirical distribution using cdfcomp cdfcompcens, bootstraped pointwise confidence intervals probabilities (y values) quantiles (x values). interval computed evaluating quantity interest (probability associated x value quantile associated y value) using bootstraped values parameters get bootstraped sample quantity interest calculating percentiles sample get confidence interval (classically 2.5 97.5 percentiles 95 percent confidence level). CI.fill != NULL, whole confidence area filled color CI.fill thanks function polygon, otherwise borders drawn thanks function matline. graphical arguments can passed functions using three dots arguments ....","code":""},{"path":[]},{"path":"https://lbbe-software.github.io/fitdistrplus/reference/CIcdfplot.html","id":"references","dir":"Reference","previous_headings":"","what":"References","title":"Empirical cumulative distribution function with pointwise confidence intervals on probabilities or on quantiles — CIcdfplot","text":"Delignette-Muller ML Dutang C (2015), fitdistrplus: R Package Fitting Distributions. Journal Statistical Software, 64(4), 1-34, doi:10.18637/jss.v064.i04 .","code":""},{"path":"https://lbbe-software.github.io/fitdistrplus/reference/CIcdfplot.html","id":"author","dir":"Reference","previous_headings":"","what":"Author","title":"Empirical cumulative distribution function with pointwise confidence intervals on probabilities or on quantiles — CIcdfplot","text":"Christophe Dutang Marie-Laure Delignette-Muller.","code":""},{"path":"https://lbbe-software.github.io/fitdistrplus/reference/CIcdfplot.html","id":"ref-examples","dir":"Reference","previous_headings":"","what":"Examples","title":"Empirical cumulative distribution function with pointwise confidence intervals on probabilities or on quantiles — CIcdfplot","text":"","code":"# We choose a low number of bootstrap replicates in order to satisfy CRAN running times # constraint. # For practical applications, we recommend to use at least niter=501 or niter=1001. 
if (requireNamespace (\"ggplot2\", quietly = TRUE)) {ggplotEx <- TRUE} # (1) Fit of an exponential distribution # set.seed(123) s1 <- rexp(50, 1) f1 <- fitdist(s1, \"exp\") b1 <- bootdist(f1, niter= 11) #voluntarily low to decrease computation time # plot 95 percent bilateral confidence intervals on y values (probabilities) CIcdfplot(b1, CI.level= 95/100, CI.output = \"probability\") if (ggplotEx) CIcdfplot(b1, CI.level= 95/100, CI.output = \"probability\", plotstyle = \"ggplot\") # \\donttest{ # plot of the previous intervals as a band CIcdfplot(b1, CI.level= 95/100, CI.output = \"probability\", CI.fill = \"pink\", CI.col = \"red\") if (ggplotEx) CIcdfplot(b1, CI.level= 95/100, CI.output = \"probability\", CI.fill = \"pink\", CI.col = \"red\", plotstyle = \"ggplot\") # plot of the previous intervals as a band without empirical and fitted dist. functions CIcdfplot(b1, CI.level= 95/100, CI.output = \"probability\", CI.only = TRUE, CI.fill = \"pink\", CI.col = \"red\") if (ggplotEx) CIcdfplot(b1, CI.level= 95/100, CI.output = \"probability\", CI.only = TRUE, CI.fill = \"pink\", CI.col = \"red\", plotstyle = \"ggplot\") # same plot without contours CIcdfplot(b1, CI.level= 95/100, CI.output = \"probability\", CI.only = TRUE, CI.fill = \"pink\", CI.col = \"pink\") if (ggplotEx) CIcdfplot(b1, CI.level= 95/100, CI.output = \"probability\", CI.only = TRUE, CI.fill = \"pink\", CI.col = \"pink\", plotstyle = \"ggplot\") # plot 95 percent bilateral confidence intervals on x values (quantiles) CIcdfplot(b1, CI.level= 95/100, CI.output = \"quantile\") if (ggplotEx) CIcdfplot(b1, CI.level= 95/100, CI.output = \"quantile\", plotstyle = \"ggplot\") # plot 95 percent unilateral confidence intervals on quantiles CIcdfplot(b1, CI.level = 95/100, CI.output = \"quant\", CI.type = \"less\", CI.fill = \"grey80\", CI.col = \"black\", CI.lty = 1) if (ggplotEx) CIcdfplot(b1, CI.level = 95/100, CI.output = \"quant\", CI.type = \"less\", CI.fill = \"grey80\", CI.col = \"black\", CI.lty = 1, plotstyle = \"ggplot\") CIcdfplot(b1, CI.level= 95/100, CI.output = \"quant\", CI.type = \"greater\", CI.fill = \"grey80\", CI.col = \"black\", CI.lty = 1) if (ggplotEx) CIcdfplot(b1, CI.level= 95/100, CI.output = \"quant\", CI.type = \"greater\", CI.fill = \"grey80\", CI.col = \"black\", CI.lty = 1, plotstyle = \"ggplot\") # (2) Fit of a normal distribution on acute toxicity log-transformed values of # endosulfan for nonarthropod invertebrates, using maximum likelihood estimation # to estimate what is called a species sensitivity distribution # (SSD) in ecotoxicology, followed by estimation of the 5, 10 and 20 percent quantile # values of the fitted distribution, which are called the 5, 10, 20 percent hazardous # concentrations (HC5, HC10, HC20) in ecotoxicology, with their # confidence intervals, from a small number of bootstrap # iterations to satisfy CRAN running times constraint and plot of the band # representing pointwise confidence intervals on any quantiles (any HCx values) # For practical applications, we recommend to use at least niter=501 or niter=1001. 
# data(endosulfan) log10ATV <- log10(subset(endosulfan, group == \"NonArthroInvert\")$ATV) namesATV <- subset(endosulfan, group == \"NonArthroInvert\")$taxa fln <- fitdist(log10ATV, \"norm\") bln <- bootdist(fln, bootmethod =\"param\", niter=101) quantile(bln, probs = c(0.05, 0.1, 0.2)) #> (original) estimated quantiles for each specified probability (non-censored data) #> p=0.05 p=0.1 p=0.2 #> estimate 1.744227 2.080093 2.4868 #> Median of bootstrap estimates #> p=0.05 p=0.1 p=0.2 #> estimate 1.844443 2.190122 2.565053 #> #> two-sided 95 % CI of each quantile #> p=0.05 p=0.1 p=0.2 #> 2.5 % 1.334340 1.697255 2.099378 #> 97.5 % 2.531564 2.770455 3.053706 CIcdfplot(bln, CI.output = \"quantile\", CI.fill = \"lightblue\", CI.col = \"blue\", xlim = c(0,5), name.points=namesATV) if (ggplotEx) CIcdfplot(bln, CI.output = \"quantile\", CI.fill = \"lightblue\", CI.col = \"blue\", xlim = c(0,5), name.points=namesATV, plotstyle = \"ggplot\") # (3) Same type of example as example (2) from ecotoxicology # with censored data # data(salinity) log10LC50 <-log10(salinity) fln <- fitdistcens(log10LC50,\"norm\") bln <- bootdistcens(fln, niter=101) (HC5ln <- quantile(bln,probs = 0.05)) #> (original) estimated quantiles for each specified probability (censored data) #> p=0.05 #> estimate 1.11584 #> Median of bootstrap estimates #> p=0.05 #> estimate 1.120901 #> #> two-sided 95 % CI of each quantile #> p=0.05 #> 2.5 % 1.045539 #> 97.5 % 1.191979 CIcdfplot(bln, CI.output = \"quantile\", CI.fill = \"lightblue\", CI.col = \"blue\", xlab = \"log10(LC50)\",xlim=c(0.5,2),lines01 = TRUE) if (ggplotEx) CIcdfplot(bln, CI.output = \"quantile\", CI.fill = \"lightblue\", CI.col = \"blue\", xlab = \"log10(LC50)\",xlim=c(0.5,2),lines01 = TRUE, plotstyle = \"ggplot\") # zoom around the HC5 CIcdfplot(bln, CI.output = \"quantile\", CI.fill = \"lightblue\", CI.col = \"blue\", xlab = \"log10(LC50)\", lines01 = TRUE, xlim = c(0.8, 1.5), ylim = c(0, 0.1)) abline(h = 0.05, lty = 2) # line corresponding to a CDF of 5 percent if (ggplotEx) CIcdfplot(bln, CI.output = \"quantile\", CI.fill = \"lightblue\", CI.col = \"blue\", xlab = \"log10(LC50)\", lines01 = TRUE, xlim = c(0.8, 1.5), ylim = c(0, 0.1), plotstyle = \"ggplot\") + ggplot2::geom_hline(yintercept = 0.05, lty = 2) # line corresponding to a CDF of 5 percent # }"},{"path":"https://lbbe-software.github.io/fitdistrplus/reference/Surv2fitdistcens.html","id":null,"dir":"Reference","previous_headings":"","what":"Handling of data formated as in the survival package for use in fitdistcens() — Surv2fitdistcens","title":"Handling of data formated as in the survival package for use in fitdistcens() — Surv2fitdistcens","text":"Provide function prepare data frame needed fitdistcens() data classically coded using Surv() function survival package","code":""},{"path":"https://lbbe-software.github.io/fitdistrplus/reference/Surv2fitdistcens.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Handling of data formated as in the survival package for use in fitdistcens() — Surv2fitdistcens","text":"","code":"Surv2fitdistcens(time, time2, event, type = c('right', 'left', 'interval', 'interval2'))"},{"path":"https://lbbe-software.github.io/fitdistrplus/reference/Surv2fitdistcens.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Handling of data formated as in the survival package for use in fitdistcens() — Surv2fitdistcens","text":"time right censored data, follow time. interval data, first argument starting time interval. 
event status indicator, normally 0=alive, 1=dead. choices TRUE/FALSE (TRUE = death) 1/2 (2=death). interval censored data, status indicator 0=right censored, 1=event time, 2=left censored, 3=interval censored. factor data, assume two levels second level coding death. time2 ending time interval interval censored. Intervals assumed open left closed right, (start, end]. type character string specifying type censoring. Possible values \"right\", \"left\", \"interval\", \"interval2\".","code":""},{"path":"https://lbbe-software.github.io/fitdistrplus/reference/Surv2fitdistcens.html","id":"details","dir":"Reference","previous_headings":"","what":"Details","title":"Handling of data formated as in the survival package for use in fitdistcens() — Surv2fitdistcens","text":"Surv2fitdistcens makes data.frame two columns respectively named left right, describing observed value interval required fitdistcens(): left column contains either NA left-censored observations, left bound interval interval-censored observations, observed value non-censored observations. right column contains either NA right-censored observations, right bound interval interval censored observations, observed value non-censored observations.","code":""},{"path":"https://lbbe-software.github.io/fitdistrplus/reference/Surv2fitdistcens.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Handling of data formated as in the survival package for use in fitdistcens() — Surv2fitdistcens","text":"Surv2fitdistcens returns data.frame two columns respectively named left right.","code":""},{"path":[]},{"path":"https://lbbe-software.github.io/fitdistrplus/reference/Surv2fitdistcens.html","id":"references","dir":"Reference","previous_headings":"","what":"References","title":"Handling of data formated as in the survival package for use in fitdistcens() — Surv2fitdistcens","text":"Delignette-Muller ML Dutang C (2015), fitdistrplus: R Package Fitting Distributions. 
Journal Statistical Software, 64(4), 1-34, doi:10.18637/jss.v064.i04 .","code":""},{"path":"https://lbbe-software.github.io/fitdistrplus/reference/Surv2fitdistcens.html","id":"author","dir":"Reference","previous_headings":"","what":"Author","title":"Handling of data formated as in the survival package for use in fitdistcens() — Surv2fitdistcens","text":"Christophe Dutang Marie-Laure Delignette-Muller.","code":""},{"path":"https://lbbe-software.github.io/fitdistrplus/reference/Surv2fitdistcens.html","id":"ref-examples","dir":"Reference","previous_headings":"","what":"Examples","title":"Handling of data formated as in the survival package for use in fitdistcens() — Surv2fitdistcens","text":"","code":"# (1) randomized fictive survival data - right-censored # origdata <- data.frame(rbind( c( 43.01, 55.00, 0), c( 36.37, 47.17, 0), c( 33.10, 34.51, 0), c( 71.00, 81.15, 1), c( 80.89, 81.91, 1), c( 67.81, 78.48, 1), c( 73.98, 76.92, 1), c( 53.19, 54.80, 1))) colnames(origdata) <- c(\"AgeIn\", \"AgeOut\", \"Death\") # add of follow-up time (for type = \"right\" in Surv()) origdata$followuptime <- origdata$AgeOut - origdata$AgeIn origdata #> AgeIn AgeOut Death followuptime #> 1 43.01 55.00 0 11.99 #> 2 36.37 47.17 0 10.80 #> 3 33.10 34.51 0 1.41 #> 4 71.00 81.15 1 10.15 #> 5 80.89 81.91 1 1.02 #> 6 67.81 78.48 1 10.67 #> 7 73.98 76.92 1 2.94 #> 8 53.19 54.80 1 1.61 ### use of default survival type \"right\" # in Surv() survival::Surv(time = origdata$followuptime, event = origdata$Death, type = \"right\") #> [1] 11.99+ 10.80+ 1.41+ 10.15 1.02 10.67 2.94 1.61 # for fitdistcens() Surv2fitdistcens(origdata$followuptime, event = origdata$Death, type = \"right\") #> left right #> 1 11.99 NA #> 2 10.80 NA #> 3 1.41 NA #> 4 10.15 10.15 #> 5 1.02 1.02 #> 6 10.67 10.67 #> 7 2.94 2.94 #> 8 1.61 1.61 # use of survival type \"interval\" # in Surv() survival::Surv(time = origdata$followuptime, time2 = origdata$followuptime, event = origdata$Death, type = \"interval\") #> [1] 11.99+ 10.80+ 1.41+ 10.15 1.02 10.67 2.94 1.61 # for fitdistcens() Surv2fitdistcens(time = origdata$followuptime, time2 = origdata$followuptime, event = origdata$Death, type = \"interval\") #> left right #> 1 11.99 NA #> 2 10.80 NA #> 3 1.41 NA #> 4 10.15 10.15 #> 5 1.02 1.02 #> 6 10.67 10.67 #> 7 2.94 2.94 #> 8 1.61 1.61 # use of survival type \"interval2\" origdata$survivalt1 <- origdata$followuptime origdata$survivalt2 <- origdata$survivalt1 origdata$survivalt2[1:3] <- Inf origdata #> AgeIn AgeOut Death followuptime survivalt1 survivalt2 #> 1 43.01 55.00 0 11.99 11.99 Inf #> 2 36.37 47.17 0 10.80 10.80 Inf #> 3 33.10 34.51 0 1.41 1.41 Inf #> 4 71.00 81.15 1 10.15 10.15 10.15 #> 5 80.89 81.91 1 1.02 1.02 1.02 #> 6 67.81 78.48 1 10.67 10.67 10.67 #> 7 73.98 76.92 1 2.94 2.94 2.94 #> 8 53.19 54.80 1 1.61 1.61 1.61 survival::Surv(time = origdata$survivalt1, time2 = origdata$survivalt2, type = \"interval2\") #> [1] 11.99+ 10.80+ 1.41+ 10.15 1.02 10.67 2.94 1.61 Surv2fitdistcens(origdata$survivalt1, time2 = origdata$survivalt2, type = \"interval2\") #> left right #> 1 11.99 NA #> 2 10.80 NA #> 3 1.41 NA #> 4 10.15 10.15 #> 5 1.02 1.02 #> 6 10.67 10.67 #> 7 2.94 2.94 #> 8 1.61 1.61 # (2) Other examples with various left, right and interval censored values # # with left censored data (d1 <- data.frame(time = c(2, 5, 3, 7), ind = c(0, 1, 1, 1))) #> time ind #> 1 2 0 #> 2 5 1 #> 3 3 1 #> 4 7 1 survival::Surv(time = d1$time, event = d1$ind, type = \"left\") #> [1] 2- 5 3 7 Surv2fitdistcens(time = d1$time, event = d1$ind, type = \"left\") #> left 
right #> 1 NA 2 #> 2 5 5 #> 3 3 3 #> 4 7 7 (d1bis <- data.frame(t1 = c(2, 5, 3, 7), t2 = c(2, 5, 3, 7), censtype = c(2, 1, 1, 1))) #> t1 t2 censtype #> 1 2 2 2 #> 2 5 5 1 #> 3 3 3 1 #> 4 7 7 1 survival::Surv(time = d1bis$t1, time2 = d1bis$t2, event = d1bis$censtype, type = \"interval\") #> [1] 2- 5 3 7 Surv2fitdistcens(time = d1bis$t1, time2 = d1bis$t2, event = d1bis$censtype, type = \"interval\") #> left right #> 1 NA 2 #> 2 5 5 #> 3 3 3 #> 4 7 7 # with interval, left and right censored data (d2 <- data.frame(t1 = c(-Inf, 2, 3, 4, 3, 7), t2 = c(2, 5, 3, 7, 8, Inf))) #> t1 t2 #> 1 -Inf 2 #> 2 2 5 #> 3 3 3 #> 4 4 7 #> 5 3 8 #> 6 7 Inf survival::Surv(time = d2$t1, time2 = d2$t2, type = \"interval2\") #> [1] 2- [2, 5] 3 [4, 7] [3, 8] 7+ Surv2fitdistcens(time = d2$t1, time2 = d2$t2, type = \"interval2\") #> left right #> 1 NA 2 #> 2 2 5 #> 3 3 3 #> 4 4 7 #> 5 3 8 #> 6 7 NA (d2bis <- data.frame(t1 = c(2, 2, 3, 4, 3, 7), t2 = c(2, 5, 3, 7, 8, 7), censtype = c(2,3,1,3,3,0))) #> t1 t2 censtype #> 1 2 2 2 #> 2 2 5 3 #> 3 3 3 1 #> 4 4 7 3 #> 5 3 8 3 #> 6 7 7 0 survival::Surv(time = d2bis$t1, time2 = d2bis$t2, event = d2bis$censtype, type = \"interval\") #> [1] 2- [2, 5] 3 [4, 7] [3, 8] 7+ Surv2fitdistcens(time = d2bis$t1, time2 = d2bis$t2, event = d2bis$censtype, type = \"interval\") #> left right #> 1 NA 2 #> 2 2 5 #> 3 3 3 #> 4 4 7 #> 5 3 8 #> 6 7 NA"},{"path":"https://lbbe-software.github.io/fitdistrplus/reference/bootdist.html","id":null,"dir":"Reference","previous_headings":"","what":"Bootstrap simulation of uncertainty for non-censored data — bootdist","title":"Bootstrap simulation of uncertainty for non-censored data — bootdist","text":"Uses parametric nonparametric bootstrap resampling order simulate uncertainty parameters distribution fitted non-censored data.","code":""},{"path":"https://lbbe-software.github.io/fitdistrplus/reference/bootdist.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Bootstrap simulation of uncertainty for non-censored data — bootdist","text":"","code":"bootdist(f, bootmethod = \"param\", niter = 1001, silent = TRUE, parallel = c(\"no\", \"snow\", \"multicore\"), ncpus) # S3 method for class 'bootdist' print(x, ...) # S3 method for class 'bootdist' plot(x, main = \"Bootstrapped values of parameters\", enhance = FALSE, trueval = NULL, rampcol = NULL, nbgrid = 100, nbcol = 100, ...) # S3 method for class 'bootdist' summary(object, ...) # S3 method for class 'bootdist' density(..., bw = nrd0, adjust = 1, kernel = \"gaussian\") # S3 method for class 'density.bootdist' plot(x, mar=c(4,4,2,1), lty=NULL, col=NULL, lwd=NULL, ...) # S3 method for class 'density.bootdist' print(x, ...)"},{"path":"https://lbbe-software.github.io/fitdistrplus/reference/bootdist.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Bootstrap simulation of uncertainty for non-censored data — bootdist","text":"f object class \"fitdist\", output fitdist function. bootmethod character string coding type resampling : \"param\" parametric resampling \"nonparam\" nonparametric resampling data. niter number samples drawn bootstrap. silent logical remove show warnings errors bootstraping. parallel type parallel operation used, \"snow\" \"multicore\" (second one available Windows), \"\" parallel operation. ncpus Number processes used parallel operation : typically one fix number available CPUs. x object class \"bootdist\" \"density.bootdist\". object object class \"bootdist\". 
main overall title plot: see title, default \"Bootstrapped values parameters\". enhance logical get enhanced plot. trueval relevant, numeric vector true value parameters (backfitting purposes). rampcol colors interpolate; must valid argument colorRampPalette(). nbgrid Number grid points direction. Can scalar length-2 integer vector. nbcol integer argument, required number colors ... arguments passed generic methods \"bootdist\" objects density. bw, adjust, kernel resp. smoothing bandwidth, scaling factor, kernel used, see density. mar numerical vector form c(bottom, left, top, right), see par. lty, col, lwd resp. line type, color, line width, see par.","code":""},{"path":"https://lbbe-software.github.io/fitdistrplus/reference/bootdist.html","id":"details","dir":"Reference","previous_headings":"","what":"Details","title":"Bootstrap simulation of uncertainty for non-censored data — bootdist","text":"Samples drawn parametric bootstrap (resampling distribution fitted fitdist) nonparametric bootstrap (resampling replacement data set). bootstrap sample function mledist (mmedist, qmedist, mgedist according component f$method object class \"fitdist\") used estimate bootstrapped values parameters. function fails converge, NA values returned. Medians 2.5 97.5 percentiles computed removing NA values. medians 95 percent confidence intervals parameters (2.5 97.5 percentiles) printed summary. inferior whole number iterations, number iterations function converges also printed summary. default (enhance=FALSE), plot object class \"bootdist\" consists scatterplot matrix scatterplots bootstrapped values parameters. uses function stripchart fitted distribution characterized one parameter, function plot two paramters function pairs cases. last cases, provides representation joint uncertainty distribution fitted parameters. enhance=TRUE, personalized plot version pairs used upper graphs scatterplots lower graphs heatmap image using image based kernel based estimator 2D density function (using kde2d MASS package). Arguments rampcol, nbgrid, nbcol can used customize plots. Defautls values rampcol=c(\"green\", \"yellow\", \"orange\", \"red\"), nbcol=100 (see colorRampPalette()), nbgrid=100 (see kde2d). addition, fitting parameters simulated datasets backtesting purposes, additional argument trueval can used plot cross true value. possible accelerate bootstrap using parallelization. recommend use parallel = \"multicore\", parallel = \"snow\" work Windows, fix ncpus number available processors. density computes empirical density bootdist objects using density function (Gaussian kernel default). returns object class density.bootdist print plot methods provided.","code":""},{"path":"https://lbbe-software.github.io/fitdistrplus/reference/bootdist.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Bootstrap simulation of uncertainty for non-censored data — bootdist","text":"bootdist returns object class \"bootdist\", list 6 components, estim data frame containing bootstrapped values parameters. converg vector containing codes convergence obtained iterative method used estimate parameters bootstraped data set (0 closed formula used). method character string coding type resampling : \"param\" parametric resampling \"nonparam\" nonparametric resampling. nbboot number samples drawn bootstrap. CI bootstrap medians 95 percent confidence percentile intervals parameters. fitpart object class \"fitdist\" bootstrap procedure applied. 
Generic functions: print print \"bootdist\" object shows bootstrap parameter estimates. inferior whole number bootstrap iterations, number iterations estimation converges also printed. summary summary provides median 2.5 97.5 percentiles parameter. inferior whole number bootstrap iterations, number iterations estimation converges also printed summary. plot plot shows bootstrap estimates stripchart function univariate parameters plot function multivariate parameters. density density computes empirical densities return object class density.bootdist.","code":""},{"path":[]},{"path":"https://lbbe-software.github.io/fitdistrplus/reference/bootdist.html","id":"references","dir":"Reference","previous_headings":"","what":"References","title":"Bootstrap simulation of uncertainty for non-censored data — bootdist","text":"Cullen AC Frey HC (1999), Probabilistic techniques exposure assessment. Plenum Press, USA, pp. 181-241. Delignette-Muller ML Dutang C (2015), fitdistrplus: R Package Fitting Distributions. Journal Statistical Software, 64(4), 1-34, doi:10.18637/jss.v064.i04 .","code":""},{"path":"https://lbbe-software.github.io/fitdistrplus/reference/bootdist.html","id":"author","dir":"Reference","previous_headings":"","what":"Author","title":"Bootstrap simulation of uncertainty for non-censored data — bootdist","text":"Marie-Laure Delignette-Muller Christophe Dutang.","code":""},{"path":"https://lbbe-software.github.io/fitdistrplus/reference/bootdist.html","id":"ref-examples","dir":"Reference","previous_headings":"","what":"Examples","title":"Bootstrap simulation of uncertainty for non-censored data — bootdist","text":"","code":"# We choose a low number of bootstrap replicates in order to satisfy CRAN running times # constraint. # For practical applications, we recommend to use at least niter=501 or niter=1001. # (1) Fit of a gamma distribution to serving size data # using default method (maximum likelihood estimation) # followed by parametric bootstrap # data(groundbeef) x1 <- groundbeef$serving f1 <- fitdist(x1, \"gamma\") b1 <- bootdist(f1, niter=51) print(b1) #> Parameter values obtained with parametric bootstrap #> shape rate #> 1 4.015562 0.05365499 #> 2 4.214437 0.05762101 #> 3 4.176366 0.05807901 #> 4 4.119164 0.05944029 #> 5 5.013486 0.07194809 #> 6 4.461409 0.05807600 plot(b1) plot(b1, enhance=TRUE) summary(b1) #> Parametric bootstrap medians and 95% percentile CI #> Median 2.5% 97.5% #> shape 4.12112272 3.32325118 5.11745944 #> rate 0.05518452 0.04684843 0.07170367 quantile(b1) #> (original) estimated quantiles for each specified probability (non-censored data) #> p=0.1 p=0.2 p=0.3 p=0.4 p=0.5 p=0.6 p=0.7 #> estimate 32.16733 42.32692 50.91831 59.15298 67.62801 76.88308 87.67764 #> p=0.8 p=0.9 #> estimate 101.5208 122.9543 #> Median of bootstrap estimates #> p=0.1 p=0.2 p=0.3 p=0.4 p=0.5 p=0.6 p=0.7 p=0.8 #> estimate 32.71222 42.80078 50.98942 59.25093 67.5939 76.42124 87.17521 100.8405 #> p=0.9 #> estimate 121.5466 #> #> two-sided 95 % CI of each quantile #> p=0.1 p=0.2 p=0.3 p=0.4 p=0.5 p=0.6 p=0.7 p=0.8 #> 2.5 % 27.77396 37.42586 45.73489 53.96687 62.26638 71.30894 81.64618 93.9737 #> 97.5 % 35.67197 45.22459 53.97730 62.58326 71.31751 81.30652 92.96508 107.7329 #> p=0.9 #> 2.5 % 113.9634 #> 97.5 % 130.6715 CIcdfplot(b1, CI.output = \"quantile\") density(b1) #> #> Bootstrap values for: gamma for 1 object(s) with 51 bootstrap values (original sample size 254). 
plot(density(b1)) # (2) non parametric bootstrap on the same fit # b1b <- bootdist(f1, bootmethod=\"nonparam\", niter=51) summary(b1b) #> Nonparametric bootstrap medians and 95% percentile CI #> Median 2.5% 97.5% #> shape 4.08546931 3.47931694 4.71280030 #> rate 0.05561944 0.04797494 0.06302539 quantile(b1b) #> (original) estimated quantiles for each specified probability (non-censored data) #> p=0.1 p=0.2 p=0.3 p=0.4 p=0.5 p=0.6 p=0.7 #> estimate 32.16733 42.32692 50.91831 59.15298 67.62801 76.88308 87.67764 #> p=0.8 p=0.9 #> estimate 101.5208 122.9543 #> Median of bootstrap estimates #> p=0.1 p=0.2 p=0.3 p=0.4 p=0.5 p=0.6 p=0.7 #> estimate 32.25183 42.25577 51.01738 59.05788 67.47548 76.95389 87.65113 #> p=0.8 p=0.9 #> estimate 100.8612 121.7738 #> #> two-sided 95 % CI of each quantile #> p=0.1 p=0.2 p=0.3 p=0.4 p=0.5 p=0.6 p=0.7 p=0.8 #> 2.5 % 28.77577 38.76800 47.17181 55.16178 63.29618 72.15077 82.21068 95.20268 #> 97.5 % 36.49366 46.74605 55.27953 63.37110 71.62773 80.58611 91.32593 105.92939 #> p=0.9 #> 2.5 % 115.0083 #> 97.5 % 128.1651 # (3) Fit of a normal distribution on acute toxicity values of endosulfan in log10 for # nonarthropod invertebrates, using maximum likelihood estimation # to estimate what is called a species sensitivity distribution # (SSD) in ecotoxicology, followed by estimation of the 5 percent quantile value of # the fitted distribution, what is called the 5 percent hazardous concentration (HC5) # in ecotoxicology, with its two-sided 95 percent confidence interval calculated by # parametric bootstrap # data(endosulfan) ATV <- subset(endosulfan, group == \"NonArthroInvert\")$ATV log10ATV <- log10(subset(endosulfan, group == \"NonArthroInvert\")$ATV) fln <- fitdist(log10ATV, \"norm\") bln <- bootdist(fln, bootmethod = \"param\", niter=51) quantile(bln, probs = c(0.05, 0.1, 0.2)) #> (original) estimated quantiles for each specified probability (non-censored data) #> p=0.05 p=0.1 p=0.2 #> estimate 1.744227 2.080093 2.4868 #> Median of bootstrap estimates #> p=0.05 p=0.1 p=0.2 #> estimate 1.811067 2.156258 2.529461 #> #> two-sided 95 % CI of each quantile #> p=0.05 p=0.1 p=0.2 #> 2.5 % 1.187935 1.634263 2.095273 #> 97.5 % 2.276507 2.563692 2.917189 # (4) comparison of sequential and parallel versions of bootstrap # to be tried with a greater number of iterations (1001 or more) # # \\donttest{ niter <- 1001 data(groundbeef) x1 <- groundbeef$serving f1 <- fitdist(x1, \"gamma\") # sequential version ptm <- proc.time() summary(bootdist(f1, niter = niter)) #> Parametric bootstrap medians and 95% percentile CI #> Median 2.5% 97.5% #> shape 4.02609408 3.46463055 4.71706986 #> rate 0.05458836 0.04622389 0.06476728 proc.time() - ptm #> user system elapsed #> 3.981 0.089 3.964 # parallel version using snow require(parallel) #> Loading required package: parallel ptm <- proc.time() summary(bootdist(f1, niter = niter, parallel = \"snow\", ncpus = 2)) #> Parametric bootstrap medians and 95% percentile CI #> Median 2.5% 97.5% #> shape 4.02321963 3.45598967 4.80078519 #> rate 0.05450354 0.04632331 0.06524721 proc.time() - ptm #> user system elapsed #> 0.036 0.004 3.859 # parallel version using multicore (not available on Windows) ptm <- proc.time() summary(bootdist(f1, niter = niter, parallel = \"multicore\", ncpus = 2)) #> Parametric bootstrap medians and 95% percentile CI #> Median 2.5% 97.5% #> shape 4.04947721 3.47970416 4.71828189 #> rate 0.05496497 0.04672265 0.06498123 proc.time() - ptm #> user system elapsed #> 0.026 0.023 2.059 # 
}"},{"path":"https://lbbe-software.github.io/fitdistrplus/reference/bootdistcens.html","id":null,"dir":"Reference","previous_headings":"","what":"Bootstrap simulation of uncertainty for censored data — bootdistcens","title":"Bootstrap simulation of uncertainty for censored data — bootdistcens","text":"Uses nonparametric bootstrap resampling order simulate uncertainty parameters distribution fitted censored data.","code":""},{"path":"https://lbbe-software.github.io/fitdistrplus/reference/bootdistcens.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Bootstrap simulation of uncertainty for censored data — bootdistcens","text":"","code":"bootdistcens(f, niter = 1001, silent = TRUE, parallel = c(\"no\", \"snow\", \"multicore\"), ncpus) # S3 method for class 'bootdistcens' print(x, ...) # S3 method for class 'bootdistcens' plot(x, ...) # S3 method for class 'bootdistcens' summary(object, ...) # S3 method for class 'bootdistcens' density(..., bw = nrd0, adjust = 1, kernel = \"gaussian\") # S3 method for class 'density.bootdistcens' plot(x, mar=c(4,4,2,1), lty=NULL, col=NULL, lwd=NULL, ...) # S3 method for class 'density.bootdistcens' print(x, ...)"},{"path":"https://lbbe-software.github.io/fitdistrplus/reference/bootdistcens.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Bootstrap simulation of uncertainty for censored data — bootdistcens","text":"f object class \"fitdistcens\", output fitdistcens function. niter number samples drawn bootstrap. silent logical remove show warnings errors bootstraping. parallel type parallel operation used, \"snow\" \"multicore\" (second one available Windows), \"\" parallel operation. ncpus Number processes used parallel operation : typically one fix number available CPUs. x object class \"bootdistcens\". object object class \"bootdistcens\". ... arguments passed generic methods \"bootdistcens\" objects density. bw, adjust, kernel resp. smoothing bandwidth, scaling factor, kernel used, see density. mar numerical vector form c(bottom, left, top, right), see par. lty, col, lwd resp. line type, color, line width, see par.","code":""},{"path":"https://lbbe-software.github.io/fitdistrplus/reference/bootdistcens.html","id":"details","dir":"Reference","previous_headings":"","what":"Details","title":"Bootstrap simulation of uncertainty for censored data — bootdistcens","text":"Samples drawn nonparametric bootstrap (resampling replacement data set). bootstrap sample function mledist used estimate bootstrapped values parameters. mledist fails converge, NA values returned. Medians 2.5 97.5 percentiles computed removing NA values. medians 95 percent confidence intervals parameters (2.5 97.5 percentiles) printed summary. inferior whole number iterations, number iterations mledist converges also printed summary. plot object class \"bootdistcens\" consists scatterplot matrix scatterplots bootstrapped values parameters. uses function stripchart fitted distribution characterized one parameter, function plot cases. last cases, provides representation joint uncertainty distribution fitted parameters. possible accelerate bootstrap using parallelization. recommend use parallel = \"multicore\", parallel = \"snow\" work Windows, fix ncpus number available processors. density computes empirical density bootdistcens objects using density function (Gaussian kernel default). 
returns object class density.bootdistcens print plot methods provided.","code":""},{"path":"https://lbbe-software.github.io/fitdistrplus/reference/bootdistcens.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Bootstrap simulation of uncertainty for censored data — bootdistcens","text":"bootdistcens returns object class \"bootdistcens\", list 6 components, estim data frame containing bootstrapped values parameters. converg vector containing codes convergence iterative method used estimate parameters bootstraped data set. method character string coding type resampling : case \"nonparam\" available method censored data. nbboot number samples drawn bootstrap. CI bootstrap medians 95 percent confidence percentile intervals parameters. fitpart object class \"fitdistcens\" bootstrap procedure applied. Generic functions: print print \"bootdistcens\" object shows bootstrap parameter estimates. inferior whole number bootstrap iterations, number iterations estimation converges also printed. summary summary provides median 2.5 97.5 percentiles parameter. inferior whole number bootstrap iterations, number iterations estimation converges also printed summary. plot plot shows bootstrap estimates stripchart function univariate parameters plot function multivariate parameters. density density computes empirical densities return object class density.bootdistcens.","code":""},{"path":[]},{"path":"https://lbbe-software.github.io/fitdistrplus/reference/bootdistcens.html","id":"references","dir":"Reference","previous_headings":"","what":"References","title":"Bootstrap simulation of uncertainty for censored data — bootdistcens","text":"Cullen AC Frey HC (1999), Probabilistic techniques exposure assessment. Plenum Press, USA, pp. 181-241. Delignette-Muller ML Dutang C (2015), fitdistrplus: R Package Fitting Distributions. Journal Statistical Software, 64(4), 1-34, doi:10.18637/jss.v064.i04 .","code":""},{"path":"https://lbbe-software.github.io/fitdistrplus/reference/bootdistcens.html","id":"author","dir":"Reference","previous_headings":"","what":"Author","title":"Bootstrap simulation of uncertainty for censored data — bootdistcens","text":"Marie-Laure Delignette-Muller Christophe Dutang.","code":""},{"path":"https://lbbe-software.github.io/fitdistrplus/reference/bootdistcens.html","id":"ref-examples","dir":"Reference","previous_headings":"","what":"Examples","title":"Bootstrap simulation of uncertainty for censored data — bootdistcens","text":"","code":"# We choose a low number of bootstrap replicates in order to satisfy CRAN running times # constraint. # For practical applications, we recommend to use at least niter=501 or niter=1001. 
# (1) Fit of a normal distribution to fluazinam data in log10 # followed by nonparametric bootstrap and calculation of quantiles # with 95 percent confidence intervals # data(fluazinam) (d1 <-log10(fluazinam)) #> left right #> 1 0.5797836 0.5797836 #> 2 1.5263393 1.5263393 #> 3 1.9395193 1.9395193 #> 4 3.2304489 NA #> 5 2.8061800 2.8061800 #> 6 3.0625820 NA #> 7 2.0530784 2.0530784 #> 8 2.1105897 2.1105897 #> 9 2.7678976 2.7678976 #> 10 3.2685780 NA #> 11 0.2041200 0.2041200 #> 12 0.6812412 0.6812412 #> 13 1.9138139 1.9138139 #> 14 2.1903317 2.1903317 f1 <- fitdistcens(d1, \"norm\") b1 <- bootdistcens(f1, niter = 51) b1 #> Parameter values obtained with nonparametric bootstrap #> mean sd #> 1 2.148176 1.2301856 #> 2 2.359487 1.1144722 #> 3 1.886811 0.7960468 #> 4 1.983487 0.9941790 #> 5 1.912052 0.9906398 #> 6 2.189226 0.9088450 #> 7 2.287131 1.2049569 #> 8 2.288832 0.7645444 #> 9 1.787691 1.0077846 #> 10 2.893830 1.2229467 #> 11 2.569893 0.9597859 #> 12 2.343772 1.2402711 #> 13 2.645568 1.2934746 #> 14 1.942141 0.5982854 #> 15 1.932680 1.0077309 #> 16 1.824771 1.0653955 #> 17 2.983895 1.8018944 #> 18 2.347785 1.3994097 #> 19 1.845464 0.8555560 #> 20 2.427059 1.5893095 #> 21 1.948223 0.8705864 #> 22 1.692356 1.0223265 #> 23 2.275639 0.8147514 #> 24 2.148972 1.0345423 #> 25 2.348520 1.1739100 #> 26 1.893396 1.1106869 #> 27 1.911591 1.1574565 #> 28 2.610027 1.0803468 #> 29 2.080525 1.3340362 #> 30 1.985938 0.9870137 #> 31 1.742953 1.0956522 #> 32 2.549440 1.0330325 #> 33 2.268481 0.4832085 #> 34 2.144250 1.3228431 #> 35 2.184267 1.2698264 #> 36 1.821893 1.5316162 #> 37 2.085662 1.1654912 #> 38 1.868720 1.0912928 #> 39 2.138497 1.1356628 #> 40 2.119477 0.9868753 #> 41 2.153767 1.1818298 #> 42 1.933517 0.5773863 #> 43 2.074073 0.7280150 #> 44 2.421981 1.1254148 #> 45 2.486787 0.6096348 #> 46 2.030623 1.0934793 #> 47 1.938514 1.0258803 #> 48 1.678181 1.2224439 #> 49 2.339840 1.3061770 #> 50 2.278660 0.7921537 #> 51 2.195027 1.1382020 summary(b1) #> Nonparametric bootstrap medians and 95% percentile CI #> Median 2.5% 97.5% #> mean 2.144250 1.7050054 2.831765 #> sd 1.091293 0.5826111 1.574886 plot(b1) quantile(b1) #> (original) estimated quantiles for each specified probability (censored data) #> p=0.1 p=0.2 p=0.3 p=0.4 p=0.5 p=0.6 p=0.7 #> estimate 0.6655064 1.179033 1.549321 1.86572 2.161449 2.457179 2.773577 #> p=0.8 p=0.9 #> estimate 3.143865 3.657392 #> Median of bootstrap estimates #> p=0.1 p=0.2 p=0.3 p=0.4 p=0.5 p=0.6 p=0.7 #> estimate 0.7210295 1.215519 1.593624 1.854354 2.14425 2.418499 2.691487 #> p=0.8 p=0.9 #> estimate 2.961351 3.394931 #> #> two-sided 95 % CI of each quantile #> p=0.1 p=0.2 p=0.3 p=0.4 p=0.5 p=0.6 p=0.7 #> 2.5 % 0.1683713 0.6922166 1.066910 1.433480 1.705005 1.996046 2.241195 #> 97.5 % 1.5718878 1.8638800 2.141966 2.479624 2.831765 3.146062 3.482325 #> p=0.8 p=0.9 #> 2.5 % 2.472445 2.753590 #> 97.5 % 3.883480 4.463155 CIcdfplot(b1, CI.output = \"quantile\") plot(density(b1)) #> List of 1 #> $ :List of 6 #> ..$ estim :'data.frame':\t51 obs. of 2 variables: #> .. ..$ mean: num [1:51] 2.15 2.36 1.89 1.98 1.91 ... #> .. ..$ sd : num [1:51] 1.23 1.114 0.796 0.994 0.991 ... #> ..$ converg: num [1:51] 0 0 0 0 0 0 0 0 0 0 ... #> ..$ method : chr \"nonparam\" #> ..$ nbboot : num 51 #> ..$ CI : num [1:2, 1:3] 2.144 1.091 1.705 0.583 2.832 ... #> .. ..- attr(*, \"dimnames\")=List of 2 #> .. .. ..$ : chr [1:2] \"mean\" \"sd\" #> .. .. ..$ : chr [1:3] \"Median\" \"2.5%\" \"97.5%\" #> ..$ fitpart:List of 17 #> .. ..$ estimate : Named num [1:2] 2.16 1.17 #> .. .. 
..- attr(*, \"names\")= chr [1:2] \"mean\" \"sd\" #> .. ..$ method : chr \"mle\" #> .. ..$ sd : Named num [1:2] 0.322 0.263 #> .. .. ..- attr(*, \"names\")= chr [1:2] \"mean\" \"sd\" #> .. ..$ cor : num [1:2, 1:2] 1 0.135 0.135 1 #> .. .. ..- attr(*, \"dimnames\")=List of 2 #> .. .. .. ..$ : chr [1:2] \"mean\" \"sd\" #> .. .. .. ..$ : chr [1:2] \"mean\" \"sd\" #> .. ..$ vcov : num [1:2, 1:2] 0.1039 0.0114 0.0114 0.0692 #> .. .. ..- attr(*, \"dimnames\")=List of 2 #> .. .. .. ..$ : chr [1:2] \"mean\" \"sd\" #> .. .. .. ..$ : chr [1:2] \"mean\" \"sd\" #> .. ..$ loglik : num -20.4 #> .. ..$ aic : num 44.8 #> .. ..$ bic : num 46.1 #> .. ..$ n : int 14 #> .. ..$ censdata :'data.frame':\t14 obs. of 2 variables: #> .. .. ..$ left : num [1:14] 0.58 1.53 1.94 3.23 2.81 ... #> .. .. ..$ right: num [1:14] 0.58 1.53 1.94 NA 2.81 ... #> .. ..$ distname : chr \"norm\" #> .. ..$ fix.arg : NULL #> .. ..$ fix.arg.fun: NULL #> .. ..$ dots : NULL #> .. ..$ convergence: int 0 #> .. ..$ discrete : logi FALSE #> .. ..$ weights : NULL #> .. ..- attr(*, \"class\")= chr \"fitdistcens\" #> ..- attr(*, \"class\")= chr \"bootdistcens\" #> NULL # (2) Estimation of the mean of the normal distribution # by maximum likelihood with the standard deviation fixed at 1 # using the argument fix.arg # followed by nonparametric bootstrap # and calculation of quantiles with 95 percent confidence intervals # f1b <- fitdistcens(d1, \"norm\", start = list(mean = 1),fix.arg = list(sd = 1)) b1b <- bootdistcens(f1b, niter = 51) summary(b1b) #> Nonparametric bootstrap medians and 95% percentile CI #> Median 2.5% 97.5% #> 2.175510 1.729164 2.788775 plot(b1b) quantile(b1b) #> (original) estimated quantiles for each specified probability (censored data) #> p=0.1 p=0.2 p=0.3 p=0.4 p=0.5 p=0.6 p=0.7 #> estimate 0.8527324 1.292663 1.609883 1.880937 2.134284 2.387631 2.658684 #> p=0.8 p=0.9 #> estimate 2.975905 3.415836 #> Median of bootstrap estimates #> p=0.1 p=0.2 p=0.3 p=0.4 p=0.5 p=0.6 p=0.7 #> estimate 0.8939584 1.333889 1.651109 1.922163 2.17551 2.428857 2.699911 #> p=0.8 p=0.9 #> estimate 3.017131 3.457062 #> #> two-sided 95 % CI of each quantile #> p=0.1 p=0.2 p=0.3 p=0.4 p=0.5 p=0.6 p=0.7 #> 2.5 % 0.4476124 0.8875427 1.204763 1.475817 1.729164 1.982511 2.253564 #> 97.5 % 1.5072239 1.9471543 2.264375 2.535428 2.788775 3.042123 3.313176 #> p=0.8 p=0.9 #> 2.5 % 2.570785 3.010716 #> 97.5 % 3.630397 4.070327 # (3) comparison of sequential and parallel versions of bootstrap # to be tried with a greater number of iterations (1001 or more) # # \\donttest{ niter <- 1001 data(fluazinam) d1 <-log10(fluazinam) f1 <- fitdistcens(d1, \"norm\") # sequential version ptm <- proc.time() summary(bootdistcens(f1, niter = niter)) #> Nonparametric bootstrap medians and 95% percentile CI #> Median 2.5% 97.5% #> mean 2.146743 1.5792689 2.877993 #> sd 1.129426 0.6853478 1.709083 proc.time() - ptm #> user system elapsed #> 4.422 0.101 4.414 # parallel version using snow require(parallel) ptm <- proc.time() summary(bootdistcens(f1, niter = niter, parallel = \"snow\", ncpus = 2)) #> Nonparametric bootstrap medians and 95% percentile CI #> Median 2.5% 97.5% #> mean 2.144793 1.5914352 2.899763 #> sd 1.108123 0.6912424 1.673702 proc.time() - ptm #> user system elapsed #> 0.006 0.004 3.412 # parallel version using multicore (not available on Windows) ptm <- proc.time() summary(bootdistcens(f1, niter = niter, parallel = \"multicore\", ncpus = 2)) #> Nonparametric bootstrap medians and 95% percentile CI #> Median 2.5% 97.5% #> mean 2.163302 1.5524788 2.874380 #> sd 
1.119044 0.7072572 1.656059 proc.time() - ptm #> user system elapsed #> 0.008 0.019 2.360 # }"},{"path":"https://lbbe-software.github.io/fitdistrplus/reference/danish.html","id":null,"dir":"Reference","previous_headings":"","what":"Danish reinsurance claim dataset — danish","title":"Danish reinsurance claim dataset — danish","text":"univariate dataset collected Copenhagen Reinsurance comprise 2167 fire losses period 1980 1990. adjusted inflation reflect 1985 values expressed millions Danish Krone. multivariate data set data total claim divided building loss, loss contents loss profits.","code":""},{"path":"https://lbbe-software.github.io/fitdistrplus/reference/danish.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Danish reinsurance claim dataset — danish","text":"","code":"data(danishuni) data(danishmulti)"},{"path":"https://lbbe-software.github.io/fitdistrplus/reference/danish.html","id":"format","dir":"Reference","previous_headings":"","what":"Format","title":"Danish reinsurance claim dataset — danish","text":"danishuni contains two columns: Date day claim occurence. Loss total loss amount millions Danish Krone (DKK). danishmulti contains five columns: Date day claim occurence. Building loss amount (mDKK) building coverage. Contents loss amount (mDKK) contents coverage. Profits loss amount (mDKK) profit coverage. Total total loss amount (mDKK). columns numeric except Date columns class Date.","code":""},{"path":"https://lbbe-software.github.io/fitdistrplus/reference/danish.html","id":"source","dir":"Reference","previous_headings":"","what":"Source","title":"Danish reinsurance claim dataset — danish","text":"Embrechts, P., Kluppelberg, C. Mikosch, T. (1997) Modelling Extremal Events Insurance Finance. Berlin: Springer.","code":""},{"path":"https://lbbe-software.github.io/fitdistrplus/reference/danish.html","id":"references","dir":"Reference","previous_headings":"","what":"References","title":"Danish reinsurance claim dataset — danish","text":"Dataset used McNeil (1996), Estimating Tails Loss Severity Distributions using Extreme Value Theory, ASTIN Bull. Davison, . C. (2003) Statistical Models. Cambridge University Press. 
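Relating to the sequential/parallel bootdistcens comparison just above, a small hedged addition (not part of the shipped example): the number of workers can be taken from the machine with parallel::detectCores() rather than hard-coded, reusing the fit f1 and niter defined in the example.

library(parallel)
# leave one physical core free; detectCores() may return NA on some platforms
ncp <- max(1, detectCores(logical = FALSE) - 1, na.rm = TRUE)
summary(bootdistcens(f1, niter = niter, parallel = "snow", ncpus = ncp))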
Page 278.","code":""},{"path":"https://lbbe-software.github.io/fitdistrplus/reference/danish.html","id":"ref-examples","dir":"Reference","previous_headings":"","what":"Examples","title":"Danish reinsurance claim dataset — danish","text":"","code":"# (1) load of data # data(danishuni) # (2) plot and description of data # plotdist(danishuni$Loss) # (3) load of data # data(danishmulti) # (4) plot and description of data # idx <- sample(1:NROW(danishmulti), 10) barplot(danishmulti$Building[idx], col = \"grey25\", ylim = c(0, max(danishmulti$Total[idx])), main = \"Some claims of danish data set\") barplot(danishmulti$Content[idx], add = TRUE, col = \"grey50\", axes = FALSE) barplot(danishmulti$Profits[idx], add = TRUE, col = \"grey75\", axes = FALSE) legend(\"topleft\", legend = c(\"Building\", \"Content\", \"Profits\"), fill = c(\"grey25\", \"grey50\", \"grey75\"))"},{"path":"https://lbbe-software.github.io/fitdistrplus/reference/dataFAQ.html","id":null,"dir":"Reference","previous_headings":"","what":"Datasets for the FAQ — dataFAQ","title":"Datasets for the FAQ — dataFAQ","text":"Datasets used FAQ vignette.","code":""},{"path":"https://lbbe-software.github.io/fitdistrplus/reference/dataFAQ.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Datasets for the FAQ — dataFAQ","text":"","code":"data(dataFAQlog1) data(dataFAQscale1) data(dataFAQscale2)"},{"path":"https://lbbe-software.github.io/fitdistrplus/reference/dataFAQ.html","id":"format","dir":"Reference","previous_headings":"","what":"Format","title":"Datasets for the FAQ — dataFAQ","text":"dataFAQlog1 dataFAQscale1 dataFAQscale2 vectors numeric data.","code":""},{"path":"https://lbbe-software.github.io/fitdistrplus/reference/dataFAQ.html","id":"author","dir":"Reference","previous_headings":"","what":"Author","title":"Datasets for the FAQ — dataFAQ","text":"Marie-Laure Delignette-Muller Christophe Dutang.","code":""},{"path":"https://lbbe-software.github.io/fitdistrplus/reference/descdist.html","id":null,"dir":"Reference","previous_headings":"","what":"Description of an empirical distribution for non-censored data — descdist","title":"Description of an empirical distribution for non-censored data — descdist","text":"Computes descriptive parameters empirical distribution non-censored data provides skewness-kurtosis plot.","code":""},{"path":"https://lbbe-software.github.io/fitdistrplus/reference/descdist.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Description of an empirical distribution for non-censored data — descdist","text":"","code":"descdist(data, discrete = FALSE, boot = NULL, method = \"unbiased\", graph = TRUE, print = TRUE, obs.col = \"red\", obs.pch = 16, boot.col = \"orange\") # S3 method for class 'descdist' print(x, ...)"},{"path":"https://lbbe-software.github.io/fitdistrplus/reference/descdist.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Description of an empirical distribution for non-censored data — descdist","text":"data numeric vector. discrete TRUE, distribution considered discrete. boot NULL, boot values skewness kurtosis plotted bootstrap samples data. boot must fixed case integer 10. method \"unbiased\" unbiased estimated values statistics \"sample\" sample values. graph FALSE, skewness-kurtosis graph plotted. print FALSE, descriptive parameters computed printed. obs.col Color used observed point skewness-kurtosis graph. obs.pch plotting character used observed point skewness-kurtosis graph. 
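Relating to the danishuni data documented above, a hedged sketch of actually fitting and comparing candidate loss models (not part of the packaged example; the choice of distributions is only illustrative):

library(fitdistrplus)
library(actuar)                      # provides the log-logistic density dllogis
data(danishuni)
fln <- fitdist(danishuni$Loss, "lnorm")
fll <- fitdist(danishuni$Loss, "llogis")
gofstat(list(fln, fll), fitnames = c("lognormal", "log-logistic"))
cdfcomp(list(fln, fll), xlogscale = TRUE,
        legendtext = c("lognormal", "log-logistic"))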
boot.col Color used bootstrap sample points skewness-kurtosis graph. x object class \"descdist\". ... arguments passed generic functions","code":""},{"path":"https://lbbe-software.github.io/fitdistrplus/reference/descdist.html","id":"details","dir":"Reference","previous_headings":"","what":"Details","title":"Description of an empirical distribution for non-censored data — descdist","text":"Minimum, maximum, median, mean, sample sd, sample (method==\"sample\") default unbiased estimations skewness Pearsons's kurtosis values printed (Sokal Rohlf, 1995). skewness-kurtosis plot one proposed Cullen Frey (1999) given empirical distribution. plot, values common distributions also displayed tools help choice distributions fit data. distributions (normal, uniform, logistic, exponential example), one possible value skewness kurtosis (normal distribution example, skewness = 0 kurtosis = 3), distribution thus represented point plot. distributions, areas possible values represented, consisting lines (gamma lognormal distributions example), larger areas (beta distribution example). Weibull distribution represented graph indicated legend shapes close lognormal gamma distributions may obtained distribution. order take account uncertainty estimated values kurtosis skewness data, data set may bootstraped fixing argument boot integer 10. boot values skewness kurtosis corresponding boot bootstrap samples computed reported blue color skewness-kurtosis plot. discrete TRUE, represented distributions Poisson, negative binomial distributions, normal distribution previous discrete distributions may converge. discrete FALSE, uniform, normal, logistic, lognormal, beta gamma distributions.","code":""},{"path":"https://lbbe-software.github.io/fitdistrplus/reference/descdist.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Description of an empirical distribution for non-censored data — descdist","text":"descdist returns list 7 components, min minimum value max maximum value median median value mean mean value sd standard deviation sample estimated value skewness skewness sample estimated value kurtosis kurtosis sample estimated value method method specified input (\"unbiased\" unbiased estimated values statistics \"sample\" sample values.","code":""},{"path":[]},{"path":"https://lbbe-software.github.io/fitdistrplus/reference/descdist.html","id":"references","dir":"Reference","previous_headings":"","what":"References","title":"Description of an empirical distribution for non-censored data — descdist","text":"Cullen AC Frey HC (1999), Probabilistic techniques exposure assessment. Plenum Press, USA, pp. 81-159. Evans M, Hastings N Peacock B (2000), Statistical distributions. John Wiley Sons Inc, doi:10.1002/9780470627242 . Sokal RR Rohlf FJ (1995), Biometry. W.H. Freeman Company, USA, pp. 111-115. Delignette-Muller ML Dutang C (2015), fitdistrplus: R Package Fitting Distributions. 
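A short sketch of reusing the descdist components listed above (an addition, not from the manual): the returned list's skewness and kurtosis can be extracted programmatically, e.g. to script the choice of candidate distributions.

library(fitdistrplus)
set.seed(123)
x <- rgamma(200, shape = 2, rate = 1)
dd <- descdist(x, boot = 101, graph = FALSE)   # compute without plotting
c(skewness = dd$skewness, kurtosis = dd$kurtosis)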
Journal Statistical Software, 64(4), 1-34, doi:10.18637/jss.v064.i04 .","code":""},{"path":"https://lbbe-software.github.io/fitdistrplus/reference/descdist.html","id":"author","dir":"Reference","previous_headings":"","what":"Author","title":"Description of an empirical distribution for non-censored data — descdist","text":"Marie-Laure Delignette-Muller Christophe Dutang.","code":""},{"path":"https://lbbe-software.github.io/fitdistrplus/reference/descdist.html","id":"ref-examples","dir":"Reference","previous_headings":"","what":"Examples","title":"Description of an empirical distribution for non-censored data — descdist","text":"","code":"# (1) Description of a sample from a normal distribution # with and without uncertainty on skewness and kurtosis estimated by bootstrap # set.seed(1234) x1 <- rnorm(100) descdist(x1) #> summary statistics #> ------ #> min: -2.345698 max: 2.548991 #> median: -0.384628 #> mean: -0.1567617 #> estimated sd: 1.004405 #> estimated skewness: 0.6052442 #> estimated kurtosis: 3.102441 descdist(x1,boot=11) #> summary statistics #> ------ #> min: -2.345698 max: 2.548991 #> median: -0.384628 #> mean: -0.1567617 #> estimated sd: 1.004405 #> estimated skewness: 0.6052442 #> estimated kurtosis: 3.102441 # (2) Description of a sample from a beta distribution # with uncertainty on skewness and kurtosis estimated by bootstrap # with changing of default colors and plotting character for observed point # descdist(rbeta(100,shape1=0.05,shape2=1),boot=11, obs.col=\"blue\", obs.pch = 15, boot.col=\"darkgreen\") #> summary statistics #> ------ #> min: 3.937372e-36 max: 0.8890347 #> median: 5.660314e-06 #> mean: 0.04094397 #> estimated sd: 0.1281058 #> estimated skewness: 4.368522 #> estimated kurtosis: 25.02241 # (3) Description of a sample from a gamma distribution # with uncertainty on skewness and kurtosis estimated by bootstrap # without plotting # descdist(rgamma(100,shape=2,rate=1),boot=11,graph=FALSE) #> summary statistics #> ------ #> min: 0.0753002 max: 8.631328 #> median: 1.627968 #> mean: 1.989657 #> estimated sd: 1.443636 #> estimated skewness: 1.509842 #> estimated kurtosis: 6.691933 # (4) Description of a sample from a Poisson distribution # with uncertainty on skewness and kurtosis estimated by bootstrap # descdist(rpois(100,lambda=2),discrete=TRUE,boot=11) #> summary statistics #> ------ #> min: 0 max: 6 #> median: 2 #> mean: 1.98 #> estimated sd: 1.377892 #> estimated skewness: 0.5802731 #> estimated kurtosis: 3.037067 # (5) Description of serving size data # with uncertainty on skewness and kurtosis estimated by bootstrap # data(groundbeef) serving <- groundbeef$serving descdist(serving, boot=11) #> summary statistics #> ------ #> min: 10 max: 200 #> median: 79 #> mean: 73.64567 #> estimated sd: 35.88487 #> estimated skewness: 0.7352745 #> estimated kurtosis: 3.551384"},{"path":"https://lbbe-software.github.io/fitdistrplus/reference/detectbound.html","id":null,"dir":"Reference","previous_headings":"","what":"Detect bounds for density function — detectbound","title":"Detect bounds for density function — detectbound","text":"Manual detection bounds parameter density function/","code":""},{"path":"https://lbbe-software.github.io/fitdistrplus/reference/detectbound.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Detect bounds for density function — detectbound","text":"","code":"detectbound(distname, vstart, obs, fix.arg=NULL, 
echo=FALSE)"},{"path":"https://lbbe-software.github.io/fitdistrplus/reference/detectbound.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Detect bounds for density function — detectbound","text":"distname character string \"name\" naming distribution corresponding density function dname must classically defined. vstart named vector giving initial values parameters named distribution. obs numeric vector non censored data. fix.arg optional named vector giving values fixed parameters named distribution. Default NULL. echo logical show traces.","code":""},{"path":"https://lbbe-software.github.io/fitdistrplus/reference/detectbound.html","id":"details","dir":"Reference","previous_headings":"","what":"Details","title":"Detect bounds for density function — detectbound","text":"function manually tests following bounds : -1, 0, 1.","code":""},{"path":"https://lbbe-software.github.io/fitdistrplus/reference/detectbound.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Detect bounds for density function — detectbound","text":"detectbound returns 2-row matrix lower bounds first row upper bounds second row.","code":""},{"path":[]},{"path":"https://lbbe-software.github.io/fitdistrplus/reference/detectbound.html","id":"references","dir":"Reference","previous_headings":"","what":"References","title":"Detect bounds for density function — detectbound","text":"Delignette-Muller ML Dutang C (2015), fitdistrplus: R Package Fitting Distributions. Journal Statistical Software, 64(4), 1-34, doi:10.18637/jss.v064.i04 .","code":""},{"path":"https://lbbe-software.github.io/fitdistrplus/reference/detectbound.html","id":"author","dir":"Reference","previous_headings":"","what":"Author","title":"Detect bounds for density function — detectbound","text":"Christophe Dutang Marie-Laure Delignette-Muller.","code":""},{"path":"https://lbbe-software.github.io/fitdistrplus/reference/detectbound.html","id":"ref-examples","dir":"Reference","previous_headings":"","what":"Examples","title":"Detect bounds for density function — detectbound","text":"","code":"# case where the density returns a Not-an-Numeric value. 
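# Added sketch (not one of the packaged examples): bounds can also be probed
# for a user-supplied density, since detectbound only requires that the
# corresponding d<name> function exist; dgumbel below is defined by hand.
dgumbel <- function(x, a, b) 1/b * exp((a - x)/b) * exp(-exp((a - x)/b))
set.seed(1)
obs <- -log(-log(runif(100)))                 # a standard Gumbel sample
detectbound("gumbel", c(a = 0, b = 1), obs)   # probes -1, 0 and 1 per parameter
# The original example below shows the predefined case where the density
# returns NaN outside the parameter bounds.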
detectbound(\"exp\", c(rate=3), 1:10) #> rate #> lowb 0 #> uppb Inf detectbound(\"binom\", c(size=3, prob=1/2), 1:10) #> size prob #> lowb -Inf 0 #> uppb Inf 1 detectbound(\"nbinom\", c(size=3, prob=1/2), 1:10) #> size prob mu #> lowb 0 0 -Inf #> uppb Inf 1 Inf"},{"path":"https://lbbe-software.github.io/fitdistrplus/reference/endosulfan.html","id":null,"dir":"Reference","previous_headings":"","what":"Species Sensitivity Distribution (SSD) for endosulfan — endosulfan","title":"Species Sensitivity Distribution (SSD) for endosulfan — endosulfan","text":"Summary 48- 96-hour acute toxicity values (LC50 EC50 values) exposure Australian Non-Australian taxa endosulfan.","code":""},{"path":"https://lbbe-software.github.io/fitdistrplus/reference/endosulfan.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Species Sensitivity Distribution (SSD) for endosulfan — endosulfan","text":"","code":"data(endosulfan)"},{"path":"https://lbbe-software.github.io/fitdistrplus/reference/endosulfan.html","id":"format","dir":"Reference","previous_headings":"","what":"Format","title":"Species Sensitivity Distribution (SSD) for endosulfan — endosulfan","text":"endosulfan data frame 4 columns, named ATV Acute Toxicity Value (geometric mean LC50 ou EC50 values micrograms per liter), Australian (coding Australian another origin), group (arthropods, fish non-arthropod invertebrates) taxa.","code":""},{"path":"https://lbbe-software.github.io/fitdistrplus/reference/endosulfan.html","id":"source","dir":"Reference","previous_headings":"","what":"Source","title":"Species Sensitivity Distribution (SSD) for endosulfan — endosulfan","text":"Hose, G.C., Van den Brink, P.J. 2004. Confirming Species-Sensitivity Distribution Concept Endosulfan Using Laboratory, Mesocosms, Field Data. Archives Environmental Contamination Toxicology, 47, 511-520.","code":""},{"path":"https://lbbe-software.github.io/fitdistrplus/reference/endosulfan.html","id":"ref-examples","dir":"Reference","previous_headings":"","what":"Examples","title":"Species Sensitivity Distribution (SSD) for endosulfan — endosulfan","text":"","code":"# (1) load of data # data(endosulfan) # (2) plot and description of data for non Australian fish in decimal logarithm # log10ATV <-log10(subset(endosulfan,(Australian == \"no\") & (group == \"Fish\"))$ATV) plotdist(log10ATV) descdist(log10ATV,boot=11) #> summary statistics #> ------ #> min: -0.69897 max: 3.60206 #> median: 0.4911356 #> mean: 0.5657595 #> estimated sd: 0.7034928 #> estimated skewness: 1.764601 #> estimated kurtosis: 9.759505 # (3) fit of a normal and a logistic distribution to data in log10 # (classical distributions used for SSD) # and visual comparison of the fits # fln <- fitdist(log10ATV,\"norm\") summary(fln) #> Fitting of the distribution ' norm ' by maximum likelihood #> Parameters : #> estimate Std. Error #> mean 0.5657595 0.10259072 #> sd 0.6958041 0.07254192 #> Loglikelihood: -48.58757 AIC: 101.1751 BIC: 104.8324 #> Correlation matrix: #> mean sd #> mean 1 0 #> sd 0 1 #> fll <- fitdist(log10ATV,\"logis\") summary(fll) #> Fitting of the distribution ' logis ' by maximum likelihood #> Parameters : #> estimate Std. 
Error #> location 0.5082818 0.08701594 #> scale 0.3457256 0.04301025 #> Loglikelihood: -44.31825 AIC: 92.6365 BIC: 96.29378 #> Correlation matrix: #> location scale #> location 1.00000000 0.04028287 #> scale 0.04028287 1.00000000 #> cdfcomp(list(fln,fll),legendtext=c(\"normal\",\"logistic\"), xlab=\"log10ATV\") denscomp(list(fln,fll),legendtext=c(\"normal\",\"logistic\"), xlab=\"log10ATV\") qqcomp(list(fln,fll),legendtext=c(\"normal\",\"logistic\")) ppcomp(list(fln,fll),legendtext=c(\"normal\",\"logistic\")) gofstat(list(fln,fll), fitnames = c(\"lognormal\", \"loglogistic\")) #> Goodness-of-fit statistics #> lognormal loglogistic #> Kolmogorov-Smirnov statistic 0.1267649 0.08457997 #> Cramer-von Mises statistic 0.1555576 0.04058514 #> Anderson-Darling statistic 1.0408045 0.37407465 #> #> Goodness-of-fit criteria #> lognormal loglogistic #> Akaike's Information Criterion 101.1751 92.63650 #> Bayesian Information Criterion 104.8324 96.29378 # (4) estimation of the 5 percent quantile value of # logistic fitted distribution (5 percent hazardous concentration : HC5) # with its two-sided 95 percent confidence interval calculated by # parametric bootstrap # with a small number of iterations to satisfy CRAN running times constraint. # For practical applications, we recommend to use at least niter=501 or niter=1001. # # in log10(ATV) bll <- bootdist(fll,niter=51) HC5ll <- quantile(bll,probs = 0.05) # in ATV 10^(HC5ll$quantiles) #> p=0.05 #> estimate 0.309253 10^(HC5ll$quantCI) #> p=0.05 #> 2.5 % 0.1891451 #> 97.5 % 0.5457214 # (5) estimation of the 5 percent quantile value of # the fitted logistic distribution (5 percent hazardous concentration : HC5) # with its one-sided 95 percent confidence interval (type \"greater\") # calculated by # nonparametric bootstrap # with a small number of iterations to satisfy CRAN running times constraint. # For practical applications, we recommend to use at least niter=501 or niter=1001. # # in log10(ATV) bllnonpar <- bootdist(fll,niter=51,bootmethod = \"nonparam\") HC5llgreater <- quantile(bllnonpar,probs = 0.05, CI.type=\"greater\") # in ATV 10^(HC5llgreater$quantiles) #> p=0.05 #> estimate 0.309253 10^(HC5llgreater$quantCI) #> p=0.05 #> 5 % 0.1860103 # (6) fit of a logistic distribution # by minimizing the modified Anderson-Darling AD2L distance # cf. ?mgedist for definition of this distance # fllAD2L <- fitdist(log10ATV,\"logis\",method=\"mge\",gof=\"AD2L\") summary(fllAD2L) #> Fitting of the distribution ' logis ' by maximum goodness-of-fit #> Parameters : #> estimate #> location 0.4965288 #> scale 0.3013154 #> Loglikelihood: -44.96884 AIC: 93.93767 BIC: 97.59496 plot(fllAD2L)"},{"path":"https://lbbe-software.github.io/fitdistrplus/reference/fitdist.html","id":null,"dir":"Reference","previous_headings":"","what":"Fit of univariate distributions to non-censored data — fitdist","title":"Fit of univariate distributions to non-censored data — fitdist","text":"Fit univariate distributions non-censored data maximum likelihood (mle), moment matching (mme), quantile matching (qme) maximizing goodness--fit estimation (mge). latter also known minimizing distance estimation. 
Generic methods print, plot, summary, quantile, logLik, AIC, BIC, vcov coef.","code":""},{"path":"https://lbbe-software.github.io/fitdistrplus/reference/fitdist.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Fit of univariate distributions to non-censored data — fitdist","text":"","code":"fitdist(data, distr, method = c(\"mle\", \"mme\", \"qme\", \"mge\", \"mse\"), start=NULL, fix.arg=NULL, discrete, keepdata = TRUE, keepdata.nb=100, calcvcov=TRUE, ...) # S3 method for class 'fitdist' print(x, ...) # S3 method for class 'fitdist' plot(x, breaks=\"default\", ...) # S3 method for class 'fitdist' summary(object, ...) # S3 method for class 'fitdist' logLik(object, ...) # S3 method for class 'fitdist' AIC(object, ..., k = 2) # S3 method for class 'fitdist' BIC(object, ...) # S3 method for class 'fitdist' vcov(object, ...) # S3 method for class 'fitdist' coef(object, ...)"},{"path":"https://lbbe-software.github.io/fitdistrplus/reference/fitdist.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Fit of univariate distributions to non-censored data — fitdist","text":"data numeric vector. distr character string \"name\" naming distribution corresponding density function dname, corresponding distribution function pname corresponding quantile function qname must defined, directly density function. method character string coding fitting method: \"mle\" 'maximum likelihood estimation', \"mme\" 'moment matching estimation', \"qme\" 'quantile matching estimation', \"mge\" 'maximum goodness--fit estimation' \"mse\" 'maximum spacing estimation'. start named list giving initial values parameters named distribution function data computing initial values returning named list. argument may omitted (default) distributions reasonable starting values computed (see 'details' section mledist). may account closed-form formulas. fix.arg optional named list giving values fixed parameters named distribution function data computing (fixed) parameter values returning named list. Parameters fixed value thus estimated maximum likelihood procedure. use argument possible method=\"mme\" closed-form formula used. keepdata logical. TRUE, dataset returned, otherwise sample subset returned. keepdata.nb keepdata=FALSE, length (>1) subset returned. calcvcov logical indicating (asymptotic) covariance matrix required. discrete TRUE, distribution considered discrete. discrete missing, \t discrete automaticaly set TRUE distr belongs \t \"binom\", \"nbinom\", \"geom\", \"hyper\" \"pois\" FALSE cases. thus recommended enter argument using another discrete distribution. argument directly affect results fit passed functions gofstat, plotdist cdfcomp. x object class \"fitdist\". object object class \"fitdist\". breaks \"default\" histogram plotted function hist default breaks definition. Else breaks passed function hist. argument taken account discrete distributions: \"binom\", \"nbinom\", \"geom\", \"hyper\" \"pois\". k penalty per parameter passed AIC generic function (2 default). ... arguments passed generic functions, one functions \"mledist\", \"mmedist\", \"qmedist\" \"mgedist\" depending chosen method. 
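One feature of the start argument described above that the numbered examples below do not exercise is passing a function of the data; a hedged sketch with moment-based gamma starting values (start_gamma is my own helper name):

library(fitdistrplus)
start_gamma <- function(x) {          # must return a named list of parameters
  m <- mean(x); v <- var(x)
  list(shape = m^2 / v, rate = m / v)
}
set.seed(1)
x <- rgamma(100, shape = 2, rate = 0.1)
fitdist(x, "gamma", start = start_gamma)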
See mledist, mmedist, qmedist, mgedist details parameter estimation.","code":""},{"path":"https://lbbe-software.github.io/fitdistrplus/reference/fitdist.html","id":"details","dir":"Reference","previous_headings":"","what":"Details","title":"Fit of univariate distributions to non-censored data — fitdist","text":"assumed distr argument specifies distribution probability density function, cumulative distribution function quantile function (d, p, q). four possible fitting methods described : method=\"mle\" Maximum likelihood estimation consists maximizing log-likelihood. numerical optimization carried mledist via optim find best values (see mledist details). method=\"mme\" Moment matching estimation consists equalizing theoretical empirical moments. Estimated values distribution parameters computed closed-form formula following distributions : \"norm\", \"lnorm\", \"pois\", \"exp\", \"gamma\", \"nbinom\", \"geom\", \"beta\", \"unif\" \"logis\". Otherwise theoretical empirical moments matched numerically, minimization sum squared differences observed theoretical moments. last case, arguments needed call fitdist: order memp (see mmedist details). Since Version 1.2-0, mmedist automatically computes asymptotic covariance matrix, hence theoretical moments mdist defined order equals twice maximal order given order. method = \"qme\" Quantile matching estimation consists equalizing theoretical empirical quantile. numerical optimization carried qmedist via optim minimize sum squared differences observed theoretical quantiles. use method requires additional argument probs, defined numeric vector probabilities quantile(s) () matched (see qmedist details). method = \"mge\" Maximum goodness--fit estimation consists maximizing goodness--fit statistics. numerical optimization carried mgedist via optim minimize goodness--fit distance. use method requires additional argument gof coding goodness--fit distance chosen. One can use classical Cramer-von Mises distance (\"CvM\"), classical Kolmogorov-Smirnov distance (\"KS\"), classical Anderson-Darling distance (\"AD\") gives weight tails distribution, one variants last distance proposed Luceno (2006) (see mgedist details). method suitable discrete distributions. method = \"mse\" Maximum goodness--fit estimation consists maximizing average log spacing. numerical optimization carried msedist via optim. default, direct optimization log-likelihood (criteria depending chosen method) performed using optim, \"Nelder-Mead\" method distributions characterized one parameter \"BFGS\" method distributions characterized one parameter. optimization algorithm used optim can chosen another optimization function can specified using ... argument (see mledist details). start may omitted (.e. NULL) classic distributions (see 'details' section mledist). Note errors raised optim, good idea start adding traces optimization process adding control=list(trace=1, REPORT=1) ... argument. parameter(s) () estimated, fitdist computes log-likelihood every estimation method maximum likelihood estimation standard errors estimates calculated Hessian solution found optim user-supplied function passed mledist. default (keepdata = TRUE), object returned fitdist contains data vector given input. dealing large datasets, can remove original dataset output setting keepdata = FALSE. case, keepdata.nb points () kept random subsampling keepdata.nb-2 points dataset adding minimum maximum. combined bootdist, use non-parametric bootstrap aware bootstrap performed subset randomly selected fitdist. 
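Among the estimation methods described above, "mse" (maximum spacing estimation) is the only one not shown in the numbered examples below; a brief hedged sketch on simulated data:

library(fitdistrplus)
set.seed(42)
x <- rgamma(200, shape = 3, rate = 0.5)
fmse <- fitdist(x, "gamma", method = "mse")   # maximize the average log spacing
summary(fmse)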
Currently, graphical comparisons multiple fits available framework. Weighted version estimation process available method = \"mle\", \"mme\", \"qme\" using weights=.... See corresponding man page details. Weighted maximum GOF estimation (method = \"mge\") allowed. yet possible take account weighths functions plotdist, plot.fitdist, cdfcomp, denscomp, ppcomp, qqcomp, gofstat descdist (developments planned future). parameter(s) () estimated, gofstat allows compute goodness--fit statistics. NB: data values particularly small large, scaling may needed optimization process. See example (14) man page examples (14,15) test file package. Please also take look Rmpfr package available CRAN numerical accuracy issues.","code":""},{"path":"https://lbbe-software.github.io/fitdistrplus/reference/fitdist.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Fit of univariate distributions to non-censored data — fitdist","text":"fitdist returns object class \"fitdist\", list following components: estimate parameter estimates. method character string coding fitting method : \"mle\" 'maximum likelihood estimation', \"mme\" 'matching moment estimation', \"qme\" 'matching quantile estimation' \"mge\" 'maximum goodness--fit estimation' \"mse\" 'maximum spacing estimation'. sd estimated standard errors, NA numerically computable NULL available. cor estimated correlation matrix, NA numerically computable NULL available. vcov estimated variance-covariance matrix, NULL available estimation method considered. loglik log-likelihood. aic Akaike information criterion. bic -called BIC SBC (Schwarz Bayesian criterion). n length data set. data data set. distname name distribution. fix.arg named list giving values parameters named distribution must kept fixed rather estimated maximum likelihood NULL parameters. fix.arg.fun function used set value fix.arg NULL. dots list arguments passed ... used bootdist iterative calls mledist, mmedist, qmedist, mgedist NULL arguments. convergence integer code convergence optim/constrOptim defined defined user user-supplied optimization function. 0 indicates successful convergence. 1 indicates iteration limit optim reached. 10 indicates degeneracy Nealder-Mead simplex. 100 indicates optim encountered internal error. discrete input argument automatic definition function passed functions gofstat, plotdist cdfcomp. weights vector weigths used estimation process NULL. Generic functions: print print \"fitdist\" object shows traces fitting method fitted distribution. summary summary provides parameter estimates fitted distribution, log-likelihood, AIC BIC statistics maximum likelihood used, standard errors parameter estimates correlation matrix parameter estimates. plot plot object class \"fitdist\" returned fitdist uses function plotdist. object class \"fitdist\" list objects class \"fitdist\" corresponding various fits using data set may also plotted using cdf plot (function cdfcomp), density plot(function denscomp), density Q-Q plot (function qqcomp), P-P plot (function ppcomp). logLik Extracts estimated log-likelihood \"fitdist\" object. AIC Extracts AIC \"fitdist\" object. BIC Extracts estimated BIC \"fitdist\" object. vcov Extracts estimated var-covariance matrix \"fitdist\" object (available method = \"mle\"). 
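The weights argument mentioned above is easy to overlook; a hedged sketch using frequency counts as the required strictly positive integer weights, which is equivalent to fitting the full sample:

library(fitdistrplus)
set.seed(7)
x <- rpois(50, lambda = 4)
tab <- table(x)
fw <- fitdist(as.numeric(names(tab)), "pois", weights = as.integer(tab))
fw$estimate    # matches the unweighted fitdist(x, "pois") up to optimization tolerance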
coef Extracts fitted coefficients \"fitdist\" object.","code":""},{"path":[]},{"path":"https://lbbe-software.github.io/fitdistrplus/reference/fitdist.html","id":"references","dir":"Reference","previous_headings":"","what":"References","title":"Fit of univariate distributions to non-censored data — fitdist","text":". Ibragimov R. 'minskii (1981), Statistical Estimation - Asymptotic Theory, Springer-Verlag, doi:10.1007/978-1-4899-0027-2 Cullen AC Frey HC (1999), Probabilistic techniques exposure assessment. Plenum Press, USA, pp. 81-155. Venables WN Ripley BD (2002), Modern applied statistics S. Springer, New York, pp. 435-446, doi:10.1007/978-0-387-21706-2 . Vose D (2000), Risk analysis, quantitative guide. John Wiley & Sons Ltd, Chischester, England, pp. 99-143. Delignette-Muller ML Dutang C (2015), fitdistrplus: R Package Fitting Distributions. Journal Statistical Software, 64(4), 1-34, doi:10.18637/jss.v064.i04 .","code":""},{"path":"https://lbbe-software.github.io/fitdistrplus/reference/fitdist.html","id":"author","dir":"Reference","previous_headings":"","what":"Author","title":"Fit of univariate distributions to non-censored data — fitdist","text":"Marie-Laure Delignette-Muller Christophe Dutang.","code":""},{"path":"https://lbbe-software.github.io/fitdistrplus/reference/fitdist.html","id":"ref-examples","dir":"Reference","previous_headings":"","what":"Examples","title":"Fit of univariate distributions to non-censored data — fitdist","text":"","code":"# (1) fit of a gamma distribution by maximum likelihood estimation # data(groundbeef) serving <- groundbeef$serving fitg <- fitdist(serving, \"gamma\") summary(fitg) #> Fitting of the distribution ' gamma ' by maximum likelihood #> Parameters : #> estimate Std. Error #> shape 4.00955898 0.341451641 #> rate 0.05443907 0.004937239 #> Loglikelihood: -1253.625 AIC: 2511.25 BIC: 2518.325 #> Correlation matrix: #> shape rate #> shape 1.0000000 0.9384578 #> rate 0.9384578 1.0000000 #> plot(fitg) plot(fitg, demp = TRUE) plot(fitg, histo = FALSE, demp = TRUE) cdfcomp(fitg, addlegend=FALSE) denscomp(fitg, addlegend=FALSE) ppcomp(fitg, addlegend=FALSE) qqcomp(fitg, addlegend=FALSE) # (2) use the moment matching estimation (using a closed formula) # fitgmme <- fitdist(serving, \"gamma\", method=\"mme\") summary(fitgmme) #> Fitting of the distribution ' gamma ' by matching moments #> Parameters : #> estimate Std. Error #> shape 4.22848617 6.64959843 #> rate 0.05741663 0.09451052 #> Loglikelihood: -1253.825 AIC: 2511.65 BIC: 2518.724 #> Correlation matrix: #> shape rate #> shape 1.0000000 0.9553622 #> rate 0.9553622 1.0000000 #> # (3) Comparison of various fits # fitW <- fitdist(serving, \"weibull\") fitg <- fitdist(serving, \"gamma\") fitln <- fitdist(serving, \"lnorm\") summary(fitW) #> Fitting of the distribution ' weibull ' by maximum likelihood #> Parameters : #> estimate Std. Error #> shape 2.185885 0.1045755 #> scale 83.347679 2.5268626 #> Loglikelihood: -1255.225 AIC: 2514.449 BIC: 2521.524 #> Correlation matrix: #> shape scale #> shape 1.000000 0.321821 #> scale 0.321821 1.000000 #> summary(fitg) #> Fitting of the distribution ' gamma ' by maximum likelihood #> Parameters : #> estimate Std. Error #> shape 4.00955898 0.341451641 #> rate 0.05443907 0.004937239 #> Loglikelihood: -1253.625 AIC: 2511.25 BIC: 2518.325 #> Correlation matrix: #> shape rate #> shape 1.0000000 0.9384578 #> rate 0.9384578 1.0000000 #> summary(fitln) #> Fitting of the distribution ' lnorm ' by maximum likelihood #> Parameters : #> estimate Std. 
Error #> meanlog 4.1693701 0.03366988 #> sdlog 0.5366095 0.02380783 #> Loglikelihood: -1261.319 AIC: 2526.639 BIC: 2533.713 #> Correlation matrix: #> meanlog sdlog #> meanlog 1 0 #> sdlog 0 1 #> cdfcomp(list(fitW, fitg, fitln), legendtext=c(\"Weibull\", \"gamma\", \"lognormal\")) denscomp(list(fitW, fitg, fitln), legendtext=c(\"Weibull\", \"gamma\", \"lognormal\")) qqcomp(list(fitW, fitg, fitln), legendtext=c(\"Weibull\", \"gamma\", \"lognormal\")) ppcomp(list(fitW, fitg, fitln), legendtext=c(\"Weibull\", \"gamma\", \"lognormal\")) gofstat(list(fitW, fitg, fitln), fitnames=c(\"Weibull\", \"gamma\", \"lognormal\")) #> Goodness-of-fit statistics #> Weibull gamma lognormal #> Kolmogorov-Smirnov statistic 0.1396646 0.1281486 0.1493090 #> Cramer-von Mises statistic 0.6840994 0.6936274 0.8277358 #> Anderson-Darling statistic 3.5736460 3.5672625 4.5436542 #> #> Goodness-of-fit criteria #> Weibull gamma lognormal #> Akaike's Information Criterion 2514.449 2511.250 2526.639 #> Bayesian Information Criterion 2521.524 2518.325 2533.713 # (4) defining your own distribution functions, here for the Gumbel distribution # for other distributions, see the CRAN task view # dedicated to probability distributions # dgumbel <- function(x, a, b) 1/b*exp((a-x)/b)*exp(-exp((a-x)/b)) pgumbel <- function(q, a, b) exp(-exp((a-q)/b)) qgumbel <- function(p, a, b) a-b*log(-log(p)) fitgumbel <- fitdist(serving, \"gumbel\", start=list(a=10, b=10)) #> Error in fitdist(serving, \"gumbel\", start = list(a = 10, b = 10)): The dgumbel function must be defined summary(fitgumbel) #> Error in eval(expr, envir, enclos): object 'fitgumbel' not found plot(fitgumbel) #> Error in eval(expr, envir, enclos): object 'fitgumbel' not found # (5) fit discrete distributions (Poisson and negative binomial) # data(toxocara) number <- toxocara$number fitp <- fitdist(number,\"pois\") summary(fitp) #> Fitting of the distribution ' pois ' by maximum likelihood #> Parameters : #> estimate Std. Error #> lambda 8.679245 0.4046719 #> Loglikelihood: -507.5334 AIC: 1017.067 BIC: 1019.037 plot(fitp) fitnb <- fitdist(number,\"nbinom\") summary(fitnb) #> Fitting of the distribution ' nbinom ' by maximum likelihood #> Parameters : #> estimate Std. Error #> size 0.3971457 0.08289027 #> mu 8.6802520 1.93501003 #> Loglikelihood: -159.3441 AIC: 322.6882 BIC: 326.6288 #> Correlation matrix: #> size mu #> size 1.0000000000 -0.0001038553 #> mu -0.0001038553 1.0000000000 #> plot(fitnb) cdfcomp(list(fitp,fitnb)) gofstat(list(fitp,fitnb)) #> Chi-squared statistic: 31256.96 7.48606 #> Degree of freedom of the Chi-squared distribution: 5 4 #> Chi-squared p-value: 0 0.1123255 #> the p-value may be wrong with some theoretical counts < 5 #> Chi-squared table: #> obscounts theo 1-mle-pois theo 2-mle-nbinom #> <= 0 14 0.009014207 15.295027 #> <= 1 8 0.078236512 5.808596 #> <= 3 6 1.321767215 6.845015 #> <= 4 6 2.131297776 2.407815 #> <= 9 6 29.827829221 7.835196 #> <= 21 6 19.626223732 8.271110 #> > 21 7 0.005631339 6.537242 #> #> Goodness-of-fit criteria #> 1-mle-pois 2-mle-nbinom #> Akaike's Information Criterion 1017.067 322.6882 #> Bayesian Information Criterion 1019.037 326.6288 # (6) how to change the optimisation method? # data(groundbeef) serving <- groundbeef$serving fitdist(serving, \"gamma\", optim.method=\"Nelder-Mead\") #> Fitting of the distribution ' gamma ' by maximum likelihood #> Parameters: #> estimate Std. 
Error #> shape 4.00955898 0.341451641 #> rate 0.05443907 0.004937239 fitdist(serving, \"gamma\", optim.method=\"BFGS\") #> Fitting of the distribution ' gamma ' by maximum likelihood #> Parameters: #> estimate Std. Error #> shape 4.21183435 0.359345536 #> rate 0.05719546 0.005181142 fitdist(serving, \"gamma\", optim.method=\"SANN\") #> Fitting of the distribution ' gamma ' by maximum likelihood #> Parameters: #> estimate Std. Error #> shape 4.02694892 0.342992129 #> rate 0.05478925 0.004968512 # (7) custom optimization function # # \\donttest{ #create the sample set.seed(1234) mysample <- rexp(100, 5) mystart <- list(rate=8) res1 <- fitdist(mysample, dexp, start= mystart, optim.method=\"Nelder-Mead\") #show the result summary(res1) #> Fitting of the distribution ' exp ' by maximum likelihood #> Parameters : #> estimate Std. Error #> rate 5.120312 0.5120312 #> Loglikelihood: 63.32596 AIC: -124.6519 BIC: -122.0467 #the warning tell us to use optimise, because the Nelder-Mead is not adequate. #to meet the standard 'fn' argument and specific name arguments, we wrap optimize, myoptimize <- function(fn, par, ...) { res <- optimize(f=fn, ..., maximum=FALSE) #assume the optimization function minimize standardres <- c(res, convergence=0, value=res$objective, par=res$minimum, hessian=NA) return(standardres) } #call fitdist with a 'custom' optimization function res2 <- fitdist(mysample, \"exp\", start=mystart, custom.optim=myoptimize, interval=c(0, 100)) #show the result summary(res2) #> Fitting of the distribution ' exp ' by maximum likelihood #> Parameters : #> estimate #> rate 5.120531 #> Loglikelihood: 63.32596 AIC: -124.6519 BIC: -122.0467 # } # (8) custom optimization function - another example with the genetic algorithm # # \\donttest{ #set a sample fit1 <- fitdist(serving, \"gamma\") summary(fit1) #> Fitting of the distribution ' gamma ' by maximum likelihood #> Parameters : #> estimate Std. Error #> shape 4.00955898 0.341451641 #> rate 0.05443907 0.004937239 #> Loglikelihood: -1253.625 AIC: 2511.25 BIC: 2518.325 #> Correlation matrix: #> shape rate #> shape 1.0000000 0.9384578 #> rate 0.9384578 1.0000000 #> #wrap genoud function rgenoud package mygenoud <- function(fn, par, ...) { require(rgenoud) res <- genoud(fn, starting.values=par, ...) standardres <- c(res, convergence=0) return(standardres) } #call fitdist with a 'custom' optimization function fit2 <- fitdist(serving, \"gamma\", custom.optim=mygenoud, nvars=2, Domains=cbind(c(0, 0), c(10, 10)), boundary.enforcement=1, print.level=1, hessian=TRUE) #> Loading required package: rgenoud #> ## rgenoud (Version 5.9-0.10, Build Date: 2023-12-13) #> ## See http://sekhon.berkeley.edu/rgenoud for additional documentation. #> ## Please cite software as: #> ## Walter Mebane, Jr. and Jasjeet S. Sekhon. 2011. #> ## ``Genetic Optimization Using Derivatives: The rgenoud package for R.'' #> ## Journal of Statistical Software, 42(11): 1-26. #> ## #> #> #> Wed Aug 28 14:28:45 2024 #> Domains: #> 0.000000e+00 <= X1 <= 1.000000e+01 #> 0.000000e+00 <= X2 <= 1.000000e+01 #> #> Data Type: Floating Point #> Operators (code number, name, population) #> \t(1) Cloning........................... \t122 #> \t(2) Uniform Mutation.................. \t125 #> \t(3) Boundary Mutation................. \t125 #> \t(4) Non-Uniform Mutation.............. \t125 #> \t(5) Polytope Crossover................ \t125 #> \t(6) Simple Crossover.................. \t126 #> \t(7) Whole Non-Uniform Mutation........ \t125 #> \t(8) Heuristic Crossover............... 
\t126 #> \t(9) Local-Minimum Crossover........... \t0 #> #> HARD Maximum Number of Generations: 100 #> Maximum Nonchanging Generations: 10 #> Population size : 1000 #> Convergence Tolerance: 1.000000e-03 #> #> Using the BFGS Derivative Based Optimizer on the Best Individual Each Generation. #> Checking Gradients before Stopping. #> Not Using Out of Bounds Individuals But Allowing Trespassing. #> #> Minimization Problem. #> #> #> Generation#\t Solution Value #> #> 0 \t1.253796e+03 #> 1 \t1.253625e+03 #> #> 'wait.generations' limit reached. #> No significant improvement in 10 generations. #> #> Solution Fitness Value: 1.253625e+03 #> #> Parameters at the Solution (parameter, gradient): #> #> X[ 1] :\t4.008339e+00\tG[ 1] :\t-2.378827e-07 #> X[ 2] :\t5.442736e-02\tG[ 2] :\t6.872187e-05 #> #> Solution Found Generation 1 #> Number of Generations Run 12 #> #> Wed Aug 28 14:28:46 2024 #> Total run time : 0 hours 0 minutes and 1 seconds summary(fit2) #> Fitting of the distribution ' gamma ' by maximum likelihood #> Parameters : #> estimate Std. Error #> shape 4.00833912 0.341343848 #> rate 0.05442736 0.004936215 #> Loglikelihood: -1253.625 AIC: 2511.25 BIC: 2518.325 #> Correlation matrix: #> shape rate #> shape 1.0000000 0.9384395 #> rate 0.9384395 1.0000000 #> # } # (9) estimation of the standard deviation of a gamma distribution # by maximum likelihood with the shape fixed at 4 using the argument fix.arg # data(groundbeef) serving <- groundbeef$serving f1c <- fitdist(serving,\"gamma\",start=list(rate=0.1),fix.arg=list(shape=4)) summary(f1c) #> Fitting of the distribution ' gamma ' by maximum likelihood #> Parameters : #> estimate Std. Error #> rate 0.05431772 0.001703521 #> Fixed parameters: #> value #> shape 4 #> Loglikelihood: -1253.625 AIC: 2509.251 BIC: 2512.788 plot(f1c) # (10) fit of a Weibull distribution to serving size data # by maximum likelihood estimation # or by quantile matching estimation (in this example # matching first and third quartiles) # data(groundbeef) serving <- groundbeef$serving fWmle <- fitdist(serving, \"weibull\") summary(fWmle) #> Fitting of the distribution ' weibull ' by maximum likelihood #> Parameters : #> estimate Std. 
Error #> shape 2.185885 0.1045755 #> scale 83.347679 2.5268626 #> Loglikelihood: -1255.225 AIC: 2514.449 BIC: 2521.524 #> Correlation matrix: #> shape scale #> shape 1.000000 0.321821 #> scale 0.321821 1.000000 #> plot(fWmle) gofstat(fWmle) #> Goodness-of-fit statistics #> 1-mle-weibull #> Kolmogorov-Smirnov statistic 0.1396646 #> Cramer-von Mises statistic 0.6840994 #> Anderson-Darling statistic 3.5736460 #> #> Goodness-of-fit criteria #> 1-mle-weibull #> Akaike's Information Criterion 2514.449 #> Bayesian Information Criterion 2521.524 fWqme <- fitdist(serving, \"weibull\", method=\"qme\", probs=c(0.25, 0.75)) summary(fWqme) #> Fitting of the distribution ' weibull ' by matching quantiles #> Parameters : #> estimate #> shape 2.268699 #> scale 86.590853 #> Loglikelihood: -1256.129 AIC: 2516.258 BIC: 2523.332 plot(fWqme) gofstat(fWqme) #> Goodness-of-fit statistics #> 1-qme-weibull #> Kolmogorov-Smirnov statistic 0.1692858 #> Cramer-von Mises statistic 0.9664709 #> Anderson-Darling statistic 4.8479858 #> #> Goodness-of-fit criteria #> 1-qme-weibull #> Akaike's Information Criterion 2516.258 #> Bayesian Information Criterion 2523.332 # (11) Fit of a Pareto distribution by numerical moment matching estimation # # \\donttest{ require(actuar) #> Loading required package: actuar #> #> Attaching package: ‘actuar’ #> The following objects are masked from ‘package:stats’: #> #> sd, var #> The following object is masked from ‘package:grDevices’: #> #> cm #simulate a sample x4 <- rpareto(1000, 6, 2) #empirical raw moment memp <- function(x, order) mean(x^order) #fit fP <- fitdist(x4, \"pareto\", method=\"mme\", order=c(1, 2), memp=\"memp\", start=list(shape=10, scale=10), lower=1, upper=Inf) #> Error in mmedist(data, distname, start = arg_startfix$start.arg, fix.arg = arg_startfix$fix.arg, checkstartfix = TRUE, calcvcov = calcvcov, ...): the empirical moment must be defined as a function summary(fP) #> Error in eval(expr, envir, enclos): object 'fP' not found plot(fP) #> Error in eval(expr, envir, enclos): object 'fP' not found # } # (12) Fit of a Weibull distribution to serving size data by maximum # goodness-of-fit estimation using all the distances available # # \\donttest{ data(groundbeef) serving <- groundbeef$serving (f1 <- fitdist(serving, \"weibull\", method=\"mge\", gof=\"CvM\")) #> Fitting of the distribution ' weibull ' by maximum goodness-of-fit #> Parameters: #> estimate #> shape 2.093204 #> scale 82.660014 (f2 <- fitdist(serving, \"weibull\", method=\"mge\", gof=\"KS\")) #> Fitting of the distribution ' weibull ' by maximum goodness-of-fit #> Parameters: #> estimate #> shape 2.065634 #> scale 81.450487 (f3 <- fitdist(serving, \"weibull\", method=\"mge\", gof=\"AD\")) #> Fitting of the distribution ' weibull ' by maximum goodness-of-fit #> Parameters: #> estimate #> shape 2.125473 #> scale 82.890260 (f4 <- fitdist(serving, \"weibull\", method=\"mge\", gof=\"ADR\")) #> Fitting of the distribution ' weibull ' by maximum goodness-of-fit #> Parameters: #> estimate #> shape 2.072087 #> scale 82.761868 (f5 <- fitdist(serving, \"weibull\", method=\"mge\", gof=\"ADL\")) #> Fitting of the distribution ' weibull ' by maximum goodness-of-fit #> Parameters: #> estimate #> shape 2.197498 #> scale 82.016005 (f6 <- fitdist(serving, \"weibull\", method=\"mge\", gof=\"AD2R\")) #> Fitting of the distribution ' weibull ' by maximum goodness-of-fit #> Parameters: #> estimate #> shape 1.90328 #> scale 81.33464 (f7 <- fitdist(serving, \"weibull\", method=\"mge\", gof=\"AD2L\")) #> Fitting of the distribution 
' weibull ' by maximum goodness-of-fit #> Parameters: #> estimate #> shape 2.483836 #> scale 78.252113 (f8 <- fitdist(serving, \"weibull\", method=\"mge\", gof=\"AD2\")) #> Fitting of the distribution ' weibull ' by maximum goodness-of-fit #> Parameters: #> estimate #> shape 2.081168 #> scale 85.281194 cdfcomp(list(f1, f2, f3, f4, f5, f6, f7, f8)) cdfcomp(list(f1, f2, f3, f4, f5, f6, f7, f8), xlogscale=TRUE, xlim=c(8, 250), verticals=TRUE) denscomp(list(f1, f2, f3, f4, f5, f6, f7, f8)) # } # (13) Fit of a uniform distribution using maximum likelihood # (a closed formula is used in this special case where the loglikelihood is not defined), # or maximum goodness-of-fit with Cramer-von Mises or Kolmogorov-Smirnov distance # set.seed(1234) u <- runif(50, min=5, max=10) fumle <- fitdist(u, \"unif\", method=\"mle\") summary(fumle) #> Fitting of the distribution ' unif ' by maximum likelihood #> Parameters : #> estimate #> min 5.047479 #> max 9.960752 #> Loglikelihood: -79.59702 AIC: 163.194 BIC: 167.0181 plot(fumle) gofstat(fumle) #> Goodness-of-fit statistics #> 1-mle-unif #> Kolmogorov-Smirnov statistic 0.1340723 #> Cramer-von Mises statistic 0.1566892 #> Anderson-Darling statistic Inf #> #> Goodness-of-fit criteria #> 1-mle-unif #> Akaike's Information Criterion 163.1940 #> Bayesian Information Criterion 167.0181 fuCvM <- fitdist(u, \"unif\", method=\"mge\", gof=\"CvM\") summary(fuCvM) #> Fitting of the distribution ' unif ' by maximum goodness-of-fit #> Parameters : #> estimate #> min 5.110497 #> max 9.552878 #> Loglikelihood: -Inf AIC: Inf BIC: Inf plot(fuCvM) gofstat(fuCvM) #> Goodness-of-fit statistics #> 1-mge-unif #> Kolmogorov-Smirnov statistic 0.11370966 #> Cramer-von Mises statistic 0.07791651 #> Anderson-Darling statistic Inf #> #> Goodness-of-fit criteria #> 1-mge-unif #> Akaike's Information Criterion Inf #> Bayesian Information Criterion Inf fuKS <- fitdist(u, \"unif\", method=\"mge\", gof=\"KS\") summary(fuKS) #> Fitting of the distribution ' unif ' by maximum goodness-of-fit #> Parameters : #> estimate #> min 5.092357 #> max 9.323818 #> Loglikelihood: -Inf AIC: Inf BIC: Inf plot(fuKS) gofstat(fuKS) #> Goodness-of-fit statistics #> 1-mge-unif #> Kolmogorov-Smirnov statistic 0.09216159 #> Cramer-von Mises statistic 0.12241830 #> Anderson-Darling statistic Inf #> #> Goodness-of-fit criteria #> 1-mge-unif #> Akaike's Information Criterion Inf #> Bayesian Information Criterion Inf # (14) scaling problem # the simulated dataset (below) has particularly small values, hence without scaling (10^0), # the optimization raises an error. The for loop shows how scaling by 10^i # for i=1,...,6 makes the fitting procedure work correctly. 
set.seed(1234) x2 <- rnorm(100, 1e-4, 2e-4) for(i in 0:6) cat(i, try(fitdist(x2*10^i, \"cauchy\", method=\"mle\")$estimate, silent=TRUE), \"\\n\") #> #> 0 Error in fitdist(x2 * 10^i, \"cauchy\", method = \"mle\") : #> the function mle failed to estimate the parameters, #> with the error code 100 #> #> #> #> 1 Error in fitdist(x2 * 10^i, \"cauchy\", method = \"mle\") : #> the function mle failed to estimate the parameters, #> with the error code 100 #> #> #> 2 0.001870693 0.01100646 #> 3 0.01871473 0.1100713 #> 4 0.1870693 1.100646 #> 5 1.876032 11.0131 #> 6 18.76032 110.131 # (15) Fit of a normal distribution on acute toxicity values of endosulfan in log10 for # nonarthropod invertebrates, using maximum likelihood estimation # to estimate what is called a species sensitivity distribution # (SSD) in ecotoxicology, followed by estimation of the 5 percent quantile value of # the fitted distribution (which is called the 5 percent hazardous concentration, HC5, # in ecotoxicology) and estimation of other quantiles. # data(endosulfan) ATV <- subset(endosulfan, group == \"NonArthroInvert\")$ATV log10ATV <- log10(subset(endosulfan, group == \"NonArthroInvert\")$ATV) fln <- fitdist(log10ATV, \"norm\") quantile(fln, probs = 0.05) #> Estimated quantiles for each specified probability (non-censored data) #> p=0.05 #> estimate 1.744227 quantile(fln, probs = c(0.05, 0.1, 0.2)) #> Estimated quantiles for each specified probability (non-censored data) #> p=0.05 p=0.1 p=0.2 #> estimate 1.744227 2.080093 2.4868 # (16) Fit of a triangular distribution using Cramer-von Mises or # Kolmogorov-Smirnov distance # # \\donttest{ set.seed(1234) require(mc2d) #> Loading required package: mc2d #> Loading required package: mvtnorm #> #> Attaching package: ‘mc2d’ #> The following objects are masked from ‘package:base’: #> #> pmax, pmin t <- rtriang(100, min=5, mode=6, max=10) fCvM <- fitdist(t, \"triang\", method=\"mge\", start = list(min=4, mode=6,max=9), gof=\"CvM\") #> Warning: Some parameter names have no starting/fixed value but have a default value: mean. fKS <- fitdist(t, \"triang\", method=\"mge\", start = list(min=4, mode=6,max=9), gof=\"KS\") #> Warning: Some parameter names have no starting/fixed value but have a default value: mean. cdfcomp(list(fCvM,fKS)) # } # (17) fit a non classical discrete distribution (the zero inflated Poisson distribution) # # \\donttest{ require(gamlss.dist) #> Loading required package: gamlss.dist set.seed(1234) x <- rZIP(n = 30, mu = 5, sigma = 0.2) plotdist(x, discrete = TRUE) fitzip <- fitdist(x, \"ZIP\", start = list(mu = 4, sigma = 0.15), discrete = TRUE, optim.method = \"L-BFGS-B\", lower = c(0, 0), upper = c(Inf, 1)) #> Warning: The dZIP function should return a zero-length vector when input has length zero #> Warning: The pZIP function should return a zero-length vector when input has length zero summary(fitzip) #> Fitting of the distribution ' ZIP ' by maximum likelihood #> Parameters : #> estimate Std. 
Error #> mu 4.3166139 0.4341218 #> sigma 0.1891806 0.0741692 #> Loglikelihood: -67.13886 AIC: 138.2777 BIC: 141.0801 #> Correlation matrix: #> mu sigma #> mu 1.00000000 0.06418863 #> sigma 0.06418863 1.00000000 #> plot(fitzip) fitp <- fitdist(x, \"pois\") cdfcomp(list(fitzip, fitp)) gofstat(list(fitzip, fitp)) #> Chi-squared statistic: 3.579721 35.91516 #> Degree of freedom of the Chi-squared distribution: 3 4 #> Chi-squared p-value: 0.3105689 3.012341e-07 #> the p-value may be wrong with some theoretical counts < 5 #> Chi-squared table: #> obscounts theo 1-mle-ZIP theo 2-mle-pois #> <= 0 6 6.000030 0.9059215 #> <= 2 7 4.425490 8.7194943 #> <= 4 5 9.047502 12.1379326 #> <= 5 5 4.054138 3.9650580 #> <= 7 5 4.715296 3.4694258 #> > 7 2 1.757544 0.8021677 #> #> Goodness-of-fit criteria #> 1-mle-ZIP 2-mle-pois #> Akaike's Information Criterion 138.2777 153.7397 #> Bayesian Information Criterion 141.0801 155.1409 # } # (18) examples with distributions in actuar (predefined starting values) # # \\donttest{ require(actuar) x <- c(2.3,0.1,2.7,2.2,0.4,2.6,0.2,1.,7.3,3.2,0.8,1.2,33.7,14., 21.4,7.7,1.,1.9,0.7,12.6,3.2,7.3,4.9,4000.,2.5,6.7,3.,63., 6.,1.6,10.1,1.2,1.5,1.2,30.,3.2,3.5,1.2,0.2,1.9,0.7,17., 2.8,4.8,1.3,3.7,0.2,1.8,2.6,5.9,2.6,6.3,1.4,0.8) #log logistic ft_llogis <- fitdist(x,'llogis') x <- c(0.3837053, 0.8576858, 0.3552237, 0.6226119, 0.4783756, 0.3139799, 0.4051403, 0.4537631, 0.4711057, 0.5647414, 0.6479617, 0.7134207, 0.5259464, 0.5949068, 0.3509200, 0.3783077, 0.5226465, 1.0241043, 0.4384580, 1.3341520) #inverse weibull ft_iw <- fitdist(x,'invweibull') # }"},{"path":"https://lbbe-software.github.io/fitdistrplus/reference/fitdistcens.html","id":null,"dir":"Reference","previous_headings":"","what":"Fitting of univariate distributions to censored data — fitdistcens","title":"Fitting of univariate distributions to censored data — fitdistcens","text":"Fits univariate distribution censored data maximum likelihood.","code":""},{"path":"https://lbbe-software.github.io/fitdistrplus/reference/fitdistcens.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Fitting of univariate distributions to censored data — fitdistcens","text":"","code":"fitdistcens(censdata, distr, start=NULL, fix.arg=NULL, keepdata = TRUE, keepdata.nb=100, calcvcov=TRUE, ...) # S3 method for class 'fitdistcens' print(x, ...) # S3 method for class 'fitdistcens' plot(x, ...) # S3 method for class 'fitdistcens' summary(object, ...) # S3 method for class 'fitdistcens' logLik(object, ...) # S3 method for class 'fitdistcens' AIC(object, ..., k = 2) # S3 method for class 'fitdistcens' BIC(object, ...) # S3 method for class 'fitdistcens' vcov(object, ...) # S3 method for class 'fitdistcens' coef(object, ...)"},{"path":"https://lbbe-software.github.io/fitdistrplus/reference/fitdistcens.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Fitting of univariate distributions to censored data — fitdistcens","text":"censdata dataframe two columns respectively named left right, describing observed value interval. left column contains either NA left censored observations, left bound interval interval censored observations, observed value non-censored observations. right column contains either NA right censored observations, right bound interval interval censored observations, observed value non-censored observations. 
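A small sketch of assembling such a censdata frame by hand (the values are invented): NA on the left marks a left-censored observation, NA on the right a right-censored one, equal bounds an exact value, and two different finite bounds an interval-censored observation.

library(fitdistrplus)
cens <- data.frame(left  = c(NA,  2.1, 3.0, 4.5, 1.8, NA),
                   right = c(1.2, 2.1, 3.8, NA,  1.8, 0.9))
fitdistcens(cens, "lnorm")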
distr character string \"name\" naming distribution, corresponding density function dname corresponding distribution function pname must defined, directly density function. start named list giving initial values parameters named distribution. argument may omitted distributions reasonable starting values computed (see 'details' section mledist). fix.arg optional named list giving values parameters named distribution must kept fixed rather estimated maximum likelihood. x object class \"fitdistcens\". object object class \"fitdistcens\". keepdata logical. TRUE, dataset returned, otherwise sample subset returned. keepdata.nb keepdata=FALSE, length subset returned. calcvcov logical indicating (asymptotic) covariance matrix required. k penalty per parameter passed AIC generic function (2 default). ... arguments passed generic functions, function plotdistcens order control type ecdf-plot used censored data, function mledist order control optimization method.","code":""},{"path":"https://lbbe-software.github.io/fitdistrplus/reference/fitdistcens.html","id":"details","dir":"Reference","previous_headings":"","what":"Details","title":"Fitting of univariate distributions to censored data — fitdistcens","text":"Maximum likelihood estimations distribution parameters computed using function mledist. default direct optimization log-likelihood performed using optim, \"Nelder-Mead\" method distributions characterized one parameter \"BFGS\" method distributions characterized one parameter. algorithm used optim can chosen another optimization function can specified using ... argument (see mledist details). start may omitted (.e. NULL) classic distributions (see 'details' section mledist). Note errors raised optim, good idea start adding traces optimization process adding control=list(trace=1, REPORT=1) ... argument. function able fit uniform distribution. parameter estimates, function returns log-likelihood standard errors estimates calculated Hessian solution found optim user-supplied function passed mledist. default (keepdata = TRUE), object returned fitdist contains data vector given input. dealing large datasets, can remove original dataset output setting keepdata = FALSE. case, keepdata.nb points () kept random subsampling keepdata.nb-4 points dataset adding component-wise minimum maximum. combined bootdistcens, aware bootstrap performed subset randomly selected fitdistcens. Currently, graphical comparisons multiple fits available framework. Weighted version estimation process available method = \"mle\" using weights=.... See corresponding man page details. yet possible take account weighths functions plotdistcens, plot.fitdistcens cdfcompcens (developments planned future). parameter(s) () estimated, gofstat allows compute goodness--fit statistics.","code":""},{"path":"https://lbbe-software.github.io/fitdistrplus/reference/fitdistcens.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Fitting of univariate distributions to censored data — fitdistcens","text":"fitdistcens returns object class \"fitdistcens\", list following components: estimate parameter estimates. method character string coding fitting method : \"mle\" 'maximum likelihood estimation'. sd estimated standard errors. cor estimated correlation matrix, NA numerically computable NULL available. vcov estimated variance-covariance matrix, NULL available. loglik log-likelihood. aic Akaike information criterion. bic -called BIC SBC (Schwarz Bayesian criterion). censdata censored data set. 
distname name distribution. fix.arg named list giving values parameters named distribution must kept fixed rather estimated maximum likelihood NULL parameters. fix.arg.fun function used set value fix.arg NULL. dots list arguments passed ... used bootdistcens control optimization method used iterative calls mledist NULL arguments. convergence integer code convergence optim/constrOptim defined defined user user-supplied optimization function. 0 indicates successful convergence. 1 indicates iteration limit optim reached. 10 indicates degeneracy Nealder-Mead simplex. 100 indicates optim encountered internal error. discrete always FALSE. weights vector weigths used estimation process NULL. Generic functions: print print \"fitdist\" object shows traces fitting method fitted distribution. summary summary provides parameter estimates fitted distribution, log-likelihood, AIC BIC statistics, standard errors parameter estimates correlation matrix parameter estimates. plot plot object class \"fitdistcens\" returned fitdistcens uses function plotdistcens. logLik Extracts estimated log-likelihood \"fitdistcens\" object. AIC Extracts AIC \"fitdistcens\" object. BIC Extracts BIC \"fitdistcens\" object. vcov Extracts estimated var-covariance matrix \"fitdistcens\" object (available method = \"mle\"). coef Extracts fitted coefficients \"fitdistcens\" object.","code":""},{"path":[]},{"path":"https://lbbe-software.github.io/fitdistrplus/reference/fitdistcens.html","id":"references","dir":"Reference","previous_headings":"","what":"References","title":"Fitting of univariate distributions to censored data — fitdistcens","text":"Venables WN Ripley BD (2002), Modern applied statistics S. Springer, New York, pp. 435-446, doi:10.1007/978-0-387-21706-2 . Delignette-Muller ML Dutang C (2015), fitdistrplus: R Package Fitting Distributions. Journal Statistical Software, 64(4), 1-34, doi:10.18637/jss.v064.i04 .","code":""},{"path":"https://lbbe-software.github.io/fitdistrplus/reference/fitdistcens.html","id":"author","dir":"Reference","previous_headings":"","what":"Author","title":"Fitting of univariate distributions to censored data — fitdistcens","text":"Marie-Laure Delignette-Muller Christophe Dutang.","code":""},{"path":"https://lbbe-software.github.io/fitdistrplus/reference/fitdistcens.html","id":"ref-examples","dir":"Reference","previous_headings":"","what":"Examples","title":"Fitting of univariate distributions to censored data — fitdistcens","text":"","code":"# (1) Fit of a lognormal distribution to bacterial contamination data # data(smokedfish) fitsf <- fitdistcens(smokedfish,\"lnorm\") summary(fitsf) #> Fitting of the distribution ' lnorm ' By maximum likelihood on censored data #> Parameters #> estimate Std. Error #> meanlog -3.627606 0.4637122 #> sdlog 3.544570 0.4876610 #> Loglikelihood: -90.65154 AIC: 185.3031 BIC: 190.5725 #> Correlation matrix: #> meanlog sdlog #> meanlog 1.0000000 -0.4325873 #> sdlog -0.4325873 1.0000000 #> # default plot using the Wang technique (see ?plotdiscens for details) plot(fitsf) # plot using the Turnbull algorithm (see ?plotdiscens for details) # with confidence intervals for the empirical distribution plot(fitsf, NPMLE = TRUE, NPMLE.method = \"Turnbull\", Turnbull.confint = TRUE) #> Warning: Turnbull is now a deprecated option for NPMLE.method. You should use Turnbull.middlepoints #> of Turnbull.intervals. It was here fixed as Turnbull.middlepoints, equivalent to former Turnbull. 
#> Warning: Q-Q plot and P-P plot are available only #> with the arguments NPMLE.method at Wang (default value) or Turnbull.intervals. # basic plot using intervals and points (see ?plotdiscens for details) plot(fitsf, NPMLE = FALSE) #> Warning: When NPMLE is FALSE the nonparametric maximum likelihood estimation #> of the cumulative distribution function is not computed. #> Q-Q plot and P-P plot are available only with the arguments NPMLE.method at Wang #> (default value) or Turnbull.intervals. # plot of the same fit using the Turnbull algorithm in logscale cdfcompcens(fitsf,main=\"bacterial contamination fits\", xlab=\"bacterial concentration (CFU/g)\",ylab=\"F\", addlegend = FALSE,lines01 = TRUE, xlogscale = TRUE, xlim = c(1e-2,1e2)) # zoom on large values of F cdfcompcens(fitsf,main=\"bacterial contamination fits\", xlab=\"bacterial concentration (CFU/g)\",ylab=\"F\", addlegend = FALSE,lines01 = TRUE, xlogscale = TRUE, xlim = c(1e-2,1e2),ylim=c(0.4,1)) # (2) Fit of a normal distribution on acute toxicity values # of fluazinam (in decimal logarithm) for # macroinvertebrates and zooplancton, using maximum likelihood estimation # to estimate what is called a species sensitivity distribution # (SSD) in ecotoxicology # data(fluazinam) log10EC50 <-log10(fluazinam) fln <- fitdistcens(log10EC50,\"norm\") fln #> Fitting of the distribution ' norm ' on censored data by maximum likelihood #> Parameters: #> estimate #> mean 2.161449 #> sd 1.167290 summary(fln) #> Fitting of the distribution ' norm ' By maximum likelihood on censored data #> Parameters #> estimate Std. Error #> mean 2.161449 0.3223366 #> sd 1.167290 0.2630390 #> Loglikelihood: -20.41212 AIC: 44.82424 BIC: 46.10235 #> Correlation matrix: #> mean sd #> mean 1.0000000 0.1350237 #> sd 0.1350237 1.0000000 #> plot(fln) # (3) defining your own distribution functions, here for the Gumbel distribution # for other distributions, see the CRAN task view dedicated to # probability distributions # dgumbel <- function(x,a,b) 1/b*exp((a-x)/b)*exp(-exp((a-x)/b)) pgumbel <- function(q,a,b) exp(-exp((a-q)/b)) qgumbel <- function(p,a,b) a-b*log(-log(p)) fg <- fitdistcens(log10EC50,\"gumbel\",start=list(a=1,b=1)) #> Error in checkparamlist(arg_startfix$start.arg, arg_startfix$fix.arg, argddistname, hasnodefaultval): 'start' must specify names which are arguments to 'distr'. summary(fg) #> Error in eval(expr, envir, enclos): object 'fg' not found plot(fg) #> Error in eval(expr, envir, enclos): object 'fg' not found # (4) comparison of fits of various distributions # fll <- fitdistcens(log10EC50,\"logis\") summary(fll) #> Fitting of the distribution ' logis ' By maximum likelihood on censored data #> Parameters #> estimate Std. Error #> location 2.1518291 0.3222830 #> scale 0.6910423 0.1745231 #> Loglikelihood: -20.55391 AIC: 45.10781 BIC: 46.38593 #> Correlation matrix: #> location scale #> location 1.00000000 0.05097494 #> scale 0.05097494 1.00000000 #> cdfcompcens(list(fln,fll,fg),legendtext=c(\"normal\",\"logistic\",\"gumbel\"), xlab = \"log10(EC50)\") #> Error in eval(expr, envir, enclos): object 'fg' not found # (5) how to change the optimisation method? 
# fitdistcens(log10EC50,\"logis\",optim.method=\"Nelder-Mead\") #> Fitting of the distribution ' logis ' on censored data by maximum likelihood #> Parameters: #> estimate #> location 2.1518291 #> scale 0.6910423 fitdistcens(log10EC50,\"logis\",optim.method=\"BFGS\") #> Fitting of the distribution ' logis ' on censored data by maximum likelihood #> Parameters: #> estimate #> location 2.1519103 #> scale 0.6909664 fitdistcens(log10EC50,\"logis\",optim.method=\"SANN\") #> Fitting of the distribution ' logis ' on censored data by maximum likelihood #> Parameters: #> estimate #> location 2.1569256 #> scale 0.6925307 # (6) custom optimisation function - example with the genetic algorithm # # \\donttest{ #wrap genoud function rgenoud package mygenoud <- function(fn, par, ...) { require(rgenoud) res <- genoud(fn, starting.values=par, ...) standardres <- c(res, convergence=0) return(standardres) } # call fitdistcens with a 'custom' optimization function fit.with.genoud <- fitdistcens(log10EC50,\"logis\", custom.optim=mygenoud, nvars=2, Domains=cbind(c(0,0), c(5, 5)), boundary.enforcement=1, print.level=1, hessian=TRUE) #> #> #> Wed Aug 28 14:28:56 2024 #> Domains: #> 0.000000e+00 <= X1 <= 5.000000e+00 #> 0.000000e+00 <= X2 <= 5.000000e+00 #> #> Data Type: Floating Point #> Operators (code number, name, population) #> \t(1) Cloning........................... \t122 #> \t(2) Uniform Mutation.................. \t125 #> \t(3) Boundary Mutation................. \t125 #> \t(4) Non-Uniform Mutation.............. \t125 #> \t(5) Polytope Crossover................ \t125 #> \t(6) Simple Crossover.................. \t126 #> \t(7) Whole Non-Uniform Mutation........ \t125 #> \t(8) Heuristic Crossover............... \t126 #> \t(9) Local-Minimum Crossover........... \t0 #> #> HARD Maximum Number of Generations: 100 #> Maximum Nonchanging Generations: 10 #> Population size : 1000 #> Convergence Tolerance: 1.000000e-03 #> #> Using the BFGS Derivative Based Optimizer on the Best Individual Each Generation. #> Checking Gradients before Stopping. #> Not Using Out of Bounds Individuals But Allowing Trespassing. #> #> Minimization Problem. #> #> #> Generation#\t Solution Value #> #> 0 \t2.076816e+01 #> 1 \t2.055391e+01 #> #> 'wait.generations' limit reached. #> No significant improvement in 10 generations. #> #> Solution Fitness Value: 2.055391e+01 #> #> Parameters at the Solution (parameter, gradient): #> #> X[ 1] :\t2.151910e+00\tG[ 1] :\t-4.027682e-06 #> X[ 2] :\t6.909667e-01\tG[ 2] :\t1.633468e-05 #> #> Solution Found Generation 1 #> Number of Generations Run 12 #> #> Wed Aug 28 14:28:57 2024 #> Total run time : 0 hours 0 minutes and 1 seconds summary(fit.with.genoud) #> Fitting of the distribution ' logis ' By maximum likelihood on censored data #> Parameters #> estimate Std. 
Error #> location 2.1519100 0.3222568 #> scale 0.6909667 0.1744837 #> Loglikelihood: -20.55391 AIC: 45.10781 BIC: 46.38593 #> Correlation matrix: #> location scale #> location 1.00000000 0.05106485 #> scale 0.05106485 1.00000000 #> # } # (7) estimation of the mean of a normal distribution # by maximum likelihood with the standard deviation fixed at 1 using the argument fix.arg # flnb <- fitdistcens(log10EC50, \"norm\", start = list(mean = 1),fix.arg = list(sd = 1)) # (8) Fit of a lognormal distribution on acute toxicity values of fluazinam for # macroinvertebrates and zooplancton, using maximum likelihood estimation # to estimate what is called a species sensitivity distribution # (SSD) in ecotoxicology, followed by estimation of the 5 percent quantile value of # the fitted distribution (which is called the 5 percent hazardous concentration, HC5, # in ecotoxicology) and estimation of other quantiles. data(fluazinam) log10EC50 <-log10(fluazinam) fln <- fitdistcens(log10EC50,\"norm\") quantile(fln, probs = 0.05) #> Estimated quantiles for each specified probability (censored data) #> p=0.05 #> estimate 0.2414275 quantile(fln, probs = c(0.05, 0.1, 0.2)) #> Estimated quantiles for each specified probability (censored data) #> p=0.05 p=0.1 p=0.2 #> estimate 0.2414275 0.6655064 1.179033 # (9) Fit of a lognormal distribution on 72-hour acute salinity tolerance (LC50 values) # of riverine macro-invertebrates using maximum likelihood estimation data(salinity) log10LC50 <-log10(salinity) fln <- fitdistcens(log10LC50,\"norm\") plot(fln)"},{"path":"https://lbbe-software.github.io/fitdistrplus/reference/fitdistrplus.html","id":null,"dir":"Reference","previous_headings":"","what":"Overview of the fitdistrplus package — fitdistrplus-package","title":"Overview of the fitdistrplus package — fitdistrplus-package","text":"idea package emerged 2008 collaboration JB Denis, R Pouillot ML Delignette time worked area quantitative risk assessment. implementation package part general project named \"Risk assessment R\" gathering different packages hosted R-forge. fitdistrplus package first written ML Delignette-Muller made available CRAN 2009 presented 2009 useR conference Rennes. months , C Dutang joined project starting participate implementation fitdistrplus package. package also presented 2011 useR conference 2eme rencontres R 2013 (https://r2013-lyon.sciencesconf.org/). Four vignettes available within package: general overview package published Journal Statistical Software (doi:10.18637/jss.v064.i04 ), document answering Frequently Asked Questions, document presenting benchmark optimization algorithms finding parameters, document starting values. fitdistrplus package general package aims helping fit univariate parametric distributions censored non-censored data. two main functions fitdist fit non-censored data fitdistcens fit censored data. choice candidate distributions fit may helped using functions descdist plotdist non-censored data plotdistcens censored data). Using functions fitdist fitdistcens, different methods can used estimate distribution parameters: maximum likelihood estimation default (mledist), moment matching estimation (mmedist), quantile matching estimation (qmedist), maximum goodness--fit estimation (mgedist). classical distributions initial values automatically calculated provided user. Graphical functions plotdist plotdistcens can used help manual calibration initial values parameters non-classical distributions. 
Function prefit proposed help definition good starting values special case constrained parameters. case maximum likelihood chosen estimation method, function llplot enables visualize loglikelihood contours. goodness--fit fitted distributions (single fit multiple fits) can explored using different graphical functions (cdfcomp, denscomp, qqcomp ppcomp non-censored data cdfcompcens censored data). Goodness--fit statistics also provided non-censored data using function gofstat. Bootstrap proposed quantify uncertainty parameter estimates (functions bootdist bootdistcens) also quantify uncertainty CDF quantiles estimated fitted distribution (quantile CIcdfplot).","code":""},{"path":"https://lbbe-software.github.io/fitdistrplus/reference/fitdistrplus.html","id":"author","dir":"Reference","previous_headings":"","what":"Author","title":"Overview of the fitdistrplus package — fitdistrplus-package","text":"Marie-Laure Delignette-Muller Christophe Dutang.","code":""},{"path":"https://lbbe-software.github.io/fitdistrplus/reference/fluazinam.html","id":null,"dir":"Reference","previous_headings":"","what":"Species-Sensitivity Distribution (SSD) for Fluazinam — fluazinam","title":"Species-Sensitivity Distribution (SSD) for Fluazinam — fluazinam","text":"48-hour acute toxicity values (EC50 values) exposure macroinvertebrates zooplancton fluazinam.","code":""},{"path":"https://lbbe-software.github.io/fitdistrplus/reference/fluazinam.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Species-Sensitivity Distribution (SSD) for Fluazinam — fluazinam","text":"","code":"data(fluazinam)"},{"path":"https://lbbe-software.github.io/fitdistrplus/reference/fluazinam.html","id":"format","dir":"Reference","previous_headings":"","what":"Format","title":"Species-Sensitivity Distribution (SSD) for Fluazinam — fluazinam","text":"fluazinam data frame 2 columns named left right, describing observed EC50 value (micrograms per liter) interval. left column contains either NA left censored observations, left bound interval interval censored observations, observed value non-censored observations. right column contains either NA right censored observations, right bound interval interval censored observations, observed value noncensored observations.","code":""},{"path":"https://lbbe-software.github.io/fitdistrplus/reference/fluazinam.html","id":"source","dir":"Reference","previous_headings":"","what":"Source","title":"Species-Sensitivity Distribution (SSD) for Fluazinam — fluazinam","text":"Hose, G.C., Van den Brink, P.J. 2004. species sensitivity distribution approach compared microcosm study: case study fungicide fluazinam. Ecotoxicology Environmental Safety, 73, 109-122.","code":""},{"path":"https://lbbe-software.github.io/fitdistrplus/reference/fluazinam.html","id":"ref-examples","dir":"Reference","previous_headings":"","what":"Examples","title":"Species-Sensitivity Distribution (SSD) for Fluazinam — fluazinam","text":"","code":"# (1) load of data # data(fluazinam) # (2) plot of data using Turnbull cdf plot # log10EC50 <- log10(fluazinam) plotdistcens(log10EC50) # (3) fit of a lognormal and a logistic distribution to data # (classical distributions used for species sensitivity # distributions, SSD, in ecotoxicology) # and visual comparison of the fits using Turnbull cdf plot # fln <- fitdistcens(log10EC50, \"norm\") summary(fln) #> Fitting of the distribution ' norm ' By maximum likelihood on censored data #> Parameters #> estimate Std. 
Error #> mean 2.161449 0.3223366 #> sd 1.167290 0.2630390 #> Loglikelihood: -20.41212 AIC: 44.82424 BIC: 46.10235 #> Correlation matrix: #> mean sd #> mean 1.0000000 0.1350237 #> sd 0.1350237 1.0000000 #> fll <- fitdistcens(log10EC50, \"logis\") summary(fll) #> Fitting of the distribution ' logis ' By maximum likelihood on censored data #> Parameters #> estimate Std. Error #> location 2.1518291 0.3222830 #> scale 0.6910423 0.1745231 #> Loglikelihood: -20.55391 AIC: 45.10781 BIC: 46.38593 #> Correlation matrix: #> location scale #> location 1.00000000 0.05097494 #> scale 0.05097494 1.00000000 #> cdfcompcens(list(fln,fll), legendtext = c(\"normal\", \"logistic\"), xlab = \"log10(EC50)\") # (4) estimation of the 5 percent quantile value of # the normal fitted distribution (5 percent hazardous concentration : HC5) # with its two-sided 95 percent confidence interval calculated by # non parametric bootstrap # with a small number of iterations to satisfy CRAN running times constraint. # For practical applications, we recommend to use at least niter=501 or niter=1001. # # in log10(EC50) bln <- bootdistcens(fln, niter = 101) HC5ln <- quantile(bln, probs = 0.05) # in EC50 10^(HC5ln$quantiles) #> p=0.05 #> estimate 1.743522 10^(HC5ln$quantCI) #> p=0.05 #> 2.5 % 0.2358685 #> 97.5 % 11.5133957 # (5) estimation of the HC5 value # with its one-sided 95 percent confidence interval (type \"greater\") # # in log10(EC50) HC5lnb <- quantile(bln, probs = 0.05, CI.type = \"greater\") # in LC50 10^(HC5lnb$quantiles) #> p=0.05 #> estimate 1.743522 10^(HC5lnb$quantCI) #> p=0.05 #> 5 % 0.3474539"},{"path":"https://lbbe-software.github.io/fitdistrplus/reference/fremale.html","id":null,"dir":"Reference","previous_headings":"","what":"Fictive survival dataset of a french Male population — fremale","title":"Fictive survival dataset of a french Male population — fremale","text":"100 male individuals randomly taken frefictivetable CASdatasets package","code":""},{"path":"https://lbbe-software.github.io/fitdistrplus/reference/fremale.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Fictive survival dataset of a french Male population — fremale","text":"","code":"data(fremale)"},{"path":"https://lbbe-software.github.io/fitdistrplus/reference/fremale.html","id":"format","dir":"Reference","previous_headings":"","what":"Format","title":"Fictive survival dataset of a french Male population — fremale","text":"fremale data frame 3 columns names AgeIn, AgeOut respectively entry age exit age; Death binary dummy: 1 indicating death individual; 0 censored observation.","code":""},{"path":"https://lbbe-software.github.io/fitdistrplus/reference/fremale.html","id":"references","dir":"Reference","previous_headings":"","what":"References","title":"Fictive survival dataset of a french Male population — fremale","text":"See full dataset frefictivetable CASdatasets http://dutangc.perso.math.cnrs.fr/RRepository/","code":""},{"path":"https://lbbe-software.github.io/fitdistrplus/reference/fremale.html","id":"ref-examples","dir":"Reference","previous_headings":"","what":"Examples","title":"Fictive survival dataset of a french Male population — fremale","text":"","code":"# (1) load of data # data(fremale) summary(fremale) #> AgeIn AgeOut Death #> Min. :23.87 Min. :30.20 Min. :0.0 #> 1st Qu.:47.29 1st Qu.:53.82 1st Qu.:1.0 #> Median :63.95 Median :69.49 Median :1.0 #> Mean :60.34 Mean :67.00 Mean :0.8 #> 3rd Qu.:72.00 3rd Qu.:80.23 3rd Qu.:1.0 #> Max. :89.17 Max. :97.11 Max. 
:1.0"},{"path":"https://lbbe-software.github.io/fitdistrplus/reference/gofstat.html","id":null,"dir":"Reference","previous_headings":"","what":"Goodness-of-fit statistics — gofstat","title":"Goodness-of-fit statistics — gofstat","text":"Computes goodness--fit statistics parametric distributions fitted censored non-censored data set.","code":""},{"path":"https://lbbe-software.github.io/fitdistrplus/reference/gofstat.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Goodness-of-fit statistics — gofstat","text":"","code":"gofstat(f, chisqbreaks, meancount, discrete, fitnames=NULL) # S3 method for class 'gofstat.fitdist' print(x, ...) # S3 method for class 'gofstat.fitdistcens' print(x, ...)"},{"path":"https://lbbe-software.github.io/fitdistrplus/reference/gofstat.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Goodness-of-fit statistics — gofstat","text":"f object class \"fitdist\" (\"fitdistcens\" ), output function fitdist() (resp. \"fitdist()\"), \tlist \"fitdist\" objects, list \"fitdistcens\" objects. chisqbreaks usable non censored data, numeric vector defining breaks cells used compute chi-squared statistic. omitted, breaks automatically computed data order reach roughly number observations per cell, roughly equal argument meancount, sligthly ties. meancount usable non censored data, mean number observations per cell expected definition breaks cells used compute chi-squared statistic. argument taken account breaks directly defined argument chisqbreaks. chisqbreaks meancount omitted, meancount fixed order obtain roughly \\((4n)^{2/5}\\) cells \\(n\\) length dataset. discrete TRUE, Chi-squared statistic information criteria computed. \tmissing, discrete passed first object class \"fitdist\" list f. \tcensored data argument ignored, censored data considered continuous. fitnames vector defining names fits. x object class \"gofstat.fitdist\" \"gofstat.fitdistcens\". ... arguments passed generic functions.","code":""},{"path":"https://lbbe-software.github.io/fitdistrplus/reference/gofstat.html","id":"details","dir":"Reference","previous_headings":"","what":"Details","title":"Goodness-of-fit statistics — gofstat","text":"type data (censored ), information criteria calculated. non censored data, added Goodness--fit statistics computed described . Chi-squared statistic computed using cells defined argument chisqbreaks cells automatically defined data, order reach roughly number observations per cell, roughly equal argument meancount, sligthly ties. choice define cells empirical distribution (data), theoretical distribution, done enable comparison Chi-squared values obtained different distributions fitted data set. chisqbreaks meancount omitted, meancount fixed order obtain roughly \\((4n)^{2/5}\\) cells, \\(n\\) length data set (Vose, 2000). Chi-squared statistic computed program fails define enough cells due small dataset. Chi-squared statistic computed, degree freedom (nb cells - nb parameters - 1) corresponding distribution strictly positive, p-value Chi-squared test returned. continuous distributions, Kolmogorov-Smirnov, Cramer-von Mises \tAnderson-Darling statistics also computed, defined Stephens (1986). approximate Kolmogorov-Smirnov test performed assuming distribution parameters known. critical value defined Stephens (1986) completely specified distribution used reject distribution significance level 0.05. approximation, result test (decision rejection distribution ) returned data sets 30 observations. 
Note approximate test may conservative. data sets 5 observations distributions test described Stephens (1986) maximum likelihood estimations (\"exp\", \"cauchy\", \"gamma\" \"weibull\"), Cramer-von Mises Anderson-darling tests performed described Stephens (1986). tests take account fact parameters known estimated data maximum likelihood. result decision reject distribution significance level 0.05. tests available maximum likelihood estimations. recommended statistics automatically printed, .e. Cramer-von Mises, Anderson-Darling Kolmogorov statistics continuous distributions Chi-squared statistics discrete ones ( \"binom\", \"nbinom\", \"geom\", \"hyper\" \"pois\" ). Results tests printed stored output function.","code":""},{"path":"https://lbbe-software.github.io/fitdistrplus/reference/gofstat.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Goodness-of-fit statistics — gofstat","text":"gofstat() returns object class \"gofstat.fitdist\" \"gofstat.fitdistcens\" following components sublist (aic, bic nbfit censored data) , chisq named vector Chi-squared statistics NULL computed chisqbreaks common breaks used define cells Chi-squared statistic chisqpvalue named vector p-values Chi-squared statistic NULL computed chisqdf named vector degrees freedom Chi-squared distribution NULL computed chisqtable table observed theoretical counts used Chi-squared calculations cvm named vector Cramer-von Mises statistics \"computed\" computed cvmtest named vector decisions Cramer-von Mises test \"computed\" computed ad named vector Anderson-Darling statistics \"computed\" computed adtest named vector decisions Anderson-Darling test \"computed\" computed ks named vector Kolmogorov-Smirnov statistic \"computed\" computed kstest named vector decisions Kolmogorov-Smirnov test \"computed\" computed aic named vector values Akaike's Information Criterion. bic named vector values Bayesian Information Criterion. discrete input argument automatic definition function first object class \"fitdist\" list input. nbfit Number fits argument.","code":""},{"path":[]},{"path":"https://lbbe-software.github.io/fitdistrplus/reference/gofstat.html","id":"references","dir":"Reference","previous_headings":"","what":"References","title":"Goodness-of-fit statistics — gofstat","text":"Cullen AC Frey HC (1999), Probabilistic techniques exposure assessment. Plenum Press, USA, pp. 81-155. Stephens MA (1986), Tests based edf statistics. Goodness--fit techniques (D'Agostino RB Stephens MA, eds), Marcel Dekker, New York, pp. 97-194. Venables WN Ripley BD (2002), Modern applied statistics S. Springer, New York, pp. 435-446, doi:10.1007/978-0-387-21706-2 . Vose D (2000), Risk analysis, quantitative guide. John Wiley & Sons Ltd, Chischester, England, pp. 99-143. Delignette-Muller ML Dutang C (2015), fitdistrplus: R Package Fitting Distributions. 
Journal Statistical Software, 64(4), 1-34, doi:10.18637/jss.v064.i04 .","code":""},{"path":"https://lbbe-software.github.io/fitdistrplus/reference/gofstat.html","id":"author","dir":"Reference","previous_headings":"","what":"Author","title":"Goodness-of-fit statistics — gofstat","text":"Marie-Laure Delignette-Muller Christophe Dutang.","code":""},{"path":"https://lbbe-software.github.io/fitdistrplus/reference/gofstat.html","id":"ref-examples","dir":"Reference","previous_headings":"","what":"Examples","title":"Goodness-of-fit statistics — gofstat","text":"","code":"# (1) fit of two distributions to the serving size data # by maximum likelihood estimation # and comparison of goodness-of-fit statistics # data(groundbeef) serving <- groundbeef$serving (fitg <- fitdist(serving, \"gamma\")) #> Fitting of the distribution ' gamma ' by maximum likelihood #> Parameters: #> estimate Std. Error #> shape 4.00955898 0.341451641 #> rate 0.05443907 0.004937239 gofstat(fitg) #> Goodness-of-fit statistics #> 1-mle-gamma #> Kolmogorov-Smirnov statistic 0.1281486 #> Cramer-von Mises statistic 0.6936274 #> Anderson-Darling statistic 3.5672625 #> #> Goodness-of-fit criteria #> 1-mle-gamma #> Akaike's Information Criterion 2511.250 #> Bayesian Information Criterion 2518.325 (fitln <- fitdist(serving, \"lnorm\")) #> Fitting of the distribution ' lnorm ' by maximum likelihood #> Parameters: #> estimate Std. Error #> meanlog 4.1693701 0.03366988 #> sdlog 0.5366095 0.02380783 gofstat(fitln) #> Goodness-of-fit statistics #> 1-mle-lnorm #> Kolmogorov-Smirnov statistic 0.1493090 #> Cramer-von Mises statistic 0.8277358 #> Anderson-Darling statistic 4.5436542 #> #> Goodness-of-fit criteria #> 1-mle-lnorm #> Akaike's Information Criterion 2526.639 #> Bayesian Information Criterion 2533.713 gofstat(list(fitg, fitln)) #> Goodness-of-fit statistics #> 1-mle-gamma 2-mle-lnorm #> Kolmogorov-Smirnov statistic 0.1281486 0.1493090 #> Cramer-von Mises statistic 0.6936274 0.8277358 #> Anderson-Darling statistic 3.5672625 4.5436542 #> #> Goodness-of-fit criteria #> 1-mle-gamma 2-mle-lnorm #> Akaike's Information Criterion 2511.250 2526.639 #> Bayesian Information Criterion 2518.325 2533.713 # (2) fit of two discrete distributions to toxocara data # and comparison of goodness-of-fit statistics # data(toxocara) number <- toxocara$number fitp <- fitdist(number,\"pois\") summary(fitp) #> Fitting of the distribution ' pois ' by maximum likelihood #> Parameters : #> estimate Std. Error #> lambda 8.679245 0.4046719 #> Loglikelihood: -507.5334 AIC: 1017.067 BIC: 1019.037 plot(fitp) fitnb <- fitdist(number,\"nbinom\") summary(fitnb) #> Fitting of the distribution ' nbinom ' by maximum likelihood #> Parameters : #> estimate Std. 
Error #> size 0.3971457 0.08289027 #> mu 8.6802520 1.93501003 #> Loglikelihood: -159.3441 AIC: 322.6882 BIC: 326.6288 #> Correlation matrix: #> size mu #> size 1.0000000000 -0.0001038553 #> mu -0.0001038553 1.0000000000 #> plot(fitnb) gofstat(list(fitp, fitnb),fitnames = c(\"Poisson\",\"negbin\")) #> Chi-squared statistic: 31256.96 7.48606 #> Degree of freedom of the Chi-squared distribution: 5 4 #> Chi-squared p-value: 0 0.1123255 #> the p-value may be wrong with some theoretical counts < 5 #> Chi-squared table: #> obscounts theo Poisson theo negbin #> <= 0 14 0.009014207 15.295027 #> <= 1 8 0.078236512 5.808596 #> <= 3 6 1.321767215 6.845015 #> <= 4 6 2.131297776 2.407815 #> <= 9 6 29.827829221 7.835196 #> <= 21 6 19.626223732 8.271110 #> > 21 7 0.005631339 6.537242 #> #> Goodness-of-fit criteria #> Poisson negbin #> Akaike's Information Criterion 1017.067 322.6882 #> Bayesian Information Criterion 1019.037 326.6288 # (3) Get Chi-squared results in addition to # recommended statistics for continuous distributions # set.seed(1234) x4 <- rweibull(n=1000,shape=2,scale=1) # fit of the good distribution f4 <- fitdist(x4,\"weibull\") plot(f4) # fit of a bad distribution f4b <- fitdist(x4,\"cauchy\") plot(f4b) (g <- gofstat(list(f4,f4b),fitnames=c(\"Weibull\", \"Cauchy\"))) #> Goodness-of-fit statistics #> Weibull Cauchy #> Kolmogorov-Smirnov statistic 0.02129364 0.114565 #> Cramer-von Mises statistic 0.06261917 1.854791 #> Anderson-Darling statistic 0.43120643 17.929123 #> #> Goodness-of-fit criteria #> Weibull Cauchy #> Akaike's Information Criterion 1225.734 1679.028 #> Bayesian Information Criterion 1235.549 1688.843 g$chisq #> Weibull Cauchy #> 35.76927 306.99824 g$chisqdf #> Weibull Cauchy #> 25 25 g$chisqpvalue #> Weibull Cauchy #> 7.517453e-02 2.364550e-50 g$chisqtable #> obscounts theo Weibull theo Cauchy #> <= 0.1547 36 27.86449 131.86592 #> <= 0.2381 36 34.87234 16.94381 #> <= 0.2952 36 30.58611 14.10775 #> <= 0.3745 36 50.14472 24.12899 #> <= 0.4323 36 41.16340 21.90706 #> <= 0.4764 36 33.55410 19.88887 #> <= 0.5263 36 39.57636 26.45041 #> <= 0.5771 36 41.67095 32.12597 #> <= 0.6276 36 42.36588 37.99145 #> <= 0.669 36 35.03524 35.92961 #> <= 0.7046 36 30.15737 34.26649 #> <= 0.7447 36 33.82481 41.80511 #> <= 0.7779 36 27.74805 36.41317 #> <= 0.8215 36 35.88169 48.69182 #> <= 0.8582 36 29.58833 40.27626 #> <= 0.9194 36 47.80044 62.45332 #> <= 0.9662 36 35.04387 42.03891 #> <= 1.017 36 36.19084 39.23047 #> <= 1.08 36 42.46698 40.45810 #> <= 1.119 36 24.49715 20.76625 #> <= 1.169 36 29.68482 22.91028 #> <= 1.237 36 36.49226 25.22891 #> <= 1.294 36 27.94301 17.49247 #> <= 1.418 36 51.25543 29.00440 #> <= 1.5 36 27.82405 14.64740 #> <= 1.65 36 38.72011 20.11799 #> <= 1.892 36 37.73807 21.69844 #> > 1.892 28 30.30916 81.16036 # and by defining the breaks (g <- gofstat(list(f4,f4b), chisqbreaks = seq(from = min(x4), to = max(x4), length.out = 10), fitnames=c(\"Weibull\", \"Cauchy\"))) #> Goodness-of-fit statistics #> Weibull Cauchy #> Kolmogorov-Smirnov statistic 0.02129364 0.114565 #> Cramer-von Mises statistic 0.06261917 1.854791 #> Anderson-Darling statistic 0.43120643 17.929123 #> #> Goodness-of-fit criteria #> Weibull Cauchy #> Akaike's Information Criterion 1225.734 1679.028 #> Bayesian Information Criterion 1235.549 1688.843 g$chisq #> Weibull Cauchy #> 6.532102 303.031817 g$chisqdf #> Weibull Cauchy #> 8 8 g$chisqpvalue #> Weibull Cauchy #> 5.878491e-01 9.318101e-61 g$chisqtable #> obscounts theo Weibull theo Cauchy #> <= 0.0264 1 0.9414531 111.941831 #> <= 0.3374 123 118.0587149 
63.070591 #> <= 0.6483 222 240.3305518 167.852511 #> <= 0.9593 261 252.4491129 318.542341 #> <= 1.27 204 191.1128355 165.083876 #> <= 1.581 111 112.9380271 62.221846 #> <= 1.892 49 53.8525607 30.121634 #> <= 2.203 19 21.0847217 17.463676 #> <= 2.514 6 6.8505892 11.335604 #> <= 2.825 4 1.8602036 7.933114 #> > 2.825 0 0.5212296 44.432977 # (4) fit of two distributions on acute toxicity values # of fluazinam (in decimal logarithm) for # macroinvertebrates and zooplancton # and comparison of goodness-of-fit statistics # data(fluazinam) log10EC50 <-log10(fluazinam) (fln <- fitdistcens(log10EC50,\"norm\")) #> Fitting of the distribution ' norm ' on censored data by maximum likelihood #> Parameters: #> estimate #> mean 2.161449 #> sd 1.167290 plot(fln) gofstat(fln) #> #> Goodness-of-fit criteria #> 1-mle-norm #> Akaike's Information Criterion 44.82424 #> Bayesian Information Criterion 46.10235 (fll <- fitdistcens(log10EC50,\"logis\")) #> Fitting of the distribution ' logis ' on censored data by maximum likelihood #> Parameters: #> estimate #> location 2.1518291 #> scale 0.6910423 plot(fll) gofstat(fll) #> #> Goodness-of-fit criteria #> 1-mle-logis #> Akaike's Information Criterion 45.10781 #> Bayesian Information Criterion 46.38593 gofstat(list(fll, fln), fitnames = c(\"loglogistic\", \"lognormal\")) #> #> Goodness-of-fit criteria #> loglogistic lognormal #> Akaike's Information Criterion 45.10781 44.82424 #> Bayesian Information Criterion 46.38593 46.10235"},{"path":"https://lbbe-software.github.io/fitdistrplus/reference/graphcomp.html","id":null,"dir":"Reference","previous_headings":"","what":"Graphical comparison of multiple fitted distributions (for non-censored data) — graphcomp","title":"Graphical comparison of multiple fitted distributions (for non-censored data) — graphcomp","text":"cdfcomp plots empirical cumulative distribution fitted distribution functions, denscomp plots histogram fitted density functions, qqcomp plots theoretical quantiles empirical ones, ppcomp plots theoretical probabilities empirical ones. cdfcomp able plot fits discrete distribution.","code":""},{"path":"https://lbbe-software.github.io/fitdistrplus/reference/graphcomp.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Graphical comparison of multiple fitted distributions (for non-censored data) — graphcomp","text":"","code":"cdfcomp(ft, xlim, ylim, xlogscale = FALSE, ylogscale = FALSE, main, xlab, ylab, datapch, datacol, fitlty, fitcol, fitlwd, addlegend = TRUE, legendtext, xlegend = \"bottomright\", ylegend = NULL, horizontals = TRUE, verticals = FALSE, do.points = TRUE, use.ppoints = TRUE, a.ppoints = 0.5, name.points = NULL, lines01 = FALSE, discrete, add = FALSE, plotstyle = \"graphics\", fitnbpts = 101, ...) denscomp(ft, xlim, ylim, probability = TRUE, main, xlab, ylab, datacol, fitlty, fitcol, fitlwd, addlegend = TRUE, legendtext, xlegend = \"topright\", ylegend = NULL, demp = FALSE, dempcol = \"black\", plotstyle = \"graphics\", discrete, fitnbpts = 101, fittype=\"l\", ...) qqcomp(ft, xlim, ylim, xlogscale = FALSE, ylogscale = FALSE, main, xlab, ylab, fitpch, fitcol, fitlwd, addlegend = TRUE, legendtext, xlegend = \"bottomright\", ylegend = NULL, use.ppoints = TRUE, a.ppoints = 0.5, line01 = TRUE, line01col = \"black\", line01lty = 1, ynoise = TRUE, plotstyle = \"graphics\", ...) 
ppcomp(ft, xlim, ylim, xlogscale = FALSE, ylogscale = FALSE, main, xlab, ylab, fitpch, fitcol, fitlwd, addlegend = TRUE, legendtext, xlegend = \"bottomright\", ylegend = NULL, use.ppoints = TRUE, a.ppoints = 0.5, line01 = TRUE, line01col = \"black\", line01lty = 1, ynoise = TRUE, plotstyle = \"graphics\", ...)"},{"path":"https://lbbe-software.github.io/fitdistrplus/reference/graphcomp.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Graphical comparison of multiple fitted distributions (for non-censored data) — graphcomp","text":"ft One \"fitdist\" object list objects class \"fitdist\". xlim \\(x\\)-limits plot. ylim \\(y\\)-limits plot. xlogscale TRUE, uses logarithmic scale \\(x\\)-axis. ylogscale TRUE, uses logarithmic scale \\(y\\)-axis. main main title plot. See also title. xlab label \\(x\\)-axis, defaults description x. ylab label \\(y\\)-axis, defaults description y. datapch integer specifying symbol used plotting data points. See also par. datacol specification color used plotting data points. See also par. fitcol (vector ) color(s) plot fitted distributions. fewer colors fits recycled standard fashion. See also par. fitlty (vector ) line type(s) plot fitted distributions/densities. fewer values fits recycled standard fashion. See also par. fitlwd (vector ) line size(s) plot fitted distributions/densities. fewer values fits recycled standard fashion. See also par. fitpch (vector ) line type(s) plot fitted quantiles/probabilities. fewer values fits recycled standard fashion. See also par. fittype type plot fitted probabilities case discrete distributions: possible types \"p\" points, \"l\" lines \"o\" overplotted (plot.default). fittype used non-discrete distributions. fitnbpts numeric number points compute fitted probabilities cumulative probabilities. Default 101. addlegend TRUE, legend added plot. legendtext character expression vector length \\(\\ge 1\\) appear legend. See also legend. xlegend, ylegend \\(x\\) \\(y\\) coordinates used position legend. can specified keyword. plotstyle = \"graphics\", see xy.coords legend. plotstyle = \"ggplot\", xlegend keyword must one top, bottom, left, right. See also guide_legend ggplot2 horizontals TRUE, draws horizontal lines step empirical cumulative distribution function (ecdf). See also plot.stepfun. verticals TRUE, draws vertical lines empirical cumulative distribution function (ecdf). taken account horizontals=TRUE. .points TRUE (default), draws points x-locations. large dataset (n > 1e4), .points ignored point drawn. use.ppoints TRUE, probability points empirical distribution defined using function ppoints (1:n - .ppoints)/(n - 2a.ppoints + 1). FALSE, probability points simply defined (1:n)/n. argument ignored discrete data. .ppoints use.ppoints=TRUE, passed ppoints function. name.points Label vector points drawn .e. .points = TRUE (non censored data). lines01 logical plot two horizontal lines h=0 h=1 cdfcomp. line01 logical plot horizontal line \\(y=x\\) qqcomp ppcomp. line01col, line01lty Color line type line01. See also par. demp logical add empirical density plot, using density function. dempcol color empirical density case added plot (demp=TRUE). ynoise logical add small noise plotting empirical quantiles/probabilities qqcomp ppcomp. probability logical use probability scale denscomp. See also hist. discrete TRUE, distributions considered discrete. missing, discrete set TRUE least one object list ft discrete. add TRUE, adds already existing plot. FALSE, starts new plot. 
parameter available plotstyle = \"ggplot\". plotstyle \"graphics\" \"ggplot\". \"graphics\", display built graphics functions. \"ggplot\", graphic object output created ggplot2 functions (ggplot2 package must installed). ... graphical arguments passed graphical functions used cdfcomp, denscomp, ppcomp qqcomp plotstyle = \"graphics\". plotstyle = \"ggplot\", arguments used histogram plot (hist) denscomp function. plotstyle = \"ggplot\", graphical output can customized relevant ggplot2 functions store output.","code":""},{"path":"https://lbbe-software.github.io/fitdistrplus/reference/graphcomp.html","id":"details","dir":"Reference","previous_headings":"","what":"Details","title":"Graphical comparison of multiple fitted distributions (for non-censored data) — graphcomp","text":"cdfcomp provides plot empirical distribution fitted distribution cdf, default using Hazen's rule empirical distribution, probability points defined (1:n - 0.5)/n. discrete TRUE, probability points always defined (1:n)/n. large dataset (n > 1e4), point drawn line ecdf drawn instead. Note horizontals, verticals .points FALSE, empirical point drawn, fitted cdf shown. denscomp provides density plot fitted distribution histogram data conyinuous data. discrete=TRUE, distributions considered discrete, histogram plotted demp forced TRUE fitted empirical probabilities plotted either vertical lines fittype=\"l\", single points fittype=\"p\" lines points fittype=\"o\". ppcomp provides plot probabilities fitted distribution (\\(x\\)-axis) empirical probabilities (\\(y\\)-axis) default defined (1:n - 0.5)/n (data assumed continuous). large dataset (n > 1e4), lines drawn instead pointss customized fitpch parameter. qqcomp provides plot quantiles theoretical distribution (\\(x\\)-axis) empirical quantiles data (\\(y\\)-axis), default defining probability points (1:n - 0.5)/n theoretical quantile calculation (data assumed continuous). large dataset (n > 1e4), lines drawn instead points customized fitpch parameter. default legend added plots. Many graphical arguments optional, dedicated personalize plots, fixed default values omitted.","code":""},{"path":"https://lbbe-software.github.io/fitdistrplus/reference/graphcomp.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Graphical comparison of multiple fitted distributions (for non-censored data) — graphcomp","text":"*comp returns list drawn points /lines plotstyle == \"graphics\" object class \"ggplot\" plotstyle == \"ggplot\".","code":""},{"path":[]},{"path":"https://lbbe-software.github.io/fitdistrplus/reference/graphcomp.html","id":"references","dir":"Reference","previous_headings":"","what":"References","title":"Graphical comparison of multiple fitted distributions (for non-censored data) — graphcomp","text":"Delignette-Muller ML Dutang C (2015), fitdistrplus: R Package Fitting Distributions. 
Journal Statistical Software, 64(4), 1-34, doi:10.18637/jss.v064.i04 .","code":""},{"path":"https://lbbe-software.github.io/fitdistrplus/reference/graphcomp.html","id":"author","dir":"Reference","previous_headings":"","what":"Author","title":"Graphical comparison of multiple fitted distributions (for non-censored data) — graphcomp","text":"Christophe Dutang, Marie-Laure Delignette-Muller Aurelie Siberchicot.","code":""},{"path":"https://lbbe-software.github.io/fitdistrplus/reference/graphcomp.html","id":"ref-examples","dir":"Reference","previous_headings":"","what":"Examples","title":"Graphical comparison of multiple fitted distributions (for non-censored data) — graphcomp","text":"","code":"# (1) Plot various distributions fitted to serving size data # data(groundbeef) serving <- groundbeef$serving fitW <- fitdist(serving, \"weibull\") fitln <- fitdist(serving, \"lnorm\") fitg <- fitdist(serving, \"gamma\") cdfcomp(list(fitW, fitln, fitg), horizontals = FALSE) cdfcomp(list(fitW, fitln, fitg), horizontals = TRUE) cdfcomp(list(fitW, fitln, fitg), horizontals = TRUE, verticals = TRUE, datacol = \"purple\") cdfcomp(list(fitW, fitln, fitg), legendtext = c(\"Weibull\", \"lognormal\", \"gamma\"), main = \"ground beef fits\", xlab = \"serving sizes (g)\", ylab = \"F\", xlim = c(0, 250), xlegend = \"center\", lines01 = TRUE) denscomp(list(fitW, fitln, fitg), legendtext = c(\"Weibull\", \"lognormal\", \"gamma\"), main = \"ground beef fits\", xlab = \"serving sizes (g)\", xlim = c(0, 250), xlegend = \"topright\") ppcomp(list(fitW, fitln, fitg), legendtext = c(\"Weibull\", \"lognormal\", \"gamma\"), main = \"ground beef fits\", xlegend = \"bottomright\", line01 = TRUE) qqcomp(list(fitW, fitln, fitg), legendtext = c(\"Weibull\", \"lognormal\", \"gamma\"), main = \"ground beef fits\", xlegend = \"bottomright\", line01 = TRUE, xlim = c(0, 300), ylim = c(0, 300), fitpch = 16) # (2) Plot lognormal distributions fitted by # maximum goodness-of-fit estimation # using various distances (data plotted in log scale) # data(endosulfan) ATV <- subset(endosulfan, group == \"NonArthroInvert\")$ATV taxaATV <- subset(endosulfan, group == \"NonArthroInvert\")$taxa flnMGEKS <- fitdist(ATV, \"lnorm\", method = \"mge\", gof = \"KS\") flnMGEAD <- fitdist(ATV, \"lnorm\", method = \"mge\", gof = \"AD\") flnMGEADL <- fitdist(ATV, \"lnorm\", method = \"mge\", gof = \"ADL\") flnMGEAD2L <- fitdist(ATV, \"lnorm\", method = \"mge\", gof = \"AD2L\") cdfcomp(list(flnMGEKS, flnMGEAD, flnMGEADL, flnMGEAD2L), xlogscale = TRUE, main = \"fits of a lognormal dist. using various GOF dist.\", legendtext = c(\"MGE KS\", \"MGE AD\", \"MGE ADL\", \"MGE AD2L\"), verticals = TRUE, xlim = c(1, 100000), name.points=taxaATV) qqcomp(list(flnMGEKS, flnMGEAD, flnMGEADL, flnMGEAD2L), main = \"fits of a lognormal dist. using various GOF dist.\", legendtext = c(\"MGE KS\", \"MGE AD\", \"MGE ADL\", \"MGE AD2L\"), xlogscale = TRUE, ylogscale = TRUE) ppcomp(list(flnMGEKS, flnMGEAD, flnMGEADL, flnMGEAD2L), main = \"fits of a lognormal dist. 
using various GOF dist.\", legendtext = c(\"MGE KS\", \"MGE AD\", \"MGE ADL\", \"MGE AD2L\")) # (3) Plot normal and logistic distributions fitted by # maximum likelihood estimation # using various plotting positions in cdf plots # data(endosulfan) log10ATV <-log10(subset(endosulfan, group == \"NonArthroInvert\")$ATV) fln <- fitdist(log10ATV, \"norm\") fll <- fitdist(log10ATV, \"logis\") # default plot using Hazen plotting position: (1:n - 0.5)/n cdfcomp(list(fln, fll), legendtext = c(\"normal\", \"logistic\"), xlab = \"log10ATV\") # plot using mean plotting position (named also Gumbel plotting position) # (1:n)/(n + 1) cdfcomp(list(fln, fll),legendtext = c(\"normal\", \"logistic\"), xlab = \"log10ATV\", use.ppoints = TRUE, a.ppoints = 0) # plot using basic plotting position: (1:n)/n cdfcomp(list(fln, fll),legendtext = c(\"normal\", \"logistic\"), xlab = \"log10ATV\", use.ppoints = FALSE) # (4) Comparison of fits of two distributions fitted to discrete data # data(toxocara) number <- toxocara$number fitp <- fitdist(number, \"pois\") fitnb <- fitdist(number, \"nbinom\") cdfcomp(list(fitp, fitnb), legendtext = c(\"Poisson\", \"negative binomial\")) denscomp(list(fitp, fitnb),demp = TRUE, legendtext = c(\"Poisson\", \"negative binomial\")) denscomp(list(fitp, fitnb),demp = TRUE, fittype = \"l\", dempcol = \"black\", legendtext = c(\"Poisson\", \"negative binomial\")) denscomp(list(fitp, fitnb),demp = TRUE, fittype = \"p\", dempcol = \"black\", legendtext = c(\"Poisson\", \"negative binomial\")) denscomp(list(fitp, fitnb),demp = TRUE, fittype = \"o\", dempcol = \"black\", legendtext = c(\"Poisson\", \"negative binomial\")) # (5) Customizing of graphical output and use of ggplot2 # data(groundbeef) serving <- groundbeef$serving fitW <- fitdist(serving, \"weibull\") fitln <- fitdist(serving, \"lnorm\") fitg <- fitdist(serving, \"gamma\") if (requireNamespace (\"ggplot2\", quietly = TRUE)) { denscomp(list(fitW, fitln, fitg), plotstyle = \"ggplot\") cdfcomp(list(fitW, fitln, fitg), plotstyle = \"ggplot\") qqcomp(list(fitW, fitln, fitg), plotstyle = \"ggplot\") ppcomp(list(fitW, fitln, fitg), plotstyle = \"ggplot\") } # customizing graphical output with graphics denscomp(list(fitW, fitln, fitg), legendtext = c(\"Weibull\", \"lognormal\", \"gamma\"), main = \"ground beef fits\", xlab = \"serving sizes (g)\", xlim = c(0, 250), xlegend = \"topright\", addlegend = FALSE) # customizing graphical output with ggplot2 if (requireNamespace (\"ggplot2\", quietly = TRUE)) { dcomp <- denscomp(list(fitW, fitln, fitg), legendtext = c(\"Weibull\", \"lognormal\", \"gamma\"), xlab = \"serving sizes (g)\", xlim = c(0, 250), xlegend = \"topright\", plotstyle = \"ggplot\", breaks = 20, addlegend = FALSE) dcomp + ggplot2::theme_minimal() + ggplot2::ggtitle(\"Ground beef fits\") }"},{"path":"https://lbbe-software.github.io/fitdistrplus/reference/graphcompcens.html","id":null,"dir":"Reference","previous_headings":"","what":"Graphical comparison of multiple fitted distributions for censored data — graphcompcens","title":"Graphical comparison of multiple fitted distributions for censored data — graphcompcens","text":"cdfcompcens plots empirical cumulative distribution fitted distribution functions, qqcompcens plots theoretical quantiles empirical ones, ppcompcens plots theoretical probabilities empirical ones.","code":""},{"path":"https://lbbe-software.github.io/fitdistrplus/reference/graphcompcens.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Graphical comparison of multiple fitted 
distributions for censored data — graphcompcens","text":"","code":"cdfcompcens(ft, xlim, ylim, xlogscale = FALSE, ylogscale = FALSE, main, xlab, ylab, datacol, fillrect, fitlty, fitcol, fitlwd, addlegend = TRUE, legendtext, xlegend = \"bottomright\", ylegend = NULL, lines01 = FALSE, Turnbull.confint = FALSE, NPMLE.method = \"Wang\", add = FALSE, plotstyle = \"graphics\", ...) qqcompcens(ft, xlim, ylim, xlogscale = FALSE, ylogscale = FALSE, main, xlab, ylab, fillrect, fitcol, fitlwd, addlegend = TRUE, legendtext, xlegend = \"bottomright\", ylegend = NULL, line01 = TRUE, line01col = \"black\", line01lty = 1, ynoise = TRUE, NPMLE.method = \"Wang\", plotstyle = \"graphics\", ...) ppcompcens(ft, xlim, ylim, xlogscale = FALSE, ylogscale = FALSE, main, xlab, ylab, fillrect, fitcol, fitlwd, addlegend = TRUE, legendtext, xlegend = \"bottomright\", ylegend = NULL, line01 = TRUE, line01col = \"black\", line01lty = 1, ynoise = TRUE, NPMLE.method = \"Wang\", plotstyle = \"graphics\", ...)"},{"path":"https://lbbe-software.github.io/fitdistrplus/reference/graphcompcens.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Graphical comparison of multiple fitted distributions for censored data — graphcompcens","text":"ft One \"fitdistcens\" object list objects class \"fitdistcens\". xlim \\(x\\)-limits plot. ylim \\(y\\)-limits plot. xlogscale TRUE, uses logarithmic scale \\(x\\)-axis. ylogscale TRUE, uses logarithmic scale \\(y\\)-axis. main main title plot, see also title. xlab label \\(x\\)-axis, defaults description x. ylab label \\(y\\)-axis, defaults description y. datacol specification color used plotting data points. fillrect specification color used filling rectanges non uniqueness empirical cumulative distribution (used NPMLE.method equal \"Wang\" cdfcompcens). Fix NA want fill rectangles. fitcol (vector ) color(s) plot fitted distributions. fewer colors fits recycled standard fashion. fitlty (vector ) line type(s) plot fitted distributions. fewer values fits recycled standard fashion. See also par. fitlwd (vector ) line size(s) plot fitted distributions. fewer values fits recycled standard fashion. See also par. addlegend TRUE, legend added plot. legendtext character expression vector length \\(\\geq 1\\) appear legend, see also legend. xlegend, ylegend \\(x\\) \\(y\\) coordinates used position legend. can specified keyword. plotstyle = \"graphics\", see xy.coords legend. plotstyle = \"ggplot\", xlegend keyword must one top, bottom, left, right. See also guide_legend ggplot2 lines01 logical plot two horizontal lines h=0 h=1 cdfcompcens. Turnbull.confint TRUE confidence intervals added Turnbull plot. case NPMLE.method forced \"Turnbull\" NPMLE.method Three NPMLE techniques provided, \"Wang\", default one, rewritten package npsurv using function constrOptim package stats optimisation, \"Turnbull.middlepoints\", older one implemented package survival \"Turnbull.intervals\" uses Turnbull algorithm package survival associates interval equivalence class instead middlepoint interval (see details). \"Wang\" \"Turnbull.intervals\" enable derivation Q-Q plot P-P plot. add TRUE, adds already existing plot. FALSE, starts new plot. parameter available plotstyle = \"ggplot\". line01 logical plot horizontal line \\(y=x\\) qqcompcens ppcompcens. line01col, line01lty Color line type line01. See also par. ynoise logical add small noise plotting empirical quantiles/probabilities qqcompcens ppcompcens. ynoise used various fits plotted \"graphics\" plotstyle. 
Facets used instead \"ggplot\" plotstyle. plotstyle \"graphics\" \"ggplot\". \"graphics\", display built graphics functions. \"ggplot\", graphic object output created ggplot2 functions (ggplot2 package must installed). \"cdfcompcens\", \"ggplot\" graphics available \"Wang\" NPMLE technique. ... graphical arguments passed graphical functions used cdfcompcens, ppcompcens qqcompcens.","code":""},{"path":"https://lbbe-software.github.io/fitdistrplus/reference/graphcompcens.html","id":"details","dir":"Reference","previous_headings":"","what":"Details","title":"Graphical comparison of multiple fitted distributions for censored data — graphcompcens","text":"See details plotdistcens detailed description provided goddness--fit plots.","code":""},{"path":[]},{"path":"https://lbbe-software.github.io/fitdistrplus/reference/graphcompcens.html","id":"references","dir":"Reference","previous_headings":"","what":"References","title":"Graphical comparison of multiple fitted distributions for censored data — graphcompcens","text":"Turnbull BW (1974), Nonparametric estimation survivorship function doubly censored data. Journal American Statistical Association, 69, 169-173. Wang Y (2008), Dimension-reduced nonparametric maximum likelihood computation interval-censored data. Computational Statistics & Data Analysis, 52, 2388-2402. Wang Y Taylor SM (2013), Efficient computation nonparametric survival functions via hierarchical mixture formulation. Statistics Computing, 23, 713-725. Delignette-Muller ML Dutang C (2015), fitdistrplus: R Package Fitting Distributions. Journal Statistical Software, 64(4), 1-34.","code":""},{"path":"https://lbbe-software.github.io/fitdistrplus/reference/graphcompcens.html","id":"author","dir":"Reference","previous_headings":"","what":"Author","title":"Graphical comparison of multiple fitted distributions for censored data — graphcompcens","text":"Marie-Laure Delignette-Muller Christophe Dutang.","code":""},{"path":"https://lbbe-software.github.io/fitdistrplus/reference/graphcompcens.html","id":"ref-examples","dir":"Reference","previous_headings":"","what":"Examples","title":"Graphical comparison of multiple fitted distributions for censored data — graphcompcens","text":"","code":"# (1) Plot various distributions fitted to bacterial contamination data # data(smokedfish) Clog10 <- log10(smokedfish) fitsfn <- fitdistcens(Clog10,\"norm\") summary(fitsfn) #> Fitting of the distribution ' norm ' By maximum likelihood on censored data #> Parameters #> estimate Std. Error #> mean -1.575392 0.2013872 #> sd 1.539446 0.2118026 #> Loglikelihood: -87.10945 AIC: 178.2189 BIC: 183.4884 #> Correlation matrix: #> mean sd #> mean 1.0000000 -0.4325228 #> sd -0.4325228 1.0000000 #> fitsfl <- fitdistcens(Clog10,\"logis\") summary(fitsfl) #> Fitting of the distribution ' logis ' By maximum likelihood on censored data #> Parameters #> estimate Std. Error #> location -1.5394230 0.1681236 #> scale 0.8121862 0.1332863 #> Loglikelihood: -86.45499 AIC: 176.91 BIC: 182.1794 #> Correlation matrix: #> location scale #> location 1.0000000 -0.3189915 #> scale -0.3189915 1.0000000 #> dgumbel <- function(x,a,b) 1/b*exp((a-x)/b)*exp(-exp((a-x)/b)) pgumbel <- function(q,a,b) exp(-exp((a-q)/b)) qgumbel <- function(p,a,b) a-b*log(-log(p)) fitsfg<-fitdistcens(Clog10,\"gumbel\",start=list(a=-3,b=3)) #> Error in checkparamlist(arg_startfix$start.arg, arg_startfix$fix.arg, argddistname, hasnodefaultval): 'start' must specify names which are arguments to 'distr'. 
summary(fitsfg) #> Error in eval(expr, envir, enclos): object 'fitsfg' not found # CDF plot cdfcompcens(list(fitsfn,fitsfl,fitsfg)) #> Error in eval(expr, envir, enclos): object 'fitsfg' not found cdfcompcens(list(fitsfn,fitsfl,fitsfg),datacol=\"orange\",fillrect = NA, legendtext=c(\"normal\",\"logistic\",\"Gumbel\"), main=\"bacterial contamination fits\", xlab=\"bacterial concentration (CFU/g)\",ylab=\"F\", xlegend = \"bottom\",lines01 = TRUE) #> Error in eval(expr, envir, enclos): object 'fitsfg' not found # alternative Turnbull plot for the empirical cumulative distribution # (default plot of the previous versions of the package) cdfcompcens(list(fitsfn,fitsfl,fitsfg), NPMLE.method = \"Turnbull.middlepoints\") #> Error in eval(expr, envir, enclos): object 'fitsfg' not found # customizing graphical output with ggplot2 if (requireNamespace (\"ggplot2\", quietly = TRUE)) { cdfcompcens <- cdfcompcens(list(fitsfn,fitsfl,fitsfg),datacol=\"orange\",fillrect = NA, legendtext=c(\"normal\",\"logistic\",\"Gumbel\"), xlab=\"bacterial concentration (CFU/g)\",ylab=\"F\", xlegend = \"bottom\",lines01 = TRUE, plotstyle = \"ggplot\") cdfcompcens + ggplot2::theme_minimal() + ggplot2::ggtitle(\"Bacterial contamination fits\") } #> Error in eval(expr, envir, enclos): object 'fitsfg' not found # PP plot ppcompcens(list(fitsfn,fitsfl,fitsfg)) #> Error in eval(expr, envir, enclos): object 'fitsfg' not found ppcompcens(list(fitsfn,fitsfl,fitsfg), ynoise = FALSE) #> Error in eval(expr, envir, enclos): object 'fitsfg' not found par(mfrow = c(2,2)) ppcompcens(fitsfn) ppcompcens(fitsfl) ppcompcens(fitsfg) #> Error in eval(expr, envir, enclos): object 'fitsfg' not found par(mfrow = c(1,1)) if (requireNamespace (\"ggplot2\", quietly = TRUE)) { ppcompcens(list(fitsfn,fitsfl,fitsfg), plotstyle = \"ggplot\") ppcompcens(list(fitsfn,fitsfl,fitsfg), plotstyle = \"ggplot\", fillrect = c(\"lightpink\", \"lightblue\", \"lightgreen\"), fitcol = c(\"red\", \"blue\", \"green\")) } #> Error in eval(expr, envir, enclos): object 'fitsfg' not found # QQ plot qqcompcens(list(fitsfn,fitsfl,fitsfg)) #> Error in eval(expr, envir, enclos): object 'fitsfg' not found qqcompcens(list(fitsfn,fitsfl,fitsfg), ynoise = FALSE) #> Error in eval(expr, envir, enclos): object 'fitsfg' not found par(mfrow = c(2,2)) qqcompcens(fitsfn) qqcompcens(fitsfl) qqcompcens(fitsfg) #> Error in eval(expr, envir, enclos): object 'fitsfg' not found par(mfrow = c(1,1)) if (requireNamespace (\"ggplot2\", quietly = TRUE)) { qqcompcens(list(fitsfn,fitsfl,fitsfg), ynoise = FALSE, plotstyle = \"ggplot\") qqcompcens(list(fitsfn,fitsfl,fitsfg), ynoise = FALSE, plotstyle = \"ggplot\", fillrect = c(\"lightpink\", \"lightblue\", \"lightgreen\"), fitcol = c(\"red\", \"blue\", \"green\")) } #> Error in eval(expr, envir, enclos): object 'fitsfg' not found"},{"path":"https://lbbe-software.github.io/fitdistrplus/reference/groundbeef.html","id":null,"dir":"Reference","previous_headings":"","what":"Ground beef serving size data set — groundbeef","title":"Ground beef serving size data set — groundbeef","text":"Serving sizes collected French survey, ground beef patties consumed children 5 years old.","code":""},{"path":"https://lbbe-software.github.io/fitdistrplus/reference/groundbeef.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Ground beef serving size data set — 
groundbeef","text":"","code":"data(groundbeef)"},{"path":"https://lbbe-software.github.io/fitdistrplus/reference/groundbeef.html","id":"format","dir":"Reference","previous_headings":"","what":"Format","title":"Ground beef serving size data set — groundbeef","text":"groundbeef data frame 1 column (serving: serving sizes grams)","code":""},{"path":"https://lbbe-software.github.io/fitdistrplus/reference/groundbeef.html","id":"source","dir":"Reference","previous_headings":"","what":"Source","title":"Ground beef serving size data set — groundbeef","text":"Delignette-Muller, M.L., Cornu, M. 2008. Quantitative risk assessment Escherichia coli O157:H7 frozen ground beef patties consumed young children French households. International Journal Food Microbiology, 128, 158-164.","code":""},{"path":"https://lbbe-software.github.io/fitdistrplus/reference/groundbeef.html","id":"ref-examples","dir":"Reference","previous_headings":"","what":"Examples","title":"Ground beef serving size data set — groundbeef","text":"","code":"# (1) load of data # data(groundbeef) # (2) description and plot of data # serving <- groundbeef$serving descdist(serving) #> summary statistics #> ------ #> min: 10 max: 200 #> median: 79 #> mean: 73.64567 #> estimated sd: 35.88487 #> estimated skewness: 0.7352745 #> estimated kurtosis: 3.551384 plotdist(serving) # (3) fit of a Weibull distribution to data # fitW <- fitdist(serving, \"weibull\") summary(fitW) #> Fitting of the distribution ' weibull ' by maximum likelihood #> Parameters : #> estimate Std. Error #> shape 2.185885 0.1045755 #> scale 83.347679 2.5268626 #> Loglikelihood: -1255.225 AIC: 2514.449 BIC: 2521.524 #> Correlation matrix: #> shape scale #> shape 1.000000 0.321821 #> scale 0.321821 1.000000 #> plot(fitW) gofstat(fitW) #> Goodness-of-fit statistics #> 1-mle-weibull #> Kolmogorov-Smirnov statistic 0.1396646 #> Cramer-von Mises statistic 0.6840994 #> Anderson-Darling statistic 3.5736460 #> #> Goodness-of-fit criteria #> 1-mle-weibull #> Akaike's Information Criterion 2514.449 #> Bayesian Information Criterion 2521.524"},{"path":"https://lbbe-software.github.io/fitdistrplus/reference/logLik-plot.html","id":null,"dir":"Reference","previous_headings":"","what":"(Log)likelihood plot for a fit using maximum likelihood — logLikplot","title":"(Log)likelihood plot for a fit using maximum likelihood — logLikplot","text":"llplot plots (log)likelihood around estimation distributions fitted maximum likelihood.","code":""},{"path":"https://lbbe-software.github.io/fitdistrplus/reference/logLik-plot.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"(Log)likelihood plot for a fit using maximum likelihood — logLikplot","text":"","code":"llplot(mlefit, loglik = TRUE, expansion = 1, lseq = 50, back.col = TRUE, nlev = 10, pal.col = terrain.colors(100), fit.show = FALSE, fit.pch = 4, ...)"},{"path":"https://lbbe-software.github.io/fitdistrplus/reference/logLik-plot.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"(Log)likelihood plot for a fit using maximum likelihood — logLikplot","text":"mlefit object class \"fitdist\" \"fitdistcens\" obtained maximum likelihood (method = \"mle\") loglik logical plot log-likelihood likelihood function. expansion expansion factor enlarge default range values explored parameter. lseq length sequences parameters. back.col logical (llsurface ). Contours plotted background gradient colors TRUE. nlev number contour levels plot. pal.col Palette colors. 
Colors used back (llsurface ). fit.show logical plot mle estimate. fit.pch type point used plot mle estimate. ... graphical arguments passed graphical functions.","code":""},{"path":"https://lbbe-software.github.io/fitdistrplus/reference/logLik-plot.html","id":"details","dir":"Reference","previous_headings":"","what":"Details","title":"(Log)likelihood plot for a fit using maximum likelihood — logLikplot","text":"llplot plots (log)likelihood surface(s) (curve one estimated parameter) around maximum likelihood estimation. internally calls function llsurface llcurve. two estimated parameters, (log)likehood surface plotted combination two parameters, fixing ones estimated value. (log)likelihood surface, back.col image (2D-plot) used nlev > 0 contour (2D-plot) used add nlev contours. default range values explored estimated parameter 2 standard error around mle estimate range can expanded (contracted) using argument expansion.","code":""},{"path":[]},{"path":"https://lbbe-software.github.io/fitdistrplus/reference/logLik-plot.html","id":"references","dir":"Reference","previous_headings":"","what":"References","title":"(Log)likelihood plot for a fit using maximum likelihood — logLikplot","text":"Delignette-Muller ML Dutang C (2015), fitdistrplus: R Package Fitting Distributions. Journal Statistical Software, 64(4), 1-34, doi:10.18637/jss.v064.i04 .","code":""},{"path":"https://lbbe-software.github.io/fitdistrplus/reference/logLik-plot.html","id":"author","dir":"Reference","previous_headings":"","what":"Author","title":"(Log)likelihood plot for a fit using maximum likelihood — logLikplot","text":"Marie-Laure Delignette-Muller Christophe Dutang.","code":""},{"path":"https://lbbe-software.github.io/fitdistrplus/reference/logLik-plot.html","id":"ref-examples","dir":"Reference","previous_headings":"","what":"Examples","title":"(Log)likelihood plot for a fit using maximum likelihood — logLikplot","text":"","code":"# (1) a distribution with one parameter # x <- rexp(50) fite <- fitdist(x, \"exp\") llplot(fite) llplot(fite, col = \"red\", fit.show = TRUE) llplot(fite, col = \"red\", fit.show = TRUE, loglik = FALSE) # (2) a distribution with two parameters # data(groundbeef) serving <- groundbeef$serving fitg <- fitdist(serving, \"gamma\") llplot(fitg) # \\donttest{ llplot(fitg, expansion = 2) llplot(fitg, pal.col = heat.colors(100), fit.show = TRUE) llplot(fitg, back.col = FALSE, nlev = 25, fit.show = TRUE) # } # (3) a distribution with two parameters with one fixed # fitg2 <- fitdist(serving, \"gamma\", fix.arg = list(rate = 0.5)) llplot(fitg2, fit.show = TRUE) # (4) a distribution with three parameters # # \\donttest{ data(endosulfan) ATV <-endosulfan$ATV library(\"actuar\") fBurr <- fitdist(ATV, \"burr\", start = list(shape1 = 0.3, shape2 = 1, rate = 1)) llplot(fBurr) llplot(fBurr, back.col = FALSE, fit.show = TRUE, fit.pch = 16) llplot(fBurr, nlev = 0, pal.col = rainbow(100), lseq = 100) # } # (5) a distribution with two parameters fitted on censored data # data(salinity) fsal <- fitdistcens(salinity, \"lnorm\") llplot(fsal, fit.show = TRUE) llplot(fsal, fit.show = TRUE, loglik = FALSE)"},{"path":"https://lbbe-software.github.io/fitdistrplus/reference/logLik-surface.html","id":null,"dir":"Reference","previous_headings":"","what":"(Log)likelihood surfaces or (log)likelihood curves — logLiksurface","title":"(Log)likelihood surfaces or (log)likelihood curves — logLiksurface","text":"llsurface plots likelihood surface distributions two parameters, llcurve plots likelihood curve distributions one 
parameters.","code":""},{"path":"https://lbbe-software.github.io/fitdistrplus/reference/logLik-surface.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"(Log)likelihood surfaces or (log)likelihood curves — logLiksurface","text":"","code":"llsurface(data, distr, plot.arg, min.arg, max.arg, lseq = 50, fix.arg = NULL, loglik = TRUE, back.col = TRUE, nlev = 10, pal.col = terrain.colors(100), weights = NULL, ...) llcurve(data, distr, plot.arg, min.arg, max.arg, lseq = 50, fix.arg = NULL, loglik = TRUE, weights = NULL, ...)"},{"path":"https://lbbe-software.github.io/fitdistrplus/reference/logLik-surface.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"(Log)likelihood surfaces or (log)likelihood curves — logLiksurface","text":"data numeric vector non censored data dataframe two columns respectively named left right, describing observed value interval censored data. case left column contains either NA left censored observations, left bound interval interval censored observations, observed value non-censored observations. right column contains either NA right censored observations, right bound interval interval censored observations, observed value non-censored observations. distr character string \"name\" naming distribution corresponding density function dname corresponding distribution function pname must classically defined. plot.arg two-element vector names two parameters vary llsurface, one element llcurve. min.arg two-element vector lower plotting bounds llsurface, one element llcurve. max.arg two-element vector upper plotting bounds llsurface, one element llcurve. lseq length sequences parameters. fix.arg named list fixed value parameters. loglik logical plot log-likelihood likelihood function. back.col logical (llsurface ). Contours plotted background gradient colors TRUE. nlev number contour levels plot (llsurface ). pal.col Palette colors. Colors used back (llsurface ). weights optional vector weights used fitting process. NULL numeric vector strictly positive values (classically number occurences observation). ... graphical arguments passed graphical functions.","code":""},{"path":"https://lbbe-software.github.io/fitdistrplus/reference/logLik-surface.html","id":"details","dir":"Reference","previous_headings":"","what":"Details","title":"(Log)likelihood surfaces or (log)likelihood curves — logLiksurface","text":"two function intended called directly internally called llplot. llsurface plots likelihood surface distributions two varying parameters parameters fixed. back.col, image (2D-plot) used. nlev > 0, contour (2D-plot) used add nlev contours. llcurve plots likelihood curve distributions one varying parameter parameters fixed.","code":""},{"path":[]},{"path":"https://lbbe-software.github.io/fitdistrplus/reference/logLik-surface.html","id":"references","dir":"Reference","previous_headings":"","what":"References","title":"(Log)likelihood surfaces or (log)likelihood curves — logLiksurface","text":"Delignette-Muller ML Dutang C (2015), fitdistrplus: R Package Fitting Distributions. 
Journal Statistical Software, 64(4), 1-34, doi:10.18637/jss.v064.i04 .","code":""},{"path":"https://lbbe-software.github.io/fitdistrplus/reference/logLik-surface.html","id":"author","dir":"Reference","previous_headings":"","what":"Author","title":"(Log)likelihood surfaces or (log)likelihood curves — logLiksurface","text":"Marie-Laure Delignette-Muller Christophe Dutang.","code":""},{"path":"https://lbbe-software.github.io/fitdistrplus/reference/logLik-surface.html","id":"ref-examples","dir":"Reference","previous_headings":"","what":"Examples","title":"(Log)likelihood surfaces or (log)likelihood curves — logLiksurface","text":"","code":"# (1) loglikelihood or likelihood curve # n <- 100 set.seed(1234) x <- rexp(n) llcurve(data = x, distr = \"exp\", plot.arg = \"rate\", min.arg = 0, max.arg = 4) llcurve(data = x, distr = \"exp\", plot.arg = \"rate\", min.arg = 0, max.arg = 4, loglik = FALSE) llcurve(data = x, distr = \"exp\", plot.arg = \"rate\", min.arg = 0, max.arg = 4, main = \"log-likelihood for exponential distribution\", col = \"red\") abline(v = 1, lty = 2) # (2) loglikelihood surface # x <- rnorm(n, 0, 1) llsurface(data =x, distr=\"norm\", plot.arg=c(\"mean\", \"sd\"), min.arg=c(-1, 0.5), max.arg=c(1, 3/2), back.col = FALSE, main=\"log-likelihood for normal distribution\") llsurface(data =x, distr=\"norm\", plot.arg=c(\"mean\", \"sd\"), min.arg=c(-1, 0.5), max.arg=c(1, 3/2), main=\"log-likelihood for normal distribution\", nlev = 20, pal.col = heat.colors(100),) points(0, 1, pch=\"+\", col=\"red\") llsurface(data =x, distr=\"norm\", plot.arg=c(\"mean\", \"sd\"), min.arg=c(-1, 0.5), max.arg=c(1, 3/2), main=\"log-likelihood for normal distribution\", nlev = 0, back.col = TRUE, pal.col = rainbow(100, s = 0.5, end = 0.8)) points(0, 1, pch=\"+\", col=\"black\")"},{"path":"https://lbbe-software.github.io/fitdistrplus/reference/mgedist.html","id":null,"dir":"Reference","previous_headings":"","what":"Maximum goodness-of-fit fit of univariate continuous distributions — mgedist","title":"Maximum goodness-of-fit fit of univariate continuous distributions — mgedist","text":"Fit univariate continuous distribution maximizing goodness--fit (minimizing distance) non censored data.","code":""},{"path":"https://lbbe-software.github.io/fitdistrplus/reference/mgedist.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Maximum goodness-of-fit fit of univariate continuous distributions — mgedist","text":"","code":"mgedist(data, distr, gof = \"CvM\", start = NULL, fix.arg = NULL, optim.method = \"default\", lower = -Inf, upper = Inf, custom.optim = NULL, silent = TRUE, gradient = NULL, checkstartfix=FALSE, calcvcov=FALSE, ...)"},{"path":"https://lbbe-software.github.io/fitdistrplus/reference/mgedist.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Maximum goodness-of-fit fit of univariate continuous distributions — mgedist","text":"data numeric vector non censored data. distr character string \"name\" naming distribution corresponding quantile function qname corresponding density distribution dname must classically defined. gof character string coding name goodness--fit distance used : \"CvM\" Cramer-von Mises distance, \"KS\" Kolmogorov-Smirnov distance, \"AD\" Anderson-Darling distance, \"ADR\", \"ADL\", \"AD2R\", \"AD2L\" \"AD2\" variants Anderson-Darling distance described Luceno (2006). start named list giving initial values parameters named distribution function data computing initial values returning named list. 
argument may omitted (default) distributions reasonable starting values computed (see 'details' section mledist). fix.arg optional named list giving values fixed parameters named distribution function data computing (fixed) parameter values returning named list. Parameters fixed value thus estimated. optim.method \"default\" optimization method pass optim. lower Left bounds parameters \"L-BFGS-B\" method (see optim). upper Right bounds parameters \"L-BFGS-B\" method (see optim). custom.optim function carrying optimization. silent logical remove show warnings bootstraping. gradient function return gradient gof distance \"BFGS\", \"CG\" \"L-BFGS-B\" methods. NULL, finite-difference approximation used. checkstartfix logical test starting fixed values. change . calcvcov logical indicating (asymptotic) covariance matrix required. (currently ignored) ... arguments passed optim, constrOptim custom.optim function.","code":""},{"path":"https://lbbe-software.github.io/fitdistrplus/reference/mgedist.html","id":"details","dir":"Reference","previous_headings":"","what":"Details","title":"Maximum goodness-of-fit fit of univariate continuous distributions — mgedist","text":"mgedist function numerically maximizes goodness--fit, minimizes goodness--fit distance coded argument gof. One may use one classical distances defined Stephens (1986), Cramer-von Mises distance (\"CvM\"), Kolmogorov-Smirnov distance (\"KS\") Anderson-Darling distance (\"AD\") gives weight tails distribution, one variants last distance proposed Luceno (2006). right-tail AD (\"ADR\") gives weight right tail, left-tail AD (\"ADL\") gives weight left tail. Either tails, , can receive even larger weights using second order Anderson-Darling Statistics (using \"AD2R\", \"AD2L\" \"AD2\"). optimization process mledist, see 'details' section function. function intended called directly internally called fitdist bootdist. function intended used continuous distributions weighted maximum goodness--fit estimation allowed. NB: data values particularly small large, scaling may needed optimization process. See example (4).","code":""},{"path":"https://lbbe-software.github.io/fitdistrplus/reference/mgedist.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Maximum goodness-of-fit fit of univariate continuous distributions — mgedist","text":"mgedist returns list following components, estimate parameter estimates. convergence integer code convergence optim defined defined user user-supplied optimization function. 0 indicates successful convergence. 1 indicates iteration limit optim reached. 10 indicates degeneracy Nealder-Mead simplex. 100 indicates optim encountered internal error. value minimal value reached criterion minimize. hessian symmetric matrix computed optim estimate Hessian solution found computed user-supplied optimization function. optim.function name optimization function used maximum likelihood. optim.method optim used, name algorithm used, field method custom.optim function otherwise. fix.arg named list giving values parameters named distribution must kept fixed rather estimated maximum likelihood NULL parameters. fix.arg.fun function used set value fix.arg NULL. weights vector weigths used estimation process NULL. counts two-element integer vector giving number calls log-likelihood function gradient respectively. excludes calls needed compute Hessian, requested, calls log-likelihood function compute finite-difference approximation gradient. counts returned optim user-supplied function set NULL. 
optim.message character string giving additional information returned optimizer, NULL. understand exactly message, see source code. loglik log-likelihood value. gof code goodness--fit distance maximized.","code":""},{"path":[]},{"path":"https://lbbe-software.github.io/fitdistrplus/reference/mgedist.html","id":"references","dir":"Reference","previous_headings":"","what":"References","title":"Maximum goodness-of-fit fit of univariate continuous distributions — mgedist","text":"Luceno (2006), Fitting generalized Pareto distribution data using maximum goodness--fit estimators. Computational Statistics Data Analysis, 51, 904-917, doi:10.1016/j.csda.2005.09.011 . Stephens MA (1986), Tests based edf statistics. Goodness--fit techniques (D'Agostino RB Stephens MA, eds), Marcel Dekker, New York, pp. 97-194. Delignette-Muller ML Dutang C (2015), fitdistrplus: R Package Fitting Distributions. Journal Statistical Software, 64(4), 1-34, doi:10.18637/jss.v064.i04 .","code":""},{"path":"https://lbbe-software.github.io/fitdistrplus/reference/mgedist.html","id":"author","dir":"Reference","previous_headings":"","what":"Author","title":"Maximum goodness-of-fit fit of univariate continuous distributions — mgedist","text":"Marie-Laure Delignette-Muller Christophe Dutang.","code":""},{"path":"https://lbbe-software.github.io/fitdistrplus/reference/mgedist.html","id":"ref-examples","dir":"Reference","previous_headings":"","what":"Examples","title":"Maximum goodness-of-fit fit of univariate continuous distributions — mgedist","text":"","code":"# (1) Fit of a Weibull distribution to serving size data by maximum # goodness-of-fit estimation using all the distances available # data(groundbeef) serving <- groundbeef$serving mgedist(serving, \"weibull\", gof=\"CvM\") #> $estimate #> shape scale #> 2.093204 82.660014 #> #> $convergence #> [1] 0 #> #> $value #> [1] 0.6556672 #> #> $hessian #> shape scale #> shape 4.05295367 0.09244476 #> scale 0.09244476 0.02418777 #> #> $optim.function #> [1] \"optim\" #> #> $optim.method #> [1] \"Nelder-Mead\" #> #> $fix.arg #> NULL #> #> $fix.arg.fun #> NULL #> #> $weights #> NULL #> #> $counts #> function gradient #> 65 NA #> #> $optim.message #> NULL #> #> $loglik #> [1] -1255.623 #> #> $gof #> [1] \"CvM\" #> mgedist(serving, \"weibull\", gof=\"KS\") #> $estimate #> shape scale #> 2.065634 81.450487 #> #> $convergence #> [1] 0 #> #> $value #> [1] 0.112861 #> #> $hessian #> shape scale #> shape 122.668263 6.509057 #> scale 6.509057 7.599584 #> #> $optim.function #> [1] \"optim\" #> #> $optim.method #> [1] \"Nelder-Mead\" #> #> $fix.arg #> NULL #> #> $fix.arg.fun #> NULL #> #> $weights #> NULL #> #> $counts #> function gradient #> 127 NA #> #> $optim.message #> NULL #> #> $loglik #> [1] -1255.975 #> #> $gof #> [1] \"KS\" #> mgedist(serving, \"weibull\", gof=\"AD\") #> $estimate #> shape scale #> 2.125473 82.890260 #> #> $convergence #> [1] 0 #> #> $value #> [1] 3.501035 #> #> $hessian #> shape scale #> shape 29.4165108 0.1823375 #> scale 0.1823375 0.1354409 #> #> $optim.function #> [1] \"optim\" #> #> $optim.method #> [1] \"Nelder-Mead\" #> #> $fix.arg #> NULL #> #> $fix.arg.fun #> NULL #> #> $weights #> NULL #> #> $counts #> function gradient #> 63 NA #> #> $optim.message #> NULL #> #> $loglik #> [1] -1255.392 #> #> $gof #> [1] \"AD\" #> mgedist(serving, \"weibull\", gof=\"ADR\") #> $estimate #> shape scale #> 2.072087 82.761868 #> #> $convergence #> [1] 0 #> #> $value #> [1] 1.610479 #> #> $hessian #> shape scale #> shape 13.5240921 -0.33242262 #> scale -0.3324226 0.07977375 #> #> 
$optim.function #> [1] \"optim\" #> #> $optim.method #> [1] \"Nelder-Mead\" #> #> $fix.arg #> NULL #> #> $fix.arg.fun #> NULL #> #> $weights #> NULL #> #> $counts #> function gradient #> 65 NA #> #> $optim.message #> NULL #> #> $loglik #> [1] -1255.836 #> #> $gof #> [1] \"ADR\" #> mgedist(serving, \"weibull\", gof=\"ADL\") #> $estimate #> shape scale #> 2.197498 82.016005 #> #> $convergence #> [1] 0 #> #> $value #> [1] 1.845939 #> #> $hessian #> shape scale #> shape 15.3272022 0.54407116 #> scale 0.5440712 0.05549883 #> #> $optim.function #> [1] \"optim\" #> #> $optim.method #> [1] \"Nelder-Mead\" #> #> $fix.arg #> NULL #> #> $fix.arg.fun #> NULL #> #> $weights #> NULL #> #> $counts #> function gradient #> 65 NA #> #> $optim.message #> NULL #> #> $loglik #> [1] -1255.415 #> #> $gof #> [1] \"ADL\" #> mgedist(serving, \"weibull\", gof=\"AD2R\") #> $estimate #> shape scale #> 1.90328 81.33464 #> #> $convergence #> [1] 0 #> #> $value #> [1] 11.56415 #> #> $hessian #> shape scale #> shape 334.61081 -10.4227495 #> scale -10.42275 0.5223167 #> #> $optim.function #> [1] \"optim\" #> #> $optim.method #> [1] \"Nelder-Mead\" #> #> $fix.arg #> NULL #> #> $fix.arg.fun #> NULL #> #> $weights #> NULL #> #> $counts #> function gradient #> 67 NA #> #> $optim.message #> NULL #> #> $loglik #> [1] -1259.112 #> #> $gof #> [1] \"AD2R\" #> mgedist(serving, \"weibull\", gof=\"AD2L\") #> $estimate #> shape scale #> 2.483836 78.252113 #> #> $convergence #> [1] 0 #> #> $value #> [1] 9.786977 #> #> $hessian #> shape scale #> shape 113.511932 4.1108355 #> scale 4.110836 0.2341312 #> #> $optim.function #> [1] \"optim\" #> #> $optim.method #> [1] \"Nelder-Mead\" #> #> $fix.arg #> NULL #> #> $fix.arg.fun #> NULL #> #> $weights #> NULL #> #> $counts #> function gradient #> 69 NA #> #> $optim.message #> NULL #> #> $loglik #> [1] -1265.933 #> #> $gof #> [1] \"AD2L\" #> mgedist(serving, \"weibull\", gof=\"AD2\") #> $estimate #> shape scale #> 2.081168 85.281194 #> #> $convergence #> [1] 0 #> #> $value #> [1] 26.95166 #> #> $hessian #> shape scale #> shape 534.9606 -10.5940982 #> scale -10.5941 0.7606462 #> #> $optim.function #> [1] \"optim\" #> #> $optim.method #> [1] \"Nelder-Mead\" #> #> $fix.arg #> NULL #> #> $fix.arg.fun #> NULL #> #> $weights #> NULL #> #> $counts #> function gradient #> 67 NA #> #> $optim.message #> NULL #> #> $loglik #> [1] -1256.313 #> #> $gof #> [1] \"AD2\" #> # (2) Fit of a uniform distribution using Cramer-von Mises or # Kolmogorov-Smirnov distance # set.seed(1234) u <- runif(100,min=5,max=10) mgedist(u,\"unif\",gof=\"CvM\") #> $estimate #> min max #> 4.788260 9.568912 #> #> $convergence #> [1] 0 #> #> $value #> [1] 0.1142423 #> #> $hessian #> min max #> min 2.906956 1.461523 #> max 1.461523 2.570923 #> #> $optim.function #> [1] \"optim\" #> #> $optim.method #> [1] \"Nelder-Mead\" #> #> $fix.arg #> NULL #> #> $fix.arg.fun #> NULL #> #> $weights #> NULL #> #> $counts #> function gradient #> 59 NA #> #> $optim.message #> NULL #> #> $loglik #> [1] -Inf #> #> $gof #> [1] \"CvM\" #> mgedist(u,\"unif\",gof=\"KS\") #> $estimate #> min max #> 4.664535 9.463995 #> #> $convergence #> [1] 0 #> #> $value #> [1] 0.08 #> #> $hessian #> min max #> min 43.06566 -33.35097 #> max -33.35097 -61.06933 #> #> $optim.function #> [1] \"optim\" #> #> $optim.method #> [1] \"Nelder-Mead\" #> #> $fix.arg #> NULL #> #> $fix.arg.fun #> NULL #> #> $weights #> NULL #> #> $counts #> function gradient #> 29 NA #> #> $optim.message #> NULL #> #> $loglik #> [1] -Inf #> #> $gof #> [1] \"KS\" #> # (3) Fit of a triangular distribution 
using Cramer-von Mises or # Kolmogorov-Smirnov distance # # \\donttest{ require(mc2d) set.seed(1234) t <- rtriang(100,min=5,mode=6,max=10) mgedist(t,\"triang\",start = list(min=4, mode=6,max=9),gof=\"CvM\") #> Warning: Some parameter names have no starting/fixed value but have a default value: mean. #> $estimate #> min mode max #> 5.051036 5.796428 9.391579 #> #> $convergence #> [1] 0 #> #> $value #> [1] 0.06428299 #> #> $hessian #> min mode max #> min 3.051858 3.248860 1.522501 #> mode 3.248860 3.821007 1.800899 #> max 1.522501 1.800899 1.593900 #> #> $optim.function #> [1] \"optim\" #> #> $optim.method #> [1] \"Nelder-Mead\" #> #> $fix.arg #> NULL #> #> $fix.arg.fun #> NULL #> #> $weights #> NULL #> #> $counts #> function gradient #> 106 NA #> #> $optim.message #> NULL #> #> $loglik #> [1] -Inf #> #> $gof #> [1] \"CvM\" #> mgedist(t,\"triang\",start = list(min=4, mode=6,max=9),gof=\"KS\") #> Warning: Some parameter names have no starting/fixed value but have a default value: mean. #> $estimate #> min mode max #> 4.939094 5.813200 9.248592 #> #> $convergence #> [1] 0 #> #> $value #> [1] 0.06191245 #> #> $hessian #> min mode max #> min 158.93759 158.9436 70.39038 #> mode 158.94358 199.0473 70.39510 #> max 70.39038 70.3951 106.08995 #> #> $optim.function #> [1] \"optim\" #> #> $optim.method #> [1] \"Nelder-Mead\" #> #> $fix.arg #> NULL #> #> $fix.arg.fun #> NULL #> #> $weights #> NULL #> #> $counts #> function gradient #> 268 NA #> #> $optim.message #> NULL #> #> $loglik #> [1] -Inf #> #> $gof #> [1] \"KS\" #> # } # (4) scaling problem # the simulated dataset (below) has particularly small values, hence without scaling (10^0), # the optimization raises an error. The for loop shows how scaling by 10^i # for i=1,...,6 makes the fitting procedure work correctly. 
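# Editorial note (hedged sketch, not part of the original example): the loop below
# calls mgedist(x*10^i, ...) although the simulated vector is named x2, which is why
# the rendered output only shows "object 'x' not found" errors. With the name fixed,
# the scaling illustration runs as intended:
set.seed(1234)
x2 <- rnorm(100, 1e-4, 2e-4)
for(i in 6:0)
  cat(i, try(mgedist(x2*10^i, "cauchy")$estimate, silent = TRUE), "\n")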
set.seed(1234) x2 <- rnorm(100, 1e-4, 2e-4) for(i in 6:0) cat(i, try(mgedist(x*10^i,\"cauchy\")$estimate, silent=TRUE), \"\\n\") #> 6 Error in eval(expr, envir, enclos) : object 'x' not found #> #> 5 Error in eval(expr, envir, enclos) : object 'x' not found #> #> 4 Error in eval(expr, envir, enclos) : object 'x' not found #> #> 3 Error in eval(expr, envir, enclos) : object 'x' not found #> #> 2 Error in eval(expr, envir, enclos) : object 'x' not found #> #> 1 Error in eval(expr, envir, enclos) : object 'x' not found #> #> 0 Error in eval(expr, envir, enclos) : object 'x' not found #>"},{"path":"https://lbbe-software.github.io/fitdistrplus/reference/mledist.html","id":null,"dir":"Reference","previous_headings":"","what":"Maximum likelihood fit of univariate distributions — mledist","title":"Maximum likelihood fit of univariate distributions — mledist","text":"Fit univariate distributions using maximum likelihood censored non censored data.","code":""},{"path":"https://lbbe-software.github.io/fitdistrplus/reference/mledist.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Maximum likelihood fit of univariate distributions — mledist","text":"","code":"mledist(data, distr, start = NULL, fix.arg = NULL, optim.method = \"default\", lower = -Inf, upper = Inf, custom.optim = NULL, weights = NULL, silent = TRUE, gradient = NULL, checkstartfix=FALSE, calcvcov=FALSE, ...)"},{"path":"https://lbbe-software.github.io/fitdistrplus/reference/mledist.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Maximum likelihood fit of univariate distributions — mledist","text":"data numeric vector non censored data dataframe two columns respectively named left right, describing observed value interval censored data. case left column contains either NA left censored observations, left bound interval interval censored observations, observed value non-censored observations. right column contains either NA right censored observations, right bound interval interval censored observations, observed value non-censored observations. distr character string \"name\" naming distribution corresponding density function dname corresponding distribution function pname must classically defined. start named list giving initial values parameters named distribution function data computing initial values returning named list. argument may omitted (default) distributions reasonable starting values computed (see details). fix.arg optional named list giving values fixed parameters named distribution function data computing (fixed) parameter values returning named list. Parameters fixed value thus estimated maximum likelihood procedure. optim.method \"default\" (see details) optimization method pass optim. lower Left bounds parameters \"L-BFGS-B\" method (see optim). upper Right bounds parameters \"L-BFGS-B\" method (see optim). custom.optim function carrying MLE optimisation (see details). weights optional vector weights used fitting process. NULL numeric vector strictly positive integers (typically number occurences observation). non-NULL, weighted MLE used, otherwise ordinary MLE. silent logical remove show warnings bootstraping. gradient function return gradient log-likelihood \"BFGS\", \"CG\" \"L-BFGS-B\" methods. NULL, finite-difference approximation used, see details. checkstartfix logical test starting fixed values. change . calcvcov logical indicating (asymptotic) covariance matrix required. ... 
arguments passed optim, constrOptim custom.optim function.","code":""},{"path":"https://lbbe-software.github.io/fitdistrplus/reference/mledist.html","id":"details","dir":"Reference","previous_headings":"","what":"Details","title":"Maximum likelihood fit of univariate distributions — mledist","text":"function intended called directly internally called fitdist bootdist used maximum likelihood method fitdistcens bootdistcens. assumed distr argument specifies distribution probability density function cumulative distribution function (d, p). quantile function random generator function (q, r) may needed function mmedist, qmedist, mgedist, fitdist,fitdistcens, bootdistcens bootdist. following named distributions, reasonable starting values computed start omitted (.e. NULL) : \"norm\", \"lnorm\", \"exp\" \"pois\", \"cauchy\", \"gamma\", \"logis\", \"nbinom\" (parametrized mu size), \"geom\", \"beta\", \"weibull\" stats package; \"invgamma\", \"llogis\", \"invweibull\", \"pareto1\", \"pareto\", \"lgamma\", \"trgamma\", \"invtrgamma\" actuar package. Note starting values may good enough fit poor. function uses closed-form formula fit uniform distribution. start list, named list names d,p,q,r functions chosen distribution. start function data, function return named list names d,p,q,r functions chosen distribution. mledist function allows user set fixed values parameters. start, fix.arg list, named list names d,p,q,r functions chosen distribution. fix.arg function data, function return named list names d,p,q,r functions chosen distribution. custom.optim=NULL (default), maximum likelihood estimations distribution parameters computed R base optim constrOptim. finite bounds (lower=-Inf upper=Inf) supplied, optim used method specified optim.method. Note optim.method=\"default\" means optim.method=\"Nelder-Mead\" distributions least two parameters optim.method=\"BFGS\" distributions one parameter. finite bounds supplied (among lower upper) gradient != NULL, constrOptim used. finite bounds supplied (among lower upper) gradient == NULL, constrOptim used optim.method=\"Nelder-Mead\"; optim used optim.method=\"L-BFGS-B\" \"Brent\"; case, error raised (behavior constrOptim). errors raised optim, good idea start adding traces optimization process adding control=list(trace=1, REPORT=1). custom.optim NULL, user-supplied function used instead R base optim. custom.optim must (least) following arguments fn function optimized, par initialized parameters. Internally function optimized also arguments, obs observations ddistname distribution name non censored data (Beware potential conflicts optional arguments custom.optim). assumed custom.optim carry MINIMIZATION. Finally, return least following components par estimate, convergence convergence code, value fn(par), hessian, counts number calls (function gradient) message (default NULL) error message custom.optim raises error, see returned value optim. See examples fitdist fitdistcens. Optionally, vector weights can used fitting process. default (weigths=NULL), ordinary MLE carried , otherwise specified weights used balance log-likelihood contributions. yet possible take account weights functions plotdist, plotdistcens, plot.fitdist, plot.fitdistcens, cdfcomp, cdfcompcens, denscomp, ppcomp, qqcomp, gofstat, descdist, bootdist, bootdistcens mgedist. (developments planned future). NB: data values particularly small large, scaling may needed optimization process. 
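The custom.optim contract described above (a function taking at least fn and par, carrying out a minimization, and returning par, convergence, value, hessian, counts and message) is easiest to see with a concrete wrapper. The sketch below is an editorial illustration, not code from the package, and assumes nlminb is an acceptable optimiser for this purpose; the extra arguments that mledist passes internally (the observations and the density name) are simply forwarded through ... to the objective.

library(fitdistrplus)
# thin wrapper around stats::nlminb() returning the components mledist() expects
my_nlminb <- function(fn, par, ...) {
  res <- nlminb(start = par, objective = fn, ...)
  list(par         = res$par,
       convergence = res$convergence,
       value       = res$objective,
       hessian     = optimHess(res$par, fn, ...),  # finite-difference Hessian at the optimum
       counts      = res$evaluations,
       message     = res$message)
}
set.seed(1234)
x1 <- rnorm(100)
mledist(x1, "norm", custom.optim = my_nlminb)$estimate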
See Example (7).","code":""},{"path":"https://lbbe-software.github.io/fitdistrplus/reference/mledist.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Maximum likelihood fit of univariate distributions — mledist","text":"mledist returns list following components, estimate parameter estimates. convergence integer code convergence optim/constrOptim defined defined user user-supplied optimization function. 0 indicates successful convergence. 1 indicates iteration limit optim reached. 10 indicates degeneracy Nealder-Mead simplex. 100 indicates optim encountered internal error. value minimal value reached criterion minimize. hessian symmetric matrix computed optim estimate Hessian solution found computed user-supplied optimization function. used fitdist estimate standard errors. optim.function name optimization function used maximum likelihood. optim.method optim used, name algorithm used, field method custom.optim function otherwise. fix.arg named list giving values parameters named distribution must kept fixed rather estimated maximum likelihood NULL parameters. fix.arg.fun function used set value fix.arg NULL. weights vector weigths used estimation process NULL. counts two-element integer vector giving number calls log-likelihood function gradient respectively. excludes calls needed compute Hessian, requested, calls log-likelihood function compute finite-difference approximation gradient. counts returned optim user-supplied function set NULL. optim.message character string giving additional information returned optimizer, NULL. understand exactly message, see source code. loglik log-likelihood value. method \"closed formula\" appropriate otherwise NULL.","code":""},{"path":[]},{"path":"https://lbbe-software.github.io/fitdistrplus/reference/mledist.html","id":"references","dir":"Reference","previous_headings":"","what":"References","title":"Maximum likelihood fit of univariate distributions — mledist","text":"Venables WN Ripley BD (2002), Modern applied statistics S. Springer, New York, pp. 435-446, doi:10.1007/978-0-387-21706-2 . Delignette-Muller ML Dutang C (2015), fitdistrplus: R Package Fitting Distributions. 
Journal Statistical Software, 64(4), 1-34, doi:10.18637/jss.v064.i04 .","code":""},{"path":"https://lbbe-software.github.io/fitdistrplus/reference/mledist.html","id":"author","dir":"Reference","previous_headings":"","what":"Author","title":"Maximum likelihood fit of univariate distributions — mledist","text":"Marie-Laure Delignette-Muller Christophe Dutang.","code":""},{"path":"https://lbbe-software.github.io/fitdistrplus/reference/mledist.html","id":"ref-examples","dir":"Reference","previous_headings":"","what":"Examples","title":"Maximum likelihood fit of univariate distributions — mledist","text":"","code":"# (1) basic fit of a normal distribution with maximum likelihood estimation # set.seed(1234) x1 <- rnorm(n=100) mledist(x1,\"norm\") #> $estimate #> mean sd #> -0.1567617 0.9993707 #> #> $convergence #> [1] 0 #> #> $value #> [1] 141.8309 #> #> $hessian #> mean sd #> mean 100.126 0.0000 #> sd 0.000 200.2538 #> #> $optim.function #> [1] \"optim\" #> #> $optim.method #> [1] \"Nelder-Mead\" #> #> $fix.arg #> NULL #> #> $fix.arg.fun #> NULL #> #> $weights #> NULL #> #> $counts #> function gradient #> 43 NA #> #> $optim.message #> NULL #> #> $loglik #> [1] -141.8309 #> #> $vcov #> NULL #> # (2) defining your own distribution functions, here for the Gumbel distribution # for other distributions, see the CRAN task view dedicated to probability distributions dgumbel <- function(x,a,b) 1/b*exp((a-x)/b)*exp(-exp((a-x)/b)) mledist(x1,\"gumbel\",start=list(a=10,b=5)) #> Error in checkparamlist(arg_startfix$start.arg, arg_startfix$fix.arg, argddistname, hasnodefaultval): 'start' must specify names which are arguments to 'distr'. # (3) fit of a discrete distribution (Poisson) # set.seed(1234) x2 <- rpois(n=30,lambda = 2) mledist(x2,\"pois\") #> $estimate #> lambda #> 1.7 #> #> $convergence #> [1] 0 #> #> $value #> [1] 46.18434 #> #> $hessian #> lambda #> lambda 17.64707 #> #> $optim.function #> [1] \"optim\" #> #> $optim.method #> [1] \"BFGS\" #> #> $fix.arg #> NULL #> #> $fix.arg.fun #> NULL #> #> $weights #> NULL #> #> $counts #> function gradient #> 6 1 #> #> $optim.message #> NULL #> #> $loglik #> [1] -46.18434 #> #> $vcov #> NULL #> # (4) fit a finite-support distribution (beta) # set.seed(1234) x3 <- rbeta(n=100,shape1=5, shape2=10) mledist(x3,\"beta\") #> $estimate #> shape1 shape2 #> 4.859798 10.918841 #> #> $convergence #> [1] 0 #> #> $value #> [1] -78.33052 #> #> $hessian #> shape1 shape2 #> shape1 16.295311 -6.542753 #> shape2 -6.542753 3.047900 #> #> $optim.function #> [1] \"optim\" #> #> $optim.method #> [1] \"Nelder-Mead\" #> #> $fix.arg #> NULL #> #> $fix.arg.fun #> NULL #> #> $weights #> NULL #> #> $counts #> function gradient #> 47 NA #> #> $optim.message #> NULL #> #> $loglik #> [1] 78.33052 #> #> $vcov #> NULL #> # (5) fit frequency distributions on USArrests dataset. 
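# Editorial aside (hedged, not part of the original example): once the two fits
# below have been run, their log-likelihoods can be compared directly, e.g.
#   c(pois = mledist(x4, "pois")$loglik, nbinom = mledist(x4, "nbinom")$loglik)
# which reproduces the values shown further down (about -1211.7 versus -290.3) and
# shows that the negative binomial accommodates the overdispersion of these counts
# far better than the Poisson. Note also that the name mismatch flagged for the
# mgedist scaling example (x versus x2) recurs in example (7) below.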
# x4 <- USArrests$Assault mledist(x4, \"pois\") #> $estimate #> lambda #> 170.76 #> #> $convergence #> [1] 0 #> #> $value #> [1] 1211.705 #> #> $hessian #> lambda #> lambda 0.2928087 #> #> $optim.function #> [1] \"optim\" #> #> $optim.method #> [1] \"BFGS\" #> #> $fix.arg #> NULL #> #> $fix.arg.fun #> NULL #> #> $weights #> NULL #> #> $counts #> function gradient #> 3 1 #> #> $optim.message #> NULL #> #> $loglik #> [1] -1211.705 #> #> $vcov #> NULL #> mledist(x4, \"nbinom\") #> $estimate #> size mu #> 3.822579 170.747853 #> #> $convergence #> [1] 0 #> #> $value #> [1] 290.3297 #> #> $hessian #> size mu #> size 1.759308e+00 -1.993783e-05 #> mu -1.993783e-05 6.413003e-03 #> #> $optim.function #> [1] \"optim\" #> #> $optim.method #> [1] \"Nelder-Mead\" #> #> $fix.arg #> NULL #> #> $fix.arg.fun #> NULL #> #> $weights #> NULL #> #> $counts #> function gradient #> 47 NA #> #> $optim.message #> NULL #> #> $loglik #> [1] -290.3297 #> #> $vcov #> NULL #> # (6) fit a continuous distribution (Gumbel) to censored data. # data(fluazinam) log10EC50 <-log10(fluazinam) # definition of the Gumbel distribution dgumbel <- function(x,a,b) 1/b*exp((a-x)/b)*exp(-exp((a-x)/b)) pgumbel <- function(q,a,b) exp(-exp((a-q)/b)) qgumbel <- function(p,a,b) a-b*log(-log(p)) mledist(log10EC50,\"gumbel\",start=list(a=0,b=2),optim.method=\"Nelder-Mead\") #> Error in checkparamlist(arg_startfix$start.arg, arg_startfix$fix.arg, argddistname, hasnodefaultval): 'start' must specify names which are arguments to 'distr'. # (7) scaling problem # the simulated dataset (below) has particularly small values, # hence without scaling (10^0), # the optimization raises an error. The for loop shows how scaling by 10^i # for i=1,...,6 makes the fitting procedure work correctly. set.seed(1234) x2 <- rnorm(100, 1e-4, 2e-4) for(i in 6:0) cat(i, try(mledist(x*10^i, \"cauchy\")$estimate, silent=TRUE), \"\\n\") #> 6 Error in eval(expr, envir, enclos) : object 'x' not found #> #> 5 Error in eval(expr, envir, enclos) : object 'x' not found #> #> 4 Error in eval(expr, envir, enclos) : object 'x' not found #> #> 3 Error in eval(expr, envir, enclos) : object 'x' not found #> #> 2 Error in eval(expr, envir, enclos) : object 'x' not found #> #> 1 Error in eval(expr, envir, enclos) : object 'x' not found #> #> 0 Error in eval(expr, envir, enclos) : object 'x' not found #> # (17) small example for the zero-modified geometric distribution # dzmgeom <- function(x, p1, p2) p1 * (x == 0) + (1-p1)*dgeom(x-1, p2) #pdf x2 <- c(2, 4, 0, 40, 4, 21, 0, 0, 0, 2, 5, 0, 0, 13, 2) #simulated dataset initp1 <- function(x) list(p1=mean(x == 0)) #init as MLE mledist(x2, \"zmgeom\", fix.arg=initp1, start=list(p2=1/2)) #> Error in checkparamlist(arg_startfix$start.arg, arg_startfix$fix.arg, argddistname, hasnodefaultval): 'start' must specify names which are arguments to 'distr'."},{"path":"https://lbbe-software.github.io/fitdistrplus/reference/mmedist.html","id":null,"dir":"Reference","previous_headings":"","what":"Matching moment fit of univariate distributions — mmedist","title":"Matching moment fit of univariate distributions — mmedist","text":"Fit univariate distributions matching moments (raw centered) non censored data.","code":""},{"path":"https://lbbe-software.github.io/fitdistrplus/reference/mmedist.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Matching moment fit of univariate distributions — mmedist","text":"","code":"mmedist(data, distr, order, memp, start = NULL, fix.arg = NULL, optim.method = \"default\", lower = 
-Inf, upper = Inf, custom.optim = NULL, weights = NULL, silent = TRUE, gradient = NULL, checkstartfix=FALSE, calcvcov=FALSE, ...)"},{"path":"https://lbbe-software.github.io/fitdistrplus/reference/mmedist.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Matching moment fit of univariate distributions — mmedist","text":"data numeric vector non censored data. distr character string \"name\" naming distribution (see 'details'). order numeric vector moment order(s). length vector must equal number parameters estimate. memp function implementing empirical moments, raw centered consistent distr argument (weights argument). See details . start named list giving initial values parameters named distribution function data computing initial values returning named list. argument may omitted (default) distributions reasonable starting values computed (see 'details' section mledist). fix.arg optional named list giving values fixed parameters named distribution function data computing (fixed) parameter values returning named list. Parameters fixed value thus estimated. optim.method \"default\" optimization method pass optim. lower Left bounds parameters \"L-BFGS-B\" method (see optim). upper Right bounds parameters \"L-BFGS-B\" method (see optim). custom.optim function carrying optimization . weights optional vector weights used fitting process. NULL numeric vector strictly positive integers (typically number occurences observation). non-NULL, weighted MME used, otherwise ordinary MME. silent logical remove show warnings bootstraping. gradient function return gradient squared difference \"BFGS\", \"CG\" \"L-BFGS-B\" methods. NULL, finite-difference approximation used, see details. checkstartfix logical test starting fixed values. change . calcvcov logical indicating (asymptotic) covariance matrix required. ... arguments passed optim, constrOptim custom.optim function.","code":""},{"path":"https://lbbe-software.github.io/fitdistrplus/reference/mmedist.html","id":"details","dir":"Reference","previous_headings":"","what":"Details","title":"Matching moment fit of univariate distributions — mmedist","text":"argument distr can one base R distributions: \"norm\", \"lnorm\", \"exp\" \"pois\", \"gamma\", \"logis\", \"nbinom\" , \"geom\", \"beta\" \"unif\". case, arguments data distr required, estimate computed closed-form formula. distributions characterized one parameter (\"geom\", \"pois\" \"exp\"), parameter simply estimated matching theoretical observed means, distributions characterized two parameters, parameters estimated matching theoretical observed means variances (Vose, 2000). Note closed-form formula, fix.arg used start ignored. argument distr can also distribution name long corresponding mdistr function exists, e.g. \"pareto\" \"mpareto\" exists. case arguments arguments order memp supplied order carry matching numerically, minimization sum squared differences observed theoretical moments. Optionnally arguments can supplied control optimization (see 'details' section mledist details arguments control optimization). case, fix.arg can used start taken account. non closed-form estimators, memp must provided compute empirical moments. weights=NULL, function must two arguments x, order: x numeric vector data order order moment. weights!=NULL, function must three arguments x, order, weights: x numeric vector data, order order moment, weights numeric vector weights. See examples . Optionally, vector weights can used fitting process. 
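For the closed-form case described above, it can help to see the moment matching written out for one two-parameter distribution. The following is an editorial worked example for the gamma distribution, where the mean is shape/rate and the variance is shape/rate^2, so matching the first two moments gives shape = mean^2/var and rate = mean/var; the variance denominator used internally (n versus n-1) may differ slightly from var().

library(fitdistrplus)
set.seed(1234)
x <- rgamma(1000, shape = 3, rate = 2)
m <- mean(x)
v <- var(x)
c(shape = m^2 / v, rate = m / v)     # hand-computed moment-matching estimates
mmedist(x, "gamma")$estimate         # closed-form MME; agrees up to the variance convention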
default (weigths=NULL), ordinary MME carried , otherwise specified weights used compute (raw centered) weighted moments. closed-form estimators, weighted mean variance computed wtdmean wtdvar Hmisc package. numerical minimization used, weighted expected computed memp function. yet possible take account weighths functions plotdist, plotdistcens, plot.fitdist, plot.fitdistcens, cdfcomp, cdfcompcens, denscomp, ppcomp, qqcomp, gofstat descdist (developments planned future). function intended called directly internally called fitdist bootdist used matching moments method. Since Version 1.2-0, mmedist automatically computes asymptotic covariance matrix using . Ibragimov R. 'minskii (1981), hence theoretical moments mdist defined order equals twice maximal order given order. instance, normal distribution, fit expectation variance need mnorm order \\(2\\times2=4\\).","code":""},{"path":"https://lbbe-software.github.io/fitdistrplus/reference/mmedist.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Matching moment fit of univariate distributions — mmedist","text":"mmedist returns list following components, estimate parameter estimates. convergence integer code convergence optim defined defined user user-supplied optimization function. 0 indicates successful convergence. 1 indicates iteration limit optim reached. 10 indicates degeneracy Nealder-Mead simplex. 100 indicates optim encountered internal error. value minimal value reached criterion minimize. hessian symmetric matrix computed optim estimate Hessian solution found computed user-supplied optimization function. optim.function (appropriate) name optimization function used maximum likelihood. optim.method (appropriate) optim used, name algorithm used, field method custom.optim function otherwise. fix.arg named list giving values parameters named distribution must kept fixed rather estimated maximum likelihood NULL parameters. fix.arg.fun function used set value fix.arg NULL. weights vector weigths used estimation process NULL. counts two-element integer vector giving number calls log-likelihood function gradient respectively. excludes calls needed compute Hessian, requested, calls log-likelihood function compute finite-difference approximation gradient. counts returned optim user-supplied function set NULL. optim.message character string giving additional information returned optimizer, NULL. understand exactly message, see source code. loglik log-likelihood value. method either \"closed formula\" name optimization method. order order moment(s) matched. memp empirical moment function.","code":""},{"path":[]},{"path":"https://lbbe-software.github.io/fitdistrplus/reference/mmedist.html","id":"references","dir":"Reference","previous_headings":"","what":"References","title":"Matching moment fit of univariate distributions — mmedist","text":". Ibragimov R. 'minskii (1981), Statistical Estimation - Asymptotic Theory, Springer-Verlag, doi:10.1007/978-1-4899-0027-2 Evans M, Hastings N Peacock B (2000), Statistical distributions. John Wiley Sons Inc, doi:10.1002/9780470627242 . Vose D (2000), Risk analysis, quantitative guide. John Wiley & Sons Ltd, Chischester, England, pp. 99-143. Delignette-Muller ML Dutang C (2015), fitdistrplus: R Package Fitting Distributions. 
Journal Statistical Software, 64(4), 1-34, doi:10.18637/jss.v064.i04 .","code":""},{"path":"https://lbbe-software.github.io/fitdistrplus/reference/mmedist.html","id":"author","dir":"Reference","previous_headings":"","what":"Author","title":"Matching moment fit of univariate distributions — mmedist","text":"Marie-Laure Delignette-Muller Christophe Dutang.","code":""},{"path":"https://lbbe-software.github.io/fitdistrplus/reference/mmedist.html","id":"ref-examples","dir":"Reference","previous_headings":"","what":"Examples","title":"Matching moment fit of univariate distributions — mmedist","text":"","code":"# (1) basic fit of a normal distribution with moment matching estimation # set.seed(1234) n <- 100 x1 <- rnorm(n=n) mmedist(x1, \"norm\") #> $estimate #> mean sd #> -0.1567617 0.9993707 #> #> $convergence #> [1] 0 #> #> $value #> NULL #> #> $hessian #> NULL #> #> $optim.function #> NULL #> #> $opt.meth #> NULL #> #> $fix.arg #> NULL #> #> $fix.arg.fun #> NULL #> #> $weights #> NULL #> #> $counts #> NULL #> #> $optim.message #> NULL #> #> $loglik #> [1] -141.8309 #> #> $method #> [1] \"closed formula\" #> #> $order #> [1] 1 2 #> #> $memp #> NULL #> #> $vcov #> NULL #> #weighted w <- c(rep(1, n/2), rep(10, n/2)) mmedist(x1, \"norm\", weights=w)$estimate #> Warning: weights are not taken into account in the default initial values #> mean sd #> 0.08565839 1.02915474 # (2) fit a discrete distribution (Poisson) # set.seed(1234) x2 <- rpois(n=30,lambda = 2) mmedist(x2, \"pois\") #> $estimate #> lambda #> 1.7 #> #> $convergence #> [1] 0 #> #> $value #> NULL #> #> $hessian #> NULL #> #> $optim.function #> NULL #> #> $opt.meth #> NULL #> #> $fix.arg #> NULL #> #> $fix.arg.fun #> NULL #> #> $weights #> NULL #> #> $counts #> NULL #> #> $optim.message #> NULL #> #> $loglik #> [1] -46.18434 #> #> $method #> [1] \"closed formula\" #> #> $order #> [1] 1 #> #> $memp #> NULL #> #> $vcov #> NULL #> # (3) fit a finite-support distribution (beta) # set.seed(1234) x3 <- rbeta(n=100,shape1=5, shape2=10) mmedist(x3, \"beta\") #> $estimate #> shape1 shape2 #> 4.522734 10.219685 #> #> $convergence #> [1] 0 #> #> $value #> NULL #> #> $hessian #> NULL #> #> $optim.function #> NULL #> #> $opt.meth #> NULL #> #> $fix.arg #> NULL #> #> $fix.arg.fun #> NULL #> #> $weights #> NULL #> #> $counts #> NULL #> #> $optim.message #> NULL #> #> $loglik #> [1] 78.19503 #> #> $method #> [1] \"closed formula\" #> #> $order #> [1] 1 2 #> #> $memp #> NULL #> #> $vcov #> NULL #> # (4) fit a Pareto distribution # # \\donttest{ require(actuar) #simulate a sample x4 <- rpareto(1000, 6, 2) #empirical raw moment memp <- function(x, order) mean(x^order) memp2 <- function(x, order, weights) sum(x^order * weights)/sum(weights) #fit by MME mmedist(x4, \"pareto\", order=c(1, 2), memp=memp, start=list(shape=10, scale=10), lower=1, upper=Inf) #> $estimate #> shape scale #> 4.560423 1.464764 #> #> $convergence #> [1] 0 #> #> $value #> [1] 6.740714e-13 #> #> $hessian #> NULL #> #> $optim.function #> [1] \"constrOptim\" #> #> $optim.method #> [1] \"Nelder-Mead\" #> #> $fix.arg #> NULL #> #> $fix.arg.fun #> NULL #> #> $weights #> NULL #> #> $counts #> function gradient #> 404 NA #> #> $optim.message #> NULL #> #> $loglik #> [1] -80.49091 #> #> $method #> [1] \"default\" #> #> $order #> [1] 1 2 #> #> $memp #> function(x, order) mean(x^order) #> #> #> $vcov #> NULL #> #fit by weighted MME w <- rep(1, length(x4)) w[x4 < 1] <- 2 mmedist(x4, \"pareto\", order=c(1, 2), memp=memp2, weights=w, start=list(shape=10, scale=10), lower=1, upper=Inf) #> Warning: 
weights are not taken into account in the default initial values #> $estimate #> shape scale #> 5.656694 1.630806 #> #> $convergence #> [1] 0 #> #> $value #> [1] 7.09812e-14 #> #> $hessian #> NULL #> #> $optim.function #> [1] \"constrOptim\" #> #> $optim.method #> [1] \"Nelder-Mead\" #> #> $fix.arg #> NULL #> #> $fix.arg.fun #> NULL #> #> $weights #> [1] 2 2 2 2 2 2 2 1 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 #> [38] 2 2 2 1 2 2 1 2 2 2 1 2 2 2 2 2 1 1 2 2 2 2 2 1 2 2 2 2 2 2 2 2 2 2 2 2 2 #> [75] 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 #> [112] 2 2 2 2 2 2 2 2 2 2 2 2 1 2 2 2 2 2 2 2 2 1 2 2 1 2 2 2 2 2 2 2 2 2 2 1 2 #> [149] 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 1 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 1 #> [186] 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 1 2 2 2 2 2 2 2 2 2 #> [223] 2 2 2 2 2 2 2 2 2 2 2 2 2 1 2 2 2 2 2 2 2 2 2 1 2 2 2 2 2 2 2 2 2 2 2 2 1 #> [260] 2 2 2 2 2 2 2 2 2 2 2 2 1 2 2 2 2 2 2 2 2 2 2 2 1 2 2 2 2 2 1 2 2 2 2 2 2 #> [297] 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 1 1 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 #> [334] 2 2 2 2 2 2 2 2 2 2 1 2 2 2 2 2 2 2 2 2 2 2 1 2 2 2 2 2 2 1 2 2 2 2 1 2 2 #> [371] 1 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 1 2 2 2 2 2 2 2 #> [408] 1 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 1 2 2 2 2 2 2 2 2 2 2 1 #> [445] 2 2 2 2 2 1 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 1 2 2 2 2 2 2 1 #> [482] 2 2 2 2 2 1 2 2 2 2 2 2 2 1 2 2 2 2 2 2 2 2 2 2 2 2 2 2 1 2 2 2 2 2 2 2 1 #> [519] 2 2 2 2 2 2 2 2 2 2 1 2 2 1 2 2 2 2 2 2 2 2 2 2 2 2 2 2 1 2 1 2 2 2 2 2 2 #> [556] 2 2 2 2 1 2 2 1 2 2 2 2 1 1 2 2 2 2 1 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 #> [593] 2 2 2 2 2 2 2 2 2 2 2 2 2 2 1 2 2 2 2 2 1 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 #> [630] 2 2 2 2 2 2 2 2 2 2 2 2 2 2 1 2 2 2 2 1 2 2 2 2 2 2 2 2 2 2 2 2 2 2 1 2 2 #> [667] 2 2 2 2 2 2 2 1 2 1 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 1 2 2 2 2 2 2 1 2 2 1 2 #> [704] 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 1 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 #> [741] 2 1 2 2 2 2 2 2 2 2 1 1 2 2 2 2 2 2 1 2 2 2 2 2 2 2 2 2 1 2 2 2 1 2 2 2 2 #> [778] 2 2 2 2 1 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 1 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 #> [815] 2 2 2 2 1 2 2 2 2 2 2 2 2 2 2 2 2 2 2 1 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 #> [852] 2 2 1 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 1 2 2 2 2 1 2 2 1 2 2 2 2 1 2 #> [889] 2 1 2 2 2 2 2 2 2 2 2 2 1 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 #> [926] 2 2 2 1 2 2 2 2 2 2 2 2 1 2 1 1 2 2 1 2 2 2 2 2 1 2 2 2 2 2 2 2 1 1 2 2 2 #> [963] 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 1 2 2 2 2 2 2 2 2 #> [1000] 2 #> #> $counts #> function gradient #> 767 NA #> #> $optim.message #> NULL #> #> $loglik #> [1] 119.7361 #> #> $method #> [1] \"default\" #> #> $order #> [1] 1 2 #> #> $memp #> function(x, order, weights) sum(x^order * weights)/sum(weights) #> #> #> $vcov #> NULL #> # }"},{"path":"https://lbbe-software.github.io/fitdistrplus/reference/msedist.html","id":null,"dir":"Reference","previous_headings":"","what":"Maximum spacing estimation of univariate distributions — msedist","title":"Maximum spacing estimation of univariate distributions — msedist","text":"Fit univariate distribution maximizing (log) spacings non censored data.","code":""},{"path":"https://lbbe-software.github.io/fitdistrplus/reference/msedist.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Maximum spacing estimation of univariate distributions — msedist","text":"","code":"msedist(data, distr, phidiv=\"KL\", power.phidiv=NULL, start = NULL, 
fix.arg = NULL, optim.method = \"default\", lower = -Inf, upper = Inf, custom.optim = NULL, weights=NULL, silent = TRUE, gradient = NULL, checkstartfix=FALSE, calcvcov=FALSE, ...)"},{"path":"https://lbbe-software.github.io/fitdistrplus/reference/msedist.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Maximum spacing estimation of univariate distributions — msedist","text":"data numeric vector non censored data. distr character string \"name\" naming distribution corresponding quantile function qname corresponding density distribution dname must classically defined. phidiv character string coding name phi-divergence used : \"KL\" Kullback-Leibler information (corresponds classic maximum spacing estimation), \"J\" Jeffreys' divergence, \"R\" Renyi's divergence, \"H\" Hellinger distance, \"V\" Vajda's measure information, see details. power.phidiv relevant, numeric power used phi-divergence : NULL phidiv=\"KL\" phidiv=\"J\" , positive different 1 phidiv=\"R\", greater equal 1 phidiv=\"H\" phidiv=\"V\", see details. start named list giving initial values parameters named distribution function data computing initial values returning named list. argument may omitted (default) distributions reasonable starting values computed (see 'details' section mledist). fix.arg optional named list giving values fixed parameters named distribution function data computing (fixed) parameter values returning named list. Parameters fixed value thus estimated. optim.method \"default\" optimization method pass optim. lower Left bounds parameters \"L-BFGS-B\" method (see optim). upper Right bounds parameters \"L-BFGS-B\" method (see optim). custom.optim function carrying optimization. weights optional vector weights used fitting process. NULL numeric vector strictly positive integers (typically number occurences observation). non-NULL, weighted MSE used, otherwise ordinary MSE. silent logical remove show warnings bootstraping. gradient function return gradient gof distance \"BFGS\", \"CG\" \"L-BFGS-B\" methods. NULL, finite-difference approximation used. checkstartfix logical test starting fixed values. change . calcvcov logical indicating (asymptotic) covariance matrix required. (currently ignored) ... arguments passed optim, constrOptim custom.optim function.","code":""},{"path":"https://lbbe-software.github.io/fitdistrplus/reference/msedist.html","id":"details","dir":"Reference","previous_headings":"","what":"Details","title":"Maximum spacing estimation of univariate distributions — msedist","text":"msedist function numerically maximizes phi-divergence function spacings, spacings differences cumulative distribution function evaluated sorted dataset. classical maximum spacing estimation (MSE) introduced Cheng Amin (1986) Ranneby (1984) independently phi-diverence logarithm, see Anatolyev Kosenok (2005) link MSE maximum likelihood estimation. MSE generalized Ranneby Ekstrom (1997) allowing different phi-divergence function. Generalized MSE maximizes $$ S_n(\\theta)=\\frac{1}{n+1}\\sum_{=1}^{n+1} \\phi\\left(F(x_{()}; \\theta)-F(x_{(-1)}; \\theta) \\right), $$ \\(F(;\\theta)\\) parametric distribution function fitted, \\(\\phi\\) phi-divergence function, \\(x_{(1)}<\\dots0, \\alpha\\neq 1 $$ Hellinger distance (phidiv=\"H\" power.phidiv=p) $$\\phi(x)=-|1-x^{1/p}|^p \\textrm{ } p\\ge 1 $$ Vajda's measure information (phidiv=\"V\" power.phidiv=beta) $$\\phi(x)=-|1-x|^\\beta \\textrm{ } \\beta\\ge 1 $$ optimization process mledist, see 'details' section function. 
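The displayed objective above has lost its summation index in this rendering; written out, generalized MSE maximizes $$S_n(\theta)=\frac{1}{n+1}\sum_{i=1}^{n+1} \phi\left(F(x_{(i)}; \theta)-F(x_{(i-1)}; \theta)\right),$$ with the conventions $F(x_{(0)}; \theta)=0$, $F(x_{(n+1)}; \theta)=1$ and $x_{(1)} \le \dots \le x_{(n)}$ the sorted sample. A short editorial sketch, using only arguments documented above, comparing the default Kullback-Leibler spacing fit with the Hellinger-distance variant on the serving-size data:

library(fitdistrplus)
data(groundbeef)
serving <- groundbeef$serving
# classic maximum spacing estimation (Kullback-Leibler phi-divergence, the default)
msedist(serving, "weibull")$estimate
# same fit with the Hellinger-distance variant (power.phidiv must be >= 1)
msedist(serving, "weibull", phidiv = "H", power.phidiv = 2)$estimate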
function intended called directly internally called fitdist bootdist. function intended used non-censored data. NB: data values particularly small large, scaling may needed optimization process, see mledist's examples.","code":""},{"path":"https://lbbe-software.github.io/fitdistrplus/reference/msedist.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Maximum spacing estimation of univariate distributions — msedist","text":"msedist returns list following components, estimate parameter estimates. convergence integer code convergence optim defined defined user user-supplied optimization function. 0 indicates successful convergence. 1 indicates iteration limit optim reached. 10 indicates degeneracy Nealder-Mead simplex. 100 indicates optim encountered internal error. value minimal value reached criterion minimize. hessian symmetric matrix computed optim estimate Hessian solution found computed user-supplied optimization function. optim.function name optimization function used maximum likelihood. optim.method optim used, name algorithm used, field method custom.optim function otherwise. fix.arg named list giving values parameters named distribution must kept fixed rather estimated maximum likelihood NULL parameters. fix.arg.fun function used set value fix.arg NULL. weights vector weigths used estimation process NULL. counts two-element integer vector giving number calls log-likelihood function gradient respectively. excludes calls needed compute Hessian, requested, calls log-likelihood function compute finite-difference approximation gradient. counts returned optim user-supplied function set NULL. optim.message character string giving additional information returned optimizer, NULL. understand exactly message, see source code. loglik log-likelihood value. phidiv character string coding name phi-divergence used either \"KL\", \"J\", \"R\", \"H\" \"V\". power.phidiv Either NULL numeric power used phi-divergence.","code":""},{"path":[]},{"path":"https://lbbe-software.github.io/fitdistrplus/reference/msedist.html","id":"references","dir":"Reference","previous_headings":"","what":"References","title":"Maximum spacing estimation of univariate distributions — msedist","text":"Anatolyev, S., Kosenok, G. (2005). alternative maximum likelihood based spacings. Econometric Theory, 21(2), 472-476, doi:10.1017/S0266466605050255 . Cheng, R.C.H. N..K. Amin (1983) Estimating parameters continuous univariate distributions shifted origin. Journal Royal Statistical Society Series B 45, 394-403, doi:10.1111/j.2517-6161.1983.tb01268.x . Ranneby, B. (1984) maximum spacing method: estimation method related maximum likelihood method. Scandinavian Journal Statistics 11, 93-112. Ranneby, B. Ekstroem, M. (1997). Maximum spacing estimates based different metrics. 
Umea universitet.","code":""},{"path":"https://lbbe-software.github.io/fitdistrplus/reference/msedist.html","id":"author","dir":"Reference","previous_headings":"","what":"Author","title":"Maximum spacing estimation of univariate distributions — msedist","text":"Marie-Laure Delignette-Muller Christophe Dutang.","code":""},{"path":"https://lbbe-software.github.io/fitdistrplus/reference/msedist.html","id":"ref-examples","dir":"Reference","previous_headings":"","what":"Examples","title":"Maximum spacing estimation of univariate distributions — msedist","text":"","code":"# (1) Fit of a Weibull distribution to serving size data by maximum # spacing estimation # data(groundbeef) serving <- groundbeef$serving msedist(serving, \"weibull\") #> $estimate #> shape scale #> 1.423799 80.894950 #> #> $convergence #> [1] 0 #> #> $value #> [1] 3.789824 #> #> $hessian #> shape scale #> shape 0.792656647 -0.0043440632 #> scale -0.004344063 0.0002995895 #> #> $optim.function #> [1] \"optim\" #> #> $optim.method #> [1] \"Nelder-Mead\" #> #> $fix.arg #> NULL #> #> $fix.arg.fun #> NULL #> #> $weights #> NULL #> #> $counts #> function gradient #> 59 NA #> #> $optim.message #> NULL #> #> $loglik #> [1] -1287.97 #> #> $phidiv #> [1] \"KL\" #> #> $power.phidiv #> NULL #> # (2) Fit of an exponential distribution # set.seed(123) x1 <- rexp(1e3) #the convergence is quick msedist(x1, \"exp\", control=list(trace=0, REPORT=1)) #> $estimate #> rate #> 0.967625 #> #> $convergence #> [1] 0 #> #> $value #> [1] 7.516802 #> #> $hessian #> rate #> rate 1.066843 #> #> $optim.function #> [1] \"optim\" #> #> $optim.method #> [1] \"BFGS\" #> #> $fix.arg #> NULL #> #> $fix.arg.fun #> NULL #> #> $weights #> NULL #> #> $counts #> function gradient #> 12 2 #> #> $optim.message #> NULL #> #> $loglik #> [1] -1029.544 #> #> $phidiv #> [1] \"KL\" #> #> $power.phidiv #> NULL #>"},{"path":"https://lbbe-software.github.io/fitdistrplus/reference/plotdist.html","id":null,"dir":"Reference","previous_headings":"","what":"Plot of empirical and theoretical distributions for non-censored data — plotdist","title":"Plot of empirical and theoretical distributions for non-censored data — plotdist","text":"Plots empirical distribution (non-censored data) theoretical one specified.","code":""},{"path":"https://lbbe-software.github.io/fitdistrplus/reference/plotdist.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Plot of empirical and theoretical distributions for non-censored data — plotdist","text":"","code":"plotdist(data, distr, para, histo = TRUE, breaks = \"default\", demp = FALSE, discrete, ...)"},{"path":"https://lbbe-software.github.io/fitdistrplus/reference/plotdist.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Plot of empirical and theoretical distributions for non-censored data — plotdist","text":"data numeric vector. distr character string \"name\" naming distribution corresponding density function dname, corresponding distribution function pname corresponding quantile function qname must defined, directly density function. argument may omitted para omitted. para named list giving parameters named distribution. argument may omitted distr omitted. histo logical plot histogram using hist function. breaks \"default\" histogram plotted function hist default breaks definition. Else breaks passed function hist. argument taken account discrete TRUE. 
demp logical plot empirical density first plot (alone superimposed histogram depending value argument histo) using density function. discrete TRUE, distribution considered discrete. \tdistr discrete missing, discrete set \tFALSE. discrete missing distr, \tdiscrete set TRUE distr belongs \t\"binom\", \"nbinom\",\"geom\", \"hyper\" \"pois\". ... graphical arguments passed graphical functions used plotdist.","code":""},{"path":"https://lbbe-software.github.io/fitdistrplus/reference/plotdist.html","id":"details","dir":"Reference","previous_headings":"","what":"Details","title":"Plot of empirical and theoretical distributions for non-censored data — plotdist","text":"Empirical , specified, theoretical distributions plotted density cdf. plot density, user can use arguments histo demp specify wants histogram using function hist, density plot using function density, (least one two arguments must put \"TRUE\"). continuous distributions, function hist used default breaks definition breaks \"default\" passing breaks argument differs \"default\". continuous distribution theoretical distribution specified arguments distname para, Q-Q plot (plot quantiles theoretical fitted distribution (x-axis) empirical quantiles data) P-P plot (.e. value data set, plot cumulative density function fitted distribution (x-axis) empirical cumulative density function (y-axis)) also given (Cullen Frey, 1999). function ppoints (default parameter argument ) used Q-Q plot, generate set probabilities evaluate inverse distribution. NOTE VERSION 0.4-3, ppoints also used P-P plot cdf plot continuous data. personalize four plots proposed continuous data, example change plotting position, recommend use functions cdfcomp, denscomp, qqcomp ppcomp.","code":""},{"path":[]},{"path":"https://lbbe-software.github.io/fitdistrplus/reference/plotdist.html","id":"references","dir":"Reference","previous_headings":"","what":"References","title":"Plot of empirical and theoretical distributions for non-censored data — plotdist","text":"Cullen AC Frey HC (1999), Probabilistic techniques exposure assessment. Plenum Press, USA, pp. 81-155. Delignette-Muller ML Dutang C (2015), fitdistrplus: R Package Fitting Distributions. 
Journal Statistical Software, 64(4), 1-34, doi:10.18637/jss.v064.i04 .","code":""},{"path":"https://lbbe-software.github.io/fitdistrplus/reference/plotdist.html","id":"author","dir":"Reference","previous_headings":"","what":"Author","title":"Plot of empirical and theoretical distributions for non-censored data — plotdist","text":"Marie-Laure Delignette-Muller Christophe Dutang.","code":""},{"path":"https://lbbe-software.github.io/fitdistrplus/reference/plotdist.html","id":"ref-examples","dir":"Reference","previous_headings":"","what":"Examples","title":"Plot of empirical and theoretical distributions for non-censored data — plotdist","text":"","code":"# (1) Plot of an empirical distribution with changing # of default line types for CDF and colors # and optionally adding a density line # set.seed(1234) x1 <- rnorm(n=30) plotdist(x1) plotdist(x1,demp = TRUE) plotdist(x1,histo = FALSE, demp = TRUE) #> Warning: arguments ‘freq’, ‘main’, ‘xlab’ are not made use of plotdist(x1, col=\"blue\", type=\"b\", pch=16) #> Warning: graphical parameter \"type\" is obsolete #> Warning: graphical parameter \"type\" is obsolete #> Warning: graphical parameter \"type\" is obsolete #> Warning: graphical parameter \"type\" is obsolete plotdist(x1, type=\"s\") #> Warning: graphical parameter \"type\" is obsolete #> Warning: graphical parameter \"type\" is obsolete #> Warning: graphical parameter \"type\" is obsolete #> Warning: graphical parameter \"type\" is obsolete # (2) Plot of a discrete distribution against data # set.seed(1234) x2 <- rpois(n=30, lambda = 2) plotdist(x2, discrete=TRUE) plotdist(x2, \"pois\", para=list(lambda = mean(x2))) plotdist(x2, \"pois\", para=list(lambda = mean(x2)), lwd=\"2\") # (3) Plot of a continuous distribution against data # xn <- rnorm(n=100, mean=10, sd=5) plotdist(xn, \"norm\", para=list(mean=mean(xn), sd=sd(xn))) plotdist(xn, \"norm\", para=list(mean=mean(xn), sd=sd(xn)), pch=16) plotdist(xn, \"norm\", para=list(mean=mean(xn), sd=sd(xn)), demp = TRUE) plotdist(xn, \"norm\", para=list(mean=mean(xn), sd=sd(xn)), histo = FALSE, demp = TRUE) # (4) Plot of serving size data # data(groundbeef) plotdist(groundbeef$serving, type=\"s\") #> Warning: graphical parameter \"type\" is obsolete #> Warning: graphical parameter \"type\" is obsolete #> Warning: graphical parameter \"type\" is obsolete #> Warning: graphical parameter \"type\" is obsolete # (5) Plot of numbers of parasites with a Poisson distribution data(toxocara) number <- toxocara$number plotdist(number, discrete = TRUE) plotdist(number,\"pois\",para=list(lambda=mean(number)))"},{"path":"https://lbbe-software.github.io/fitdistrplus/reference/plotdistcens.html","id":null,"dir":"Reference","previous_headings":"","what":"Plot of empirical and theoretical distributions for censored data — plotdistcens","title":"Plot of empirical and theoretical distributions for censored data — plotdistcens","text":"Plots empirical distribution censored data theoretical one specified.","code":""},{"path":"https://lbbe-software.github.io/fitdistrplus/reference/plotdistcens.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Plot of empirical and theoretical distributions for censored data — plotdistcens","text":"","code":"plotdistcens(censdata, distr, para, leftNA = -Inf, rightNA = Inf, NPMLE = TRUE, Turnbull.confint = FALSE, NPMLE.method = \"Wang\", 
...)"},{"path":"https://lbbe-software.github.io/fitdistrplus/reference/plotdistcens.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Plot of empirical and theoretical distributions for censored data — plotdistcens","text":"censdata dataframe two columns respectively named left right, describing observed value interval. left column contains either NA left censored observations, left bound interval interval censored observations, observed value non-censored observations. right column contains either NA right censored observations, right bound interval interval censored observations, observed value non-censored observations. distr character string \"name\" naming distribution, corresponding density function dname corresponding distribution function pname must defined, directly density function. para named list giving parameters named distribution. argument may omitted distr omitted. leftNA real value left bound left censored observations : -Inf finite value 0 positive data example. rightNA real value right bound right censored observations : Inf finite value realistic maximum value. NPMLE TRUE NPMLE (nonparametric maximum likelihood estimate) technique used estimate cdf curve censored data previous arguments leftNA rightNA used (see details) Turnbull.confint TRUE confidence intervals added Turnbull plot. case NPMLE.method forced \"Turnbull.middlepoints\" NPMLE.method Three NPMLE techniques provided, \"Wang\", default one, rewritten package npsurv using function constrOptim package stats optimisation, \"Turnbull.middlepoints\", older one implemented package survival \"Turnbull.intervals\" uses Turnbull algorithm package survival associates interval equivalence class instead middlepoint interval (see details). \"Wang\" \"Turnbull.intervals\" enable derivation Q-Q plot P-P plot. ... graphical arguments passed methods. title plot can modified using argument main CDF plot.","code":""},{"path":"https://lbbe-software.github.io/fitdistrplus/reference/plotdistcens.html","id":"details","dir":"Reference","previous_headings":"","what":"Details","title":"Plot of empirical and theoretical distributions for censored data — plotdistcens","text":"NPMLE TRUE, NPMLE.method \"Wang\" , empirical distributions plotted cdf using either constrained Newton method (Wang, 2008) hierarchical constrained Newton method (Wang, 2013) compute overall empirical cdf curve. NPMLE TRUE, NPMLE.method \"Turnbull.intervals\" , empirical plotted cdf using EM approach Turnbull (Turnbull, 1974). two cases, grey rectangles represent areas empirical distribution function unique. cases theoretical distribution specified, two goodness--fit plots also provided, Q-Q plot (plot quantiles theoretical fitted distribution (x-axis) empirical quantiles data) P-P plot (.e. value data set, plot cumulative density function fitted distribution (x-axis) empirical cumulative density function (y-axis)). Grey rectangles Q-Q plot P-P plot also represent areas non uniqueness empirical quantiles probabilities, directly derived non uniqueness areas empirical cumulative distribution. NPMLE TRUE, NPMLE.method \"Turnbull.middlepoints\", empirical , specified, theoretical distributions plotted cdf using EM approach Turnbull (Turnbull, 1974) compute overall empirical cdf curve, confidence intervals Turnbull.confint TRUE, calls functions survfit plot.survfit survival package. 
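To make the goodness-of-fit point above concrete, a minimal sketch reusing the smokedfish data and the rough normal parameters from the examples further down (those parameter values are purely illustrative); with "Wang" or "Turnbull.intervals", supplying a theoretical distribution adds a Q-Q plot and a P-P plot to the CDF plot.
library(fitdistrplus)
data(smokedfish)
d1 <- as.data.frame(log10(smokedfish))
# default Wang NPMLE with a candidate normal distribution:
# CDF, Q-Q and P-P plots are drawn
plotdistcens(d1, "norm", para = list(mean = -1.6, sd = 1.5))
# the Turnbull algorithm associated with equivalence intervals also allows them
plotdistcens(d1, "norm", para = list(mean = -1.6, sd = 1.5),
             NPMLE.method = "Turnbull.intervals")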
NPMLE FALSE empirical , specified, theoretical distributions plotted cdf, data directly reported segments interval, left right censored data, points non-censored data. plotting, observations ordered rank r associated . Left censored observations ordered first, right bounds. Interval censored non censored observations ordered mid-points , last, right censored observations ordered left bounds. leftNA (resp. rightNA) finite, left censored (resp. right censored) observations considered interval censored observations ordered mid-points non-censored interval censored data. sometimes necessary fix rightNA leftNA realistic extreme value, even exactly known, obtain reasonable global ranking observations. ranking, n observations plotted point (one x-value) segment (interval possible x-values), y-value equal r/n, r rank observation global ordering previously described. second method may interesting certainly less rigorous methods prefered.","code":""},{"path":[]},{"path":"https://lbbe-software.github.io/fitdistrplus/reference/plotdistcens.html","id":"references","dir":"Reference","previous_headings":"","what":"References","title":"Plot of empirical and theoretical distributions for censored data — plotdistcens","text":"Turnbull BW (1974), Nonparametric estimation survivorship function doubly censored data. Journal American Statistical Association, 69, 169-173, doi:10.2307/2285518 . Wang Y (2008), Dimension-reduced nonparametric maximum likelihood computation interval-censored data. Computational Statistics & Data Analysis, 52, 2388-2402, doi:10.1016/j.csda.2007.10.018 . Wang Y Taylor SM (2013), Efficient computation nonparametric survival functions via hierarchical mixture formulation. Statistics Computing, 23, 713-725, doi:10.1007/s11222-012-9341-9 . Wang, Y., & Fani, S. (2018), Nonparametric maximum likelihood computation U-shaped hazard function. Statistics Computing, 28(1), 187-200, doi:10.1007/s11222-017-9724-z . Delignette-Muller ML Dutang C (2015), fitdistrplus: R Package Fitting Distributions. 
Journal Statistical Software, 64(4), 1-34, doi:10.18637/jss.v064.i04 .","code":""},{"path":"https://lbbe-software.github.io/fitdistrplus/reference/plotdistcens.html","id":"author","dir":"Reference","previous_headings":"","what":"Author","title":"Plot of empirical and theoretical distributions for censored data — plotdistcens","text":"Marie-Laure Delignette-Muller Christophe Dutang.","code":""},{"path":"https://lbbe-software.github.io/fitdistrplus/reference/plotdistcens.html","id":"ref-examples","dir":"Reference","previous_headings":"","what":"Examples","title":"Plot of empirical and theoretical distributions for censored data — plotdistcens","text":"","code":"# (1) Plot of an empirical censored distribution (censored data) as a CDF # using the default Wang method # data(smokedfish) d1 <- as.data.frame(log10(smokedfish)) plotdistcens(d1) # (2) Add the CDF of a normal distribution # plotdistcens(d1, \"norm\", para=list(mean = -1.6, sd = 1.5)) # (3) Various plots of the same empirical distribution # # default Wang plot with representation of equivalence classess plotdistcens(d1, NPMLE = TRUE, NPMLE.method = \"Wang\") # same plot but using the Turnbull alorithm from the package survival plotdistcens(d1, NPMLE = TRUE, NPMLE.method = \"Wang\") # Turnbull plot with middlepoints (as in the package survival) plotdistcens(d1, NPMLE = TRUE, NPMLE.method = \"Turnbull.middlepoints\") # Turnbull plot with middlepoints and confidence intervals plotdistcens(d1, NPMLE = TRUE, NPMLE.method = \"Turnbull.middlepoints\", Turnbull.confint = TRUE) # with intervals and points plotdistcens(d1,rightNA=3, NPMLE = FALSE) # with intervals and points # defining a minimum value for left censored values plotdistcens(d1,leftNA=-3, NPMLE = FALSE)"},{"path":"https://lbbe-software.github.io/fitdistrplus/reference/prefit.html","id":null,"dir":"Reference","previous_headings":"","what":"Pre-fitting procedure — prefit","title":"Pre-fitting procedure — prefit","text":"Search good starting values","code":""},{"path":"https://lbbe-software.github.io/fitdistrplus/reference/prefit.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Pre-fitting procedure — prefit","text":"","code":"prefit(data, distr, method = c(\"mle\", \"mme\", \"qme\", \"mge\"), feasible.par, memp=NULL, order=NULL, probs=NULL, qtype=7, gof=NULL, fix.arg=NULL, lower, upper, weights=NULL, silent=TRUE, ...)"},{"path":"https://lbbe-software.github.io/fitdistrplus/reference/prefit.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Pre-fitting procedure — prefit","text":"data numeric vector. distr character string \"name\" naming distribution corresponding density function dname, corresponding distribution function pname corresponding quantile function qname must defined, directly density function. method character string coding fitting method: \"mle\" 'maximum likelihood estimation', \"mme\" 'moment matching estimation', \"qme\" 'quantile matching estimation' \"mge\" 'maximum goodness--fit estimation'. feasible.par named list giving initial values parameters named distribution function data computing initial values returning named list. argument may omitted (default) distributions reasonable starting values computed (see 'details' section mledist). may account closed-form formulas. order numeric vector moment order(s). length vector must equal number parameters estimate. memp function implementing empirical moments, raw centered consistent distr argument (weights argument). 
probs numeric vector probabilities quantile matching done. length vector must equal number parameters estimate. qtype quantile type used R quantile function compute empirical quantiles, (default 7 corresponds default quantile method R). gof character string coding name goodness--fit distance used : \"CvM\" Cramer-von Mises distance,\"KS\" Kolmogorov-Smirnov distance, \"AD\" Anderson-Darling distance, \"ADR\", \"ADL\", \"AD2R\", \"AD2L\" \"AD2\" variants Anderson-Darling distance described Luceno (2006). fix.arg optional named list giving values fixed parameters named distribution function data computing (fixed) parameter values returning named list. Parameters fixed value thus estimated maximum likelihood procedure. use argument possible method=\"mme\" closed-form formula used. weights optional vector weights used fitting process. NULL numeric vector. non-NULL, weighted MLE used, otherwise ordinary MLE. silent logical remove show warnings. lower Lower bounds parameters. upper Upper bounds parameters. ... arguments passed generic functions, one functions \"mledist\", \"mmedist\", \"qmedist\" \"mgedist\" depending chosen method. See mledist, mmedist, qmedist, mgedist details parameter estimation.","code":""},{"path":"https://lbbe-software.github.io/fitdistrplus/reference/prefit.html","id":"details","dir":"Reference","previous_headings":"","what":"Details","title":"Pre-fitting procedure — prefit","text":"Searching good starting values achieved transforming parameters (constraint interval real line) probability distribution. Indeed, positive parameters \\((0,Inf)\\) transformed using logarithm (typically scale parameter sd normal distribution, see Normal), parameters \\((1,Inf)\\) transformed using function \\(log(x-1)\\), probability parameters \\((0,1)\\) transformed using logit function \\(log(x/(1-x))\\) (typically parameter prob geometric distribution, see Geometric), negative probability parameters \\((-1,0)\\) transformed using function \\(log(-x/(1+x))\\), real parameters course transformed , typically mean normal distribution, see Normal. parameters transformed, optimization carried quasi-Newton algorithm (typically BFGS) transform back original parameter value.","code":""},{"path":"https://lbbe-software.github.io/fitdistrplus/reference/prefit.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Pre-fitting procedure — prefit","text":"named list.","code":""},{"path":[]},{"path":"https://lbbe-software.github.io/fitdistrplus/reference/prefit.html","id":"references","dir":"Reference","previous_headings":"","what":"References","title":"Pre-fitting procedure — prefit","text":"Delignette-Muller ML Dutang C (2015), fitdistrplus: R Package Fitting Distributions. 
Journal Statistical Software, 64(4), 1-34, doi:10.18637/jss.v064.i04 .","code":""},{"path":"https://lbbe-software.github.io/fitdistrplus/reference/prefit.html","id":"author","dir":"Reference","previous_headings":"","what":"Author","title":"Pre-fitting procedure — prefit","text":"Christophe Dutang Marie-Laure Delignette-Muller.","code":""},{"path":"https://lbbe-software.github.io/fitdistrplus/reference/prefit.html","id":"ref-examples","dir":"Reference","previous_headings":"","what":"Examples","title":"Pre-fitting procedure — prefit","text":"","code":"# (1) fit of a gamma distribution by maximum likelihood estimation # x <- rgamma(1e3, 5/2, 7/2) prefit(x, \"gamma\", \"mle\", list(shape=3, scale=3), lower=-Inf, upper=Inf) #> $shape #> [1] 2.57829 #> #> $scale #> [1] 3.559245 #>"},{"path":"https://lbbe-software.github.io/fitdistrplus/reference/qmedist.html","id":null,"dir":"Reference","previous_headings":"","what":"Quantile matching fit of univariate distributions — qmedist","title":"Quantile matching fit of univariate distributions — qmedist","text":"Fit univariate distribution matching quantiles non censored data.","code":""},{"path":"https://lbbe-software.github.io/fitdistrplus/reference/qmedist.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Quantile matching fit of univariate distributions — qmedist","text":"","code":"qmedist(data, distr, probs, start = NULL, fix.arg = NULL, qtype = 7, optim.method = \"default\", lower = -Inf, upper = Inf, custom.optim = NULL, weights = NULL, silent = TRUE, gradient = NULL, checkstartfix=FALSE, calcvcov=FALSE, ...)"},{"path":"https://lbbe-software.github.io/fitdistrplus/reference/qmedist.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Quantile matching fit of univariate distributions — qmedist","text":"data numeric vector non censored data. distr character string \"name\" naming distribution corresponding quantile function qname corresponding density distribution dname must classically defined. probs numeric vector probabilities quantile matching done. length vector must equal number parameters estimate. start named list giving initial values parameters named distribution function data computing initial values returning named list. argument may omitted (default) distributions reasonable starting values computed (see 'details' section mledist). fix.arg optional named list giving values fixed parameters named distribution function data computing (fixed) parameter values returning named list. Parameters fixed value thus estimated. qtype quantile type used R quantile function compute empirical quantiles, (default 7 corresponds default quantile method R). optim.method \"default\" optimization method pass optim. lower Left bounds parameters \"L-BFGS-B\" method (see optim). upper Right bounds parameters \"L-BFGS-B\" method (see optim). custom.optim function carrying optimization. weights optional vector weights used fitting process. NULL numeric vector strictly positive integers (typically number occurences observation). non-NULL, weighted QME used, otherwise ordinary QME. silent logical remove show warnings bootstraping. gradient function return gradient squared difference \"BFGS\", \"CG\" \"L-BFGS-B\" methods. NULL, finite-difference approximation used, see details. checkstartfix logical test starting fixed values. change . calcvcov logical indicating (asymptotic) covariance matrix required. (currently ignored) ... 
arguments passed optim, constrOptim custom.optim function.","code":""},{"path":"https://lbbe-software.github.io/fitdistrplus/reference/qmedist.html","id":"details","dir":"Reference","previous_headings":"","what":"Details","title":"Quantile matching fit of univariate distributions — qmedist","text":"qmedist function carries quantile matching numerically, minimization sum squared differences observed theoretical quantiles. Note discrete distribution, sum squared differences step function consequently, optimum unique, see FAQ. optimization process mledist, see 'details' section function. Optionally, vector weights can used fitting process. default (weigths=NULL), ordinary QME carried , otherwise specified weights used compute weighted quantiles used squared differences. Weigthed quantiles computed wtdquantile Hmisc package. yet possible take account weighths functions plotdist, plotdistcens, plot.fitdist, plot.fitdistcens, cdfcomp, cdfcompcens, denscomp, ppcomp, qqcomp, gofstat descdist (developments planned future). function intended called directly internally called fitdist bootdist.","code":""},{"path":"https://lbbe-software.github.io/fitdistrplus/reference/qmedist.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Quantile matching fit of univariate distributions — qmedist","text":"qmedist returns list following components, estimate parameter estimates. convergence integer code convergence optim defined defined user user-supplied optimization function. 0 indicates successful convergence. 1 indicates iteration limit optim reached. 10 indicates degeneracy Nealder-Mead simplex. 100 indicates optim encountered internal error. value minimal value reached criterion minimize. hessian symmetric matrix computed optim estimate Hessian solution found computed user-supplied optimization function. optim.function name optimization function used maximum likelihood. optim.method optim used, name algorithm used, field method custom.optim function otherwise. fix.arg named list giving values parameters named distribution must kept fixed rather estimated maximum likelihood NULL parameters. fix.arg.fun function used set value fix.arg NULL. weights vector weigths used estimation process NULL. counts two-element integer vector giving number calls log-likelihood function gradient respectively. excludes calls needed compute Hessian, requested, calls log-likelihood function compute finite-difference approximation gradient. counts returned optim user-supplied function set NULL. optim.message character string giving additional information returned optimizer, NULL. understand exactly message, see source code. loglik log-likelihood value. probs probability vector quantiles matched.","code":""},{"path":[]},{"path":"https://lbbe-software.github.io/fitdistrplus/reference/qmedist.html","id":"references","dir":"Reference","previous_headings":"","what":"References","title":"Quantile matching fit of univariate distributions — qmedist","text":"Klugman SA, Panjer HH Willmot GE (2012), Loss Models: Data Decissions, 4th edition. Wiley Series Statistics Finance, Business Economics, p. 253, doi:10.1198/tech.2006.s409 . Delignette-Muller ML Dutang C (2015), fitdistrplus: R Package Fitting Distributions. 
Journal Statistical Software, 64(4), 1-34, doi:10.18637/jss.v064.i04 .","code":""},{"path":"https://lbbe-software.github.io/fitdistrplus/reference/qmedist.html","id":"author","dir":"Reference","previous_headings":"","what":"Author","title":"Quantile matching fit of univariate distributions — qmedist","text":"Christophe Dutang Marie Laure Delignette-Muller.","code":""},{"path":"https://lbbe-software.github.io/fitdistrplus/reference/qmedist.html","id":"ref-examples","dir":"Reference","previous_headings":"","what":"Examples","title":"Quantile matching fit of univariate distributions — qmedist","text":"","code":"# (1) basic fit of a normal distribution # set.seed(1234) x1 <- rnorm(n=100) qmedist(x1, \"norm\", probs=c(1/3, 2/3)) #> $estimate #> mean sd #> -0.3025734 0.8521385 #> #> $convergence #> [1] 0 #> #> $value #> [1] 4.855518e-10 #> #> $hessian #> mean sd #> mean 4.000000e+00 -5.569326e-14 #> sd -5.569326e-14 7.421040e-01 #> #> $optim.function #> [1] \"optim\" #> #> $optim.method #> [1] \"Nelder-Mead\" #> #> $fix.arg #> NULL #> #> $fix.arg.fun #> NULL #> #> $weights #> NULL #> #> $counts #> function gradient #> 57 NA #> #> $optim.message #> NULL #> #> $loglik #> [1] -146.1278 #> #> $probs #> [1] 0.3333333 0.6666667 #> # (2) defining your own distribution functions, here for the Gumbel # distribution for other distributions, see the CRAN task view dedicated # to probability distributions dgumbel <- function(x, a, b) 1/b*exp((a-x)/b)*exp(-exp((a-x)/b)) qgumbel <- function(p, a, b) a - b*log(-log(p)) qmedist(x1, \"gumbel\", probs=c(1/3, 2/3), start=list(a=10,b=5)) #> Error in checkparamlist(arg_startfix$start.arg, arg_startfix$fix.arg, argddistname, hasnodefaultval): 'start' must specify names which are arguments to 'distr'. # (3) fit a discrete distribution (Poisson) # set.seed(1234) x2 <- rpois(n=30,lambda = 2) qmedist(x2, \"pois\", probs=1/2) #> $estimate #> lambda #> 1.7 #> #> $convergence #> [1] 0 #> #> $value #> [1] 0.25 #> #> $hessian #> lambda #> lambda 0 #> #> $optim.function #> [1] \"optim\" #> #> $optim.method #> [1] \"BFGS\" #> #> $fix.arg #> NULL #> #> $fix.arg.fun #> NULL #> #> $weights #> NULL #> #> $counts #> function gradient #> 1 1 #> #> $optim.message #> NULL #> #> $loglik #> [1] -46.18434 #> #> $probs #> [1] 0.5 #> # (4) fit a finite-support distribution (beta) # set.seed(1234) x3 <- rbeta(n=100,shape1=5, shape2=10) qmedist(x3, \"beta\", probs=c(1/3, 2/3)) #> $estimate #> shape1 shape2 #> 5.820826 14.053655 #> #> $convergence #> [1] 0 #> #> $value #> [1] 7.779463e-12 #> #> $hessian #> shape1 shape2 #> shape1 0.005429533 -0.0021926587 #> shape2 -0.002192659 0.0008954389 #> #> $optim.function #> [1] \"optim\" #> #> $optim.method #> [1] \"Nelder-Mead\" #> #> $fix.arg #> NULL #> #> $fix.arg.fun #> NULL #> #> $weights #> NULL #> #> $counts #> function gradient #> 89 NA #> #> $optim.message #> NULL #> #> $loglik #> [1] 76.02016 #> #> $probs #> [1] 0.3333333 0.6666667 #> # (5) fit frequency distributions on USArrests dataset. 
# x4 <- USArrests$Assault qmedist(x4, \"pois\", probs=1/2) #> $estimate #> lambda #> 170.76 #> #> $convergence #> [1] 0 #> #> $value #> [1] 144 #> #> $hessian #> lambda #> lambda 0 #> #> $optim.function #> [1] \"optim\" #> #> $optim.method #> [1] \"BFGS\" #> #> $fix.arg #> NULL #> #> $fix.arg.fun #> NULL #> #> $weights #> NULL #> #> $counts #> function gradient #> 1 1 #> #> $optim.message #> NULL #> #> $loglik #> [1] -1211.705 #> #> $probs #> [1] 0.5 #> qmedist(x4, \"nbinom\", probs=c(1/3, 2/3)) #> $estimate #> size mu #> 2.518966 182.313344 #> #> $convergence #> [1] 0 #> #> $value #> [1] 0.2222222 #> #> $hessian #> size mu #> size 0 0 #> mu 0 0 #> #> $optim.function #> [1] \"optim\" #> #> $optim.method #> [1] \"Nelder-Mead\" #> #> $fix.arg #> NULL #> #> $fix.arg.fun #> NULL #> #> $weights #> NULL #> #> $counts #> function gradient #> 37 NA #> #> $optim.message #> NULL #> #> $loglik #> [1] -292.5969 #> #> $probs #> [1] 0.3333333 0.6666667 #>"},{"path":"https://lbbe-software.github.io/fitdistrplus/reference/quantile.html","id":null,"dir":"Reference","previous_headings":"","what":"Quantile estimation from a fitted distribution — quantile","title":"Quantile estimation from a fitted distribution — quantile","text":"Quantile estimation fitted distribution, optionally confidence intervals calculated bootstrap result.","code":""},{"path":"https://lbbe-software.github.io/fitdistrplus/reference/quantile.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Quantile estimation from a fitted distribution — quantile","text":"","code":"# S3 method for class 'fitdist' quantile(x, probs = seq(0.1, 0.9, by=0.1), ...) # S3 method for class 'fitdistcens' quantile(x, probs = seq(0.1, 0.9, by=0.1), ...) # S3 method for class 'bootdist' quantile(x, probs = seq(0.1, 0.9, by=0.1),CI.type = \"two.sided\", CI.level = 0.95, ...) # S3 method for class 'bootdistcens' quantile(x, probs = seq(0.1, 0.9, by=0.1),CI.type = \"two.sided\", CI.level = 0.95, ...) # S3 method for class 'quantile.fitdist' print(x, ...) # S3 method for class 'quantile.fitdistcens' print(x, ...) # S3 method for class 'quantile.bootdist' print(x, ...) # S3 method for class 'quantile.bootdistcens' print(x, ...)"},{"path":"https://lbbe-software.github.io/fitdistrplus/reference/quantile.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Quantile estimation from a fitted distribution — quantile","text":"x object class \"fitdist\", \"fitdistcens\", \"bootdist\", \"bootdistcens\" \"quantile.fitdist\", \"quantile.fitdistcens\", \"quantile.bootdist\", \"quantile.bootdistcens\" print generic function. probs numeric vector probabilities values [0, 1] quantiles must calculated. CI.type Type confidence intervals : either \"two.sided\" one-sided intervals (\"less\" \"greater\"). CI.level confidence level. ... arguments passed generic functions.","code":""},{"path":"https://lbbe-software.github.io/fitdistrplus/reference/quantile.html","id":"details","dir":"Reference","previous_headings":"","what":"Details","title":"Quantile estimation from a fitted distribution — quantile","text":"Quantiles parametric distribution calculated probability specified probs, using estimated parameters. used object class \"bootdist\" \"bootdistcens\", percentile confidence intervals medians etimates also calculated bootstrap result. CI.type two.sided, CI.level two-sided confidence intervals quantiles calculated. CI.type less greater, CI.level one-sided confidence intervals quantiles calculated. 
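A minimal self-contained sketch of the two-sided versus one-sided options, on simulated lognormal data; the object names x, fit and b are illustrative, and the small number of bootstrap iterations is only to keep the illustration fast.
library(fitdistrplus)
set.seed(123)
x <- rlnorm(100, meanlog = 1, sdlog = 0.5)
fit <- fitdist(x, "lnorm")
b <- bootdist(fit, niter = 101)   # small niter, for illustration only
# two-sided 95 % bootstrap confidence intervals (the defaults)
quantile(b, probs = c(0.05, 0.5, 0.95))
# one-sided version: a single bound is reported for each quantile
quantile(b, probs = c(0.05, 0.5, 0.95), CI.type = "greater", CI.level = 0.95)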
print functions show estimated quantiles percentile confidence intervals median estimates bootstrap resampling done previously, number bootstrap iterations estimation converges inferior whole number bootstrap iterations.","code":""},{"path":"https://lbbe-software.github.io/fitdistrplus/reference/quantile.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Quantile estimation from a fitted distribution — quantile","text":"quantile returns list 2 components (first two described ) called object class \"fitdist\" \"fitdistcens\" 8 components (described ) called object class \"bootdist\" \"bootdistcens\" : quantiles dataframe containing estimated quantiles probability value specified argument probs (one row, many columns values probs). probs numeric vector probabilities quantiles calculated. bootquant data frame containing bootstraped values quantile (many rows, specified call bootdist argument niter, many columns values probs) quantCI CI.type two.sided, two bounds CI.level percent two.sided confidence interval quantile (two rows many columns values probs). CI.type less, right bound CI.level percent one.sided confidence interval quantile (one row). CI.type greater, left bound CI.level percent one.sided confidence interval quantile (one row). quantmedian Median bootstrap estimates (per probability). CI.type Type confidence interval: either \"two.sided\" one-sided intervals (\"less\" \"greater\"). CI.level confidence level. nbboot number samples drawn bootstrap. nbconverg number iterations optimization algorithm converges.","code":""},{"path":[]},{"path":"https://lbbe-software.github.io/fitdistrplus/reference/quantile.html","id":"references","dir":"Reference","previous_headings":"","what":"References","title":"Quantile estimation from a fitted distribution — quantile","text":"Delignette-Muller ML Dutang C (2015), fitdistrplus: R Package Fitting Distributions. Journal Statistical Software, 64(4), 1-34, doi:10.18637/jss.v064.i04 .","code":""},{"path":"https://lbbe-software.github.io/fitdistrplus/reference/quantile.html","id":"author","dir":"Reference","previous_headings":"","what":"Author","title":"Quantile estimation from a fitted distribution — quantile","text":"Marie-Laure Delignette-Muller Christophe Dutang.","code":""},{"path":"https://lbbe-software.github.io/fitdistrplus/reference/quantile.html","id":"ref-examples","dir":"Reference","previous_headings":"","what":"Examples","title":"Quantile estimation from a fitted distribution — quantile","text":"","code":"# (1) Fit of a normal distribution on acute toxicity log-transformed values of # endosulfan for nonarthropod invertebrates, using maximum likelihood estimation # to estimate what is called a species sensitivity distribution # (SSD) in ecotoxicology, followed by estimation of the 5, 10 and 20 percent quantile # values of the fitted distribution, which are called the 5, 10, 20 percent hazardous # concentrations (HC5, HC10, HC20) in ecotoxicology, followed with calculations of their # confidence intervals with various definitions, from a small number of bootstrap # iterations to satisfy CRAN running times constraint. # For practical applications, we recommend to use at least niter=501 or niter=1001. 
# data(endosulfan) ATV <- subset(endosulfan, group == \"NonArthroInvert\")$ATV log10ATV <- log10(subset(endosulfan, group == \"NonArthroInvert\")$ATV) fln <- fitdist(log10ATV, \"norm\") quantile(fln, probs = c(0.05, 0.1, 0.2)) #> Estimated quantiles for each specified probability (non-censored data) #> p=0.05 p=0.1 p=0.2 #> estimate 1.744227 2.080093 2.4868 bln <- bootdist(fln, bootmethod=\"param\", niter=101) quantile(bln, probs = c(0.05, 0.1, 0.2)) #> (original) estimated quantiles for each specified probability (non-censored data) #> p=0.05 p=0.1 p=0.2 #> estimate 1.744227 2.080093 2.4868 #> Median of bootstrap estimates #> p=0.05 p=0.1 p=0.2 #> estimate 1.831458 2.128334 2.515952 #> #> two-sided 95 % CI of each quantile #> p=0.05 p=0.1 p=0.2 #> 2.5 % 1.206058 1.615810 2.040136 #> 97.5 % 2.372660 2.617113 2.937556 quantile(bln, probs = c(0.05, 0.1, 0.2), CI.type = \"greater\") #> (original) estimated quantiles for each specified probability (non-censored data) #> p=0.05 p=0.1 p=0.2 #> estimate 1.744227 2.080093 2.4868 #> Median of bootstrap estimates #> p=0.05 p=0.1 p=0.2 #> estimate 1.831458 2.128334 2.515952 #> #> left bound of one-sided 95 % CI of each quantile #> p=0.05 p=0.1 p=0.2 #> 5 % 1.299871 1.64396 2.126053 quantile(bln, probs = c(0.05, 0.1, 0.2), CI.level = 0.9) #> (original) estimated quantiles for each specified probability (non-censored data) #> p=0.05 p=0.1 p=0.2 #> estimate 1.744227 2.080093 2.4868 #> Median of bootstrap estimates #> p=0.05 p=0.1 p=0.2 #> estimate 1.831458 2.128334 2.515952 #> #> two-sided 90 % CI of each quantile #> p=0.05 p=0.1 p=0.2 #> 5 % 1.299871 1.643960 2.126053 #> 95 % 2.297746 2.565286 2.894080 # (2) Draw of 95 percent confidence intervals on quantiles of the # previously fitted distribution # cdfcomp(fln) q1 <- quantile(bln, probs = seq(0,1,length=101)) points(q1$quantCI[1,],q1$probs,type=\"l\") points(q1$quantCI[2,],q1$probs,type=\"l\") # (2b) Draw of 95 percent confidence intervals on quantiles of the # previously fitted distribution # using the NEW function CIcdfplot # CIcdfplot(bln, CI.output = \"quantile\", CI.fill = \"pink\") # (3) Fit of a distribution on acute salinity log-transformed tolerance # for riverine macro-invertebrates, using maximum likelihood estimation # to estimate what is called a species sensitivity distribution # (SSD) in ecotoxicology, followed by estimation of the 5, 10 and 20 percent quantile # values of the fitted distribution, which are called the 5, 10, 20 percent hazardous # concentrations (HC5, HC10, HC20) in ecotoxicology, followed with calculations of # their confidence intervals with various definitions. # from a small number of bootstrap iterations to satisfy CRAN running times constraint. # For practical applications, we recommend to use at least niter=501 or niter=1001. 
# data(salinity) log10LC50 <-log10(salinity) flncens <- fitdistcens(log10LC50,\"norm\") quantile(flncens, probs = c(0.05, 0.1, 0.2)) #> Estimated quantiles for each specified probability (censored data) #> p=0.05 p=0.1 p=0.2 #> estimate 1.11584 1.194121 1.288913 blncens <- bootdistcens(flncens, niter = 101) quantile(blncens, probs = c(0.05, 0.1, 0.2)) #> (original) estimated quantiles for each specified probability (censored data) #> p=0.05 p=0.1 p=0.2 #> estimate 1.11584 1.194121 1.288913 #> Median of bootstrap estimates #> p=0.05 p=0.1 p=0.2 #> estimate 1.127552 1.204485 1.299218 #> #> two-sided 95 % CI of each quantile #> p=0.05 p=0.1 p=0.2 #> 2.5 % 1.057448 1.138889 1.239646 #> 97.5 % 1.203538 1.270419 1.355852 quantile(blncens, probs = c(0.05, 0.1, 0.2), CI.type = \"greater\") #> (original) estimated quantiles for each specified probability (censored data) #> p=0.05 p=0.1 p=0.2 #> estimate 1.11584 1.194121 1.288913 #> Median of bootstrap estimates #> p=0.05 p=0.1 p=0.2 #> estimate 1.127552 1.204485 1.299218 #> #> left bound of one-sided 95 % CI of each quantile #> p=0.05 p=0.1 p=0.2 #> 5 % 1.062249 1.145186 1.245616 quantile(blncens, probs = c(0.05, 0.1, 0.2), CI.level = 0.9) #> (original) estimated quantiles for each specified probability (censored data) #> p=0.05 p=0.1 p=0.2 #> estimate 1.11584 1.194121 1.288913 #> Median of bootstrap estimates #> p=0.05 p=0.1 p=0.2 #> estimate 1.127552 1.204485 1.299218 #> #> two-sided 90 % CI of each quantile #> p=0.05 p=0.1 p=0.2 #> 5 % 1.062249 1.145186 1.245616 #> 95 % 1.195896 1.266786 1.346183"},{"path":"https://lbbe-software.github.io/fitdistrplus/reference/salinity.html","id":null,"dir":"Reference","previous_headings":"","what":"Species-Sensitivity Distribution (SSD) for salinity tolerance — salinity","title":"Species-Sensitivity Distribution (SSD) for salinity tolerance — salinity","text":"72-hour acute salinity tolerance (LC50 values) riverine macro-invertebrates.","code":""},{"path":"https://lbbe-software.github.io/fitdistrplus/reference/salinity.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Species-Sensitivity Distribution (SSD) for salinity tolerance — salinity","text":"","code":"data(salinity)"},{"path":"https://lbbe-software.github.io/fitdistrplus/reference/salinity.html","id":"format","dir":"Reference","previous_headings":"","what":"Format","title":"Species-Sensitivity Distribution (SSD) for salinity tolerance — salinity","text":"salinity data frame 2 columns named left right, describing observed LC50 value (electrical condutivity, millisiemens per centimeter) interval. left column contains either NA left censored observations, left bound interval interval censored observations, observed value non-censored observations. right column contains either NA right censored observations, right bound interval interval censored observations, observed value noncensored observations.","code":""},{"path":"https://lbbe-software.github.io/fitdistrplus/reference/salinity.html","id":"source","dir":"Reference","previous_headings":"","what":"Source","title":"Species-Sensitivity Distribution (SSD) for salinity tolerance — salinity","text":"Kefford, B.J., Nugegoda, D., Metzeling, L., Fields, E. 2006. Validating species sensitivity distributions using salinity tolerance riverine macroinvertebrates southern Murray-darling Basin (Vitoria, Australia). 
Canadian Journal Fisheries Aquatic Science, 63, 1865-1877.","code":""},{"path":"https://lbbe-software.github.io/fitdistrplus/reference/salinity.html","id":"ref-examples","dir":"Reference","previous_headings":"","what":"Examples","title":"Species-Sensitivity Distribution (SSD) for salinity tolerance — salinity","text":"","code":"# (1) load of data # data(salinity) # (2) plot of data using Turnbull cdf plot # log10LC50 <- log10(salinity) plotdistcens(log10LC50) # (3) fit of a normal and a logistic distribution to data in log10 # (classical distributions used for species sensitivity # distributions, SSD, in ecotoxicology)) # and visual comparison of the fits using Turnbull cdf plot # fln <- fitdistcens(log10LC50, \"norm\") summary(fln) #> Fitting of the distribution ' norm ' By maximum likelihood on censored data #> Parameters #> estimate Std. Error #> mean 1.4702582 0.02817044 #> sd 0.2154709 0.02369006 #> Loglikelihood: -61.79623 AIC: 127.5925 BIC: 132.9567 #> Correlation matrix: #> mean sd #> mean 1.0000000 0.2937484 #> sd 0.2937484 1.0000000 #> fll <- fitdistcens(log10LC50, \"logis\") summary(fll) #> Fitting of the distribution ' logis ' By maximum likelihood on censored data #> Parameters #> estimate Std. Error #> location 1.4761562 0.02822706 #> scale 0.1269994 0.01543956 #> Loglikelihood: -62.81293 AIC: 129.6259 BIC: 134.9901 #> Correlation matrix: #> location scale #> location 1.0000000 0.2024688 #> scale 0.2024688 1.0000000 #> cdfcompcens(list(fln, fll),legendtext = c(\"normal\", \"logistic\"), xlab = \"log10(LC50)\", xlim = c(0.5, 2), lines01 = TRUE) # (4) estimation of the 5 percent quantile value of # the normal fitted distribution (5 percent hazardous concentration : HC5) # with its two-sided 95 percent confidence interval calculated by # non parametric bootstrap # from a small number of bootstrap iterations to satisfy CRAN running times constraint. # For practical applications, we recommend to use at least niter=501 or niter=1001. # # in log10(LC50) bln <- bootdistcens(fln, niter = 101) HC5ln <- quantile(bln, probs = 0.05) # in LC50 10^(HC5ln$quantiles) #> p=0.05 #> estimate 13.0569 10^(HC5ln$quantCI) #> p=0.05 #> 2.5 % 11.08712 #> 97.5 % 15.50325 # (5) estimation of the HC5 value # with its one-sided 95 percent confidence interval (type \"greater\") # # in log10(LC50) HC5lnb <- quantile(bln, probs = 0.05, CI.type = \"greater\") # in LC50 10^(HC5lnb$quantiles) #> p=0.05 #> estimate 13.0569 10^(HC5lnb$quantCI) #> p=0.05 #> 5 % 11.31157"},{"path":"https://lbbe-software.github.io/fitdistrplus/reference/smokedfish.html","id":null,"dir":"Reference","previous_headings":"","what":"Contamination data of Listeria monocytogenes in smoked fish — smokedfish","title":"Contamination data of Listeria monocytogenes in smoked fish — smokedfish","text":"Contamination data Listeria monocytogenes smoked fish Belgian market period 2005 2007.","code":""},{"path":"https://lbbe-software.github.io/fitdistrplus/reference/smokedfish.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Contamination data of Listeria monocytogenes in smoked fish — smokedfish","text":"","code":"data(smokedfish)"},{"path":"https://lbbe-software.github.io/fitdistrplus/reference/smokedfish.html","id":"format","dir":"Reference","previous_headings":"","what":"Format","title":"Contamination data of Listeria monocytogenes in smoked fish — smokedfish","text":"smokedfish data frame 2 columns named left right, describing observed value Listeria monocytogenes concentration (CFU/g) interval. 
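Before the column-by-column description that follows, a minimal sketch (with made-up values, and an illustrative data frame named toy) of this left/right coding, in the format expected by plotdistcens and fitdistcens.
# left == right         : non-censored observation
# left < right (finite) : interval-censored observation
# left = NA             : left-censored observation (only an upper bound is known)
# right = NA            : right-censored observation (only a lower bound is known)
toy <- data.frame(left  = c(1.2, 1.0,  NA, 2.0),
                  right = c(1.2, 1.5, 0.5,  NA))
library(fitdistrplus)
plotdistcens(toy)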
left column contains either NA left censored observations, left bound interval interval censored observations, observed value non-censored observations. right column contains either NA right censored observations, right bound interval interval censored observations, observed value non-censored observations.","code":""},{"path":"https://lbbe-software.github.io/fitdistrplus/reference/smokedfish.html","id":"source","dir":"Reference","previous_headings":"","what":"Source","title":"Contamination data of Listeria monocytogenes in smoked fish — smokedfish","text":"Busschaert, P., Geereard, .H., Uyttendaele, M., Van Impe, J.F., 2010. Estimating distributions qualitative (semi) quantitative microbiological contamination data use risk assessment. International Journal Food Microbiology. 138, 260-269.","code":""},{"path":"https://lbbe-software.github.io/fitdistrplus/reference/smokedfish.html","id":"ref-examples","dir":"Reference","previous_headings":"","what":"Examples","title":"Contamination data of Listeria monocytogenes in smoked fish — smokedfish","text":"","code":"# (1) load of data # data(smokedfish) # (2) plot of data in CFU/g # plotdistcens(smokedfish) # (3) plot of transformed data in log10[CFU/g] # Clog10 <- log10(smokedfish) plotdistcens(Clog10) # (4) Fit of a normal distribution to data in log10[CFU/g] # fitlog10 <- fitdistcens(Clog10, \"norm\") summary(fitlog10) #> Fitting of the distribution ' norm ' By maximum likelihood on censored data #> Parameters #> estimate Std. Error #> mean -1.575392 0.2013872 #> sd 1.539446 0.2118026 #> Loglikelihood: -87.10945 AIC: 178.2189 BIC: 183.4884 #> Correlation matrix: #> mean sd #> mean 1.0000000 -0.4325228 #> sd -0.4325228 1.0000000 #> plot(fitlog10)"},{"path":"https://lbbe-software.github.io/fitdistrplus/reference/toxocara.html","id":null,"dir":"Reference","previous_headings":"","what":"Parasite abundance in insular feral cats — toxocara","title":"Parasite abundance in insular feral cats — toxocara","text":"Toxocara cati abundance feral cats living Kerguelen island.","code":""},{"path":"https://lbbe-software.github.io/fitdistrplus/reference/toxocara.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Parasite abundance in insular feral cats — toxocara","text":"","code":"data(toxocara)"},{"path":"https://lbbe-software.github.io/fitdistrplus/reference/toxocara.html","id":"format","dir":"Reference","previous_headings":"","what":"Format","title":"Parasite abundance in insular feral cats — toxocara","text":"toxocara data frame 1 column (number: number parasites digestive tract)","code":""},{"path":"https://lbbe-software.github.io/fitdistrplus/reference/toxocara.html","id":"source","dir":"Reference","previous_headings":"","what":"Source","title":"Parasite abundance in insular feral cats — toxocara","text":"Fromont, E., Morvilliers, L., Artois, M., Pontier, D. 2001. Parasite richness abundance insular mainland feral cats. 
Parasitology, 123, 143-151.","code":""},{"path":"https://lbbe-software.github.io/fitdistrplus/reference/toxocara.html","id":"ref-examples","dir":"Reference","previous_headings":"","what":"Examples","title":"Parasite abundance in insular feral cats — toxocara","text":"","code":"# (1) load of data # data(toxocara) # (2) description and plot of data # number <- toxocara$number descdist(number, discrete = TRUE, boot = 11) #> summary statistics #> ------ #> min: 0 max: 75 #> median: 2 #> mean: 8.679245 #> estimated sd: 14.29332 #> estimated skewness: 2.630609 #> estimated kurtosis: 11.4078 plotdist(number, discrete = TRUE) # (3) fit of a Poisson distribution to data # fitp <- fitdist(number, \"pois\") summary(fitp) #> Fitting of the distribution ' pois ' by maximum likelihood #> Parameters : #> estimate Std. Error #> lambda 8.679245 0.4046719 #> Loglikelihood: -507.5334 AIC: 1017.067 BIC: 1019.037 plot(fitp) # (4) fit of a negative binomial distribution to data # fitnb <- fitdist(number, \"nbinom\") summary(fitnb) #> Fitting of the distribution ' nbinom ' by maximum likelihood #> Parameters : #> estimate Std. Error #> size 0.3971457 0.08289027 #> mu 8.6802520 1.93501003 #> Loglikelihood: -159.3441 AIC: 322.6882 BIC: 326.6288 #> Correlation matrix: #> size mu #> size 1.0000000000 -0.0001038553 #> mu -0.0001038553 1.0000000000 #> plot(fitnb)"},{"path":"https://lbbe-software.github.io/fitdistrplus/news/index.html","id":"fitdistrplus-12-2","dir":"Changelog","previous_headings":"","what":"fitdistrplus 1.2-2","title":"fitdistrplus 1.2-2","text":"NEW FEATURES website bringing together resources related fitdistrplus package now exists github.io following URL: https://lbbe-software.github.io/fitdistrplus/ BUG FIX default starting value gamma distribution wrongly computed rate parameter.","code":""},{"path":"https://lbbe-software.github.io/fitdistrplus/news/index.html","id":"fitdistrplus-12-1","dir":"Changelog","previous_headings":"","what":"fitdistrplus 1.2-1","title":"fitdistrplus 1.2-1","text":"CRAN release: 2024-07-12 NEW FEATURES fitdistrplus git repo now belongs lbbe-software organization modify add initial value univariate distributions provided actuar. create new vignette regarding default initial values. add generic functions AIC() BIC() fitdist fitdistcens objects. make gofstat() work fitdistcens objects (giving AIC BIC values). add calculation hessian using optimHess within fitdist given optim. compute asymptotic covariance matrix MME : Now theoretical moments m defined order equals twice maximal order given order. add new argument calcvcov order (dis)able computation covariance matrix method. graphics function *comp() now return list drawn points /lines plotstyle == \"graphics\". add density function bootdist(cens) objects. add DOIs man pages. BUG FIXES scale parameter fixed, startarg function also set rate parameter. leads error calling density. add sanity check plotdistcens: following code plotdistcens(data.frame(right=smokedfish$right, left=smokedfish$left)) raised error via npsurv(), thanks R. Pouillot. bug fixed using breaks plotdist. solve extremely long time taking lines descdist. add defensive programming input data (check NA, NaN, Inf values). correct links man pages URL DOI. 
remove use plot.np vignettes.","code":""},{"path":"https://lbbe-software.github.io/fitdistrplus/news/index.html","id":"fitdistrplus-11-11","dir":"Changelog","previous_headings":"","what":"fitdistrplus 1.1-11","title":"fitdistrplus 1.1-11","text":"CRAN release: 2023-04-25 NEW FEATURES add print argument descdist function allow plot skewness-kurtosis graph, without printing computed parameters BUG FIX use deprecated ggplot2 functions updated use deprecated BibTeX entries updated bug fixed drawing CI lines CIcdfcplot ggplot2 called bug fixed drawing horizontal lines cdfcompcens","code":""},{"path":"https://lbbe-software.github.io/fitdistrplus/news/index.html","id":"fitdistrplus-11-8","dir":"Changelog","previous_headings":"","what":"fitdistrplus 1.1-8","title":"fitdistrplus 1.1-8","text":"CRAN release: 2022-03-10 WARNING FIX update URL fitdistrplus.Rd replace (class(x) == XX) (inherits(x, XX)) replace dontrun tags donttest examples rd files BUG FIX fix error t-detectbound.R producing “failure: length > 1 coercion logical” reported Brian Ripley","code":""},{"path":"https://lbbe-software.github.io/fitdistrplus/news/index.html","id":"fitdistrplus-11-6","dir":"Changelog","previous_headings":"","what":"fitdistrplus 1.1-6","title":"fitdistrplus 1.1-6","text":"CRAN release: 2021-09-28 NEW FEATURES new function Surv2fitdistcens() format data use fitdistcens() format used survival package new dataset fremale order illustrate Surv2fitdistcens() support use ggplot2 CIcdfplot add taxon names endosulfan dataset new argument name.points cdfcomp CIcdfplot add labels next points","code":""},{"path":"https://lbbe-software.github.io/fitdistrplus/news/index.html","id":"fitdistrplus-11-5","dir":"Changelog","previous_headings":"","what":"fitdistrplus 1.1-5","title":"fitdistrplus 1.1-5","text":"CRAN release: 2021-05-28 WARNING FIX reduce testing times test files","code":""},{"path":"https://lbbe-software.github.io/fitdistrplus/news/index.html","id":"fitdistrplus-11-3","dir":"Changelog","previous_headings":"","what":"fitdistrplus 1.1-3","title":"fitdistrplus 1.1-3","text":"CRAN release: 2020-12-05 NEW FEATURE take account fix.arg uniform distribution BUG FIXES add loglikelihood value uniform distribution (mledist()) correct usage triple dots argument llsurface() fix error ppcomp() qqcomp() raised large dataset","code":""},{"path":"https://lbbe-software.github.io/fitdistrplus/news/index.html","id":"fitdistrplus-11-1","dir":"Changelog","previous_headings":"","what":"fitdistrplus 1.1-1","title":"fitdistrplus 1.1-1","text":"CRAN release: 2020-05-19 NEW FEATURES add internal functions cope problems lack maintenance package npsurv remove dependence package remove deprecated argument Turnbull plotdistcens()","code":""},{"path":"https://lbbe-software.github.io/fitdistrplus/news/index.html","id":"fitdistrplus-10-14","dir":"Changelog","previous_headings":"","what":"fitdistrplus 1.0-14","title":"fitdistrplus 1.0-14","text":"CRAN release: 2019-01-23 NEW FEATURES add new estimation method called maximum spacing estimation via msedist()","code":""},{"path":"https://lbbe-software.github.io/fitdistrplus/news/index.html","id":"fitdistrplus-10-13","dir":"Changelog","previous_headings":"","what":"fitdistrplus 1.0-13","title":"fitdistrplus 1.0-13","text":"BUG FIXES fix issues coming noLD (–disable-long-double) configuration R","code":""},{"path":"https://lbbe-software.github.io/fitdistrplus/news/index.html","id":"fitdistrplus-10-12","dir":"Changelog","previous_headings":"","what":"fitdistrplus 1.0-12","title":"fitdistrplus 
1.0-12","text":"BUG FIXES bug fixed qmedist() fitdistcens() raised error checkparamlist(). bug fixed testdpqfun() assumes first argument d,p,q,r functions exactly base R.","code":""},{"path":"https://lbbe-software.github.io/fitdistrplus/news/index.html","id":"fitdistrplus-10-11","dir":"Changelog","previous_headings":"","what":"fitdistrplus 1.0-11","title":"fitdistrplus 1.0-11","text":"CRAN release: 2018-09-10 NEW FEATURES update FAQ beta(,). improve graphics discrete distributions denscomp(). improve automatic naming legends xxxcomp(). harmonize outputs mledist(), qmedist(), mmedist(), mgedist(), fitdist() fitdistcens(). automatic test d, p, q functions fitdist() raise warnings. improve test starting fixed values. add new default starting values distributions actuar. change default CDF plot censored data, using Wang NPMLE algorithm provided package npsurv (plotdistcens() cdfcompcens()) add two new goodness--fit plots (QQ-plot PP-plot) censored data (cf. plotdistcens, qqcompcens ppcompcens). add part dedicated censored datain FAQ vignette. homogeneization xlim ylim default definition plotdistcens. Removing name first argument calls dpq functions order make package compatible distributions defined non classical name first argument (resp. x, q, p d, p, q functions). add possibility change title CDF plot plotdistcens() using argument main. support use ggplot2 cdfcompcens, qqcompcens, ppcompcens. BUG FIXES bug fixed concerning use gofstat chi squared df <=0 (error message blocking functions) bug fix mledist() bounds set (NULL) censored MLE enable correct use non-equidistant breaks denscomp histogram plotstyle = “ggplot”, prohibit use non-equidistant breaks probability = FALSE (adding stop case).","code":""},{"path":"https://lbbe-software.github.io/fitdistrplus/news/index.html","id":"fitdistrplus-10-9","dir":"Changelog","previous_headings":"","what":"fitdistrplus 1.0-9","title":"fitdistrplus 1.0-9","text":"CRAN release: 2017-03-24 update FAQ linear inequality constraints.","code":""},{"path":"https://lbbe-software.github.io/fitdistrplus/news/index.html","id":"fitdistrplus-10-8","dir":"Changelog","previous_headings":"","what":"fitdistrplus 1.0-8","title":"fitdistrplus 1.0-8","text":"CRAN release: 2017-02-01 NEW FEATURES support use ggplot2 cdfcomp, denscomp, qqcomp, ppcomp. BUG FIXES correct legend qqcomp ppomp large data. correct weights mmedist. correct name Akaike gofstat. correct use trueval plot.bootdist. correct vignette truncate (inflated) distributions.","code":""},{"path":"https://lbbe-software.github.io/fitdistrplus/news/index.html","id":"fitdistrplus-10-7","dir":"Changelog","previous_headings":"","what":"fitdistrplus 1.0-7","title":"fitdistrplus 1.0-7","text":"CRAN release: 2016-07-02 NEW FEATURES keep JSS vignette pdf. start FAQ vignette add datasets (?dataFAQ) . provide likelihood plot/surface/curve: llplot, llcurve, llsurface. provide parallelization bootstrap bootdist bootdistcens. provide graphic (e)cdf bootstraped confidence interval/area: CIcdfplot. allow use constrOptim() mledist, mmedist, mgedist, qmedist functions. add possible pre-fitting procedure: prefit. BUG FIXES add invisible() graphical functions. 
bug fixed concerning the use of weights with censored data.","code":""},{"path":"https://lbbe-software.github.io/fitdistrplus/news/index.html","id":"fitdistrplus-10-6","dir":"Changelog","previous_headings":"","what":"fitdistrplus 1.0-6","title":"fitdistrplus 1.0-6","text":"CRAN release: 2015-11-30 BUG FIXES: the automatic definition of starting values for the distributions llogis and invweibull is now working.","code":""},{"path":"https://lbbe-software.github.io/fitdistrplus/news/index.html","id":"fitdistrplus-10-5","dir":"Changelog","previous_headings":"","what":"fitdistrplus 1.0-5","title":"fitdistrplus 1.0-5","text":"CRAN release: 2015-09-21 NEW FEATURES: update the handling of starting/fixing values in the mledist, mmedist, mgedist, qmedist functions. update graphics in the bootstrap procedure. add the argument .points to cdfcomp. add the argument weights to mledist, qmedist, mmedist, fitdist, fitdistcens. add the argument keepdata to fitdist, fitdistcens. suppress warnings/errors in fitdist(cens), bootdist(cens). BUG FIXES: defensive programming in plotdist, cdfcomp,… simplify the plotting of curves in cdfcomp when seq(xmin, xmax, =1) is used.","code":""},{"path":"https://lbbe-software.github.io/fitdistrplus/news/index.html","id":"fitdistrplus-10-4","dir":"Changelog","previous_headings":"","what":"fitdistrplus 1.0-4","title":"fitdistrplus 1.0-4","text":"CRAN release: 2015-02-23 release for the JSS publication.","code":""},{"path":"https://lbbe-software.github.io/fitdistrplus/news/index.html","id":"fitdistrplus-10-3","dir":"Changelog","previous_headings":"","what":"fitdistrplus 1.0-3","title":"fitdistrplus 1.0-3","text":"CRAN release: 2014-12-13 NEW FEATURES: new generic functions for fitdist(cens): loglik, vcov and coef. vignette updated to the version of the paper accepted by the Journal of Statistical Software. add the argument discrete to fitdist in order to be able to take into account non classical discrete distributions when plotting the fit with plot.fitdist or cdfcomp and when calculating goodness-of-fit statistics with gofstat (with an added example: fit of a zero-inflated Poisson distribution). add an S3 class for descdist and a print method. BUG FIXES: fitdist can now handle non invertible Hessian matrices.","code":""},{"path":"https://lbbe-software.github.io/fitdistrplus/news/index.html","id":"fitdistrplus-10-2","dir":"Changelog","previous_headings":"","what":"fitdistrplus 1.0-2","title":"fitdistrplus 1.0-2","text":"CRAN release: 2014-02-12 NEW FEATURES: plotdist can plot the empirical density as a histogram, a density plot, or both superimposed. a strong warning was added to the documentation of the function descdist about the problematic high variance of skewness and kurtosis. BUG FIXES: bug fixed in bootdistcens: the argument fix.arg is now correctly passed to mle.","code":""},{"path":"https://lbbe-software.github.io/fitdistrplus/news/index.html","id":"fitdistrplus-10-1","dir":"Changelog","previous_headings":"","what":"fitdistrplus 1.0-1","title":"fitdistrplus 1.0-1","text":"CRAN release: 2013-04-10 NEW FEATURES: gofstat can handle multiple fitdist objects. plotdist for discrete data slightly enhanced.","code":""},{"path":"https://lbbe-software.github.io/fitdistrplus/news/index.html","id":"fitdistrplus-10-0","dir":"Changelog","previous_headings":"","what":"fitdistrplus 1.0-0","title":"fitdistrplus 1.0-0","text":"CRAN release: 2012-12-27 NEW FEATURES: update of cdfcomp and addition of the denscomp, ppcomp and qqcomp functions. add the argument Turnbull.confint to the functions plotdistcens and cdfcompcens in order to draw confidence intervals on the empirical distribution when requested. 
ppoints is now used in “fitdist” for the QQ plot, PP plot and cdf plot for continuous data (it was used only for the QQ plot in previous versions) to enable the Blom type of plotting position (the default being the Hazen plotting position, which can be changed using the arguments use.ppoints and .ppoints). many changes in the examples given in the reference manual. the vignette was removed and transformed into a paper soon to be submitted to a journal. add four data sets: fluazinam, salinity, danishuni and danishmulti. add functions to calculate quantiles of the fitted distribution, with 95 percent CI calculated by bootstrap: a quantile generic function is available for fitdist and bootdist objects and a quantile generic function is available for fitdistcens and bootdistcens objects. BUG FIXES: correction of the formula for the CvM test for the Weibull distribution. elimination of the CvM and AD tests for the normal, lognormal and logistic distributions: the formulas previously used (given in Stephens 1986) do not use exactly the MLE estimates and thus the results were approximate. make the arguments xlim and ylim functional in cdfcompcens. bug fix in the closed formula of mmedist for lognormal distributions.","code":""},{"path":"https://lbbe-software.github.io/fitdistrplus/news/index.html","id":"fitdistrplus-03-4","dir":"Changelog","previous_headings":"","what":"fitdistrplus 0.3-4","title":"fitdistrplus 0.3-4","text":"CRAN release: 2012-03-22 NEW FEATURES: possibility to fix xlegend to a keyword (e.g. bottomright) in cdfcomp and cdfcompdens. improvement of the new vignette. BUG FIXES: correction of the NAMESPACE file in order to enable the correct printing of the summary of a fitdistcens object (correlation matrix, loglikelihood, AIC and BIC statistics).","code":""},{"path":"https://lbbe-software.github.io/fitdistrplus/news/index.html","id":"fitdistrplus-03-3","dir":"Changelog","previous_headings":"","what":"fitdistrplus 0.3-3","title":"fitdistrplus 0.3-3","text":"NEW FEATURES: new function (cdfcompcens) to plot cumulative distributions corresponding to various fits using a censored data set. add an example with a scaling problem in man pages.","code":""},{"path":"https://lbbe-software.github.io/fitdistrplus/news/index.html","id":"fitdistrplus-03-2","dir":"Changelog","previous_headings":"","what":"fitdistrplus 0.3-2","title":"fitdistrplus 0.3-2","text":"NEW FEATURES: new plot of the empirical cdf curve in plotdistcens, using the Turnbull algorithm by a call to the function survfit{survival}. new arguments to the function cdfcomp: verticals, horizontals and xlim.","code":""},{"path":"https://lbbe-software.github.io/fitdistrplus/news/index.html","id":"fitdistrplus-03-1","dir":"Changelog","previous_headings":"","what":"fitdistrplus 0.3-1","title":"fitdistrplus 0.3-1","text":"NEW FEATURES: add a draft of a new version of the vignette.","code":""},{"path":"https://lbbe-software.github.io/fitdistrplus/news/index.html","id":"fitdistrplus-03-0","dir":"Changelog","previous_headings":"","what":"fitdistrplus 0.3-0","title":"fitdistrplus 0.3-0","text":"NEW FEATURES: new function (cdfcomp) to plot cumulative distributions corresponding to various fits using a non censored data set. 
add two data sets: endosulfan and toxocara.","code":""},{"path":"https://lbbe-software.github.io/fitdistrplus/news/index.html","id":"fitdistrplus-02-2","dir":"Changelog","previous_headings":"","what":"fitdistrplus 0.2-2","title":"fitdistrplus 0.2-2","text":"CRAN release: 2011-04-27 BUG FIXES: elimination of NON-ASCII characters in the vignette.","code":""},{"path":"https://lbbe-software.github.io/fitdistrplus/news/index.html","id":"fitdistrplus-02-1","dir":"Changelog","previous_headings":"","what":"fitdistrplus 0.2-1","title":"fitdistrplus 0.2-1","text":"CRAN release: 2011-03-18 NEW FEATURES: a new fitting method was implemented for continuous distributions: maximum goodness-of-fit estimation (function mgedist), for the moment only available for non censored data.","code":""},{"path":"https://lbbe-software.github.io/fitdistrplus/news/index.html","id":"fitdistrplus-01-5","dir":"Changelog","previous_headings":"","what":"fitdistrplus 0.1-5","title":"fitdistrplus 0.1-5","text":"NEW FEATURES: a new goodness-of-fit statistic was added to gofstat, with the corresponding test: the Cramer-von Mises distance. a new fitting method was implemented: quantile matching estimation (function qmedist), for the moment only available for non censored data. the moment matching estimation was extended (function mmedist) to enable numerical matching when no closed formula is available. BUG FIXES: correction of a bug inserted while adding the argument fix.arg, which prevented the printing of the results of the goodness-of-fit tests.","code":""},{"path":"https://lbbe-software.github.io/fitdistrplus/news/index.html","id":"fitdistrplus-01-4","dir":"Changelog","previous_headings":"","what":"fitdistrplus 0.1-4","title":"fitdistrplus 0.1-4","text":"CRAN release: 2010-09-16 NEW FEATURES: a component named dots was added to the list returned by fitdist and fitdistcens in order to pass optional arguments controlling the optimization in mledist on to bootdist and bootdistcens. bootdist and bootdistcens were changed to take into account optional arguments defined in the call to fitdist or fitdistcens. an argument was added to fitdist, fitdistcens and mledist, named fix.arg, giving the possibility to fix some distribution parameters while maximizing the likelihood. the functions bootdist, bootdistcens and gofstat were also changed in order to take this new argument into account. new data file of bacterial contamination censored data extracted from Busschaert et al. 2000 and examples of the corresponding analysis of this dataset. BUG FIXES: correction of a bug in the print and plot of bootstrapped samples using bootdist or bootdistcens when only one parameter is estimated by maximum likelihood.","code":""},{"path":"https://lbbe-software.github.io/fitdistrplus/news/index.html","id":"fitdistrplus-01-3","dir":"Changelog","previous_headings":"","what":"fitdistrplus 0.1-3","title":"fitdistrplus 0.1-3","text":"CRAN release: 2010-06-02 NEW FEATURES: new data file groundbeef (groundbeef.rda and groundbeef.Rd) and new use of this dataset in examples. new function gofstat. goodness-of-fit statistics are no longer computed by fitdist but may be computed and printed by the use of the function gofstat. in this new function, the whole results are computed but not all printed: the results of the tests are printed only if the argument print.test==TRUE, and for continuous distributions the Anderson-Darling and Kolmogorov-Smirnov statistics are printed by default (the complete results are returned by gofstat). modifications in descdist: three arguments were added to descdist: 1/ method, to choose between unbiased estimations of the standard deviation, skewness and kurtosis (the default choice) and sample values. 2/ obs.col to choose the color used to plot the observed point on the graph. 3/ boot.col to choose the color used to plot the bootstrap sample points on the graph. 
modifications in plotfit: minor changes were performed in order to facilitate the use of the argument … to personalize plots (see the examples in plotdist.Rd). modification of the vignette. BUG FIXES: correction of a bug in plotdist due to the redefinition, in the previous version, of the parameter “ylim” for the plot of the histogram with the theoretical density function (a problem with infinite values of the theoretical density function).","code":""},{"path":"https://lbbe-software.github.io/fitdistrplus/news/index.html","id":"fitdistrplus-01-2","dir":"Changelog","previous_headings":"","what":"fitdistrplus 0.1-2","title":"fitdistrplus 0.1-2","text":"CRAN release: 2009-12-29 NEW FEATURES: deletion of mledistcens and modification of mledist in order to maximize the likelihood for both censored and non censored data. possibility to choose the optimization method used for maximum likelihood estimation (MLE) of distribution parameters using the new argument “optim.method” of mledist. possibility to specify constraints on distribution parameters using the new arguments “lower” and “upper” of mledist. possibility to use a custom optimization function for MLE using the new argument “custom.optim”. moment matching estimation is no longer done with the argument method set to “mom” but set to “mme” in fitdist. renaming of momdist in mmedist. calculation of the AIC and BIC criterion after maximum likelihood estimation of distribution parameters. change of the default number of iterations from 999 to 1001 for bootstrap in order to avoid interpolation when using the quantile function. use of the argument “log” (resp. “log.p”) of the density (resp. distribution) function, when available, to compute the loglikelihood. BUG FIXES: omitting the name of the first argument in calls to the density function during the maximization of the likelihood in order to enable the use of a density function defined with a first parameter (the vector of quantiles) with a name differing from “x” (the classical name for density functions of distributions defined in R), such as the density function dexGAUS from the package gamlss.dist.","code":""},{"path":"https://lbbe-software.github.io/fitdistrplus/news/index.html","id":"fitdistrplus-01-1","dir":"Changelog","previous_headings":"","what":"fitdistrplus 0.1-1","title":"fitdistrplus 0.1-1","text":"CRAN release: 2009-02-16 Initial release.","code":""}]
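To make the fitting-and-comparison workflow referenced throughout the changelog concrete, here is a minimal sketch, assuming the fitdistrplus package and its toxocara dataset are installed and that the code is run outside the generated search index above. The object names fit_pois and fit_nbinom are illustrative; fitdist(), gofstat() and the AIC()/BIC() generics mentioned in the 1.2-1 entry are the documented functions.

library(fitdistrplus)

# count data from the toxocara example referenced above
data(toxocara)
number <- toxocara$number

# fit two candidate discrete distributions by maximum likelihood
fit_pois   <- fitdist(number, "pois")
fit_nbinom <- fitdist(number, "nbinom")

# goodness-of-fit statistics for both fits, with readable fit names
gofstat(list(fit_pois, fit_nbinom), fitnames = c("Poisson", "negative binomial"))

# generic AIC()/BIC() methods on fitdist objects (added in 1.2-1)
AIC(fit_pois); AIC(fit_nbinom)
BIC(fit_pois); BIC(fit_nbinom)

Given the overdispersion visible in the summary output of the toxocara example (AIC 322.7 for the negative binomial versus 1017.1 for the Poisson), the negative binomial fit is expected to be preferred here.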
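Similarly, the print argument added to descdist() in 1.1-11 draws the skewness-kurtosis (Cullen and Frey) graph used in the toxocara example without printing the summary statistics. A small sketch, with the boot value chosen arbitrarily:

# Cullen and Frey (skewness-kurtosis) graph only, without the printed summary
descdist(number, discrete = TRUE, boot = 101, print = FALSE)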