Skip to content

Commit

Permalink
bug fix of Issue 34
Browse files Browse the repository at this point in the history
  • Loading branch information
dutangc committed Oct 25, 2024
1 parent 7bf9211 commit 3cda711
Show file tree
Hide file tree
Showing 3 changed files with 37 additions and 19 deletions.
3 changes: 2 additions & 1 deletion NEWS.md
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,8 @@ at the following URL: https://lbbe-software.github.io/fitdistrplus/

BUG FIX

- the default starting value for the gamma distribution was wrongly computed for the rate parameter.
- mgedist() may suffer a numerical issue for Anderson-Darling GoF metrics. All GoF metrics now take care of numerical issue, such as log(0) or 1/0, and are properly scaled by the sample sized to avoid large sample size issues. Thanks for Ethan Chapman for reporting the bug.
- the default starting value for the gamma distribution was wrongly computed for the rate parameter. Thanks for Wendy Martin for reporting the bug.

# fitdistrplus 1.2-1

Expand Down
49 changes: 32 additions & 17 deletions R/mgedist.R
Original file line number Diff line number Diff line change
Expand Up @@ -145,8 +145,8 @@ mgedist <- function (data, distr, gof = "CvM", start=NULL, fix.arg=NULL, optim.m
{
n <- length(obs)
s <- sort(obs)
theop <- do.call(pdistnam,c(list(s),as.list(par),as.list(fix.arg)))
1/(12*n) + sum( ( theop - (2 * 1:n - 1)/(2 * n) )^2 )
theop <- do.call(pdistnam, c(list(s), as.list(par), as.list(fix.arg)))
1/(12*n^2) + mean( ( theop - (2 * 1:n - 1)/(2 * n) )^2 )
}
}else if (gof == "KS")
{
Expand All @@ -156,63 +156,78 @@ mgedist <- function (data, distr, gof = "CvM", start=NULL, fix.arg=NULL, optim.m
s <- sort(obs)
obspu <- seq(1,n)/n
obspl <- seq(0,n-1)/n
theop <- do.call(pdistnam,c(list(s),as.list(par),as.list(fix.arg)))
max(pmax(abs(theop-obspu),abs(theop-obspl)))
theop <- do.call(pdistnam, c(list(s), as.list(par), as.list(fix.arg)))
max(pmax(abs(theop-obspu), abs(theop-obspl)))
}
}else if (gof == "AD")
{
fnobj <- function(par, fix.arg, obs, pdistnam)
{
n <- length(obs)
s <- sort(obs)
theop <- do.call(pdistnam,c(list(s),as.list(par),as.list(fix.arg)))
- n - mean( (2 * 1:n - 1) * (log(theop) + log(1 - rev(theop))) )
theop <- do.call(pdistnam, c(list(s), as.list(par), as.list(fix.arg)))
ilogpi <- log(theop * (1 - rev(theop))) * (2 * 1:n - 1)
idx <- is.finite(ilogpi)
ifelse(sum(idx) > 0, - sum(idx)/n - mean( ilogpi[idx] )/n, .Machine$integer.max)
}
}else if (gof == "ADR")
{
fnobj <- function(par, fix.arg, obs, pdistnam)
{
n <- length(obs)
s <- sort(obs)
theop <- do.call(pdistnam,c(list(s),as.list(par),as.list(fix.arg)))
n/2 - 2 * sum(theop) - mean ( (2 * 1:n - 1) * log(1 - rev(theop)) )
theop <- do.call(pdistnam, c(list(s), as.list(par), as.list(fix.arg)))
ilogpi <- log(1 - rev(theop)) * (2 * 1:n - 1)
idx <- is.finite(ilogpi)
ifelse(sum(idx) > 0, sum(idx)/2/n - 2 * sum(theop[idx])/n - mean ( ilogpi[idx] )/n, .Machine$integer.max)
}
}else if (gof == "ADL")
{
fnobj <- function(par, fix.arg, obs, pdistnam)
{
n <- length(obs)
s <- sort(obs)
theop <- do.call(pdistnam,c(list(s),as.list(par),as.list(fix.arg)))
-3*n/2 + 2 * sum(theop) - mean ( (2 * 1:n - 1) * log(theop) )
theop <- do.call(pdistnam, c(list(s), as.list(par), as.list(fix.arg)))
ilogpi <- (2 * 1:n - 1) * log(theop)
idx <- is.finite(ilogpi)
ifelse(sum(idx) > 0, -3*sum(idx)/2/n + 2 * sum(theop[idx])/n - mean ( ilogpi[idx] )/n, .Machine$integer.max)
}
}else if (gof == "AD2R")
{
fnobj <- function(par, fix.arg, obs, pdistnam)
{
n <- length(obs)
s <- sort(obs)
theop <- do.call(pdistnam,c(list(s),as.list(par),as.list(fix.arg)))
2 * sum(log(1 - theop)) + mean ( (2 * 1:n - 1) / (1 - rev(theop)) )
theop <- do.call(pdistnam, c(list(s), as.list(par), as.list(fix.arg)))
logpi <- log(1 - theop)
i1pi2 <- (2 * 1:n - 1) / (1 - rev(theop))
idx <- is.finite(logpi) & is.finite(i1pi2)
ifelse(sum(idx) > 0, 2 * sum(logpi[idx])/n + mean( i1pi2[idx] )/n, .Machine$integer.max)
}
}else if (gof == "AD2L")
{
fnobj <- function(par, fix.arg, obs, pdistnam)
{
n <- length(obs)
s <- sort(obs)
theop <- do.call(pdistnam,c(list(s),as.list(par),as.list(fix.arg)))
2 * sum(log(theop)) + mean ( (2 * 1:n - 1) / theop )
theop <- do.call(pdistnam, c(list(s), as.list(par), as.list(fix.arg)))
logpi <- log(theop)
i1pi <- (2 * 1:n - 1) / theop
idx <- is.finite(logpi) & is.finite(i1pi)
ifelse(sum(idx) > 0, 2 * sum( logpi[idx] )/n + mean ( i1pi[idx] )/n, .Machine$integer.max)
}
}else if (gof == "AD2")
{
fnobj <- function(par, fix.arg, obs, pdistnam)
{
n <- length(obs)
s <- sort(obs)
theop <- do.call(pdistnam,c(list(s),as.list(par),as.list(fix.arg)))
2 * sum(log(theop) + log(1 - theop) ) +
mean ( ((2 * 1:n - 1) / theop) + ((2 * 1:n - 1) / (1 - rev(theop))) )
theop <- do.call(pdistnam, c(list(s), as.list(par), as.list(fix.arg)))
logpi <- log(theop * (1 - theop))
i1pi <- (2 * 1:n - 1) / theop
i1pi2 <- (2 * 1:n - 1) / (1 - rev(theop))
idx <- is.finite(logpi) & is.finite(i1pi) & is.finite(i1pi2)
ifelse(sum(idx) > 0, 2 * sum( logpi[idx] )/n + mean ( i1pi[idx] + i1pi2[idx] )/n, .Machine$integer.max)
}
}else
stop("wrong gof metric.")
Expand Down
4 changes: 3 additions & 1 deletion tests/t-mgedist.R
Original file line number Diff line number Diff line change
Expand Up @@ -122,11 +122,13 @@ try(mgedist(c(serving, NaN), "gamma"))


# (7) test the component optim.message

x <- rnorm(1000)
#change parameter to obtain unsuccessful convergence
mgedist(x, "norm", control=list(maxit=2), start=list(mean=1e5, sd=1), optim.method="L-BFGS-B", lower=0)

# (8) test bounds

x <- rnorm(1000)
mgedist(x, "norm", optim.method="L-BFGS-B", lower=c(-Inf, 0)) #optim and L-BFGS-B
mgedist(x, "norm", optim.method="Nelder", lower=c(-Inf, 0))
Expand All @@ -143,7 +145,7 @@ if(FALSE)
set.seed(123)
obs <- rlnorm(1e6, 3, 2)
for(i in 2:6)
cat(i, try(mgedist(obs[1:10^i], "lnorm", control=list(trace=1, REPORT=1))$estimate, silent=TRUE), "\n")
cat(i, try(mgedist(obs[1:10^i], "lnorm", control=list(trace=0, REPORT=1))$estimate, silent=TRUE), "\n")

# 2 3.143734 1.819549
# 3 3.023688 1.955416
Expand Down

0 comments on commit 3cda711

Please sign in to comment.