Skip to content

Commit

Permalink
scale the logLik
Browse files Browse the repository at this point in the history
  • Loading branch information
dutangc committed Oct 25, 2024
1 parent 3cda711 commit 4fa8de2
Show file tree
Hide file tree
Showing 3 changed files with 196 additions and 160 deletions.
1 change: 1 addition & 0 deletions NEWS.md
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@ BUG FIX

- mgedist() may suffer a numerical issue for Anderson-Darling GoF metrics. All GoF metrics now take care of numerical issue, such as log(0) or 1/0, and are properly scaled by the sample sized to avoid large sample size issues. Thanks for Ethan Chapman for reporting the bug.
- the default starting value for the gamma distribution was wrongly computed for the rate parameter. Thanks for Wendy Martin for reporting the bug.
- mledist() may suffer a scaling issue and the objective function is properly scaled by the sample sized to avoid large sample size issues.

# fitdistrplus 1.2-1

Expand Down
39 changes: 20 additions & 19 deletions R/mledist.R
Original file line number Diff line number Diff line change
Expand Up @@ -64,6 +64,7 @@ mledist <- function (data, distr, start=NULL, fix.arg=NULL, optim.method="defaul
if (!(is.numeric(data) & length(data)>1))
stop(txt1uncens)
checkUncensoredNAInfNan(data)
data.size <- length(data)
}
else {
cens <- TRUE
Expand All @@ -75,7 +76,7 @@ mledist <- function (data, distr, start=NULL, fix.arg=NULL, optim.method="defaul
pdistname<-paste("p", distname, sep="")
if (!exists(pdistname, mode="function"))
stop(paste("The ", pdistname, " function must be defined to apply maximum likelihood to censored data"))

data.size <- NROW(censdata)
}

if (cens) {
Expand Down Expand Up @@ -153,7 +154,7 @@ mledist <- function (data, distr, start=NULL, fix.arg=NULL, optim.method="defaul

############# MLE fit using optim or custom.optim ##########

# definition of the function to minimize : - log likelihood
# definition of the function to minimize : minus average log likelihood
# for non censored data
if (!cens && is.null(weights)) {
# the argument names are:
Expand All @@ -163,12 +164,12 @@ mledist <- function (data, distr, start=NULL, fix.arg=NULL, optim.method="defaul
# - ddistnam for distribution name
if ("log" %in% argddistname){
fnobj <- function(par, fix.arg, obs, ddistnam){
-sum(do.call(ddistnam, c(list(obs), as.list(par), as.list(fix.arg), log=TRUE) ) )
-mean(do.call(ddistnam, c(list(obs), as.list(par), as.list(fix.arg), log=TRUE) ) )
}
}
else{
fnobj <- function(par, fix.arg, obs, ddistnam) {
-sum(log(do.call(ddistnam, c(list(obs), as.list(par), as.list(fix.arg)) ) ) )
-mean(log(do.call(ddistnam, c(list(obs), as.list(par), as.list(fix.arg)) ) ) )
}
}
}
Expand All @@ -177,22 +178,22 @@ mledist <- function (data, distr, start=NULL, fix.arg=NULL, optim.method="defaul
argpdistname<-names(formals(pdistname))
if (("log" %in% argddistname) & ("log.p" %in% argpdistname))
fnobjcens <- function(par, fix.arg, rcens, lcens, icens, ncens, ddistnam, pdistnam)
-sum(do.call(ddistnam, c(list(ncens), as.list(par), as.list(fix.arg), list(log=TRUE)))) -
sum(do.call(pdistnam, c(list(lcens), as.list(par), as.list(fix.arg), list(log=TRUE)))) -
sum(do.call(pdistnam, c(list(rcens), as.list(par), as.list(fix.arg), list(lower.tail=FALSE), list(log=TRUE)))) -
sum(log(do.call(pdistnam, c(list(icens$right), as.list(par), as.list(fix.arg))) - # without log=TRUE here
-mean(do.call(ddistnam, c(list(ncens), as.list(par), as.list(fix.arg), list(log=TRUE)))) -
mean(do.call(pdistnam, c(list(lcens), as.list(par), as.list(fix.arg), list(log=TRUE)))) -
mean(do.call(pdistnam, c(list(rcens), as.list(par), as.list(fix.arg), list(lower.tail=FALSE), list(log=TRUE)))) -
mean(log(do.call(pdistnam, c(list(icens$right), as.list(par), as.list(fix.arg))) - # without log=TRUE here
do.call(pdistnam, c(list(icens$left), as.list(par), as.list(fix.arg))) )) # without log=TRUE here
else
fnobjcens <- function(par, fix.arg, rcens, lcens, icens, ncens, ddistnam, pdistnam)
-sum(log(do.call(ddistnam, c(list(ncens), as.list(par), as.list(fix.arg))))) -
sum(log(do.call(pdistnam, c(list(lcens), as.list(par), as.list(fix.arg))))) -
sum(log(1-do.call(pdistnam, c(list(rcens), as.list(par), as.list(fix.arg))))) -
sum(log(do.call(pdistnam, c(list(icens$right), as.list(par), as.list(fix.arg))) -
-mean(log(do.call(ddistnam, c(list(ncens), as.list(par), as.list(fix.arg))))) -
mean(log(do.call(pdistnam, c(list(lcens), as.list(par), as.list(fix.arg))))) -
mean(log(1-do.call(pdistnam, c(list(rcens), as.list(par), as.list(fix.arg))))) -
mean(log(do.call(pdistnam, c(list(icens$right), as.list(par), as.list(fix.arg))) -
do.call(pdistnam, c(list(icens$left), as.list(par), as.list(fix.arg))) ))
}else if(!cens && !is.null(weights))
{
fnobj <- function(par, fix.arg, obs, ddistnam) {
-sum(weights * log(do.call(ddistnam, c(list(obs), as.list(par), as.list(fix.arg)) ) ) )
-mean(weights * log(do.call(ddistnam, c(list(obs), as.list(par), as.list(fix.arg)) ) ) )
}
}else if(cens && !is.null(weights))
{
Expand All @@ -203,10 +204,10 @@ mledist <- function (data, distr, start=NULL, fix.arg=NULL, optim.method="defaul
p3 <- log(1-do.call(pdistnam, c(list(rcens), as.list(par), as.list(fix.arg))))
p4 <- log(do.call(pdistnam, c(list(icens$right), as.list(par), as.list(fix.arg))) -
do.call(pdistnam, c(list(icens$left), as.list(par), as.list(fix.arg))) )
- sum(weights[irow.ncens] * p1) -
sum(weights[irow.lcens] * p2) -
sum(weights[irow.rcens] * p3) -
sum(weights[irow.icens] * p4)
- mean(weights[irow.ncens] * p1) -
mean(weights[irow.lcens] * p2) -
mean(weights[irow.rcens] * p3) -
mean(weights[irow.icens] * p4)
}
}

Expand Down Expand Up @@ -385,7 +386,7 @@ mledist <- function (data, distr, start=NULL, fix.arg=NULL, optim.method="defaul
res <- list(estimate = opt$par, convergence = opt$convergence, value=opt$value,
hessian = opt$hessian, optim.function=opt.fun, optim.method=meth,
fix.arg = fix.arg, fix.arg.fun = fix.arg.fun, weights = weights,
counts=opt$counts, optim.message=opt$message, loglik = -opt$value,
counts=opt$counts, optim.message=opt$message, loglik = -opt$value*data.size,
vcov=varcovar)
}
else # Try to minimize the minus (log-)likelihood using a user-supplied optim function
Expand Down Expand Up @@ -428,7 +429,7 @@ mledist <- function (data, distr, start=NULL, fix.arg=NULL, optim.method="defaul
res <- list(estimate = opt$par, convergence = opt$convergence, value=opt$value,
hessian = opt$hessian, optim.function = custom.optim, optim.method = method.cust,
fix.arg = fix.arg, fix.arg.fun = fix.arg.fun, weights = weights,
counts=opt$counts, optim.message=opt$message, loglik = -opt$value,
counts=opt$counts, optim.message=opt$message, loglik = -opt$value*data.size,
vcov=varcovar)
}

Expand Down
Loading

0 comments on commit 4fa8de2

Please sign in to comment.