From 481a7fb424087c5d3f9309d200117d3fbbc1860f Mon Sep 17 00:00:00 2001
From: Christophe Dutang
Date: Mon, 19 Feb 2024 12:07:38 +0100
Subject: [PATCH] adjust fig.width and fig.height

---
 vignettes/FAQ.Rmd | 70 +++++++++++++++++++++++++++--------------------
 1 file changed, 40 insertions(+), 30 deletions(-)

diff --git a/vignettes/FAQ.Rmd b/vignettes/FAQ.Rmd
index e43937e5..2a2d50ee 100644
--- a/vignettes/FAQ.Rmd
+++ b/vignettes/FAQ.Rmd
@@ -187,7 +187,7 @@ f_Y(y) =
\end{array}\right.
$$

-```{r, message=FALSE, fig.height=4, fig.width=8}
+```{r, message=FALSE, fig.height=4, fig.width=7}
dtexp <- function(x, rate, low, upp)
{
  PU <- pexp(upp, rate=rate)
@@ -205,6 +205,7 @@
x <- rexp(n); x <- x[x > .5 & x < 3]
f1 <- fitdist(x, "texp", method="mle", start=list(rate=3), fix.arg=list(low=min(x), upp=max(x)))
f2 <- fitdist(x, "texp", method="mle", start=list(rate=3), fix.arg=list(low=.5, upp=3))
gofstat(list(f1, f2))
+par(mfrow=c(1,1), mar=c(4,4,2,1))
cdfcomp(list(f1, f2), do.points = FALSE, xlim=c(0, 3.5))
```
@@ -243,7 +244,7 @@ with $u=\max_i Y_i$ and $l=\min_i Y_i$.
Let us illustrate truncated distributions with the truncated exponential
distribution. The log-likelihood is particularly badly shaped.

-```{r, message=FALSE, fig.height=4, fig.width=8}
+```{r, message=FALSE, fig.height=3.5, fig.width=7}
dtiexp <- function(x, rate, low, upp)
{
  PU <- pexp(upp, rate=rate, lower.tail = FALSE)
@@ -263,10 +264,11 @@ The first method directly maximizes the log-likelihood $L(l, \theta, u)$; the se
method maximizes the log-likelihood $L(\theta)$ assuming that $u=\hat u$ and
$l=\hat l$ are known. Inside $[0.5,3]$, the CDFs are correctly estimated by both
methods, but the first method does not succeed in estimating the true values of
the bounds $l,u$.

-```{r, fig.height=4, fig.width=6}
+```{r, fig.height=3.5, fig.width=7}
(f1 <- fitdist(x, "tiexp", method="mle", start=list(rate=3, low=0, upp=20)))
(f2 <- fitdist(x, "tiexp", method="mle", start=list(rate=3), fix.arg=list(low=min(x), upp=max(x))))
gofstat(list(f1, f2))
+par(mfrow=c(1,1), mar=c(4,4,2,1))
cdfcomp(list(f1, f2), do.points = FALSE, addlegend=FALSE, xlim=c(0, 3.5))
curve(ptiexp(x, 1, .5, 3), add=TRUE, col="blue", lty=3)
legend("bottomright", lty=1:3, col=c("red", "green", "blue", "black"),
@@ -298,12 +300,13 @@ maximizing the log-likelihood. For these reasons, `fitdist(data, dist="unif",
method="mle")` uses the explicit form of the MLE for this distribution.
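For context, these explicit estimators are simply the sample extremes, so no numerical optimization is needed; a minimal sketch (with our own variable names):

```{r}
# closed-form MLE of the uniform bounds: the sample minimum and maximum
set.seed(123)
x <- runif(n = 500, min = 3, max = 5)
c(min = min(x), max = max(x))
```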
Here is an example below.

-```{r, fig.height=4, fig.width=6}
+```{r, fig.height=4, fig.width=7}
trueval <- c("min"=3, "max"=5)
x <- runif(n=500, trueval[1], trueval[2])

f1 <- fitdist(x, "unif")
delta <- .01
+par(mfrow=c(1,1), mar=c(4,4,2,1))
llsurface(x, "unif", plot.arg = c("min", "max"), min.arg=c(min(x)-2*delta, max(x)-delta),
          max.arg=c(min(x)+delta, max(x)+2*delta), main="likelihood surface for uniform",
          loglik=FALSE)
@@ -377,11 +380,12 @@ f1 <- fitdist(x2, "pert", start=list(min=-1, mode=0, max=10, shape=1),
f2 <- fitdist(x2, "pert", start=list(mode=1, shape=1),
              fix.arg=list(min=min(x2)-eps, max=max(x2)+eps),
              lower=c(min(x2), 0), upper=c(max(x2), Inf))
-
+print(cbind(coef(f1),
+            c(f2$fix.arg["min"], coef(f2)["mode"], f2$fix.arg["max"], coef(f2)["shape"])),
+      digits=7)
gofstat(list(f1,f2))
+par(mfrow=c(1,1), mar=c(4,4,2,1))
cdfcomp(list(f1,f2))
-print(cbind(coef(f1), c(f2$fix.arg["min"], coef(f2)["mode"],
-            f2$fix.arg["max"], coef(f2)["shape"])), digits=7)
```
@@ -405,6 +409,7 @@ fgamma <- fitdist(x, "gamma")
# fit of a bad distribution
fexp <- fitdist(x, "exp")
g <- gofstat(list(fgamma, fexp), fitnames = c("gamma", "exp"))
+par(mfrow=c(1,1), mar=c(4,4,2,1))
denscomp(list(fgamma, fexp), legendtext = c("gamma", "exp"))
# results of the tests
## chi square test (with corresponding table with theoretical and observed counts)
@@ -464,7 +469,7 @@ approximated by a normal distribution. Testing the fit (here using a Kolmogorov-
of 100 observations would not reject the normal fit, while testing it on a sample
of 10000 observations would reject it, even though both samples come from the
same distribution.

-```{r, fig.height=3, fig.width=6}
+```{r, fig.height=3.5, fig.width=7}
set.seed(1234)
x1 <- rpois(n = 100, lambda = 100)
f1 <- fitdist(x1, "norm")
@@ -476,10 +481,9 @@ f2 <- fitdist(x2, "norm")
g2 <- gofstat(f2)
g2$kstest

-par(mfrow=1:2)
+par(mfrow=c(1,2), mar=c(4,4,2,1))
denscomp(f1, demp = TRUE, addlegend = FALSE, main = "small sample")
denscomp(f2, demp = TRUE, addlegend = FALSE, main = "big sample")
-
```
@@ -505,7 +509,7 @@ test, for the small sample the normal fit is rejected only for the bigger sample
visual confrontation of the distributions. In that particular case, the chi
square test with classes defined by default would have rejected the normal fit
for both samples.

-```{r, fig.height=3, fig.width=6}
+```{r, fig.height=3.5, fig.width=7}
set.seed(1234)
x3 <- rpois(n = 500, lambda = 1)
f3 <- fitdist(x3, "norm")
@@ -517,7 +521,7 @@ f4 <- fitdist(x4, "norm")
g4 <- gofstat(f4)
g4$kstest

-par(mfrow=1:2)
+par(mfrow=c(1,2), mar=c(4,4,2,1))
denscomp(f3, addlegend = FALSE, main = "big sample")
denscomp(f4, addlegend = FALSE, main = "small sample")
```
@@ -648,7 +652,7 @@ n <- 1e3
x <- rlnorm(n)
descdist(x)
```
-```{r, echo=FALSE}
+```{r, echo=FALSE, fig.width=5, fig.height=5}
n <- 1e3
x <- rlnorm(n)
par(mar=c(4,4,2,1), mfrow=c(1,1))
@@ -670,7 +674,7 @@ kurtosis.th <- function(sigma)
```

The convergence to theoretical standardized moments (skewness and kurtosis) is slow.

-```{r, echo=FALSE, fig.width=6, fig.height=3}
+```{r, echo=FALSE, fig.width=7, fig.height=3.5}
moment <- function(obs, k)
  mean(scale(obs)^k)
skewness <- function(obs)
@@ -979,7 +983,7 @@ F_X^{-1}(q) = \left\lfloor\frac{\log(1-q)}{\log(1-p)}\right\rfloor.
$$
Due to the integer part (floor function), both the distribution function and the
quantile function are step functions.
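As a quick sanity check of this closed-form quantile (our own snippet), it can be compared with `qgeom`; the two agree except when $\log(1-q)/\log(1-p)$ is exactly an integer:

```{r}
# closed-form geometric quantile versus qgeom()
p <- 1/2
q <- c(0.3, 0.6, 0.9)
cbind(formula = floor(log(1 - q) / log(1 - p)),
      qgeom = qgeom(q, prob = p))
```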
-```{r, fig.height=3, fig.width=6}
+```{r, fig.height=3.5, fig.width=7}
pgeom(0:3, prob=1/2)
qgeom(c(0.3, 0.6, 0.9), prob=1/2)
par(mar=c(4,4,2,1), mfrow=1:2)
@@ -1020,11 +1024,12 @@ $$
q_{n,1/2} \leq \frac{\log(1/2)}{\log(1-p)} < q_{n,1/2} +1.
$$
Let us plot the squared differences $(F_X^{-1}(1/2) - q_{n,1/2})^2$.

-```{r, fig.height=4, fig.width=6}
+```{r, fig.height=4, fig.width=7}
x <- rgeom(100, 1/3)
L2 <- function(p)
  (qgeom(1/2, p) - median(x))^2
L2(1/3) #theoretical value
+par(mfrow=c(1,1), mar=c(4,4,2,1))
curve(L2(x), 0.10, 0.95, xlab=expression(p), ylab=expression(L2(p)),
      main="squared differences", n=301)
```
@@ -1065,11 +1070,11 @@ i & \text{if } \sum_{k=0}^{i-1} P(X=k) \leq p < \sum_{k=0}^{i} P(X=k) \\
$$
Again, the squared difference $(F_X^{-1}(1/2) - q_{n,1/2})^2$ is a step function.

-```{r, fig.height=4, fig.width=6}
-par(mar=c(4,4,2,1))
+```{r, fig.height=4, fig.width=7}
x <- rpois(100, lambda=7.5)
L2 <- function(lam)
  (qpois(1/2, lambda = lam) - median(x))^2
+par(mfrow=c(1,1), mar=c(4,4,2,1))
curve(L2(x), 6, 9, xlab=expression(lambda), ylab=expression(L2(lambda)),
      main="squared differences", n=201)
```
@@ -1162,7 +1167,7 @@ matpar <- cbind(
showpar(matpar)
```
However, the fitted cumulative distributions are indistinguishable. See the figure below.
-```{r, fig.height=4, fig.width=6, echo=FALSE}
+```{r, fig.height=4, fig.width=7, echo=FALSE}
par(mar=c(4,4,2,1), mfrow=c(1,1))
cdfcomp(list(fit.NM.2P, fit.NM.3P), do.points = FALSE, addlegend = FALSE)
curve(ptgamma(x, shape=shape, rate=rate, low=x0, upp=Inf), add=TRUE, col="blue", lty=3)
@@ -1175,7 +1180,7 @@ Changing the optimization method is useless. The graph below display the likelih
contours as well as optimization iterates (grey crosses), the final estimate
(black cross) and the true value (red dot). Iterates get stuck far from the
true value.

-```{r, echo=FALSE, warning=FALSE, message=FALSE, fig.height=3, fig.width=6}
+```{r, echo=FALSE, warning=FALSE, message=FALSE, fig.height=3.5, fig.width=7}
#make trace in dtgamma
dtgamma <- function(x, shape, rate, low, upp)
{
@@ -1339,6 +1344,7 @@ fn <- fitdist(n, "norm")
bn <- bootdist(fn)
bn$CI
fn$estimate + cbind("estimate"= 0, "2.5%"= -1.96*fn$sd, "97.5%"= 1.96*fn$sd)
+par(mfrow=c(1,1), mar=c(4,4,2,1))
llplot(fn, back.col = FALSE)
```
@@ -1349,6 +1355,7 @@ fg <- fitdist(g, "gamma")
bg <- bootdist(fg)
bg$CI
fg$estimate + cbind("estimate"= 0, "2.5%"= -1.96*fg$sd, "97.5%"= 1.96*fg$sd)
+par(mfrow=c(1,1), mar=c(4,4,2,1))
llplot(fg, back.col = FALSE)
```
@@ -1363,7 +1370,7 @@ as a CDF curve surrounded by a band corresponding to pointwise intervals on the
See an example below on censored data corresponding to 72-hour acute salinity
tolerance (LC50 values) of riverine and marine invertebrates.

-```{r, fig.height=3, fig.width=4, warning = FALSE}
+```{r, fig.height=4, fig.width=7, warning = FALSE}
data(salinity)
log10LC50 <-log10(salinity)
fit <- fitdistcens(log10LC50, "norm")
@@ -1374,6 +1381,7 @@ bootsample <- bootdistcens(fit, niter = 101)
# Calculation of the quantile of interest (here the 5 percent hazard concentration)
(HC5 <- quantile(bootsample, probs = 0.05))
# visualizing pointwise confidence intervals on other quantiles
+par(mfrow=c(1,1), mar=c(4,4,2,1))
CIcdfplot(bootsample, CI.output = "quantile", CI.fill = "pink", xlim = c(0.5,2), main = "")
```
@@ -1416,7 +1424,7 @@ functions, `denscomp`, `cdfcomp`, `ppcomp`, `qqcomp` or `cdfcompcens` (see how i
The default plot of an object of class `fitdist` can be easily reproduced and
personalized using `denscomp`, `cdfcomp`, `ppcomp` and `qqcomp`.
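For reference, the default panel reproduced in the next chunk is obtained by a single call to the generic `plot` function; a minimal sketch on the same `groundbeef` data:

```{r}
# default goodness-of-fit panel of a "fitdist" object
data(groundbeef)
fit <- fitdist(groundbeef$serving, "gamma")
plot(fit)
```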
-```{r, fig.height=6, fig.width=6, warning = FALSE}
+```{r, fig.height=7, fig.width=7, warning = FALSE}
data(groundbeef)
serving <- groundbeef$serving
fit <- fitdist(serving, "gamma")
@@ -1434,7 +1442,7 @@ using `cdfcompcens`.
An argument `plotstyle` was added to functions `denscomp`, `cdfcomp`, `ppcomp`,
`qqcomp` and `cdfcompcens`, `ppcompcens`, `qqcompcens` to enable the generation
of plots using the `ggplot2` package. This argument, fixed at `graphics` by
default, must simply be set to `ggplot` for this purpose, as in the following
example. In the latter case, the graphical functions return a graphic object
that can be further personalized using `ggplot2` functions.

-```{r, fig.height= 4, fig.width= 6, warning = FALSE}
+```{r, fig.height= 4, fig.width= 7, warning = FALSE}
library(ggplot2)
fitW <- fitdist(serving, "weibull")
fitln <- fitdist(serving, "lnorm")
@@ -1457,7 +1465,7 @@ ECDF goodness-of-fit plots and only for non censored data. This option can be us
as below, for example, to name the species in the classical plot of the Species
Sensitivity Distributions (SSD) in ecotoxicology.

-```{r, fig.height= 6, fig.width= 6, warning = FALSE}
+```{r, fig.height= 6, fig.width= 7, warning = FALSE}
data(endosulfan)
ATV <- subset(endosulfan, group == "NonArthroInvert")$ATV
taxaATV <- subset(endosulfan, group == "NonArthroInvert")$taxa
@@ -1501,9 +1509,10 @@ has been implemented with arguments similar to the ones of `Surv()`.
svdata <- Surv2fitdistcens(exitage, event=death)
```
Let us now fit two simple distributions.
-```{r, fig.height= 4, fig.width= 6}
+```{r, fig.height= 4, fig.width= 7}
flnormc <- fitdistcens(svdata, "lnorm")
fweic <- fitdistcens(svdata, "weibull")
+par(mfrow=c(1,1), mar=c(4,4,2,1))
cdfcompcens(list(fweic, flnormc), xlim=range(exitage), xlegend = "topleft")
```
@@ -1516,7 +1525,7 @@ y-value defined by the rank of the observation as done below using function
but it remains difficult to correctly order the observations in any case (see
the example below on the right using data `smokedfish`).

-```{r, fig.height= 4, fig.width= 8}
+```{r, fig.height= 3.5, fig.width= 7}
par(mfrow = c(1,2), mar = c(3, 4, 3, 0.5))
plotdistcens(dtoy, NPMLE = FALSE)
data(smokedfish)
@@ -1539,7 +1548,7 @@ rewrite their main functions in our package, using another optimization
function. The same ECDF plot was also implemented in our package using the
Turnbull algorithm of survival (see below).

-```{r, fig.height= 6, fig.width= 6}
+```{r, fig.height= 7, fig.width= 7}
par(mfrow = c(2, 2), mar = c(3, 4, 3, 0.5))
# Turnbull algorithm with representation of middle points of equivalence classes
plotdistcens(dsmo, NPMLE.method = "Turnbull.middlepoints", xlim = c(-1.8, 2.4))
@@ -1565,7 +1574,7 @@ class, which may be zero on some classes. The NPMLE is unique only up to these e
Various NPMLE algorithms are implemented in the packages **Icens**, **interval**
and **npsurv**. They are more or less performant, and none of them enables the
handling of data other than survival data, especially left-censored observations.
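For comparison, the Turnbull NPMLE can also be computed directly with the **survival** package; a minimal sketch on the toy intervals used in the figure below, assuming `NA` codes an infinite bound as in `type = "interval2"`:

```{r}
library(survival)
# NA in 'left' means left-censored, NA in 'right' means right-censored
d <- data.frame(left = c(NA, 2, 4, 6, 9.5, 10),
                right = c(1, 3, 7, 8, 9.5, NA))
# Turnbull NPMLE of the survival curve (i.e. 1 - ECDF)
fit.tb <- survfit(Surv(time = d$left, time2 = d$right, type = "interval2") ~ 1)
summary(fit.tb)
```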
-```{r, echo = FALSE, fig.height= 4, fig.width= 8}
+```{r, echo = FALSE, fig.height= 6, fig.width= 7}
d <- data.frame(left = c(NA, 2, 4, 6, 9.5, 10),
                right = c(1, 3, 7, 8, 9.5, NA))
addbounds <- function(d)
{
@@ -1625,7 +1634,7 @@ Considering goodness-of-fit plots, the generic `plot` function of an object of c
using the Wang representation, see previous part for details), a Q-Q plot and
a P-P plot simply derived from the Wang plot of the ECDF, with filled rectangles
indicating non-uniqueness of the NPMLE ECDF.

-```{r, fig.height= 6, fig.width= 6}
+```{r, fig.height= 7, fig.width= 7}
par(mar = c(2, 4, 3, 0.5))
plot(fnorm)
```
@@ -1636,6 +1645,7 @@ CDF, Q-Q and P-P goodness-of-fit plots and/or to compare the fit of various dist
on the same dataset.

```{r, fig.height= 4, fig.width= 4}
+par(mfrow=c(1,1), mar=c(4,4,2,1))
cdfcompcens(list(fnorm, flogis), fitlty = 1)
qqcompcens(list(fnorm, flogis))
ppcompcens(list(fnorm, flogis))
```
@@ -1645,7 +1655,7 @@ Considering Q-Q plots and P-P plots, it may be easier to compare various fits by
splitting the plots as below, which is done automatically using the `plotstyle`
`ggplot` in `qqcompcens()` and `ppcompcens()`, but can also be done manually
with the `plotstyle` `graphics`.

-```{r, fig.height= 4, fig.width= 8}
+```{r, fig.height= 4, fig.width= 7}
qqcompcens(list(fnorm, flogis), lwd = 2, plotstyle = "ggplot",
           fitcol = c("red", "green"), fillrect = c("pink", "lightgreen"),
           legendtext = c("normal distribution", "logistic distribution"))
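# a matching P-P plot with the same split presentation can be obtained with
# ppcompcens() (our own illustrative call, mirroring the arguments above)
ppcompcens(list(fnorm, flogis), lwd = 2, plotstyle = "ggplot",
           fitcol = c("red", "green"), fillrect = c("pink", "lightgreen"),
           legendtext = c("normal distribution", "logistic distribution"))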