diff --git a/.Rbuildignore b/.Rbuildignore index 8b945df..868eefe 100644 --- a/.Rbuildignore +++ b/.Rbuildignore @@ -14,3 +14,5 @@ ^inst/images$ ^\.zenodo\.json$ ^documentation$ +^.lintr$ +^.DS_Store$ \ No newline at end of file diff --git a/.github/pull_request_template.md b/.github/pull_request_template.md new file mode 100644 index 0000000..0b112d6 --- /dev/null +++ b/.github/pull_request_template.md @@ -0,0 +1,5 @@ +- [ ] ran `devtools::document()` +- [ ] ran `spelling::spell_check_package()` and corrected any spelling errors +- [ ] ran `lintr::lint_package()` and resolved all lint warnings and notes +- [ ] ran `styler::style_pkg()` to make sure code matches the style guidelines +- [ ] ran R-CMD CHECK and resolved all issues diff --git a/.github/workflows/R-CMD-check.yaml b/.github/workflows/R-CMD-check.yaml index 10c51d1..f4b17a4 100644 --- a/.github/workflows/R-CMD-check.yaml +++ b/.github/workflows/R-CMD-check.yaml @@ -1,81 +1,29 @@ -# For help debugging build failures open an issue on the RStudio community with the 'github-actions' tag. -# https://community.rstudio.com/new-topic?category=Package%20development&tags=github-actions +# Workflow derived from https://github.com/r-lib/actions/tree/v2/examples +# Need help debugging build failures? Start at https://github.com/r-lib/actions#where-to-find-help on: push: - branches: - - master + branches: [main, master] pull_request: - branches: - - master + branches: [main, master] name: R-CMD-check jobs: R-CMD-check: - runs-on: ${{ matrix.config.os }} - - name: ${{ matrix.config.os }} (${{ matrix.config.r }}) - - strategy: - fail-fast: false - matrix: - config: - - {os: windows-latest, r: 'release'} - - {os: macOS-latest, r: 'release'} - - {os: ubuntu-20.04, r: 'release', rspm: "https://packagemanager.rstudio.com/cran/__linux__/focal/latest"} - - {os: ubuntu-20.04, r: 'devel', rspm: "https://packagemanager.rstudio.com/cran/__linux__/focal/latest"} - + runs-on: ubuntu-latest env: - R_REMOTES_NO_ERRORS_FROM_WARNINGS: true - RSPM: ${{ matrix.config.rspm }} - + GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }} + R_KEEP_PKG_SOURCE: yes steps: - - uses: actions/checkout@v2 + - uses: actions/checkout@v3 - - uses: r-lib/actions/setup-r@master + - uses: r-lib/actions/setup-r@v2 with: - r-version: ${{ matrix.config.r }} - - - uses: r-lib/actions/setup-pandoc@master + use-public-rspm: true - - name: Query dependencies - run: | - install.packages('remotes') - saveRDS(remotes::dev_package_deps(dependencies = TRUE), ".github/depends.Rds", version = 2) - writeLines(sprintf("R-%i.%i", getRversion()$major, getRversion()$minor), ".github/R-version") - shell: Rscript {0} - - - name: Cache R packages - if: runner.os != 'Windows' - uses: actions/cache@v2 + - uses: r-lib/actions/setup-r-dependencies@v2 with: - path: ${{ env.R_LIBS_USER }} - key: ${{ runner.os }}-${{ hashFiles('.github/R-version') }}-1-${{ hashFiles('.github/depends.Rds') }} - restore-keys: ${{ runner.os }}-${{ hashFiles('.github/R-version') }}-1- - - - name: Install system dependencies - if: runner.os == 'Linux' - run: | - while read -r cmd - do - eval sudo $cmd - done < <(Rscript -e 'writeLines(remotes::system_requirements("ubuntu", "20.04"))') - - - name: Install dependencies - run: | - remotes::install_deps(dependencies = TRUE) - remotes::install_cran("rcmdcheck") - shell: Rscript {0} + extra-packages: any::rcmdcheck + needs: check - - name: Check - env: - _R_CHECK_CRAN_INCOMING_REMOTE_: false - run: rcmdcheck::rcmdcheck(args = c("--no-manual", "--as-cran"), error_on = "warning", check_dir = "check") - 
shell: Rscript {0} - - - name: Upload check results - if: failure() - uses: actions/upload-artifact@main - with: - name: ${{ runner.os }}-r${{ matrix.config.r }}-results - path: check + - uses: r-lib/actions/check-r-package@v2 diff --git a/.github/workflows/lint.yaml b/.github/workflows/lint.yaml new file mode 100644 index 0000000..8544907 --- /dev/null +++ b/.github/workflows/lint.yaml @@ -0,0 +1,30 @@ +# Workflow derived from https://github.com/r-lib/actions/tree/v2/examples +# Need help debugging build failures? Start at https://github.com/r-lib/actions#where-to-find-help +on: + pull_request: + branches: [main, master] + +name: lint + +jobs: + lint: + runs-on: ubuntu-latest + env: + GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }} + steps: + - uses: actions/checkout@v3 + + - uses: r-lib/actions/setup-r@v2 + with: + use-public-rspm: true + + - uses: r-lib/actions/setup-r-dependencies@v2 + with: + extra-packages: any::lintr, local::. + needs: lint + + - name: Lint + run: lintr::lint_package() + shell: Rscript {0} + env: + LINTR_ERROR_ON_LINT: true diff --git a/.github/workflows/pkgdown.yaml b/.github/workflows/pkgdown.yaml index 5871380..087f0b0 100644 --- a/.github/workflows/pkgdown.yaml +++ b/.github/workflows/pkgdown.yaml @@ -1,46 +1,46 @@ +# Workflow derived from https://github.com/r-lib/actions/tree/v2/examples +# Need help debugging build failures? Start at https://github.com/r-lib/actions#where-to-find-help on: push: - branches: master + branches: [main, master] + pull_request: + branches: [main, master] + release: + types: [published] + workflow_dispatch: name: pkgdown jobs: pkgdown: - runs-on: macOS-latest + runs-on: ubuntu-latest + # Only restrict concurrency for non-PR jobs + concurrency: + group: pkgdown-${{ github.event_name != 'pull_request' || github.run_id }} env: GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }} steps: - - uses: actions/checkout@v2 + - uses: actions/checkout@v3 - - uses: r-lib/actions/setup-r@master + - uses: r-lib/actions/setup-pandoc@v2 - - uses: r-lib/actions/setup-pandoc@master - - - name: Query dependencies - run: | - install.packages('remotes') - saveRDS(remotes::dev_package_deps(dependencies = TRUE), ".github/depends.Rds", version = 2) - writeLines(sprintf("R-%i.%i", getRversion()$major, getRversion()$minor), ".github/R-version") - shell: Rscript {0} + - uses: r-lib/actions/setup-r@v2 + with: + use-public-rspm: true - - name: Cache R packages - uses: actions/cache@v2 + - uses: r-lib/actions/setup-r-dependencies@v2 with: - path: ${{ env.R_LIBS_USER }} - key: ${{ runner.os }}-${{ hashFiles('.github/R-version') }}-1-${{ hashFiles('.github/depends.Rds') }} - restore-keys: ${{ runner.os }}-${{ hashFiles('.github/R-version') }}-1- + extra-packages: any::pkgdown, local::. + needs: website - - name: Install dependencies - run: | - remotes::install_deps(dependencies = TRUE) - install.packages("pkgdown") + - name: Build site + run: pkgdown::build_site_github_pages(new_process = FALSE, install = FALSE) shell: Rscript {0} - - name: Install package - run: R CMD INSTALL . 
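The new check and lint workflows above enforce the same steps listed in the pull-request template; a minimal local equivalent (a sketch only, not part of this diff, assuming the devtools, spelling, lintr and styler packages are installed) is:

# Run the same checks locally before opening a pull request (sketch only)
devtools::document()               # regenerate man/ pages and NAMESPACE
spelling::spell_check_package()    # spell check, as required by the PR template
lintr::lint_package()              # lint with the new .lintr configuration
styler::style_pkg()                # apply the style guidelines
devtools::check()                  # approximately what check-r-package@v2 runs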
- - - name: Deploy package - run: | - git config --local user.email "actions@github.com" - git config --local user.name "GitHub Actions" - Rscript -e 'pkgdown::deploy_to_branch(new_process = FALSE)' + - name: Deploy to GitHub pages 🚀 + if: github.event_name != 'pull_request' + uses: JamesIves/github-pages-deploy-action@v4.4.1 + with: + clean: false + branch: gh-pages + folder: docs diff --git a/.github/workflows/test-coverage.yml b/.github/workflows/test-coverage.yml deleted file mode 100644 index 4ddcd02..0000000 --- a/.github/workflows/test-coverage.yml +++ /dev/null @@ -1,57 +0,0 @@ -on: - push: - branches: - - master - pull_request: - branches: - - master - -name: test-coverage - -jobs: - test-coverage: - runs-on: macOS-latest - env: - GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }} - CODECOV_TOKEN: "75b0b4df-84ec-41c0-9593-c05874ce237d" - steps: - - uses: actions/checkout@v2 - - - uses: r-lib/actions/setup-r@master - - - uses: r-lib/actions/setup-pandoc@master - - - name: Query dependencies - run: | - install.packages('remotes') - saveRDS(remotes::dev_package_deps(dependencies = TRUE), ".github/depends.Rds", version = 2) - writeLines(sprintf("R-%i.%i", getRversion()$major, getRversion()$minor), ".github/R-version") - shell: Rscript {0} - - - name: Cache R packages - uses: actions/cache@v1 - with: - path: ${{ env.R_LIBS_USER }} - key: ${{ runner.os }}-${{ hashFiles('.github/R-version') }}-1-${{ hashFiles('.github/depends.Rds') }} - restore-keys: ${{ runner.os }}-${{ hashFiles('.github/R-version') }}-1- - - - name: Install system dependencies (macOS) - if: runner.os == 'macOS' - run: | - brew install cairo - brew install --cask xquartz - - name: Install dependencies - run: | - install.packages(c("remotes")) - remotes::install_deps(dependencies = TRUE) - remotes::install_cran("covr") - # if (!requireNamespace("BiocManager", quietly = TRUE)) - # install.packages("BiocManager") - # BiocManager::install("graph") - # install.packages("igraph", dependencies = TRUE) - # remotes::install_github("dahtah/imager") - shell: Rscript {0} - - - name: Test coverage - run: covr::codecov() - shell: Rscript {0} \ No newline at end of file diff --git a/.gitignore b/.gitignore index 2eae819..ac1a93b 100644 --- a/.gitignore +++ b/.gitignore @@ -6,3 +6,4 @@ docs inst/doc CRAN-RELEASE cran-comments.md +.DS_Store diff --git a/.lintr b/.lintr new file mode 100644 index 0000000..0a79709 --- /dev/null +++ b/.lintr @@ -0,0 +1,10 @@ +linters: linters_with_defaults( + line_length_linter(150), + cyclocomp_linter(25), + object_name_linter = NULL, + commented_code_linter = NULL) +exclusions: list() +exclude: "# Exclude Linting" +exclude_start: "# Begin Exclude Linting" +exclude_end: "# End Exclude Linting" +encoding: "ISO-8859-1" diff --git a/DESCRIPTION b/DESCRIPTION index 6f46bc8..7c4f6e5 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,6 +1,6 @@ Package: fxTWAPLS Title: An Improved Version of WA-PLS -Version: 0.1.1 +Version: 0.1.2 Authors@R: c( person(given = "Mengmeng", family = "Liu", @@ -52,12 +52,15 @@ Imports: parallel, progressr Suggests: + lintr (>= 3.0.0), magrittr, progress, scales, + spelling, + styler, tictoc Depends: R (>= 3.6) Roxygen: list(markdown = TRUE) -RoxygenNote: 7.1.2 +RoxygenNote: 7.2.2 Language: en-GB diff --git a/NEWS.md b/NEWS.md index 17941e5..ba307c1 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,5 +1,10 @@ # fxTWAPLS (development version) +# fxTWAPLS 0.1.2 +* Remove taxa with 0 tolerance. +* Add example script to the main page. +* Add reference to the functions `WAPLS.w2` and `TWAPLS.w2`. 
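The first item above ("Remove taxa with 0 tolerance") guards the inverse-tolerance weighting used in the TWA-PLS functions below; a minimal sketch with made-up numbers (not package code) shows why a zero tolerance has to be filtered out before Step 3:

# Hypothetical optima, tolerances and abundances for three taxa at one site
t_k <- c(1.2, 0, 0.8)              # the second taxon has zero tolerance
u_k <- c(10, 12, 15)
y_i <- c(0.5, 0.2, 0.3)
taxa_to_keep <- which(t_k != 0)    # same filter as the updated Step 3 code
r_i <- sum(y_i[taxa_to_keep] * u_k[taxa_to_keep] / t_k[taxa_to_keep]^2) /
  sum(y_i[taxa_to_keep] / t_k[taxa_to_keep]^2)
r_i                                # finite site score; without the filter it is NaN (Inf / Inf)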
+ # fxTWAPLS 0.1.1 * Update maintainer's contact information. diff --git a/R/fxTWAPLS.R b/R/fxTWAPLS.R index 7710a92..f9fa581 100644 --- a/R/fxTWAPLS.R +++ b/R/fxTWAPLS.R @@ -2,13 +2,13 @@ "_PACKAGE" #' Get frequency of the climate value -#' -#' Function to get the frequency of the climate value, which will be used to +#' +#' Function to get the frequency of the climate value, which will be used to #' provide \code{fx} correction for WA-PLS and TWA-PLS. #' #' @importFrom graphics hist #' @importFrom graphics plot -#' +#' #' @param x Numeric vector with the modern climate values. #' @param bin Binwidth to get the frequency of the modern climate values. #' @param show_plot Boolean flag to show a plot of \code{fx ~ x}. @@ -20,14 +20,14 @@ #' \dontrun{ #' # Load modern pollen data #' modern_pollen <- read.csv("/path/to/modern_pollen.csv") -#' +#' #' # Get the frequency of each climate variable fx #' fx_Tmin <- fxTWAPLS::fx(modern_pollen$Tmin, bin = 0.02, show_plot = TRUE) #' fx_gdd <- fxTWAPLS::fx(modern_pollen$gdd, bin = 20, show_plot = TRUE) #' fx_alpha <- fxTWAPLS::fx(modern_pollen$alpha, bin = 0.002, show_plot = TRUE) #' } -#' -#' @seealso \code{\link{cv.w}}, \code{\link{cv.pr.w}}, and +#' +#' @seealso \code{\link{cv.w}}, \code{\link{cv.pr.w}}, and #' \code{\link{sse.sample}} fx <- function(x, bin, show_plot = FALSE) { pbin <- round((max(x) - min(x)) / bin, digits = 0) @@ -42,21 +42,22 @@ fx <- function(x, bin, show_plot = FALSE) { if (any(fx == 0)) { print("Some x have a count of 0!") } - if (show_plot) + if (show_plot) { plot(fx ~ x) + } return(fx) } #' Get frequency of the climate value with p-spline smoothing -#' -#' Function to get the frequency of the climate value, which will be used to +#' +#' Function to get the frequency of the climate value, which will be used to #' provide \code{fx} correction for WA-PLS and TWA-PLS. #' #' @importFrom graphics hist #' @importFrom graphics plot -#' +#' #' @param x Numeric vector with the modern climate values. -#' @param bin Binwidth to get the frequency of the modern climate values, the +#' @param bin Binwidth to get the frequency of the modern climate values, the #' curve will be p-spline smoothed later #' @param show_plot Boolean flag to show a plot of \code{fx ~ x}. 
#' @@ -67,54 +68,60 @@ fx <- function(x, bin, show_plot = FALSE) { #' \dontrun{ #' # Load modern pollen data #' modern_pollen <- read.csv("/path/to/modern_pollen.csv") -#' +#' #' # Get the frequency of each climate variable fx -#' fx_pspline_Tmin <- fxTWAPLS::fx_pspline(modern_pollen$Tmin, -#' bin = 0.02, -#' show_plot = TRUE) -#' fx_pspline_gdd <- fxTWAPLS::fx_pspline(modern_pollen$gdd, -#' bin = 20, -#' show_plot = TRUE) -#' fx_pspline_alpha <- fxTWAPLS::fx_pspline(modern_pollen$alpha, -#' bin = 0.002, -#' show_plot = TRUE) +#' fx_pspline_Tmin <- fxTWAPLS::fx_pspline( +#' modern_pollen$Tmin, +#' bin = 0.02, +#' show_plot = TRUE +#' ) +#' fx_pspline_gdd <- fxTWAPLS::fx_pspline( +#' modern_pollen$gdd, +#' bin = 20, +#' show_plot = TRUE +#' ) +#' fx_pspline_alpha <- fxTWAPLS::fx_pspline( +#' modern_pollen$alpha, +#' bin = 0.002, +#' show_plot = TRUE +#' ) #' } -#' -#' @seealso \code{\link{cv.w}}, \code{\link{cv.pr.w}}, and +#' +#' @seealso \code{\link{cv.w}}, \code{\link{cv.pr.w}}, and #' \code{\link{sse.sample}} -fx_pspline <- function (x, bin, show_plot = FALSE) { +fx_pspline <- function(x, bin, show_plot = FALSE) { pbin <- round((max(x) - min(x)) / bin, digits = 0) bin <- (max(x) - min(x)) / pbin - brks <- seq(min(x) , max(x) , by = bin) + brks <- seq(min(x), max(x), by = bin) h <- hist(x, breaks = brks, plot = show_plot) mids <- h$mids counts <- h$counts - Data <- data.frame(mids, counts) - Dat <- data.frame(x) nseg <- 20 lambda <- 1 d <- 3 - + # Iterative smoothing , updating tuning based on diff of # coeffs for (it in 1:20) { - fit <- JOPS::psPoisson(mids, - counts, - nseg = nseg, - pord = d, - lambda = lambda, - show = FALSE) + fit <- JOPS::psPoisson( + mids, + counts, + nseg = nseg, + pord = d, + lambda = lambda, + show = FALSE + ) a <- fit$pcoef - vr <- sum ((diff(a, diff = d)) ^ 2) / fit$effdim + vr <- sum((diff(a, diff = d))^2) / fit$effdim lambda_new <- 1 / vr - dla <- abs ((lambda_new - lambda) / lambda) + dla <- abs((lambda_new - lambda) / lambda) lambda <- lambda_new - # cat(it, log10(lambda ) , "\n") - if (dla < 1e-05) + if (dla < 1e-05) { break + } } # Gridded data for plotting - Fit1 <- data.frame(xgrid = fit$xgrid , ygrid = fit$mugrid) + Fit1 <- data.frame(xgrid = fit$xgrid, ygrid = fit$mugrid) fx <- rep(NA, length(x)) for (i in seq_len(length(x))) { fx[i] <- Fit1[which.min(abs(x[i] - Fit1$xgrid)), "ygrid"] @@ -122,79 +129,82 @@ fx_pspline <- function (x, bin, show_plot = FALSE) { if (any(fx == 0)) { print("Some x have a count of 0!") } - if (show_plot) + if (show_plot) { plot(fx ~ x) + } return(fx) } #' WA-PLS training function -#' -#' WA-PLS training function, which can perform \code{fx} correction. +#' +#' WA-PLS training function, which can perform \code{fx} correction. #' \code{1/fx^2} correction will be applied at step 7. -#' +#' #' @importFrom stats lm -#' -#' @param modern_taxa The modern taxa abundance data, each row represents a +#' +#' @param modern_taxa The modern taxa abundance data, each row represents a #' sampling site, each column represents a taxon. #' @param modern_climate The modern climate value at each sampling site. #' @param nPLS The number of components to be extracted. #' @param usefx Boolean flag on whether or not use \code{fx} correction. 
-#' @param fx_method Binned or p-spline smoothed \code{fx} correction: if -#' \code{usefx = FALSE}, this should be \code{NA}; otherwise, +#' @param fx_method Binned or p-spline smoothed \code{fx} correction: if +#' \code{usefx = FALSE}, this should be \code{NA}; otherwise, #' \code{\link{fx}} function will be used when choosing "bin"; #' \code{\link{fx_pspline}} function will be used when choosing "pspline". #' @param bin Binwidth to get fx, needed for both binned and p-splined method. #' if \code{usefx = FALSE}, this should be \code{NA}; -#' @return A list of the training results, which will be used by the predict +#' @return A list of the training results, which will be used by the predict #' function. Each element in the list is described below: #' \describe{ #' \item{\code{fit}}{the fitted values using each number of components.} #' \item{\code{x}}{the observed modern climate values.} #' \item{\code{taxon_name}}{the name of each taxon.} -#' \item{\code{optimum}}{the updated taxon optimum (u* in the WA-PLS +#' \item{\code{optimum}}{the updated taxon optimum (u* in the WA-PLS #' paper).} -#' \item{\code{comp}}{each component extracted (will be used in step 7 +#' \item{\code{comp}}{each component extracted (will be used in step 7 #' regression).} #' \item{\code{u}}{taxon optimum for each component (step 2).} -#' \item{\code{z}}{a parameter used in standardization for each component +#' \item{\code{z}}{a parameter used in standardization for each component #' (step 5).} -#' \item{\code{s}}{a parameter used in standardization for each component +#' \item{\code{s}}{a parameter used in standardization for each component #' (step 5).} -#' \item{\code{orth}}{a list that stores orthogonalization parameters +#' \item{\code{orth}}{a list that stores orthogonalization parameters #' (step 4).} #' \item{\code{alpha}}{a list that stores regression coefficients (step 7).} #' \item{\code{meanx}}{mean value of the observed modern climate values.} #' \item{\code{nPLS}}{the total number of components extracted.} #' } -#' +#' #' @export #' #' @examples #' \dontrun{ #' # Load modern pollen data #' modern_pollen <- read.csv("/path/to/modern_pollen.csv") -#' +#' #' # Extract taxa #' taxaColMin <- which(colnames(modern_pollen) == "taxa0") #' taxaColMax <- which(colnames(modern_pollen) == "taxaN") #' taxa <- modern_pollen[, taxaColMin:taxaColMax] -#' -#' # MTCO +#' +#' # Training #' fit_Tmin <- fxTWAPLS::WAPLS.w(taxa, modern_pollen$Tmin, nPLS = 5) -#' fit_f_Tmin <- fxTWAPLS::WAPLS.w(taxa, -#' modern_pollen$Tmin, -#' nPLS = 5, -#' usefx = TRUE, -#' fx_method = "bin", -#' bin = 0.02) +#' fit_f_Tmin <- fxTWAPLS::WAPLS.w( +#' taxa, +#' modern_pollen$Tmin, +#' nPLS = 5, +#' usefx = TRUE, +#' fx_method = "bin", +#' bin = 0.02 +#' ) #' } -#' +#' #' @seealso \code{\link{fx}}, \code{\link{TWAPLS.w}}, and #' \code{\link{WAPLS.predict.w}} -WAPLS.w <- function(modern_taxa, - modern_climate, - nPLS = 5, - usefx = FALSE, +WAPLS.w <- function(modern_taxa, + modern_climate, + nPLS = 5, + usefx = FALSE, fx_method = "bin", bin = NA) { # Step 0. 
Centre the environmental variable by subtracting the weighted mean @@ -202,17 +212,17 @@ WAPLS.w <- function(modern_taxa, y <- modern_taxa y <- as.matrix(y) y <- y / rowSums(y) - + nc <- ncol(modern_taxa) nr <- nrow(modern_taxa) - Ytottot <- sum(y) + sumk_yik <- rowSums(y) sumi_yik <- colSums(y) - + # Define some matrix to store the values u <- matrix(NA, nc, nPLS) # u of each component # u of each component, standardized the same way as r - u_sd <- matrix(NA, nc, nPLS) + u_sd <- matrix(NA, nc, nPLS) optimum <- matrix(NA, nc, nPLS) # u updated r <- matrix(NA, nr, nPLS) # site score z <- matrix(NA, 1, nPLS) # standardize @@ -221,189 +231,205 @@ WAPLS.w <- function(modern_taxa, alpha <- list() # store regression coefficients comp <- matrix(NA, nr, nPLS) # each component fit <- matrix(NA, nr, nPLS) # current estimate - + pls <- 1 - # Step 1. Take the centred environmental variable(xi) as initial site - # scores (ri). + # Step 1. Take the centred environmental variable(xi) as initial site + # scores (ri). r[, pls] <- x - mean(x) - - # Step 2. Calculate new species scores (uk* by weighted averaging of the + + # Step 2. Calculate new species scores (uk* by weighted averaging of the # site scores) - u[, pls] <- t(y) %*% r[, pls] / sumi_yik # uk = sumi_yik*xi/sumi_yik; 1*nmodern_taxa - - # Step 3. Calculate new site scores (ri) by weighted averaging of the species + u[, pls] <- t(y) %*% r[, pls] / sumi_yik + + # Step 3. Calculate new site scores (ri) by weighted averaging of the species # scores - r[, pls] <- y %*% u[, pls] / sumk_yik # xi=sumk_yik*uk/sumk_yik; 1*nsite - + r[, pls] <- y %*% u[, pls] / sumk_yik + # Step 4. For the first axis go to Step 5. - + # Step 5. Standardize the new site scores (ri) ter braak 1987 5.2.c z[, pls] <- mean(r[, pls], na.rm = TRUE) - s[, pls] <- sqrt(sum((r[, pls] - z[, pls]) ^ 2, na.rm = TRUE) / Ytottot) + s[, pls] <- sqrt(sum((r[, pls] - z[, pls])^2, na.rm = TRUE) / sum(y)) r[, pls] <- (r[, pls] - z[, pls]) / s[, pls] - + # Step 6. Take the standardized score as the new component comp[, pls] <- r[, pls] - - # Step 7. Regress the environmental variable (xJ on the components obtained + + # Step 7. Regress the environmental variable (xJ on the components obtained # so far using weights and take the fitted values as current estimates if (usefx == FALSE) { - lm <- MASS::rlm(modern_climate ~ comp[, 1:pls], - weights = sumk_yik / Ytottot) + lm <- MASS::rlm( + modern_climate ~ comp[, 1:pls], + weights = sumk_yik / sum(y) + ) } else { - if (fx_method == "bin"){ - lm <- MASS::rlm(modern_climate ~ comp[, 1:pls], - weights = 1 / fxTWAPLS::fx(x, bin) ^ 2) + if (fx_method == "bin") { + lm <- MASS::rlm( + modern_climate ~ comp[, 1:pls], + weights = 1 / fxTWAPLS::fx(x, bin)^2 + ) } else { - lm <- MASS::rlm(modern_climate ~ comp[, 1:pls], - weights = 1 / fxTWAPLS::fx_pspline(x, bin) ^ 2) + lm <- MASS::rlm( + modern_climate ~ comp[, 1:pls], + weights = 1 / fxTWAPLS::fx_pspline(x, bin)^2 + ) } } - + fit[, pls] <- lm[["fitted.values"]] alpha[[pls]] <- lm[["coefficients"]] u_sd[, pls] <- (u[, pls] - z[, pls]) / s[, pls] optimum[, pls] <- alpha[[pls]][1] + u_sd[, pls] * alpha[[pls]][2] - + for (pls in 2:nPLS) { - # Go to Step 2 with the residuals of the regression as the new site + # Go to Step 2 with the residuals of the regression as the new site # scores (rJ. r[, pls] <- lm[["residuals"]] - - # Step 2. Calculate new species scores (uk* by weighted averaging of the + + # Step 2. 
Calculate new species scores (uk* by weighted averaging of the # site scores) - # uk=sumi_yik*xi/sumi_yik; 1*nmodern_taxa u[, pls] <- t(y) %*% r[, pls] / sumi_yik - - # Step 3. Calculate new site scores (ri) by weighted averaging of the + + # Step 3. Calculate new site scores (ri) by weighted averaging of the # species scores - # xi=sumk_yik*uk/sumk_yik; 1*nsite - r[, pls] <- y %*% u[, pls] / sumk_yik - - # Step 4. For second and higher components, make the new site scores (ri) - # uncorrelated with the previous components by orthogonalization + r[, pls] <- y %*% u[, pls] / sumk_yik + + # Step 4. For second and higher components, make the new site scores (ri) + # uncorrelated with the previous components by orthogonalization # (Ter Braak, 1987 : Table 5 .2b) v <- rep(NA, pls - 1) for (j in 1:(pls - 1)) { fi <- r[, pls - j] xi <- r[, pls] - v[pls - j] <- sum(sumk_yik * fi * xi) / Ytottot + v[pls - j] <- sum(sumk_yik * fi * xi) / sum(y) xinew <- xi - v[pls - j] * fi } orth[[pls]] <- v r[, pls] <- xinew - + # Step 5. Standardize the new site scores (ri) ter braak 1987 5.2.c z[, pls] <- mean(r[, pls], na.rm = TRUE) - s[, pls] <- sqrt(sum((r[, pls] - z[, pls]) ^ 2, na.rm = TRUE) / Ytottot) + s[, pls] <- sqrt(sum((r[, pls] - z[, pls])^2, na.rm = TRUE) / sum(y)) r[, pls] <- (r[, pls] - z[, pls]) / s[, pls] - + # Step 6. Take the standardized score as the new component comp[, pls] <- r[, pls] - - # Step 7. Regress the environmental variable on the components obtained so - # far using weights and take the fitted values as current estimates + + # Step 7. Regress the environmental variable on the components obtained so + # far using weights and take the fitted values as current estimates if (usefx == FALSE) { - lm <- MASS::rlm(modern_climate ~ comp[, 1:pls], - weights = sumk_yik / Ytottot) + lm <- MASS::rlm( + modern_climate ~ comp[, 1:pls], + weights = sumk_yik / sum(y) + ) } else { - if (fx_method=="bin"){ - lm <- MASS::rlm(modern_climate ~ comp[, 1:pls], - weights = 1 / fxTWAPLS::fx(x, bin) ^ 2) + if (fx_method == "bin") { + lm <- MASS::rlm( + modern_climate ~ comp[, 1:pls], + weights = 1 / fxTWAPLS::fx(x, bin)^2 + ) } else { - lm <- MASS::rlm(modern_climate ~ comp[, 1:pls], - weights = 1 / fxTWAPLS::fx_pspline(x, bin) ^ 2) + lm <- MASS::rlm( + modern_climate ~ comp[, 1:pls], + weights = 1 / fxTWAPLS::fx_pspline(x, bin)^2 + ) } } - + fit[, pls] <- lm[["fitted.values"]] alpha[[pls]] <- lm[["coefficients"]] - + u_sd[, pls] <- (u[, pls] - z[, pls]) / s[, pls] optimum[, pls] <- alpha[[pls]][1] + u_sd[, 1:pls] %*% as.matrix(alpha[[pls]][2:(pls + 1)]) } - - list <- list(fit, - modern_climate, - colnames(modern_taxa), - optimum, - comp, - u, - z, - s, - orth, - alpha, - mean(modern_climate), - nPLS) - names(list) <- c("fit", - "x", - "taxon_name", - "optimum", - "comp", - "u", - "z", - "s", - "orth", - "alpha", - "meanx", - "nPLS") + + list <- list( + fit, + modern_climate, + colnames(modern_taxa), + optimum, + comp, + u, + z, + s, + orth, + alpha, + mean(modern_climate), + nPLS + ) + names(list) <- c( + "fit", + "x", + "taxon_name", + "optimum", + "comp", + "u", + "z", + "s", + "orth", + "alpha", + "meanx", + "nPLS" + ) return(list) } #' TWA-PLS training function -#' -#' TWA-PLS training function, which can perform \code{fx} correction. +#' +#' TWA-PLS training function, which can perform \code{fx} correction. #' \code{1/fx^2} correction will be applied at step 7. 
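To make Step 2 of this function concrete, here is a minimal sketch with made-up numbers (not package code) of one taxon's weighted optimum and N2-corrected tolerance, using the same formulas as the function body:

# Weighted optimum and tolerance for a single taxon (illustration only)
y_k <- c(0.1, 0.4, 0.3, 0.0, 0.2)  # abundances of the taxon at five sites
r   <- c(-2, -1, 0, 1, 2)          # current site scores
u_k <- sum(y_k * r) / sum(y_k)     # weighted optimum (u in the code)
t_k <- sqrt(sum(y_k * (r - u_k)^2) / sum(y_k))
n2  <- 1 / sum((y_k / sum(y_k))^2) # Hill's N2: effective number of occurrences
t_k <- t_k / sqrt(1 - 1 / n2)      # corrected tolerance (t in the code)
c(optimum = u_k, tolerance = t_k)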
-#' +#' #' @importFrom stats lm -#' +#' #' @inheritParams WAPLS.w #' -#' @return A list of the training results, which will be used by the predict +#' @return A list of the training results, which will be used by the predict #' function. Each element in the list is described below: #' \describe{ #' \item{\code{fit}}{the fitted values using each number of components.} #' \item{\code{x}}{the observed modern climate values.} #' \item{\code{taxon_name}}{the name of each taxon.} #' \item{\code{optimum}}{the updated taxon optimum} -#' \item{\code{comp}}{each component extracted (will be used in step 7 +#' \item{\code{comp}}{each component extracted (will be used in step 7 #' regression).} #' \item{\code{u}}{taxon optimum for each component (step 2).} #' \item{\code{t}}{taxon tolerance for each component (step 2).} -#' \item{\code{z}}{a parameter used in standardization for each component +#' \item{\code{z}}{a parameter used in standardization for each component #' (step 5).} -#' \item{\code{s}}{a parameter used in standardization for each component +#' \item{\code{s}}{a parameter used in standardization for each component #' (step 5).} -#' \item{\code{orth}}{a list that stores orthogonalization parameters +#' \item{\code{orth}}{a list that stores orthogonalization parameters #' (step 4).} #' \item{\code{alpha}}{a list that stores regression coefficients (step 7).} #' \item{\code{meanx}}{mean value of the observed modern climate values.} #' \item{\code{nPLS}}{the total number of components extracted.} #' } -#' +#' #' @export #' #' @examples #' \dontrun{ #' # Load modern pollen data #' modern_pollen <- read.csv("/path/to/modern_pollen.csv") -#' +#' #' # Extract taxa #' taxaColMin <- which(colnames(modern_pollen) == "taxa0") #' taxaColMax <- which(colnames(modern_pollen) == "taxaN") #' taxa <- modern_pollen[, taxaColMin:taxaColMax] -#' -#' # MTCO +#' +#' # Training #' fit_t_Tmin <- fxTWAPLS::TWAPLS.w(taxa, modern_pollen$Tmin, nPLS = 5) -#' fit_tf_Tmin <- fxTWAPLS::TWAPLS.w(taxa, -#' modern_pollen$Tmin, -#' nPLS = 5, -#' usefx = TRUE, -#' fx_method = "bin", -#' bin = 0.02) +#' fit_tf_Tmin <- fxTWAPLS::TWAPLS.w( +#' taxa, +#' modern_pollen$Tmin, +#' nPLS = 5, +#' usefx = TRUE, +#' fx_method = "bin", +#' bin = 0.02 +#' ) #' } -#' +#' #' @seealso \code{\link{fx}}, \code{\link{TWAPLS.predict.w}}, and #' \code{\link{WAPLS.w}} TWAPLS.w <- function(modern_taxa, @@ -411,20 +437,20 @@ TWAPLS.w <- function(modern_taxa, nPLS = 5, usefx = FALSE, fx_method = "bin", - bin = NA){ + bin = NA) { # Step 0. Centre the environmental variable by subtracting the weighted mean x <- modern_climate y <- modern_taxa y <- as.matrix(y) - y <- y/rowSums(y) - + y <- y / rowSums(y) + nc <- ncol(modern_taxa) nr <- nrow(modern_taxa) - Ytottot <- sum(y) + sumk_yik <- rowSums(y) sumi_yik <- colSums(y) - - #Define some matrix to store the values + + # Define some matrix to store the values u <- matrix(NA, nc, nPLS) # u of each component # u of each component, standardized the same way as r u_sd <- matrix(NA, nc, nPLS) @@ -437,237 +463,251 @@ TWAPLS.w <- function(modern_taxa, alpha <- list() # store regression coefficients comp <- matrix(NA, nr, nPLS) # each component fit <- matrix(NA, nr, nPLS) # current estimate - + pls <- 1 - # Step 1. Take the centred environmental variable (xi) as initial site - # scores (ri). + # Step 1. Take the centred environmental variable (xi) as initial site + # scores (ri). r[, pls] <- x - mean(x) - + # Step 2. 
Calculate uk and tk - u[, pls] <- t(y) %*% r[, pls] / sumi_yik # uk=sumi_yik*xi/sumi_yik; 1*nmodern_taxa + u[, pls] <- t(y) %*% r[, pls] / sumi_yik n2 <- matrix(NA, nc, 1) for (k in 1:nc) { - t[k, pls] <- sqrt(sum(y[, k] * (r[, pls] - u[k, pls]) ^ 2) / sumi_yik[k]) - n2[k] <- 1 / sum((y[, k] / sum(y[, k])) ^ 2) + t[k, pls] <- sqrt(sum(y[, k] * (r[, pls] - u[k, pls])^2) / sumi_yik[k]) + n2[k] <- 1 / sum((y[, k] / sum(y[, k]))^2) t[k, pls] <- t[k, pls] / sqrt(1 - 1 / n2[k]) } - + # Step 3. Calculate new site scores (ri) - #xi; 1*nsite - r[, pls] <- (y %*% (u[, pls] / t[, pls] ^ 2)) / (y %*% (1 / t[, pls] ^ 2)) - + taxa_to_keep <- which(t[, pls] != 0) # remove those taxa with 0 tolerance + r[, pls] <- (y[, taxa_to_keep] %*% (u[taxa_to_keep, pls] / t[taxa_to_keep, pls]^2)) / (y[, taxa_to_keep] %*% (1 / t[taxa_to_keep, pls]^2)) + # Step 4. For the first axis go to Step 5. - + # Step 5. Standardize the new site scores (ri) ter braak 1987 5.2.c z[, pls] <- mean(r[, pls], na.rm = TRUE) - s[, pls] <- sqrt(sum((r[, pls] - z[, pls]) ^ 2, na.rm = TRUE) / Ytottot) + s[, pls] <- sqrt(sum((r[, pls] - z[, pls])^2, na.rm = TRUE) / sum(y)) r[, pls] <- (r[, pls] - z[, pls]) / s[, pls] - + # Step 6. Take the standardized score as the new component comp[, pls] <- r[, pls] - + # Step 7. Regress the environmental variable on the components obtained so far - # using weights and take the fitted values as current estimates + # using weights and take the fitted values as current estimates if (usefx == FALSE) { - lm <- MASS::rlm(modern_climate ~ comp[, 1:pls], - weights = sumk_yik / Ytottot) + lm <- MASS::rlm( + modern_climate ~ comp[, 1:pls], + weights = sumk_yik / sum(y) + ) } else { - if (fx_method=="bin"){ - lm <- MASS::rlm(modern_climate ~ comp[, 1:pls], - weights = 1 / fxTWAPLS::fx(x, bin) ^ 2) + if (fx_method == "bin") { + lm <- MASS::rlm( + modern_climate ~ comp[, 1:pls], + weights = 1 / fxTWAPLS::fx(x, bin)^2 + ) } else { - lm <- MASS::rlm(modern_climate ~ comp[, 1:pls], - weights = 1 / fxTWAPLS::fx_pspline(x, bin) ^ 2) + lm <- MASS::rlm( + modern_climate ~ comp[, 1:pls], + weights = 1 / fxTWAPLS::fx_pspline(x, bin)^2 + ) } } - + fit[, pls] <- lm[["fitted.values"]] alpha[[pls]] <- lm[["coefficients"]] u_sd[, pls] <- (u[, pls] - z[, pls]) / s[, pls] optimum[, pls] <- alpha[[pls]][1] + u_sd[, pls] * alpha[[pls]][2] - + for (pls in 2:nPLS) { - # Go to Step 2 with the residuals of the regression as the new site + # Go to Step 2 with the residuals of the regression as the new site # scores (ri). r[, pls] <- lm[["residuals"]] - + # Step 2. Calculate new uk and tk - # uk=sumi_yik*xi/sumi_yik; 1*nmodern_taxa u[, pls] <- t(y) %*% r[, pls] / sumi_yik n2 <- matrix(NA, nc, 1) for (k in 1:nc) { - t[k, pls] <- sqrt(sum(y[, k] * (r[, pls] - u[k, pls]) ^ 2) / sumi_yik[k]) - n2[k] <- 1 / sum((y[, k] / sum(y[, k])) ^ 2) + t[k, pls] <- sqrt(sum(y[, k] * (r[, pls] - u[k, pls])^2) / sumi_yik[k]) + n2[k] <- 1 / sum((y[, k] / sum(y[, k]))^2) t[k, pls] <- t[k, pls] / sqrt(1 - 1 / n2[k]) } - - # Step 3. Calculate new site scores (r;) by weighted averaging of the + + # Step 3. Calculate new site scores (r;) by weighted averaging of the # species scores, i.e. new - r[,pls] <- (y%*%(u[,pls]/t[,pls]^2))/(y%*%(1/t[,pls]^2)); #xi; 1*nsite - - # Step 4. 
For second and higher components, make the new site scores (r;) - # uncorrelated with the previous components by orthogonalization + taxa_to_keep <- which(t[, pls] != 0) # remove those taxa with 0 tolerance + r[, pls] <- (y[, taxa_to_keep] %*% (u[taxa_to_keep, pls] / t[taxa_to_keep, pls]^2)) / (y[, taxa_to_keep] %*% (1 / t[taxa_to_keep, pls]^2)) + + # Step 4. For second and higher components, make the new site scores (r;) + # uncorrelated with the previous components by orthogonalization # (Ter Braak, 1987 : Table 5 .2b) v <- rep(NA, pls - 1) for (j in 1:(pls - 1)) { fi <- r[, pls - j] xi <- r[, pls] - v[pls - j] <- sum(sumk_yik * fi * xi) / Ytottot + v[pls - j] <- sum(sumk_yik * fi * xi) / sum(y) xinew <- xi - v[pls - j] * fi } orth[[pls]] <- v - # plot(xinew~r[,pls]);abline(0,1) r[, pls] <- xinew - + # Step 5. Standardize the new site scores (ri) ter braak 1987 5.2.c z[, pls] <- mean(r[, pls], na.rm = TRUE) - s[, pls] <- sqrt(sum((r[, pls] - z[, pls]) ^ 2, na.rm = TRUE) / Ytottot) + s[, pls] <- sqrt(sum((r[, pls] - z[, pls])^2, na.rm = TRUE) / sum(y)) r[, pls] <- (r[, pls] - z[, pls]) / s[, pls] - + # Step 6. Take the standardized scores as the new component comp[, pls] <- r[, pls] - - # Step 7. Regress the environmental variable (xJ on the components obtained - # so far using weights and take the fitted values as current estimates + + # Step 7. Regress the environmental variable (xJ on the components obtained + # so far using weights and take the fitted values as current estimates if (usefx == FALSE) { - lm <- MASS::rlm(modern_climate ~ comp[, 1:pls], - weights = sumk_yik / Ytottot) + lm <- MASS::rlm( + modern_climate ~ comp[, 1:pls], + weights = sumk_yik / sum(y) + ) } else { - if (fx_method=="bin"){ - lm <- MASS::rlm(modern_climate ~ comp[, 1:pls], - weights = 1 / fxTWAPLS::fx(x, bin) ^ 2) + if (fx_method == "bin") { + lm <- MASS::rlm( + modern_climate ~ comp[, 1:pls], + weights = 1 / fxTWAPLS::fx(x, bin)^2 + ) } else { - lm <- MASS::rlm(modern_climate ~ comp[, 1:pls], - weights = 1 / fxTWAPLS::fx_pspline(x, bin) ^ 2) + lm <- MASS::rlm( + modern_climate ~ comp[, 1:pls], + weights = 1 / fxTWAPLS::fx_pspline(x, bin)^2 + ) } } - + fit[, pls] <- lm[["fitted.values"]] alpha[[pls]] <- lm[["coefficients"]] - + u_sd[, pls] <- (u[, pls] - z[, pls]) / s[, pls] optimum[, pls] <- alpha[[pls]][1] + u_sd[, 1:pls] %*% as.matrix(alpha[[pls]][2:(pls + 1)]) } - - list <- list(fit, - modern_climate, - colnames(modern_taxa), - optimum, - comp, - u, - t, - z, - s, - orth, - alpha, - mean(modern_climate), - nPLS) - names(list) <- c("fit", - "x", - "taxon_name", - "optimum", - "comp", - "u", - "t", - "z", - "s", - "orth", - "alpha", - "meanx", - "nPLS") + + list <- list( + fit, + modern_climate, + colnames(modern_taxa), + optimum, + comp, + u, + t, + z, + s, + orth, + alpha, + mean(modern_climate), + nPLS + ) + names(list) <- c( + "fit", + "x", + "taxon_name", + "optimum", + "comp", + "u", + "t", + "z", + "s", + "orth", + "alpha", + "meanx", + "nPLS" + ) return(list) } #' WA-PLS training function v2 -#' -#' WA-PLS training function, which can perform \code{fx} correction. +#' +#' WA-PLS training function, which can perform \code{fx} correction. #' \code{1/fx} correction will be applied at step 2 and step 7. -#' +#' #' @importFrom stats lm -#' -#' @param modern_taxa The modern taxa abundance data, each row represents a +#' +#' @param modern_taxa The modern taxa abundance data, each row represents a #' sampling site, each column represents a taxon. 
#' @param modern_climate The modern climate value at each sampling site. #' @param nPLS The number of components to be extracted. #' @param usefx Boolean flag on whether or not use \code{fx} correction. -#' @param fx_method Binned or p-spline smoothed \code{fx} correction: if -#' \code{usefx = FALSE}, this should be \code{NA}; otherwise, +#' @param fx_method Binned or p-spline smoothed \code{fx} correction: if +#' \code{usefx = FALSE}, this should be \code{NA}; otherwise, #' \code{\link{fx}} function will be used when choosing "bin"; #' \code{\link{fx_pspline}} function will be used when choosing "pspline". #' @param bin Binwidth to get fx, needed for both binned and p-splined method. #' if \code{usefx = FALSE}, this should be \code{NA}; -#' @return A list of the training results, which will be used by the predict +#' @return A list of the training results, which will be used by the predict #' function. Each element in the list is described below: #' \describe{ #' \item{\code{fit}}{the fitted values using each number of components.} #' \item{\code{x}}{the observed modern climate values.} #' \item{\code{taxon_name}}{the name of each taxon.} -#' \item{\code{optimum}}{the updated taxon optimum (u* in the WA-PLS +#' \item{\code{optimum}}{the updated taxon optimum (u* in the WA-PLS #' paper).} -#' \item{\code{comp}}{each component extracted (will be used in step 7 +#' \item{\code{comp}}{each component extracted (will be used in step 7 #' regression).} #' \item{\code{u}}{taxon optimum for each component (step 2).} -#' \item{\code{z}}{a parameter used in standardization for each component +#' \item{\code{z}}{a parameter used in standardization for each component #' (step 5).} -#' \item{\code{s}}{a parameter used in standardization for each component +#' \item{\code{s}}{a parameter used in standardization for each component #' (step 5).} -#' \item{\code{orth}}{a list that stores orthogonalization parameters +#' \item{\code{orth}}{a list that stores orthogonalization parameters #' (step 4).} #' \item{\code{alpha}}{a list that stores regression coefficients (step 7).} #' \item{\code{meanx}}{mean value of the observed modern climate values.} #' \item{\code{nPLS}}{the total number of components extracted.} #' } -#' +#' #' @export #' #' @examples #' \dontrun{ #' # Load modern pollen data #' modern_pollen <- read.csv("/path/to/modern_pollen.csv") -#' +#' #' # Extract taxa #' taxaColMin <- which(colnames(modern_pollen) == "taxa0") #' taxaColMax <- which(colnames(modern_pollen) == "taxaN") #' taxa <- modern_pollen[, taxaColMin:taxaColMax] -#' -#' # Get the frequency of each climate variable fx -#' fx_Tmin <- fxTWAPLS::fx(modern_pollen$Tmin, bin = 0.02) -#' -#' # MTCO +#' +#' # Training #' fit_Tmin2 <- fxTWAPLS::WAPLS.w2(taxa, modern_pollen$Tmin, nPLS = 5) -#' fit_f_Tmin2 <- fxTWAPLS::WAPLS.w2(taxa, -#' modern_pollen$Tmin, -#' nPLS = 5, -#' usefx = TRUE, -#' fx_method = "bin", -#' bin = 0.02) +#' fit_f_Tmin2 <- fxTWAPLS::WAPLS.w2( +#' taxa, +#' modern_pollen$Tmin, +#' nPLS = 5, +#' usefx = TRUE, +#' fx_method = "bin", +#' bin = 0.02 +#' ) #' } -#' +#' #' @seealso \code{\link{fx}}, \code{\link{TWAPLS.w}}, and #' \code{\link{WAPLS.predict.w}} -WAPLS.w2 <- function(modern_taxa, - modern_climate, - nPLS = 5, - usefx = FALSE, +WAPLS.w2 <- function(modern_taxa, + modern_climate, + nPLS = 5, + usefx = FALSE, fx_method = "bin", bin = NA) { # Step 0. 
Centre the environmental variable by subtracting the weighted mean x <- modern_climate y <- modern_taxa y <- as.matrix(y) - y <- y/rowSums(y) - + y <- y / rowSums(y) + nc <- ncol(modern_taxa) nr <- nrow(modern_taxa) - Ytottot <- sum(y) + sumk_yik <- rowSums(y) sumi_yik <- colSums(y) - + # Define some matrix to store the values u <- matrix(NA, nc, nPLS) # u of each component # u of each component, standardized the same way as r - u_sd <- matrix(NA, nc, nPLS) + u_sd <- matrix(NA, nc, nPLS) optimum <- matrix(NA, nc, nPLS) # u updated r <- matrix(NA, nr, nPLS) # site score z <- matrix(NA, 1, nPLS) # standardize @@ -677,212 +717,224 @@ WAPLS.w2 <- function(modern_taxa, comp <- matrix(NA, nr, nPLS) # each component fit <- matrix(NA, nr, nPLS) # current estimate fr <- matrix(NA, nr, nPLS) # frequency of site score - - + + pls <- 1 - # Step 1. Take the centred environmental variable(xi) as initial site - # scores (ri). + # Step 1. Take the centred environmental variable(xi) as initial site + # scores (ri). r[, pls] <- x - mean(x) - - # Step 2. Calculate new species scores (uk* by weighted averaging of the - # site scores) - if(usefx==FALSE){ - # uk = sumi_yik*xi/sumi_yik; - u[, pls] <- t(y) %*% r[, pls] / sumi_yik - - }else{ - if(fx_method=="bin"){fr[,pls]<-fxTWAPLS::fx(r[,pls],bin=bin)}else{ - fr[,pls]<-fxTWAPLS::fx_pspline(r[,pls],bin=bin)} - #uk=sumi_(yik*xi/fxi)/sumi_(yik/fxi); - u[, pls] <- t(y/fr[,pls]) %*% r[, pls] / colSums(y/fr[,pls]) + + # Step 2. Calculate new species scores (uk* by weighted averaging of the + # site scores) + if (usefx == FALSE) { + u[, pls] <- t(y) %*% r[, pls] / sumi_yik + } else { + if (fx_method == "bin") { + fr[, pls] <- fxTWAPLS::fx(r[, pls], bin = bin) + } else { + fr[, pls] <- fxTWAPLS::fx_pspline(r[, pls], bin = bin) + } + u[, pls] <- t(y / fr[, pls]) %*% r[, pls] / colSums(y / fr[, pls]) } - - # Step 3. Calculate new site scores (ri) by weighted averaging of the species + + # Step 3. Calculate new site scores (ri) by weighted averaging of the species # scores r[, pls] <- y %*% u[, pls] / rowSums(y) # xi=sumk_yik*uk/sumk_yik; 1*nsite - + # Step 4. For the first axis go to Step 5. - + # Step 5. Standardize the new site scores (ri) ter braak 1987 5.2.c z[, pls] <- mean(r[, pls], na.rm = TRUE) - s[, pls] <- sqrt(sum((r[, pls] - z[, pls]) ^ 2, na.rm = TRUE) / Ytottot) + s[, pls] <- sqrt(sum((r[, pls] - z[, pls])^2, na.rm = TRUE) / sum(y)) r[, pls] <- (r[, pls] - z[, pls]) / s[, pls] - + # Step 6. Take the standardized score as the new component comp[, pls] <- r[, pls] - - # Step 7. Regress the environmental variable (xJ on the components obtained + + # Step 7. 
Regress the environmental variable (xJ on the components obtained # so far using weights and take the fitted values as current estimates - if(usefx == FALSE) { - lm <- MASS::rlm(modern_climate ~ comp[, 1:pls], - weights = sumk_yik / Ytottot) - } else{ - - if(fx_method=="bin"){ - lm <- MASS::rlm(modern_climate ~ comp[, 1:pls], - weights = 1 / fxTWAPLS::fx(x,bin) ) - }else{ - lm <- MASS::rlm(modern_climate ~ comp[, 1:pls], - weights = 1 / fxTWAPLS::fx_pspline(x,bin) ) + if (usefx == FALSE) { + lm <- MASS::rlm( + modern_climate ~ comp[, 1:pls], + weights = sumk_yik / sum(y) + ) + } else { + if (fx_method == "bin") { + lm <- MASS::rlm( + modern_climate ~ comp[, 1:pls], + weights = 1 / fxTWAPLS::fx(x, bin) + ) + } else { + lm <- MASS::rlm( + modern_climate ~ comp[, 1:pls], + weights = 1 / fxTWAPLS::fx_pspline(x, bin) + ) } } - + fit[, pls] <- lm[["fitted.values"]] alpha[[pls]] <- lm[["coefficients"]] u_sd[, pls] <- (u[, pls] - z[, pls]) / s[, pls] optimum[, pls] <- alpha[[pls]][1] + u_sd[, pls] * alpha[[pls]][2] - - for(pls in 2:nPLS) { - # Go to Step 2 with the residuals of the regression as the new site + + for (pls in 2:nPLS) { + # Go to Step 2 with the residuals of the regression as the new site # scores (rJ. r[, pls] <- lm[["residuals"]] - - # Step 2. Calculate new species scores (uk* by weighted averaging of the - # site scores) uk=sumi_(yik*xi/fxi)/sumi_(yik/fxi); - if(usefx==FALSE){ - # uk = sumi_yik*xi/sumi_yik; - u[, pls] <- t(y) %*% r[, pls] / sumi_yik - - }else{ - if(fx_method=="bin"){fr[,pls]<-fxTWAPLS::fx(r[,pls],bin=bin)}else{ - fr[,pls]<-fxTWAPLS::fx_pspline(r[,pls],bin=bin)} - #uk=sumi_(yik*xi/fxi)/sumi_(yik/fxi); - u[, pls] <- t(y/fr[,pls]) %*% r[, pls] / colSums(y/fr[,pls]) + + # Step 2. Calculate new species scores (uk* by weighted averaging of the + # site scores) uk=sumi_(yik*xi/fxi)/sumi_(yik/fxi); + if (usefx == FALSE) { + u[, pls] <- t(y) %*% r[, pls] / sumi_yik + } else { + if (fx_method == "bin") { + fr[, pls] <- fxTWAPLS::fx(r[, pls], bin = bin) + } else { + fr[, pls] <- fxTWAPLS::fx_pspline(r[, pls], bin = bin) + } + u[, pls] <- t(y / fr[, pls]) %*% r[, pls] / colSums(y / fr[, pls]) } - - # Step 3. Calculate new site scores (ri) by weighted averaging of the + + # Step 3. Calculate new site scores (ri) by weighted averaging of the # species scores - # xi=sumk_yik*uk/sumk_yik; 1*nsite - r[, pls] <- y %*% u[, pls] / rowSums(y) - - # Step 4. For second and higher components, make the new site scores (ri) - # uncorrelated with the previous components by orthogonalization + r[, pls] <- y %*% u[, pls] / rowSums(y) + + # Step 4. For second and higher components, make the new site scores (ri) + # uncorrelated with the previous components by orthogonalization # (Ter Braak, 1987 : Table 5 .2b) v <- rep(NA, pls - 1) for (j in 1:(pls - 1)) { fi <- r[, pls - j] xi <- r[, pls] - v[pls - j] <- sum(sumk_yik * fi * xi) / Ytottot + v[pls - j] <- sum(sumk_yik * fi * xi) / sum(y) xinew <- xi - v[pls - j] * fi } orth[[pls]] <- v r[, pls] <- xinew - + # Step 5. Standardize the new site scores (ri) ter braak 1987 5.2.c z[, pls] <- mean(r[, pls], na.rm = TRUE) - s[, pls] <- sqrt(sum((r[, pls] - z[, pls]) ^ 2, na.rm = TRUE) / Ytottot) + s[, pls] <- sqrt(sum((r[, pls] - z[, pls])^2, na.rm = TRUE) / sum(y)) r[, pls] <- (r[, pls] - z[, pls]) / s[, pls] - + # Step 6. Take the standardized score as the new component comp[, pls] <- r[, pls] - - # Step 7. 
Regress the environmental variable on the components obtained so - # far using weights and take the fitted values as current estimates - if(usefx == FALSE) { - lm <- MASS::rlm(modern_climate ~ comp[, 1:pls], - weights = sumk_yik / Ytottot) - } else{ - - if(fx_method=="bin"){ - lm <- MASS::rlm(modern_climate ~ comp[, 1:pls], - weights = 1 / fxTWAPLS::fx(x,bin) ) - }else{ - lm <- MASS::rlm(modern_climate ~ comp[, 1:pls], - weights = 1 / fxTWAPLS::fx_pspline(x,bin) ) + + # Step 7. Regress the environmental variable on the components obtained so + # far using weights and take the fitted values as current estimates + if (usefx == FALSE) { + lm <- MASS::rlm( + modern_climate ~ comp[, 1:pls], + weights = sumk_yik / sum(y) + ) + } else { + if (fx_method == "bin") { + lm <- MASS::rlm( + modern_climate ~ comp[, 1:pls], + weights = 1 / fxTWAPLS::fx(x, bin) + ) + } else { + lm <- MASS::rlm( + modern_climate ~ comp[, 1:pls], + weights = 1 / fxTWAPLS::fx_pspline(x, bin) + ) } } - + fit[, pls] <- lm[["fitted.values"]] alpha[[pls]] <- lm[["coefficients"]] - + u_sd[, pls] <- (u[, pls] - z[, pls]) / s[, pls] optimum[, pls] <- alpha[[pls]][1] + u_sd[, 1:pls] %*% as.matrix(alpha[[pls]][2:(pls + 1)]) } - - list <- list(fit, - modern_climate, - colnames(modern_taxa), - optimum, - comp, - u, - z, - s, - orth, - alpha, - mean(modern_climate), - nPLS) - names(list) <- c("fit", - "x", - "taxon_name", - "optimum", - "comp", - "u", - "z", - "s", - "orth", - "alpha", - "meanx", - "nPLS") + + list <- list( + fit, + modern_climate, + colnames(modern_taxa), + optimum, + comp, + u, + z, + s, + orth, + alpha, + mean(modern_climate), + nPLS + ) + names(list) <- c( + "fit", + "x", + "taxon_name", + "optimum", + "comp", + "u", + "z", + "s", + "orth", + "alpha", + "meanx", + "nPLS" + ) return(list) } #' TWA-PLS training function v2 -#' -#' TWA-PLS training function, which can perform \code{fx} correction. +#' +#' TWA-PLS training function, which can perform \code{fx} correction. #' \code{1/fx} correction will be applied at step 2 and step 7. -#' +#' #' @importFrom stats lm -#' +#' #' @inheritParams WAPLS.w #' -#' @return A list of the training results, which will be used by the predict +#' @return A list of the training results, which will be used by the predict #' function. 
Each element in the list is described below: #' \describe{ #' \item{\code{fit}}{the fitted values using each number of components.} #' \item{\code{x}}{the observed modern climate values.} #' \item{\code{taxon_name}}{the name of each taxon.} #' \item{\code{optimum}}{the updated taxon optimum} -#' \item{\code{comp}}{each component extracted (will be used in step 7 +#' \item{\code{comp}}{each component extracted (will be used in step 7 #' regression).} #' \item{\code{u}}{taxon optimum for each component (step 2).} #' \item{\code{t}}{taxon tolerance for each component (step 2).} -#' \item{\code{z}}{a parameter used in standardization for each component +#' \item{\code{z}}{a parameter used in standardization for each component #' (step 5).} -#' \item{\code{s}}{a parameter used in standardization for each component +#' \item{\code{s}}{a parameter used in standardization for each component #' (step 5).} -#' \item{\code{orth}}{a list that stores orthogonalization parameters +#' \item{\code{orth}}{a list that stores orthogonalization parameters #' (step 4).} #' \item{\code{alpha}}{a list that stores regression coefficients (step 7).} #' \item{\code{meanx}}{mean value of the observed modern climate values.} #' \item{\code{nPLS}}{the total number of components extracted.} #' } -#' +#' #' @export #' #' @examples #' \dontrun{ #' # Load modern pollen data #' modern_pollen <- read.csv("/path/to/modern_pollen.csv") -#' +#' #' # Extract taxa #' taxaColMin <- which(colnames(modern_pollen) == "taxa0") #' taxaColMax <- which(colnames(modern_pollen) == "taxaN") #' taxa <- modern_pollen[, taxaColMin:taxaColMax] -#' -#' # Get the frequency of each climate variable fx -#' fx_Tmin <- fxTWAPLS::fx(modern_pollen$Tmin, bin = 0.02) -#' -#' # MTCO +#' +#' # Training #' fit_t_Tmin2 <- fxTWAPLS::TWAPLS.w2(taxa, modern_pollen$Tmin, nPLS = 5) -#' fit_tf_Tmin2 <- fxTWAPLS::TWAPLS.w2(taxa, -#' modern_pollen$Tmin, -#' nPLS = 5, -#' usefx = TRUE, -#' fx_method = "bin", -#' bin = 0.02) +#' fit_tf_Tmin2 <- fxTWAPLS::TWAPLS.w2( +#' taxa, +#' modern_pollen$Tmin, +#' nPLS = 5, +#' usefx = TRUE, +#' fx_method = "bin", +#' bin = 0.02 +#' ) #' } -#' +#' #' @seealso \code{\link{fx}}, \code{\link{TWAPLS.predict.w}}, and #' \code{\link{WAPLS.w}} TWAPLS.w2 <- function(modern_taxa, @@ -890,20 +942,20 @@ TWAPLS.w2 <- function(modern_taxa, nPLS = 5, usefx = FALSE, fx_method = "bin", - bin = NA){ + bin = NA) { # Step 0. Centre the environmental variable by subtracting the weighted mean x <- modern_climate y <- modern_taxa y <- as.matrix(y) - y <- y/rowSums(y) - + y <- y / rowSums(y) + nc <- ncol(modern_taxa) nr <- nrow(modern_taxa) - Ytottot <- sum(y) + sumk_yik <- rowSums(y) sumi_yik <- colSums(y) - - #Define some matrix to store the values + + # Define some matrix to store the values u <- matrix(NA, nc, nPLS) # u of each component # u of each component, standardized the same way as r u_sd <- matrix(NA, nc, nPLS) @@ -917,256 +969,288 @@ TWAPLS.w2 <- function(modern_taxa, comp <- matrix(NA, nr, nPLS) # each component fit <- matrix(NA, nr, nPLS) # current estimate fr <- matrix(NA, nr, nPLS) # frequency of site score - + pls <- 1 - # Step 1. Take the centred environmental variable (xi) as initial site - # scores (ri). + # Step 1. Take the centred environmental variable (xi) as initial site + # scores (ri). r[, pls] <- x - mean(x) - + # Step 2. 
Calculate uk and tk - if(usefx==FALSE){ - u[, pls] <- t(y) %*% r[, pls] / sumi_yik # uk=sumi_yik*xi/sumi_yik; + if (usefx == FALSE) { + u[, pls] <- t(y) %*% r[, pls] / sumi_yik # uk=sumi_yik*xi/sumi_yik; n2 <- matrix(NA, nc, 1) for (k in 1:nc) { - t[k, pls] <- sqrt(sum(y[, k] * (r[, pls] - u[k, pls]) ^ 2) / sumi_yik[k]) - n2[k] <- 1 / sum((y[, k] / sum(y[, k])) ^ 2) + t[k, pls] <- + sqrt(sum(y[, k] * (r[, pls] - u[k, pls])^2) / sumi_yik[k]) + n2[k] <- 1 / sum((y[, k] / sum(y[, k]))^2) t[k, pls] <- t[k, pls] / sqrt(1 - 1 / n2[k]) } - - }else{ - if(fx_method=="bin"){fr[,pls]<-fxTWAPLS::fx(r[,pls],bin=bin)}else{ - fr[,pls]<-fxTWAPLS::fx_pspline(r[,pls],bin=bin)} - #uk=sumi_(yik*xi/fxi)/sumi_(yik/fxi); - u[, pls] <- t(y/fr[,pls]) %*% r[, pls] / colSums(y/fr[,pls]) + } else { + if (fx_method == "bin") { + fr[, pls] <- fxTWAPLS::fx(r[, pls], bin = bin) + } else { + fr[, pls] <- fxTWAPLS::fx_pspline(r[, pls], bin = bin) + } + u[, pls] <- t(y / fr[, pls]) %*% r[, pls] / colSums(y / fr[, pls]) n2 <- matrix(NA, nc, 1) for (k in 1:nc) { - t[k, pls] <- sqrt(sum(y[, k] /fr[,pls]* (r[, pls] - u[k, pls]) ^ 2) / colSums(y/fr[,pls])[k]) - n2[k] <- 1 / sum((y[, k]/fr[,pls] / sum(y[, k]/fr[,pls])) ^ 2) + t[k, pls] <- + sqrt(sum(y[, k] / fr[, pls] * (r[, pls] - u[k, pls])^2) / colSums(y / fr[, pls])[k]) + n2[k] <- 1 / sum((y[, k] / fr[, pls] / sum(y[, k] / fr[, pls]))^2) t[k, pls] <- t[k, pls] / sqrt(1 - 1 / n2[k]) } } - - - + # Step 3. Calculate new site scores (ri) - #xi; 1*nsite - r[, pls] <- (y %*% (u[, pls] / t[, pls] ^ 2)) / (y %*% (1 / t[, pls] ^ 2)) - + # xi; 1*nsite + taxa_to_keep <- which(t[, pls] != 0) # remove those taxa with 0 tolerance + r[, pls] <- (y[, taxa_to_keep] %*% (u[taxa_to_keep, pls] / t[taxa_to_keep, pls]^2)) / (y[, taxa_to_keep] %*% (1 / t[taxa_to_keep, pls]^2)) + # Step 4. For the first axis go to Step 5. - + # Step 5. Standardize the new site scores (ri) ter braak 1987 5.2.c z[, pls] <- mean(r[, pls], na.rm = TRUE) - s[, pls] <- sqrt(sum((r[, pls] - z[, pls]) ^ 2, na.rm = TRUE) / Ytottot) + s[, pls] <- sqrt(sum((r[, pls] - z[, pls])^2, na.rm = TRUE) / sum(y)) r[, pls] <- (r[, pls] - z[, pls]) / s[, pls] - + # Step 6. Take the standardized score as the new component comp[, pls] <- r[, pls] - + # Step 7. Regress the environmental variable on the components obtained so far - # using weights and take the fitted values as current estimates + # using weights and take the fitted values as current estimates if (usefx == FALSE) { - lm <- MASS::rlm(modern_climate ~ comp[, 1:pls], - weights = sumk_yik / Ytottot) - } else{ - - if(fx_method=="bin"){ - lm <- MASS::rlm(modern_climate ~ comp[, 1:pls], - weights = 1 / fxTWAPLS::fx(x,bin) ) - }else{ - lm <- MASS::rlm(modern_climate ~ comp[, 1:pls], - weights = 1 / fxTWAPLS::fx_pspline(x,bin) ) + lm <- MASS::rlm( + modern_climate ~ comp[, 1:pls], + weights = sumk_yik / sum(y) + ) + } else { + if (fx_method == "bin") { + lm <- MASS::rlm( + modern_climate ~ comp[, 1:pls], + weights = 1 / fxTWAPLS::fx(x, bin) + ) + } else { + lm <- MASS::rlm( + modern_climate ~ comp[, 1:pls], + weights = 1 / fxTWAPLS::fx_pspline(x, bin) + ) } } - + fit[, pls] <- lm[["fitted.values"]] alpha[[pls]] <- lm[["coefficients"]] u_sd[, pls] <- (u[, pls] - z[, pls]) / s[, pls] optimum[, pls] <- alpha[[pls]][1] + u_sd[, pls] * alpha[[pls]][2] - + for (pls in 2:nPLS) { - # Go to Step 2 with the residuals of the regression as the new site + # Go to Step 2 with the residuals of the regression as the new site # scores (ri). r[, pls] <- lm[["residuals"]] - + # Step 2. 
Calculate new uk and tk - if(usefx==FALSE){ - u[, pls] <- t(y) %*% r[, pls] / sumi_yik # uk=sumi_yik*xi/sumi_yik; + if (usefx == FALSE) { + u[, pls] <- t(y) %*% r[, pls] / sumi_yik n2 <- matrix(NA, nc, 1) for (k in 1:nc) { - t[k, pls] <- sqrt(sum(y[, k] * (r[, pls] - u[k, pls]) ^ 2) / sumi_yik[k]) - n2[k] <- 1 / sum((y[, k] / sum(y[, k])) ^ 2) + t[k, pls] <- + sqrt(sum(y[, k] * (r[, pls] - u[k, pls])^2) / sumi_yik[k]) + n2[k] <- 1 / sum((y[, k] / sum(y[, k]))^2) t[k, pls] <- t[k, pls] / sqrt(1 - 1 / n2[k]) } - - }else{ - if(fx_method=="bin"){fr[,pls]<-fxTWAPLS::fx(r[,pls],bin=bin)}else{ - fr[,pls]<-fxTWAPLS::fx_pspline(r[,pls],bin=bin)} - #uk=sumi_(yik*xi/fxi)/sumi_(yik/fxi); - u[, pls] <- t(y/fr[,pls]) %*% r[, pls] / colSums(y/fr[,pls]) + } else { + if (fx_method == "bin") { + fr[, pls] <- fxTWAPLS::fx(r[, pls], bin = bin) + } else { + fr[, pls] <- fxTWAPLS::fx_pspline(r[, pls], bin = bin) + } + u[, pls] <- t(y / fr[, pls]) %*% r[, pls] / colSums(y / fr[, pls]) n2 <- matrix(NA, nc, 1) for (k in 1:nc) { - t[k, pls] <- sqrt(sum(y[, k] /fr[,pls]* (r[, pls] - u[k, pls]) ^ 2) / colSums(y/fr[,pls])[k]) - n2[k] <- 1 / sum((y[, k]/fr[,pls] / sum(y[, k]/fr[,pls])) ^ 2) + t[k, pls] <- + sqrt(sum(y[, k] / fr[, pls] * (r[, pls] - u[k, pls])^2) / colSums(y / fr[, pls])[k]) + n2[k] <- + 1 / sum((y[, k] / fr[, pls] / sum(y[, k] / fr[, pls]))^2) t[k, pls] <- t[k, pls] / sqrt(1 - 1 / n2[k]) } } - - # Step 3. Calculate new site scores (r;) by weighted averaging of the + + # Step 3. Calculate new site scores (r;) by weighted averaging of the # species scores, i.e. new - r[,pls] <- (y%*%(u[,pls]/t[,pls]^2))/(y%*%(1/t[,pls]^2)); #xi; 1*nsite - - # Step 4. For second and higher components, make the new site scores (r;) - # uncorrelated with the previous components by orthogonalization + taxa_to_keep <- which(t[, pls] != 0) # remove those taxa with 0 tolerance + r[, pls] <- (y[, taxa_to_keep] %*% (u[taxa_to_keep, pls] / t[taxa_to_keep, pls]^2)) / (y[, taxa_to_keep] %*% (1 / t[taxa_to_keep, pls]^2)) + + # Step 4. For second and higher components, make the new site scores (r;) + # uncorrelated with the previous components by orthogonalization # (Ter Braak, 1987 : Table 5 .2b) v <- rep(NA, pls - 1) for (j in 1:(pls - 1)) { fi <- r[, pls - j] xi <- r[, pls] - v[pls - j] <- sum(sumk_yik * fi * xi) / Ytottot + v[pls - j] <- sum(sumk_yik * fi * xi) / sum(y) xinew <- xi - v[pls - j] * fi } orth[[pls]] <- v - # plot(xinew~r[,pls]);abline(0,1) r[, pls] <- xinew - + # Step 5. Standardize the new site scores (ri) ter braak 1987 5.2.c z[, pls] <- mean(r[, pls], na.rm = TRUE) - s[, pls] <- sqrt(sum((r[, pls] - z[, pls]) ^ 2, na.rm = TRUE) / Ytottot) + s[, pls] <- sqrt(sum((r[, pls] - z[, pls])^2, na.rm = TRUE) / sum(y)) r[, pls] <- (r[, pls] - z[, pls]) / s[, pls] - + # Step 6. Take the standardized scores as the new component comp[, pls] <- r[, pls] - - # Step 7. Regress the environmental variable (xJ on the components obtained - # so far using weights and take the fitted values as current estimates + + # Step 7. 
Regress the environmental variable (xJ on the components obtained + # so far using weights and take the fitted values as current estimates if (usefx == FALSE) { - lm <- MASS::rlm(modern_climate ~ comp[, 1:pls], - weights = sumk_yik / Ytottot) - } else{ - - if(fx_method=="bin"){ - lm <- MASS::rlm(modern_climate ~ comp[, 1:pls], - weights = 1 / fxTWAPLS::fx(x,bin) ) - }else{ - lm <- MASS::rlm(modern_climate ~ comp[, 1:pls], - weights = 1 / fxTWAPLS::fx_pspline(x,bin) ) + lm <- MASS::rlm( + modern_climate ~ comp[, 1:pls], + weights = sumk_yik / sum(y) + ) + } else { + if (fx_method == "bin") { + lm <- MASS::rlm( + modern_climate ~ comp[, 1:pls], + weights = 1 / fxTWAPLS::fx(x, bin) + ) + } else { + lm <- MASS::rlm( + modern_climate ~ comp[, 1:pls], + weights = 1 / fxTWAPLS::fx_pspline(x, bin) + ) } } - + fit[, pls] <- lm[["fitted.values"]] alpha[[pls]] <- lm[["coefficients"]] - + u_sd[, pls] <- (u[, pls] - z[, pls]) / s[, pls] optimum[, pls] <- alpha[[pls]][1] + u_sd[, 1:pls] %*% as.matrix(alpha[[pls]][2:(pls + 1)]) } - - list <- list(fit, - modern_climate, - colnames(modern_taxa), - optimum, - comp, - u, - t, - z, - s, - orth, - alpha, - mean(modern_climate), - nPLS) - names(list) <- c("fit", - "x", - "taxon_name", - "optimum", - "comp", - "u", - "t", - "z", - "s", - "orth", - "alpha", - "meanx", - "nPLS") + + list <- list( + fit, + modern_climate, + colnames(modern_taxa), + optimum, + comp, + u, + t, + z, + s, + orth, + alpha, + mean(modern_climate), + nPLS + ) + names(list) <- c( + "fit", + "x", + "taxon_name", + "optimum", + "comp", + "u", + "t", + "z", + "s", + "orth", + "alpha", + "meanx", + "nPLS" + ) return(list) } #' WA-PLS predict function #' -#' @param WAPLSoutput The output of the \code{\link{WAPLS.w}} training function, +#' @param WAPLSoutput The output of the \code{\link{WAPLS.w}} training function, #' either with or without \code{fx} correction. -#' @param fossil_taxa Fossil taxa abundance data to reconstruct past climates, -#' each row represents a site to be reconstructed, each column represents a +#' @param fossil_taxa Fossil taxa abundance data to reconstruct past climates, +#' each row represents a site to be reconstructed, each column represents a #' taxon. #' -#' @return A list of the reconstruction results. Each element in the list is +#' @return A list of the reconstruction results. 
Each element in the list is #' described below: #' \describe{ #' \item{\code{fit}}{The fitted values using each number of components.} #' \item{\code{nPLS}}{The total number of components extracted.} #' } -#' +#' #' @export #' #' @examples #' \dontrun{ #' # Load modern pollen data #' modern_pollen <- read.csv("/path/to/modern_pollen.csv") -#' +#' #' # Extract taxa #' taxaColMin <- which(colnames(modern_pollen) == "taxa0") #' taxaColMax <- which(colnames(modern_pollen) == "taxaN") #' taxa <- modern_pollen[, taxaColMin:taxaColMax] -#' +#' #' # Load reconstruction data #' Holocene <- read.csv("/path/to/Holocene.csv") #' taxaColMin <- which(colnames(Holocene) == "taxa0") #' taxaColMax <- which(colnames(Holocene) == "taxaN") #' core <- Holocene[, taxaColMin:taxaColMax] -#' -#' # MTCO +#' #' ## Train #' fit_Tmin <- fxTWAPLS::WAPLS.w(taxa, modern_pollen$Tmin, nPLS = 5) -#' fit_f_Tmin <- fxTWAPLS::WAPLS.w(taxa, -#' modern_pollen$Tmin, -#' nPLS = 5, -#' usefx = TRUE, -#' fx_method = "bin", -#' bin = 0.02) +#' fit_f_Tmin <- fxTWAPLS::WAPLS.w( +#' taxa, +#' modern_pollen$Tmin, +#' nPLS = 5, +#' usefx = TRUE, +#' fx_method = "bin", +#' bin = 0.02 +#' ) +#' fit_Tmin2 <- fxTWAPLS::WAPLS.w2(taxa, modern_pollen$Tmin, nPLS = 5) +#' fit_f_Tmin2 <- fxTWAPLS::WAPLS.w2( +#' taxa, +#' modern_pollen$Tmin, +#' nPLS = 5, +#' usefx = TRUE, +#' fx_method = "bin", +#' bin = 0.02 +#' ) #' ## Predict #' fossil_Tmin <- fxTWAPLS::WAPLS.predict.w(fit_Tmin, core) #' fossil_f_Tmin <- fxTWAPLS::WAPLS.predict.w(fit_f_Tmin, core) +#' fossil_Tmin2 <- fxTWAPLS::WAPLS.predict.w(fit_Tmin2, core) +#' fossil_f_Tmin2 <- fxTWAPLS::WAPLS.predict.w(fit_f_Tmin2, core) #' } -#' +#' #' @seealso \code{\link{WAPLS.w}} WAPLS.predict.w <- function(WAPLSoutput, fossil_taxa) { y <- fossil_taxa y <- as.matrix(y) + y <- y / rowSums(y) + nc <- ncol(fossil_taxa) nr <- nrow(fossil_taxa) - Ytottot <- sum(y) + sumk_yik <- rowSums(y) - sumi_yik <- colSums(y) - + nPLS <- WAPLSoutput[["nPLS"]] - meanx <- WAPLSoutput[["meanx"]] u <- WAPLSoutput[["u"]] z <- WAPLSoutput[["z"]] s <- WAPLSoutput[["s"]] orth <- WAPLSoutput[["orth"]] alpha <- WAPLSoutput[["alpha"]] - + if (nc != nrow(u)) { print("Number of taxa doesn't match!") } if (all(colnames(fossil_taxa) == WAPLSoutput[["taxon_name"]]) == FALSE) { print("Taxa don't match!") } - + # Define some matrix to store the values fit <- matrix(NA, nr, nPLS) r <- matrix(NA, nr, nPLS) comp <- matrix(NA, nr, nPLS) - + pls <- 1 # xi=sumk_yik*uk/sumk_yik; 1*nsite r[, pls] <- y %*% u[, pls] / sumk_yik # xi=sumk_yik*uk/sumk_yik; 1*nsite @@ -1175,7 +1259,7 @@ WAPLS.predict.w <- function(WAPLSoutput, fossil_taxa) { # multiply the same regression coefficients comp[, pls] <- r[, pls] fit[, 1] <- alpha[[pls]][1] + comp[, pls] * alpha[[pls]][2] - + for (pls in 2:nPLS) { # xi = sumk_yik*uk/sumk_yik; 1*nsite r[, pls] <- y %*% u[, pls] / sumk_yik # xi = sumk_yik*uk/sumk_yik; 1*nsite @@ -1186,7 +1270,7 @@ WAPLS.predict.w <- function(WAPLSoutput, fossil_taxa) { xinew <- xi - orth[[pls]][pls - j] * fi } r[, pls] <- xinew - + # standardize the same way r[, pls] <- (r[, pls] - z[, pls]) / s[, pls] # multiply the same regression coefficients @@ -1194,7 +1278,7 @@ WAPLS.predict.w <- function(WAPLSoutput, fossil_taxa) { fit[, pls] <- alpha[[pls]][1] + comp[, 1:pls] %*% as.matrix(alpha[[pls]][2:(pls + 1)]) } - + list <- list(fit, nPLS) names(list) <- c(c("fit", "nPLS")) return(list) @@ -1202,97 +1286,106 @@ WAPLS.predict.w <- function(WAPLSoutput, fossil_taxa) { #' TWA-PLS predict function #' -#' @param TWAPLSoutput The output of the 
\code{\link{TWAPLS.w}} training +#' @param TWAPLSoutput The output of the \code{\link{TWAPLS.w}} training #' function, either with or without \code{fx} correction. -#' @param fossil_taxa Fossil taxa abundance data to reconstruct past climates, -#' each row represents a site to be reconstructed, each column represents +#' @param fossil_taxa Fossil taxa abundance data to reconstruct past climates, +#' each row represents a site to be reconstructed, each column represents #' a taxon. #' -#' @return A list of the reconstruction results. Each element in the list is +#' @return A list of the reconstruction results. Each element in the list is #' described below: #' \describe{ #' \item{\code{fit}}{the fitted values using each number of components.} #' \item{\code{nPLS}}{the total number of components extracted.} #' } -#' +#' #' @export #' #' @examples #' \dontrun{ #' # Load modern pollen data #' modern_pollen <- read.csv("/path/to/modern_pollen.csv") -#' +#' #' # Extract taxa #' taxaColMin <- which(colnames(modern_pollen) == "taxa0") #' taxaColMax <- which(colnames(modern_pollen) == "taxaN") #' taxa <- modern_pollen[, taxaColMin:taxaColMax] -#' +#' #' # Load reconstruction data #' Holocene <- read.csv("/path/to/Holocene.csv") #' taxaColMin <- which(colnames(Holocene) == "taxa0") #' taxaColMax <- which(colnames(Holocene) == "taxaN") #' core <- Holocene[, taxaColMin:taxaColMax] -#' -#' # MTCO +#' #' ## Train #' fit_t_Tmin <- fxTWAPLS::TWAPLS.w(taxa, modern_pollen$Tmin, nPLS = 5) -#' fit_tf_Tmin <- fxTWAPLS::TWAPLS.w(taxa, -#' modern_pollen$Tmin, -#' nPLS = 5, -#' usefx = TRUE, -#' fx_method = "bin", -#' bin = 0.02) -#' +#' fit_tf_Tmin <- fxTWAPLS::TWAPLS.w( +#' taxa, +#' modern_pollen$Tmin, +#' nPLS = 5, +#' usefx = TRUE, +#' fx_method = "bin", +#' bin = 0.02 +#' ) +#' fit_t_Tmin2 <- fxTWAPLS::TWAPLS.w2(taxa, modern_pollen$Tmin, nPLS = 5) +#' fit_tf_Tmin2 <- fxTWAPLS::TWAPLS.w2( +#' taxa, +#' modern_pollen$Tmin, +#' nPLS = 5, +#' usefx = TRUE, +#' fx_method = "bin", +#' bin = 0.02 +#' ) +#' #' ## Predict #' fossil_t_Tmin <- fxTWAPLS::TWAPLS.predict.w(fit_t_Tmin, core) #' fossil_tf_Tmin <- fxTWAPLS::TWAPLS.predict.w(fit_tf_Tmin, core) +#' fossil_t_Tmin2 <- fxTWAPLS::TWAPLS.predict.w(fit_t_Tmin2, core) +#' fossil_tf_Tmin2 <- fxTWAPLS::TWAPLS.predict.w(fit_tf_Tmin2, core) #' } -#' +#' #' @seealso \code{\link{TWAPLS.w}} TWAPLS.predict.w <- function(TWAPLSoutput, fossil_taxa) { y <- fossil_taxa y <- as.matrix(y) + y <- y / rowSums(y) + nc <- ncol(fossil_taxa) nr <- nrow(fossil_taxa) - Ytottot <- sum(y) - sumk_yik <- rowSums(y) - sumi_yik <- colSums(y) - nPLS <- TWAPLSoutput[["nPLS"]] - meanx <- TWAPLSoutput[["meanx"]] u <- TWAPLSoutput[["u"]] t <- TWAPLSoutput[["t"]] z <- TWAPLSoutput[["z"]] s <- TWAPLSoutput[["s"]] orth <- TWAPLSoutput[["orth"]] alpha <- TWAPLSoutput[["alpha"]] - + if (nc != nrow(u)) { print("Number of taxa doesn't match!") } if (all(colnames(fossil_taxa) == TWAPLSoutput[["taxon_name"]]) == FALSE) { print("Taxa don't match!") } - + # Define some matrix to store the values fit <- matrix(NA, nr, nPLS) r <- matrix(NA, nr, nPLS) comp <- matrix(NA, nr, nPLS) - + pls <- 1 - # xi=sumk_yik*uk/sumk_yik; 1*nsite - # xi; 1*nsite - r[, pls] <- (y %*% (u[, pls] / t[, pls] ^ 2)) / (y %*% (1 / t[, pls] ^ 2)) + taxa_to_keep <- which(t[, pls] != 0) # remove those taxa with 0 tolerance + r[, pls] <- (y[, taxa_to_keep] %*% (u[taxa_to_keep, pls] / t[taxa_to_keep, pls]^2)) / (y[, taxa_to_keep] %*% (1 / t[taxa_to_keep, pls]^2)) + # standardize the same way r[, pls] <- (r[, pls] - z[, pls]) / s[, pls] # 
multiply the same regression coefficients comp[, pls] <- r[, pls] fit[, 1] <- alpha[[pls]][1] + comp[, pls] * alpha[[pls]][2] - + for (pls in 2:nPLS) { - # xi=sumk_yik*uk/sumk_yik; 1*nsite - # xi; 1*nsite - r[, pls] <- (y %*% (u[, pls] / t[, pls] ^ 2)) / (y %*% (1 / t[, pls] ^ 2)) + taxa_to_keep <- which(t[, pls] != 0) # remove those taxa with 0 tolerance + r[, pls] <- (y[, taxa_to_keep] %*% (u[taxa_to_keep, pls] / t[taxa_to_keep, pls]^2)) / (y[, taxa_to_keep] %*% (1 / t[taxa_to_keep, pls]^2)) + # orthoganlization the same way for (j in 1:(pls - 1)) { fi <- r[, pls - j] @@ -1300,7 +1393,7 @@ TWAPLS.predict.w <- function(TWAPLSoutput, fossil_taxa) { xinew <- xi - orth[[pls]][pls - j] * fi } r[, pls] <- xinew - + # standardize the same way r[, pls] <- (r[, pls] - z[, pls]) / s[, pls] # multiply the same regression coefficients @@ -1308,35 +1401,35 @@ TWAPLS.predict.w <- function(TWAPLSoutput, fossil_taxa) { fit[, pls] <- alpha[[pls]][1] + comp[, 1:pls] %*% as.matrix(alpha[[pls]][2:(pls + 1)]) } - + list <- list(fit, nPLS) names(list) <- c(c("fit", "nPLS")) return(list) } #' Calculate Sample Specific Errors -#' +#' #' @importFrom foreach %dopar% #' -#' @param modern_taxa The modern taxa abundance data, each row represents a +#' @param modern_taxa The modern taxa abundance data, each row represents a #' sampling site, each column represents a taxon. #' @param modern_climate The modern climate value at each sampling site -#' @param fossil_taxa Fossil taxa abundance data to reconstruct past climates, -#' each row represents a site to be reconstructed, each column represents a +#' @param fossil_taxa Fossil taxa abundance data to reconstruct past climates, +#' each row represents a site to be reconstructed, each column represents a #' taxon. -#' @param trainfun Training function you want to use, either +#' @param trainfun Training function you want to use, either #' \code{\link{WAPLS.w}} or \code{\link{TWAPLS.w}}. -#' @param predictfun Predict function you want to use: if \code{trainfun} is -#' \code{\link{WAPLS.w}}, then this should be \code{\link{WAPLS.predict.w}}; -#' if \code{trainfun} is \code{\link{TWAPLS.w}}, then this should be +#' @param predictfun Predict function you want to use: if \code{trainfun} is +#' \code{\link{WAPLS.w}}, then this should be \code{\link{WAPLS.predict.w}}; +#' if \code{trainfun} is \code{\link{TWAPLS.w}}, then this should be #' \code{\link{TWAPLS.predict.w}}. #' @param nboot The number of bootstrap cycles you want to use. #' @param nPLS The number of components to be extracted. -#' @param nsig The significant number of components to use to reconstruct past +#' @param nsig The significant number of components to use to reconstruct past #' climates, this can be obtained from the cross-validation results. #' @param usefx Boolean flag on whether or not use \code{fx} correction. -#' @param fx_method Binned or p-spline smoothed \code{fx} correction: if -#' \code{usefx = FALSE}, this should be \code{NA}; otherwise, +#' @param fx_method Binned or p-spline smoothed \code{fx} correction: if +#' \code{usefx = FALSE}, this should be \code{NA}; otherwise, #' \code{\link{fx}} function will be used when choosing "bin"; #' \code{\link{fx_pspline}} function will be used when choosing "pspline". #' @param bin Binwidth to get fx, needed for both binned and p-splined method. 
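The sample-specific error reported by `sse.sample` is the spread of the bootstrap reconstructions: each cycle resamples the modern sites with replacement, refits `trainfun`, re-predicts the fossil samples, and the per-sample error is the square root of the mean squared deviation from the bootstrap mean. A minimal sketch of that final aggregation step, using a made-up `xboot` matrix in place of the real bootstrap reconstructions produced below:

``` r
# `xboot` stands in for the matrix of bootstrap reconstructions:
# one row per fossil sample, one column per bootstrap cycle (values made up)
set.seed(1)
xboot <- matrix(rnorm(10 * 5, mean = 2, sd = 0.3), nrow = 10, ncol = 5)

avg.xboot <- rowMeans(xboot, na.rm = TRUE)                      # bootstrap mean per sample
boot.mean.square <- rowMeans((xboot - avg.xboot)^2, na.rm = TRUE)
sse <- sqrt(boot.mean.square)                                   # sample-specific error
sse
```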
@@ -1355,85 +1448,59 @@ TWAPLS.predict.w <- function(TWAPLSoutput, fossil_taxa) { #' \dontrun{ #' # Load modern pollen data #' modern_pollen <- read.csv("/path/to/modern_pollen.csv") -#' +#' #' # Extract taxa #' taxaColMin <- which(colnames(modern_pollen) == "taxa0") #' taxaColMax <- which(colnames(modern_pollen) == "taxaN") #' taxa <- modern_pollen[, taxaColMin:taxaColMax] -#' +#' #' # Load reconstruction data #' Holocene <- read.csv("/path/to/Holocene.csv") #' taxaColMin <- which(colnames(Holocene) == "taxa0") #' taxaColMax <- which(colnames(Holocene) == "taxaN") #' core <- Holocene[, taxaColMin:taxaColMax] -#' +#' #' ## SSE #' nboot <- 5 # Recommended 1000 -#' ### without fx -#' sse_t_Tmin_WAPLS <- fxTWAPLS::sse.sample(modern_taxa = taxa, -#' modern_climate = modern_pollen$Tmin, -#' fossil_taxa = core, -#' trainfun = fxTWAPLS::TWAPLS.w, -#' predictfun = -#' fxTWAPLS::TWAPLS.predict.w, -#' nboot = nboot, -#' nPLS = 5, -#' nsig = 3, -#' usefx = FALSE, -#' cpus = 2, -#' seed = 1) -#' ### with fx -#' sse_tf_Tmin_WAPLS <- fxTWAPLS::sse.sample(modern_taxa = taxa, -#' modern_climate = -#' modern_pollen$Tmin, -#' fossil_taxa = core, -#' trainfun = fxTWAPLS::TWAPLS.w, -#' predictfun = -#' fxTWAPLS::TWAPLS.predict.w, -#' nboot = nboot, -#' nPLS = 5, -#' nsig = 3, -#' usefx = TRUE, -#' fx_method = "bin", -#' bin = 0.02, -#' cpus = 2, -#' seed = 1) -#' +#' nsig <- 3 # This should be got from the random t-test of the cross validation +#' +#' sse_tf_Tmin2 <- fxTWAPLS::sse.sample( +#' modern_taxa = taxa, +#' modern_climate = modern_pollen$Tmin, +#' fossil_taxa = core, +#' trainfun = fxTWAPLS::TWAPLS.w2, +#' predictfun = fxTWAPLS::TWAPLS.predict.w, +#' nboot = nboot, +#' nPLS = 5, +#' nsig = nsig, +#' usefx = TRUE, +#' fx_method = "bin", +#' bin = 0.02, +#' cpus = 2, +#' seed = 1 +#' ) +#' #' # Run with progress bar #' `%>%` <- magrittr::`%>%` -#' ### without fx -#' sse_t_Tmin_WAPLS <- fxTWAPLS::sse.sample(modern_taxa = taxa, -#' modern_climate = modern_pollen$Tmin, -#' fossil_taxa = core, -#' trainfun = fxTWAPLS::TWAPLS.w, -#' predictfun = -#' fxTWAPLS::TWAPLS.predict.w, -#' nboot = nboot, -#' nPLS = 5, -#' nsig = 3, -#' usefx = FALSE, -#' cpus = 2, -#' seed = 1) %>% fxTWAPLS::pb() -#' ### with fx -#' sse_tf_Tmin_WAPLS <- fxTWAPLS::sse.sample(modern_taxa = taxa, -#' modern_climate = -#' modern_pollen$Tmin, -#' fossil_taxa = core, -#' trainfun = fxTWAPLS::TWAPLS.w, -#' predictfun = -#' fxTWAPLS::TWAPLS.predict.w, -#' nboot = nboot, -#' nPLS = 5, -#' nsig = 3, -#' usefx = TRUE, -#' fx_method = "bin", -#' bin = 0.02, -#' cpus = 2, -#' seed = 1) %>% fxTWAPLS::pb() +#' sse_tf_Tmin2 <- fxTWAPLS::sse.sample( +#' modern_taxa = taxa, +#' modern_climate = modern_pollen$Tmin, +#' fossil_taxa = core, +#' trainfun = fxTWAPLS::TWAPLS.w2, +#' predictfun = fxTWAPLS::TWAPLS.predict.w, +#' nboot = nboot, +#' nPLS = 5, +#' nsig = nsig, +#' usefx = TRUE, +#' fx_method = "bin", +#' bin = 0.02, +#' cpus = 2, +#' seed = 1 +#' ) %>% fxTWAPLS::pb() #' } -#' -#' @seealso \code{\link{fx}}, \code{\link{TWAPLS.w}}, -#' \code{\link{TWAPLS.predict.w}}, \code{\link{WAPLS.w}}, and +#' +#' @seealso \code{\link{fx}}, \code{\link{TWAPLS.w}}, +#' \code{\link{TWAPLS.predict.w}}, \code{\link{WAPLS.w}}, and #' \code{\link{WAPLS.predict.w}} sse.sample <- function(modern_taxa, modern_climate, @@ -1454,24 +1521,25 @@ sse.sample <- function(modern_taxa, # Check the number of CPUs does not exceed the availability avail_cpus <- parallel::detectCores() - 1 cpus <- ifelse(cpus > avail_cpus, avail_cpus, cpus) - + # Start parallel backend - # cl <- 
parallel::makeCluster(cpus) - # on.exit(parallel::stopCluster(cl), add = TRUE) # Stop cluster doFuture::registerDoFuture() - # oplan <- future::plan(future::cluster, workers = cl) oplan <- future::plan(future::multisession, workers = cpus) on.exit(future::plan(oplan), add = TRUE) - + # Set seed for reproducibility set.seed(seed) - - # Make list of row numbers by sampling with - # replacement - k_samples <- replicate(nboot, sample(1:nrow(modern_taxa), - size = nrow(modern_taxa), - replace = TRUE)) - + + # Make list of row numbers by sampling with replacement + k_samples <- replicate( + nboot, + sample( + seq_len(nrow(modern_taxa)), + size = nrow(modern_taxa), + replace = TRUE + ) + ) + # Create list of indices to loop through idx <- 1:nboot # Reduce the list of indices, if test_mode = TRUE @@ -1480,70 +1548,82 @@ sse.sample <- function(modern_taxa, } # Set up progress API p <- progressr::progressor(along = idx) - xboot <- foreach::foreach(i = idx, - .combine = cbind) %dopar% { - tryCatch({ - # Extract list of row numbers by sampling with - # replacement - k <- k_samples[, i] - - # Reorganise modern_taxa obs in k order - modern_taxak <- modern_taxa[k, ] - modern_climatek <- modern_climate[k] - # Strip out cols with no value or one value - modern_taxak <- - modern_taxak[, which(colSums(modern_taxak>0)>=2)] - - # Apply train function, with modern_climate also - # in k order - if (usefx == FALSE) { - mod <- trainfun(modern_taxak, - modern_climatek, - nPLS = nPLS) - } else { - mod <- trainfun(modern_taxak, - modern_climatek, - nPLS = nPLS, - usefx = TRUE, - fx_method = fx_method, - bin = bin) - } - - # Make reconstruction - out <- - predictfun(mod, - fossil_taxa[, which(colSums(modern_taxa[k, ] >0) - >=2)])$fit[, nsig] - p() - out - }, error=function(e){}) - } - + xboot <- foreach::foreach( + i = idx, + .combine = cbind + ) %dopar% { + tryCatch( + { + # Extract list of row numbers by sampling with + # replacement + k <- k_samples[, i] + + # Reorganise modern_taxa obs in k order + modern_taxak <- modern_taxa[k, ] + modern_climatek <- modern_climate[k] + # Strip out cols with no value or one value + modern_taxak <- + modern_taxak[, which(colSums(modern_taxak > 0) >= 2)] + + # Apply train function, with modern_climate also + # in k order + if (usefx == FALSE) { + mod <- trainfun( + modern_taxak, + modern_climatek, + nPLS = nPLS + ) + } else { + mod <- trainfun( + modern_taxak, + modern_climatek, + nPLS = nPLS, + usefx = TRUE, + fx_method = fx_method, + bin = bin + ) + } + + # Make reconstruction + out <- + predictfun( + mod, + fossil_taxa[, which(colSums(modern_taxa[k, ] > 0) >= 2)] + )$fit[, nsig] + p() + out + }, + error = function(e) { + + } + ) + } + avg.xboot <- rowMeans(xboot, na.rm = TRUE) - v1 <- boot.mean.square <- rowMeans((xboot - avg.xboot) ^ 2 , na.rm = TRUE) - return(sqrt(v1)) + boot.mean.square <- rowMeans((xboot - avg.xboot)^2, na.rm = TRUE) + return(sqrt(boot.mean.square)) } #' Leave-one-out cross-validation -#' -#' Leave-one-out cross-validation as +#' +#' Leave-one-out cross-validation as #' \code{rioja} (\url{https://cran.r-project.org/package=rioja}). -#' +#' #' @importFrom foreach %dopar% -#' -#' @param modern_taxa The modern taxa abundance data, each row represents a +#' +#' @param modern_taxa The modern taxa abundance data, each row represents a #' sampling site, each column represents a taxon. #' @param modern_climate The modern climate value at each sampling site. #' @param nPLS The number of components to be extracted. 
-#' @param trainfun Training function you want to use, either +#' @param trainfun Training function you want to use, either #' \code{\link{WAPLS.w}} or \code{\link{TWAPLS.w}}. -#' @param predictfun Predict function you want to use: if \code{trainfun} is -#' \code{\link{WAPLS.w}}, then this should be \code{\link{WAPLS.predict.w}}; -#' if \code{trainfun} is \code{\link{TWAPLS.w}}, then this should be +#' @param predictfun Predict function you want to use: if \code{trainfun} is +#' \code{\link{WAPLS.w}}, then this should be \code{\link{WAPLS.predict.w}}; +#' if \code{trainfun} is \code{\link{TWAPLS.w}}, then this should be #' \code{\link{TWAPLS.predict.w}}. #' @param usefx Boolean flag on whether or not use \code{fx} correction. -#' @param fx_method Binned or p-spline smoothed \code{fx} correction: if -#' \code{usefx = FALSE}, this should be \code{NA}; otherwise, +#' @param fx_method Binned or p-spline smoothed \code{fx} correction: if +#' \code{usefx = FALSE}, this should be \code{NA}; otherwise, #' \code{\link{fx}} function will be used when choosing "bin"; #' \code{\link{fx_pspline}} function will be used when choosing "pspline". #' @param bin Binwidth to get fx, needed for both binned and p-splined method. @@ -1561,58 +1641,44 @@ sse.sample <- function(modern_taxa, #' \dontrun{ #' # Load modern pollen data #' modern_pollen <- read.csv("/path/to/modern_pollen.csv") -#' +#' #' # Extract taxa #' taxaColMin <- which(colnames(modern_pollen) == "taxa0") #' taxaColMax <- which(colnames(modern_pollen) == "taxaN") #' taxa <- modern_pollen[, taxaColMin:taxaColMax] -#' +#' #' ## LOOCV #' test_mode <- TRUE # It should be set to FALSE before running -#' ### without fx -#' cv_t_Tmin <- fxTWAPLS::cv.w(taxa, -#' modern_pollen$Tmin, -#' nPLS = 5, -#' fxTWAPLS::TWAPLS.w, -#' fxTWAPLS::TWAPLS.predict.w, -#' cpus = 2, # Remove the following line -#' test_mode = test_mode) -#' ### with fx -#' cv_tf_Tmin <- fxTWAPLS::cv.w(taxa, -#' modern_pollen$Tmin, -#' nPLS = 5, -#' fxTWAPLS::TWAPLS.w, -#' fxTWAPLS::TWAPLS.predict.w, -#' usefx = TRUE, -#' fx_method = "bin", -#' bin = 0.02, -#' cpus = 2, # Remove the following line -#' test_mode = test_mode) -#' +#' cv_tf_Tmin2 <- fxTWAPLS::cv.w( +#' taxa, +#' modern_pollen$Tmin, +#' nPLS = 5, +#' fxTWAPLS::TWAPLS.w2, +#' fxTWAPLS::TWAPLS.predict.w, +#' usefx = TRUE, +#' fx_method = "bin", +#' bin = 0.02, +#' cpus = 2, # Remove the following line +#' test_mode = test_mode +#' ) +#' #' # Run with progress bar #' `%>%` <- magrittr::`%>%` -#' ### without fx -#' cv_t_Tmin <- fxTWAPLS::cv.w(taxa, -#' modern_pollen$Tmin, -#' nPLS = 5, -#' fxTWAPLS::TWAPLS.w, -#' fxTWAPLS::TWAPLS.predict.w, -#' cpus = 2, # Remove the following line -#' test_mode = test_mode) %>% fxTWAPLS::pb() -#' ### with fx -#' cv_tf_Tmin <- fxTWAPLS::cv.w(taxa, -#' modern_pollen$Tmin, -#' nPLS = 5, -#' fxTWAPLS::TWAPLS.w, -#' fxTWAPLS::TWAPLS.predict.w, -#' usefx = TRUE, -#' fx_method = "bin", -#' bin = 0.02, -#' cpus = 2, # Remove the following line -#' test_mode = test_mode) %>% fxTWAPLS::pb() +#' cv_tf_Tmin2 <- fxTWAPLS::cv.w( +#' taxa, +#' modern_pollen$Tmin, +#' nPLS = 5, +#' fxTWAPLS::TWAPLS.w2, +#' fxTWAPLS::TWAPLS.predict.w, +#' usefx = TRUE, +#' fx_method = "bin", +#' bin = 0.02, +#' cpus = 2, # Remove the following line +#' test_mode = test_mode +#' ) %>% fxTWAPLS::pb() #' } -#' @seealso \code{\link{fx}}, \code{\link{TWAPLS.w}}, -#' \code{\link{TWAPLS.predict.w}}, \code{\link{WAPLS.w}}, and +#' @seealso \code{\link{fx}}, \code{\link{TWAPLS.w}}, +#' \code{\link{TWAPLS.predict.w}}, \code{\link{WAPLS.w}}, 
and #' \code{\link{WAPLS.predict.w}} cv.w <- function(modern_taxa, modern_climate, @@ -1628,19 +1694,16 @@ cv.w <- function(modern_taxa, i <- NULL # Local binding x <- modern_climate y <- modern_taxa - + # Check the number of CPUs does not exceed the availability avail_cpus <- parallel::detectCores() - 1 cpus <- ifelse(cpus > avail_cpus, avail_cpus, cpus) - + # Start parallel backend - # cl <- parallel::makeCluster(cpus) - # on.exit(parallel::stopCluster(cl), add = TRUE) # Stop cluster doFuture::registerDoFuture() - # oplan <- future::plan(future::cluster, workers = cl) oplan <- future::plan(future::multisession, workers = cpus) on.exit(future::plan(oplan), add = TRUE) - + # Create list of indices to loop through idx <- seq_len(length(x)) # Reduce the list of indices, if test_mode = TRUE @@ -1649,117 +1712,128 @@ cv.w <- function(modern_taxa, } # Set up progress API p <- progressr::progressor(along = idx) - all.cv.out <- foreach::foreach(i = idx, - .combine = rbind, #comb_pb(max(idx)), - .verbose = FALSE) %dopar% { - # Strip out cols with no value or one value - cvtaxa=y[-i,] - cvtaxa <- cvtaxa[, which(colSums(cvtaxa>0)>=2)] - - fit <- trainfun(modern_taxa=cvtaxa, - modern_climate=x[-i], - nPLS=nPLS, - usefx=usefx, - fx_method=fx_method, - bin=bin) - xnew <- predictfun(fit, - y[i, which(colSums(y[-i,]>0)>=2)])[["fit"]] - p() - data.frame(x[i], xnew) - } + all.cv.out <- foreach::foreach( + i = idx, + .combine = rbind, + .verbose = FALSE + ) %dopar% { + # Strip out cols with no value or one value + cvtaxa <- y[-i, ] + cvtaxa <- cvtaxa[, which(colSums(cvtaxa > 0) >= 2)] + + fit <- trainfun( + modern_taxa = cvtaxa, + modern_climate = x[-i], + nPLS = nPLS, + usefx = usefx, + fx_method = fx_method, + bin = bin + ) + xnew <- predictfun( + fit, + y[i, which(colSums(y[-i, ] > 0) >= 2)] + )[["fit"]] + p() + data.frame(x[i], xnew) + } colnames(all.cv.out) <- c("test.x", paste0("comp", 1:nPLS)) return(all.cv.out) } #' Get the distance between points -#' -#' Get the distance between points, the output will be used in +#' +#' Get the distance between points, the output will be used in #' \code{\link{get_pseudo}}. -#' +#' #' @importFrom foreach %dopar% -#' -#' @param point Each row represents a sampling site, the first column is +#' +#' @param point Each row represents a sampling site, the first column is #' longitude and the second column is latitude, both in decimal format. #' @param cpus Number of CPUs for simultaneous iterations to execute, check #' \code{parallel::detectCores()} for available CPUs on your machine. #' @param test_mode Boolean flag to execute the function with a limited number #' of iterations, \code{test_it}, for testing purposes only. #' @param test_it Number of iterations to use in the test mode. -#' -#' @return Distance matrix, the value at the \code{i-th} row, means the distance +#' +#' @return Distance matrix, the value at the \code{i-th} row, means the distance #' between the \code{i-th} sampling site and the whole sampling sites. 
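Each entry of the distance matrix is a great-circle (Haversine) distance in metres, computed with `geosphere::distm` exactly as in the loop below. A minimal sketch for a single pair of points, assuming the `geosphere` package is installed; the coordinates are arbitrary:

``` r
library(geosphere)
# Haversine distance (in metres) between two lon/lat points
p1 <- c(-0.12, 51.50)  # London
p2 <- c(2.35, 48.86)   # Paris
distm(p1, p2, fun = distHaversine)
```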
#' @export -#' +#' #' @examples #' \dontrun{ #' # Load modern pollen data #' modern_pollen <- read.csv("/path/to/modern_pollen.csv") -#' +#' #' point <- modern_pollen[, c("Long", "Lat")] #' test_mode <- TRUE # It should be set to FALSE before running -#' dist <- fxTWAPLS::get_distance(point, -#' cpus = 2, # Remove the following line -#' test_mode = test_mode) +#' dist <- fxTWAPLS::get_distance( +#' point, +#' cpus = 2, # Remove the following line +#' test_mode = test_mode +#' ) #' # Run with progress bar #' `%>%` <- magrittr::`%>%` -#' dist <- fxTWAPLS::get_distance(point, -#' cpus = 2, # Remove the following line -#' test_mode = test_mode) %>% fxTWAPLS::pb() +#' dist <- fxTWAPLS::get_distance( +#' point, +#' cpus = 2, # Remove the following line +#' test_mode = test_mode +#' ) %>% +#' fxTWAPLS::pb() #' } -#' +#' #' @seealso \code{\link{get_pseudo}} get_distance <- function(point, cpus = 4, test_mode = FALSE, test_it = 5) { i <- NULL # Local binding colnames(point) <- c("Long", "Lat") - + # Check the number of CPUs does not exceed the availability avail_cpus <- parallel::detectCores() - 1 cpus <- ifelse(cpus > avail_cpus, avail_cpus, cpus) - + # Start parallel backend - # cl <- parallel::makeCluster(cpus) - # on.exit(parallel::stopCluster(cl), add = TRUE) # Stop cluster doFuture::registerDoFuture() - # oplan <- future::plan(future::cluster, workers = cl) oplan <- future::plan(future::multisession, workers = cpus) on.exit(future::plan(oplan), add = TRUE) - + # Create list of indices to loop through idx <- seq_len(nrow(point)) - + # Reduce the list of indices, if test_mode = TRUE if (test_mode) { idx <- 1:test_it } - + # Set up progress API p <- progressr::progressor(along = idx) - - dist <- foreach::foreach(i = idx, - .combine = rbind) %dopar% { - tmp <- rep(0, nrow(point)) - lon1 <- point[i, "Long"] - lat1 <- point[i, "Lat"] - for (j in seq_len(nrow(point))) { - lon2 <- point[j, "Long"] - lat2 <- point[j, "Lat"] - tmp[j] <- geosphere::distm(c(lon1, lat1), - c(lon2, lat2), - fun = geosphere::distHaversine) - } - p() - tmp - } + + dist <- foreach::foreach( + i = idx, + .combine = rbind + ) %dopar% { + tmp <- rep(0, nrow(point)) + lon1 <- point[i, "Long"] + lat1 <- point[i, "Lat"] + for (j in seq_len(nrow(point))) { + lon2 <- point[j, "Long"] + lat2 <- point[j, "Lat"] + tmp[j] <- geosphere::distm(c(lon1, lat1), + c(lon2, lat2), + fun = geosphere::distHaversine + ) + } + p() + tmp + } return(dist) } #' Get geographically and climatically close sites -#' -#' Get the sites which are both geographically and climatically close to the -#' test site, which could result in pseudo-replication and inflate the -#' cross-validation statistics. The output will be used in +#' +#' Get the sites which are both geographically and climatically close to the +#' test site, which could result in pseudo-replication and inflate the +#' cross-validation statistics. The output will be used in #' \code{\link{cv.pr.w}}. -#' +#' #' @importFrom foreach %dopar% #' #' @param dist Distance matrix which contains the distance from other sites. @@ -1769,7 +1843,7 @@ get_distance <- function(point, cpus = 4, test_mode = FALSE, test_it = 5) { #' @param test_mode Boolean flag to execute the function with a limited number #' of iterations, \code{test_it}, for testing purposes only. #' @param test_it Number of iterations to use in the test mode. -#' +#' #' @return The geographically and climatically close sites to each test site. 
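The list returned by `get_pseudo` is consumed by `cv.pr.w`, which drops every pseudo site of the current test site before refitting (`leave <- unlist(pseudo[i])` further down). A minimal sketch of that row removal with a made-up `pseudo` list; `taxa` and `modern_pollen` are the objects used throughout the examples:

``` r
# Hypothetical output of get_pseudo(): pseudo[[i]] lists the rows that are
# both geographically and climatically close to test site i
pseudo <- list(c(1, 2, 7), c(2, 3), c(1, 3, 9))

i <- 1
leave <- unlist(pseudo[i])  # rows to drop when site 1 is the test site
leave
# In cv.pr.w these rows are removed before refitting, e.g.
# taxa_train    <- taxa[-leave, ]
# climate_train <- modern_pollen$Tmin[-leave]
```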
#' @export #' @@ -1777,22 +1851,29 @@ get_distance <- function(point, cpus = 4, test_mode = FALSE, test_it = 5) { #' \dontrun{ #' # Load modern pollen data #' modern_pollen <- read.csv("/path/to/modern_pollen.csv") -#' +#' #' point <- modern_pollen[, c("Long", "Lat")] #' test_mode <- TRUE # It should be set to FALSE before running -#' dist <- fxTWAPLS::get_distance(point, -#' cpus = 2, # Remove the following line -#' test_mode = test_mode) -#' pseudo_Tmin <- fxTWAPLS::get_pseudo(dist, -#' modern_pollen$Tmin, -#' cpus = 2, # Remove the following line -#' test_mode = test_mode) +#' dist <- fxTWAPLS::get_distance( +#' point, +#' cpus = 2, # Remove the following line +#' test_mode = test_mode +#' ) +#' pseudo_Tmin <- fxTWAPLS::get_pseudo( +#' dist, +#' modern_pollen$Tmin, +#' cpus = 2, # Remove the following line +#' test_mode = test_mode +#' ) #' # Run with progress bar #' `%>%` <- magrittr::`%>%` -#' pseudo_Tmin <- fxTWAPLS::get_pseudo(dist, -#' modern_pollen$Tmin, -#' cpus = 2, # Remove the following line -#' test_mode = test_mode) %>% fxTWAPLS::pb() +#' pseudo_Tmin <- fxTWAPLS::get_pseudo( +#' dist, +#' modern_pollen$Tmin, +#' cpus = 2, # Remove the following line +#' test_mode = test_mode +#' ) %>% +#' fxTWAPLS::pb() #' } #' @seealso \code{\link{get_distance}} get_pseudo <- function(dist, x, cpus = 4, test_mode = FALSE, test_it = 5) { @@ -1800,22 +1881,19 @@ get_pseudo <- function(dist, x, cpus = 4, test_mode = FALSE, test_it = 5) { # Check the number of CPUs does not exceed the availability avail_cpus <- parallel::detectCores() - 1 cpus <- ifelse(cpus > avail_cpus, avail_cpus, cpus) - + # Start parallel backend - # cl <- parallel::makeCluster(cpus) - # on.exit(parallel::stopCluster(cl), add = TRUE) # Stop cluster doFuture::registerDoFuture() - # oplan <- future::plan(future::cluster, workers = cl) oplan <- future::plan(future::multisession, workers = cpus) on.exit(future::plan(oplan), add = TRUE) - + # Create list of indices to loop through idx <- seq_len(length(x)) # Reduce the list of indices, if test_mode = TRUE if (test_mode) { idx <- 1:test_it } - + # Set up progress API p <- progressr::progressor(along = idx) pseudo <- foreach::foreach(i = idx) %dopar% { @@ -1827,24 +1905,24 @@ get_pseudo <- function(dist, x, cpus = 4, test_mode = FALSE, test_it = 5) { } #' Pseudo-removed leave-out cross-validation -#' +#' #' @importFrom foreach %dopar% -#' -#' @param modern_taxa The modern taxa abundance data, each row represents a +#' +#' @param modern_taxa The modern taxa abundance data, each row represents a #' sampling site, each column represents a taxon. #' @param modern_climate The modern climate value at each sampling site. #' @param nPLS The number of components to be extracted. -#' @param trainfun Training function you want to use, either +#' @param trainfun Training function you want to use, either #' \code{\link{WAPLS.w}} or \code{\link{TWAPLS.w}}. -#' @param predictfun Predict function you want to use: if \code{trainfun} is -#' \code{\link{WAPLS.w}}, then this should be \code{\link{WAPLS.predict.w}}; -#' if \code{trainfun} is \code{\link{TWAPLS.w}}, then this should be +#' @param predictfun Predict function you want to use: if \code{trainfun} is +#' \code{\link{WAPLS.w}}, then this should be \code{\link{WAPLS.predict.w}}; +#' if \code{trainfun} is \code{\link{TWAPLS.w}}, then this should be #' \code{\link{TWAPLS.predict.w}}. 
-#' @param pseudo The geographically and climatically close sites to each test +#' @param pseudo The geographically and climatically close sites to each test #' site, obtained from \code{\link{get_pseudo}} function. #' @param usefx Boolean flag on whether or not use \code{fx} correction. -#' @param fx_method Binned or p-spline smoothed \code{fx} correction: if -#' \code{usefx = FALSE}, this should be \code{NA}; otherwise, +#' @param fx_method Binned or p-spline smoothed \code{fx} correction: if +#' \code{usefx = FALSE}, this should be \code{NA}; otherwise, #' \code{\link{fx}} function will be used when choosing "bin"; #' \code{\link{fx_pspline}} function will be used when choosing "pspline". #' @param bin Binwidth to get fx, needed for both binned and p-splined method. @@ -1862,68 +1940,60 @@ get_pseudo <- function(dist, x, cpus = 4, test_mode = FALSE, test_it = 5) { #' \dontrun{ #' # Load modern pollen data #' modern_pollen <- read.csv("/path/to/modern_pollen.csv") -#' +#' #' # Extract taxa #' taxaColMin <- which(colnames(modern_pollen) == "taxa0") #' taxaColMax <- which(colnames(modern_pollen) == "taxaN") #' taxa <- modern_pollen[, taxaColMin:taxaColMax] -#' +#' #' point <- modern_pollen[, c("Long", "Lat")] #' test_mode <- TRUE # It should be set to FALSE before running -#' dist <- fxTWAPLS::get_distance(point, -#' cpus = 2, # Remove the following line -#' test_mode = test_mode) -#' pseudo_Tmin <- fxTWAPLS::get_pseudo(dist, -#' modern_pollen$Tmin, -#' cpus = 2, # Remove the following line -#' test_mode = test_mode) -#' -#' cv_pr_t_Tmin <- fxTWAPLS::cv.pr.w(taxa, -#' modern_pollen$Tmin, -#' nPLS = 5, -#' fxTWAPLS::TWAPLS.w, -#' fxTWAPLS::TWAPLS.predict.w, -#' pseudo_Tmin, -#' cpus = 2, # Remove the following line -#' test_mode = test_mode) -#' cv_pr_tf_Tmin <- fxTWAPLS::cv.pr.w(taxa, -#' modern_pollen$Tmin, -#' nPLS = 5, -#' fxTWAPLS::TWAPLS.w, -#' fxTWAPLS::TWAPLS.predict.w, -#' pseudo_Tmin, -#' usefx = TRUE, -#' fx_method = "bin", -#' bin = 0.02, -#' cpus = 2, # Remove the following line -#' test_mode = test_mode) -#' +#' dist <- fxTWAPLS::get_distance( +#' point, +#' cpus = 2, # Remove the following line +#' test_mode = test_mode +#' ) +#' pseudo_Tmin <- fxTWAPLS::get_pseudo( +#' dist, +#' modern_pollen$Tmin, +#' cpus = 2, # Remove the following line +#' test_mode = test_mode +#' ) +#' +#' cv_pr_tf_Tmin2 <- fxTWAPLS::cv.pr.w( +#' taxa, +#' modern_pollen$Tmin, +#' nPLS = 5, +#' fxTWAPLS::TWAPLS.w2, +#' fxTWAPLS::TWAPLS.predict.w, +#' pseudo_Tmin, +#' usefx = TRUE, +#' fx_method = "bin", +#' bin = 0.02, +#' cpus = 2, # Remove the following line +#' test_mode = test_mode +#' ) +#' #' # Run with progress bar #' `%>%` <- magrittr::`%>%` -#' cv_pr_t_Tmin <- fxTWAPLS::cv.pr.w(taxa, -#' modern_pollen$Tmin, -#' nPLS = 5, -#' fxTWAPLS::TWAPLS.w, -#' fxTWAPLS::TWAPLS.predict.w, -#' pseudo_Tmin, -#' cpus = 2, # Remove the following line -#' test_mode = test_mode) %>% fxTWAPLS::pb() -#' cv_pr_tf_Tmin <- fxTWAPLS::cv.pr.w(taxa, -#' modern_pollen$Tmin, -#' nPLS = 5, -#' fxTWAPLS::TWAPLS.w, -#' fxTWAPLS::TWAPLS.predict.w, -#' pseudo_Tmin, -#' usefx = TRUE, -#' fx_method = "bin", -#' bin = 0.02, -#' cpus = 2, # Remove the following line -#' test_mode = test_mode) %>% fxTWAPLS::pb() -#' +#' cv_pr_tf_Tmin2 <- fxTWAPLS::cv.pr.w( +#' taxa, +#' modern_pollen$Tmin, +#' nPLS = 5, +#' fxTWAPLS::TWAPLS.w2, +#' fxTWAPLS::TWAPLS.predict.w, +#' pseudo_Tmin, +#' usefx = TRUE, +#' fx_method = "bin", +#' bin = 0.02, +#' cpus = 2, # Remove the following line +#' test_mode = test_mode +#' ) %>% +#' fxTWAPLS::pb() #' } 
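The data frame returned by `cv.pr.w` (and `cv.w`) holds the observed climate in `test.x` and one prediction column per component; `rand.t.test.w` summarizes it, but the headline statistics can also be checked directly. A minimal sketch with made-up values:

``` r
cv_out <- data.frame(
  test.x = c(1.0, 2.0, 3.0, 4.0),
  comp1  = c(1.2, 1.8, 3.3, 3.9),
  comp2  = c(1.1, 2.1, 2.9, 4.2)
)
# Per-component RMSEP and R2, matching the definitions used by rand.t.test.w
sapply(cv_out[, -1], function(pred) {
  c(
    RMSEP = sqrt(mean((pred - cv_out$test.x)^2)),
    R2    = cor(pred, cv_out$test.x)^2
  )
})
```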
-#' -#' @seealso \code{\link{fx}}, \code{\link{TWAPLS.w}}, -#' \code{\link{TWAPLS.predict.w}}, \code{\link{WAPLS.w}}, and +#' +#' @seealso \code{\link{fx}}, \code{\link{TWAPLS.w}}, +#' \code{\link{TWAPLS.predict.w}}, \code{\link{WAPLS.w}}, and #' \code{\link{WAPLS.predict.w}} cv.pr.w <- function(modern_taxa, modern_climate, @@ -1940,19 +2010,16 @@ cv.pr.w <- function(modern_taxa, i <- NULL # Local binding x <- modern_climate y <- modern_taxa - + # Check the number of CPUs does not exceed the availability avail_cpus <- parallel::detectCores() - 1 cpus <- ifelse(cpus > avail_cpus, avail_cpus, cpus) - + # Start parallel backend - # cl <- parallel::makeCluster(cpus) - # on.exit(parallel::stopCluster(cl), add = TRUE) # Stop cluster doFuture::registerDoFuture() - # oplan <- future::plan(future::cluster, workers = cl) oplan <- future::plan(future::multisession, workers = cpus) on.exit(future::plan(oplan), add = TRUE) - + # Create list of indices to loop through idx <- seq_len(length(x)) # Reduce the list of indices, if test_mode = TRUE @@ -1961,132 +2028,115 @@ cv.pr.w <- function(modern_taxa, } # Set up progress API p <- progressr::progressor(along = idx) - all.cv.out <- foreach::foreach(i = idx, - .combine = rbind) %dopar% { - leave <- unlist(pseudo[i]) - # Strip out cols with no value or one value - cvtaxa=y[-leave,] - cvtaxa <- cvtaxa[, which(colSums(cvtaxa>0)>=2)] - - fit <- trainfun(modern_taxa=cvtaxa, - modern_climate=x[-leave], - nPLS=nPLS, - usefx=usefx, - fx_method=fx_method, - bin=bin) - xnew <- predictfun(fit, - y[i, which(colSums(y[-leave,]>0)>=2)])[["fit"]] - p() - data.frame(x[i], xnew) - } - + all.cv.out <- foreach::foreach( + i = idx, + .combine = rbind + ) %dopar% { + leave <- unlist(pseudo[i]) + # Strip out cols with no value or one value + cvtaxa <- y[-leave, ] + cvtaxa <- cvtaxa[, which(colSums(cvtaxa > 0) >= 2)] + + fit <- trainfun( + modern_taxa = cvtaxa, + modern_climate = x[-leave], + nPLS = nPLS, + usefx = usefx, + fx_method = fx_method, + bin = bin + ) + xnew <- predictfun( + fit, + y[i, which(colSums(y[-leave, ] > 0) >= 2)] + )[["fit"]] + p() + data.frame(x[i], xnew) + } + # assign column names to all.cv.out colnames(all.cv.out) <- c("test.x", paste0("comp", 1:nPLS)) - return(all.cv.out) } #' Random t-test -#' +#' #' Do a random t-test to the cross-validation results. -#' +#' #' @importFrom stats cor lm rbinom -#' -#' @param cvoutput Cross-validation output either from \code{\link{cv.w}} or +#' +#' @param cvoutput Cross-validation output either from \code{\link{cv.w}} or #' \code{\link{cv.pr.w}}. -#' @param n.perm The number of permutation times to get the p value, which -#' assesses whether using the current number of components is significantly +#' @param n.perm The number of permutation times to get the p value, which +#' assesses whether using the current number of components is significantly #' different from using one less. #' -#' @return A matrix of the statistics of the cross-validation results. Each +#' @return A matrix of the statistics of the cross-validation results. 
Each #' component is described below: #' \describe{ -#' \item{\code{R2}}{the coefficient of determination (the larger, the +#' \item{\code{R2}}{the coefficient of determination (the larger, the #' better the fit).} #' \item{\code{Avg.Bias}}{average bias.} #' \item{\code{Max.Bias}}{maximum bias.} #' \item{\code{Min.Bias}}{minimum bias.} -#' \item{\code{RMSEP}}{root-mean-square error of prediction (the smaller, +#' \item{\code{RMSEP}}{root-mean-square error of prediction (the smaller, #' the better the fit).} -#' \item{\code{delta.RMSEP}}{the percent change of RMSEP using the current +#' \item{\code{delta.RMSEP}}{the percent change of RMSEP using the current #' number of components than using one component less.} -#' \item{\code{p}}{assesses whether using the current number of components -#' is significantly different from using one component less, which is used -#' to choose the last significant number of components to avoid +#' \item{\code{p}}{assesses whether using the current number of components +#' is significantly different from using one component less, which is used +#' to choose the last significant number of components to avoid #' over-fitting.} -#' \item{\code{-}}{The degree of overall compression is assessed by doing -#' linear regression to the cross-validation result and the observed +#' \item{\code{-}}{The degree of overall compression is assessed by doing +#' linear regression to the cross-validation result and the observed #' climate values. #' \itemize{ #' \item \code{Compre.b0}: the intercept. -#' \item \code{Compre.b1}: the slope (the closer to 1, the less the +#' \item \code{Compre.b1}: the slope (the closer to 1, the less the #' overall compression). #' \item \code{Compre.b0.se}: the standard error of the intercept. #' \item \code{Compre.b1.se}: the standard error of the slope. #' } #' } #' } -#' +#' #' @export #' #' @examples #' \dontrun{ -#' # Load modern pollen data -#' modern_pollen <- read.csv("/path/to/modern_pollen.csv") -#' -#' # Extract taxa -#' taxaColMin <- which(colnames(modern_pollen) == "taxa0") -#' taxaColMax <- which(colnames(modern_pollen) == "taxaN") -#' taxa <- modern_pollen[, taxaColMin:taxaColMax] -#' -#' ## LOOCV -#' test_mode <- TRUE # It should be set to FALSE before running -#' ### without fx -#' cv_t_Tmin <- fxTWAPLS::cv.w(taxa, -#' modern_pollen$Tmin, -#' nPLS = 5, -#' fxTWAPLS::TWAPLS.w, -#' fxTWAPLS::TWAPLS.predict.w, -#' cpus = 2, # Remove the following line -#' test_mode = test_mode) %>% fxTWAPLS::pb() -#' ### with fx -#' cv_tf_Tmin <- fxTWAPLS::cv.w(taxa, -#' modern_pollen$Tmin, -#' nPLS = 5, -#' fxTWAPLS::TWAPLS.w, -#' fxTWAPLS::TWAPLS.predict.w, -#' usefx = TRUE, -#' fx_method = "bin", -#' bin = 0.02, -#' cpus = 2, # Remove the following line -#' test_mode = test_mode) %>% fxTWAPLS::pb() -#' +#' #' ## Random t-test -#' rand_t_Tmin <- fxTWAPLS::rand.t.test.w(cv_t_Tmin, n.perm = 999) -#' rand_tf_Tmin <- fxTWAPLS::rand.t.test.w(cv_tf_Tmin, n.perm = 999) +#' rand_pr_tf_Tmin2 <- fxTWAPLS::rand.t.test.w(cv_pr_tf_Tmin2, n.perm = 999) +#' +#' # note: choose the last significant number of components based on the p-value, +#' # see details at Liu Mengmeng, Prentice Iain Colin, ter Braak Cajo J. F., +#' # Harrison Sandy P.. 2020 An improved statistical approach for reconstructing +#' # past climates from biotic assemblages. Proc. R. Soc. A. 476: 20200346. 
+#' # #' } -#' +#' #' @seealso \code{\link{cv.w}} and \code{\link{cv.pr.w}} rand.t.test.w <- function(cvoutput, n.perm = 999) { ncomp <- ncol(cvoutput) - 1 output <- matrix(NA, ncomp, 11) - colnames(output) <- c("R2", - "Avg.Bias", - "Max.Bias", - "Min.Bias", - "RMSEP", - "delta.RMSEP", - "p", - "Compre.b0", - "Compre.b1", - "Compre.b0.se", - "Compre.b1.se") - + colnames(output) <- c( + "R2", + "Avg.Bias", + "Max.Bias", + "Min.Bias", + "RMSEP", + "delta.RMSEP", + "p", + "Compre.b0", + "Compre.b1", + "Compre.b0.se", + "Compre.b1.se" + ) + for (i in 1:ncomp) { cv.x <- cvoutput[, 1] cv.i <- cvoutput[, 1 + i] - output[i, "RMSEP"] <- sqrt(mean((cv.i - cv.x) ^ 2)) - output[i, "R2"] <- cor(cv.i, cv.x) ^ 2 + output[i, "RMSEP"] <- sqrt(mean((cv.i - cv.x)^2)) + output[i, "R2"] <- cor(cv.i, cv.x)^2 output[i, "Avg.Bias"] <- mean(cv.i - cv.x) output[i, "Max.Bias"] <- max(abs(cv.i - cv.x)) output[i, "Min.Bias"] <- min(abs(cv.i - cv.x)) @@ -2098,7 +2148,7 @@ rand.t.test.w <- function(cvoutput, n.perm = 999) { # get delta.RMSEP for (i in 1:ncomp) { if (i == 1) { - rmsep.null <- sqrt(mean((cv.x - mean(cv.x)) ^ 2)) + rmsep.null <- sqrt(mean((cv.x - mean(cv.x))^2)) output[i, "delta.RMSEP"] <- (output[i, "RMSEP"] - rmsep.null) * 100 / rmsep.null } else { @@ -2106,17 +2156,17 @@ rand.t.test.w <- function(cvoutput, n.perm = 999) { (output[i, "RMSEP"] - output[i - 1, "RMSEP"]) * 100 / output[i - 1, "RMSEP"] } } - # get p-value, which describes whether using the number of components now has + # get p-value, which describes whether using the number of components now has # a significant difference than using one less e0 <- cv.x - mean(cv.x) e <- cbind(e0, cvoutput[, 2:ncol(cvoutput)] - cv.x) - + t.res <- vector("numeric", ncomp) t <- vector("numeric", n.perm + 1) t.res[] <- NA n <- nrow(e) for (i in 1:ncomp) { - d <- e[, i] ^ 2 - e[, i + 1] ^ 2 + d <- e[, i]^2 - e[, i + 1]^2 t[1] <- mean(d, na.rm = TRUE) for (j in 1:n.perm) { sig <- 2 * rbinom(n, 1, 0.5) - 1 @@ -2125,143 +2175,131 @@ rand.t.test.w <- function(cvoutput, n.perm = 999) { t.res[i] <- sum(t >= t[1]) / (n.perm + 1) } output[, "p"] <- t.res - + print(output) return(output) } #' Plot the training results -#' -#' Plot the training results, the black line is the 1:1 line, the red line is -#' the linear regression line to fitted and \code{x}, which shows the degree +#' +#' Plot the training results, the black line is the 1:1 line, the red line is +#' the linear regression line to fitted and \code{x}, which shows the degree #' of overall compression. -#' -#' @param train_output Training output, can be the output of WA-PLS, WA-PLS with +#' +#' @param train_output Training output, can be the output of WA-PLS, WA-PLS with #' \code{fx} correction, TWA-PLS, or TWA-PLS with \code{fx} correction. -#' @param col Choose which column of the fitted value to plot, in other words, +#' @param col Choose which column of the fitted value to plot, in other words, #' how many number of components you want to use. -#' +#' #' @export -#' +#' #' @return Plotting status. 
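The `p` column produced by `rand.t.test.w` above comes from a sign-flip permutation test on the per-site difference in squared cross-validation errors between consecutive numbers of components. A standalone sketch of that test with made-up differences:

``` r
set.seed(1)
d <- rnorm(50, mean = 0.05, sd = 0.2)       # stands in for e[, i]^2 - e[, i + 1]^2
n.perm <- 999

t.stat <- numeric(n.perm + 1)
t.stat[1] <- mean(d)                        # observed mean improvement
for (j in 1:n.perm) {
  sig <- 2 * rbinom(length(d), 1, 0.5) - 1  # random +1/-1 signs
  t.stat[j + 1] <- mean(d * sig)
}
p <- sum(t.stat >= t.stat[1]) / (n.perm + 1)
p   # a small p means the extra component gives a real improvement
```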
-#' +#' #' @examples #' \dontrun{ #' # Load modern pollen data #' modern_pollen <- read.csv("/path/to/modern_pollen.csv") -#' +#' #' # Extract taxa #' taxaColMin <- which(colnames(modern_pollen) == "taxa0") #' taxaColMax <- which(colnames(modern_pollen) == "taxaN") #' taxa <- modern_pollen[, taxaColMin:taxaColMax] -#' -#' # MTCO -#' ## WAPLS and fxWAPLS -#' fit_Tmin <- fxTWAPLS::WAPLS.w(taxa, modern_pollen$Tmin, nPLS = 5) -#' fit_f_Tmin <- fxTWAPLS::WAPLS.w(taxa, -#' modern_pollen$Tmin, -#' nPLS = 5, -#' usefx = TRUE, -#' fx_method = "bin", -#' bin = 0.02) -#' ## TWAPLS and fxTWAPLS -#' fit_t_Tmin <- fxTWAPLS::TWAPLS.w(taxa, modern_pollen$Tmin, nPLS = 5) -#' fit_tf_Tmin <- fxTWAPLS::TWAPLS.w(taxa, -#' modern_pollen$Tmin, -#' nPLS = 5, -#' usefx = TRUE, -#' fx_method = "bin", -#' bin = 0.02) -#' fxTWAPLS::plot_train(fit_Tmin, 3) -#' fxTWAPLS::plot_train(fit_f_Tmin, 3) -#' fxTWAPLS::plot_train(fit_t_Tmin, 3) -#' fxTWAPLS::plot_train(fit_tf_Tmin, 3) +#' +#' fit_tf_Tmin2 <- fxTWAPLS::TWAPLS.w2( +#' taxa, +#' modern_pollen$Tmin, +#' nPLS = 5, +#' usefx = TRUE, +#' fx_method = "bin", +#' bin = 0.02 +#' ) +#' +#' nsig <- 3 # This should be got from the random t-test of the cross validation +#' fxTWAPLS::plot_train(fit_tf_Tmin2, nsig) #' } -#' +#' #' @seealso \code{\link{TWAPLS.w}} and \code{\link{WAPLS.w}} plot_train <- function(train_output, col) { x <- train_output[["x"]] fitted <- train_output[["fit"]][, col] plotdata <- cbind.data.frame(x, fitted) - + max <- max(fitted, x) min <- min(fitted, x) - # plot the fitted curve, the black line is the 1:1 line, the red line is the + # plot the fitted curve, the black line is the 1:1 line, the red line is the # linear regression line to fitted and x, which shows the overall compression - ggplot2::ggplot(plotdata, ggplot2::aes(x, fitted)) + - ggplot2::geom_point(size = 0.4) + ggplot2::theme_bw() + - ggplot2::geom_abline(slope = 1, intercept = 0) + - ggplot2::xlim(min, max) + ggplot2::ylim(min, max) + - ggplot2::geom_smooth(method = 'lm', - formula = y ~ x, - color = 'red') + ggplot2::ggplot(plotdata, ggplot2::aes(x, fitted)) + + ggplot2::geom_point(size = 0.4) + + ggplot2::theme_bw() + + ggplot2::geom_abline(slope = 1, intercept = 0) + + ggplot2::xlim(min, max) + + ggplot2::ylim(min, max) + + ggplot2::geom_smooth( + method = "lm", + formula = y ~ x, + color = "red" + ) } #' Plot the residuals -#' -#' Plot the residuals, the black line is 0 line, the red line is the locally -#' estimated scatterplot smoothing, which shows the degree of local +#' +#' Plot the residuals, the black line is 0 line, the red line is the locally +#' estimated scatterplot smoothing, which shows the degree of local #' compression. -#' -#' @param train_output Training output, can be the output of WA-PLS, WA-PLS with +#' +#' @param train_output Training output, can be the output of WA-PLS, WA-PLS with #' \code{fx} correction, TWA-PLS, or TWA-PLS with \code{fx} correction -#' @param col Choose which column of the fitted value to plot, in other words, +#' @param col Choose which column of the fitted value to plot, in other words, #' how many number of components you want to use. -#' +#' #' @export -#' +#' #' @return Plotting status. 
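The red line drawn by `plot_train` above is an ordinary regression of the fitted values on the observed climate, so the overall compression can also be read off numerically: the closer the slope is to 1, the less compression (the same idea as the `Compre.b1` statistic of `rand.t.test.w`). A minimal sketch with made-up values; in practice `obs` and `fit` come from `train_output[["x"]]` and `train_output[["fit"]][, col]`:

``` r
obs <- c(1.0, 2.0, 3.0, 4.0, 5.0)
fit <- c(1.4, 2.2, 3.0, 3.7, 4.5)
coef(lm(fit ~ obs))["obs"]   # compression slope; close to 1 means little compression
```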
-#' +#' #' @examples #' \dontrun{ #' # Load modern pollen data #' modern_pollen <- read.csv("/path/to/modern_pollen.csv") -#' +#' #' # Extract taxa #' taxaColMin <- which(colnames(modern_pollen) == "taxa0") #' taxaColMax <- which(colnames(modern_pollen) == "taxaN") #' taxa <- modern_pollen[, taxaColMin:taxaColMax] -#' -#' # MTCO -#' ## WAPLS and fxWAPLS -#' fit_Tmin <- fxTWAPLS::WAPLS.w(taxa, modern_pollen$Tmin, nPLS = 5) -#' fit_f_Tmin <- fxTWAPLS::WAPLS.w(taxa, -#' modern_pollen$Tmin, -#' nPLS = 5, -#' usefx = TRUE, -#' fx_method = "bin", -#' bin = 0.02) -#' ## TWAPLS and fxTWAPLS -#' fit_t_Tmin <- fxTWAPLS::TWAPLS.w(taxa, modern_pollen$Tmin, nPLS = 5) -#' fit_tf_Tmin <- fxTWAPLS::TWAPLS.w(taxa, -#' modern_pollen$Tmin, -#' nPLS = 5, -#' usefx = TRUE, -#' fx_method = "bin", -#' bin = 0.02) -#' fxTWAPLS::plot_residuals(fit_Tmin, 3) -#' fxTWAPLS::plot_residuals(fit_f_Tmin, 3) -#' fxTWAPLS::plot_residuals(fit_t_Tmin, 3) -#' fxTWAPLS::plot_residuals(fit_tf_Tmin, 3) +#' +#' fit_tf_Tmin2 <- fxTWAPLS::TWAPLS.w2( +#' taxa, +#' modern_pollen$Tmin, +#' nPLS = 5, +#' usefx = TRUE, +#' fx_method = "bin", +#' bin = 0.02 +#' ) +#' +#' nsig <- 3 # This should be got from the random t-test of the cross validation +#' fxTWAPLS::plot_residuals(fit_tf_Tmin2, nsig) #' } -#' +#' #' @seealso \code{\link{TWAPLS.w}} and \code{\link{WAPLS.w}} plot_residuals <- function(train_output, col) { x <- train_output[["x"]] residuals <- train_output[["fit"]][, col] - train_output[["x"]] plotdata <- cbind.data.frame(x, residuals) - + maxr <- max(abs(residuals)) - # plot the residuals, the black line is 0 line, the red line is the locally + # plot the residuals, the black line is 0 line, the red line is the locally # estimated scatterplot smoothing, which shows the degree of local compression - ggplot2::ggplot(plotdata, ggplot2::aes(x, residuals)) + - ggplot2::geom_point(size = 0.4) + ggplot2::theme_bw() + - ggplot2::geom_abline(slope = 0, intercept = 0) + - ggplot2::xlim(min(x), max(x)) + ggplot2::ylim(-maxr, maxr) + - ggplot2::geom_smooth(method = 'loess', - color = 'red', - formula = "y ~ x", - se = FALSE) -} \ No newline at end of file + ggplot2::ggplot(plotdata, ggplot2::aes(x, residuals)) + + ggplot2::geom_point(size = 0.4) + + ggplot2::theme_bw() + + ggplot2::geom_abline(slope = 0, intercept = 0) + + ggplot2::xlim(min(x), max(x)) + + ggplot2::ylim(-maxr, maxr) + + ggplot2::geom_smooth( + method = "loess", + color = "red", + formula = "y ~ x", + se = FALSE + ) +} diff --git a/R/utils.R b/R/utils.R index 3c0e76b..cdbfde0 100644 --- a/R/utils.R +++ b/R/utils.R @@ -9,6 +9,7 @@ pb <- function(expr, ...) { progress_bar <- progressr::handler_progress(format = "(:current/:total) [:bar] :percent") progressr::with_progress(expr, - ..., - handlers = progress_bar) + ..., + handlers = progress_bar + ) } diff --git a/README.Rmd b/README.Rmd index ffb2c7d..23ea3f3 100644 --- a/README.Rmd +++ b/README.Rmd @@ -12,8 +12,7 @@ knitr::opts_chunk$set( ) ``` -## fxTWAPLS: An Improved Version of WA-PLS - +## fxTWAPLS: An Improved Version of WA-PLS logo @@ -55,15 +54,28 @@ remotes::install_github("special-uor/fxTWAPLS", "dev") ## Publications +- ___Latest:___ Liu, M., Shen, Y., González-Sampériz, P., Gil-Romera, G., + ter Braak, C. J. F., Prentice, I. C., and Harrison, S. P.: Holocene climates + of the Iberian Peninsula: pollen-based reconstructions of changes in the + west-east gradient of temperature and moisture, Clim. Past Discuss. 
+ [preprint], , in review, 2021.- + [`fxTWAPLS v0.1.0`](https://github.com/special-uor/fxTWAPLS/releases/tag/v0.1.0/) + + ``` r + install.packages("remotes") + remotes::install_github("special-uor/fxTWAPLS@v0.1.0") + ``` + - Liu Mengmeng, Prentice Iain Colin, ter Braak Cajo J. F., Harrison Sandy P.. 2020 An improved statistical approach for reconstructing past climates from biotic assemblages. _Proc. R. Soc. A._ __476__: 20200346. https://doi.org/10.1098/rspa.2020.0346 - [`fxTWAPLS v0.0.2`](https://github.com/special-uor/fxTWAPLS/releases/tag/v0.0.2/) -```r -install.packages("remotes") -remotes::install_github("special-uor/fxTWAPLS@v0.0.2") -``` + ```r + install.packages("remotes") + remotes::install_github("special-uor/fxTWAPLS@v0.0.2") + ``` + @@ -79,38 +91,167 @@ The following functions can be executed in parallel: To do so, include the `cpus` parameter. For example: ```{r, eval = FALSE} -# without fx -cv_Tmin <- fxTWAPLS::cv.w(taxa, - modern_pollen$Tmin, - nPLS = 5, - fxTWAPLS::WAPLS.w, - fxTWAPLS::WAPLS.predict.w, - cpus = 2) +cv_pr_tf_Tmin2 <- fxTWAPLS::cv.pr.w( + taxa, + modern_pollen$Tmin, + nPLS = 5, + fxTWAPLS::TWAPLS.w2, + fxTWAPLS::TWAPLS.predict.w, + pseudo_Tmin, + usefx = TRUE, + fx_method = "pspline", + bin = 0.02, + cpus = 2 +) ``` Optionally, a progress bar can be displayed for long computations. Just "pipe" the function call to `fxTWAPLS::pb()`. ```{r, eval = FALSE} -# without fx `%>%` <- magrittr::`%>%` -cv_Tmin <- fxTWAPLS::cv.w(taxa, - modern_pollen$Tmin, - nPLS = 5, - fxTWAPLS::WAPLS.w, - fxTWAPLS::WAPLS.predict.w, - cpus = 2) %>% +cv_pr_tf_Tmin2 <- fxTWAPLS::cv.pr.w( + taxa, + modern_pollen$Tmin, + nPLS = 5, + fxTWAPLS::TWAPLS.w2, + fxTWAPLS::TWAPLS.predict.w, + pseudo_Tmin, + usefx = TRUE, + fx_method = "pspline", + bin = 0.02, + cpus = 2 +) %>% fxTWAPLS::pb() ``` Alternatively, if you are not familiar with the "pipe" operator, you can run the following code: ```{r, eval = FALSE} -# without fx -cv_Tmin <- fxTWAPLS::pb(fxTWAPLS::cv.w(taxa, - modern_pollen$Tmin, - nPLS = 5, - fxTWAPLS::WAPLS.w, - fxTWAPLS::WAPLS.predict.w, - cpus = 2)) - +cv_pr_tf_Tmin2 <- fxTWAPLS::pb( + fxTWAPLS::cv.pr.w( + taxa, + modern_pollen$Tmin, + nPLS = 5, + fxTWAPLS::TWAPLS.w2, + fxTWAPLS::TWAPLS.predict.w, + pseudo_Tmin, + usefx = TRUE, + fx_method = "pspline", + bin = 0.02, + cpus = 2 + ) +) +``` + +## Example + +#### Training + +```{r, eval = FALSE} +# Load modern data +modern_pollen <- read.csv("/path/to/modern_pollen.csv") + +# Extract modern pollen taxa +taxaColMin <- which(colnames(modern_pollen) == "taxa0") +taxaColMax <- which(colnames(modern_pollen) == "taxaN") +taxa <- modern_pollen[, taxaColMin:taxaColMax] + +# Set the binwidth to get the sampling frequency of the climate (fx), +# the fit is almost insenitive to binwidth when choosing pspline method. 
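+# With usefx = TRUE the regression is weighted by 1 / fx, the inverse of the
+# sampling frequency of the climate value, so sites from rarely sampled
+# climates are not swamped by sites from common climates.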
+bin <- 0.02 + +# Use fxTWAPLSv2 to train +fit_tf_Tmin2 <- fxTWAPLS::TWAPLS.w2( + taxa, + modern_pollen$Tmin, + nPLS = 5, + usefx = TRUE, + fx_method = "pspline", + bin = bin +) +``` + +#### Cross validation + +```{r, eval = FALSE} +# Set CPUS to run in parallel +CPUS <- 6 + +# Import pipe operator to use with the progress bar +`%>%` <- magrittr::`%>%` + +# Get the location information of each sample +point <- modern_pollen[, c("Long", "Lat")] + +# Get the distance between each point +dist <- fxTWAPLS::get_distance(point, cpus = CPUS) + +# Get the pseudo sites (which are both geographically close and climatically +# close to the test site) which should be removed in cross validation +pseudo_Tmin <- fxTWAPLS::get_pseudo( + dist, + modern_pollen$Tmin, + cpus = CPUS +) + +# Leave-out cross validation +cv_pr_tf_Tmin2 <- fxTWAPLS::cv.pr.w( + taxa, + modern_pollen$Tmin, + nPLS = 5, + fxTWAPLS::TWAPLS.w2, + fxTWAPLS::TWAPLS.predict.w, + pseudo_Tmin, + usefx = TRUE, + fx_method = "pspline", + bin = bin, + cpus = CPUS, + test_mode = FALSE +) %>% + fxTWAPLS::pb() + +# Random t test to the cross validation result +rand_pr_tf_Tmin2 <- + fxTWAPLS::rand.t.test.w(cv_pr_tf_Tmin2, n.perm = 999) +``` + +#### Reconstruction + +```{r, eval = FALSE} +# Load fossil data +Holocene <- read.csv("/path/to/Holocene.csv") + +# Extract fossil pollen taxa +taxaColMin <- which(colnames(Holocene) == "taxa0") +taxaColMax <- which(colnames(Holocene) == "taxaN") +core <- Holocene[, taxaColMin:taxaColMax] + +# Choose nsig (the last significant number of components) based on the p-value +nsig <- 3 + +# Predict +fossil_tf_Tmin2 <- fxTWAPLS::TWAPLS.predict.w(fit_tf_Tmin2, core) + +# Get the sample specific errors +sse_tf_Tmin2 <- fxTWAPLS::sse.sample( + modern_taxa = taxa, + modern_climate = modern_pollen$Tmin, + fossil_taxa = core, + trainfun = fxTWAPLS::TWAPLS.w2, + predictfun = fxTWAPLS::TWAPLS.predict.w, + nboot = nboot, + nPLS = 5, + nsig = nsig, + usefx = TRUE, + fx_method = "pspline", + bin = bin, + cpus = CPUS +) %>% + fxTWAPLS::pb() +# Output +recon_result <- + cbind.data.frame( + recon_Tmin = fossil_tf_Tmin2[["fit"]][, nsig], + sse_recon_Tmin = sse_tf_Tmin2 + ) ``` diff --git a/README.md b/README.md index 5434f0f..1f7a47d 100644 --- a/README.md +++ b/README.md @@ -1,13 +1,12 @@ -## fxTWAPLS: An Improved Version of WA-PLS +## fxTWAPLS: An Improved Version of WA-PLS logo - -[![](https://img.shields.io/badge/devel%20version-0.1.1-yellow.svg)](https://github.com/special-uor/fxTWAPLS) +[![](https://img.shields.io/badge/devel%20version-0.1.2-yellow.svg)](https://github.com/special-uor/fxTWAPLS) [![](https://www.r-pkg.org/badges/version/fxTWAPLS?color=black)](https://cran.r-project.org/package=fxTWAPLS) [![R build status](https://github.com/special-uor/fxTWAPLS/workflows/R-CMD-check/badge.svg)](https://github.com/special-uor/fxTWAPLS/actions) @@ -50,16 +49,29 @@ remotes::install_github("special-uor/fxTWAPLS", "dev") ## Publications +- ***Latest:*** Liu, M., Shen, Y., González-Sampériz, P., Gil-Romera, + G., ter Braak, C. J. F., Prentice, I. C., and Harrison, S. P.: + Holocene climates of the Iberian Peninsula: pollen-based + reconstructions of changes in the west-east gradient of temperature + and moisture, Clim. Past Discuss. \[preprint\], + , in review, 2021.- + [`fxTWAPLS v0.1.0`](https://github.com/special-uor/fxTWAPLS/releases/tag/v0.1.0/) + + ``` r + install.packages("remotes") + remotes::install_github("special-uor/fxTWAPLS@v0.1.0") + ``` + - Liu Mengmeng, Prentice Iain Colin, ter Braak Cajo J. F., Harrison Sandy P.. 
2020 An improved statistical approach for reconstructing past climates from biotic assemblages. *Proc. R. Soc. A.* **476**: 20200346. - [`fxTWAPLS v0.0.2`](https://github.com/special-uor/fxTWAPLS/releases/tag/v0.0.2/) -``` r -install.packages("remotes") -remotes::install_github("special-uor/fxTWAPLS@v0.0.2") -``` + ``` r + install.packages("remotes") + remotes::install_github("special-uor/fxTWAPLS@v0.0.2") + ``` @@ -77,27 +89,37 @@ The following functions can be executed in parallel: To do so, include the `cpus` parameter. For example: ``` r -# without fx -cv_Tmin <- fxTWAPLS::cv.w(taxa, - modern_pollen$Tmin, - nPLS = 5, - fxTWAPLS::WAPLS.w, - fxTWAPLS::WAPLS.predict.w, - cpus = 2) +cv_pr_tf_Tmin2 <- fxTWAPLS::cv.pr.w( + taxa, + modern_pollen$Tmin, + nPLS = 5, + fxTWAPLS::TWAPLS.w2, + fxTWAPLS::TWAPLS.predict.w, + pseudo_Tmin, + usefx = TRUE, + fx_method = "pspline", + bin = 0.02, + cpus = 2 +) ``` Optionally, a progress bar can be displayed for long computations. Just “pipe” the function call to `fxTWAPLS::pb()`. ``` r -# without fx `%>%` <- magrittr::`%>%` -cv_Tmin <- fxTWAPLS::cv.w(taxa, - modern_pollen$Tmin, - nPLS = 5, - fxTWAPLS::WAPLS.w, - fxTWAPLS::WAPLS.predict.w, - cpus = 2) %>% +cv_pr_tf_Tmin2 <- fxTWAPLS::cv.pr.w( + taxa, + modern_pollen$Tmin, + nPLS = 5, + fxTWAPLS::TWAPLS.w2, + fxTWAPLS::TWAPLS.predict.w, + pseudo_Tmin, + usefx = TRUE, + fx_method = "pspline", + bin = 0.02, + cpus = 2 +) %>% fxTWAPLS::pb() ``` @@ -105,12 +127,131 @@ Alternatively, if you are not familiar with the “pipe” operator, you can run the following code: ``` r -# without fx -cv_Tmin <- fxTWAPLS::pb(fxTWAPLS::cv.w(taxa, - modern_pollen$Tmin, - nPLS = 5, - fxTWAPLS::WAPLS.w, - fxTWAPLS::WAPLS.predict.w, - cpus = 2)) - +cv_pr_tf_Tmin2 <- fxTWAPLS::pb( + fxTWAPLS::cv.pr.w( + taxa, + modern_pollen$Tmin, + nPLS = 5, + fxTWAPLS::TWAPLS.w2, + fxTWAPLS::TWAPLS.predict.w, + pseudo_Tmin, + usefx = TRUE, + fx_method = "pspline", + bin = 0.02, + cpus = 2 + ) +) +``` + +## Example + +#### Training + +``` r +# Load modern data +modern_pollen <- read.csv("/path/to/modern_pollen.csv") + +# Extract modern pollen taxa +taxaColMin <- which(colnames(modern_pollen) == "taxa0") +taxaColMax <- which(colnames(modern_pollen) == "taxaN") +taxa <- modern_pollen[, taxaColMin:taxaColMax] + +# Set the binwidth to get the sampling frequency of the climate (fx), +# the fit is almost insenitive to binwidth when choosing pspline method. 
+bin <- 0.02
+
+# Use fxTWAPLS v2 (TWAPLS.w2) to train
+fit_tf_Tmin2 <- fxTWAPLS::TWAPLS.w2(
+  taxa,
+  modern_pollen$Tmin,
+  nPLS = 5,
+  usefx = TRUE,
+  fx_method = "pspline",
+  bin = bin
+)
+```
+
+#### Cross validation
+
+``` r
+# Set CPUS to run in parallel
+CPUS <- 6
+
+# Import pipe operator to use with the progress bar
+`%>%` <- magrittr::`%>%`
+
+# Get the location information of each sample
+point <- modern_pollen[, c("Long", "Lat")]
+
+# Get the distance between each point
+dist <- fxTWAPLS::get_distance(point, cpus = CPUS)
+
+# Get the pseudo sites (which are both geographically and climatically
+# close to the test site); these should be removed in cross-validation
+pseudo_Tmin <- fxTWAPLS::get_pseudo(
+  dist,
+  modern_pollen$Tmin,
+  cpus = CPUS
+)
+
+# Pseudo-removed leave-out cross-validation
+cv_pr_tf_Tmin2 <- fxTWAPLS::cv.pr.w(
+  taxa,
+  modern_pollen$Tmin,
+  nPLS = 5,
+  fxTWAPLS::TWAPLS.w2,
+  fxTWAPLS::TWAPLS.predict.w,
+  pseudo_Tmin,
+  usefx = TRUE,
+  fx_method = "pspline",
+  bin = bin,
+  cpus = CPUS,
+  test_mode = FALSE
+) %>%
+  fxTWAPLS::pb()
+
+# Random t-test on the cross-validation result
+rand_pr_tf_Tmin2 <-
+  fxTWAPLS::rand.t.test.w(cv_pr_tf_Tmin2, n.perm = 999)
+```
+
+#### Reconstruction
+
+``` r
+# Load fossil data
+Holocene <- read.csv("/path/to/Holocene.csv")
+
+# Extract fossil pollen taxa
+taxaColMin <- which(colnames(Holocene) == "taxa0")
+taxaColMax <- which(colnames(Holocene) == "taxaN")
+core <- Holocene[, taxaColMin:taxaColMax]
+
+# Choose nsig (the last significant number of components) based on the p-value
+nsig <- 3
+
+# Predict
+fossil_tf_Tmin2 <- fxTWAPLS::TWAPLS.predict.w(fit_tf_Tmin2, core)
+
+# Get the sample-specific errors
+sse_tf_Tmin2 <- fxTWAPLS::sse.sample(
+  modern_taxa = taxa,
+  modern_climate = modern_pollen$Tmin,
+  fossil_taxa = core,
+  trainfun = fxTWAPLS::TWAPLS.w2,
+  predictfun = fxTWAPLS::TWAPLS.predict.w,
+  nboot = 1000, # number of bootstrap cycles; 1000 is recommended
+  nPLS = 5,
+  nsig = nsig,
+  usefx = TRUE,
+  fx_method = "pspline",
+  bin = bin,
+  cpus = CPUS
+) %>%
+  fxTWAPLS::pb()
+# Output
+recon_result <-
+  cbind.data.frame(
+    recon_Tmin = fossil_tf_Tmin2[["fit"]][, nsig],
+    sse_recon_Tmin = sse_tf_Tmin2
+  )
 ```
diff --git a/man/TWAPLS.predict.w.Rd b/man/TWAPLS.predict.w.Rd
index 998dad6..10490fc 100644
--- a/man/TWAPLS.predict.w.Rd
+++ b/man/TWAPLS.predict.w.Rd
@@ -29,7 +29,7 @@ TWA-PLS predict function
 \dontrun{
 # Load modern pollen data
 modern_pollen <- read.csv("/path/to/modern_pollen.csv")
-
+
 # Extract taxa
 taxaColMin <- which(colnames(modern_pollen) == "taxa0")
 taxaColMax <- which(colnames(modern_pollen) == "taxaN")
@@ -41,19 +41,31 @@ taxaColMin <- which(colnames(Holocene) == "taxa0")
 taxaColMax <- which(colnames(Holocene) == "taxaN")
 core <- Holocene[, taxaColMin:taxaColMax]
 
-# MTCO
 ## Train
 fit_t_Tmin <- fxTWAPLS::TWAPLS.w(taxa, modern_pollen$Tmin, nPLS = 5)
-fit_tf_Tmin <- fxTWAPLS::TWAPLS.w(taxa,
-                                  modern_pollen$Tmin,
-                                  nPLS = 5,
-                                  usefx = TRUE,
-                                  fx_method = "bin",
-                                  bin = 0.02)
-
+fit_tf_Tmin <- fxTWAPLS::TWAPLS.w(
+  taxa,
+  modern_pollen$Tmin,
+  nPLS = 5,
+  usefx = TRUE,
+  fx_method = "bin",
+  bin = 0.02
+)
+fit_t_Tmin2 <- fxTWAPLS::TWAPLS.w2(taxa, modern_pollen$Tmin, nPLS = 5)
+fit_tf_Tmin2 <- fxTWAPLS::TWAPLS.w2(
+  taxa,
+  modern_pollen$Tmin,
+  nPLS = 5,
+  usefx = TRUE,
+  fx_method = "bin",
+  bin = 0.02
+)
+
 ## Predict
 fossil_t_Tmin <- fxTWAPLS::TWAPLS.predict.w(fit_t_Tmin, core)
 fossil_tf_Tmin <- fxTWAPLS::TWAPLS.predict.w(fit_tf_Tmin, core)
+fossil_t_Tmin2 <- fxTWAPLS::TWAPLS.predict.w(fit_t_Tmin2, core)
+fossil_tf_Tmin2 <- fxTWAPLS::TWAPLS.predict.w(fit_tf_Tmin2, core)
 
 }
 }
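The cross-validation step in the README example above processes the full modern data set on several CPUs and can take a long time. A minimal smoke-test sketch is given below before the remaining documentation changes; it assumes the `taxa`, `modern_pollen` and `pseudo_Tmin` objects created in that example, and it relies on the `test_mode` switch shown in the `cv.pr.w()` help page, which must be set back to `FALSE` for the real analysis.

``` r
# Sketch: quick trial run of the cross-validation call
# (`taxa`, `modern_pollen` and `pseudo_Tmin` come from the README example above)
cv_check <- fxTWAPLS::cv.pr.w(
  taxa,
  modern_pollen$Tmin,
  nPLS = 5,
  fxTWAPLS::TWAPLS.w2,
  fxTWAPLS::TWAPLS.predict.w,
  pseudo_Tmin,
  usefx = TRUE,
  fx_method = "pspline",
  bin = 0.02,
  cpus = 2,
  test_mode = TRUE # trial pass only; set to FALSE for the full analysis
)
```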
diff --git a/man/TWAPLS.w.Rd b/man/TWAPLS.w.Rd index 8bf83ed..28bedfc 100644 --- a/man/TWAPLS.w.Rd +++ b/man/TWAPLS.w.Rd @@ -62,20 +62,22 @@ TWA-PLS training function, which can perform \code{fx} correction. \dontrun{ # Load modern pollen data modern_pollen <- read.csv("/path/to/modern_pollen.csv") - + # Extract taxa taxaColMin <- which(colnames(modern_pollen) == "taxa0") taxaColMax <- which(colnames(modern_pollen) == "taxaN") taxa <- modern_pollen[, taxaColMin:taxaColMax] -# MTCO +# Training fit_t_Tmin <- fxTWAPLS::TWAPLS.w(taxa, modern_pollen$Tmin, nPLS = 5) -fit_tf_Tmin <- fxTWAPLS::TWAPLS.w(taxa, - modern_pollen$Tmin, - nPLS = 5, - usefx = TRUE, - fx_method = "bin", - bin = 0.02) +fit_tf_Tmin <- fxTWAPLS::TWAPLS.w( + taxa, + modern_pollen$Tmin, + nPLS = 5, + usefx = TRUE, + fx_method = "bin", + bin = 0.02 +) } } diff --git a/man/TWAPLS.w2.Rd b/man/TWAPLS.w2.Rd index b2c49a1..322befe 100644 --- a/man/TWAPLS.w2.Rd +++ b/man/TWAPLS.w2.Rd @@ -62,23 +62,22 @@ TWA-PLS training function, which can perform \code{fx} correction. \dontrun{ # Load modern pollen data modern_pollen <- read.csv("/path/to/modern_pollen.csv") - + # Extract taxa taxaColMin <- which(colnames(modern_pollen) == "taxa0") taxaColMax <- which(colnames(modern_pollen) == "taxaN") taxa <- modern_pollen[, taxaColMin:taxaColMax] -# Get the frequency of each climate variable fx -fx_Tmin <- fxTWAPLS::fx(modern_pollen$Tmin, bin = 0.02) - -# MTCO +# Training fit_t_Tmin2 <- fxTWAPLS::TWAPLS.w2(taxa, modern_pollen$Tmin, nPLS = 5) -fit_tf_Tmin2 <- fxTWAPLS::TWAPLS.w2(taxa, - modern_pollen$Tmin, - nPLS = 5, - usefx = TRUE, - fx_method = "bin", - bin = 0.02) +fit_tf_Tmin2 <- fxTWAPLS::TWAPLS.w2( + taxa, + modern_pollen$Tmin, + nPLS = 5, + usefx = TRUE, + fx_method = "bin", + bin = 0.02 +) } } diff --git a/man/WAPLS.predict.w.Rd b/man/WAPLS.predict.w.Rd index a7bd72d..f338de5 100644 --- a/man/WAPLS.predict.w.Rd +++ b/man/WAPLS.predict.w.Rd @@ -29,7 +29,7 @@ WA-PLS predict function \dontrun{ # Load modern pollen data modern_pollen <- read.csv("/path/to/modern_pollen.csv") - + # Extract taxa taxaColMin <- which(colnames(modern_pollen) == "taxa0") taxaColMax <- which(colnames(modern_pollen) == "taxaN") @@ -41,18 +41,30 @@ taxaColMin <- which(colnames(Holocene) == "taxa0") taxaColMax <- which(colnames(Holocene) == "taxaN") core <- Holocene[, taxaColMin:taxaColMax] -# MTCO ## Train fit_Tmin <- fxTWAPLS::WAPLS.w(taxa, modern_pollen$Tmin, nPLS = 5) -fit_f_Tmin <- fxTWAPLS::WAPLS.w(taxa, - modern_pollen$Tmin, - nPLS = 5, - usefx = TRUE, - fx_method = "bin", - bin = 0.02) +fit_f_Tmin <- fxTWAPLS::WAPLS.w( + taxa, + modern_pollen$Tmin, + nPLS = 5, + usefx = TRUE, + fx_method = "bin", + bin = 0.02 +) +fit_Tmin2 <- fxTWAPLS::WAPLS.w2(taxa, modern_pollen$Tmin, nPLS = 5) +fit_f_Tmin2 <- fxTWAPLS::WAPLS.w2( + taxa, + modern_pollen$Tmin, + nPLS = 5, + usefx = TRUE, + fx_method = "bin", + bin = 0.02 +) ## Predict fossil_Tmin <- fxTWAPLS::WAPLS.predict.w(fit_Tmin, core) fossil_f_Tmin <- fxTWAPLS::WAPLS.predict.w(fit_f_Tmin, core) +fossil_Tmin2 <- fxTWAPLS::WAPLS.predict.w(fit_Tmin2, core) +fossil_f_Tmin2 <- fxTWAPLS::WAPLS.predict.w(fit_f_Tmin2, core) } } diff --git a/man/WAPLS.w.Rd b/man/WAPLS.w.Rd index ed20f64..6fcb58d 100644 --- a/man/WAPLS.w.Rd +++ b/man/WAPLS.w.Rd @@ -62,20 +62,22 @@ WA-PLS training function, which can perform \code{fx} correction. 
\dontrun{ # Load modern pollen data modern_pollen <- read.csv("/path/to/modern_pollen.csv") - + # Extract taxa taxaColMin <- which(colnames(modern_pollen) == "taxa0") taxaColMax <- which(colnames(modern_pollen) == "taxaN") taxa <- modern_pollen[, taxaColMin:taxaColMax] -# MTCO +# Training fit_Tmin <- fxTWAPLS::WAPLS.w(taxa, modern_pollen$Tmin, nPLS = 5) -fit_f_Tmin <- fxTWAPLS::WAPLS.w(taxa, - modern_pollen$Tmin, - nPLS = 5, - usefx = TRUE, - fx_method = "bin", - bin = 0.02) +fit_f_Tmin <- fxTWAPLS::WAPLS.w( + taxa, + modern_pollen$Tmin, + nPLS = 5, + usefx = TRUE, + fx_method = "bin", + bin = 0.02 +) } } diff --git a/man/WAPLS.w2.Rd b/man/WAPLS.w2.Rd index 7ee6f66..6b9a377 100644 --- a/man/WAPLS.w2.Rd +++ b/man/WAPLS.w2.Rd @@ -62,23 +62,22 @@ WA-PLS training function, which can perform \code{fx} correction. \dontrun{ # Load modern pollen data modern_pollen <- read.csv("/path/to/modern_pollen.csv") - + # Extract taxa taxaColMin <- which(colnames(modern_pollen) == "taxa0") taxaColMax <- which(colnames(modern_pollen) == "taxaN") taxa <- modern_pollen[, taxaColMin:taxaColMax] -# Get the frequency of each climate variable fx -fx_Tmin <- fxTWAPLS::fx(modern_pollen$Tmin, bin = 0.02) - -# MTCO +# Training fit_Tmin2 <- fxTWAPLS::WAPLS.w2(taxa, modern_pollen$Tmin, nPLS = 5) -fit_f_Tmin2 <- fxTWAPLS::WAPLS.w2(taxa, - modern_pollen$Tmin, - nPLS = 5, - usefx = TRUE, - fx_method = "bin", - bin = 0.02) +fit_f_Tmin2 <- fxTWAPLS::WAPLS.w2( + taxa, + modern_pollen$Tmin, + nPLS = 5, + usefx = TRUE, + fx_method = "bin", + bin = 0.02 +) } } diff --git a/man/cv.pr.w.Rd b/man/cv.pr.w.Rd index 59ef02a..4cb9f94 100644 --- a/man/cv.pr.w.Rd +++ b/man/cv.pr.w.Rd @@ -66,7 +66,7 @@ Pseudo-removed leave-out cross-validation \dontrun{ # Load modern pollen data modern_pollen <- read.csv("/path/to/modern_pollen.csv") - + # Extract taxa taxaColMin <- which(colnames(modern_pollen) == "taxa0") taxaColMax <- which(colnames(modern_pollen) == "taxaN") @@ -74,56 +74,48 @@ taxa <- modern_pollen[, taxaColMin:taxaColMax] point <- modern_pollen[, c("Long", "Lat")] test_mode <- TRUE # It should be set to FALSE before running -dist <- fxTWAPLS::get_distance(point, - cpus = 2, # Remove the following line - test_mode = test_mode) -pseudo_Tmin <- fxTWAPLS::get_pseudo(dist, - modern_pollen$Tmin, - cpus = 2, # Remove the following line - test_mode = test_mode) - -cv_pr_t_Tmin <- fxTWAPLS::cv.pr.w(taxa, - modern_pollen$Tmin, - nPLS = 5, - fxTWAPLS::TWAPLS.w, - fxTWAPLS::TWAPLS.predict.w, - pseudo_Tmin, - cpus = 2, # Remove the following line - test_mode = test_mode) -cv_pr_tf_Tmin <- fxTWAPLS::cv.pr.w(taxa, - modern_pollen$Tmin, - nPLS = 5, - fxTWAPLS::TWAPLS.w, - fxTWAPLS::TWAPLS.predict.w, - pseudo_Tmin, - usefx = TRUE, - fx_method = "bin", - bin = 0.02, - cpus = 2, # Remove the following line - test_mode = test_mode) +dist <- fxTWAPLS::get_distance( + point, + cpus = 2, # Remove the following line + test_mode = test_mode +) +pseudo_Tmin <- fxTWAPLS::get_pseudo( + dist, + modern_pollen$Tmin, + cpus = 2, # Remove the following line + test_mode = test_mode +) + +cv_pr_tf_Tmin2 <- fxTWAPLS::cv.pr.w( + taxa, + modern_pollen$Tmin, + nPLS = 5, + fxTWAPLS::TWAPLS.w2, + fxTWAPLS::TWAPLS.predict.w, + pseudo_Tmin, + usefx = TRUE, + fx_method = "bin", + bin = 0.02, + cpus = 2, # Remove the following line + test_mode = test_mode +) # Run with progress bar `\%>\%` <- magrittr::`\%>\%` -cv_pr_t_Tmin <- fxTWAPLS::cv.pr.w(taxa, - modern_pollen$Tmin, - nPLS = 5, - fxTWAPLS::TWAPLS.w, - fxTWAPLS::TWAPLS.predict.w, - pseudo_Tmin, - cpus = 2, # Remove the 
following line - test_mode = test_mode) \%>\% fxTWAPLS::pb() -cv_pr_tf_Tmin <- fxTWAPLS::cv.pr.w(taxa, - modern_pollen$Tmin, - nPLS = 5, - fxTWAPLS::TWAPLS.w, - fxTWAPLS::TWAPLS.predict.w, - pseudo_Tmin, - usefx = TRUE, - fx_method = "bin", - bin = 0.02, - cpus = 2, # Remove the following line - test_mode = test_mode) \%>\% fxTWAPLS::pb() - +cv_pr_tf_Tmin2 <- fxTWAPLS::cv.pr.w( + taxa, + modern_pollen$Tmin, + nPLS = 5, + fxTWAPLS::TWAPLS.w2, + fxTWAPLS::TWAPLS.predict.w, + pseudo_Tmin, + usefx = TRUE, + fx_method = "bin", + bin = 0.02, + cpus = 2, # Remove the following line + test_mode = test_mode +) \%>\% + fxTWAPLS::pb() } } diff --git a/man/cv.w.Rd b/man/cv.w.Rd index be858b7..d78e54e 100644 --- a/man/cv.w.Rd +++ b/man/cv.w.Rd @@ -63,7 +63,7 @@ Leave-one-out cross-validation as \dontrun{ # Load modern pollen data modern_pollen <- read.csv("/path/to/modern_pollen.csv") - + # Extract taxa taxaColMin <- which(colnames(modern_pollen) == "taxa0") taxaColMax <- which(colnames(modern_pollen) == "taxaN") @@ -71,47 +71,33 @@ taxa <- modern_pollen[, taxaColMin:taxaColMax] ## LOOCV test_mode <- TRUE # It should be set to FALSE before running -### without fx -cv_t_Tmin <- fxTWAPLS::cv.w(taxa, - modern_pollen$Tmin, - nPLS = 5, - fxTWAPLS::TWAPLS.w, - fxTWAPLS::TWAPLS.predict.w, - cpus = 2, # Remove the following line - test_mode = test_mode) -### with fx -cv_tf_Tmin <- fxTWAPLS::cv.w(taxa, - modern_pollen$Tmin, - nPLS = 5, - fxTWAPLS::TWAPLS.w, - fxTWAPLS::TWAPLS.predict.w, - usefx = TRUE, - fx_method = "bin", - bin = 0.02, - cpus = 2, # Remove the following line - test_mode = test_mode) +cv_tf_Tmin2 <- fxTWAPLS::cv.w( + taxa, + modern_pollen$Tmin, + nPLS = 5, + fxTWAPLS::TWAPLS.w2, + fxTWAPLS::TWAPLS.predict.w, + usefx = TRUE, + fx_method = "bin", + bin = 0.02, + cpus = 2, # Remove the following line + test_mode = test_mode +) # Run with progress bar `\%>\%` <- magrittr::`\%>\%` -### without fx -cv_t_Tmin <- fxTWAPLS::cv.w(taxa, - modern_pollen$Tmin, - nPLS = 5, - fxTWAPLS::TWAPLS.w, - fxTWAPLS::TWAPLS.predict.w, - cpus = 2, # Remove the following line - test_mode = test_mode) \%>\% fxTWAPLS::pb() -### with fx -cv_tf_Tmin <- fxTWAPLS::cv.w(taxa, - modern_pollen$Tmin, - nPLS = 5, - fxTWAPLS::TWAPLS.w, - fxTWAPLS::TWAPLS.predict.w, - usefx = TRUE, - fx_method = "bin", - bin = 0.02, - cpus = 2, # Remove the following line - test_mode = test_mode) \%>\% fxTWAPLS::pb() +cv_tf_Tmin2 <- fxTWAPLS::cv.w( + taxa, + modern_pollen$Tmin, + nPLS = 5, + fxTWAPLS::TWAPLS.w2, + fxTWAPLS::TWAPLS.predict.w, + usefx = TRUE, + fx_method = "bin", + bin = 0.02, + cpus = 2, # Remove the following line + test_mode = test_mode +) \%>\% fxTWAPLS::pb() } } \seealso{ diff --git a/man/fx_pspline.Rd b/man/fx_pspline.Rd index 76f0908..5596d11 100644 --- a/man/fx_pspline.Rd +++ b/man/fx_pspline.Rd @@ -27,15 +27,21 @@ provide \code{fx} correction for WA-PLS and TWA-PLS. 
modern_pollen <- read.csv("/path/to/modern_pollen.csv") # Get the frequency of each climate variable fx -fx_pspline_Tmin <- fxTWAPLS::fx_pspline(modern_pollen$Tmin, - bin = 0.02, - show_plot = TRUE) -fx_pspline_gdd <- fxTWAPLS::fx_pspline(modern_pollen$gdd, - bin = 20, - show_plot = TRUE) -fx_pspline_alpha <- fxTWAPLS::fx_pspline(modern_pollen$alpha, - bin = 0.002, - show_plot = TRUE) +fx_pspline_Tmin <- fxTWAPLS::fx_pspline( + modern_pollen$Tmin, + bin = 0.02, + show_plot = TRUE +) +fx_pspline_gdd <- fxTWAPLS::fx_pspline( + modern_pollen$gdd, + bin = 20, + show_plot = TRUE +) +fx_pspline_alpha <- fxTWAPLS::fx_pspline( + modern_pollen$alpha, + bin = 0.002, + show_plot = TRUE +) } } diff --git a/man/get_distance.Rd b/man/get_distance.Rd index 1b623f4..977d49c 100644 --- a/man/get_distance.Rd +++ b/man/get_distance.Rd @@ -33,14 +33,19 @@ modern_pollen <- read.csv("/path/to/modern_pollen.csv") point <- modern_pollen[, c("Long", "Lat")] test_mode <- TRUE # It should be set to FALSE before running -dist <- fxTWAPLS::get_distance(point, - cpus = 2, # Remove the following line - test_mode = test_mode) +dist <- fxTWAPLS::get_distance( + point, + cpus = 2, # Remove the following line + test_mode = test_mode +) # Run with progress bar `\%>\%` <- magrittr::`\%>\%` -dist <- fxTWAPLS::get_distance(point, - cpus = 2, # Remove the following line - test_mode = test_mode) \%>\% fxTWAPLS::pb() +dist <- fxTWAPLS::get_distance( + point, + cpus = 2, # Remove the following line + test_mode = test_mode +) \%>\% + fxTWAPLS::pb() } } diff --git a/man/get_pseudo.Rd b/man/get_pseudo.Rd index 4f860a9..c93ee3a 100644 --- a/man/get_pseudo.Rd +++ b/man/get_pseudo.Rd @@ -35,19 +35,26 @@ modern_pollen <- read.csv("/path/to/modern_pollen.csv") point <- modern_pollen[, c("Long", "Lat")] test_mode <- TRUE # It should be set to FALSE before running -dist <- fxTWAPLS::get_distance(point, - cpus = 2, # Remove the following line - test_mode = test_mode) -pseudo_Tmin <- fxTWAPLS::get_pseudo(dist, - modern_pollen$Tmin, - cpus = 2, # Remove the following line - test_mode = test_mode) +dist <- fxTWAPLS::get_distance( + point, + cpus = 2, # Remove the following line + test_mode = test_mode +) +pseudo_Tmin <- fxTWAPLS::get_pseudo( + dist, + modern_pollen$Tmin, + cpus = 2, # Remove the following line + test_mode = test_mode +) # Run with progress bar `\%>\%` <- magrittr::`\%>\%` -pseudo_Tmin <- fxTWAPLS::get_pseudo(dist, - modern_pollen$Tmin, - cpus = 2, # Remove the following line - test_mode = test_mode) \%>\% fxTWAPLS::pb() +pseudo_Tmin <- fxTWAPLS::get_pseudo( + dist, + modern_pollen$Tmin, + cpus = 2, # Remove the following line + test_mode = test_mode +) \%>\% + fxTWAPLS::pb() } } \seealso{ diff --git a/man/plot_residuals.Rd b/man/plot_residuals.Rd index 5032eb1..e490ccc 100644 --- a/man/plot_residuals.Rd +++ b/man/plot_residuals.Rd @@ -25,33 +25,23 @@ compression. 
\dontrun{ # Load modern pollen data modern_pollen <- read.csv("/path/to/modern_pollen.csv") - + # Extract taxa taxaColMin <- which(colnames(modern_pollen) == "taxa0") taxaColMax <- which(colnames(modern_pollen) == "taxaN") taxa <- modern_pollen[, taxaColMin:taxaColMax] -# MTCO -## WAPLS and fxWAPLS -fit_Tmin <- fxTWAPLS::WAPLS.w(taxa, modern_pollen$Tmin, nPLS = 5) -fit_f_Tmin <- fxTWAPLS::WAPLS.w(taxa, - modern_pollen$Tmin, - nPLS = 5, - usefx = TRUE, - fx_method = "bin", - bin = 0.02) -## TWAPLS and fxTWAPLS -fit_t_Tmin <- fxTWAPLS::TWAPLS.w(taxa, modern_pollen$Tmin, nPLS = 5) -fit_tf_Tmin <- fxTWAPLS::TWAPLS.w(taxa, - modern_pollen$Tmin, - nPLS = 5, - usefx = TRUE, - fx_method = "bin", - bin = 0.02) -fxTWAPLS::plot_residuals(fit_Tmin, 3) -fxTWAPLS::plot_residuals(fit_f_Tmin, 3) -fxTWAPLS::plot_residuals(fit_t_Tmin, 3) -fxTWAPLS::plot_residuals(fit_tf_Tmin, 3) +fit_tf_Tmin2 <- fxTWAPLS::TWAPLS.w2( + taxa, + modern_pollen$Tmin, + nPLS = 5, + usefx = TRUE, + fx_method = "bin", + bin = 0.02 +) + +nsig <- 3 # This should be got from the random t-test of the cross validation +fxTWAPLS::plot_residuals(fit_tf_Tmin2, nsig) } } diff --git a/man/plot_train.Rd b/man/plot_train.Rd index 03cc4c0..5a140c0 100644 --- a/man/plot_train.Rd +++ b/man/plot_train.Rd @@ -25,33 +25,23 @@ of overall compression. \dontrun{ # Load modern pollen data modern_pollen <- read.csv("/path/to/modern_pollen.csv") - + # Extract taxa taxaColMin <- which(colnames(modern_pollen) == "taxa0") taxaColMax <- which(colnames(modern_pollen) == "taxaN") taxa <- modern_pollen[, taxaColMin:taxaColMax] -# MTCO -## WAPLS and fxWAPLS -fit_Tmin <- fxTWAPLS::WAPLS.w(taxa, modern_pollen$Tmin, nPLS = 5) -fit_f_Tmin <- fxTWAPLS::WAPLS.w(taxa, - modern_pollen$Tmin, - nPLS = 5, - usefx = TRUE, - fx_method = "bin", - bin = 0.02) -## TWAPLS and fxTWAPLS -fit_t_Tmin <- fxTWAPLS::TWAPLS.w(taxa, modern_pollen$Tmin, nPLS = 5) -fit_tf_Tmin <- fxTWAPLS::TWAPLS.w(taxa, - modern_pollen$Tmin, - nPLS = 5, - usefx = TRUE, - fx_method = "bin", - bin = 0.02) -fxTWAPLS::plot_train(fit_Tmin, 3) -fxTWAPLS::plot_train(fit_f_Tmin, 3) -fxTWAPLS::plot_train(fit_t_Tmin, 3) -fxTWAPLS::plot_train(fit_tf_Tmin, 3) +fit_tf_Tmin2 <- fxTWAPLS::TWAPLS.w2( + taxa, + modern_pollen$Tmin, + nPLS = 5, + usefx = TRUE, + fx_method = "bin", + bin = 0.02 +) + +nsig <- 3 # This should be got from the random t-test of the cross validation +fxTWAPLS::plot_train(fit_tf_Tmin2, nsig) } } diff --git a/man/rand.t.test.w.Rd b/man/rand.t.test.w.Rd index a055dfc..158074e 100644 --- a/man/rand.t.test.w.Rd +++ b/man/rand.t.test.w.Rd @@ -49,39 +49,15 @@ Do a random t-test to the cross-validation results. 
} \examples{ \dontrun{ -# Load modern pollen data -modern_pollen <- read.csv("/path/to/modern_pollen.csv") - -# Extract taxa -taxaColMin <- which(colnames(modern_pollen) == "taxa0") -taxaColMax <- which(colnames(modern_pollen) == "taxaN") -taxa <- modern_pollen[, taxaColMin:taxaColMax] -## LOOCV -test_mode <- TRUE # It should be set to FALSE before running -### without fx -cv_t_Tmin <- fxTWAPLS::cv.w(taxa, - modern_pollen$Tmin, - nPLS = 5, - fxTWAPLS::TWAPLS.w, - fxTWAPLS::TWAPLS.predict.w, - cpus = 2, # Remove the following line - test_mode = test_mode) \%>\% fxTWAPLS::pb() -### with fx -cv_tf_Tmin <- fxTWAPLS::cv.w(taxa, - modern_pollen$Tmin, - nPLS = 5, - fxTWAPLS::TWAPLS.w, - fxTWAPLS::TWAPLS.predict.w, - usefx = TRUE, - fx_method = "bin", - bin = 0.02, - cpus = 2, # Remove the following line - test_mode = test_mode) \%>\% fxTWAPLS::pb() - ## Random t-test -rand_t_Tmin <- fxTWAPLS::rand.t.test.w(cv_t_Tmin, n.perm = 999) -rand_tf_Tmin <- fxTWAPLS::rand.t.test.w(cv_tf_Tmin, n.perm = 999) +rand_pr_tf_Tmin2 <- fxTWAPLS::rand.t.test.w(cv_pr_tf_Tmin2, n.perm = 999) + +# note: choose the last significant number of components based on the p-value, +# see details at Liu Mengmeng, Prentice Iain Colin, ter Braak Cajo J. F., +# Harrison Sandy P.. 2020 An improved statistical approach for reconstructing +# past climates from biotic assemblages. Proc. R. Soc. A. 476: 20200346. +# } } diff --git a/man/sse.sample.Rd b/man/sse.sample.Rd index 0b7c28e..05e82b2 100644 --- a/man/sse.sample.Rd +++ b/man/sse.sample.Rd @@ -77,7 +77,7 @@ Calculate Sample Specific Errors \dontrun{ # Load modern pollen data modern_pollen <- read.csv("/path/to/modern_pollen.csv") - + # Extract taxa taxaColMin <- which(colnames(modern_pollen) == "taxa0") taxaColMax <- which(colnames(modern_pollen) == "taxaN") @@ -91,67 +91,41 @@ core <- Holocene[, taxaColMin:taxaColMax] ## SSE nboot <- 5 # Recommended 1000 -### without fx -sse_t_Tmin_WAPLS <- fxTWAPLS::sse.sample(modern_taxa = taxa, - modern_climate = modern_pollen$Tmin, - fossil_taxa = core, - trainfun = fxTWAPLS::TWAPLS.w, - predictfun = - fxTWAPLS::TWAPLS.predict.w, - nboot = nboot, - nPLS = 5, - nsig = 3, - usefx = FALSE, - cpus = 2, - seed = 1) -### with fx -sse_tf_Tmin_WAPLS <- fxTWAPLS::sse.sample(modern_taxa = taxa, - modern_climate = - modern_pollen$Tmin, - fossil_taxa = core, - trainfun = fxTWAPLS::TWAPLS.w, - predictfun = - fxTWAPLS::TWAPLS.predict.w, - nboot = nboot, - nPLS = 5, - nsig = 3, - usefx = TRUE, - fx_method = "bin", - bin = 0.02, - cpus = 2, - seed = 1) - +nsig <- 3 # This should be got from the random t-test of the cross validation + +sse_tf_Tmin2 <- fxTWAPLS::sse.sample( + modern_taxa = taxa, + modern_climate = modern_pollen$Tmin, + fossil_taxa = core, + trainfun = fxTWAPLS::TWAPLS.w2, + predictfun = fxTWAPLS::TWAPLS.predict.w, + nboot = nboot, + nPLS = 5, + nsig = nsig, + usefx = TRUE, + fx_method = "bin", + bin = 0.02, + cpus = 2, + seed = 1 +) + # Run with progress bar `\%>\%` <- magrittr::`\%>\%` -### without fx -sse_t_Tmin_WAPLS <- fxTWAPLS::sse.sample(modern_taxa = taxa, - modern_climate = modern_pollen$Tmin, - fossil_taxa = core, - trainfun = fxTWAPLS::TWAPLS.w, - predictfun = - fxTWAPLS::TWAPLS.predict.w, - nboot = nboot, - nPLS = 5, - nsig = 3, - usefx = FALSE, - cpus = 2, - seed = 1) \%>\% fxTWAPLS::pb() -### with fx -sse_tf_Tmin_WAPLS <- fxTWAPLS::sse.sample(modern_taxa = taxa, - modern_climate = - modern_pollen$Tmin, - fossil_taxa = core, - trainfun = fxTWAPLS::TWAPLS.w, - predictfun = - fxTWAPLS::TWAPLS.predict.w, - nboot = nboot, - nPLS = 5, 
- nsig = 3, - usefx = TRUE, - fx_method = "bin", - bin = 0.02, - cpus = 2, - seed = 1) \%>\% fxTWAPLS::pb() +sse_tf_Tmin2 <- fxTWAPLS::sse.sample( + modern_taxa = taxa, + modern_climate = modern_pollen$Tmin, + fossil_taxa = core, + trainfun = fxTWAPLS::TWAPLS.w2, + predictfun = fxTWAPLS::TWAPLS.predict.w, + nboot = nboot, + nPLS = 5, + nsig = nsig, + usefx = TRUE, + fx_method = "bin", + bin = 0.02, + cpus = 2, + seed = 1 +) \%>\% fxTWAPLS::pb() } }
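The `sse.sample()` example above ends with the sample-specific errors. As a brief follow-up, the sketch below shows one way the README workflow might be wrapped up; it assumes the `rand_pr_tf_Tmin2`, `fossil_tf_Tmin2` and `sse_tf_Tmin2` objects from the README example are in the workspace, and the output file name is purely illustrative.

``` r
# Inspect the random t-test table and pick nsig as the last component
# with a significant p-value (criterion described in Liu et al. 2020)
print(rand_pr_tf_Tmin2)
nsig <- 3 # replace with the value read from the table above

# Combine the reconstruction with its sample-specific errors and save it
recon_result <- cbind.data.frame(
  recon_Tmin = fossil_tf_Tmin2[["fit"]][, nsig],
  sse_recon_Tmin = sse_tf_Tmin2
)
write.csv(recon_result, "reconstructed_Tmin.csv", row.names = FALSE) # illustrative file name
```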