From 50ffc1849a368f4b52d649510861e9073eabc242 Mon Sep 17 00:00:00 2001 From: wverastegui Date: Thu, 29 Aug 2024 15:18:20 +0200 Subject: [PATCH] docuemented functions --- R/adaptive.bin.R | 30 ++++++++++++++++++++++---- R/plot.R | 52 +++++++++++++++++++++++++++++++++++++++++++++ R/two.step.hybrid.R | 6 ++++++ R/utils.R | 14 +++++++++++- 4 files changed, 97 insertions(+), 5 deletions(-) diff --git a/R/adaptive.bin.R b/R/adaptive.bin.R index ade6743..463ed33 100644 --- a/R/adaptive.bin.R +++ b/R/adaptive.bin.R @@ -1,7 +1,14 @@ -#' @import tibble dplyr -NULL -#> NULL - +#' Compute densities for given masses. +#' @description +#' Compute the density estimation for a set of masses using kernel density estimation. +#' It allows for intensity-weighted densities and custom bandwidth functions. +#' @param masses A numeric vector of mass values. +#' @param mz_tol The mass-to-charge ratio tolerance. +#' @param intensity_weighted A logical value indicating whether to weight the densities by intensities. +#' @param intensities A numeric vector of intensity values corresponding to the masses. +#' @param bw_func A function to compute the bandwidth based on the masses. +#' @param n The number of equally spaced points at which the density is to be estimated. Default is 512. +#' @return A density object representing the estimated density of the masses. #' @export compute_densities <- function(masses, mz_tol, intensity_weighted, intensities, bw_func, n = 512) { bandwidth <- 0.5 * mz_tol * bw_func(masses) @@ -14,6 +21,15 @@ compute_densities <- function(masses, mz_tol, intensity_weighted, intensities, b return(all.mass.den) } +#' Compute mass values based on kernel density estimation. +#' @description +#' This function computes the mass values by performing kernel density estimation on the given masses. +#' It identifies the valleys in the density plot to determine the mass values. +#' @param mz_tol The mass-to-charge ratio tolerance. +#' @param masses A numeric vector of mass values. +#' @param intensity_binned A numeric vector of binned intensity values corresponding to the masses. +#' @param intensity_weighted A logical value indicating whether to weight the densities by intensities. +#' @return A numeric vector of mass values corresponding to the valleys in the density plot. #' @export compute_mass_values <- function(mz_tol, masses, intensity_binned, intensity_weighted) { n <- 2^min(15, floor(log2(length(masses))) - 2) @@ -33,6 +49,12 @@ compute_breaks <- function(mz_tol, masses, intensity_binned, intensity_weighted) } +#' Increment pointers in a list. +#' @description +#' This function increments the values of `prof.pointer`, `height.pointer`, and `curr.label` in the provided list of pointers. +#' @param pointers A list containing the pointers to be incremented. +#' @param that.n An integer value to increment the `prof.pointer`. +#' @return The updated list of pointers. #' @export increment_counter <- function(pointers, that.n){ pointers$prof.pointer <- pointers$prof.pointer + that.n diff --git a/R/plot.R b/R/plot.R index 246d0a5..b71bfd6 100644 --- a/R/plot.R +++ b/R/plot.R @@ -14,6 +14,17 @@ tolerance_plot <- function(x, y, exp_y, selected, main) { abline(v = x[selected], col = "blue") } +#' Draw retention time normal peaks. +#' @description +#' This function draws the normal peaks for retention time data based on the provided parameters. +#' @param x A numeric vector of retention time values. +#' @param truth A numeric vector containing the parameters for the normal peaks: +#' \itemize{ +#' \item mean of the peak +#' \item standard deviation for the left side of the peak +#' \item standard deviation for the right side of the peak +#' \item scaling factor for the peak height +#' } #' @export draw_rt_normal_peaks <- function(x, truth) { true.y1 <- dnorm(x[x < truth[1]], mean = truth[1], sd = truth[2]) * truth[2] * truth[4] @@ -21,6 +32,15 @@ draw_rt_normal_peaks <- function(x, truth) { lines(x, c(true.y1, true.y2), col = "green") } +#' Plot raw profile histogram. +#' @description +#' This function plots various histograms and density plots for a given raw profile, including noise groups, selected groups, retention time range distribution, and signal presence distribution. +#' @param raw.prof A list containing raw profile data, including height records and minimum count run. +#' @param min_pres A numeric value indicating the minimum presence threshold. +#' @param baseline.correct A numeric value for baseline correction. If NA, it will be computed automatically. +#' @param baseline.correct.noise.percentile A numeric value indicating the percentile of noise group heights for baseline correction. +#' @param mz_tol A numeric value for the mass-to-charge ratio tolerance. +#' @param new.prof A list containing new profile data, including height records, time range records, and m/z presence records. #' @export plot_raw_profile_histogram <- function(raw.prof, min_pres, @@ -80,6 +100,11 @@ plot_raw_profile_histogram <- function(raw.prof, ) } +#' Plot peak summary. +#' @description +#' This function plots a summary of peak characteristics, including m/z standard deviation, retention time standard deviation, and peak strength. +#' @param feature_groups A list of data frames, where each data frame represents a group of features with m/z values. +#' @param processed_features A data frame containing processed feature information with columns "sd1", "sd2", and "area". #' @export plot_peak_summary <- function(feature_groups, processed_features) { mz_sd <- compute_mz_sd(feature_groups) @@ -92,6 +117,13 @@ plot_peak_summary <- function(feature_groups, processed_features) { hist(log10(processed_features[, "area"]), xlab = "peak strength (log scale)", ylab = "Frequency", main = "Peak strength distribution") } +#' Plot retention time profile. +#' @description +#' This function plots the retention time profile, including the base curve, intensity, and fitted components. +#' @param rt_profile A data frame containing the retention time profile with columns "base_curve" and "intensity". +#' @param bw The bandwidth used for the kernel density estimation. +#' @param fit A matrix containing the fitted components for the retention time profile. +#' @param m A numeric vector of positions where vertical lines should be drawn. #' @export plot_rt_profile <- function(rt_profile, bw, fit, m) { plot(rt_profile[, "base_curve"], rt_profile[, "intensity"], cex = .1, main = paste("bw=", bw)) @@ -105,6 +137,18 @@ plot_rt_profile <- function(rt_profile, bw, fit, m) { } } +#' Plot normalized mixture model with BIC. +#' @description +#' This function plots the data points and the fitted Gaussian mixture model components with different bandwidths. +#' @param x A numeric vector of data points on the x-axis. +#' @param y A numeric vector of data points on the y-axis. +#' @param bw The bandwidth used for the kernel density estimation. +#' @param aaa A matrix containing the parameters of the Gaussian mixture model components: +#' \itemize{ +#' \item mean of the Gaussian component +#' \item standard deviation of the Gaussian component +#' \item scaling factor for the Gaussian component +#' } #' @export plot_normix_bic <- function(x, y, bw, aaa) { plot(x, y, cex = .1, main = paste("bw=", bw)) @@ -116,6 +160,14 @@ plot_normix_bic <- function(x, y, bw, aaa) { } } +#' Draw retention time correction plot. +#' @description +#' This function draws a plot showing the retention time correction for extracted features. +#' It plots the deviation of corrected retention times from the original retention times. +#' @param colors A character vector of colors to use for plotting each sample. If NA, default colors are used. +#' @param extracted_features A list of data frames, where each data frame contains the original retention times and m/z values of features. +#' @param corrected_features A list of data frames, where each data frame contains the corrected retention times of features. +#' @param rt_tol_relative A numeric value representing the relative retention time tolerance. #' @export draw_rt_correction_plot <- function(colors, extracted_features, diff --git a/R/two.step.hybrid.R b/R/two.step.hybrid.R index 559c2c6..00003c2 100644 --- a/R/two.step.hybrid.R +++ b/R/two.step.hybrid.R @@ -102,6 +102,12 @@ wide_to_long_feature_table <- function(wide_table, sample_names) { return(long_features) } +#' Extract column names matching a pattern. +#' @description +#' This function extracts the column names from a dataframe that contain a specified pattern. +#' @param dataframe A dataframe from which to extract column names. +#' @param pattern A character string containing the pattern to match in the column names. +#' @return A character vector of column names that match the specified pattern. extract_pattern_colnames <- function(dataframe, pattern) { dataframe <- dplyr::select(dataframe, contains(pattern)) return(colnames(dataframe)) diff --git a/R/utils.R b/R/utils.R index db06a3b..d8101f1 100644 --- a/R/utils.R +++ b/R/utils.R @@ -125,13 +125,21 @@ load_aligned_features <- function(metadata_file, intensities_file, rt_file, tol_ return(result) } +#' Calculate the span of a numeric vector. +#' @description +#' This function calculates the span (range) of a numeric vector, ignoring NA values. +#' @param x A numeric vector. +#' @return A numeric value representing the span of the vector. #' @export span <- function(x) { diff(range(x, na.rm = TRUE)) } +#' Compute standard deviation of m/z values for feature groups. #' @description -#' Compute standard deviation of m/z values groupwise +#' This function computes the standard deviation of m/z values for each group of features. +#' @param feature_groups A list of data frames, where each data frame represents a group of features with m/z values. +#' @return A numeric vector of standard deviations of m/z values for each feature group. #' @export compute_mz_sd <- function(feature_groups) { mz_sd <- c() @@ -146,6 +154,10 @@ compute_mz_sd <- function(feature_groups) { return(mz_sd) } +#' Get the number of available worker cores. +#' @description +#' This function determines the number of available worker cores, taking into account CRAN's limit on the number of cores. +#' @return An integer representing the number of available worker cores. #' @export get_num_workers <- function() { # CRAN limits the number of cores available to packages to 2