Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feature.alignment function refactored #221

Merged
merged 24 commits into from
Jul 29, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
24 commits
Select commit Hold shift + click to select a range
db792a8
test and input/output files for the select_mz function
KristinaGomoryova Jul 19, 2024
a4e9579
create_empty_tibble added to @export
KristinaGomoryova Jul 19, 2024
a4cc11a
documentation to functions added
KristinaGomoryova Jul 19, 2024
127e714
roxygen updated documentation
KristinaGomoryova Jul 19, 2024
b2ca09d
weird change
KristinaGomoryova Jul 22, 2024
3bede62
state pre refactoring
KristinaGomoryova Jul 22, 2024
2036537
updated select_mz to return tibble rows
KristinaGomoryova Jul 22, 2024
563389f
test updated for refactored code
KristinaGomoryova Jul 23, 2024
b5f78ff
tests updated for refactored code
KristinaGomoryova Jul 23, 2024
40457b0
function documentation updated
KristinaGomoryova Jul 23, 2024
ac6e3d3
functions updated
KristinaGomoryova Jul 23, 2024
fbff31a
plyr added as dependency
KristinaGomoryova Jul 23, 2024
b820975
documentation updated
KristinaGomoryova Jul 23, 2024
39189e3
documentation updated
KristinaGomoryova Jul 23, 2024
b4ee7c2
plyr added
KristinaGomoryova Jul 23, 2024
e9b0510
export added on functions
KristinaGomoryova Jul 23, 2024
31ca6f0
styler linted
KristinaGomoryova Jul 23, 2024
95e0bde
test files renamed
KristinaGomoryova Jul 23, 2024
ee1d2dc
import foreach deleted
KristinaGomoryova Jul 23, 2024
47000dd
Merge branch 'master' into alignment
hechth Jul 23, 2024
5b0d831
Merge branch 'master' into alignment
hechth Jul 26, 2024
9fa9225
fixed tests
hechth Jul 26, 2024
1dfb73f
Started adding documentation
hechth Jul 29, 2024
ef2f003
Finalized documentation for adjust time
hechth Jul 29, 2024
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion DESCRIPTION
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@ Description: This is a customized fork of the original work from Tianwei Yu.
It takes the adaptive processing of LC/MS metabolomics data further
with focus on high resolution MS for both LC and GC applications.
Depends: R (>= 3.50), MASS, mzR, splines, doParallel, foreach,
snow, dplyr, tidyr, stringr, tibble, tools, arrow
snow, dplyr, tidyr, stringr, tibble, tools, arrow, plyr
biocViews: Technology, MassSpectrometry
License: GPL-2
LazyLoad: yes
Expand Down
14 changes: 11 additions & 3 deletions NAMESPACE
Original file line number Diff line number Diff line change
@@ -1,11 +1,15 @@
# Generated by roxygen2: do not edit by hand

export(adaptive.bin)
export(add_feature_ids)
export(adjust.time)
export(aggregate_by_rt)
export(as_feature_sample_table)
export(bigauss.esti)
export(bigauss.esti.EM)
export(bigauss.mix)
export(check_files)
export(clean_data_matrix)
export(comb)
export(compute_boundaries)
export(compute_bounds)
Expand All @@ -16,6 +20,7 @@ export(compute_clusters)
export(compute_clusters_simple)
export(compute_comb)
export(compute_corrected_features)
export(compute_corrected_features_v2)
export(compute_curr_rec_with_enough_peaks)
export(compute_delta_rt)
export(compute_densities)
Expand All @@ -40,10 +45,14 @@ export(compute_template)
export(compute_template_adjusted_rt)
export(compute_uniq_grp)
export(correct_time)
export(correct_time_v2)
export(count_peaks)
export(create_aligned_feature_table)
export(create_features_from_cluster)
export(create_intensity_row)
export(create_metadata)
export(create_output)
export(create_rows)
export(create_rt_row)
export(draw_rt_correction_plot)
export(draw_rt_normal_peaks)
export(duplicate.row.remove)
Expand All @@ -59,6 +68,7 @@ export(get_features_in_rt_range)
export(get_mzrange_bound_indices)
export(get_num_workers)
export(get_rt_region_indices)
export(get_sample_name)
export(get_single_occurrence_mask)
export(get_times_to_use)
export(hybrid)
Expand Down Expand Up @@ -90,8 +100,6 @@ export(remove_noise)
export(rev_cum_sum)
export(rm.ridge)
export(run_filter)
export(select_mz)
export(select_rt)
export(semi.sup)
export(solve_a)
export(solve_sigma)
Expand Down
59 changes: 53 additions & 6 deletions R/adjust.time.R
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,10 @@
NULL
#> NULL

#' Combine template and sample features
#' @param template_features Tibble Template feature table (mz, rt, cluster, sample_id).
#' @param features Tibble Sample feature table (mz, rt, cluster, sample_id).
#' @return Tibble Combined feature table (rbind).
#' @export
compute_comb <- function(template_features, features) {
combined <- dplyr::bind_rows(
Expand All @@ -11,6 +15,12 @@ compute_comb <- function(template_features, features) {
return(combined)
}

#' Select features to use for retention time alignment
#' @description This function selects features present in both the sample
#' feature table and template feature table given they have the same cluster,
#' are adjacent in the combined table.
#' @param combined Tibble Table with (mz, rt, cluster, sample_id).
#' @return List of bool Returns list of bools with TRUE at each index where this condition is met.
#' @export
compute_sel <- function(combined) {
l <- nrow(combined)
Expand All @@ -19,6 +29,11 @@ compute_sel <- function(combined) {
return(sel)
}

#' Create two column table with paired sample and template retention times.
#' @param combined Tibble Table with features from sample and template.
#' @param sel list of bools List of bools indiciating which features to pair.
#' See 'compute_sel'.
#' @param j string Template sample_id.
#' @export
compute_template_adjusted_rt <- function(combined, sel, j) {
all_features <- cbind(combined$rt[sel], combined$rt[sel + 1])
Expand All @@ -34,6 +49,13 @@ compute_template_adjusted_rt <- function(combined, sel, j) {
return(all_features)
}

#' Correct the rt in feature table based on paired feature rts and differences.
#' @description This is a newer implementation based on dplyr which might be more efficient than the other function.
#' @param features Tibble The feature table for which to correct rts.
#' @param template_rt List of floats Template retention times for the paired features.
#' @param delta_rt List of floats Differences between the paired rts.
#' @return Tibble A table with corrected retention times.
#' @export
compute_corrected_features_v2 <- function(features, template_rt, delta_rt) {
features <- features |> dplyr::arrange_at(c("rt", "mz"))
idx <- dplyr::between(features$rt, min(template_rt), max(template_rt))
Expand All @@ -59,29 +81,34 @@ compute_corrected_features_v2 <- function(features, template_rt, delta_rt) {
return(features |> dplyr::arrange_at(c("mz", "rt")))
}

#' Correct the rt in feature table based on paired feature rts and differences.
#' @param features Tibble The feature table for which to correct rts.
#' @param template_rt List of floats Template retention times for the paired features.
#' @param delta_rt List of floats Differences between the paired rts.
#' @return Tibble A table with corrected retention times.
#' @export
compute_corrected_features <- function(features, delta_rt, avg_time) {
compute_corrected_features <- function(features, template_rt, delta_rt) {
features <- features |> dplyr::arrange_at(c("rt", "mz"))

corrected <- features$rt
original <- features$rt

idx <- dplyr::between(original, min(delta_rt), max(delta_rt))
idx <- dplyr::between(original, min(template_rt), max(template_rt))
to_correct <- original[idx]
this.smooth <- ksmooth(
template_rt,
delta_rt,
avg_time,
kernel = "normal",
bandwidth = (max(delta_rt) - min(delta_rt)) / 5,
bandwidth = (max(template_rt) - min(template_rt)) / 5,
x.points = to_correct
)

corrected[idx] <- this.smooth$y + to_correct
lower_bound_adjustment <- mean(this.smooth$y[this.smooth$x == min(this.smooth$x)])
upper_bound_adjustment <- mean(this.smooth$y[this.smooth$x == max(this.smooth$x)])

idx_lower <- original < min(delta_rt)
idx_upper <- original > max(delta_rt)
idx_lower <- original < min(template_rt)
idx_upper <- original > max(template_rt)

corrected[idx_lower] <- corrected[idx_lower] + lower_bound_adjustment
corrected[idx_upper] <- corrected[idx_upper] + upper_bound_adjustment
Expand All @@ -91,6 +118,10 @@ compute_corrected_features <- function(features, delta_rt, avg_time) {
return(features)
}

#' Fill missing values based on original retention times.
#' @param orig.features Non-corrected feature table.
#' @param this.features Feature table with eventual missing values.
#' @return Tibble Feature table with filles values.
#' @export
fill_missing_values <- function(orig.feature, this.feature) {
missing_values <- which(is.na(this.feature$rt))
Expand All @@ -104,6 +135,10 @@ fill_missing_values <- function(orig.feature, this.feature) {
return(this.feature)
}

#' Function to perform retention time correction
#' @param this.feature Tibble Feature table for which to correct rt.
#' @param template_features Tibble Template feature table to use for correction.
#' @return Tibble this.feature table with corrected rt values.
#' @export
correct_time <- function(this.feature, template_features) {
orig.features <- this.feature
Expand Down Expand Up @@ -137,6 +172,10 @@ correct_time <- function(this.feature, template_features) {
return(tibble::as_tibble(this.feature, column_name = c("mz", "rt", "sd1", "sd2", "area", "sample_id", "cluster")))
}

#' Select the template feature table.
#' @description The current implementation selects the table with the most features as the template.
#' @param extracted_features List of tables Tables from which to select the template.
#' @return Tibble Template feature table.
#' @export
compute_template <- function(extracted_features) {
num.ftrs <- sapply(extracted_features, nrow)
Expand All @@ -149,6 +188,14 @@ compute_template <- function(extracted_features) {
return(tibble::as_tibble(template_features))
}

#' Rewritten version of 'correct_time'
#' @description This function uses dplyr to do the same as
#' 'correct_time', just with less code. Most functions used in the original
#' function are replaced with simple data transformations.
#' @param features Tibble Table with features to correct.
#' @param template Tibble Template feature table to use for correction.
#' @return Tibble Corrected feature table.
#' @export
correct_time_v2 <- function(features, template) {
if (unique(features$sample_id) == unique(template$sample_id))
return(tibble::as_tibble(features))
Expand Down
Loading