Merge pull request #196 from RECETOX/small_refactorings
Small refactorings on combine.seq.3 and general adaptive.bin
hechth authored Apr 19, 2023
2 parents 463e1cd + a555966 commit 387c165
Showing 13 changed files with 138 additions and 143 deletions.
1 change: 1 addition & 0 deletions CHANGELOG.md
@@ -7,6 +7,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
 ## [dev] - unreleased
 ### Added
 ### Changed
+- refactored adaptive.bin and combine.seq.3 [#196](https://github.com/RECETOX/recetox-aplcms/pull/196)
 - refactored find.match [#193](https://github.com/RECETOX/recetox-aplcms/pull/193)
 - Simplified Evaluation Conditions For Test Cases [#197](https://github.com/RECETOX/recetox-aplcms/pull/197)
 - Renamed `proc.cdf` to `remove_noise` [#190](https://github.com/RECETOX/recetox-aplcms/pull/190)
2 changes: 1 addition & 1 deletion NAMESPACE
@@ -4,11 +4,11 @@ S3method(solve,a)
 S3method(solve,sigma)
 export(adaptive.bin)
 export(adjust.time)
+export(aggregate_by_rt)
 export(bigauss.esti)
 export(bigauss.esti.EM)
 export(bigauss.mix)
 export(comb)
-export(combine.seq.3)
 export(compute_boundaries)
 export(compute_bounds)
 export(compute_breaks)
43 changes: 22 additions & 21 deletions R/adaptive.bin.R
@@ -78,15 +78,15 @@ adaptive.bin <- function(features,

   cat(c("m/z tolerance is: ", mz_tol, "\n"))

-  times <- sort(unique(features$rt))
+  scan_times <- sort(unique(features$rt))

-  min_time <- min(times)
-  max_time <- max(times)
-  time_range <- (max_time - min_time)
+  num_scans <- length(scan_times)
+  time_range <- span(scan_times)

   # calculate function parameters
-  min.count.run <- min_run * length(times) / time_range
-  aver.time.range <- (time_range) / length(times)
+  scans_per_second <- num_scans / time_range
+  min_scans <- min_run * scans_per_second
+  aver_cycle_time <- (time_range) / num_scans

   # init data
   newprof <- matrix(0, nrow = length(features$mz), ncol = 4)
@@ -106,7 +106,7 @@

     this_table <- dplyr::slice(features, (start:end))

-    if (length(unique(this_table$rt)) >= min.count.run * min_pres) {
+    if (length(unique(this_table$rt)) >= min_scans * min_pres) {
       # reorder in order of rt (scan number)
       this_table <- this_table |> dplyr::arrange_at("rt")
       mass.den <- compute_densities(this_table$mz, mz_tol, intensity_weighted, this_table$intensities, median)
@@ -127,39 +127,40 @@
         }

         # get rows which fulfill condition
-        that <- this_table |> dplyr::filter(mz > boundaries$lower & mz <= boundaries$upper)
+        that <- this_table |> dplyr::filter(dplyr::between(mz, boundaries$lower, boundaries$upper))

         if (nrow(that) > 0) {
-          that <- combine.seq.3(that) |> dplyr::arrange_at("mz")
+          that <- aggregate_by_rt(that) |> dplyr::arrange_at("mz")

           that.range <- span(that$rt)

-          if (that.range > 0.5 * time_range & length(that$rt) > that.range * min_pres & length(that$rt) / (that.range / aver.time.range) > min_pres) {
+          if (that.range > 0.5 * time_range & length(that$rt) > that.range * min_pres & length(that$rt) / (that.range / aver_cycle_time) > min_pres) {
             that$intensities <- rm.ridge(that$rt, that$intensities, bw = max(10 * min_run, that.range / 2))

             that <- that |> dplyr::filter(intensities != 0)
           }

-          that.n <- length(that$mz)
+          num_pts_in_group <- length(that$mz)

-          newprof[pointers$prof.pointer:(pointers$prof.pointer + that.n - 1), ] <- cbind(that$mz, that$rt, that$intensities, rep(pointers$curr.label, that.n))
-          height.rec[pointers$height.pointer, ] <- c(pointers$curr.label, that.n, max(that$intensities))
+          newprof[pointers$prof.pointer:(pointers$prof.pointer + num_pts_in_group - 1), ] <- cbind(that$mz, that$rt, that$intensities, rep(pointers$curr.label, num_pts_in_group))
+          height.rec[pointers$height.pointer, ] <- c(pointers$curr.label, num_pts_in_group, max(that$intensities))

           # increment counters
-          pointers <- increment_counter(pointers, that.n)
+          pointers <- increment_counter(pointers, num_pts_in_group)
         }
       }
-    } else {
+    } else { # not enough points in profile
       if (runif(1) < 0.05) {
         this_table <- this_table |> dplyr::arrange_at("rt")

-        that.merged <- combine.seq.3(this_table)
-        that.n <- nrow(that.merged)
+        that.merged <- aggregate_by_rt(this_table)
+        num_pts_in_group <- nrow(that.merged)

-        newprof[pointers$prof.pointer:(pointers$prof.pointer + that.n - 1), ] <- cbind(that.merged$mz, that.merged$rt, that.merged$intensities, rep(pointers$curr.label, that.n))
-        height.rec[pointers$height.pointer, ] <- c(pointers$curr.label, that.n, max(that.merged$intensities))
+        newprof[pointers$prof.pointer:(pointers$prof.pointer + num_pts_in_group - 1), ] <- cbind(that.merged$mz, that.merged$rt, that.merged$intensities, rep(pointers$curr.label, num_pts_in_group))
+        height.rec[pointers$height.pointer, ] <- c(pointers$curr.label, num_pts_in_group, max(that.merged$intensities))

         # increment counters
-        pointers <- increment_counter(pointers, that.n)
+        pointers <- increment_counter(pointers, num_pts_in_group)
       }
     }
   }
@@ -174,7 +175,7 @@
   raw.prof <- new("list")
   raw.prof$height.rec <- height.rec
   raw.prof$features <- newprof_tibble
-  raw.prof$min.count.run <- min.count.run
+  raw.prof$min.count.run <- min_scans

   return(raw.prof)
 }
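
The renaming above preserves the arithmetic: the old `min.count.run` and the new `min_scans` compute the same threshold, only factored through `scans_per_second`. A minimal sketch with invented scan times, assuming `span(x)` returns `max(x) - min(x)`:

```r
# Invented data: 10 scans spread over 9 seconds.
scan_times <- seq(0, 9)
min_run <- 2

num_scans <- length(scan_times)
time_range <- max(scan_times) - min(scan_times)

# old formulation
min.count.run <- min_run * num_scans / time_range

# new formulation: same value, expressed via scans per second
scans_per_second <- num_scans / time_range
min_scans <- min_run * scans_per_second

stopifnot(all.equal(min.count.run, min_scans))
```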
17 changes: 17 additions & 0 deletions R/aggregate_by_rt.R
@@ -0,0 +1,17 @@
+#' Aggregate the intensities and select median mz for features with identical rt.
+#'
+#' @description This function computes the median mz and the sum of intensities over features with the same rt.
+#' @param features dataframe of retention time, m/z ratio, signal strength.
+#' @return returns a tibble with the following columns
+#' \itemize{
+#' \item mz - m/z ratio
+#' \item rt - retention time
+#' \item intensities - signal strength
+#' }
+#' @export
+aggregate_by_rt <- function(features) {
+  features |>
+    dplyr::group_by(rt) |>
+    dplyr::summarize(mass = median(mz[which.max(intensities)]), area = sum(intensities)) |>
+    dplyr::rename(mz = mass, intensities = area)
+}
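
A usage sketch for the new helper, with invented data: the two points sharing rt = 1.0 collapse into a single row whose intensity is the group sum and whose mz is the one at the intensity maximum (the median of a single value):

```r
library(tibble)

features <- tibble(
  mz          = c(100.001, 100.003, 100.002),
  rt          = c(1.0, 1.0, 2.0),
  intensities = c(10, 30, 20)
)

aggregate_by_rt(features)
#> expected: rt = 1.0 -> mz = 100.003, intensities = 40; rt = 2.0 unchanged
```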
34 changes: 0 additions & 34 deletions R/combine.seq.3.R

This file was deleted.

27 changes: 26 additions & 1 deletion R/compute_clusters.R
@@ -103,4 +103,29 @@ compute_clusters <- function(feature_tables,
   feature_tables <- sort_data(sample_names, feature_tables)

   return(list(feature_tables = feature_tables, rt_tol_relative = rt_tol_relative, mz_tol_relative = mz_tol_relative))
-}
+}
+
+
+
+# compute_clusters_v2 <- function(feature_tables, mz_tol_ppm, rt_tol) {
+
+#   mz_tol <- mz_tol_ppm * 1e-06
+
+#   match_fun <- list(
+#     ~ abs(.x - .y) < mz_tol * .x,
+#     ~ abs(.x - .y) < rt_tol
+#   )
+
+#   join_and_update <- function(df_a, df_b) {
+#     merged <- fuzzyjoin::fuzzy_full_join(df_a, df_b, by=c("mz", "rt"), match_fun = match_fun) |>
+#       dplyr::rename(mz = mz.x, rt = rt.x)
+#   }
+
+#   aligned <- purrr::reduce(feature_tables, join_and_update)
+# }
+
+# multi_match_fun <- function(x, y) {
+#   return(abs(x[,1] - y[,1]) < 10e-05 & abs(x[,2] - y[,2]) < 2)
+# }
+
+# f4 <- fuzzyjoin::fuzzy_full_join(f1, f2, by=NULL, multi_by=c("mz", "rt"), match_fun = NULL, multi_match_fun = multi_match_fun)
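
The multi-column variant sketched in the last commented line can be run standalone; a minimal example with invented feature tables `f1` and `f2`, using the same tolerances as the comment:

```r
library(fuzzyjoin)

# Invented feature tables: the first rows should pair up.
f1 <- data.frame(mz = c(100.00000, 200.00000), rt = c(10, 20))
f2 <- data.frame(mz = c(100.00005, 300.00000), rt = c(11, 30))

# rows match when both the mz and rt differences are within tolerance
multi_match_fun <- function(x, y) {
  abs(x[, 1] - y[, 1]) < 10e-05 & abs(x[, 2] - y[, 2]) < 2
}

fuzzy_full_join(f1, f2, multi_by = c("mz", "rt"), multi_match_fun = multi_match_fun)
#> unmatched rows from either table survive with NAs, as in a full join
```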
2 changes: 1 addition & 1 deletion R/recover.weaker.R
@@ -521,7 +521,7 @@ compute_rectangle <- function(data_table,

   # get values in RT region of interest?
   if (nrow(that) > recover_min_count) {
-    that.prof <- combine.seq.3(that)
+    that.prof <- aggregate_by_rt(that)
     that.mass <- sum(that.prof$mz * that.prof$intensities) / sum(that.prof$intensities)
     curr.rec <- c(that.mass, NA, NA)

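
The recovered mass on the line after the swap is an intensity-weighted mean over the aggregated profile; a worked example with invented numbers:

```r
# Two aggregated points; the heavier peak pulls the mean toward its mz.
that.prof <- data.frame(
  mz          = c(100.001, 100.003),
  intensities = c(10, 30)
)

that.mass <- sum(that.prof$mz * that.prof$intensities) / sum(that.prof$intensities)
that.mass
#> 100.0025
```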
17 changes: 10 additions & 7 deletions R/remove_noise.R
@@ -11,6 +11,7 @@ load_file <- function(filename) {
 }

 #' Load data either from cache or load raw file and detect peaks.
+#' @description THIS FUNCTION IS DEPRECATED!
 #' @export
 load_data <- function(filename,
                       cache,
@@ -70,13 +71,14 @@ remove_noise <- function(filename,
                          intensity_weighted,
                          do.plot,
                          cache) {
-  raw.prof <- load_data(
-    filename,
-    cache,
-    min_run,
-    min_pres,
-    mz_tol,
-    intensity_weighted
+  raw.data <- load_file(filename)
+
+  raw.prof <- adaptive.bin(
+    raw.data,
+    min_run = min_run,
+    min_pres = min_pres,
+    mz_tol = mz_tol,
+    intensity_weighted = intensity_weighted
   )

   newprof <- cbind(
@@ -85,6 +87,7 @@
     raw.prof$features$intensities,
     raw.prof$features$grps
   )
+
   run.sel <- raw.prof$height.rec[which(raw.prof$height.rec[, 2] >= raw.prof$min.count.run * min_pres & raw.prof$height.rec[, 3] > baseline_correct), 1]

   newprof <- newprof[newprof[, 4] %in% run.sel, ]
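
The dense `run.sel` line keeps only groups with enough points and a peak above baseline. A small sketch of that selection with invented thresholds and `height.rec` rows (columns: group label, point count, max intensity):

```r
height.rec <- rbind(
  c(1, 50, 1e6),  # enough points, above baseline -> kept
  c(2,  3, 1e6),  # too few points                -> dropped
  c(3, 50, 10)    # peak below baseline           -> dropped
)
min.count.run <- 40
min_pres <- 0.5
baseline_correct <- 100

run.sel <- height.rec[which(height.rec[, 2] >= min.count.run * min_pres &
                              height.rec[, 3] > baseline_correct), 1]
run.sel
#> [1] 1
```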