From 9b0bbbfe7610d96ca1fa7bdb6e67c253dc394754 Mon Sep 17 00:00:00 2001 From: EnesSefaAyar Date: Fri, 23 Feb 2024 12:26:08 +0100 Subject: [PATCH] peptide data updated with recently shared file by author. --- R/data.R | 25 +++++++---------- inst/scripts/make-data_khan2023.R | 45 +++++++++++++------------------ 2 files changed, 29 insertions(+), 41 deletions(-) diff --git a/R/data.R b/R/data.R index 4a0e003..6d80d81 100644 --- a/R/data.R +++ b/R/data.R @@ -2431,14 +2431,14 @@ ##' single-cell runs. Both table are then combined in a single ##' [QFeatures] object using the [scp::readSCP] function. ##' -##' The peptide data were generated from the SCoPE2 R script, -##' `EMTTGFB_singleCellProcessing.R`). The data were formated -##' to a [SingleCellExperiment] object and the sample metadata -##' were matched to the column names (mapping is retrieved -##' after running the SCoPE2 R script) and stored in the `colData`. -##' The object is then added to the [QFeatures] object and the rows -##' of the peptide data are linked to the rows of the PSM data based -##' on the peptide sequence information through an `AssayLink` object. +##' The peptide data were taken from the same google drive folder +##' (`EpiToMesen.TGFB.nPoP_trial1_pepByCellMatrix_NSThreshDART_medIntCrNorm.txt`). +##' The data were formatted to a [SingleCellExperiment] object and the sample +##' metadata were matched to the column names (mapping is retrieved +##' after running the SCoPE2 R script, `EMTTGFB_singleCellProcessing.R`) and +##' stored in the `colData`. The object is then added to the [QFeatures] object +##' and the rows of the PSM data are linked to the rows of the peptide data +##' based on the peptide sequence information through an `AssayLink` object. ##' ##' The imputed protein data were taken from the same google drive folder ##' (`EpiToMesen.TGFB.nPoP_trial1_ProtByCellMatrix_NSThreshDART_medIntCrNorm_imputedNotBC.csv`). 
@@ -2451,13 +2451,8 @@ ##' ##' The unimputed protein data were taken from the same google drive folder ##' (`EpiToMesen.TGFB.nPoP_trial1_ProtByCellMatrix_NSThreshDART_medIntCrNorm_unimputed.csv`). -##' The data were formated to a [SingleCellExperiment] object and the sample -##' metadata were matched to the column names (mapping is retrieved -##' after running the SCoPE2 R script, `EMTTGFB_singleCellProcessing.R`) and -##' stored in the `colData`. The object is then added to the [QFeatures] object -##' and the rows of the peptide data are linked to the rows of the protein data -##' based on the protein sequence information through an `AssayLink` object. -##' +##' The data were formatted and added exactly as the imputed protein data. +##' ##' @source ##' The data were downloaded from the ##' [Slavov Lab](https://scp.slavovlab.net/Khan_et_al_2023) website via a diff --git a/inst/scripts/make-data_khan2023.R b/inst/scripts/make-data_khan2023.R index c8417a6..629a961 100644 --- a/inst/scripts/make-data_khan2023.R +++ b/inst/scripts/make-data_khan2023.R @@ -104,15 +104,13 @@ idMap <- read.csv(paste0(root, "cellIDToChannel.csv"), row.names = 1) ####---- Add the peptide data ----#### -## The `peptides.csv` and `peptides_rowData.csv` files were generated using the -## `EMTTGFB_singleCellProcessing.R` script from -## https://github.com/SlavovLab/EMT_TGFB_2023/tree/main. -## `peptides.csv`: contains peptides x cells before the aggregation. -## `peptides_rowData.csv`: contains rowData of peptides. 
- -read.csv(paste0(root, "peptides.csv")) %>% - rename(peptide = X) %>% - readSingleCellExperiment(ecol = 2:422, fnames = "peptide") -> +## Peptide quantity matrix downloaded from: +## https://drive.google.com/drive/folders/1zCsRKWNQuAz5msxx0DfjDrIe6pUjqQmj + +peps <- read.delim(paste0(root, "EpiToMesen.TGFB.nPoP_trial1_pepByCellMatrix_NSThreshDART_medIntCrNorm.txt")) +peps %>% + rename(peptide = pep) %>% + readSingleCellExperiment(ecol = 1:421, fnames = "peptide") -> peptides colnames(peptides) <- idMap$Channel[match(colnames(peptides), idMap$cellID)] @@ -120,30 +118,25 @@ colData(peptides) <- DataFrame(annot[colnames(peptides), ]) khan2023 <- addAssay(khan2023, peptides, name = "peptides") +## Include rowData to peptides assay +rowData(khan2023[["peptides"]]) <- DataFrame(peptide = peps$pep, + protein = peps$prot) + ## First find which PSM assays were included -sel <- sapply(grep("eSK", names(khan2023), value = TRUE), function(name) { - x <- khan2023[[name]] - ## Does the current PSM data have at least 1 colname in common with pep? - inColnames <- any(colnames(x) %in% colnames(peptides)) - ## Does the current PSM data have at least 1 peptide sequence in common with pep? - inSequence <- any(rowData(x)$peptide %in% rowData(peptides)$peptide) - return(inColnames && inSequence) ## The PSM assay must fulfill both conditions +sel <- sapply(grep("eSK", names(khan2023), value = TRUE), + function(name) { + x <- khan2023[[name]] + ## Does the current PSM data have at least 1 colname in common with pep? + inColnames <- any(colnames(x) %in% colnames(peptides)) + ## Does the current PSM data have at least 1 peptide sequence in common with pep? 
+ inSequence <- any(rowData(x)$peptide %in% rowData(peptides)$peptide) + return(inColnames && inSequence) ## The PSM assay must fulfill both conditions }) ## Add an AssayLink that bridges the PSM assays and the peptide assay khan2023 <- addAssayLink(khan2023, from = which(sel), to = "peptides", varFrom = rep("peptide", sum(sel)), varTo = "peptide") -## Include rowData to peptides assay -read.csv(paste0(root, "peptides_rowData.csv"), row.names = 1) %>% - select(pep, prot) %>% - mutate(peptide = pep, protein = prot, pep = NULL, prot = NULL) %>% - unique() %>% - DataFrame() -> - pepRow - -rowData(khan2023[["peptides"]]) <- pepRow - ####---- Add the protein data ----#### ## Imputed and un-imputed protein quantity matrices downloaded from: