Error in running Casper #85

mehlu22 · 2024-10-10T00:48:40Z

#options(download.file.method = "wininet")

#program to run Casper
library(data.table)
library(CaSpER)
library(edgeR)

sub <- read.csv('./cod_length.csv', row.names = 1)
dat <- fread("SL1to2_raw.csv")
dat$GeneID <- sub("\..*", "", dat$GeneID)
sub <- sub <- sub[sub$ensm %in% dat$GeneID, ]

cytoband <- cytoband_hg38

Generate the LOH annotation

rannotation <- generateAnnotation(id_type = "ensembl_gene_id", genes = dat$GeneID, ishg19 = FALSE, centromere)

Extract gene IDs from the LOH annotation (adjust if necessary based on the structure)

loh_genes <- unique(rannotation$Gene) # Change 'id' if needed based on the structure of rannotation

Find the common genes between the gene length data, gene expression data, and LOH annotation

common_genes <- intersect(intersect(dat$GeneID, sub$ensm), loh_genes)

Filter the gene expression data to retain only the common genes

dat_filtered <- dat[dat$GeneID %in% common_genes, ]

Filter the gene length data (`sub`) to retain only the common genes

sub_filtered <- sub[sub$ensm %in% common_genes, ]

Optionally, filter the LOH annotation to keep only the common genes

rannotation_filtered <- rannotation[rannotation$Gene %in% common_genes, ] # Adjust field name based on rannotation structure

Ensure that the filtered data is aligned

cat("Number of common genes: ", length(common_genes), "\n")

List of paths

paths <- c("./SL1/", "./SL2/")
folder_names <- basename(paths)
folder_names <- gsub("/$", "", folder_names)

Define a function to read the output, unlist it, and assign name

read_and_name <- function(path) {
result <- readBAFExtractOutput(path = path, sequencing.type = "bulk")
unlisted_result <- result[[1]]
return(unlisted_result)
}

Use lapply to apply the function to each path

loh <- lapply(paths, read_and_name)
names(loh) <- gsub(".baf", "", names(loh))

control.sample.ids = "PDA75"

loh.name.mapping <- data.frame(loh.name = folder_names, sample.name = folder_names)

dat_matrix <- as.matrix(data.frame(dat_filtered[!duplicated(dat_filtered$GeneID),], row.names = 1, check.names = F))
#convert to rpkm
dat_matrix <- rpkm(dat_matrix, gene.length = sub_filtered$Length, log=T)
#dat_matrix[is.na(dat_matrix)] <- 0
object <- CreateCasperObject(raw.data=dat_matrix, loh.name.mapping=loh.name.mapping, sequencing.type="bulk",
cnv.scale=3, loh.scale=3, matrix.type="normalized", expr.cutoff=1,
annotation=rannotation_filtered, method="iterative", loh=loh, filter="median",
control.sample.ids=control.sample.ids, cytoband=centromere, genomeVersion="hg38", log.transformed = T)

#Pairwise comparison of scales from BAF and expression signals
final.objects <- runCaSpER(object, removeCentromere=T, cytoband=cytoband, method="iterative")

#Pairwise comparison of scales from BAF and expression signals
final.objects <- runCaSpER(object, removeCentromere=T, cytoband=cytoband, method="iterative")
Performing recursive median filtering...
Performing HMM segmentation...
Processing cnv.scale:1 loh.scale:1...
Error in value[[jvseq[[jjj]]]] : subscript out of bounds

The text was updated successfully, but these errors were encountered:

Provide feedback

Saved searches

Use saved searches to filter your results more quickly

Error in running Casper #85

Error in running Casper #85

mehlu22 commented Oct 10, 2024

Error in running Casper #85

Error in running Casper #85

Comments

mehlu22 commented Oct 10, 2024

Generate the LOH annotation

Extract gene IDs from the LOH annotation (adjust if necessary based on the structure)

Find the common genes between the gene length data, gene expression data, and LOH annotation

Filter the gene expression data to retain only the common genes

Filter the gene length data (sub) to retain only the common genes

Optionally, filter the LOH annotation to keep only the common genes

Ensure that the filtered data is aligned

List of paths

Define a function to read the output, unlist it, and assign name

Use lapply to apply the function to each path

Filter the gene length data (`sub`) to retain only the common genes