Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Error in running Casper #85

Open
mehlu22 opened this issue Oct 10, 2024 · 0 comments
Open

Error in running Casper #85

mehlu22 opened this issue Oct 10, 2024 · 0 comments

Comments

@mehlu22
Copy link

mehlu22 commented Oct 10, 2024

#options(download.file.method = "wininet")

#program to run Casper
library(data.table)
library(CaSpER)
library(edgeR)

sub <- read.csv('./cod_length.csv', row.names = 1)
dat <- fread("SL1to2_raw.csv")
dat$GeneID <- sub("\..*", "", dat$GeneID)
sub <- sub <- sub[sub$ensm %in% dat$GeneID, ]

cytoband <- cytoband_hg38

Generate the LOH annotation

rannotation <- generateAnnotation(id_type = "ensembl_gene_id", genes = dat$GeneID, ishg19 = FALSE, centromere)

Extract gene IDs from the LOH annotation (adjust if necessary based on the structure)

loh_genes <- unique(rannotation$Gene) # Change 'id' if needed based on the structure of rannotation

Find the common genes between the gene length data, gene expression data, and LOH annotation

common_genes <- intersect(intersect(dat$GeneID, sub$ensm), loh_genes)

Filter the gene expression data to retain only the common genes

dat_filtered <- dat[dat$GeneID %in% common_genes, ]

Filter the gene length data (sub) to retain only the common genes

sub_filtered <- sub[sub$ensm %in% common_genes, ]

Optionally, filter the LOH annotation to keep only the common genes

rannotation_filtered <- rannotation[rannotation$Gene %in% common_genes, ] # Adjust field name based on rannotation structure

Ensure that the filtered data is aligned

cat("Number of common genes: ", length(common_genes), "\n")

List of paths

paths <- c("./SL1/", "./SL2/")
folder_names <- basename(paths)
folder_names <- gsub("/$", "", folder_names)

Define a function to read the output, unlist it, and assign name

read_and_name <- function(path) {
result <- readBAFExtractOutput(path = path, sequencing.type = "bulk")
unlisted_result <- result[[1]]
return(unlisted_result)
}

Use lapply to apply the function to each path

loh <- lapply(paths, read_and_name)
names(loh) <- gsub(".baf", "", names(loh))

control.sample.ids = "PDA75"

loh.name.mapping <- data.frame(loh.name = folder_names, sample.name = folder_names)

dat_matrix <- as.matrix(data.frame(dat_filtered[!duplicated(dat_filtered$GeneID),], row.names = 1, check.names = F))
#convert to rpkm
dat_matrix <- rpkm(dat_matrix, gene.length = sub_filtered$Length, log=T)
#dat_matrix[is.na(dat_matrix)] <- 0
object <- CreateCasperObject(raw.data=dat_matrix, loh.name.mapping=loh.name.mapping, sequencing.type="bulk",
cnv.scale=3, loh.scale=3, matrix.type="normalized", expr.cutoff=1,
annotation=rannotation_filtered, method="iterative", loh=loh, filter="median",
control.sample.ids=control.sample.ids, cytoband=centromere, genomeVersion="hg38", log.transformed = T)

#Pairwise comparison of scales from BAF and expression signals
final.objects <- runCaSpER(object, removeCentromere=T, cytoband=cytoband, method="iterative")

#Pairwise comparison of scales from BAF and expression signals
final.objects <- runCaSpER(object, removeCentromere=T, cytoband=cytoband, method="iterative")
Performing recursive median filtering...
Performing HMM segmentation...
Processing cnv.scale:1 loh.scale:1...
Error in value[[jvseq[[jjj]]]] : subscript out of bounds

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
Labels
None yet
Projects
None yet
Development

No branches or pull requests

1 participant