Skip to content

Commit

Permalink
SMuRFv1.1
Browse files Browse the repository at this point in the history
  • Loading branch information
HUANG Weitai committed Jul 25, 2017
1 parent ce08ef9 commit ffc6ba3
Show file tree
Hide file tree
Showing 20 changed files with 7,040 additions and 7,073 deletions.
54 changes: 24 additions & 30 deletions Readme.Rmd
Original file line number Diff line number Diff line change
Expand Up @@ -63,46 +63,40 @@ myresults$smurf_indel$stats_indel
#Mutect2 383
#FreeBayes 85
#VarDict 166
#VarScan 158
#Atleast1 765
#Atleast2 25
#VarScan 208
#Atleast1 810
#Atleast2 30
#Atleast3 2
#All4 0
#SMuRF_INDEL 1

myresults$smurf_indel$predicted_indel

#Chr START_POS_REF END_POS_REF REF_MFVdVs ALT_MFVdVs SMuRF_score
#1 "1" "17820432" "17820433" "AT/AT/AT/AT" "A/A/A/A" "0.571875"
#1 "1" "17820432" "17820433" "AT/AT/AT/AT" "A/A/A/A" "0.6210418"

myresults$smurf_snv$stats_snv

# Passed_Calls
#Mutect2 5004
#FreeBayes 115
#VarDict 124
#VarScan 501
#Atleast1 5684
#Atleast2 42
#Atleast3 16
#All4 2
#SMuRF_SNV 12

myresults$smurf_snv$predicted_snv

#Chr START_POS_REF END_POS_REF REF_MFVdVs ALT_MFVdVs TRUTH_confidence
#1 "1" "12207135" "12207135" "G/G/G/G" "A/A/A/A" "0.9941938"
#2 "1" "14955425" "14955425" "C/C/C/C" "A/A/A/A" "0.9025974"
#3 "1" "18525077" "18525077" "G/G/G/G" "A/A/A/A" "0.9972492"
#4 "1" "21459584" "21459584" "T/T/T/T" "A/A/A/A" "0.9919866"
#5 "1" " 2180985" " 2180985" "A/A/A/A" "G/G/G/G" "0.9992510"
#6 "1" "22385823" "22385823" "A/A/A/A" "G/G/G/G" "0.6207352"
#7 "1" "25322776" "25322776" "C/C/NA/C" "T/T/NA/T" "0.8995001"
#8 "1" "28317030" "28317030" "G/NA/G/G" "T/NA/T/T" "0.8962453"
#9 "1" " 5035185" " 5035185" "C/C/C/C" "T/T/T/T" "0.9321060"
#10 "1" " 8881322" " 8881322" "G/G/G/G" "A/A/A/A" "0.9116895"
#11 "1" " 8929624" " 8929624" "A/A/A/A" "G/G/G/G" "0.8765643"
#12 "1" " 9478609" " 9478609" "A/A/A/A" "G/G/G/G" "0.5984848"
# Passed_Calls
#Mutect2 5004
#FreeBayes 115
#VarDict 124
#VarScan 535
#Atleast1 5700
#Atleast2 49
#Atleast3 19
#All4 10
#SMuRF_SNV 399

head(myresults$smurf_snv$predicted_snv)

#Chr START_POS_REF END_POS_REF REF_MFVdVs ALT_MFVdVs SMuRF_score
#1 "1" "100080141" "100080141" "T/NA/NA/NA" "A/NA/NA/NA" "0.5449806"
#2 "1" "100418794" "100418794" "G/NA/NA/NA" "T/NA/NA/NA" "0.4698960"
#3 "1" "100444255" "100444255" "A/NA/NA/NA" "T/NA/NA/NA" "0.4725965"
#4 "1" "100470006" "100470006" "C/NA/NA/NA" "A/NA/NA/NA" "0.4074448"
#5 "1" "100494646" "100494646" "G/NA/NA/NA" "T/NA/NA/NA" "0.3901333"
#6 "1" "100997475" "100997475" "C/NA/NA/NA" "A/NA/NA/NA" "0.4804897"


```
Expand Down
6 changes: 6 additions & 0 deletions smurf/NAMESPACE
Original file line number Diff line number Diff line change
@@ -1,6 +1,12 @@
# Generated by roxygen2: do not edit by hand

export(indelRFparse)
export(indelRFparseall)
export(indelRFparsefeatures)
export(parsevcf)
export(parsevcfall)
export(parsevcffeatures)
export(smurf)
export(snvRFparse)
export(snvRFparseall)
export(snvRFparsefeatures)
39 changes: 25 additions & 14 deletions smurf/R/indelRFparse.R
Original file line number Diff line number Diff line change
Expand Up @@ -42,9 +42,9 @@ indelRFparse = function(a){
colnames(varscan)<-c("X.CHROM", "POS", "END_POS_Varscan", "REF_Varscan", "ALT_Varscan", "vs_SSC","vs_SPV")

vardict<-as.data.frame(a[[7]][!isSNV(a[[7]], singleAltOnly=FALSE)],row.names=NULL)
vardict<-vardict[,c("seqnames","start","end", "ref", "alt", "SSF","MSI")]
vardict<-vardict[,c("seqnames","start","end", "ref", "alt", "SSF","MSI","SOR")]
vardict<-vardict[grep("GL*", vardict$seqnames, invert=TRUE),]
colnames(vardict)<-c("X.CHROM", "POS", "END_POS_Vardict", "REF_Vardict", "ALT_Vardict", "vd_SSF","vd_MSI")
colnames(vardict)<-c("X.CHROM", "POS", "END_POS_Vardict", "REF_Vardict", "ALT_Vardict", "vd_SSF","vd_MSI","vd_SOR")

#Filtering Indels from collapsed vcf
mutect_Indel<-a[[2]][!isSNV(a[[2]], singleAltOnly=FALSE)]
Expand All @@ -67,7 +67,9 @@ indelRFparse = function(a){
colnames(freebayes_coordinates)[1:2]<-c("X.CHROM", "POS")
freebayes_coordinates<-freebayes_coordinates[grep("GL*", freebayes_coordinates$X.CHROM, invert = TRUE),]

varscan_clean_Indel<-rowRanges(varscan_Indel[varscan_Indel@fixed$FILTER=="PASS",])[,1]
#we are including the SpvFreq as PASSED calls
varscan_clean_Indel<-rowRanges(varscan_Indel[varscan_Indel@fixed$FILTER=="PASS" | varscan_Indel@fixed$FILTER=="SpvFreq",])[,1]
#varscan_clean_Indel<-rowRanges(varscan_Indel[varscan_Indel@fixed$FILTER=="PASS",])[,1]
varscan_coordinates<-as.data.frame(unlist(varscan_clean_Indel),row.names=NULL)
varscan_coordinates<-varscan_coordinates[1:2]
varscan_coordinates$FILTER_Varscan<-"PASS"
Expand All @@ -88,9 +90,15 @@ indelRFparse = function(a){
coordinates<-unique(coordinates)

#Pulling out calls from VRanges object for each caller using coordinates
m2<-dim(mutect)[2]
mutect<-merge(mutect, coordinates, by=c("X.CHROM", "POS"), all.y=TRUE, sort=TRUE)
mutect<-unique(mutect)
mutect<-mutect[,1:(dim(mutect)[2]-3)]
M_2<-which( colnames(mutect)=="FILTER_Mutect2" )
mutect<-mutect[,c(1:m2,M_2)]

# mutect<-merge(mutect, coordinates, by=c("X.CHROM", "POS"), all.y=TRUE, sort=TRUE)
# mutect<-unique(mutect)
# mutect<-mutect[,1:(dim(mutect)[2]-3)]

fb<-dim(freebayes)[2]
freebayes<-merge(freebayes, coordinates, by=c("X.CHROM", "POS"), all.y=TRUE, sort=TRUE)
Expand Down Expand Up @@ -128,16 +136,16 @@ indelRFparse = function(a){
final<-final[,c("X.CHROM", "POS", "END_POS", "REF", "ALT", "FILTER_Mutect2", "FILTER_Freebayes", "FILTER_Vardict", "FILTER_Varscan",
"m2_MQ","m2_MQRankSum","m2_NLOD","m2_TLOD",
"f_LEN",
"vs_SSC","vs_SPV","vd_SSF","vd_MSI")]
"vs_SSC","vs_SPV","vd_SSF","vd_MSI","vd_SOR")]
colnames(final)<-c("X.CHROM", "START_POS_REF", "END_POS_REF", "REF_MFVdVs", "ALT_MFVdVs", "FILTER_Mutect2", "FILTER_Freebayes", "FILTER_Vardict", "FILTER_Varscan",
"m2_MQ","m2_MQRankSum","m2_NLOD","m2_TLOD",
"f_LEN",
"vs_SSC","vs_SPV","vd_SSF","vd_MSI")
"vs_SSC","vs_SPV","vd_SSF","vd_MSI","vd_SOR")

#Removing Duplicates
for(i in c("m2_MQ","m2_MQRankSum","m2_NLOD","m2_TLOD",
"f_LEN",
"vs_SSC","vs_SPV","vd_SSF","vd_MSI"))
"vs_SSC","vs_SPV","vd_SSF","vd_MSI", "vd_SOR"))
{
final[,i]<-as.numeric(final[,i])
}
Expand All @@ -153,26 +161,29 @@ indelRFparse = function(a){
final[,i] <- as.logical(final[,i])
}

final$vd_SOR[which(is.infinite(final$vd_SOR)==TRUE)] <- 0

indel_parse <- final

print("Predicting INDELs")

df <- final[,c("X.CHROM","START_POS_REF","FILTER_Mutect2","FILTER_Freebayes","FILTER_Vardict","FILTER_Varscan",
"m2_MQ","m2_MQRankSum","m2_NLOD","m2_TLOD","f_LEN","vs_SSC","vs_SPV","vd_SSF","vd_MSI")]
write.csv(df, 'indel_parse.csv',row.names = F)
df <- h2o.importFile(path = normalizePath("indel_parse.csv"),header=T)
"m2_MQ","m2_MQRankSum","m2_NLOD","m2_TLOD","f_LEN","vs_SSC","vs_SPV","vd_SSF","vd_MSI","vd_SOR")]
df <- as.h2o(df)

# write.csv(df, 'indel_parse.csv',row.names = F)
# df <- h2o.importFile(path = normalizePath("indel_parse.csv"),header=T)

smurfdir <- find.package("smurf")
smurfmodeldir <- paste0(smurfdir, "/data/indel-model-combined-grid")
indel_model <- h2o.loadModel(path = smurfmodeldir)

#indel_model <- h2o.loadModel(path = "/home/huangwt/R/x86_64-pc-linux-gnu-library/3.3/smbio/data/indel_model-combined-0205")
#indel_model <- h2o.loadModel(path = "D:/Users/Tyler/Dropbox/Scripts/Real-v3/Results-Indels-v3/indel_model-combined-0205")
#indel_model <- h2o.loadModel(path = "C:/Users/Tyler/Dropbox/Scripts/smurf/smurf1.0/data/indel-model-combined-grid")
#indel_model <- h2o.loadModel(path = "D:/Users/Tyler/Dropbox/SMuRFv1.1/smurf/data/indel-model-combined-grid")

predicted <- h2o.predict(object = indel_model, newdata = df)

suppressMessages(file.remove("indel_parse.csv"))
#suppressMessages(file.remove("indel_parse.csv"))
p <- as.data.frame(predicted)
table <- final

Expand All @@ -198,7 +209,7 @@ indelRFparse = function(a){
colnames(stats) <- c("Passed_Calls")
rownames(stats) <- c("Mutect2", "FreeBayes", "VarDict", "VarScan", "Atleast1", "Atleast2", "Atleast3", "All4", "SMuRF_INDEL")

counts <- apply(table[, c(6,7,8,9)], 1, function(x) length(which(x=="TRUE")))
counts <- apply(table[, c("FILTER_Mutect2","FILTER_Freebayes","FILTER_Vardict","FILTER_Varscan")], 1, function(x) length(which(x=="TRUE")))

stats$Passed_Calls[1] <- length(which((table$FILTER_Mutect2==TRUE)))
stats$Passed_Calls[2] <- length(which((table$FILTER_Freebayes==TRUE)))
Expand Down
Loading

0 comments on commit ffc6ba3

Please sign in to comment.