heatmap functionality

adebali · Feb 15, 2018 · 7b72019 · 7b72019
1 parent 4d9836d
commit 7b72019
Showing 1 changed file with 54 additions and 9 deletions.
diff --git a/projects/yeast/XR/pipeline.py b/projects/yeast/XR/pipeline.py
@@ -135,6 +135,13 @@ def mergeGeneCounts(self, outputName):
         output = os.path.join(self.outputDir, '..', outputName)
         self.catFiles(wildcard, headers, output)
         return self
+
+    def mergeHundred(self, outputName):
+        wildcard = self.fullPath2wildcard(self.input[0]).replace("_TS", "*")
+        headers = ['chromosome', 'start', 'end', 'gene', 'score', 'strand', 'category', 'geneNo', 'position', 'count'] + self.attributes
+        output = os.path.join(self.outputDir, '..', outputName)
+        self.catFiles(wildcard, headers, output)
+        return self
 
     def mergeBinCounts(self, outputName):
         wildcard = self.fullPath2wildcard(self.input[0])
@@ -655,6 +662,26 @@ def transcriptIntersect_bed2txt(self, args={}):
         return self
 
 
+    def intersectToTranscript_bed2txt(self):
+        codeList = [
+            'bedtools',
+            'intersect',
+            '-a', self.reference['scoredGenes'],
+            '-b', self.input,
+            '-wa',
+            '-wb',
+            '-F', 0.50,
+            '|',
+            'bedIntersect2positionTxt.py',
+            '-fieldsToSwitch', 2,3,5,
+            '|',
+            '../position2list.py',
+            '>',
+            self.output
+        ]
+        self.execM(codeList)
+        return self
+
     def dnaseIntersect_bed2txt(self, args={}):
         bedAfile = self.reference['dnase']
 
@@ -1157,6 +1184,17 @@ def geneMap_bed2txt(self):
         ]
         self.execM(codeList)
         return self
+
+    def subsample_bed2bed(self, readNumber):
+        codeList = [
+            'subsample',
+            '-n', readNumber,
+            '--seed', 123,
+            self.input,
+            '>', self.output
+        ]
+        self.execM(codeList)
+        return self
 
 def getArgs():
     parser = argparse.ArgumentParser(description='XR-seq Mouse Organs Pipeline', prog="pipeline.py")
@@ -1198,46 +1236,53 @@ def main():
     (p
         .run(p.uniqueSort_bed2bed, False)
 
-        .branch(True)
+        .branch(False)
             .run(p.geneMap_bed2txt, True)
             .run(p.normalizeCounts_txt2txt, True)
             .run(p.addTSNTS_txt2txt, True)
-            .cat(p.mergeGeneCounts, True)
+            .cat(p.mergeGeneCounts, True, 'merged_geneCounts.txt')
         .stop()
 
-       # TCR Analysis
         .branch(True)
+            .run(p.subsample_bed2bed, True, 6000000)
+            .run(p.intersectToTranscript_bed2txt, True)
+            .run(p.addTreatment_txt2txt, True)            
+            .cat(p.mergeHundred, True, 'merged_hundred.txt')    
+        .stop()
+
+       # TCR Analysis
+        .branch(False)
             .run(p.transcriptIntersect_bed2txt, False)
             .run(p.addTreatment_txt2txt, False, 'real')
         .stop()
 
-        .branch(True)
+        .branch(False)
             .run(p.transcriptIntersect_bed2txt, False, {'random':True})
             .run(p.addTreatment_txt2txt, False, 'random')
         .stop()
 
-        .branch(True)
+        .branch(False)
             .run(p.transcriptIntersect_bed2txt, False, {"slice":True, "n":4, "sliceNum":1, "keyword":'_Q1'})
             .run(p.addTreatment_txt2txt, False, 'Q1')
         .stop()
 
-        .branch(True)
+        .branch(False)
             .run(p.transcriptIntersect_bed2txt, False, {"slice":True, "n":4, "sliceNum":2, "keyword":'_Q2'})
             .run(p.addTreatment_txt2txt, False, 'Q2')
         .stop()
 
-        .branch(True)
+        .branch(False)
             .run(p.transcriptIntersect_bed2txt, False, {"slice":True, "n":4, "sliceNum":3, "keyword":'_Q3'})
             .run(p.addTreatment_txt2txt, False, 'Q3')
         .stop()
 
-        .branch(True)
+        .branch(False)
             .run(p.transcriptIntersect_bed2txt, False, {"slice":True, "n":4, "sliceNum":4, "keyword":'_Q4'})
             .run(p.addTreatment_txt2txt, False, 'Q4')
         .stop()
 
        # Non overlapping transcripts
-        .branch(True)
+        .branch(False)
             .run(p.transcriptIntersect_bed2txt, False, {'genes': 'genes_noNeighborIn500bp', 'keyword': '_no_neighbor'})
             .run(p.addTreatment_txt2txt, False, 'no_neighbor')
             .cat(p.mergeTCR, False)