-
Notifications
You must be signed in to change notification settings - Fork 18
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
fix: add 'STAR' to validate if file contains RNA alignments (#464)
- Add new tests for rna alignment validation - this includes a new mock bam file with "star" alignments (change the sequence bases to N's) - update changelog
- Loading branch information
Showing
4 changed files
with
87 additions
and
4 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,54 @@ | ||
@HD VN:1.4 SO:coordinate | ||
@SQ SN:2R LN:61545105 UR:/nfs/srpipe_references/references/Anopheles_gambiae/PEST_AgamP4/all/fasta/chromosomes_AgamP4.fa AS:AgamP4 M5:c7fce117bede496505ba3c8b6e01c4e7 SP:Anopheles gambiae PEST | ||
@SQ SN:3R LN:53200684 UR:/nfs/srpipe_references/references/Anopheles_gambiae/PEST_AgamP4/all/fasta/chromosomes_AgamP4.fa AS:AgamP4 M5:f0319650793090d2408cf7ee417a358c SP:Anopheles gambiae PEST | ||
@SQ SN:2L LN:49364325 UR:/nfs/srpipe_references/references/Anopheles_gambiae/PEST_AgamP4/all/fasta/chromosomes_AgamP4.fa AS:AgamP4 M5:a4da4bafa82830c0a418c5a42138377b SP:Anopheles gambiae PEST | ||
@SQ SN:UNKN LN:42389979 UR:/nfs/srpipe_references/references/Anopheles_gambiae/PEST_AgamP4/all/fasta/chromosomes_AgamP4.fa AS:AgamP4 M5:51c20c06ad36f7ee256a70a2315c11f6 SP:Anopheles gambiae PEST | ||
@SQ SN:3L LN:41963435 UR:/nfs/srpipe_references/references/Anopheles_gambiae/PEST_AgamP4/all/fasta/chromosomes_AgamP4.fa AS:AgamP4 M5:94c0bf9b2a1e56c5b18ba0f02f927461 SP:Anopheles gambiae PEST | ||
@SQ SN:X LN:24393108 UR:/nfs/srpipe_references/references/Anopheles_gambiae/PEST_AgamP4/all/fasta/chromosomes_AgamP4.fa AS:AgamP4 M5:b6ee78dd87860defe33adbf619ea0ebe SP:Anopheles gambiae PEST | ||
@SQ SN:Y_unplaced LN:237045 UR:/nfs/srpipe_references/references/Anopheles_gambiae/PEST_AgamP4/all/fasta/chromosomes_AgamP4.fa AS:AgamP4 M5:51ca08338a90e2d47857ae5fdff8c00c SP:Anopheles gambiae PEST | ||
@SQ SN:Mt LN:15363 UR:/nfs/srpipe_references/references/Anopheles_gambiae/PEST_AgamP4/all/fasta/chromosomes_AgamP4.fa AS:AgamP4 M5:b079acda5858d4bfefbc9ef053bea7dc SP:Anopheles gambiae PEST | ||
@RG ID:6_6#6 PL:ILLUMINA PU:170518_HS34_6_B_CAKARANXX_3#107 LB:19123756 DS:Malaria Programme R&D: Malaria Programme R&D DT:2017-05-18T01:00:00+0100 PG:SCS SM:4472STDY6864139 CN:SC | ||
@PG ID:SCS PN:RTA VN:1.18.66.3 DS:Controlling software on instrument | ||
@PG ID:basecalling PN:RTA PP:SCS VN:1.18.66.3 DS:Basecalling Package | ||
@PG ID:bamadapterfind PN:bamadapterfind PP:bambi VN:2.0.72 CL:/software/solexa/pkg/biobambam/2.0.72/bin/bamadapterfind level=0 | ||
@PG ID:bambi.1 PN:bambi PP:bamadapterfind VN:0.9.4 CL:/software/solexa/pkg/bambi/0.9.4/bin/bambi decode --metrics-file /nfs/sf44/ILorHSany_sf44/analysis/170518_HS34_6_B_CAKARANXX/Data/Intensities/BAM_basecalls_20170522-231912/6_6.bam.tag_decode.metrics --barcode-file /nfs/sf44/ILorHSany_sf44/analysis/170518_HS34_6_B_CAKARANXX/Data/Intensities/BAM_basecalls_20170522-231912/metadata_cache_6/lane_3.taglist --compression-level 0 - | ||
@PG ID:bamcollate2 PN:bamcollate2 PP:bambi.1 VN:2.0.72 CL:/software/solexa/pkg/biobambam/2.0.72/bin/bamcollate2 collate=2 level=0 | ||
@PG ID:bwa PN:bwa PP:bamcollate2 VN:0.7.15-r1140 CL:/software/solexa/pkg/bwa/0.7.15/bwa sampe /lustre/scratch117/core/sciops_repository/references/PhiX/Sanger-SNPs/all/bwa0_6/phix_unsnipped_short_no_N.fa /tmp/VrFN7XNwme/alnphix_bwa_aln_1_out /tmp/pe16HUV04q/alnphix_bwa_aln_2_out /tmp/7ekNFi0DXx/alnphix_simple_cat1_out /tmp/x36rSRryLA/alnphix_simple_cat2_out | ||
@PG ID:scramble PN:scramble PP:bwa VN:1.14.9 CL:/software/solexa/pkg/scramble/1.14.9/bin/scramble -0 -t 2 -I sam -O bam | ||
@PG ID:bam12auxmerge PN:bam12auxmerge PP:scramble VN:2.0.72 CL:/software/solexa/pkg/biobambam/2.0.72/bin/bam12auxmerge level=0 rankstrip=1 ranksplit=1 zztoname=0 clipreinsert=1 /tmp/31dU21DKlR/simple_cat_out | ||
@PG ID:scramble.1 PN:scramble PP:bam12auxmerge VN:1.14.9 CL:/software/solexa/pkg/scramble/1.14.9/bin/scramble -I bam -O cram -x -3 | ||
@PG ID:scramble.2 PN:scramble PP:scramble.1 VN:1.14.9 CL:/software/solexa/pkg/scramble/1.14.9/bin/scramble -I cram -O bam -0 | ||
@PG ID:spf PN:spatial_filter PP:scramble.2 DS:A program to apply a spatial filter VN:v10.27-dirty CL:/software/solexa/pkg/pb_calibration/10.27/bin/spatial_filter -c -F /dev/stdout -t /nfs/sf44/ILorHSany_sf44/analysis/170518_HS34_6_B_CAKARANXX/Data/Intensities/BAM_basecalls_20170522-231912/no_cal/archive/qc/tileviz/6_6 --region_size 200 --region_mismatch_threshold 0.0160 --region_insertion_threshold 0.0160 --region_deletion_threshold 0.0160 /dev/stdin ; /software/solexa/pkg/pb_calibration/10.27/bin/spatial_filter -a -f -u -l /nfs/sf44/ILorHSany_sf44/analysis/170518_HS34_6_B_CAKARANXX/Data/Intensities/BAM_basecalls_20170522-231912/no_cal/6_6.bam.filter.stats -F /tmp/8fqzTzOOE7/tee_post_filter_creation:__APPLY_FILTER_OUT___out /dev/stdin | ||
@PG ID:samtools PN:samtools PP:spf VN:1.4 CL:/software/solexa/pkg/samtools/1.4/bin/samtools split -f /nfs/sf44/ILorHSany_sf44/analysis/170518_HS34_6_B_CAKARANXX/Data/Intensities/BAM_basecalls_20170522-231912/no_cal/lane3/%!.bam - | ||
@PG ID:bamcollate2' PN:bamcollate2 PP:samtools VN:2.0.72 CL:/software/solexa/pkg/biobambam/2.0.72/bin/bamcollate2 collate=1 level=0 | ||
@PG ID:bamreset PN:bamreset PP:bamcollate2' VN:2.0.72 CL:/software/solexa/pkg/biobambam/2.0.72/bin/bamreset resetaux=0 level=0 verbose=0 | ||
@PG ID:bamadapterclip PN:bamadapterclip PP:bamreset VN:2.0.72 CL:/software/solexa/pkg/biobambam/2.0.72/bin/bamadapterclip verbose=0 level=0 | ||
@PG ID:bwa' PN:bwa PP:bamadapterclip VN:0.7.15-r1140 CL:/software/solexa/pkg/bwa/0.7.15/bwa mem -t 16 -p -Y -K 100000000 /lustre/scratch117/core/sciops_repository/references/Plasmodium_falciparum/3D7_Jan16v3/all/bwa0_6/Pf3D7_v3.fa /tmp/qZJDmtn5Aa/alntgt_bamtofastq_out | ||
@PG ID:scramble' PN:scramble PP:bwa' VN:1.14.9 CL:/software/solexa/pkg/scramble/1.14.9/bin/scramble -0 -I sam -O bam | ||
@PG ID:samtools' PN:samtools PP:scramble' VN:1.4 CL:/software/solexa/pkg/samtools/1.4/bin/samtools reheader /tmp/iy1R1qORBH/postalntgt_alterSQ_headerSQfix_out /tmp/_PRhyKAVvT/postalntgt_mbuffer_headerSQfix_out | ||
@PG ID:bam12split PN:bam12split PP:samtools' VN:2.0.72 CL:/software/solexa/pkg/biobambam/2.0.72/bin/bam12split verbose=0 level=0 | ||
@PG ID:bamsormadup PN:bamsormadup PP:bam12split VN:2.0.72 CL:/software/solexa/pkg/biobambam/2.0.72/bin/bamsormadup threads=16 SO=queryname level=0 | ||
@PG ID:bam12auxmerge' PN:bam12auxmerge PP:bamsormadup VN:2.0.72 CL:/software/solexa/pkg/biobambam/2.0.72/bin/bam12auxmerge level=0 rankstrip=1 ranksplit=0 zztoname=0 clipreinsert=1 ./hs_intfile0_6_6#6.bam | ||
@PG ID:AlignmentFilter PN:AlignmentFilter PP:bam12auxmerge' DS:Give a list of SAM/BAM files with the same set of records and in the same order but aligned with different references, split reads into different files according to alignments. You have option to put unaligned reads into one of output files or a separate file VN:V1.19 CL:uk.ac.sanger.npg.picard.AlignmentFilter INPUT_ALIGNMENT=[./initial_phix_aln_6_6#6.bam, /tmp/L1MpV9HHku/postalnhs_bam12auxmerge_out, /tmp/gwghhDNpNZ/postalntgt_bam12auxmerge_out] OUTPUT_ALIGNMENT=[/tmp/HtoclagWjb/alignment_filter:__PHIX_BAM_OUT___out, /tmp/uKigoodsmU/alignment_filter:__HUMAN_SPLIT_BAM_OUT___out, /dev/stdout] METRICS_FILE=6_6#6.bam_alignment_filter_metrics.json VERBOSITY=INFO QUIET=false VALIDATION_STRINGENCY=SILENT COMPRESSION_LEVEL=0 MAX_RECORDS_IN_RAM=500000 CREATE_INDEX=false CREATE_MD5_FILE=false | ||
@PG ID:bamsormadup' PN:bamsormadup CL:/software/solexa/pkg/biobambam/2.0.72/bin/bamsormadup threads=16 SO=coordinate level=0 verbose=0 fixmate=1 adddupmarksupport=1 tmpfile=/nfs/sf44/ILorHSany_sf44/analysis/170518_HS34_6_B_CAKARANXX/Data/Intensities/BAM_basecalls_20170522-231912/no_cal/archive/lane3/bsfopt_6_6#6.tmp PP:AlignmentFilter VN:2.0.72 | ||
@PG ID:bamstreamingmarkduplicates PN:bamstreamingmarkduplicates CL:/software/solexa/pkg/biobambam/2.0.72/bin/bamstreamingmarkduplicates level=0 verbose=0 tmpfile=/nfs/sf44/ILorHSany_sf44/analysis/170518_HS34_6_B_CAKARANXX/Data/Intensities/BAM_basecalls_20170522-231912/no_cal/archive/lane3/bmdfopt_6_6#6.tmp M=/nfs/sf44/ILorHSany_sf44/analysis/170518_HS34_6_B_CAKARANXX/Data/Intensities/BAM_basecalls_20170522-231912/no_cal/archive/lane3/6_6#6.markdups_metrics.txt PP:bamsormadup' VN:2.0.72 | ||
@PG ID:scramble.3 PN:scramble PP:bamstreamingmarkduplicates VN:1.14.9 CL:/software/solexa/pkg/scramble/1.14.9/bin/scramble -t 7 -7 -I bam -O cram -r /lustre/scratch117/core/sciops_repository/references/Plasmodium_falciparum/3D7_Jan16v3/all/fasta/Pf3D7_v3.fa | ||
@PG ID:bamcollate2'' PN:bamcollate2 CL:/software/solexa/pkg/biobambam/2.0.76/bin/bamcollate2 inputformat=cram collate=1 level=0 PP:scramble.3 VN:2.0.76 | ||
@PG ID:bamreset' PN:bamreset CL:/software/solexa/pkg/biobambam/2.0.76/bin/bamreset resetaux=0 auxfilter=RG,PG,BC,RT,QT,tr,tq,br,qr level=0 verbose=0 PP:bamcollate2'' VN:2.0.76 | ||
@PG ID:bamadapterfind' PN:bamadapterfind CL:/software/solexa/pkg/biobambam/2.0.76/bin/bamadapterfind clip=1 verbose=0 level=0 PP:bamreset' VN:2.0.76 | ||
@PG ID:STAR PN:STAR CL:/software/solexa/pkg/star/2.5.2b/bin/STAR --runMode alignReads --runThreadN 16 --genomeDir /nfs/srpipe_references/references/Anopheles_gambiae/PEST_AgamP4/all/star --genomeLoad NoSharedMemory --readFilesIn ./intfile_1_6_6#6.fq.gz ./intfile_2_6_6#6.fq.gz --outFileNamePrefix ./_6_6#6_ --outStd BAM_Unsorted --outSAMtype BAM Unsorted --outSAMstrandField intronMotif --outSAMattributes NH HI NM MD AS XS --outSAMunmapped Within KeepPairs --outFilterIntronMotifs RemoveNoncanonicalUnannotated --chimSegmentMin 15 --chimJunctionOverhangMin 15 --chimOutType SeparateSAMold --sjdbGTFfile /nfs/srpipe_references/transcriptomes/Anopheles_gambiae/vectorbase_181016_transcriptome/PEST_AgamP4/gtf/vectorbase_181016_transcriptome-PEST_AgamP4.gtf --sjdbOverhang 74 --quantMode TranscriptomeSAM GeneCounts PP:bamadapterfind' VN:STAR_2.5.2b | ||
@PG ID:samtools'' PN:samtools CL:/software/solexa/pkg/samtools/1.5/bin/samtools reheader /tmp/M14m1oXbFR/postalntgt_alterSQ_headerSQfix_out /tmp/KTtrPBKGjI/postalntgt_mbuffer_headerSQfix_out PP:STAR VN:1.5 | ||
@PG ID:bam12split' PN:bam12split CL:/software/solexa/pkg/biobambam/2.0.76/bin/bam12split verbose=0 level=0 PP:samtools'' VN:2.0.76 | ||
@PG ID:bamsormadup'' PN:bamsormadup CL:/software/solexa/pkg/biobambam/2.0.76/bin/bamsormadup threads=16 SO=queryname level=0 PP:bam12split' VN:2.0.76 | ||
@PG ID:bam12auxmerge'' PN:bam12auxmerge CL:/software/solexa/pkg/biobambam/2.0.76/bin/bam12auxmerge level=0 rankstrip=1 ranksplit=0 zztoname=0 clipreinsert=1 /tmp/XGZGjESM3P/amp_bamadapterclip_pre_auxmerge_out PP:bamsormadup'' VN:2.0.76 | ||
@PG ID:bamsormadup''' PN:bamsormadup CL:/software/solexa/pkg/biobambam/2.0.76/bin/bamsormadup threads=16 SO=coordinate level=0 verbose=0 fixmate=1 adddupmarksupport=1 tmpfile=/lustre/scratch117/sciops/team117/npg/rb11/viv_reprocessing/rt585843_4472_Anopheles+Plasmodium_Tophat+Star_remapping/output/star/6/6_6#6/bsfopt_6_6#6.tmp PP:bam12auxmerge'' VN:2.0.76 | ||
@PG ID:bamstreamingmarkduplicates' PN:bamstreamingmarkduplicates CL:/software/solexa/pkg/biobambam/2.0.76/bin/bamstreamingmarkduplicates level=0 verbose=0 tmpfile=/lustre/scratch117/sciops/team117/npg/rb11/viv_reprocessing/rt585843_4472_Anopheles+Plasmodium_Tophat+Star_remapping/output/star/6/6_6#6/bmdfopt_6_6#6.tmp M=/lustre/scratch117/sciops/team117/npg/rb11/viv_reprocessing/rt585843_4472_Anopheles+Plasmodium_Tophat+Star_remapping/output/star/6/6_6#6/6_6#6.markdups_metrics.txt PP:bamsormadup''' VN:2.0.76 | ||
@PG ID:scramble.4 PN:scramble PP:bamstreamingmarkduplicates' VN:1.14.9 CL:/software/solexa/pkg/scramble/1.14.9/bin/scramble -t 7 -7 -I bam -O cram -r /nfs/srpipe_references/references/Anopheles_gambiae/PEST_AgamP4/all/fasta/chromosomes_AgamP4.fa | ||
@CO TY:checksum ST:all PA:all HA:crc32prod CO:1679086 BS:469df137 NS:166e375c SQ:bacdbfb ST:BC,FI,QT,RT,TC:7096b44d | ||
@CO TY:checksum ST:all PA:pass HA:crc32prod CO:1679086 BS:469df137 NS:166e375c SQ:bacdbfb ST:BC,FI,QT,RT,TC:7096b44d | ||
@CO TY:checksum ST: PA:all HA:crc32prod CO:0 BS:1 NS:1 SQ:1 ST:BC,FI,QT,RT,TC:1 | ||
@CO TY:checksum ST: PA:pass HA:crc32prod CO:0 BS:1 NS:1 SQ:1 ST:BC,FI,QT,RT,TC:1 | ||
@CO TY:checksum ST:6_6#6 PA:all HA:crc32prod CO:1679086 BS:469df137 NS:166e375c SQ:bacdbfb ST:BC,FI,QT,RT,TC:7096b44d | ||
@CO TY:checksum ST:6_6#6 PA:pass HA:crc32prod CO:1679086 BS:469df137 NS:166e375c SQ:bacdbfb ST:BC,FI,QT,RT,TC:7096b44d | ||
HS34_6:3:1314:8997:38475 163 2R 21758 255 75M = 22074 391 NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN BBBBBFFBFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF< NH:i:1 HI:i:1 NS:i:148 MQ:i:255 ms:i:2755 mN:i:22148 MN:Z:75M MD:Z:75 NM:i:0 RG:Z:6_6#6 | ||
HS34_6:3:2307:11843:19357 163 2R 21770 255 75M = 21847 152 NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN BBBBBFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF<BFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF NH:i:1 HI:i:1 NS:i:148 MQ:i:255 ms:i:2751 mN:i:21921 MN:Z:75M MD:Z:75 NM:i:0 RG:Z:6_6#6 |