diff --git a/Changes b/Changes index 04f02dd1d..a4060d4bd 100644 --- a/Changes +++ b/Changes @@ -1,6 +1,7 @@ LIST OF CHANGES FOR NPG-QC PACKAGE - utility QC feature: db tables, DBIx classes, retrieval + - Add STAR aligner bams as valid RNA alignments - also added tests for it release 64.3.1 - error in a statement for data update is fixed diff --git a/lib/npg_qc/autoqc/checks/rna_seqc.pm b/lib/npg_qc/autoqc/checks/rna_seqc.pm index 6f39c315c..4c1570998 100644 --- a/lib/npg_qc/autoqc/checks/rna_seqc.pm +++ b/lib/npg_qc/autoqc/checks/rna_seqc.pm @@ -136,7 +136,7 @@ sub _build_is_rna_alignment { my $command = $self->samtools_cmd . ' view -H ' . $self->_bam_file . ' |'; my $ph = IO::File->new($command) or croak qq[Cannot fork '$command', error $ERRNO]; while (my $line = <$ph>) { - if (!$rna_alignment && $line =~ /^\@PG\s+.*tophat/ismx) { + if (!$rna_alignment && $line =~ /^\@PG\s+.*(?:TopHat|STAR)/smx) { $rna_alignment = 1; } } diff --git a/t/60-autoqc-checks-rna_seqc.t b/t/60-autoqc-checks-rna_seqc.t index d0b1a0603..8a06757cc 100644 --- a/t/60-autoqc-checks-rna_seqc.t +++ b/t/60-autoqc-checks-rna_seqc.t @@ -20,6 +20,10 @@ my $repos = getcwd . 
'/t/data/autoqc/rna_seqc'; `touch $dir/RNA-SeQC.jar`; +my $si = join q[/], $dir, q[samtools]; +`touch $si`; +`chmod +x $si`; + subtest 'Find CLASSPATH' => sub { plan tests => 3; my $rnaseqc = npg_qc::autoqc::checks::rna_seqc->new( @@ -86,7 +90,7 @@ subtest 'Parse metrics' => sub { }; subtest 'Argument input files' => sub { - plan tests => 11; + plan tests => 14; my $ref_repos_dir = join q[/],$dir,'references'; my $ref_dir = join q[/], $ref_repos_dir,'Mus_musculus','GRCm38','all'; `mkdir -p $ref_dir/fasta`; @@ -98,11 +102,9 @@ subtest 'Argument input files' => sub { `mkdir -p $trans_dir/RNA-SeQC`; `touch $trans_dir/RNA-SeQC/ensembl_75_transcriptome-GRCm38.gtf`; - my $si = join q[/], $dir, q[samtools]; open my $fh, q[>], $si; print $fh qq[cat $repos/data/17550_3#8.bam\n]; close $fh; - `chmod +x $si`; my $check = npg_qc::autoqc::checks::rna_seqc->new( id_run => 17550, @@ -167,8 +169,34 @@ subtest 'Argument input files' => sub { ref_repository => $ref_repos_dir, transcriptome_repository => $trans_repos_dir,); is($check->_bam_file, 't/data/autoqc/rna_seqc/data/17550_1#1.bam', 'bam file path for id run 17550 lane 1 tag 1'); + is($check->_is_rna_alignment, 0, 'bam for id run 17550 lane 1 tag 1 from bwa aligner is not RNA alignment'); lives_ok { $check->execute } 'execution ok for no RNA alignment'; like ($check->result->comments, qr/BAM file is not RNA alignment/, 'comment when bam file is not RNA alignment'); + + open $fh, q[>], $si; + print $fh qq[cat $repos/data/17550_3#8.bam\n]; + close $fh; + + $check = npg_qc::autoqc::checks::rna_seqc->new( + id_run => 17550, + position => 3, + tag_index => 8, + path => 't/data/autoqc/rna_seqc/data', + repository => $repos,); + is($check->_is_rna_alignment, 1, 'bam for id run 17550 lane 3 tag 8 from TopHat aligner is RNA alignment'); + + open $fh, q[>], $si; + print $fh qq[cat $repos/data/6_6#6.bam\n]; + close $fh; + + $check = npg_qc::autoqc::checks::rna_seqc->new( + id_run => 6, + position => 6, + tag_index => 6, + path => 
't/data/autoqc/rna_seqc/data', + repository => $repos,); + is($check->_is_rna_alignment, 1, 'bam for id run 6 lane 6 tag 6 from STAR aligner is RNA alignment'); + }; 1; diff --git a/t/data/autoqc/rna_seqc/data/6_6#6.bam b/t/data/autoqc/rna_seqc/data/6_6#6.bam new file mode 100644 index 000000000..621e9d89a --- /dev/null +++ b/t/data/autoqc/rna_seqc/data/6_6#6.bam @@ -0,0 +1,54 @@ +@HD VN:1.4 SO:coordinate +@SQ SN:2R LN:61545105 UR:/nfs/srpipe_references/references/Anopheles_gambiae/PEST_AgamP4/all/fasta/chromosomes_AgamP4.fa AS:AgamP4 M5:c7fce117bede496505ba3c8b6e01c4e7 SP:Anopheles gambiae PEST +@SQ SN:3R LN:53200684 UR:/nfs/srpipe_references/references/Anopheles_gambiae/PEST_AgamP4/all/fasta/chromosomes_AgamP4.fa AS:AgamP4 M5:f0319650793090d2408cf7ee417a358c SP:Anopheles gambiae PEST +@SQ SN:2L LN:49364325 UR:/nfs/srpipe_references/references/Anopheles_gambiae/PEST_AgamP4/all/fasta/chromosomes_AgamP4.fa AS:AgamP4 M5:a4da4bafa82830c0a418c5a42138377b SP:Anopheles gambiae PEST +@SQ SN:UNKN LN:42389979 UR:/nfs/srpipe_references/references/Anopheles_gambiae/PEST_AgamP4/all/fasta/chromosomes_AgamP4.fa AS:AgamP4 M5:51c20c06ad36f7ee256a70a2315c11f6 SP:Anopheles gambiae PEST +@SQ SN:3L LN:41963435 UR:/nfs/srpipe_references/references/Anopheles_gambiae/PEST_AgamP4/all/fasta/chromosomes_AgamP4.fa AS:AgamP4 M5:94c0bf9b2a1e56c5b18ba0f02f927461 SP:Anopheles gambiae PEST +@SQ SN:X LN:24393108 UR:/nfs/srpipe_references/references/Anopheles_gambiae/PEST_AgamP4/all/fasta/chromosomes_AgamP4.fa AS:AgamP4 M5:b6ee78dd87860defe33adbf619ea0ebe SP:Anopheles gambiae PEST +@SQ SN:Y_unplaced LN:237045 UR:/nfs/srpipe_references/references/Anopheles_gambiae/PEST_AgamP4/all/fasta/chromosomes_AgamP4.fa AS:AgamP4 M5:51ca08338a90e2d47857ae5fdff8c00c SP:Anopheles gambiae PEST +@SQ SN:Mt LN:15363 UR:/nfs/srpipe_references/references/Anopheles_gambiae/PEST_AgamP4/all/fasta/chromosomes_AgamP4.fa AS:AgamP4 M5:b079acda5858d4bfefbc9ef053bea7dc SP:Anopheles gambiae PEST +@RG ID:6_6#6 PL:ILLUMINA 
PU:170518_HS34_6_B_CAKARANXX_3#107 LB:19123756 DS:Malaria Programme R&D: Malaria Programme R&D DT:2017-05-18T01:00:00+0100 PG:SCS SM:4472STDY6864139 CN:SC +@PG ID:SCS PN:RTA VN:1.18.66.3 DS:Controlling software on instrument +@PG ID:basecalling PN:RTA PP:SCS VN:1.18.66.3 DS:Basecalling Package +@PG ID:bamadapterfind PN:bamadapterfind PP:bambi VN:2.0.72 CL:/software/solexa/pkg/biobambam/2.0.72/bin/bamadapterfind level=0 +@PG ID:bambi.1 PN:bambi PP:bamadapterfind VN:0.9.4 CL:/software/solexa/pkg/bambi/0.9.4/bin/bambi decode --metrics-file /nfs/sf44/ILorHSany_sf44/analysis/170518_HS34_6_B_CAKARANXX/Data/Intensities/BAM_basecalls_20170522-231912/6_6.bam.tag_decode.metrics --barcode-file /nfs/sf44/ILorHSany_sf44/analysis/170518_HS34_6_B_CAKARANXX/Data/Intensities/BAM_basecalls_20170522-231912/metadata_cache_6/lane_3.taglist --compression-level 0 - +@PG ID:bamcollate2 PN:bamcollate2 PP:bambi.1 VN:2.0.72 CL:/software/solexa/pkg/biobambam/2.0.72/bin/bamcollate2 collate=2 level=0 +@PG ID:bwa PN:bwa PP:bamcollate2 VN:0.7.15-r1140 CL:/software/solexa/pkg/bwa/0.7.15/bwa sampe /lustre/scratch117/core/sciops_repository/references/PhiX/Sanger-SNPs/all/bwa0_6/phix_unsnipped_short_no_N.fa /tmp/VrFN7XNwme/alnphix_bwa_aln_1_out /tmp/pe16HUV04q/alnphix_bwa_aln_2_out /tmp/7ekNFi0DXx/alnphix_simple_cat1_out /tmp/x36rSRryLA/alnphix_simple_cat2_out +@PG ID:scramble PN:scramble PP:bwa VN:1.14.9 CL:/software/solexa/pkg/scramble/1.14.9/bin/scramble -0 -t 2 -I sam -O bam +@PG ID:bam12auxmerge PN:bam12auxmerge PP:scramble VN:2.0.72 CL:/software/solexa/pkg/biobambam/2.0.72/bin/bam12auxmerge level=0 rankstrip=1 ranksplit=1 zztoname=0 clipreinsert=1 /tmp/31dU21DKlR/simple_cat_out +@PG ID:scramble.1 PN:scramble PP:bam12auxmerge VN:1.14.9 CL:/software/solexa/pkg/scramble/1.14.9/bin/scramble -I bam -O cram -x -3 +@PG ID:scramble.2 PN:scramble PP:scramble.1 VN:1.14.9 CL:/software/solexa/pkg/scramble/1.14.9/bin/scramble -I cram -O bam -0 +@PG ID:spf PN:spatial_filter PP:scramble.2 DS:A program to 
apply a spatial filter VN:v10.27-dirty CL:/software/solexa/pkg/pb_calibration/10.27/bin/spatial_filter -c -F /dev/stdout -t /nfs/sf44/ILorHSany_sf44/analysis/170518_HS34_6_B_CAKARANXX/Data/Intensities/BAM_basecalls_20170522-231912/no_cal/archive/qc/tileviz/6_6 --region_size 200 --region_mismatch_threshold 0.0160 --region_insertion_threshold 0.0160 --region_deletion_threshold 0.0160 /dev/stdin ; /software/solexa/pkg/pb_calibration/10.27/bin/spatial_filter -a -f -u -l /nfs/sf44/ILorHSany_sf44/analysis/170518_HS34_6_B_CAKARANXX/Data/Intensities/BAM_basecalls_20170522-231912/no_cal/6_6.bam.filter.stats -F /tmp/8fqzTzOOE7/tee_post_filter_creation:__APPLY_FILTER_OUT___out /dev/stdin +@PG ID:samtools PN:samtools PP:spf VN:1.4 CL:/software/solexa/pkg/samtools/1.4/bin/samtools split -f /nfs/sf44/ILorHSany_sf44/analysis/170518_HS34_6_B_CAKARANXX/Data/Intensities/BAM_basecalls_20170522-231912/no_cal/lane3/%!.bam - +@PG ID:bamcollate2' PN:bamcollate2 PP:samtools VN:2.0.72 CL:/software/solexa/pkg/biobambam/2.0.72/bin/bamcollate2 collate=1 level=0 +@PG ID:bamreset PN:bamreset PP:bamcollate2' VN:2.0.72 CL:/software/solexa/pkg/biobambam/2.0.72/bin/bamreset resetaux=0 level=0 verbose=0 +@PG ID:bamadapterclip PN:bamadapterclip PP:bamreset VN:2.0.72 CL:/software/solexa/pkg/biobambam/2.0.72/bin/bamadapterclip verbose=0 level=0 +@PG ID:bwa' PN:bwa PP:bamadapterclip VN:0.7.15-r1140 CL:/software/solexa/pkg/bwa/0.7.15/bwa mem -t 16 -p -Y -K 100000000 /lustre/scratch117/core/sciops_repository/references/Plasmodium_falciparum/3D7_Jan16v3/all/bwa0_6/Pf3D7_v3.fa /tmp/qZJDmtn5Aa/alntgt_bamtofastq_out +@PG ID:scramble' PN:scramble PP:bwa' VN:1.14.9 CL:/software/solexa/pkg/scramble/1.14.9/bin/scramble -0 -I sam -O bam +@PG ID:samtools' PN:samtools PP:scramble' VN:1.4 CL:/software/solexa/pkg/samtools/1.4/bin/samtools reheader /tmp/iy1R1qORBH/postalntgt_alterSQ_headerSQfix_out /tmp/_PRhyKAVvT/postalntgt_mbuffer_headerSQfix_out +@PG ID:bam12split PN:bam12split PP:samtools' VN:2.0.72 
CL:/software/solexa/pkg/biobambam/2.0.72/bin/bam12split verbose=0 level=0 +@PG ID:bamsormadup PN:bamsormadup PP:bam12split VN:2.0.72 CL:/software/solexa/pkg/biobambam/2.0.72/bin/bamsormadup threads=16 SO=queryname level=0 +@PG ID:bam12auxmerge' PN:bam12auxmerge PP:bamsormadup VN:2.0.72 CL:/software/solexa/pkg/biobambam/2.0.72/bin/bam12auxmerge level=0 rankstrip=1 ranksplit=0 zztoname=0 clipreinsert=1 ./hs_intfile0_6_6#6.bam +@PG ID:AlignmentFilter PN:AlignmentFilter PP:bam12auxmerge' DS:Give a list of SAM/BAM files with the same set of records and in the same order but aligned with different references, split reads into different files according to alignments. You have option to put unaligned reads into one of output files or a separate file VN:V1.19 CL:uk.ac.sanger.npg.picard.AlignmentFilter INPUT_ALIGNMENT=[./initial_phix_aln_6_6#6.bam, /tmp/L1MpV9HHku/postalnhs_bam12auxmerge_out, /tmp/gwghhDNpNZ/postalntgt_bam12auxmerge_out] OUTPUT_ALIGNMENT=[/tmp/HtoclagWjb/alignment_filter:__PHIX_BAM_OUT___out, /tmp/uKigoodsmU/alignment_filter:__HUMAN_SPLIT_BAM_OUT___out, /dev/stdout] METRICS_FILE=6_6#6.bam_alignment_filter_metrics.json VERBOSITY=INFO QUIET=false VALIDATION_STRINGENCY=SILENT COMPRESSION_LEVEL=0 MAX_RECORDS_IN_RAM=500000 CREATE_INDEX=false CREATE_MD5_FILE=false +@PG ID:bamsormadup' PN:bamsormadup CL:/software/solexa/pkg/biobambam/2.0.72/bin/bamsormadup threads=16 SO=coordinate level=0 verbose=0 fixmate=1 adddupmarksupport=1 tmpfile=/nfs/sf44/ILorHSany_sf44/analysis/170518_HS34_6_B_CAKARANXX/Data/Intensities/BAM_basecalls_20170522-231912/no_cal/archive/lane3/bsfopt_6_6#6.tmp PP:AlignmentFilter VN:2.0.72 +@PG ID:bamstreamingmarkduplicates PN:bamstreamingmarkduplicates CL:/software/solexa/pkg/biobambam/2.0.72/bin/bamstreamingmarkduplicates level=0 verbose=0 tmpfile=/nfs/sf44/ILorHSany_sf44/analysis/170518_HS34_6_B_CAKARANXX/Data/Intensities/BAM_basecalls_20170522-231912/no_cal/archive/lane3/bmdfopt_6_6#6.tmp 
M=/nfs/sf44/ILorHSany_sf44/analysis/170518_HS34_6_B_CAKARANXX/Data/Intensities/BAM_basecalls_20170522-231912/no_cal/archive/lane3/6_6#6.markdups_metrics.txt PP:bamsormadup' VN:2.0.72 +@PG ID:scramble.3 PN:scramble PP:bamstreamingmarkduplicates VN:1.14.9 CL:/software/solexa/pkg/scramble/1.14.9/bin/scramble -t 7 -7 -I bam -O cram -r /lustre/scratch117/core/sciops_repository/references/Plasmodium_falciparum/3D7_Jan16v3/all/fasta/Pf3D7_v3.fa +@PG ID:bamcollate2'' PN:bamcollate2 CL:/software/solexa/pkg/biobambam/2.0.76/bin/bamcollate2 inputformat=cram collate=1 level=0 PP:scramble.3 VN:2.0.76 +@PG ID:bamreset' PN:bamreset CL:/software/solexa/pkg/biobambam/2.0.76/bin/bamreset resetaux=0 auxfilter=RG,PG,BC,RT,QT,tr,tq,br,qr level=0 verbose=0 PP:bamcollate2'' VN:2.0.76 +@PG ID:bamadapterfind' PN:bamadapterfind CL:/software/solexa/pkg/biobambam/2.0.76/bin/bamadapterfind clip=1 verbose=0 level=0 PP:bamreset' VN:2.0.76 +@PG ID:STAR PN:STAR CL:/software/solexa/pkg/star/2.5.2b/bin/STAR --runMode alignReads --runThreadN 16 --genomeDir /nfs/srpipe_references/references/Anopheles_gambiae/PEST_AgamP4/all/star --genomeLoad NoSharedMemory --readFilesIn ./intfile_1_6_6#6.fq.gz ./intfile_2_6_6#6.fq.gz --outFileNamePrefix ./_6_6#6_ --outStd BAM_Unsorted --outSAMtype BAM Unsorted --outSAMstrandField intronMotif --outSAMattributes NH HI NM MD AS XS --outSAMunmapped Within KeepPairs --outFilterIntronMotifs RemoveNoncanonicalUnannotated --chimSegmentMin 15 --chimJunctionOverhangMin 15 --chimOutType SeparateSAMold --sjdbGTFfile /nfs/srpipe_references/transcriptomes/Anopheles_gambiae/vectorbase_181016_transcriptome/PEST_AgamP4/gtf/vectorbase_181016_transcriptome-PEST_AgamP4.gtf --sjdbOverhang 74 --quantMode TranscriptomeSAM GeneCounts PP:bamadapterfind' VN:STAR_2.5.2b +@PG ID:samtools'' PN:samtools CL:/software/solexa/pkg/samtools/1.5/bin/samtools reheader /tmp/M14m1oXbFR/postalntgt_alterSQ_headerSQfix_out /tmp/KTtrPBKGjI/postalntgt_mbuffer_headerSQfix_out PP:STAR VN:1.5 +@PG ID:bam12split' 
PN:bam12split CL:/software/solexa/pkg/biobambam/2.0.76/bin/bam12split verbose=0 level=0 PP:samtools'' VN:2.0.76 +@PG ID:bamsormadup'' PN:bamsormadup CL:/software/solexa/pkg/biobambam/2.0.76/bin/bamsormadup threads=16 SO=queryname level=0 PP:bam12split' VN:2.0.76 +@PG ID:bam12auxmerge'' PN:bam12auxmerge CL:/software/solexa/pkg/biobambam/2.0.76/bin/bam12auxmerge level=0 rankstrip=1 ranksplit=0 zztoname=0 clipreinsert=1 /tmp/XGZGjESM3P/amp_bamadapterclip_pre_auxmerge_out PP:bamsormadup'' VN:2.0.76 +@PG ID:bamsormadup''' PN:bamsormadup CL:/software/solexa/pkg/biobambam/2.0.76/bin/bamsormadup threads=16 SO=coordinate level=0 verbose=0 fixmate=1 adddupmarksupport=1 tmpfile=/lustre/scratch117/sciops/team117/npg/rb11/viv_reprocessing/rt585843_4472_Anopheles+Plasmodium_Tophat+Star_remapping/output/star/6/6_6#6/bsfopt_6_6#6.tmp PP:bam12auxmerge'' VN:2.0.76 +@PG ID:bamstreamingmarkduplicates' PN:bamstreamingmarkduplicates CL:/software/solexa/pkg/biobambam/2.0.76/bin/bamstreamingmarkduplicates level=0 verbose=0 tmpfile=/lustre/scratch117/sciops/team117/npg/rb11/viv_reprocessing/rt585843_4472_Anopheles+Plasmodium_Tophat+Star_remapping/output/star/6/6_6#6/bmdfopt_6_6#6.tmp M=/lustre/scratch117/sciops/team117/npg/rb11/viv_reprocessing/rt585843_4472_Anopheles+Plasmodium_Tophat+Star_remapping/output/star/6/6_6#6/6_6#6.markdups_metrics.txt PP:bamsormadup''' VN:2.0.76 +@PG ID:scramble.4 PN:scramble PP:bamstreamingmarkduplicates' VN:1.14.9 CL:/software/solexa/pkg/scramble/1.14.9/bin/scramble -t 7 -7 -I bam -O cram -r /nfs/srpipe_references/references/Anopheles_gambiae/PEST_AgamP4/all/fasta/chromosomes_AgamP4.fa +@CO TY:checksum ST:all PA:all HA:crc32prod CO:1679086 BS:469df137 NS:166e375c SQ:bacdbfb ST:BC,FI,QT,RT,TC:7096b44d +@CO TY:checksum ST:all PA:pass HA:crc32prod CO:1679086 BS:469df137 NS:166e375c SQ:bacdbfb ST:BC,FI,QT,RT,TC:7096b44d +@CO TY:checksum ST: PA:all HA:crc32prod CO:0 BS:1 NS:1 SQ:1 ST:BC,FI,QT,RT,TC:1 +@CO TY:checksum ST: PA:pass HA:crc32prod CO:0 BS:1 NS:1 SQ:1 
ST:BC,FI,QT,RT,TC:1 +@CO TY:checksum ST:6_6#6 PA:all HA:crc32prod CO:1679086 BS:469df137 NS:166e375c SQ:bacdbfb ST:BC,FI,QT,RT,TC:7096b44d +@CO TY:checksum ST:6_6#6 PA:pass HA:crc32prod CO:1679086 BS:469df137 NS:166e375c SQ:bacdbfb ST:BC,FI,QT,RT,TC:7096b44d +HS34_6:3:1314:8997:38475 163 2R 21758 255 75M = 22074 391 NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN BBBBBFFBFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF< NH:i:1 HI:i:1 NS:i:148 MQ:i:255 ms:i:2755 mN:i:22148 MN:Z:75M MD:Z:75 NM:i:0 RG:Z:6_6#6 +HS34_6:3:2307:11843:19357 163 2R 21770 255 75M = 21847 152 NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN BBBBBFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF