Skip to content

Commit

Permalink
Various changes, mostly related to storing paths and files relevant t…
Browse files Browse the repository at this point in the history
…o the quality check. That makes it easier for users to investigate the results.
  • Loading branch information
Michael Nuhn committed Mar 18, 2016
1 parent 20bf772 commit 68c3ae7
Show file tree
Hide file tree
Showing 10 changed files with 55 additions and 17 deletions.
1 change: 1 addition & 0 deletions modules/Bio/EnsEMBL/Funcgen/Hive/Config/QC_Chance.pm
Original file line number Diff line number Diff line change
Expand Up @@ -145,6 +145,7 @@ sub pipeline_analyses {
. qq( --pass #tracking_db_pass# )
. qq( --host #tracking_db_host# )
. qq( --dbname #tracking_db_name# )
. qq( --work_dir #tempdir# )
},
},
];
Expand Down
1 change: 1 addition & 0 deletions modules/Bio/EnsEMBL/Funcgen/Hive/Config/QC_Fastqc.pm
Original file line number Diff line number Diff line change
Expand Up @@ -100,6 +100,7 @@ sub pipeline_analyses {
cmd => qq(load_fastqc_summary_file.pl )
. qq( --input_subset_id #input_subset_id# )
. qq( --summary_file #fastqc_summary_file# )
. qq( --work_dir #tempdir# )
. qq( | mysql )
. qq( --host #tracking_db_host# )
. qq( --port #tracking_db_port# )
Expand Down
3 changes: 3 additions & 0 deletions modules/Bio/EnsEMBL/Funcgen/Hive/Config/QC_Flagstats.pm
Original file line number Diff line number Diff line change
Expand Up @@ -66,6 +66,9 @@ sub pipeline_analyses {
. qq( --result_set_id #result_set_id# )
. qq( --flagstats_file #flagstats_file# )
. qq( --user #tracking_db_user# --pass #tracking_db_pass# --host #tracking_db_host# --dbname #tracking_db_name# )
. qq( --work_dir #tempdir# )
. qq( --bam_file #bam_file# )


},
},
Expand Down
3 changes: 3 additions & 0 deletions modules/Bio/EnsEMBL/Funcgen/Hive/Config/QC_PhantomPeaks.pm
Original file line number Diff line number Diff line change
Expand Up @@ -107,6 +107,9 @@ sub pipeline_analyses {
. qq( --pass #tracking_db_pass# )
. qq( --host #tracking_db_host# )
. qq( --dbname #tracking_db_name# )
. qq( --work_dir #tempdir# )
. qq( --bam_file #bam_file# )

},
},
];
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -54,13 +54,13 @@ sub pipeline_analyses {
cmd => qq( proportion_of_reads_in_peaks.pl )
. qq( --peak_file #peak_file# )
. qq( --temp_dir #temp_dir# )
. qq( --bam_file #bam_file# )
. qq( --peak_caller #peak_caller# )
. qq( --feature_set_id #feature_set_id# )
. qq( --user #tracking_db_user# )
. qq( --pass #tracking_db_pass# )
. qq( --host #tracking_db_host# )
. qq( --dbname #tracking_db_name# )
. qq( --bam_file #bam_file# )
},
-rc_name => 'normal_2GB',
},
Expand Down
10 changes: 7 additions & 3 deletions scripts/sequencing/load_argenrich_qc_file.pl
Original file line number Diff line number Diff line change
Expand Up @@ -78,6 +78,7 @@ =head1 DESCRIPTION
my $host;
my $dbname;
my $signal_result_set_id;
my $work_dir;

my %config_hash = (
"argenrich_file" => \$argenrich_file,
Expand All @@ -88,6 +89,7 @@ =head1 DESCRIPTION
'pass' => \$pass,
'host' => \$host,
'dbname' => \$dbname,
'work_dir' => \$work_dir,
);

my $result = GetOptions(
Expand All @@ -100,6 +102,7 @@ =head1 DESCRIPTION
'pass=s',
'host=s',
'dbname=s',
'work_dir=s',
);

die unless(-e $argenrich_file);
Expand Down Expand Up @@ -147,7 +150,7 @@ =head1 DESCRIPTION
signal_result_set_id, analysis_id, p, q, divergence, z_score, percent_genome_enriched, input_scaling_factor, differential_percentage_enrichment,
control_enrichment_stronger_than_chip_at_bin,
first_nonzero_bin_at,
pcr_amplification_bias_in_Input_coverage_of_1_percent_of_genome
pcr_amplification_bias_in_Input_coverage_of_1_percent_of_genome, path
) values (
$signal_result_set_id,
$analysis_id,
Expand All @@ -160,7 +163,8 @@ =head1 DESCRIPTION
$key_value_pairs{'differential_percentage_enrichment'},
$key_value_pairs{'Control enrichment stronger than ChIP at bin'},
$key_value_pairs{'Zero-enriched IP, maximum difference at bin'},
$key_value_pairs{'PCR amplification bias in Input, coverage of 1% of genome'}
$key_value_pairs{'PCR amplification bias in Input, coverage of 1% of genome'},
'$work_dir'
)
);

Expand Down Expand Up @@ -276,7 +280,7 @@ sub create_table {
-- greater deviations from that are reported.
--
`pcr_amplification_bias_in_Input_coverage_of_1_percent_of_genome`double default NULL,
`path` varchar(100) NOT NULL,
`path` varchar(512) NOT NULL,
PRIMARY KEY (`result_set_qc_chance_id`)
);
SQL
Expand Down
7 changes: 5 additions & 2 deletions scripts/sequencing/load_fastqc_summary_file.pl
Original file line number Diff line number Diff line change
Expand Up @@ -74,16 +74,19 @@ =head1 DESCRIPTION

my $summary_file;
my $input_subset_id;
my $work_dir;

my %config_hash = (
"summary_file" => \$summary_file,
"input_subset_id" => \$input_subset_id,
"work_dir" => \$work_dir,
);

my $result = GetOptions(
\%config_hash,
'input_subset_id=s',
'summary_file=s',
'work_dir=s',
);

die unless(-e $summary_file);
Expand All @@ -98,7 +101,7 @@ =head1 DESCRIPTION
#print " - $current_line\n";
my @f = split "\t", $current_line;
#print Dumper(\@f);
my $sql = "INSERT ignore INTO input_subset_fastqc (input_subset_id,status,title,file_name) VALUES (".$input_subset_id.", '".$f[0]."', '".$f[1]."', '".$f[2]."')";
my $sql = "INSERT ignore INTO input_subset_fastqc (input_subset_id,status,title,file_name,path) VALUES (".$input_subset_id.", '".$f[0]."', '".$f[1]."', '".$f[2]."', '".$work_dir."')";

print "$sql;\n";
}
Expand All @@ -114,7 +117,7 @@ sub create_table_sql {
`status` varchar(100) NOT NULL,
`title` varchar(100) NOT NULL,
`file_name` varchar(100) NOT NULL,
`path` varchar(100) NOT NULL,
`path` varchar(512) NOT NULL,
PRIMARY KEY (`input_subset_qc_id`),
UNIQUE KEY `name_exp_idx` (`input_subset_id`,`title`)
) ENGINE=MyISAM;
Expand Down
21 changes: 16 additions & 5 deletions scripts/sequencing/load_phantom_peak_file.pl
Original file line number Diff line number Diff line change
Expand Up @@ -59,6 +59,8 @@ =head1 DESCRIPTION
my $pass;
my $host;
my $dbname;
my $work_dir;
my $bam_file;

my %config_hash = (
"result_file" => \$result_file,
Expand All @@ -68,6 +70,8 @@ =head1 DESCRIPTION
'pass' => \$pass,
'host' => \$host,
'dbname' => \$dbname,
'work_dir' => \$work_dir,
'bam_file' => \$bam_file,
);

my $result = GetOptions(
Expand All @@ -79,6 +83,8 @@ =head1 DESCRIPTION
'pass=s',
'host=s',
'dbname=s',
'work_dir=s',
'bam_file=s',
);

if (! $result_file) {
Expand Down Expand Up @@ -133,6 +139,7 @@ =head1 DESCRIPTION
} else {
$sql_processor = sub {
my $sql = shift;
$logger->info($sql . "\n");
$dbc->do($sql);
};
}
Expand Down Expand Up @@ -219,13 +226,15 @@ =head1 DESCRIPTION
. "min_corr, "
. "NSC, "
. "RSC, "
. "QualityTag "
. "QualityTag, "
. "path "
. ") VALUES ("
. (
join ', ', (
$result_set_id,
$analysis_id,
quote($filename),
#quote($filename),
quote($bam_file),
$numReads,

$estFragLen,
Expand All @@ -243,6 +252,7 @@ =head1 DESCRIPTION
$NSC,
$RSC,
$QualityTag,
quote($work_dir)
)
)
. ");";
Expand All @@ -265,7 +275,7 @@ sub create_table {
`result_set_qc_phantom_peak_id` int(10) unsigned NOT NULL AUTO_INCREMENT,
`analysis_id` int(10) unsigned,
`result_set_id` int(10) unsigned NOT NULL,
`filename` varchar(100) NOT NULL,
`filename` varchar(512) NOT NULL,
`numReads` int(10) unsigned NOT NULL,
`estFragLen` double default NULL,
`estFragLen2` double default NULL,
Expand Down Expand Up @@ -309,9 +319,10 @@ sub create_table {
-- Quality values derived from the RSC
--
`QualityTag` int(10),
`path` varchar(100) NOT NULL,
`path` varchar(512) NOT NULL,
PRIMARY KEY (`result_set_qc_phantom_peak_id`),
UNIQUE KEY `filename_idx` (`filename`)
-- UNIQUE KEY `filename_idx` (`filename`)
KEY `filename_idx` (`filename`)
);
SQL
;
Expand Down
15 changes: 11 additions & 4 deletions scripts/sequencing/load_samtools_flagstats.pl
Original file line number Diff line number Diff line change
Expand Up @@ -51,6 +51,8 @@ =head1 DESCRIPTION
my $pass;
my $host;
my $dbname;
my $work_dir;
my $bam_file;

my %config_hash = (
'flagstats_file' => \$flagstats_file,
Expand All @@ -60,6 +62,8 @@ =head1 DESCRIPTION
'pass' => \$pass,
'host' => \$host,
'dbname' => \$dbname,
'work_dir' => \$work_dir,
'bam_file' => \$bam_file,
);

# Loading command line parameters into variables and into a hash.
Expand All @@ -72,6 +76,8 @@ =head1 DESCRIPTION
'pass=s',
'host=s',
'dbname=s',
'work_dir=s',
'bam_file=s',
);

die unless(-e $flagstats_file);
Expand Down Expand Up @@ -146,7 +152,7 @@ sub create_insert_sql {
my $sql_processor = $param->{sql_processor};

open IN, $flagstats_file;

while (my $current_line = <IN>) {
chomp $current_line;
my $recognized = $current_line =~ /^(\d+) \+ (\d) (.+)$/;
Expand All @@ -156,9 +162,9 @@ sub create_insert_sql {
my $category = $3;

my $sql = "INSERT INTO result_set_qc_flagstats "
. "(result_set_id,analysis_id,category,qc_passed_reads,qc_failed_reads) "
. "(result_set_id, analysis_id, category, qc_passed_reads, qc_failed_reads, path, bam_file) "
. "VALUES "
. "($result_set_id, $analysis_id, '$category', $qc_passed_reads, $qc_failed_reads);";
. "($result_set_id, $analysis_id, '$category', $qc_passed_reads, $qc_failed_reads, '$work_dir', '$bam_file');";
$sql_processor->($sql);
} else {
$logger->debug("Can't parse: " . $current_line . "\n");
Expand All @@ -182,7 +188,8 @@ sub create_flagstats_table {
`category` varchar(100) NOT NULL,
`qc_passed_reads` int(10) unsigned,
`qc_failed_reads` int(10) unsigned,
`path` varchar(100) NOT NULL,
`path` varchar(512) NOT NULL,
`bam_file` varchar(512) NOT NULL,
PRIMARY KEY (`result_set_qc_id`),
UNIQUE KEY `name_exp_idx` (`result_set_qc_id`,`category`)
);
Expand Down
9 changes: 7 additions & 2 deletions scripts/sequencing/proportion_of_reads_in_peaks.pl
Original file line number Diff line number Diff line change
Expand Up @@ -394,14 +394,18 @@ sub create_insert_sql {
. "analysis_id, "
. "feature_set_id, "
. "prop_reads_in_peaks, "
. "total_reads"
. "total_reads, "
. "path,"
. "bam_file"
. ") VALUES ("
. (
join ', ', (
$analysis_id,
$feature_set_id,
$proportion_of_reads_in_peaks,
$num_reads_in_total,
"'$temp_dir'",
"'$bam_file'"
)
)
. ");";
Expand Down Expand Up @@ -523,7 +527,8 @@ sub create_table {
`feature_set_id` int(10) unsigned NOT NULL,
`prop_reads_in_peaks` double default NULL,
`total_reads` int(10) default NULL,
`path` varchar(100) NOT NULL,
`path` varchar(512) NOT NULL,
`bam_file` varchar(512) NOT NULL,
PRIMARY KEY (`feature_set_qc_prop_reads_in_peaks_id`)
);
SQL
Expand Down

0 comments on commit 68c3ae7

Please sign in to comment.