Skip to content

Commit

Permalink
address VS-152, remove extra headers from extract (#7466)
Browse files Browse the repository at this point in the history
* address VS-152, remove extra headers from extract

* explicit imports

* fixed inconsistency between prepare and extract with respect to is_loaded in the __SAMPLES table

* fixed inconsistency between prepare and extract with respect to is_loaded in the __SAMPLES table
  • Loading branch information
kcibul authored Sep 13, 2021
1 parent dc110b8 commit a9206a2
Show file tree
Hide file tree
Showing 4 changed files with 45 additions and 14 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -121,7 +121,7 @@ def get_all_sample_ids(fq_destination_table_samples):

def create_extract_samples_table(fq_destination_table_samples, fq_sample_name_table, fq_sample_mapping_table):
sql = f"CREATE OR REPLACE TABLE `{fq_destination_table_samples}` AS (" \
f"SELECT m.sample_id, m.sample_name FROM `{fq_sample_name_table}` s JOIN `{fq_sample_mapping_table}` m ON (s.sample_name = m.sample_name) " \
f"SELECT m.sample_id, m.sample_name, m.is_loaded FROM `{fq_sample_name_table}` s JOIN `{fq_sample_mapping_table}` m ON (s.sample_name = m.sample_name) " \
f"WHERE m.is_loaded is TRUE)"

results = execute_with_retry("create extract sample table", sql)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -67,14 +67,6 @@ public static VCFHeader generateVcfHeader(Set<String> sampleNames,
return header;
}

/**
 * Convenience overload that builds a VCF header without any additional header lines.
 *
 * <p>Delegates to the three-argument {@code generateVcfHeader}, passing a fresh, empty set
 * as the extra header lines.
 *
 * @param sampleNames        sample names forwarded to the three-argument overload
 * @param sequenceDictionary sequence dictionary forwarded to the three-argument overload
 * @return the header produced by the three-argument overload
 */
public static VCFHeader generateVcfHeader(Set<String> sampleNames,
                                          final SAMSequenceDictionary sequenceDictionary) {
    // No caller-supplied header lines: hand the delegate an empty, mutable set.
    return generateVcfHeader(sampleNames, sequenceDictionary, new HashSet<VCFHeaderLine>());
}


// TODO: Is this specific to cohort extract? If so, rename it to say so.
public static Set<VCFHeaderLine> getEvoquerVcfHeaderLines() {
final Set<VCFHeaderLine> headerLines = new HashSet<>();
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -80,7 +80,7 @@ private TableResult querySampleTable(
// Get the query string:
final String sampleListQueryString =
"SELECT " + SchemaUtils.SAMPLE_ID_FIELD_NAME + ", " + SchemaUtils.SAMPLE_NAME_FIELD_NAME +
" FROM `" + fqSampleTableName + "` " + whereClause;
" FROM `" + fqSampleTableName + "` " + ((whereClause!=null)?" WHERE ":"") + whereClause;

Map<String, String> labelForQuery = new HashMap<String, String>();
if (originTool.isPresent()) {
Expand Down
Original file line number Diff line number Diff line change
@@ -1,9 +1,12 @@
package org.broadinstitute.hellbender.tools.gvs.extract;

import htsjdk.samtools.SAMSequenceDictionary;
import htsjdk.variant.vcf.VCFConstants;
import htsjdk.variant.vcf.VCFFormatHeaderLine;
import htsjdk.variant.vcf.VCFHeader;
import htsjdk.variant.vcf.VCFHeaderLine;
import htsjdk.variant.vcf.VCFHeaderLineType;
import htsjdk.variant.vcf.VCFStandardHeaderLines;
import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.Logger;
import org.broadinstitute.barclay.argparser.Advanced;
Expand All @@ -12,7 +15,6 @@
import org.broadinstitute.barclay.help.DocumentedFeature;
import org.broadinstitute.hellbender.cmdline.programgroups.ShortVariantDiscoveryProgramGroup;
import org.broadinstitute.hellbender.exceptions.UserException;
import org.broadinstitute.hellbender.tools.gvs.common.CommonCode;
import org.broadinstitute.hellbender.tools.gvs.common.SampleList;
import org.broadinstitute.hellbender.tools.gvs.common.SchemaUtils;
import org.broadinstitute.hellbender.tools.gvs.common.ExtractTool;
Expand Down Expand Up @@ -140,6 +142,45 @@ public enum VQSLODFilteringType { GENOTYPE, SITES, NONE }
optional=true)
private boolean excludeFilteredSites = false;

/**
 * Assembles the VCF header used by this tool.
 *
 * <p>Header lines are collected into a set in the following order: the fixed FILTER lines,
 * the standard and GATK-specific INFO lines, the standard and GATK-specific FORMAT lines,
 * the allele-specific VQSLOD and YNG-status INFO lines, and finally the caller-supplied
 * {@code extraHeaders}. The header is then constructed from that set and the sample names,
 * and the sequence dictionary is attached.
 *
 * @param sampleNames        sample names passed to the {@link VCFHeader} constructor
 * @param sequenceDictionary sequence dictionary set on the returned header
 * @param extraHeaders       additional header lines, added after all built-in lines
 * @return the newly constructed header
 */
protected static VCFHeader generateVcfHeader(Set<String> sampleNames,
                                             final SAMSequenceDictionary sequenceDictionary,
                                             final Set<VCFHeaderLine> extraHeaders) {
    final Set<VCFHeaderLine> lines = new HashSet<>();

    // FILTER lines, registered in a fixed order.
    final String[] filterIds = {
            GATKVCFConstants.LOW_QUAL_FILTER_NAME,
            GATKVCFConstants.NAY_FROM_YNG,
            GATKVCFConstants.EXCESS_HET_KEY,
            GATKVCFConstants.EXCESS_ALLELES,
            GATKVCFConstants.NO_HQ_GENOTYPES
    };
    for (final String filterId : filterIds) {
        lines.add(GATKVCFHeaderLines.getFilterLine(filterId));
    }

    // INFO lines: the standard ones first, then the GATK-specific approximate-QUAL keys.
    VCFStandardHeaderLines.addStandardInfoLines(lines, true,
            VCFConstants.ALLELE_COUNT_KEY,
            VCFConstants.ALLELE_FREQUENCY_KEY,
            VCFConstants.ALLELE_NUMBER_KEY,
            VCFConstants.END_KEY);
    lines.add(GATKVCFHeaderLines.getInfoLine(GATKVCFConstants.AS_RAW_QUAL_APPROX_KEY));
    lines.add(GATKVCFHeaderLines.getInfoLine(GATKVCFConstants.RAW_QUAL_APPROX_KEY));

    // FORMAT lines: standard GT/GQ, then the reference genotype quality.
    VCFStandardHeaderLines.addStandardFormatLines(lines, true,
            VCFConstants.GENOTYPE_KEY,
            VCFConstants.GENOTYPE_QUALITY_KEY);
    lines.add(GATKVCFHeaderLines.getFormatLine(GATKVCFConstants.REFERENCE_GENOTYPE_QUALITY));

    // Allele-specific VQSLOD and YNG-status INFO lines.
    lines.add(GATKVCFHeaderLines.getInfoLine(GATKVCFConstants.AS_VQS_LOD_KEY));
    lines.add(GATKVCFHeaderLines.getInfoLine(GATKVCFConstants.AS_YNG_STATUS_KEY));

    // Caller-supplied lines go in last, after every built-in line.
    lines.addAll(extraHeaders);

    final VCFHeader vcfHeader = new VCFHeader(lines, sampleNames);
    vcfHeader.setSequenceDictionary(sequenceDictionary);
    return vcfHeader;
}

@Override
protected void onStartup() {
Expand Down Expand Up @@ -177,16 +218,14 @@ protected void onStartup() {
}
}

extraHeaderLines.add(GATKVCFHeaderLines.getFilterLine(GATKVCFConstants.LOW_QUAL_FILTER_NAME));

if (vqslodfilteringType.equals(VQSLODFilteringType.GENOTYPE)) {
extraHeaderLines.add(new VCFFormatHeaderLine("FT", 1, VCFHeaderLineType.String, "Genotype Filter Field"));
}

SampleList sampleList = new SampleList(sampleTableName, sampleFileName, projectID, printDebugInformation, "extract-cohort");
Map<Long, String> sampleIdToName = sampleList.getMap();

VCFHeader header = CommonCode.generateVcfHeader(new HashSet<>(sampleIdToName.values()), reference.getSequenceDictionary(), extraHeaderLines);
VCFHeader header = generateVcfHeader(new HashSet<>(sampleIdToName.values()), reference.getSequenceDictionary(), extraHeaderLines);

final List<SimpleInterval> traversalIntervals = getTraversalIntervals();

Expand Down

0 comments on commit a9206a2

Please sign in to comment.