Commit 9945a5c
numerical consistency between python and java, transpose fix
lucidtronix committed Apr 24, 2018
1 parent e530c2e commit 9945a5c
Showing 7 changed files with 44 additions and 26 deletions.
File 1 of 7
@@ -39,14 +39,15 @@ public final class ReadGroupBlackListReadFilter extends ReadFilter implements Se
// Command line parser requires a no-arg constructor
public ReadGroupBlackListReadFilter() {}

/**
 * Creates a filter using the lists of files with blacklisted read groups.
 * Any entry can be a path to a file (ending with "list" or "txt") from which
 * the blacklist will be loaded. This scheme works recursively
 * (i.e. the file may contain names of further files, etc.).
 */
public ReadGroupBlackListReadFilter(final List<String> blackLists, final SAMFileHeader header) {
super.setHeader(header);
this.blackList.addAll(blackLists);
final Map<String, Collection<String>> filters = new TreeMap<>();
for (String blackList : blackLists) {
try {
File 2 of 7
@@ -10,9 +10,9 @@
import org.broadinstitute.barclay.help.DocumentedFeature;
import org.broadinstitute.hellbender.cmdline.StandardArgumentDefinitions;
import org.broadinstitute.hellbender.engine.*;
import org.broadinstitute.hellbender.engine.filters.VariantFilter;
import org.broadinstitute.hellbender.engine.filters.VariantFilterLibrary;
import org.broadinstitute.hellbender.engine.filters.*;
import org.broadinstitute.hellbender.exceptions.GATKException;
import org.broadinstitute.hellbender.utils.haplotype.HaplotypeBAMWriter;
import org.broadinstitute.hellbender.utils.io.Resource;
import org.broadinstitute.hellbender.utils.io.IOUtils;
import org.broadinstitute.hellbender.utils.python.StreamingPythonScriptExecutor;
@@ -170,7 +170,7 @@ public class CNNScoreVariants extends VariantWalker {

private int curBatchSize = 0;
private int windowEnd = windowSize / 2;
private int windowStart = (windowSize / 2) - 1;
private int windowStart = windowSize / 2;
private boolean waitforBatchCompletion = false;
private File scoreFile;

@@ -190,7 +190,6 @@ protected String[] customCommandLineValidation() {
return new String[]{"No default architecture for tensor type: " + tensorType.name()};
}
}

return null;
}

@@ -208,6 +207,17 @@ protected VariantFilter makeVariantFilter(){
}
}

@Override
public List<ReadFilter> getDefaultReadFilters() {
List<ReadFilter> readFilters = new ArrayList<>();
readFilters.addAll(super.getDefaultReadFilters());
List<String> filterList = new ArrayList<>();
filterList.add("ID:" + HaplotypeBAMWriter.DEFAULT_HAPLOTYPE_READ_GROUP_ID);
filterList.add("ID:" + HaplotypeBAMWriter.DEFAULT_GATK3_HAPLOTYPE_READ_GROUP_ID);
readFilters.add(new ReadGroupBlackListReadFilter(filterList, null));
return readFilters;
}
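This override keeps the artificial haplotype reads that HaplotypeBAMWriter emits into a bamout from being fed to the CNN. A minimal sketch of the matching idea in Python, assuming (as the "ID:" prefixes above suggest) that blacklist entries are "TAG:value" pairs compared against each read's read-group record; the names here are hypothetical, not the GATK implementation:

blacklist = {'ID:ArtificialHaplotypeRG', 'ID:ArtificialHaplotype'}

def keep_read(read_group_id):
    # Drop the read when its read group ID appears on the blacklist.
    return 'ID:' + read_group_id not in blacklist

assert not keep_read('ArtificialHaplotypeRG')  # artificial haplotypes are filtered out
assert keep_read('HiSeq2500RG7')               # a hypothetical ordinary read group passes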

@Override
public void onTraversalStart() {
scoreKey = getScoreKeyAndCheckModelAndReadsHarmony();
Expand Down Expand Up @@ -319,7 +329,7 @@ private void transferReadsToPythonViaFifo(final VariantContext variant, final Re
}
Iterator<GATKRead> readIt = readsContext.iterator();
if (!readIt.hasNext()) {
logger.warn("No reads at contig:" + variant.getContig() + "site:" + String.valueOf(variant.getStart()));
logger.warn("No reads at contig:" + variant.getContig() + " site:" + String.valueOf(variant.getStart()));
}
while (readIt.hasNext()) {
sb.append(GATKReadToString(readIt.next()));
File 3 of 7
@@ -25,7 +25,9 @@ public abstract class HaplotypeBAMWriter implements AutoCloseable {
*/
private long uniqueNameCounter = 1;

private static final String DEFAULT_HAPLOTYPE_READ_GROUP_ID = "ArtificialHaplotypeRG";
public static final String DEFAULT_HAPLOTYPE_READ_GROUP_ID = "ArtificialHaplotypeRG";
// For read filters that need backwards compatibility with the GATK3 artificial haplotype
public static final String DEFAULT_GATK3_HAPLOTYPE_READ_GROUP_ID = "ArtificialHaplotype";
private static final int bestHaplotypeMQ = 60;
private static final int otherMQ = 0;

File 4 of 7
@@ -43,14 +43,14 @@ def score_and_write_batch(args, model, file_out, fifo, batch_size, python_batch_
variant_data = []
read_batch = []

for i in range(batch_size):
for _ in range(batch_size):
fifo_line = fifo.readline()
fifo_data = fifo_line.split(defines.SEPARATOR_CHAR)

variant_data.append(fifo_data[0] + '\t' + fifo_data[1] + '\t' + fifo_data[2] + '\t' + fifo_data[3])
reference_batch.append(reference_string_to_tensor(fifo_data[4]))
annotation_batch.append(annotation_string_to_tensor(args, fifo_data[5]))
variant_types.append(fifo_data[6])
variant_types.append(fifo_data[6].strip())

fidx = 7  # because above we parsed: contig pos ref alt reference_string annotation variant_type
if args.tensor_name in defines.TENSOR_MAPS_2D and len(fifo_data) > fidx:
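The .strip() added to the variant type matters because, when no read fields follow (the 1D tensor case), fifo_data[6] is the last field on the line, and readline() leaves its trailing newline attached after the split. A small illustration with a hypothetical record, assuming tab as defines.SEPARATOR_CHAR:

SEPARATOR_CHAR = '\t'  # assumption: stand-in for defines.SEPARATOR_CHAR
fifo_line = '1\t12345\tA\tG\tACGTACGT\tQD=2.1\tSNP\n'  # hypothetical FIFO record
fields = fifo_line.split(SEPARATOR_CHAR)
print(repr(fields[6]))          # 'SNP\n' -- newline from readline() still attached
print(repr(fields[6].strip()))  # 'SNP'  -- now compares equal to keys like 'SNP'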
@@ -69,7 +69,7 @@
_, ref_start, _ = get_variant_window(args, var)
insert_dict = get_inserts(args, read_tuples, var)
tensor = read_tuples_to_read_tensor(args, read_tuples, ref_start, insert_dict)
reference_sequence_into_tensor(args, fifo_data[4], tensor)
reference_sequence_into_tensor(args, fifo_data[4], tensor, insert_dict)
if os.path.exists(tensor_dir):
_write_tensor_to_hd5(args, tensor, annotation_batch[-1], fifo_data[0], fifo_data[1], fifo_data[6])
read_batch.append(tensor)
@@ -201,8 +201,8 @@ def cigar_string_to_tuples(cigar):

def get_variant_window(args, variant):
index_offset = (args.window_size//2)
reference_start = variant.pos-(index_offset+1)
reference_end = variant.pos+index_offset
reference_start = variant.pos-index_offset
reference_end = variant.pos + index_offset + (args.window_size%2)
return index_offset, reference_start, reference_end
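These bounds are the Python half of the commit's numerical-consistency fix: together with the windowStart/windowEnd change in CNNScoreVariants (file 2 above), both languages now describe a window of exactly window_size reference bases around the variant. A quick check, assuming the span is used half-open as [reference_start, reference_end):

def check_variant_window(window_size, pos):
    # Mirrors the updated get_variant_window above, with args.window_size inlined.
    index_offset = window_size // 2
    reference_start = pos - index_offset
    reference_end = pos + index_offset + (window_size % 2)
    return reference_end - reference_start

for window_size in (128, 127):  # even and odd window sizes
    assert check_variant_window(window_size, pos=1000) == window_size
# The old bounds gave window_size + 1 bases for even sizes (start was pos - offset - 1).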


@@ -245,6 +245,7 @@ def read_tuples_to_read_tensor(args, read_tuples, ref_start, insert_dict):

if i == args.window_size:
break

if b == defines.SKIP_CHAR:
continue
elif flag_start == -1:
Expand Down Expand Up @@ -290,8 +291,6 @@ def read_tuples_to_read_tensor(args, read_tuples, ref_start, insert_dict):
else:
tensor[channel_map['mapping_quality'], j, flag_start:flag_end] = mq



return tensor


@@ -335,9 +334,15 @@ def sequence_and_qualities_from_read(args, read, ref_start, insert_dict):
return rseq, rqual


def reference_sequence_into_tensor(args, reference_seq, tensor):
def reference_sequence_into_tensor(args, reference_seq, tensor, insert_dict):
ref_offset = len(set(args.input_symbols.values()))

for i in sorted(insert_dict.keys(), key=int, reverse=True):
if i < 0:
reference_seq = defines.INDEL_CHAR*insert_dict[i] + reference_seq
else:
reference_seq = reference_seq[:i] + defines.INDEL_CHAR*insert_dict[i] + reference_seq[i:]

for i,b in enumerate(reference_seq):
if i == args.window_size:
break
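The new insert_dict parameter lets the reference row stay column-aligned with reads that contain insertions: wherever the read tensor has insertion columns, the same number of INDEL_CHAR gap symbols is spliced into the reference string (or prepended, for insertions that start before the window). A toy run of the splicing loop above, with hypothetical stand-ins for defines.INDEL_CHAR and the insert positions:

INDEL_CHAR = '*'  # assumption: stand-in for defines.INDEL_CHAR
reference_seq = 'ACGTACGT'
insert_dict = {2: 3, -1: 2}  # hypothetical: 3 inserted bases at column 2, 2 before the window

# Splice right-to-left so earlier splices do not shift later positions.
for i in sorted(insert_dict.keys(), key=int, reverse=True):
    if i < 0:
        reference_seq = INDEL_CHAR * insert_dict[i] + reference_seq
    else:
        reference_seq = reference_seq[:i] + INDEL_CHAR * insert_dict[i] + reference_seq[i:]

print(reference_seq)  # '**AC***GTACGT'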
File 5 of 7
@@ -607,11 +607,11 @@ def reference_sequence_into_tensor(args, reference_seq, tensor):
else:
tensor[ref_offset+args.input_symbols[b], :, i] = 1.0
elif b in vqsr_cnn.AMBIGUITY_CODES:
ambiguous_vector = np.transpose(np.tile(vqsr_cnn.AMBIGUITY_CODES[b], (args.read_limit, 1)))
ambiguous_vector = np.tile(vqsr_cnn.AMBIGUITY_CODES[b], (args.read_limit, 1))
if K.image_data_format() == 'channels_last':
tensor[:, i, ref_offset:ref_offset+4] = ambiguous_vector
else:
tensor[ref_offset:ref_offset+4, :, i] = ambiguous_vector
tensor[ref_offset:ref_offset+4, :, i] = np.transpose(ambiguous_vector)


def flag_to_array(flag):
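The transpose fix is a shape correction: np.tile(code, (read_limit, 1)) already has shape (read_limit, 4), which is exactly what the channels_last slice tensor[:, i, ref_offset:ref_offset+4] expects, so only the channels_first slice, shaped (4, read_limit), needs a transpose. The old code transposed up front and handed a (4, read_limit) array to the channels_last branch. A shape check with hypothetical sizes and an illustrative ambiguity code:

import numpy as np

read_limit = 128
ambiguity_code = np.array([0.5, 0.5, 0.0, 0.0])  # hypothetical code, e.g. IUPAC 'M' = A or C

ambiguous_vector = np.tile(ambiguity_code, (read_limit, 1))
print(ambiguous_vector.shape)                # (128, 4) -- fits tensor[:, i, ref_offset:ref_offset+4]
print(np.transpose(ambiguous_vector).shape)  # (4, 128) -- fits tensor[ref_offset:ref_offset+4, :, i]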
Git LFS file not shown
Git LFS file not shown
