From 174cb31741759e90369f32880c1017bef73fc264 Mon Sep 17 00:00:00 2001 From: James Date: Wed, 29 May 2019 17:21:09 -0400 Subject: [PATCH 1/6] added an option to opt out of the protections to sorting multiple bam inputs --- .../markduplicates/MarkDuplicatesSpark.java | 18 ++++++++++++++---- .../MarkDuplicatesSparkIntegrationTest.java | 11 +++++++++++ 2 files changed, 25 insertions(+), 4 deletions(-) diff --git a/src/main/java/org/broadinstitute/hellbender/tools/spark/transforms/markduplicates/MarkDuplicatesSpark.java b/src/main/java/org/broadinstitute/hellbender/tools/spark/transforms/markduplicates/MarkDuplicatesSpark.java index 93936fd95dd..053860aafd6 100644 --- a/src/main/java/org/broadinstitute/hellbender/tools/spark/transforms/markduplicates/MarkDuplicatesSpark.java +++ b/src/main/java/org/broadinstitute/hellbender/tools/spark/transforms/markduplicates/MarkDuplicatesSpark.java @@ -6,6 +6,7 @@ import org.apache.spark.api.java.JavaPairRDD; import org.apache.spark.api.java.JavaRDD; import org.apache.spark.api.java.JavaSparkContext; +import org.broadinstitute.barclay.argparser.Advanced; import org.broadinstitute.barclay.argparser.Argument; import org.broadinstitute.barclay.argparser.ArgumentCollection; import org.broadinstitute.barclay.argparser.CommandLineProgramProperties; @@ -140,6 +141,11 @@ public final class MarkDuplicatesSpark extends GATKSparkTool { fullName = StandardArgumentDefinitions.METRICS_FILE_LONG_NAME) protected String metricsFile; + @Advanced + @Argument(doc = "Override to allow non-queryname sorted inputs for multiple input bams.", optional=true, + fullName = "allow-multiple-sort-orders-in-input") + protected boolean allowMultipleSortOrders = false; + @ArgumentCollection protected MarkDuplicatesSparkArgumentCollection markDuplicatesSparkArgumentCollection = new MarkDuplicatesSparkArgumentCollection(); @@ -298,10 +304,14 @@ protected void runTool(final JavaSparkContext ctx) { // Check if we are using multiple inputs that the headers are all in 
the correct querygrouped ordering, if so set the aggregate header to reflect this Map headerMap = getReadSourceHeaderMap(); if (headerMap.size() > 1) { - headerMap.entrySet().stream().forEach(h -> {if(!ReadUtils.isReadNameGroupedBam(h.getValue())) { - throw new UserException("Multiple inputs to MarkDuplicatesSpark detected. MarkDuplicatesSpark requires all inputs to be queryname sorted or querygroup-sorted for multi-input processing but input "+h.getKey()+" was sorted in "+h.getValue().getSortOrder()+" order"); - }}); - mergedHeader.setGroupOrder(SAMFileHeader.GroupOrder.query); + if (!allowMultipleSortOrders) { + headerMap.entrySet().stream().forEach(h -> { + if (!ReadUtils.isReadNameGroupedBam(h.getValue())) { + throw new UserException("Multiple inputs to MarkDuplicatesSpark detected. MarkDuplicatesSpark requires all inputs to be queryname sorted or querygroup-sorted for multi-input processing but input " + h.getKey() + " was sorted in " + h.getValue().getSortOrder() + " order. Try running with '--allow-multiple-sort-orders-in-input' to run by sorting all the input."); + } + }); + mergedHeader.setGroupOrder(SAMFileHeader.GroupOrder.query); + } } JavaRDD reads = getReads(); diff --git a/src/test/java/org/broadinstitute/hellbender/tools/spark/pipelines/MarkDuplicatesSparkIntegrationTest.java b/src/test/java/org/broadinstitute/hellbender/tools/spark/pipelines/MarkDuplicatesSparkIntegrationTest.java index a2e12cc37ff..30bc33bf235 100644 --- a/src/test/java/org/broadinstitute/hellbender/tools/spark/pipelines/MarkDuplicatesSparkIntegrationTest.java +++ b/src/test/java/org/broadinstitute/hellbender/tools/spark/pipelines/MarkDuplicatesSparkIntegrationTest.java @@ -482,4 +482,15 @@ public void testAssertCorrectSortOrderMultipleBams() { args.addInput(new File(TEST_DATA_DIR,"example.chr1.1-1K.unmarkedDups.noDups.bam")); runCommandLine(args); } + + @Test + public void testAssertCorrectSortOrderMultipleBamsOverriding() { + final File output = 
createTempFile("supplementaryReadUnmappedMate", "bam"); + final ArgumentsBuilder args = new ArgumentsBuilder(); + args.addOutput(output); + args.addInput(new File(TEST_DATA_DIR,"optical_dupes.bam")); + args.addInput(new File(TEST_DATA_DIR,"example.chr1.1-1K.unmarkedDups.noDups.bam")); + args.addArgument("allow-multiple-sort-orders-in-input"); + runCommandLine(args); + } } From 61e6e48c58f2fd66e03978461ed24d3fb833613c Mon Sep 17 00:00:00 2001 From: James Date: Thu, 30 May 2019 12:01:37 -0400 Subject: [PATCH 2/6] responded to comments and added a second option --- .../markduplicates/MarkDuplicatesSpark.java | 13 ++++++++++--- .../MarkDuplicatesSparkIntegrationTest.java | 13 ++++++++++++- .../optical_dupes.unsorted.querygrouped.sam | 16 ++++++++++++++++ 3 files changed, 38 insertions(+), 4 deletions(-) create mode 100644 src/test/resources/org/broadinstitute/hellbender/tools/walkers/MarkDuplicatesGATK/optical_dupes.unsorted.querygrouped.sam diff --git a/src/main/java/org/broadinstitute/hellbender/tools/spark/transforms/markduplicates/MarkDuplicatesSpark.java b/src/main/java/org/broadinstitute/hellbender/tools/spark/transforms/markduplicates/MarkDuplicatesSpark.java index 053860aafd6..9d581ba7ba9 100644 --- a/src/main/java/org/broadinstitute/hellbender/tools/spark/transforms/markduplicates/MarkDuplicatesSpark.java +++ b/src/main/java/org/broadinstitute/hellbender/tools/spark/transforms/markduplicates/MarkDuplicatesSpark.java @@ -128,6 +128,8 @@ programGroup = ReadDataManipulationProgramGroup.class) public final class MarkDuplicatesSpark extends GATKSparkTool { private static final long serialVersionUID = 1L; + public static final String ALLOW_MULTIPLE_SORT_ORDERS_IN_INPUT_ARG = "allow-multiple-sort-orders-in-input"; + public static final String TREAT_UNSORTED_AS_OREDED = "treat-unsorted-as-querygroup-ordered-for-multiple-inputs"; @Override public boolean requiresReads() { return true; } @@ -143,9 +145,14 @@ public final class MarkDuplicatesSpark extends GATKSparkTool { 
@Advanced @Argument(doc = "Override to allow non-queryname sorted inputs for multiple input bams.", optional=true, - fullName = "allow-multiple-sort-orders-in-input") + fullName = ALLOW_MULTIPLE_SORT_ORDERS_IN_INPUT_ARG) protected boolean allowMultipleSortOrders = false; + @Advanced + @Argument(doc = "Treat unsorted files as query-group orderd files. NOTE: this may result in mark duplicates crashing if the file is unordered", optional=true, + fullName = TREAT_UNSORTED_AS_OREDED) + protected boolean treatUnsortedAsOrdered = false; + @ArgumentCollection protected MarkDuplicatesSparkArgumentCollection markDuplicatesSparkArgumentCollection = new MarkDuplicatesSparkArgumentCollection(); @@ -306,8 +313,8 @@ protected void runTool(final JavaSparkContext ctx) { if (headerMap.size() > 1) { if (!allowMultipleSortOrders) { headerMap.entrySet().stream().forEach(h -> { - if (!ReadUtils.isReadNameGroupedBam(h.getValue())) { - throw new UserException("Multiple inputs to MarkDuplicatesSpark detected. MarkDuplicatesSpark requires all inputs to be queryname sorted or querygroup-sorted for multi-input processing but input " + h.getKey() + " was sorted in " + h.getValue().getSortOrder() + " order. Try running with '--allow-multiple-sort-orders-in-input' to run by sorting all the input."); + if (!ReadUtils.isReadNameGroupedBam(h.getValue()) && (!treatUnsortedAsOrdered && h.getValue().getSortOrder().equals(SAMFileHeader.SortOrder.unsorted))) { + throw new UserException("Multiple inputs to MarkDuplicatesSpark detected. MarkDuplicatesSpark requires all inputs to be queryname sorted or querygroup-sorted for multi-input processing but input " + h.getKey() + " was sorted in " + h.getValue().getSortOrder() + " order. 
Try running with '"+ALLOW_MULTIPLE_SORT_ORDERS_IN_INPUT_ARG+"' to run by sorting all the input."); } }); mergedHeader.setGroupOrder(SAMFileHeader.GroupOrder.query); diff --git a/src/test/java/org/broadinstitute/hellbender/tools/spark/pipelines/MarkDuplicatesSparkIntegrationTest.java b/src/test/java/org/broadinstitute/hellbender/tools/spark/pipelines/MarkDuplicatesSparkIntegrationTest.java index 30bc33bf235..14228a0f7b4 100644 --- a/src/test/java/org/broadinstitute/hellbender/tools/spark/pipelines/MarkDuplicatesSparkIntegrationTest.java +++ b/src/test/java/org/broadinstitute/hellbender/tools/spark/pipelines/MarkDuplicatesSparkIntegrationTest.java @@ -490,7 +490,18 @@ public void testAssertCorrectSortOrderMultipleBamsOverriding() { args.addOutput(output); args.addInput(new File(TEST_DATA_DIR,"optical_dupes.bam")); args.addInput(new File(TEST_DATA_DIR,"example.chr1.1-1K.unmarkedDups.noDups.bam")); - args.addArgument("allow-multiple-sort-orders-in-input"); + args.addArgument(MarkDuplicatesSpark.ALLOW_MULTIPLE_SORT_ORDERS_IN_INPUT_ARG); + runCommandLine(args); + } + + @Test + public void testAssertAssumeUnsortedFilesAreQueryGroupedFiles() { + final File output = createTempFile("supplementaryReadUnmappedMate", "bam"); + final ArgumentsBuilder args = new ArgumentsBuilder(); + args.addOutput(output); + args.addInput(new File(TEST_DATA_DIR,"optical_dupes.queryname.bam")); + args.addInput(new File(TEST_DATA_DIR,"optical_dupes.unsorted.querygrouped.sam")); + args.addArgument(MarkDuplicatesSpark.TREAT_UNSORTED_AS_OREDED); runCommandLine(args); } } diff --git a/src/test/resources/org/broadinstitute/hellbender/tools/walkers/MarkDuplicatesGATK/optical_dupes.unsorted.querygrouped.sam b/src/test/resources/org/broadinstitute/hellbender/tools/walkers/MarkDuplicatesGATK/optical_dupes.unsorted.querygrouped.sam new file mode 100644 index 00000000000..fc15f4cdd12 --- /dev/null +++ 
b/src/test/resources/org/broadinstitute/hellbender/tools/walkers/MarkDuplicatesGATK/optical_dupes.unsorted.querygrouped.sam @@ -0,0 +1,16 @@ +@HD VN:1.5 SO:unsorted +@SQ SN:chr1 LN:101 +@SQ SN:chr2 LN:101 +@SQ SN:chr3 LN:101 +@SQ SN:chr4 LN:101 +@SQ SN:chr5 LN:101 +@SQ SN:chr6 LN:101 +@SQ SN:chr7 LN:404 +@SQ SN:chr8 LN:202 +@RG ID:1AAXX.1 SM:Hi,Mom! LB:mylib PL:ILLUMINA +@PG ID:MarkDuplicates PN:MarkDuplicates VN:1 CL:MarkDuplicates merge1.sam PP:bwa +@PG ID:bwa PN:bwa VN:1 CL:bwa aln +C4N4WACXX140821:8:1112:2344:1984 83 chr7 1 255 101M = 302 201 CAACAGAAGCNGGNATCTGTGTTTGTGTTTCGGATTTCCTGCTGAANNGNTTNTCGNNTCNNNNNNNNATCCCGATTTCNTTCCGCAGCTNACCTCCCAAN )'.*.+2,))&&'&*/)-&*-)&.-)&)&),/-&&..)./.,.).*&&,&.&&-)&&&0*&&&&&&&&/32/,01460&&/6/*0*/2/283//36868/& PG:Z:MarkDuplicates RG:Z:1AAXX.1 +C4N4WACXX140821:8:1112:2344:1984 163 chr7 302 255 101M = 1 -201 NCGCGGCATCNCGATTTCTTTCCGCAGCTAACCTCCCGACAGATCGGCAGCGCGTCGTGTAGGTTATTATGGTACATCTTGTCGTGCGGCNAGAGCATACA &/15445666651/566666553+2/14/&/555512+3/)-'/-&-'*+))*''13+3)'//++''/'))/3+&*5++)&'2+&+/*&-&&*)&-./1'1 PG:Z:MarkDuplicates RG:Z:1AAXX.1 +C4N4WACXX140821:8:1112:2344:1985 83 chr7 1 255 101M = 302 201 CAACAGAAGCNGGNATCTGTGTTTGTGTTTCGGATTTCCTGCTGAANNGNTTNTCGNNTCNNNNNNNNATCCCGATTTCNTTCCGCAGCTNACCTCCCAAN )'.*.+2,))&&'&*/)-&*-)&.-)&)&),/-&&..)./.,.).*&&,&.&&-)&&&0*&&&&&&&&/32/,01460&&/6/*0*/2/283//36868/& PG:Z:MarkDuplicates RG:Z:1AAXX.1 +C4N4WACXX140821:8:1112:2344:1985 163 chr7 302 255 101M = 1 -201 NCGCGGCATCNCGATTTCTTTCCGCAGCTAACCTCCCGACAGATCGGCAGCGCGTCGTGTAGGTTATTATGGTACATCTTGTCGTGCGGCNAGAGCATACA &/15445666651/566666553+2/14/&/555512+3/)-'/-&-'*+))*''13+3)'//++''/'))/3+&*5++)&'2+&+/*&-&&*)&-./1'1 PG:Z:MarkDuplicates RG:Z:1AAXX.1 From da6fff1229a03b16cd45514663c98814ba34b564 Mon Sep 17 00:00:00 2001 From: James Date: Thu, 30 May 2019 14:12:34 -0400 Subject: [PATCH 3/6] adding louis's changes to the order handling --- .../markduplicates/MarkDuplicatesSpark.java | 39 +++++++++++++++---- .../MarkDuplicatesSparkIntegrationTest.java | 
2 +- 2 files changed, 32 insertions(+), 9 deletions(-) diff --git a/src/main/java/org/broadinstitute/hellbender/tools/spark/transforms/markduplicates/MarkDuplicatesSpark.java b/src/main/java/org/broadinstitute/hellbender/tools/spark/transforms/markduplicates/MarkDuplicatesSpark.java index 9d581ba7ba9..033ebbbb777 100644 --- a/src/main/java/org/broadinstitute/hellbender/tools/spark/transforms/markduplicates/MarkDuplicatesSpark.java +++ b/src/main/java/org/broadinstitute/hellbender/tools/spark/transforms/markduplicates/MarkDuplicatesSpark.java @@ -129,7 +129,7 @@ public final class MarkDuplicatesSpark extends GATKSparkTool { private static final long serialVersionUID = 1L; public static final String ALLOW_MULTIPLE_SORT_ORDERS_IN_INPUT_ARG = "allow-multiple-sort-orders-in-input"; - public static final String TREAT_UNSORTED_AS_OREDED = "treat-unsorted-as-querygroup-ordered-for-multiple-inputs"; + public static final String TREAT_UNSORTED_AS_ORDERED = "treat-unsorted-as-querygroup-ordered-for-multiple-inputs"; @Override public boolean requiresReads() { return true; } @@ -150,7 +150,7 @@ public final class MarkDuplicatesSpark extends GATKSparkTool { @Advanced @Argument(doc = "Treat unsorted files as query-group orderd files. 
NOTE: this may result in mark duplicates crashing if the file is unordered", optional=true, - fullName = TREAT_UNSORTED_AS_OREDED) + fullName = TREAT_UNSORTED_AS_ORDERED) protected boolean treatUnsortedAsOrdered = false; @ArgumentCollection @@ -311,12 +311,23 @@ protected void runTool(final JavaSparkContext ctx) { // Check if we are using multiple inputs that the headers are all in the correct querygrouped ordering, if so set the aggregate header to reflect this Map headerMap = getReadSourceHeaderMap(); if (headerMap.size() > 1) { - if (!allowMultipleSortOrders) { - headerMap.entrySet().stream().forEach(h -> { - if (!ReadUtils.isReadNameGroupedBam(h.getValue()) && (!treatUnsortedAsOrdered && h.getValue().getSortOrder().equals(SAMFileHeader.SortOrder.unsorted))) { - throw new UserException("Multiple inputs to MarkDuplicatesSpark detected. MarkDuplicatesSpark requires all inputs to be queryname sorted or querygroup-sorted for multi-input processing but input " + h.getKey() + " was sorted in " + h.getValue().getSortOrder() + " order. Try running with '"+ALLOW_MULTIPLE_SORT_ORDERS_IN_INPUT_ARG+"' to run by sorting all the input."); - } - }); + final Optional> badlySorted = headerMap.entrySet() + .stream() + .filter(h -> treatAsReadGroupOrdered(h.getValue(), treatUnsortedAsOrdered)) + .findFirst(); + + if(badlySorted.isPresent()) { + if (allowMultipleSortOrders) { + //don't set an ordering, the files will all be sorted downstream + logger.info("Input files are not all grouped by read name so they will be sorted."); + } else { + final Map.Entry badPair = badlySorted.get(); + throw new UserException( + "Multiple inputs to MarkDuplicatesSpark detected. 
MarkDuplicatesSpark requires all inputs to be queryname sorted " + + "or querygroup-sorted for multi-input processing but input " + badPair.getKey() + " was sorted in " + badPair + .getValue().getSortOrder() + " order"); + } + } else { mergedHeader.setGroupOrder(SAMFileHeader.GroupOrder.query); } } @@ -349,4 +360,16 @@ protected void runTool(final JavaSparkContext ctx) { writeReads(ctx, output, readsForWriting, mergedHeader, true); } + // helper method to determine if an input header is to be treated as a query group sorted file. + private boolean treatAsReadGroupOrdered(SAMFileHeader header, boolean treatUnsortedAsReadGrouped) { + final SAMFileHeader.SortOrder sortOrder = header.getSortOrder(); + if( ReadUtils.isReadNameGroupedBam(header) ){ + return true; + } else if ( treatUnsortedAsReadGrouped && (sortOrder.equals(SAMFileHeader.SortOrder.unknown) || sortOrder.equals(SAMFileHeader.SortOrder.unsorted))) { + logger.warn("Input bam was marked as " + sortOrder.toString() + " but " + TREAT_UNSORTED_AS_ORDERED + " is specified so it's being treated as read name grouped"); + return true; + } + return false; + } + } diff --git a/src/test/java/org/broadinstitute/hellbender/tools/spark/pipelines/MarkDuplicatesSparkIntegrationTest.java b/src/test/java/org/broadinstitute/hellbender/tools/spark/pipelines/MarkDuplicatesSparkIntegrationTest.java index 14228a0f7b4..30d4ff1fca2 100644 --- a/src/test/java/org/broadinstitute/hellbender/tools/spark/pipelines/MarkDuplicatesSparkIntegrationTest.java +++ b/src/test/java/org/broadinstitute/hellbender/tools/spark/pipelines/MarkDuplicatesSparkIntegrationTest.java @@ -501,7 +501,7 @@ public void testAssertAssumeUnsortedFilesAreQueryGroupedFiles() { args.addOutput(output); args.addInput(new File(TEST_DATA_DIR,"optical_dupes.queryname.bam")); args.addInput(new File(TEST_DATA_DIR,"optical_dupes.unsorted.querygrouped.sam")); - args.addArgument(MarkDuplicatesSpark.TREAT_UNSORTED_AS_OREDED); + 
args.addArgument(MarkDuplicatesSpark.TREAT_UNSORTED_AS_ORDERED); runCommandLine(args); } } From ff5cfa1a46e97e3e93d23b19a0b7d4abc93e5e5e Mon Sep 17 00:00:00 2001 From: James Date: Thu, 30 May 2019 14:13:38 -0400 Subject: [PATCH 4/6] typo --- .../spark/transforms/markduplicates/MarkDuplicatesSpark.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/main/java/org/broadinstitute/hellbender/tools/spark/transforms/markduplicates/MarkDuplicatesSpark.java b/src/main/java/org/broadinstitute/hellbender/tools/spark/transforms/markduplicates/MarkDuplicatesSpark.java index 033ebbbb777..a911a2b50fd 100644 --- a/src/main/java/org/broadinstitute/hellbender/tools/spark/transforms/markduplicates/MarkDuplicatesSpark.java +++ b/src/main/java/org/broadinstitute/hellbender/tools/spark/transforms/markduplicates/MarkDuplicatesSpark.java @@ -319,7 +319,7 @@ protected void runTool(final JavaSparkContext ctx) { if(badlySorted.isPresent()) { if (allowMultipleSortOrders) { //don't set an ordering, the files will all be sorted downstream - logger.info("Input files are not all grouped by read name so they will be sorted."); + logger.info("Input files are not all grouped by read name so they will be sorted together."); } else { final Map.Entry badPair = badlySorted.get(); throw new UserException( From 79b63378f91ed50d30fdbf4ee68ac2ccc0da1dfa Mon Sep 17 00:00:00 2001 From: James Date: Thu, 30 May 2019 14:35:50 -0400 Subject: [PATCH 5/6] expanding the unsorted file treatment to apply to the single file input case --- .../markduplicates/MarkDuplicatesSpark.java | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) diff --git a/src/main/java/org/broadinstitute/hellbender/tools/spark/transforms/markduplicates/MarkDuplicatesSpark.java b/src/main/java/org/broadinstitute/hellbender/tools/spark/transforms/markduplicates/MarkDuplicatesSpark.java index a911a2b50fd..aa1b61ab101 100644 --- 
a/src/main/java/org/broadinstitute/hellbender/tools/spark/transforms/markduplicates/MarkDuplicatesSpark.java +++ b/src/main/java/org/broadinstitute/hellbender/tools/spark/transforms/markduplicates/MarkDuplicatesSpark.java @@ -129,7 +129,7 @@ public final class MarkDuplicatesSpark extends GATKSparkTool { private static final long serialVersionUID = 1L; public static final String ALLOW_MULTIPLE_SORT_ORDERS_IN_INPUT_ARG = "allow-multiple-sort-orders-in-input"; - public static final String TREAT_UNSORTED_AS_ORDERED = "treat-unsorted-as-querygroup-ordered-for-multiple-inputs"; + public static final String TREAT_UNSORTED_AS_ORDERED = "treat-unsorted-as-querygroup-ordered"; @Override public boolean requiresReads() { return true; } @@ -144,12 +144,12 @@ public final class MarkDuplicatesSpark extends GATKSparkTool { protected String metricsFile; @Advanced - @Argument(doc = "Override to allow non-queryname sorted inputs for multiple input bams.", optional=true, + @Argument(doc = "Allow non-queryname sorted inputs when specifying multiple input bams.", optional=true, fullName = ALLOW_MULTIPLE_SORT_ORDERS_IN_INPUT_ARG) protected boolean allowMultipleSortOrders = false; @Advanced - @Argument(doc = "Treat unsorted files as query-group orderd files. NOTE: this may result in mark duplicates crashing if the file is unordered", optional=true, + @Argument(doc = "Treat unsorted files as query-group orderd files. WARNING: This option disables a basic safety check and may result in unexpected behavior if the file is truly unordered", optional=true, fullName = TREAT_UNSORTED_AS_ORDERED) protected boolean treatUnsortedAsOrdered = false; @@ -330,6 +330,13 @@ protected void runTool(final JavaSparkContext ctx) { } else { mergedHeader.setGroupOrder(SAMFileHeader.GroupOrder.query); } + + // If there is only one file and we are in treatUnsortedAsOrdered mode than set its group order accordingly. 
+ } else { + if (treatUnsortedAsOrdered && (mergedHeader.getSortOrder().equals(SAMFileHeader.SortOrder.unknown) || mergedHeader.getSortOrder().equals(SAMFileHeader.SortOrder.unsorted))) { + logger.warn("Input bam was marked as " + mergedHeader.getSortOrder().toString() + " but " + TREAT_UNSORTED_AS_ORDERED + " is specified so it's being treated as read name grouped"); + mergedHeader.setGroupOrder(SAMFileHeader.GroupOrder.query); + } } JavaRDD reads = getReads(); From 0de40f965ec9faf0ba6936813f6836124391fc65 Mon Sep 17 00:00:00 2001 From: James Date: Thu, 30 May 2019 14:47:37 -0400 Subject: [PATCH 6/6] Flipping all of the confusing boolean states that were causing tests to fail --- .../transforms/markduplicates/MarkDuplicatesSpark.java | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/src/main/java/org/broadinstitute/hellbender/tools/spark/transforms/markduplicates/MarkDuplicatesSpark.java b/src/main/java/org/broadinstitute/hellbender/tools/spark/transforms/markduplicates/MarkDuplicatesSpark.java index aa1b61ab101..1696b05f813 100644 --- a/src/main/java/org/broadinstitute/hellbender/tools/spark/transforms/markduplicates/MarkDuplicatesSpark.java +++ b/src/main/java/org/broadinstitute/hellbender/tools/spark/transforms/markduplicates/MarkDuplicatesSpark.java @@ -313,7 +313,7 @@ protected void runTool(final JavaSparkContext ctx) { if (headerMap.size() > 1) { final Optional> badlySorted = headerMap.entrySet() .stream() - .filter(h -> treatAsReadGroupOrdered(h.getValue(), treatUnsortedAsOrdered)) + .filter(h -> !treatAsReadGroupOrdered(h.getValue(), treatUnsortedAsOrdered)) .findFirst(); if(badlySorted.isPresent()) { @@ -328,7 +328,10 @@ protected void runTool(final JavaSparkContext ctx) { .getValue().getSortOrder() + " order"); } } else { - mergedHeader.setGroupOrder(SAMFileHeader.GroupOrder.query); + // The default sort order for merged input files is unsorted, so this will be fed to the tool to be sorted + if (!allowMultipleSortOrders) { + 
mergedHeader.setGroupOrder(SAMFileHeader.GroupOrder.query); + } } // If there is only one file and we are in treatUnsortedAsOrdered mode than set its group order accordingly.