Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Convert FastqToSam and RevertSam to use PicardHtsPath #1794

Merged
merged 18 commits into from
Apr 21, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 0 additions & 2 deletions src/main/java/picard/cmdline/CommandLineProgram.java
Original file line number Diff line number Diff line change
Expand Up @@ -40,7 +40,6 @@
import htsjdk.samtools.util.BlockGunzipper;
import htsjdk.samtools.util.IOUtil;
import htsjdk.samtools.util.Log;
import htsjdk.samtools.util.zip.DeflaterFactory;
import htsjdk.variant.variantcontext.writer.Options;
import htsjdk.variant.variantcontext.writer.VariantContextWriterBuilder;
import org.broadinstitute.barclay.argparser.Argument;
Expand All @@ -51,7 +50,6 @@
import org.broadinstitute.barclay.argparser.CommandLineParserOptions;
import org.broadinstitute.barclay.argparser.LegacyCommandLineArgumentParser;
import org.broadinstitute.barclay.argparser.SpecialArgumentsCollection;
import picard.PicardException;
import picard.cmdline.argumentcollections.OptionalReferenceArgumentCollection;
import picard.cmdline.argumentcollections.ReferenceArgumentCollection;
import picard.cmdline.argumentcollections.RequiredReferenceArgumentCollection;
Expand Down
48 changes: 29 additions & 19 deletions src/main/java/picard/sam/FastqToSam.java
Original file line number Diff line number Diff line change
Expand Up @@ -51,9 +51,15 @@
import picard.cmdline.CommandLineProgram;
import picard.cmdline.StandardOptionDefinitions;
import picard.cmdline.programgroups.ReadDataManipulationProgramGroup;
import picard.nio.PicardHtsPath;

import java.io.File;
import java.io.IOException;
import java.nio.file.Files;
import java.nio.file.Path;
import java.nio.file.Paths;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;

/**
Expand Down Expand Up @@ -150,10 +156,10 @@ public class FastqToSam extends CommandLineProgram {
private static final Log LOG = Log.getInstance(FastqToSam.class);

@Argument(shortName="F1", doc="Input fastq file (optionally gzipped) for single end data, or first read in paired end data.")
public File FASTQ;
public PicardHtsPath FASTQ;

@Argument(shortName="F2", doc="Input fastq file (optionally gzipped) for the second read of paired end data.", optional=true)
public File FASTQ2;
public PicardHtsPath FASTQ2;

@Argument(doc="Use sequential fastq files with the suffix <prefix>_###.fastq or <prefix>_###.fastq.gz." +
"The files should be named:\n" +
Expand Down Expand Up @@ -274,53 +280,53 @@ public static FastqQualityFormat determineQualityFormat(final FastqReader reader
* RUNNAME_S8_L005_R1_004.fastq
* where `baseFastq` is the first in that list.
*/
protected static List<File> getSequentialFileList(final File baseFastq) {
final List<File> files = new ArrayList<>();
protected static List<Path> getSequentialFileList(final Path baseFastq) {
final List<Path> files = new ArrayList<>();
files.add(baseFastq);

// Find the correct extension used in the base FASTQ
FastqExtensions fastqExtensions = null;
String suffix = null; // store the suffix including the extension
for (final FastqExtensions ext : FastqExtensions.values()) {
suffix = "_001" + ext.getExtension();
if (baseFastq.getAbsolutePath().endsWith(suffix)) {
if (baseFastq.toString().endsWith(suffix)) {
fastqExtensions = ext;
break;
}
}
if (null == fastqExtensions) {
throw new PicardException(String.format("Could not parse the FASTQ extension (expected '_001' + '%s'): %s", FastqExtensions.values().toString(), baseFastq));
throw new PicardException(String.format("Could not parse the FASTQ extension (expected '_001' + '%s'): %s", Arrays.toString(FastqExtensions.values()), baseFastq));
}

// Find all the files
for (int idx = 2; true; idx++) {
String fastq = baseFastq.getAbsolutePath();
String fastq = baseFastq.toAbsolutePath().toString();
markjschreiber marked this conversation as resolved.
Show resolved Hide resolved
fastq = String.format("%s_%03d%s", fastq.substring(0, fastq.length() - suffix.length()), idx, fastqExtensions.getExtension());
try {
IOUtil.assertFileIsReadable(new File(fastq));
IOUtil.assertFileIsReadable(Paths.get(fastq));
} catch (final SAMException e) { // the file is not readable, so do not continue
break;
}
files.add(new File(fastq));
files.add(Paths.get(fastq));
}

return files;
}

/* Simply invokes the right method for unpaired or paired data. */
protected int doWork() {
IOUtil.assertFileIsReadable(FASTQ);
IOUtil.assertFileIsReadable(FASTQ.toPath());
if (FASTQ2 != null) {
IOUtil.assertFileIsReadable(FASTQ2);
IOUtil.assertFileIsReadable(FASTQ2.toPath());
}
IOUtil.assertFileIsWritable(OUTPUT);

final SAMFileHeader header = createSamFileHeader();
final SAMFileWriter writer = new SAMFileWriterFactory().makeWriter(header, false, OUTPUT, REFERENCE_SEQUENCE);

// Set the quality format
QUALITY_FORMAT = FastqToSam.determineQualityFormat(fileToFastqReader(FASTQ),
(FASTQ2 == null) ? null : fileToFastqReader(FASTQ2),
QUALITY_FORMAT = FastqToSam.determineQualityFormat(fileToFastqReader(FASTQ.toPath()),
(FASTQ2 == null) ? null : fileToFastqReader(FASTQ2.toPath()),
QUALITY_FORMAT);

// Lists for sequential files, but also used when not sequential
Expand All @@ -329,11 +335,11 @@ protected int doWork() {

if (USE_SEQUENTIAL_FASTQS) {
// Get all the files
for (final File fastq : getSequentialFileList(FASTQ)) {
for (final Path fastq : getSequentialFileList(FASTQ.toPath())) {
readers1.add(fileToFastqReader(fastq));
}
if (null != FASTQ2) {
for (final File fastq : getSequentialFileList(FASTQ2)) {
for (final Path fastq : getSequentialFileList(FASTQ2.toPath())) {
readers2.add(fileToFastqReader(fastq));
}
if (readers1.size() != readers2.size()) {
Expand All @@ -342,9 +348,9 @@ protected int doWork() {
}
}
else {
readers1.add(fileToFastqReader(FASTQ));
readers1.add(fileToFastqReader(FASTQ.toPath()));
if (FASTQ2 != null) {
readers2.add(fileToFastqReader(FASTQ2));
readers2.add(fileToFastqReader(FASTQ2.toPath()));
}
}

Expand Down Expand Up @@ -428,8 +434,12 @@ protected int doPaired(final FastqReader freader1, final FastqReader freader2, f
return readCount;
}

private FastqReader fileToFastqReader(final File file) {
return new FastqReader(file, ALLOW_AND_IGNORE_EMPTY_LINES);
private FastqReader fileToFastqReader(final Path path) throws PicardException {
try {
return new FastqReader(null, Files.newBufferedReader(path), ALLOW_AND_IGNORE_EMPTY_LINES);
} catch (IOException e){
throw new PicardException("cannot create a reader for " + path, e);
}
}

private SAMRecord createSamRecord(final SAMFileHeader header, final String baseName, final FastqRecord frec, final boolean paired) {
Expand Down
19 changes: 13 additions & 6 deletions src/main/java/picard/sam/RevertSam.java
Original file line number Diff line number Diff line change
Expand Up @@ -57,6 +57,7 @@
import picard.cmdline.CommandLineProgram;
import picard.cmdline.StandardOptionDefinitions;
import picard.cmdline.programgroups.ReadDataManipulationProgramGroup;
import picard.nio.PicardHtsPath;
import picard.util.TabbedTextFileWithHeaderParser;

import java.io.File;
Expand Down Expand Up @@ -144,7 +145,7 @@ public class RevertSam extends CommandLineProgram {
"(e.g. invalid alignment information will be obviated when the REMOVE_ALIGNMENT_INFORMATION option is used).\n" +
"";
@Argument(shortName = StandardOptionDefinitions.INPUT_SHORT_NAME, doc = "The input SAM/BAM/CRAM file to revert the state of.")
public File INPUT;
public PicardHtsPath INPUT;

@Argument(mutex = {"OUTPUT_MAP"}, shortName = StandardOptionDefinitions.OUTPUT_SHORT_NAME, doc = "The output SAM/BAM/CRAM file to create, or an output directory if OUTPUT_BY_READGROUP is true.")
public File OUTPUT;
Expand Down Expand Up @@ -249,11 +250,11 @@ protected String[] customCommandLineValidation() {
}

protected int doWork() {
IOUtil.assertFileIsReadable(INPUT);
IOUtil.assertFileIsReadable(INPUT.toPath());
ValidationUtil.assertWritable(OUTPUT, OUTPUT_BY_READGROUP);

final boolean sanitizing = SANITIZE;
final SamReader in = SamReaderFactory.makeDefault().referenceSequence(REFERENCE_SEQUENCE).validationStringency(VALIDATION_STRINGENCY).open(INPUT);
final SamReader in = SamReaderFactory.makeDefault().referenceSequence(REFERENCE_SEQUENCE).validationStringency(VALIDATION_STRINGENCY).open(INPUT.toPath());
final SAMFileHeader inHeader = in.getFileHeader();
ValidationUtil.validateHeaderOverrides(inHeader, SAMPLE_ALIAS, LIBRARY_NAME);

Expand Down Expand Up @@ -326,8 +327,14 @@ protected int doWork() {
out.close();
} else {
final Map<SAMReadGroupRecord, FastqQualityFormat> readGroupToFormat;
final Path referenceSequencePath;
try {
readGroupToFormat = createReadGroupFormatMap(inHeader, REFERENCE_SEQUENCE, VALIDATION_STRINGENCY, INPUT, RESTORE_ORIGINAL_QUALITIES);
if (REFERENCE_SEQUENCE != null) {
referenceSequencePath = REFERENCE_SEQUENCE.toPath();
} else {
referenceSequencePath = null;
}
readGroupToFormat = createReadGroupFormatMap(inHeader, referenceSequencePath, VALIDATION_STRINGENCY, INPUT.toPath(), RESTORE_ORIGINAL_QUALITIES);
} catch (final PicardException e) {
log.error(e.getMessage());
return -1;
Expand Down Expand Up @@ -613,9 +620,9 @@ private SAMFileHeader createOutHeader(

private Map<SAMReadGroupRecord, FastqQualityFormat> createReadGroupFormatMap(
final SAMFileHeader inHeader,
final File referenceSequence,
final Path referenceSequence,
final ValidationStringency validationStringency,
final File input,
final Path input,
final boolean restoreOriginalQualities) {

final Map<SAMReadGroupRecord, FastqQualityFormat> readGroupToFormat = new HashMap<>();
Expand Down
8 changes: 5 additions & 3 deletions src/test/java/picard/sam/AbstractAlignmentMergerTest.java
Original file line number Diff line number Diff line change
Expand Up @@ -15,9 +15,11 @@
import org.testng.annotations.Test;
import picard.cmdline.CommandLineProgramTest;
import picard.cmdline.argumentcollections.RequiredReferenceArgumentCollection;
import picard.nio.PicardHtsPath;

import java.io.File;
import java.io.IOException;
import java.nio.file.Path;
import java.util.Collections;
import java.util.Iterator;
import java.util.LinkedList;
Expand Down Expand Up @@ -675,9 +677,9 @@ public void testUnmapBacterialContamination() throws IOException {
// builder.addFrag("frag3",1,500,false,false,"20S20M60S",null, 45);
// builder.addFrag("frag4",1,500,true,false,"20S20M60S",null, 45);

final File file = newTempSamFile("aligned");
final PicardHtsPath file = PicardHtsPath.fromPath(newTempSamFile("aligned").toPath());

try (SAMFileWriter writer = new SAMFileWriterFactory().makeWriter(builder.getHeader(), true, file, null)) {
try (SAMFileWriter writer = new SAMFileWriterFactory().makeWriter(builder.getHeader(), true, file.toPath(), (Path) null)) {
builder.getRecords().forEach(writer::addAlignment);
}

Expand All @@ -697,7 +699,7 @@ public void testUnmapBacterialContamination() throws IOException {

MergeBamAlignment mergeBamAlignment = new MergeBamAlignment();

mergeBamAlignment.ALIGNED_BAM = Collections.singletonList(file);
mergeBamAlignment.ALIGNED_BAM = Collections.singletonList(file.toPath().toFile()); // TODO update to use Path when MergeBamAlignment is updated to use Path
mergeBamAlignment.UNMAPPED_BAM = fileUnaligned;
mergeBamAlignment.UNMAP_CONTAMINANT_READS = true;

Expand Down
9 changes: 5 additions & 4 deletions src/test/java/picard/sam/FastqToSamTest.java
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,7 @@

import java.io.File;
import java.io.IOException;
import java.nio.file.Paths;
import java.util.ArrayList;
import java.util.List;

Expand Down Expand Up @@ -185,7 +186,7 @@ private File convertFile(final String fastqFilename1,
final File fastq2 = (fastqFilename2 != null) ? new File(TEST_DATA_DIR, fastqFilename2) : null;
final File samFile = newTempSamFile(fastq1.getName());

final List<String> args =new ArrayList<String>();
final List<String> args = new ArrayList<>();

args.add("FASTQ=" + fastq1.getAbsolutePath());
args.add("OUTPUT=" + samFile.getAbsolutePath());
Expand Down Expand Up @@ -296,9 +297,9 @@ public void testSequentialFiles() throws IOException {
final String pairedEnd1 = "sequential-files/paired_end_R1_001.fastq";
final String pairedEnd2 = "sequential-files/paired_end_R2_001.fastq";

Assert.assertEquals(FastqToSam.getSequentialFileList(new File(TEST_DATA_DIR, "/" + singleEnd)).size(), 2);
Assert.assertEquals(FastqToSam.getSequentialFileList(new File(TEST_DATA_DIR, "/" + pairedEnd1)).size(), 2);
Assert.assertEquals(FastqToSam.getSequentialFileList(new File(TEST_DATA_DIR, "/" + pairedEnd2)).size(), 2);
Assert.assertEquals(FastqToSam.getSequentialFileList(Paths.get(TEST_DATA_DIR.getPath(), singleEnd)).size(), 2);
Assert.assertEquals(FastqToSam.getSequentialFileList(Paths.get(TEST_DATA_DIR.getPath(), pairedEnd1)).size(), 2);
Assert.assertEquals(FastqToSam.getSequentialFileList(Paths.get(TEST_DATA_DIR.getPath(), pairedEnd2)).size(), 2);

convertFileAndVerifyRecordCount(1, singleEnd, null, FastqQualityFormat.Illumina, true, false);
convertFileAndVerifyRecordCount(2, singleEnd, null, FastqQualityFormat.Illumina, true, true);
Expand Down