Skip to content

Commit

Permalink
Set String and File IO types to be UTF_8
Browse files Browse the repository at this point in the history
Define UTF_8 to be used for all GKL string- and file-related operations.
Unit tests show no negative side effects from these changes.

Signed-off-by: Keith Mannthey <[email protected]>
  • Loading branch information
Kmannth committed Jan 31, 2022
1 parent 45ff1bb commit 0988de8
Show file tree
Hide file tree
Showing 5 changed files with 62 additions and 49 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,7 @@
import org.broadinstitute.gatk.nativebindings.smithwaterman.SWNativeAlignerResult;

import java.io.File;
import java.nio.charset.StandardCharsets;

/**
* Provides a native SmithWaterman implementation accelerated for the Intel Architecture.
Expand Down Expand Up @@ -157,7 +158,7 @@ public SWNativeAlignerResult align(byte[] refArray, byte[] altArray, SWParameter
throw new IllegalArgumentException("Ran into invalid argument issue");
}

return new SWNativeAlignerResult(new String(cigar).trim(), offset);
return new SWNativeAlignerResult(new String(cigar,StandardCharsets.UTF_8).trim(), offset);
}

public byte getStrategy(SWOverhangStrategy strategy)
Expand Down
9 changes: 6 additions & 3 deletions src/test/java/com/intel/gkl/compression/DeflaterProfile.java
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,8 @@
import java.util.Iterator;
import java.util.List;
import java.util.zip.Deflater;

import java.io.FileOutputStream;
import java.io.OutputStreamWriter;
/**
* Integration and performance/compression profiling test for IntelDeflater
*/
Expand Down Expand Up @@ -52,7 +53,9 @@ public Deflater makeDeflater(final int compressionLevel, final boolean nowrap) {
deflaterFactories.add(javaDeflaterFactory);

// create profile log file
final FileWriter fileWriter = new FileWriter(profileFile);
final FileOutputStream fileStream = new FileOutputStream(profileFile);
final OutputStreamWriter fileWriter = new OutputStreamWriter(fileStream, "UTF-8");
// final FileWriter fileWriter = new FileWriter(profileFile);
try {
fileWriter.write("level, time(sec), filesize\n");
} catch (IOException e) {System.err.println("Caught IOException: " + e.getMessage());}
Expand Down Expand Up @@ -84,7 +87,7 @@ public Deflater makeDeflater(final int compressionLevel, final boolean nowrap) {
}
}
try {
fileWriter.write(String.format("%d, %.3f, %d\n",
fileWriter.write(String.format("%d, %.3f, %d%n",
compressionLevel, (totalTime/1000.0/loopCount), outputFile.length()));
fileWriter.flush();
} catch (IOException e) {System.err.println("Caught IOException: " + e.getMessage());}
Expand Down
5 changes: 3 additions & 2 deletions src/test/java/com/intel/gkl/compression/InflaterUnitTest.java
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@

import java.io.File;
import java.io.IOException;

import java.util.Arrays;
import java.util.zip.DataFormatException;
import java.util.zip.Deflater;
Expand Down Expand Up @@ -129,7 +130,7 @@ public void inflateOutputBufferOverflowShortTest() throws DataFormatException, j
int resultLength = inflater.inflate(result, 0 , result.length);
inflater.end();

String seq2 = new String(result, 0, resultLength);
String seq2 = new String(result, 0, resultLength, "UTF8");
log.info(String.format("UnCompressed length : %d Seq : %s" , seq2.length() , seq2));
Assert.assertEquals(sequence, seq2);
}
Expand Down Expand Up @@ -186,7 +187,7 @@ public void inflateNowrapFalseJavaTest() throws DataFormatException, java.io.Uns
int resultLength = inflater.inflate(result, 0 , 1024);
inflater.end();

String seq2 = new String(result, 0, resultLength);
String seq2 = new String(result, 0, resultLength, "UTF8");
log.info(String.format("UnCompressed length : %d Seq : %s" , seq2.length() ,
seq2));
Assert.assertEquals(sequence, seq2);
Expand Down
76 changes: 43 additions & 33 deletions src/test/java/com/intel/gkl/pairhmm/PairHmmUnitTest.java
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,10 @@
import java.io.FileNotFoundException;
import java.io.FileReader;
import java.util.Scanner;
import java.nio.file.Files;
import java.nio.charset.StandardCharsets;
import java.nio.file.Path;
import java.nio.file.Paths;

public class PairHmmUnitTest {
static final String pairHMMTestData = IntelGKLUtils.pathToTestResource("pairhmm-testdata.txt");
Expand Down Expand Up @@ -68,13 +72,13 @@ public void simpleTest() {

// read data from file
haplotypeDataArray[0] = new HaplotypeDataHolder();
haplotypeDataArray[0].haplotypeBases = "ACGT".getBytes();
haplotypeDataArray[0].haplotypeBases = "ACGT".getBytes(StandardCharsets.UTF_8);
readDataArray[0] = new ReadDataHolder();
readDataArray[0].readBases = "ACGT".getBytes();
readDataArray[0].readQuals = "++++".getBytes();
readDataArray[0].insertionGOP = "++++".getBytes();
readDataArray[0].deletionGOP = "++++".getBytes();
readDataArray[0].overallGCP = "++++".getBytes();
readDataArray[0].readBases = "ACGT".getBytes(StandardCharsets.UTF_8);
readDataArray[0].readQuals = "++++".getBytes(StandardCharsets.UTF_8);
readDataArray[0].insertionGOP = "++++".getBytes(StandardCharsets.UTF_8);
readDataArray[0].deletionGOP = "++++".getBytes(StandardCharsets.UTF_8);
readDataArray[0].overallGCP = "++++".getBytes(StandardCharsets.UTF_8);
double expectedResult = -6.022797e-01;

// call pairHMM
Expand Down Expand Up @@ -116,13 +120,13 @@ public void omp_Test() {

// read data from file
haplotypeDataArray[0] = new HaplotypeDataHolder();
haplotypeDataArray[0].haplotypeBases = "ACGT".getBytes();
haplotypeDataArray[0].haplotypeBases = "ACGT".getBytes(StandardCharsets.UTF_8);
readDataArray[0] = new ReadDataHolder();
readDataArray[0].readBases = "ACGT".getBytes();
readDataArray[0].readQuals = "++++".getBytes();
readDataArray[0].insertionGOP = "++++".getBytes();
readDataArray[0].deletionGOP = "++++".getBytes();
readDataArray[0].overallGCP = "++++".getBytes();
readDataArray[0].readBases = "ACGT".getBytes(StandardCharsets.UTF_8);
readDataArray[0].readQuals = "++++".getBytes(StandardCharsets.UTF_8);
readDataArray[0].insertionGOP = "++++".getBytes(StandardCharsets.UTF_8);
readDataArray[0].deletionGOP = "++++".getBytes(StandardCharsets.UTF_8);
readDataArray[0].overallGCP = "++++".getBytes(StandardCharsets.UTF_8);
double expectedResult = -6.022797e-01;

// call pairHMM
Expand All @@ -147,13 +151,13 @@ public void omp_Test() {

// read data from file
haplotypeDataArray[0] = new HaplotypeDataHolder();
haplotypeDataArray[0].haplotypeBases = "ACGT".getBytes();
haplotypeDataArray[0].haplotypeBases = "ACGT".getBytes(StandardCharsets.UTF_8);
readDataArray[0] = new ReadDataHolder();
readDataArray[0].readBases = "ACGT".getBytes();
readDataArray[0].readQuals = "++++".getBytes();
readDataArray[0].insertionGOP = "++++".getBytes();
readDataArray[0].deletionGOP = "++++".getBytes();
readDataArray[0].overallGCP = "++++".getBytes();
readDataArray[0].readBases = "ACGT".getBytes(StandardCharsets.UTF_8);
readDataArray[0].readQuals = "++++".getBytes(StandardCharsets.UTF_8);
readDataArray[0].insertionGOP = "++++".getBytes(StandardCharsets.UTF_8);
readDataArray[0].deletionGOP = "++++".getBytes(StandardCharsets.UTF_8);
readDataArray[0].overallGCP = "++++".getBytes(StandardCharsets.UTF_8);
double expectedResult = -6.022797e-01;

// call pairHMM
Expand Down Expand Up @@ -189,8 +193,11 @@ public void dataFileTest() {

// read test data from file
Scanner s = null;
BufferedReader r = null;
try {
s = new Scanner(new BufferedReader(new FileReader(pairHMMTestData)));
Path Data = Paths.get(pairHMMTestData);
r = new BufferedReader(Files.newBufferedReader(Data, StandardCharsets.UTF_8));
s = new Scanner(r);

while (s.hasNext()) {
// skip comment lines
Expand All @@ -199,12 +206,12 @@ public void dataFileTest() {
continue;
}

haplotypeDataArray[0].haplotypeBases = s.next().getBytes();
readDataArray[0].readBases = s.next().getBytes();
readDataArray[0].readQuals = normalize(s.next().getBytes(), 6);
readDataArray[0].insertionGOP = normalize(s.next().getBytes());
readDataArray[0].deletionGOP = normalize(s.next().getBytes());
readDataArray[0].overallGCP = normalize(s.next().getBytes());
haplotypeDataArray[0].haplotypeBases = s.next().getBytes(StandardCharsets.UTF_8);
readDataArray[0].readBases = s.next().getBytes(StandardCharsets.UTF_8);
readDataArray[0].readQuals = normalize(s.next().getBytes(StandardCharsets.UTF_8), 6);
readDataArray[0].insertionGOP = normalize(s.next().getBytes(StandardCharsets.UTF_8));
readDataArray[0].deletionGOP = normalize(s.next().getBytes(StandardCharsets.UTF_8));
readDataArray[0].overallGCP = normalize(s.next().getBytes(StandardCharsets.UTF_8));
double expectedResult = s.nextDouble();

// call pairHMM
Expand Down Expand Up @@ -250,9 +257,12 @@ public void testDataFileBatchTest() {

// read test data from file
Scanner s = null;
try {
s = new Scanner(new BufferedReader(new FileReader(pairHMMTestData)));
BufferedReader r = null;

try {
Path Data = Paths.get(pairHMMTestData);
r = new BufferedReader(Files.newBufferedReader(Data, StandardCharsets.UTF_8));
s = new Scanner(r);
int batchSize = 0;
while (s.hasNext()) {
// skip comment lines
Expand All @@ -261,12 +271,12 @@ public void testDataFileBatchTest() {
continue;
}

haplotypeDataArray[batchSize].haplotypeBases = s.next().getBytes();
readDataArray[batchSize].readBases = s.next().getBytes();
readDataArray[batchSize].readQuals = normalize(s.next().getBytes(), 6);
readDataArray[batchSize].insertionGOP = normalize(s.next().getBytes());
readDataArray[batchSize].deletionGOP = normalize(s.next().getBytes());
readDataArray[batchSize].overallGCP = normalize(s.next().getBytes());
haplotypeDataArray[batchSize].haplotypeBases = s.next().getBytes(StandardCharsets.UTF_8);
readDataArray[batchSize].readBases = s.next().getBytes(StandardCharsets.UTF_8);
readDataArray[batchSize].readQuals = normalize(s.next().getBytes(StandardCharsets.UTF_8), 6);
readDataArray[batchSize].insertionGOP = normalize(s.next().getBytes(StandardCharsets.UTF_8));
readDataArray[batchSize].deletionGOP = normalize(s.next().getBytes(StandardCharsets.UTF_8));
readDataArray[batchSize].overallGCP = normalize(s.next().getBytes(StandardCharsets.UTF_8));
expectedResult[batchSize] = s.nextDouble();
log.info(String.format("expected[%d] = %e ", batchSize, expectedResult[batchSize]));
batchSize++;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,10 @@

import java.io.*;
import java.util.Arrays;
import java.nio.file.Files;
import java.nio.charset.StandardCharsets;
import java.nio.file.Path;
import java.nio.file.Paths;

import org.apache.commons.logging.LogFactory;
import org.apache.commons.logging.Log;
Expand All @@ -35,10 +39,8 @@ public void inputDataTest() {
}

try {

final File inputFile = new File(smithwatermanData);
final FileReader input = new FileReader(inputFile);
final BufferedReader in = new BufferedReader(input);
final Path Data = Paths.get(smithwatermanData);
final BufferedReader in = new BufferedReader(Files.newBufferedReader(Data, StandardCharsets.UTF_8));

String refString = new String(""), altString = new String("");
SWParameters SWparameters = new SWParameters(200, -150, -260, -11);
Expand Down Expand Up @@ -213,12 +215,9 @@ public void simpleTest() {
smithWaterman.close();
throw new SkipException(err);
}

try {

final File inputFile = new File(smithwatermanData);
final FileReader input = new FileReader(inputFile);
final BufferedReader in = new BufferedReader(input);
final Path Data = Paths.get(smithwatermanData);
final BufferedReader in = new BufferedReader(Files.newBufferedReader(Data, StandardCharsets.UTF_8));

byte[] ref;
byte[] alt;
Expand Down Expand Up @@ -250,7 +249,6 @@ public void simpleTest() {

}
in.close();
input.close();
} catch (java.io.IOException e) {
e.printStackTrace();
}
Expand Down

0 comments on commit 0988de8

Please sign in to comment.