From be900f3bc81dc8f4c2667d2ad736d7f5b13cf18c Mon Sep 17 00:00:00 2001 From: Nathan Dickerson Date: Wed, 26 Sep 2018 11:19:45 -0500 Subject: [PATCH] feat(Input File): Removing the Byte Order Mark if it is present (#268) This solves the issue of having to re-save a CSV file that's already been saved and happens to have a Byte Order Mark - a hidden first character in the file. --- .../java/com/bullhorn/dataloader/data/CsvFileReader.java | 5 ++++- .../com/bullhorn/dataloader/data/CsvFileReaderTest.java | 6 ++++++ .../bullhorn/dataloader/integration/IntegrationTest.java | 3 +++ .../byteOrderMark/CandidateByteOrderMark.csv | 2 ++ src/test/resources/unitTest/CandidateByteOrderMark.csv | 2 ++ 5 files changed, 17 insertions(+), 1 deletion(-) create mode 100644 src/test/resources/integrationTest/byteOrderMark/CandidateByteOrderMark.csv create mode 100644 src/test/resources/unitTest/CandidateByteOrderMark.csv diff --git a/src/main/java/com/bullhorn/dataloader/data/CsvFileReader.java b/src/main/java/com/bullhorn/dataloader/data/CsvFileReader.java index 393b882f..eeb99604 100644 --- a/src/main/java/com/bullhorn/dataloader/data/CsvFileReader.java +++ b/src/main/java/com/bullhorn/dataloader/data/CsvFileReader.java @@ -5,7 +5,9 @@ import com.bullhorn.dataloader.util.PropertyFileUtil; import com.csvreader.CsvReader; import com.google.common.collect.Sets; +import org.apache.commons.io.input.BOMInputStream; +import java.io.FileInputStream; import java.io.IOException; import java.nio.charset.Charset; import java.util.ArrayList; @@ -31,7 +33,8 @@ public class CsvFileReader extends CsvReader { * @param filePath the path to the CSV file */ public CsvFileReader(String filePath, PropertyFileUtil propertyFileUtil, PrintUtil printUtil) throws IOException { - super(filePath, ',', propertyFileUtil.getSingleByteEncoding() ? Charset.forName("ISO-8859-1") : Charset.forName("UTF-8")); + super(new BOMInputStream(new FileInputStream(filePath)), ',', + propertyFileUtil.getSingleByteEncoding() ? Charset.forName("ISO-8859-1") : Charset.forName("UTF-8")); this.propertyFileUtil = propertyFileUtil; this.printUtil = printUtil; diff --git a/src/test/java/com/bullhorn/dataloader/data/CsvFileReaderTest.java b/src/test/java/com/bullhorn/dataloader/data/CsvFileReaderTest.java index 10f6691f..1664c490 100644 --- a/src/test/java/com/bullhorn/dataloader/data/CsvFileReaderTest.java +++ b/src/test/java/com/bullhorn/dataloader/data/CsvFileReaderTest.java @@ -36,6 +36,12 @@ public void setup() throws IOException { propertyValidationUtil, printUtilMock); } + @Test + public void testByteOrderMarkRemoval() throws IOException { + CsvFileReader csvFileReader = new CsvFileReader(TestUtils.getResourceFilePath("CandidateByteOrderMark.csv"), propertyFileUtil, printUtilMock); + Assert.assertArrayEquals(new String[]{"externalID", "name", "firstName", "lastName", "email"}, csvFileReader.getHeaders()); + } + @Test public void testMappedColumns() throws IOException { CsvFileReader csvFileReader = new CsvFileReader(TestUtils.getResourceFilePath("Candidate_MappedColumns.csv"), propertyFileUtil, printUtilMock); diff --git a/src/test/java/com/bullhorn/dataloader/integration/IntegrationTest.java b/src/test/java/com/bullhorn/dataloader/integration/IntegrationTest.java index b62c895a..f0981079 100644 --- a/src/test/java/com/bullhorn/dataloader/integration/IntegrationTest.java +++ b/src/test/java/com/bullhorn/dataloader/integration/IntegrationTest.java @@ -75,6 +75,9 @@ public void testIntegration() throws IOException { // Test that column header name mapping is working properly insertUpdateDeleteFromDirectory(TestUtils.getResourceFilePath("columnMapping"), false); + // Test that the byte order mark is ignored when it's present in the input file as the first (hidden) character + insertUpdateDeleteFromDirectory(TestUtils.getResourceFilePath("byteOrderMark"), false); + // Run a test for processing empty association fields (with the setting turned on) System.setProperty("processEmptyAssociations", "true"); insertUpdateDeleteFromDirectory(TestUtils.getResourceFilePath("processEmptyFields"), false); diff --git a/src/test/resources/integrationTest/byteOrderMark/CandidateByteOrderMark.csv b/src/test/resources/integrationTest/byteOrderMark/CandidateByteOrderMark.csv new file mode 100644 index 00000000..7dbad978 --- /dev/null +++ b/src/test/resources/integrationTest/byteOrderMark/CandidateByteOrderMark.csv @@ -0,0 +1,2 @@ +externalID ,name ,firstName,lastName,email +candidateBOM-ext-1,Stéphan Soufflé,Stéphan ,Soufflé ,StephanSouffle@example.com diff --git a/src/test/resources/unitTest/CandidateByteOrderMark.csv b/src/test/resources/unitTest/CandidateByteOrderMark.csv new file mode 100644 index 00000000..7dbad978 --- /dev/null +++ b/src/test/resources/unitTest/CandidateByteOrderMark.csv @@ -0,0 +1,2 @@ +externalID ,name ,firstName,lastName,email +candidateBOM-ext-1,Stéphan Soufflé,Stéphan ,Soufflé ,StephanSouffle@example.com