From 7e7d1b3c0a3dceaed4a8413875eb1500f2a028ec Mon Sep 17 00:00:00 2001 From: Jeronimo Backes Date: Mon, 19 Apr 2021 21:49:22 +0930 Subject: [PATCH] adding support for collecting blank lines of comments --- .../parsers/common/AbstractParser.java | 4 +- .../parsers/common/CommonParserSettings.java | 25 ++++++++++++ .../parsers/issues/github/Github_447.java | 39 +++++++++++++++++++ 3 files changed, 67 insertions(+), 1 deletion(-) create mode 100644 src/test/java/com/univocity/parsers/issues/github/Github_447.java diff --git a/src/main/java/com/univocity/parsers/common/AbstractParser.java b/src/main/java/com/univocity/parsers/common/AbstractParser.java index b908d266..42191ad1 100644 --- a/src/main/java/com/univocity/parsers/common/AbstractParser.java +++ b/src/main/java/com/univocity/parsers/common/AbstractParser.java @@ -66,6 +66,7 @@ public abstract class AbstractParser> { protected final Map comments; protected String lastComment; private final boolean collectComments; + private final boolean collectEmptyComments; private final int errorContentLength; private boolean extractingHeaders = false; private final boolean extractHeaders; @@ -95,6 +96,7 @@ public AbstractParser(T settings) { this.errorHandler = settings.getProcessorErrorHandler(); this.rowsToSkip = settings.getNumberOfRowsToSkip(); this.collectComments = settings.isCommentCollectionEnabled(); + this.collectEmptyComments = settings.isBlankCommentCollectionEnabled(); this.comments = collectComments ? 
new TreeMap() : Collections.emptyMap(); this.extractHeaders = settings.isHeaderExtractionEnabled(); this.whitespaceRangeStart = settings.getWhitespaceRangeStart(); @@ -105,7 +107,7 @@ protected void processComment() { if (collectComments) { long line = input.lineCount(); String comment = input.readComment(); - if (comment != null) { + if (comment != null || collectEmptyComments) { lastComment = comment; comments.put(line, lastComment); } diff --git a/src/main/java/com/univocity/parsers/common/CommonParserSettings.java b/src/main/java/com/univocity/parsers/common/CommonParserSettings.java index 6212ef22..4903cb8e 100644 --- a/src/main/java/com/univocity/parsers/common/CommonParserSettings.java +++ b/src/main/java/com/univocity/parsers/common/CommonParserSettings.java @@ -64,6 +64,7 @@ public abstract class CommonParserSettings extends CommonSetti private boolean lineSeparatorDetectionEnabled = false; private long numberOfRowsToSkip = 0L; private boolean commentCollectionEnabled = false; + private boolean blankCommentCollectionEnabled = false; private boolean autoClosingEnabled = true; private boolean commentProcessingEnabled = true; private List inputAnalysisProcesses = new ArrayList(); @@ -411,6 +412,30 @@ public void setCommentCollectionEnabled(boolean commentCollectionEnabled) { this.commentCollectionEnabled = commentCollectionEnabled; } + /** + * Enables collection of comments found in the input (disabled by default). If enabled, comment lines will be + * stored by the parser and made available via {@code AbstractParser.getContext().comments()} and {@code AbstractParser.getContext().lastComment()} + * + * @param commentCollectionEnabled flag indicating whether or not to enable collection of comments. 
+ * @param includeBlank flag indicating whether or not to collect blank lines in comments (these will come as {@code null}); ignored when {@code commentCollectionEnabled} is {@code false} + */ + public void setCommentCollectionEnabled(boolean commentCollectionEnabled, boolean includeBlank) { + setCommentCollectionEnabled(commentCollectionEnabled); + this.blankCommentCollectionEnabled = commentCollectionEnabled && includeBlank; + } + + /** + * Indicates whether blank comments found in the input are collected (disabled by default). If enabled, blank comment lines will also be + * stored by the parser and made available via {@code AbstractParser.getContext().comments()} and {@code AbstractParser.getContext().lastComment()} + * + * Blank comments will come as {@code null}. + * + * @return {@code true} if collection of blank comments is enabled, {@code false} otherwise. + */ + public boolean isBlankCommentCollectionEnabled() { + return blankCommentCollectionEnabled; + } + @Override final void runAutomaticConfiguration() { Class beanClass = null; diff --git a/src/test/java/com/univocity/parsers/issues/github/Github_447.java b/src/test/java/com/univocity/parsers/issues/github/Github_447.java new file mode 100644 index 00000000..f57b7d6f --- /dev/null +++ b/src/test/java/com/univocity/parsers/issues/github/Github_447.java @@ -0,0 +1,39 @@ +package com.univocity.parsers.issues.github; + +import com.univocity.parsers.fixed.*; +import org.testng.annotations.*; + +import java.io.*; + +import static org.testng.Assert.*; + + +/** + * From: https://github.com/univocity/univocity-parsers/issues/447 + * + * @author Univocity Software Pty Ltd - parsers@univocity.com + */ +public class Github_447 { + + @Test + public void parseCSV() { + String input = "" + + "#\n" + + "# underscores are used as the padding character, so leading/trailing whitespace can be considered part of the value\n" + + "#\n" + + "#4 5 40 40 8"; + + final FixedWidthParserSettings settings = new FixedWidthParserSettings(new FixedWidthFields(4, 5, 40, 40, 8)); + 
settings.setCommentCollectionEnabled(true, true); + settings.getFormat().setComment('#'); + settings.getFormat().setNormalizedNewline('\n'); + settings.getFormat().setLineSeparator("\n"); + settings.setHeaderExtractionEnabled(true); + + final FixedWidthParser parser = new FixedWidthParser(settings); + parser.parse(new StringReader(input)); + + assertEquals(parser.getContext().comments().size(), 4); + + } +} \ No newline at end of file