Skip to content

Commit

Permalink
adding support for collecting blank lines of comments
Browse files Browse the repository at this point in the history
  • Loading branch information
jbax committed Apr 19, 2021
1 parent c170dcd commit 7e7d1b3
Show file tree
Hide file tree
Showing 3 changed files with 67 additions and 1 deletion.
Original file line number Diff line number Diff line change
Expand Up @@ -66,6 +66,7 @@ public abstract class AbstractParser<T extends CommonParserSettings<?>> {
protected final Map<Long, String> comments;
protected String lastComment;
private final boolean collectComments;
private final boolean collectEmptyComments;
private final int errorContentLength;
private boolean extractingHeaders = false;
private final boolean extractHeaders;
Expand Down Expand Up @@ -95,6 +96,7 @@ public AbstractParser(T settings) {
this.errorHandler = settings.getProcessorErrorHandler();
this.rowsToSkip = settings.getNumberOfRowsToSkip();
this.collectComments = settings.isCommentCollectionEnabled();
this.collectEmptyComments = settings.isBlankCommentCollectionEnabled();
this.comments = collectComments ? new TreeMap<Long, String>() : Collections.<Long, String>emptyMap();
this.extractHeaders = settings.isHeaderExtractionEnabled();
this.whitespaceRangeStart = settings.getWhitespaceRangeStart();
Expand All @@ -105,7 +107,7 @@ protected void processComment() {
if (collectComments) {
long line = input.lineCount();
String comment = input.readComment();
if (comment != null) {
if (comment != null || collectEmptyComments) {
lastComment = comment;
comments.put(line, lastComment);
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -64,6 +64,7 @@ public abstract class CommonParserSettings<F extends Format> extends CommonSetti
private boolean lineSeparatorDetectionEnabled = false;
private long numberOfRowsToSkip = 0L;
private boolean commentCollectionEnabled = false;
private boolean blankCommentCollectionEnabled = false;
private boolean autoClosingEnabled = true;
private boolean commentProcessingEnabled = true;
private List<InputAnalysisProcess> inputAnalysisProcesses = new ArrayList<InputAnalysisProcess>();
Expand Down Expand Up @@ -411,6 +412,30 @@ public void setCommentCollectionEnabled(boolean commentCollectionEnabled) {
this.commentCollectionEnabled = commentCollectionEnabled;
}

/**
 * Configures collection of comments found in the input, optionally including blank comment lines
 * (both are disabled by default). Collected comments are stored by the parser and made available via
 * {@code AbstractParser.getContext().comments()} and {@code AbstractParser.getContext().lastComment()}
 *
 * Blank comment lines are only collected when comment collection itself is being enabled: passing
 * {@code false} as the first argument always turns blank comment collection off, regardless of {@code includeBlank}.
 *
 * @param commentCollectionEnabled flag indicating whether or not to enable collection of comments.
 * @param includeBlank             flag indicating whether or not to also collect blank comment lines (these will come as {@code null})
 */
public void setCommentCollectionEnabled(boolean commentCollectionEnabled, boolean includeBlank) {
	// Blank comments can only be collected while comment collection as a whole is on.
	final boolean collectBlank = includeBlank && commentCollectionEnabled;

	setCommentCollectionEnabled(commentCollectionEnabled);
	this.blankCommentCollectionEnabled = collectBlank;
}

/**
 * Indicates whether blank comments found in the input will be collected (disabled by default).
 * When enabled, blank comment lines are stored by the parser along with the other collected comments, and made
 * available via {@code AbstractParser.getContext().comments()} and {@code AbstractParser.getContext().lastComment()}
 *
 * Blank comments will come as {@code null}.
 *
 * @return {@code true} if collection of blank comments is enabled, otherwise {@code false}.
 */
public boolean isBlankCommentCollectionEnabled() {
	return this.blankCommentCollectionEnabled;
}

@Override
final void runAutomaticConfiguration() {
Class<?> beanClass = null;
Expand Down
39 changes: 39 additions & 0 deletions src/test/java/com/univocity/parsers/issues/github/Github_447.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,39 @@
package com.univocity.parsers.issues.github;

import com.univocity.parsers.fixed.*;
import org.testng.annotations.*;

import java.io.*;

import static org.testng.Assert.*;


/**
* From: https://github.com/univocity/univocity-parsers/issues/447
*
* @author Univocity Software Pty Ltd - <a href="mailto:[email protected]">[email protected]</a>
*/
public class Github_447 {

	/**
	 * Parses an input made up exclusively of comment lines, two of which contain nothing but the
	 * comment character, and verifies that all four lines are collected when blank comment
	 * collection is enabled.
	 */
	@Test
	public void parseCSV() {
		// Four comment lines: the 1st and 3rd hold only the comment character.
		final String content = "" +
				"#\n" +
				"# underscores are used as the padding character, so leading/trailing whitespace can be considered part of the value\n" +
				"#\n" +
				"#4 5 40 40 8";

		final FixedWidthFields fieldLengths = new FixedWidthFields(4, 5, 40, 40, 8);
		final FixedWidthParserSettings parserSettings = new FixedWidthParserSettings(fieldLengths);

		// Collect comments, including the blank ones.
		parserSettings.setCommentCollectionEnabled(true, true);

		parserSettings.getFormat().setComment('#');
		parserSettings.getFormat().setNormalizedNewline('\n');
		parserSettings.getFormat().setLineSeparator("\n");
		parserSettings.setHeaderExtractionEnabled(true);

		final FixedWidthParser fixedWidthParser = new FixedWidthParser(parserSettings);
		fixedWidthParser.parse(new StringReader(content));

		final int collectedComments = fixedWidthParser.getContext().comments().size();
		assertEquals(collectedComments, 4);
	}
}

0 comments on commit 7e7d1b3

Please sign in to comment.