Skip to content

Commit

Permalink
Should not quote with strict quoting when line starts with #, but com…
Browse files Browse the repository at this point in the history
…ments are disabled. Quote only the first column, fix FasterXML#270
  • Loading branch information
Krzysztof Debski authored and Krzysztof Debski committed May 19, 2021
1 parent 48e7b7e commit 9df3797
Show file tree
Hide file tree
Showing 2 changed files with 56 additions and 16 deletions.
Original file line number Diff line number Diff line change
@@ -1,5 +1,9 @@
package com.fasterxml.jackson.dataformat.csv.impl;

import java.io.IOException;
import java.io.Writer;
import java.util.Arrays;

import com.fasterxml.jackson.core.JacksonException;
import com.fasterxml.jackson.core.exc.WrappedIOException;
import com.fasterxml.jackson.core.io.CharTypes;
Expand All @@ -9,10 +13,6 @@
import com.fasterxml.jackson.dataformat.csv.CsvGenerator.Feature;
import com.fasterxml.jackson.dataformat.csv.CsvSchema;

import java.io.IOException;
import java.io.Writer;
import java.util.Arrays;

/**
* Helper class that handles actual low-level construction of
* CSV output, based only on indexes given without worrying about reordering,
Expand Down Expand Up @@ -70,6 +70,8 @@ public class CsvEncoder
final protected char[] _cfgLineSeparator;

final protected char[] _cfgNullValue;

final protected boolean _cfgAllowsComments;

final protected int _cfgLineSeparatorLength;

Expand Down Expand Up @@ -193,6 +195,7 @@ public CsvEncoder(IOContext ctxt, int csvFeatures, Writer out, CsvSchema schema,
_cfgLineSeparator = schema.getLineSeparator();
_cfgLineSeparatorLength = (_cfgLineSeparator == null) ? 0 : _cfgLineSeparator.length;
_cfgNullValue = schema.getNullValueOrEmpty();
_cfgAllowsComments = schema.allowsComments();

_columnCount = schema.size();
_outputEscapes = (esc == null) ? sOutputEscapes : esc.getEscapeCodesForAscii();
Expand Down Expand Up @@ -234,6 +237,7 @@ public CsvEncoder(CsvEncoder base, CsvSchema newSchema)
_cfgLineSeparator = newSchema.getLineSeparator();
_cfgLineSeparatorLength = _cfgLineSeparator.length;
_cfgNullValue = newSchema.getNullValueOrEmpty();
_cfgAllowsComments = newSchema.allowsComments();
_cfgMinSafeChar = _calcSafeChar();
_columnCount = newSchema.size();
_cfgQuoteCharEscapeChar = _getQuoteCharEscapeChar(
Expand Down Expand Up @@ -341,7 +345,7 @@ public final void write(int columnIndex, String value) throws JacksonException
appendColumnSeparator();
}
final int len = value.length();
if (_cfgAlwaysQuoteStrings || _mayNeedQuotes(value, len)) {
if (_cfgAlwaysQuoteStrings || _mayNeedQuotes(value, len, columnIndex)) {
if (_cfgEscapeCharacter > 0) {
_writeQuotedAndEscaped(value, (char) _cfgEscapeCharacter);
} else {
Expand Down Expand Up @@ -513,7 +517,7 @@ protected void appendValue(String value) throws JacksonException
// First: determine if we need quotes; simple heuristics;
// only check for short Strings, stop if something found
final int len = value.length();
if (_cfgAlwaysQuoteStrings || _mayNeedQuotes(value, len)) {
if (_cfgAlwaysQuoteStrings || _mayNeedQuotes(value, len, _nextColumnToWrite)) {
if (_cfgEscapeCharacter > 0) {
_writeQuotedAndEscaped(value, (char) _cfgEscapeCharacter);
} else {
Expand Down Expand Up @@ -989,7 +993,7 @@ public void close(boolean autoClose, boolean flushStream) throws IOException
* Helper method that determines whether given String is likely
* to require quoting; check tries to optimize for speed.
*/
protected boolean _mayNeedQuotes(String value, int length)
protected boolean _mayNeedQuotes(String value, int length, int columnIndex)
{
// 21-Mar-2014, tatu: If quoting disabled, don't quote
if (_cfgQuoteCharacter < 0) {
Expand All @@ -998,9 +1002,9 @@ protected boolean _mayNeedQuotes(String value, int length)
// may skip checks unless we want exact checking
if (_cfgOptimalQuoting) {
if (_cfgEscapeCharacter > 0) {
return _needsQuotingStrict(value, _cfgEscapeCharacter);
return _needsQuotingStrict(value, columnIndex, _cfgEscapeCharacter);
}
return _needsQuotingStrict(value);
return _needsQuotingStrict(value, columnIndex);
}
if (length > _cfgMaxQuoteCheckChars) {
return true;
Expand Down Expand Up @@ -1048,7 +1052,7 @@ protected final boolean _needsQuotingLoose(String value, int esc)
return false;
}

protected boolean _needsQuotingStrict(String value)
protected boolean _needsQuotingStrict(String value, int columnIndex)
{
final int minSafe = _cfgMinSafeChar;

Expand All @@ -1065,15 +1069,15 @@ protected boolean _needsQuotingStrict(String value)
|| (c < escLen && escCodes[c] != 0)
|| (c == lfFirst)
// 31-Dec-2014, tatu: Comment lines start with # so quote if starts with #
|| (c == '#' && i == 0)) {
|| (columnIndex == 0 && _cfgAllowsComments && c == '#' && i == 0)) {
return true;
}
}
}
return false;
}

protected boolean _needsQuotingStrict(String value, int esc)
protected boolean _needsQuotingStrict(String value, int columnIndex, int esc)
{
final int minSafe = _cfgMinSafeChar;
final int[] escCodes = _outputEscapes;
Expand All @@ -1089,7 +1093,7 @@ protected boolean _needsQuotingStrict(String value, int esc)
|| (c < escLen && escCodes[c] != 0)
|| (c == lfFirst)
// 31-Dec-2014, tatu: Comment lines start with # so quote if starts with #
|| (c == '#' && i == 0)) {
|| (columnIndex == 0 && _cfgAllowsComments && c == '#' && i == 0)) {
return true;
}
} else if (c == esc) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -233,12 +233,12 @@ public void testForcedQuotingEmptyStrings() throws Exception
assertEquals(",2.5\n", result);
}

// Must comment '#', at least if it starts the line
public void testQuotingOfCommentChar() throws Exception
// Must quote '#' when it starts the line
public void testQuotingOfCommentCharForFirstColumn() throws Exception
{
// First, with default quoting
CsvMapper mapper = mapperForCsv();
final CsvSchema schema = mapper.schemaFor(IdDesc.class);
final CsvSchema schema = mapper.schemaFor(IdDesc.class).withComments();
String csv = mapper.writer(schema)
.writeValueAsString(new IdDesc("#123", "Foo"));
assertEquals("\"#123\",Foo\n", csv);
Expand All @@ -251,6 +251,42 @@ public void testQuotingOfCommentChar() throws Exception
assertEquals("\"#123\",Foo\n", csv);
}

// In strict mode when the second column starts with '#', does not have to quote it
public void testQuotingOfCommentCharForSecondColumn() throws Exception
{
// First, with default quoting
CsvMapper mapper = mapperForCsv();
final CsvSchema schema = mapper.schemaFor(IdDesc.class).withComments();
String csv = mapper.writer(schema)
.writeValueAsString(new IdDesc("123", "#Foo"));
assertEquals("123,\"#Foo\"\n", csv);

// then with strict/optimal
mapper = mapperForCsv();
csv = mapper.writer(schema)
.with(CsvGenerator.Feature.STRICT_CHECK_FOR_QUOTING)
.writeValueAsString(new IdDesc("123", "#Foo"));
assertEquals("123,#Foo\n", csv);
}

// In strict mode when comments are disabled, does not have to quote '#'
public void testQuotingOfCommentCharWhenCommentsAreDisabled() throws Exception
{
// First, with default quoting
CsvMapper mapper = mapperForCsv();
final CsvSchema schema = mapper.schemaFor(IdDesc.class).withoutComments();
String csv = mapper.writer(schema)
.writeValueAsString(new IdDesc("#123", "Foo"));
assertEquals("\"#123\",Foo\n", csv);

// then with strict/optimal
mapper = mapperForCsv();
csv = mapper.writer(schema)
.with(CsvGenerator.Feature.STRICT_CHECK_FOR_QUOTING)
.writeValueAsString(new IdDesc("#123", "Foo"));
assertEquals("#123,Foo\n", csv);
}

// for [dataformat-csv#98]
public void testBackslashEscape() throws Exception
{
Expand Down

0 comments on commit 9df3797

Please sign in to comment.