Skip to content

Commit

Permalink
Make comparison of newlines in text files more precise
Browse files Browse the repository at this point in the history
Don't ignore all kind of newlines when comparing text-files, only treat
exactly matching \n and \r\n as equals.
  • Loading branch information
HannesWell committed Jul 7, 2023
1 parent 5b5d2b1 commit 5aef81f
Show file tree
Hide file tree
Showing 2 changed files with 184 additions and 41 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -12,9 +12,9 @@
*******************************************************************************/
package org.eclipse.tycho.zipcomparator.internal;

import java.io.EOFException;
import java.io.IOException;
import java.nio.charset.StandardCharsets;
import java.util.Arrays;
import java.util.List;
import java.util.stream.Collectors;

Expand All @@ -36,62 +36,69 @@ public class TextComparator implements ContentsComparator {

static final String HINT = "txt";

private static final char CR = '\r';
private static final char LF = '\n';

// Possible new lines:
// \n -- unix style
// \r\n -- windows style
// \r -- old Mac OS 9 style, recent Mac OS X/macOS use \n

@Override
public ArtifactDelta getDelta(ComparatorInputStream baseline, ComparatorInputStream reactor, ComparisonData data)
throws IOException {
return compareText(baseline, reactor, data);
}

public static ArtifactDelta compareText(ComparatorInputStream baseline, ComparatorInputStream reactor,
ComparisonData data) throws IOException {
ByteIterator baselineIterator = new ByteIterator(baseline.asBytes());
ByteIterator reactorIterator = new ByteIterator(reactor.asBytes());
while (baselineIterator.hasNext() && reactorIterator.hasNext()) {
if (baselineIterator.next() != reactorIterator.next()) {
return createDelta(ArtifactDelta.DEFAULT.getMessage(), baseline, reactor, data);
}
}
//now both need to be at the end of the stream if they are the same!
if (baselineIterator.hasNext() || reactorIterator.hasNext()) {
ComparisonData data) {
if (!isEqualTextIngoreNewLine(baseline.asBytes(), reactor.asBytes())) {
return createDelta(ArtifactDelta.DEFAULT.getMessage(), baseline, reactor, data);
}
return ArtifactDelta.NO_DIFFERENCE;
}

private static final class ByteIterator {

private byte[] bytes;
private int index;

public ByteIterator(byte[] bytes) {
this.bytes = bytes;
}

byte next() throws EOFException {
if (hasNext()) {
byte b = bytes[index];
index++;
return b;
/**
* Tests if {@code baseline} and {@code reactor} contain equal text, if line-endings are
* ignored.
*
* @implNote This methods is intended to have the same results as if the entire content of each
* array were read and compared line by line using BufferedReader.readLine(), which
* only returns the line content, without terminators. The actual implementation is
* just more efficient, because it does not create String objects for the entire
* content.
*/
public static boolean isEqualTextIngoreNewLine(byte[] baseline, byte[] reactor) {
int indexBaseline = 0;
int indexReactor = 0;
int mismatch = Arrays.mismatch(baseline, reactor);
while (mismatch >= 0) {
indexBaseline += mismatch;
indexReactor += mismatch;
int baselineNewLine = newLineLength(baseline, indexBaseline);
int reactorNewLine = newLineLength(reactor, indexReactor);
if (baselineNewLine < 0 || reactorNewLine < 0) {
return false;
}
throw new EOFException();
}

boolean hasNext() {
skipNewLines();
return index < bytes.length;
// Both sliders are at either "\n" or "\r\n"
indexBaseline += baselineNewLine;
indexReactor += reactorNewLine;
mismatch = Arrays.mismatch(baseline, indexBaseline, baseline.length, reactor, indexReactor, reactor.length);
}
return true;
}

private void skipNewLines() {
while (index < bytes.length) {
byte b = bytes[index];
if (b == '\n' || b == '\r') {
index++;
continue;
}
return;
private static int newLineLength(byte[] bytes, int index) {
if (index < bytes.length) {
if (bytes[index] == LF
// Prevent "\r\n" and "\r\r\n" from being treated as equals
&& (index == 0 || bytes[index - 1] != CR)) {
return 1;
} else if (bytes[index] == CR) {
return index + 1 < bytes.length && bytes[index + 1] == LF ? 2 : 1;
}
}

return -1;
}

@Override
Expand All @@ -109,7 +116,7 @@ public static ArtifactDelta createDelta(String message, ComparatorInputStream ba
Patch<String> patch = DiffUtils.diff(source, target);
List<String> unifiedDiffList = UnifiedDiffUtils.generateUnifiedDiff("baseline", "reactor", source,
patch, 0);
detailed = unifiedDiffList.stream().collect(Collectors.joining((System.lineSeparator())));
detailed = unifiedDiffList.stream().collect(Collectors.joining(System.lineSeparator()));
} catch (Exception e) {
detailed = message;
}
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,136 @@
package org.eclipse.tycho.zipcomparator.internal;

import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertNotEquals;

import java.io.IOException;
import java.nio.charset.StandardCharsets;
import java.util.List;

import org.eclipse.tycho.artifactcomparator.ArtifactComparator.ComparisonData;
import org.eclipse.tycho.artifactcomparator.ArtifactDelta;
import org.eclipse.tycho.artifactcomparator.ComparatorInputStream;
import org.junit.Test;

public class TextComparatorTest {
private static final String NL = System.lineSeparator();

@Test
public void testEqualText() throws IOException {
String text = "FirstLine\nline2\n";
assertEquals(ArtifactDelta.NO_DIFFERENCE, getTextDelta(text, text));
}

@Test
public void testNotEqualText() throws IOException {
String baseline = "FirstLine\nline2\n";
String reactor = "line1\nline2\n";

ArtifactDelta delta = getTextDelta(baseline, reactor);
assertDeltaWithDetails(
"--- baseline" + NL + "+++ reactor" + NL + "@@ -1,1 +1,1 @@" + NL + "-FirstLine" + NL + "+line1",
delta);
}

@Test
public void testEqualTextWhenIgnoringLineEndings() throws IOException {
{
String baseline = "FirstLine\r\nline2\r\nline3";
String reactor = "FirstLine\nline2\nline3";
assertEquals(ArtifactDelta.NO_DIFFERENCE, getTextDelta(baseline, reactor));
}
{
String baseline = "\r\nFirstLine\r\nline2\r\n";
String reactor = "\nFirstLine\nline2\n";
assertEquals(ArtifactDelta.NO_DIFFERENCE, getTextDelta(baseline, reactor));
}
{
String baseline = "\r\n\r\nFirstLine\r\n\r\nline2\r\n\r\n";
String reactor = "\n\nFirstLine\n\nline2\n\n";
assertEquals(ArtifactDelta.NO_DIFFERENCE, getTextDelta(baseline, reactor));
}
{ // mixed styles in one string
String baseline = "\n\r\nFirstLine\r\n\nline2\n\r\n";
String reactor = "\n\nFirstLine\n\nline2\n\n";
assertEquals(ArtifactDelta.NO_DIFFERENCE, getTextDelta(baseline, reactor));
}
}

@Test
public void testNotEqualTextWithDifferentCRandLFcombinations() throws IOException {

{
String baseline = "line1\n\rline2";
String expectedDelta = "--- baseline" + NL + "+++ reactor" + NL + "@@ -2,1 +2,0 @@" + NL + "-";

String reactor1 = "line1\nline2";
ArtifactDelta delta1 = getTextDelta(baseline, reactor1);
assertDeltaWithDetails(expectedDelta, delta1);

String reactor2 = "line1\r\nline2";
ArtifactDelta delta2 = getTextDelta(baseline, reactor2);
assertDeltaWithDetails(expectedDelta, delta2);
}
{
String baseline = "line1\r\n\nline2";
String expectedDelta = "--- baseline" + NL + "+++ reactor" + NL + "@@ -2,1 +2,0 @@" + NL + "-";

String reactor1 = "line1\nline2";
ArtifactDelta delta1 = getTextDelta(baseline, reactor1);
assertDeltaWithDetails(expectedDelta, delta1);

String reactor2 = "line1\r\nline2";
ArtifactDelta delta2 = getTextDelta(baseline, reactor2);
assertDeltaWithDetails(expectedDelta, delta2);
}
{
String baseline = "line1\r\r\nline2";
String expectedDelta = "--- baseline" + NL + "+++ reactor" + NL + "@@ -2,1 +2,0 @@" + NL + "-";

String reactor1 = "line1\nline2";
ArtifactDelta delta1 = getTextDelta(baseline, reactor1);
assertDeltaWithDetails(expectedDelta, delta1);

String reactor2 = "line1\r\nline2";
ArtifactDelta delta2 = getTextDelta(baseline, reactor2);
assertDeltaWithDetails(expectedDelta, delta2);
}
{
String baseline = "\r\nline1\r\nline2";
String expectedDelta = "--- baseline" + NL + "+++ reactor" + NL + "@@ -1,1 +1,0 @@" + NL + "-";

String reactor1 = "line1\nline2";
ArtifactDelta delta1 = getTextDelta(baseline, reactor1);
assertDeltaWithDetails(expectedDelta, delta1);

String reactor2 = "line1\r\nline2";
ArtifactDelta delta2 = getTextDelta(baseline, reactor2);
assertDeltaWithDetails(expectedDelta, delta2);
}
{
String baseline = "line1\r\nline2\r\n";
String expectedDelta = ""; //BufferedReader.readLine() considers a trailing newline equals to EOF

String reactor1 = "line1\nline2";
ArtifactDelta delta1 = getTextDelta(baseline, reactor1);
assertDeltaWithDetails(expectedDelta, delta1);

String reactor2 = "line1\r\nline2";
ArtifactDelta delta2 = getTextDelta(baseline, reactor2);
assertDeltaWithDetails(expectedDelta, delta2);
}
}

private static ArtifactDelta getTextDelta(String baseline, String reactor) throws IOException {
ComparisonData data = new ComparisonData(List.of(), false, /* Show diff details: */ true);
ComparatorInputStream baselineStream = new ComparatorInputStream(baseline.getBytes(StandardCharsets.UTF_8));
ComparatorInputStream reactorStream = new ComparatorInputStream(reactor.getBytes(StandardCharsets.UTF_8));
return TextComparator.compareText(baselineStream, reactorStream, data);
}

private static void assertDeltaWithDetails(String expected, ArtifactDelta delta) {
assertNotEquals(ArtifactDelta.NO_DIFFERENCE, delta);
assertEquals(expected, delta.getDetailedMessage());
}

}

0 comments on commit 5aef81f

Please sign in to comment.