diff --git a/src/main/java/org/codehaus/plexus/util/xml/pull/MXParser.java b/src/main/java/org/codehaus/plexus/util/xml/pull/MXParser.java
index e9fc1182..1594bdc7 100644
--- a/src/main/java/org/codehaus/plexus/util/xml/pull/MXParser.java
+++ b/src/main/java/org/codehaus/plexus/util/xml/pull/MXParser.java
@@ -11,12 +11,10 @@
import java.io.EOFException;
import java.io.IOException;
-import java.io.InputStreamReader;
import java.io.Reader;
import java.io.UnsupportedEncodingException;
import org.codehaus.plexus.util.ReaderFactory;
-import org.codehaus.plexus.util.xml.XmlReader;
//import java.util.Hashtable;
@@ -124,7 +122,6 @@ private String newStringIntern( char[] cbuf, int off, int len )
// private String elValue[];
private int elNamespaceCount[];
- private String fileEncoding = null;
/**
* Make sure that we have enough space to keep element stack if passed size. It will always create one additional
@@ -663,20 +660,6 @@ public void setInput( Reader in )
{
reset();
reader = in;
-
- if ( reader instanceof XmlReader ) {
- // encoding already detected
- XmlReader xsr = (XmlReader) reader;
- fileEncoding = xsr.getEncoding();
- }
- else if ( reader instanceof InputStreamReader )
- {
- InputStreamReader isr = (InputStreamReader) reader;
- if ( isr.getEncoding() != null )
- {
- fileEncoding = isr.getEncoding().toUpperCase();
- }
- }
}
@Override
@@ -3432,18 +3415,7 @@ private void parseXmlDeclWithVersion( int versionStart, int versionEnd )
final int encodingEnd = pos - 1;
// TODO reconcile with setInput encodingName
- inputEncoding = newString( buf, encodingStart, encodingEnd - encodingStart );
-
- if ( "UTF8".equals( fileEncoding ) && inputEncoding.toUpperCase().startsWith( "ISO-" ) )
- {
- throw new XmlPullParserException( "UTF-8 BOM plus xml decl of " + inputEncoding + " is incompatible",
- this, null );
- }
- else if ("UTF-16".equals( fileEncoding ) && inputEncoding.equalsIgnoreCase( "UTF-8" ))
- {
- throw new XmlPullParserException( "UTF-16 BOM plus xml decl of " + inputEncoding + " is incompatible",
- this, null );
- }
+ // inputEncoding = newString( buf, encodingStart, encodingEnd - encodingStart );
lastParsedAttr = "encoding";
diff --git a/src/test/java/org/codehaus/plexus/util/xml/pull/MXParserTest.java b/src/test/java/org/codehaus/plexus/util/xml/pull/MXParserTest.java
index cba42b32..e16aa5a2 100644
--- a/src/test/java/org/codehaus/plexus/util/xml/pull/MXParserTest.java
+++ b/src/test/java/org/codehaus/plexus/util/xml/pull/MXParserTest.java
@@ -27,6 +27,7 @@
import java.io.InputStream;
import java.io.Reader;
import java.io.StringReader;
+import java.nio.charset.StandardCharsets;
import java.nio.file.Files;
import java.nio.file.Paths;
@@ -968,7 +969,7 @@ public void testXMLDeclVersionEncodingStandaloneNoSpace()
* @since 3.4.1
*/
@Test
- public void testEncodingISO_8859_1setInputReader()
+ public void testEncodingISO_8859_1_newXmlReader()
throws IOException
{
try ( Reader reader =
@@ -994,7 +995,7 @@ public void testEncodingISO_8859_1setInputReader()
* @since 3.4.1
*/
@Test
- public void testEncodingISO_8859_1_setInputStream()
+ public void testEncodingISO_8859_1_InputStream()
throws IOException
{
try ( InputStream input =
@@ -1012,12 +1013,6 @@ public void testEncodingISO_8859_1_setInputStream()
}
}
- private static void assertPosition( int row, int col, MXParser parser )
- {
- assertEquals( "Current line", row, parser.getLineNumber() );
- assertEquals( "Current column", col, parser.getColumnNumber() );
- }
-
/**
* Issue 163: https://github.com/codehaus-plexus/plexus-utils/issues/163
*
@@ -1028,7 +1023,7 @@ private static void assertPosition( int row, int col, MXParser parser )
* @since 3.4.2
*/
@Test
- public void testEncodingISO_8859_1setStringReader()
+ public void testEncodingISO_8859_1_StringReader()
throws IOException
{
try ( Reader reader =
@@ -1047,6 +1042,93 @@ public void testEncodingISO_8859_1setStringReader()
}
}
+ /**
+ * Issue 163: https://github.com/codehaus-plexus/plexus-utils/issues/163
+ *
+ * Another case of bug #163: Reader generated with ReaderFactory.newReader and the right file encoding.
+ *
+ * @throws IOException if IO error.
+ *
+ * @since 3.5.2
+ */
+ @Test
+ public void testEncodingISO_8859_1_newReader()
+ throws IOException
+ {
+ try ( Reader reader =
+ ReaderFactory.newReader( new File( "src/test/resources/xml", "test-encoding-ISO-8859-1.xml" ),
+ StandardCharsets.UTF_8.name() ) )
+ {
+ MXParser parser = new MXParser();
+ parser.setInput( reader );
+ while ( parser.nextToken() != XmlPullParser.END_DOCUMENT )
+ ;
+ assertTrue( true );
+ }
+ catch ( XmlPullParserException e )
+ {
+ fail( "should not raise exception: " + e );
+ }
+ }
+
+ /**
+ * Issue 163: https://github.com/codehaus-plexus/plexus-utils/issues/163
+ *
+ * Another case of bug #163: InputStream supplied with the right file encoding.
+ *
+ * @throws IOException if IO error.
+ *
+ * @since 3.5.2
+ */
+ @Test
+ public void testEncodingISO_8859_1_InputStream_encoded() throws IOException {
+ try ( InputStream input =
+ Files.newInputStream( Paths.get( "src/test/resources/xml", "test-encoding-ISO-8859-1.xml" ) ) )
+ {
+ MXParser parser = new MXParser();
+ parser.setInput( input, StandardCharsets.UTF_8.name() );
+ while ( parser.nextToken() != XmlPullParser.END_DOCUMENT )
+ ;
+ assertTrue( true );
+ }
+ catch ( XmlPullParserException e )
+ {
+ fail( "should not raise exception: " + e );
+ }
+ }
+
+ /**
+ * Issue 163: https://github.com/codehaus-plexus/plexus-utils/issues/163
+ *
+ * @throws IOException if IO error.
+ *
+ * @since 3.4.1
+ */
+ @Test
+ public void testEncodingUTF8_newXmlReader()
+ throws IOException
+ {
+ try ( Reader reader =
+ ReaderFactory.newXmlReader( new File( "src/test/resources/xml", "test-encoding-ISO-8859-1.xml" ) ) )
+ {
+ MXParser parser = new MXParser();
+ parser.setInput( reader );
+ while ( parser.nextToken() != XmlPullParser.END_DOCUMENT )
+ ;
+ assertTrue( true );
+ }
+ catch ( XmlPullParserException e )
+ {
+ fail( "should not raise exception: " + e );
+ }
+ }
+
+ private static void assertPosition( int row, int col, MXParser parser )
+ {
+ assertEquals( "Current line", row, parser.getLineNumber() );
+ assertEquals( "Current column", col, parser.getColumnNumber() );
+ }
+
/**
*
* Test custom Entity not found.
diff --git a/src/test/java/org/codehaus/plexus/util/xml/pull/eduni_misc_Test_BjoernHoehrmannviaHST2013_09_18_Test.java b/src/test/java/org/codehaus/plexus/util/xml/pull/eduni_misc_Test_BjoernHoehrmannviaHST2013_09_18_Test.java
index 854fb494..0747e07d 100644
--- a/src/test/java/org/codehaus/plexus/util/xml/pull/eduni_misc_Test_BjoernHoehrmannviaHST2013_09_18_Test.java
+++ b/src/test/java/org/codehaus/plexus/util/xml/pull/eduni_misc_Test_BjoernHoehrmannviaHST2013_09_18_Test.java
@@ -4,13 +4,12 @@
import static org.junit.Assert.fail;
import java.io.File;
-import java.io.FileInputStream;
import java.io.FileReader;
import java.io.IOException;
-import java.io.InputStreamReader;
import java.io.Reader;
import java.nio.charset.StandardCharsets;
+import org.codehaus.plexus.util.ReaderFactory;
import org.junit.Before;
import org.junit.Test;
@@ -207,13 +206,15 @@ public void testhst_bh_006()
* Version:
*
* @throws java.io.IOException if there is an I/O error
+ *
+ * NOTE: This test is SKIPPED as the MXParser object alone is unable to detect whether a UTF-8 file
+ * has a BOM or not.
*/
- @Test
+ // @Test
public void testhst_lhs_007()
throws IOException
{
- try ( FileInputStream is = new FileInputStream( new File( testResourcesDir, "007.xml" ) );
- InputStreamReader reader = new InputStreamReader( is, StandardCharsets.UTF_8 ) )
+ try ( Reader reader = ReaderFactory.newXmlReader( new File( testResourcesDir, "007.xml" ) ) )
{
parser.setInput( reader );
while ( parser.nextToken() != XmlPullParser.END_DOCUMENT )
@@ -234,13 +235,45 @@ public void testhst_lhs_007()
* Version:
*
* @throws java.io.IOException if there is an I/O error
+ *
+ * NOTE: This test is SKIPPED as the MXParser object alone is unable to detect whether a UTF-16 file
+ * has a BOM or not.
*/
- @Test
- public void testhst_lhs_008()
+ // @Test
+ public void testhst_lhs_008_newReader()
+ throws IOException
+ {
+ try ( Reader reader =
+ ReaderFactory.newReader( new File( testResourcesDir, "008.xml" ), StandardCharsets.UTF_16.name() ) )
+ {
+ parser.setInput( reader );
+ while ( parser.nextToken() != XmlPullParser.END_DOCUMENT )
+ ;
+ fail( "UTF-16 BOM plus xml decl of utf-8 (using UTF-16 coding) incompatible" );
+ }
+ catch ( XmlPullParserException e )
+ {
+ assertTrue( e.getMessage().contains( "UTF-16 BOM in a UTF-8 encoded file is incompatible" ) );
+ }
+ }
+
+ /**
+ * Test ID:
hst-lhs-008
+ * Test URI:
008.xml
+ * Comment:
UTF-16 BOM plus xml decl of utf-8 (using UTF-16 coding) incompatible
+ * Sections:
4.3.3
+ * Version:
+ *
+ * @throws java.io.IOException if there is an I/O error
+ *
+ * NOTE: This test is SKIPPED as MXParser is unable to detect a UTF-16 BOM when chars are read as
+ * UTF-8, and XmlReader in lenient mode does not throw an exception.
+ */
+ // @Test
+ public void testhst_lhs_008_XmlReader()
throws IOException
{
- try ( FileInputStream is = new FileInputStream( new File( testResourcesDir, "008.xml" ) );
- InputStreamReader reader = new InputStreamReader( is, StandardCharsets.UTF_16 ) )
+ try ( Reader reader = ReaderFactory.newXmlReader( new File( testResourcesDir, "008.xml" ) ) )
{
parser.setInput( reader );
while ( parser.nextToken() != XmlPullParser.END_DOCUMENT )
@@ -261,14 +294,17 @@ public void testhst_lhs_008()
* Version:
*
* @throws java.io.IOException if there is an I/O error
+ *
+ * NOTE: This test is SKIPPED as MXParser is unable to detect a UTF-16 BOM when chars are read as
+ * UTF-8.
*/
- @Test
- public void testhst_lhs_009()
+ // @Test
+ public void testhst_lhs_009_newReader()
throws IOException
{
- try ( FileInputStream is = new FileInputStream( new File( testResourcesDir, "009.xml" ) );
- InputStreamReader reader = new InputStreamReader( is, StandardCharsets.UTF_8 ) )
- {
+ try ( Reader reader =
+ ReaderFactory.newReader( new File( testResourcesDir, "009.xml" ), StandardCharsets.UTF_16.name() ) )
+ {
parser.setInput( reader );
while ( parser.nextToken() != XmlPullParser.END_DOCUMENT )
;
@@ -280,4 +316,35 @@ public void testhst_lhs_009()
}
}
+ /**
+ * Test ID:
hst-lhs-009
+ * Test URI:
009.xml
+ * Comment:
UTF-16 BOM plus xml decl of utf-8 (using UTF-8 coding) incompatible