Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Parse nodes before and after the root element #560

Merged
merged 1 commit into from
Dec 13, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
31 changes: 22 additions & 9 deletions jvm/src/test/scala/scala/xml/XMLTest.scala
Original file line number Diff line number Diff line change
Expand Up @@ -586,7 +586,7 @@ class XMLTestJVM {
def issue508commentParsing: Unit = {
// confirm that comments are processed correctly now
roundtrip("<a><!-- comment --> suffix</a>")
roundtrip("<a>prefix <!-- comment --> suffix</a>")
roundtrip("<a>prefix <!-- comment --> <!-- comment2 --> suffix</a>")
roundtrip("<a>prefix <b><!-- comment --></b> suffix</a>")
roundtrip("<a>prefix <b><!-- multi-\nline\n comment --></b> suffix</a>")
roundtrip("""<a>prefix <b><!-- multi-
Expand All @@ -596,13 +596,7 @@ class XMLTestJVM {
// confirm that processing instructions were always processed correctly
roundtrip("<a><?target content ?> suffix</a>")
roundtrip("<a>prefix <?target content ?> suffix</a>")
roundtrip("<a>prefix <b><?target content?></b> suffix</a>")

// TODO since XMLLoader retrieves FactoryAdapter.rootNode,
// capturing comments before and after the root element is not currently possible
// (by the way, the same applies to processing instructions).
//check("<!-- prologue --><a>text</a>")
//check("<a>text</a><!-- epilogue -->")
roundtrip("<a>prefix <b><?target content?> </b> suffix</a>")
}

@UnitTest
Expand All @@ -613,7 +607,26 @@ class XMLTestJVM {
roundtrip("""<a>prefix <b><![CDATA[
| multi-
| line cdata
| section]]></b> suffix</a>""".stripMargin)
| section]]> </b> suffix</a>""".stripMargin)
}

/** Asserts that parsing `xml` into its top-level nodes and serializing them back yields `xml` unchanged. */
def roundtripNodes(xml: String): Unit = {
  val parsedNodes = XML.loadStringNodes(xml)
  val serialized = parsedNodes.map(_.toString).mkString("")
  assertEquals(xml, serialized)
}

@UnitTest
def xmlLoaderLoadNodes: Unit = {
  // Documents with comments / processing instructions ahead of the root element.
  val prologCases = Seq(
    "<!-- prolog --><a>text</a>",
    "<!-- prolog --><?target content ?><!-- comment2 --><a>text</a>",
    """<!-- prolog
      | --><?target content ?><!--
      | comment2 --><a>text</a>""".stripMargin
  )

  // Documents with comments / processing instructions following the root element.
  val epilogueCases = Seq(
    "<a>text</a><!-- epilogue -->",
    "<a>text</a><!-- epilogue --><?target content ?><!-- comment2 -->"
  )

  // Checked in declaration order; the first failing round trip aborts the test.
  (prologCases ++ epilogueCases).foreach(roundtripNodes)

  // Note: at least with the JDK's Xerces, whitespace in the prolog and epilogue gets lost in parsing:
  // the parser does not fire any white-space related events, so:
  // does not work: roundtripNodes("<!-- c --> <a/>")
  // does not work: roundtripNodes("<a/> <!-- epilogue -->")
}

@UnitTest
Expand Down
35 changes: 28 additions & 7 deletions shared/src/main/scala/scala/xml/factory/XMLLoader.scala
Original file line number Diff line number Diff line change
Expand Up @@ -51,19 +51,29 @@ trait XMLLoader[T <: Node] {
* The methods available in scala.xml.XML use the XML parser in the JDK.
*/
def loadXML(source: InputSource, parser: SAXParser): T = {
  // All parsing is delegated to `parse`; only the root element of the populated adapter is returned,
  // so nodes collected before or after the root are dropped here (see loadXMLNodes for those).
  val result: FactoryAdapter = parse(source, parser)
  result.rootElem.asInstanceOf[T]
}

/**
 * Parses `source` with `parser` and returns the nodes collected before the root element,
 * the root element itself, and the nodes collected after it, in that order.
 */
def loadXMLNodes(source: InputSource, parser: SAXParser): Seq[Node] = {
  val parsed: FactoryAdapter = parse(source, parser)
  val rootAndTrailing: List[Node] = parsed.rootElem :: parsed.epilogue
  parsed.prolog ++ rootAndTrailing
}

/**
 * Runs `parser` over `source` using the adapter obtained from `adapter` as the handler
 * and returns that adapter once parsing has finished, so callers can read whatever it collected.
 *
 * @param source the input to parse
 * @param parser the SAX parser to drive
 * @return the adapter that received the parse events
 */
private def parse(source: InputSource, parser: SAXParser): FactoryAdapter = {
  val result: FactoryAdapter = adapter

  try {
    // Also register the adapter as the SAX lexical handler.
    parser.setProperty("http://xml.org/sax/properties/lexical-handler", result)
  } catch {
    // Best effort: a parser that does not recognize the property is simply used without it.
    case _: SAXNotRecognizedException =>
  }

  // TopScope is pushed for the duration of the parse and popped again afterwards.
  result.scopeStack = TopScope :: result.scopeStack
  parser.parse(source, result)
  result.scopeStack = result.scopeStack.tail

  result
}

/** Loads XML from the given file, file descriptor, or filename. */
Expand All @@ -80,4 +90,15 @@ trait XMLLoader[T <: Node] {

/** Loads XML from the given String. */
def loadString(string: String): T = loadXML(fromString(string), parser)
}

/** Load XML nodes, including comments and processing instructions that precede and follow the root element. */
def loadFileNodes(file: File): Seq[Node] = loadXMLNodes(fromFile(file), parser)
/** Same as above, for the source built by `fromFile(fd)`. */
def loadFileNodes(fd: FileDescriptor): Seq[Node] = loadXMLNodes(fromFile(fd), parser)
/** Same as above, for the source built by `fromFile(name)`. */
def loadFileNodes(name: String): Seq[Node] = loadXMLNodes(fromFile(name), parser)
/** Loads all top-level nodes from the source built by `fromInputStream(is)`. */
def loadNodes(is: InputStream): Seq[Node] = loadXMLNodes(fromInputStream(is), parser)
/** Loads all top-level nodes from the source built by `fromReader(reader)`. */
def loadNodes(reader: Reader): Seq[Node] = loadXMLNodes(fromReader(reader), parser)
/** Loads all top-level nodes from the source built by `fromSysId(sysID)`. */
def loadNodes(sysID: String): Seq[Node] = loadXMLNodes(fromSysId(sysID), parser)
/** Loads all top-level nodes from an already constructed `InputSource`. */
def loadNodes(source: InputSource): Seq[Node] = loadXMLNodes(source, parser)
/** Loads all top-level nodes from the stream opened on `url`; NOTE(review): the stream is not closed here - confirm who owns it. */
def loadNodes(url: URL): Seq[Node] = loadXMLNodes(fromInputStream(url.openStream()), parser)
/** Loads all top-level nodes from XML held in `string`. */
def loadStringNodes(string: String): Seq[Node] = loadXMLNodes(fromString(string), parser)
}
31 changes: 23 additions & 8 deletions shared/src/main/scala/scala/xml/parsing/FactoryAdapter.scala
Original file line number Diff line number Diff line change
Expand Up @@ -40,9 +40,11 @@ trait ConsoleErrorHandler extends DefaultHandler2 {
* underlying SAX parser.
*/
abstract class FactoryAdapter extends DefaultHandler2 with factory.XMLLoader[Node] {
var prolog: List[Node] = List.empty
var rootElem: Node = _
var epilogue: List[Node] = List.empty

val buffer = new StringBuilder()
val buffer: StringBuilder = new StringBuilder()
private var inCDATA: Boolean = false

/** List of attributes
Expand All @@ -51,28 +53,28 @@ abstract class FactoryAdapter extends DefaultHandler2 with factory.XMLLoader[Nod
*
* @since 2.0.0
*/
var attribStack = List.empty[MetaData]
var attribStack: List[MetaData] = List.empty
/** List of elements
*
* Previously was a mutable [[scala.collection.mutable.Stack Stack]], but is now a mutable reference to an immutable [[scala.collection.immutable.List List]].
*
* @since 2.0.0
*/
var hStack = List.empty[Node] // [ element ] contains siblings
var hStack: List[Node] = List.empty // [ element ] contains siblings
/** List of element names
*
* Previously was a mutable [[scala.collection.mutable.Stack Stack]], but is now a mutable reference to an immutable [[scala.collection.immutable.List List]].
*
* @since 2.0.0
*/
var tagStack = List.empty[String]
var tagStack: List[String] = List.empty
/** List of namespaces
*
* Previously was a mutable [[scala.collection.mutable.Stack Stack]], but is now a mutable reference to an immutable [[scala.collection.immutable.List List]].
*
* @since 2.0.0
*/
var scopeStack = List.empty[NamespaceBinding]
var scopeStack: List[NamespaceBinding] = List.empty

var curTag: String = _
var capture: Boolean = false
Expand Down Expand Up @@ -123,7 +125,7 @@ abstract class FactoryAdapter extends DefaultHandler2 with factory.XMLLoader[Nod
// ContentHandler methods
//

val normalizeWhitespace = false
val normalizeWhitespace: Boolean = false

/**
* Capture characters, possibly normalizing whitespace.
Expand Down Expand Up @@ -177,13 +179,20 @@ abstract class FactoryAdapter extends DefaultHandler2 with factory.XMLLoader[Nod
attributes: Attributes): Unit =
{
captureText()

// capture the prolog at the start of the root element
if (tagStack.isEmpty) {
prolog = hStack.reverse
hStack = List.empty
}

tagStack = curTag :: tagStack
curTag = qname

val localName = splitName(qname)._2
capture = nodeContainsText(localName)

hStack = null :: hStack
hStack = null :: hStack
var m: MetaData = Null
var scpe: NamespaceBinding =
if (scopeStack.isEmpty) TopScope
Expand All @@ -193,7 +202,7 @@ abstract class FactoryAdapter extends DefaultHandler2 with factory.XMLLoader[Nod
val qname = attributes getQName i
val value = attributes getValue i
val (pre, key) = splitName(qname)
def nullIfEmpty(s: String) = if (s == "") null else s
def nullIfEmpty(s: String): String = if (s == "") null else s

if (pre == "xmlns" || (pre == null && qname == "xmlns")) {
val arg = if (pre == null) null else key
Expand Down Expand Up @@ -250,6 +259,12 @@ abstract class FactoryAdapter extends DefaultHandler2 with factory.XMLLoader[Nod
capture = curTag != null && nodeContainsText(curTag) // root level
}

/**
 * At the end of the document, moves everything on `hStack` except its last entry into
 * `epilogue` (in reversed stack order) and leaves only that last entry on the stack.
 */
override def endDocument(): Unit = {
  // `init` is evaluated before `last`, as in the original statement order.
  val trailing: List[Node] = hStack.init
  val bottom: Node = hStack.last
  epilogue = trailing.reverse
  hStack = List(bottom)
}

/**
* Processing instruction.
*/
Expand Down
2 changes: 2 additions & 0 deletions shared/src/main/scala/scala/xml/parsing/MarkupParser.scala
Original file line number Diff line number Diff line change
Expand Up @@ -98,6 +98,8 @@ trait MarkupParser extends MarkupParserCommon with TokenTests {
var extIndex = -1

/** holds temporary values of pos */
// Note: this is clearly an override, but marking it as such causes a "...cannot override a mutable variable"
// error with Scala 3; whether it works with Scala 3 when not explicitly marked as an override remains to be seen...
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

looks like scala/scala3#13744 should fix it

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yay! Thank you, I did not see that!

var tmppos: Int = _

/** holds the next character */
Expand Down