Skip to content

Commit

Permalink
Merge pull request #560 from dubinsky/prolog-epilog
Browse files Browse the repository at this point in the history
  • Loading branch information
SethTisue authored Dec 13, 2021
2 parents 441678d + 156f2ea commit 1af1168
Show file tree
Hide file tree
Showing 4 changed files with 75 additions and 24 deletions.
31 changes: 22 additions & 9 deletions jvm/src/test/scala/scala/xml/XMLTest.scala
Original file line number Diff line number Diff line change
Expand Up @@ -586,7 +586,7 @@ class XMLTestJVM {
def issue508commentParsing: Unit = {
// confirm that comments are processed correctly now
roundtrip("<a><!-- comment --> suffix</a>")
roundtrip("<a>prefix <!-- comment --> suffix</a>")
roundtrip("<a>prefix <!-- comment --> <!-- comment2 --> suffix</a>")
roundtrip("<a>prefix <b><!-- comment --></b> suffix</a>")
roundtrip("<a>prefix <b><!-- multi-\nline\n comment --></b> suffix</a>")
roundtrip("""<a>prefix <b><!-- multi-
Expand All @@ -596,13 +596,7 @@ class XMLTestJVM {
// confirm that processing instructions were always processed correctly
roundtrip("<a><?target content ?> suffix</a>")
roundtrip("<a>prefix <?target content ?> suffix</a>")
roundtrip("<a>prefix <b><?target content?></b> suffix</a>")

// TODO since XMLLoader retrieves FactoryAdapter.rootNode,
// capturing comments before and after the root element is not currently possible
// (by the way, the same applies to processing instructions).
//check("<!-- prologue --><a>text</a>")
//check("<a>text</a><!-- epilogue -->")
roundtrip("<a>prefix <b><?target content?> </b> suffix</a>")
}

@UnitTest
Expand All @@ -613,7 +607,26 @@ class XMLTestJVM {
roundtrip("""<a>prefix <b><![CDATA[
| multi-
| line cdata
| section]]></b> suffix</a>""".stripMargin)
| section]]> </b> suffix</a>""".stripMargin)
}

/** Asserts that loading `xml` as a sequence of top-level nodes and concatenating the
 *  string form of each node reproduces `xml` exactly.
 */
def roundtripNodes(xml: String): Unit = {
  val serialized: String = XML.loadStringNodes(xml).map(_.toString).mkString
  assertEquals(xml, serialized)
}

@UnitTest
def xmlLoaderLoadNodes: Unit = {
  // Documents with comments / processing instructions in front of the root element.
  val withProlog: Seq[String] = Seq(
    "<!-- prolog --><a>text</a>",
    "<!-- prolog --><?target content ?><!-- comment2 --><a>text</a>",
    """<!-- prolog
      | --><?target content ?><!--
      | comment2 --><a>text</a>""".stripMargin
  )

  // Documents with comments / processing instructions after the root element.
  val withEpilogue: Seq[String] = Seq(
    "<a>text</a><!-- epilogue -->",
    "<a>text</a><!-- epilogue --><?target content ?><!-- comment2 -->"
  )

  // Each document must survive a parse/serialize round trip unchanged.
  (withProlog ++ withEpilogue).foreach(roundtripNodes)

  // Note: at least with the JDK's Xerces, whitespace in the prolog and epilogue is lost in parsing,
  // because the parser does not fire any whitespace-related events there. Consequently these
  // round trips do not work:
  //   roundtripNodes("<!-- c --> <a/>")
  //   roundtripNodes("<a/> <!-- epilogue -->")
}

@UnitTest
Expand Down
35 changes: 28 additions & 7 deletions shared/src/main/scala/scala/xml/factory/XMLLoader.scala
Original file line number Diff line number Diff line change
Expand Up @@ -51,19 +51,29 @@ trait XMLLoader[T <: Node] {
* The methods available in scala.xml.XML use the XML parser in the JDK.
*/
def loadXML(source: InputSource, parser: SAXParser): T = {
val newAdapter = adapter
val result: FactoryAdapter = parse(source, parser)
result.rootElem.asInstanceOf[T]
}

/** Parses `source` with `parser` and returns the nodes collected by the adapter:
 *  the prolog nodes, then the root element, then the epilogue nodes.
 */
def loadXMLNodes(source: InputSource, parser: SAXParser): Seq[Node] = {
  val parsed: FactoryAdapter = parse(source, parser)
  val rootAndEpilogue: List[Node] = parsed.rootElem :: parsed.epilogue
  parsed.prolog ++ rootAndEpilogue
}

private def parse(source: InputSource, parser: SAXParser): FactoryAdapter = {
val result: FactoryAdapter = adapter

try {
parser.setProperty("http://xml.org/sax/properties/lexical-handler", newAdapter)
parser.setProperty("http://xml.org/sax/properties/lexical-handler", result)
} catch {
case _: SAXNotRecognizedException =>
}

newAdapter.scopeStack = TopScope :: newAdapter.scopeStack
parser.parse(source, newAdapter)
newAdapter.scopeStack = newAdapter.scopeStack.tail
result.scopeStack = TopScope :: result.scopeStack
parser.parse(source, result)
result.scopeStack = result.scopeStack.tail

newAdapter.rootElem.asInstanceOf[T]
result
}

/** Loads XML from the given file, file descriptor, or filename. */
Expand All @@ -80,4 +90,15 @@ trait XMLLoader[T <: Node] {

/** Loads XML from the given String. */
def loadString(string: String): T = loadXML(fromString(string), parser)
}

/** Loads XML nodes from the given file, including comments and processing instructions
 *  that precede and follow the root element.
 */
def loadFileNodes(file: File): Seq[Node] = {
  val source = fromFile(file)
  loadXMLNodes(source, parser)
}

/** Loads XML nodes (prolog, root element and epilogue) from the given file descriptor. */
def loadFileNodes(fd: FileDescriptor): Seq[Node] = {
  val source = fromFile(fd)
  loadXMLNodes(source, parser)
}

/** Loads XML nodes (prolog, root element and epilogue) from the file with the given name. */
def loadFileNodes(name: String): Seq[Node] = {
  val source = fromFile(name)
  loadXMLNodes(source, parser)
}

/** Loads XML nodes (prolog, root element and epilogue) from the given input stream. */
def loadNodes(is: InputStream): Seq[Node] = {
  val source = fromInputStream(is)
  loadXMLNodes(source, parser)
}

/** Loads XML nodes (prolog, root element and epilogue) from the given reader. */
def loadNodes(reader: Reader): Seq[Node] = {
  val source = fromReader(reader)
  loadXMLNodes(source, parser)
}

/** Loads XML nodes (prolog, root element and epilogue) from the given system identifier. */
def loadNodes(sysID: String): Seq[Node] = {
  val source = fromSysId(sysID)
  loadXMLNodes(source, parser)
}

/** Loads XML nodes (prolog, root element and epilogue) from the given input source. */
def loadNodes(source: InputSource): Seq[Node] =
  loadXMLNodes(source, parser)

/** Opens a stream on the given URL and loads XML nodes (prolog, root element and epilogue) from it. */
def loadNodes(url: URL): Seq[Node] = {
  val source = fromInputStream(url.openStream())
  loadXMLNodes(source, parser)
}

/** Loads XML nodes (prolog, root element and epilogue) from the given string. */
def loadStringNodes(string: String): Seq[Node] = {
  val source = fromString(string)
  loadXMLNodes(source, parser)
}
}
31 changes: 23 additions & 8 deletions shared/src/main/scala/scala/xml/parsing/FactoryAdapter.scala
Original file line number Diff line number Diff line change
Expand Up @@ -40,9 +40,11 @@ trait ConsoleErrorHandler extends DefaultHandler2 {
* underlying SAX parser.
*/
abstract class FactoryAdapter extends DefaultHandler2 with factory.XMLLoader[Node] {
var prolog: List[Node] = List.empty
var rootElem: Node = _
var epilogue: List[Node] = List.empty

val buffer = new StringBuilder()
val buffer: StringBuilder = new StringBuilder()
private var inCDATA: Boolean = false

/** List of attributes
Expand All @@ -51,28 +53,28 @@ abstract class FactoryAdapter extends DefaultHandler2 with factory.XMLLoader[Nod
*
* @since 2.0.0
*/
var attribStack = List.empty[MetaData]
var attribStack: List[MetaData] = List.empty
/** List of elements
*
* Previously was a mutable [[scala.collection.mutable.Stack Stack]], but is now a mutable reference to an immutable [[scala.collection.immutable.List List]].
*
* @since 2.0.0
*/
var hStack = List.empty[Node] // [ element ] contains siblings
var hStack: List[Node] = List.empty // [ element ] contains siblings
/** List of element names
*
* Previously was a mutable [[scala.collection.mutable.Stack Stack]], but is now a mutable reference to an immutable [[scala.collection.immutable.List List]].
*
* @since 2.0.0
*/
var tagStack = List.empty[String]
var tagStack: List[String] = List.empty
/** List of namespaces
*
* Previously was a mutable [[scala.collection.mutable.Stack Stack]], but is now a mutable reference to an immutable [[scala.collection.immutable.List List]].
*
* @since 2.0.0
*/
var scopeStack = List.empty[NamespaceBinding]
var scopeStack: List[NamespaceBinding] = List.empty

var curTag: String = _
var capture: Boolean = false
Expand Down Expand Up @@ -123,7 +125,7 @@ abstract class FactoryAdapter extends DefaultHandler2 with factory.XMLLoader[Nod
// ContentHandler methods
//

val normalizeWhitespace = false
val normalizeWhitespace: Boolean = false

/**
* Capture characters, possibly normalizing whitespace.
Expand Down Expand Up @@ -177,13 +179,20 @@ abstract class FactoryAdapter extends DefaultHandler2 with factory.XMLLoader[Nod
attributes: Attributes): Unit =
{
captureText()

// capture the prolog at the start of the root element
if (tagStack.isEmpty) {
prolog = hStack.reverse
hStack = List.empty
}

tagStack = curTag :: tagStack
curTag = qname

val localName = splitName(qname)._2
capture = nodeContainsText(localName)

hStack = null :: hStack
hStack = null :: hStack
var m: MetaData = Null
var scpe: NamespaceBinding =
if (scopeStack.isEmpty) TopScope
Expand All @@ -193,7 +202,7 @@ abstract class FactoryAdapter extends DefaultHandler2 with factory.XMLLoader[Nod
val qname = attributes getQName i
val value = attributes getValue i
val (pre, key) = splitName(qname)
def nullIfEmpty(s: String) = if (s == "") null else s
def nullIfEmpty(s: String): String = if (s == "") null else s

if (pre == "xmlns" || (pre == null && qname == "xmlns")) {
val arg = if (pre == null) null else key
Expand Down Expand Up @@ -250,6 +259,12 @@ abstract class FactoryAdapter extends DefaultHandler2 with factory.XMLLoader[Nod
capture = curTag != null && nodeContainsText(curTag) // root level
}

/** Captures the epilogue once the end of the document is reached.
 *
 *  `hStack` grows by prepending, so its last entry is the oldest one (presumably the
 *  root element; the code that pushes it is not visible here). Everything pushed after
 *  that entry is moved, in reversed order, into `epilogue`, and `hStack` is left
 *  holding only the oldest entry.
 *
 *  Assumes `hStack` is non-empty: `init` and `last` throw on an empty list.
 */
override def endDocument(): Unit = {
  val afterBottom: List[Node] = hStack.init
  val bottom: Node = hStack.last
  epilogue = afterBottom.reverse
  hStack = List(bottom)
}

/**
* Processing instruction.
*/
Expand Down
2 changes: 2 additions & 0 deletions shared/src/main/scala/scala/xml/parsing/MarkupParser.scala
Original file line number Diff line number Diff line change
Expand Up @@ -98,6 +98,8 @@ trait MarkupParser extends MarkupParserCommon with TokenTests {
var extIndex = -1

/** holds temporary values of pos */
// Note: this is clearly an override, but marking it as such causes a "...cannot override a mutable variable"
// error with Scala 3; whether it works with Scala 3 when not explicitly marked as an override remains to be seen...
var tmppos: Int = _

/** holds the next character */
Expand Down

0 comments on commit 1af1168

Please sign in to comment.