From 2054024362d60693cb44138e379e87668a7fb115 Mon Sep 17 00:00:00 2001 From: NAITOH Jun Date: Mon, 8 Jul 2024 06:59:46 +0900 Subject: [PATCH] Do not output :text event after the root tag is closed. ## Why? GitHub: fix GH-163 ## Change - sax_test.rb ``` require 'rexml/parsers/sax2parser' require 'rexml/parsers/pullparser' require 'rexml/parsers/streamparser' require 'libxml-ruby' require 'nokogiri' xml = <<EOS <root> a b c </root> <!-- ok comment --> <?abc version="1.0" ?> EOS class Listener def method_missing(name, *args) p [name, *args] end end puts "LibXML(SAX)" parser = LibXML::XML::SaxParser.string(xml) parser.callbacks = Listener.new parser.parse puts "" puts "Nokogiri(SAX)" parser = Nokogiri::XML::SAX::Parser.new(Listener.new) parser.parse(xml) puts "" puts "REXML(SAX)" parser = REXML::Parsers::SAX2Parser.new(xml) parser.listen(Listener.new) parser.parse puts "" puts "REXML(Pull)" parser = REXML::Parsers::PullParser.new(xml) while parser.has_next? res = parser.pull p res end puts "" puts "REXML(Stream)" parser = REXML::Parsers::StreamParser.new(xml, Listener.new).parse ``` ## Before (rexml 3.3.1) ``` LibXML(SAX) [:on_start_document] [:on_start_element_ns, "root", {}, nil, nil, {}] [:on_characters, " a b c \n"] [:on_end_element_ns, "root", nil, nil] [:on_comment, " ok comment "] [:on_processing_instruction, "abc", "version=\"1.0\" "] [:on_end_document] Nokogiri(SAX) [:start_document] [:start_element_namespace, "root", [], nil, nil, []] [:characters, " a b c \n"] [:end_element_namespace, "root", nil, nil] [:comment, " ok comment "] [:processing_instruction, "abc", "version=\"1.0\" "] [:end_document] REXML(SAX) [:start_document] [:start_element, nil, "root", "root", {}] [:progress, 6] [:characters, " a b c \n"] [:progress, 15] [:end_element, nil, "root", "root"] [:progress, 22] [:characters, "\n"] [:progress, 23] [:comment, " ok comment "] [:progress, 42] [:characters, "\n"] [:progress, 43] [:processing_instruction, "abc", " version=\"1.0\" "] [:progress, 65] [:characters, "\n"] [:progress, 66] [:end_document] REXML(Pull) 
start_element: ["root", {}] text: [" a b c \n", " a b c \n"] end_element: ["root"] text: ["\n", "\n"] comment: [" ok comment "] text: ["\n", "\n"] processing_instruction: ["abc", " version=\"1.0\" "] text: ["\n", "\n"] REXML(Stream) [:tag_start, "root", {}] [:text, " a b c \n"] [:tag_end, "root"] [:text, "\n"] [:comment, " ok comment "] [:text, "\n"] [:instruction, "abc", " version=\"1.0\" "] [:text, "\n"] ``` ## After(This PR) ``` REXML(SAX) [:start_document] [:start_element, nil, "root", "root", {}] [:progress, 6] [:characters, " a b c \n"] [:progress, 15] [:end_element, nil, "root", "root"] [:progress, 22] [:comment, " ok comment "] [:progress, 42] [:processing_instruction, "abc", " version=\"1.0\" "] [:progress, 65] [:end_document] REXML(Pull) start_element: ["root", {}] text: [" a b c \n", " a b c \n"] end_element: ["root"] comment: [" ok comment "] processing_instruction: ["abc", " version=\"1.0\" "] end_document: [] REXML(Stream) [:tag_start, "root", {}] [:text, " a b c \n"] [:tag_end, "root"] [:comment, " ok comment "] [:instruction, "abc", " version=\"1.0\" "] ``` --- lib/rexml/parsers/baseparser.rb | 1 + test/parse/test_text.rb | 15 +++++++++++++++ test/parser/test_ultra_light.rb | 1 - test/test_core.rb | 2 +- test/test_document.rb | 2 +- 5 files changed, 18 insertions(+), 3 deletions(-) diff --git a/lib/rexml/parsers/baseparser.rb b/lib/rexml/parsers/baseparser.rb index 2a448e13..5cf1af21 100644 --- a/lib/rexml/parsers/baseparser.rb +++ b/lib/rexml/parsers/baseparser.rb @@ -477,6 +477,7 @@ def pull_event unless /\A\s*\z/.match?(text) raise ParseException.new("Malformed XML: Extra content at the end of the document (got '#{text}')", @source) end + return pull_event end return [ :text, text ] end diff --git a/test/parse/test_text.rb b/test/parse/test_text.rb index f1622b71..1acefc40 100644 --- a/test/parse/test_text.rb +++ b/test/parse/test_text.rb @@ -21,5 +21,20 @@ def test_after_root DETAIL end end + + def test_whitespace_characters_after_root + parser 
= REXML::Parsers::BaseParser.new('<a>b</a> ') + + events = [] + while parser.has_next? + event = parser.pull + case event[0] + when :text + events << event[1] + end + end + + assert_equal(["b"], events) + end end end diff --git a/test/parser/test_ultra_light.rb b/test/parser/test_ultra_light.rb index 44fd1d1e..b3f576ff 100644 --- a/test/parser/test_ultra_light.rb +++ b/test/parser/test_ultra_light.rb @@ -17,7 +17,6 @@ def test_entity_declaration [:entitydecl, "name", "value"] ], [:start_element, :parent, "root", {}], - [:text, "\n"], ], parse(<<-INTERNAL_SUBSET)) diff --git a/test/test_core.rb b/test/test_core.rb index 44e2e7ea..e1fba8a7 100644 --- a/test/test_core.rb +++ b/test/test_core.rb @@ -826,7 +826,7 @@ def test_deep_clone end def test_whitespace_before_root - a = < diff --git a/test/test_document.rb b/test/test_document.rb index 9cd77c4e..33cf4002 100644 --- a/test/test_document.rb +++ b/test/test_document.rb @@ -435,7 +435,7 @@ def test_utf_16 actual_xml = "" document.write(actual_xml) - expected_xml = <<-EOX.encode("UTF-16BE") + expected_xml = <<-EOX.chomp.encode("UTF-16BE") \ufeff Hello world! EOX