From 3edd6c8ead186f57924ee175d15817bdaac66977 Mon Sep 17 00:00:00 2001 From: Mike Dalessio Date: Tue, 20 Dec 2022 12:24:39 -0500 Subject: [PATCH] feat: HTML5::DocumentFragment#quirks_mode and make sure the behavior of quirks mode matches the discussion at issue #2646. --- ext/nokogiri/gumbo.c | 6 +- lib/nokogiri/html5/document_fragment.rb | 7 ++ test/html5/test_quirks_mode.rb | 87 +++++++++++++++++++++++++ 3 files changed, 99 insertions(+), 1 deletion(-) diff --git a/ext/nokogiri/gumbo.c b/ext/nokogiri/gumbo.c index 637e545c1d..8c5cfc7b39 100644 --- a/ext/nokogiri/gumbo.c +++ b/ext/nokogiri/gumbo.c @@ -518,8 +518,11 @@ fragment( // Quirks mode. VALUE doc = rb_funcall(doc_fragment, rb_intern_const("document"), 0); VALUE dtd = rb_funcall(doc, internal_subset, 0); - if (NIL_P(dtd)) { + VALUE doc_quirks_mode = rb_iv_get(doc, "@quirks_mode"); + if (NIL_P(ctx) || NIL_P(doc_quirks_mode)) { quirks_mode = GUMBO_DOCTYPE_NO_QUIRKS; + } else if (NIL_P(dtd)) { + quirks_mode = GUMBO_DOCTYPE_QUIRKS; } else { VALUE dtd_name = rb_funcall(dtd, name, 0); VALUE pubid = rb_funcall(dtd, rb_intern_const("external_id"), 0); @@ -566,6 +569,7 @@ fragment_continue(VALUE parse_args) args->doc = NULL; // The Ruby runtime owns doc so make sure we don't delete it. xmlNodePtr xml_frag = extract_xml_node(doc_fragment); build_tree(xml_doc, xml_frag, output->root); + rb_iv_set(doc_fragment, "@quirks_mode", INT2NUM(output->document->v.document.doc_type_quirks_mode)); add_errors(output, doc_fragment, args->input, rb_utf8_str_new_static("#fragment", 9)); return Qnil; } diff --git a/lib/nokogiri/html5/document_fragment.rb b/lib/nokogiri/html5/document_fragment.rb index bf61fdb3b0..758882bf7e 100644 --- a/lib/nokogiri/html5/document_fragment.rb +++ b/lib/nokogiri/html5/document_fragment.rb @@ -28,6 +28,13 @@ class DocumentFragment < Nokogiri::HTML4::DocumentFragment attr_accessor :document attr_accessor :errors + # Get the parser's quirks mode value. See HTML5::QuirksMode. + # + # This method returns `nil` if the parser was not invoked (e.g., `Nokogiri::HTML5::DocumentFragment.new(doc)`). + # + # Since v1.14.0 + attr_reader :quirks_mode + # Create a document fragment. def initialize(doc, tags = nil, ctx = nil, options = {}) self.document = doc diff --git a/test/html5/test_quirks_mode.rb b/test/html5/test_quirks_mode.rb index 411b3afd23..82fa1d6d34 100644 --- a/test/html5/test_quirks_mode.rb +++ b/test/html5/test_quirks_mode.rb @@ -29,4 +29,91 @@ end end end + + describe "DocumentFragment#quirks_mode" do + let(:input) { "

" } + let(:no_quirks_output) { "

" } + let(:quirks_output) { "

" } + + describe "without parsing anything" do + let(:fragment) { Nokogiri::HTML5::DocumentFragment.new(Nokogiri::HTML5::Document.new) } + + it "returns nil" do + assert_nil(fragment.quirks_mode) + end + end + + describe "in context" do + describe "document did not invoke the parser" do + let(:document) { Nokogiri::HTML5::Document.new } + + it "parses the fragment in no-quirks mode" do + context_node = document.create_element("div") + fragment = context_node.fragment(input) + + assert_equal(Nokogiri::HTML5::QuirksMode::NO_QUIRKS, fragment.quirks_mode) + assert_equal(no_quirks_output, fragment.to_html) + end + end + + describe "document has a doctype" do + let(:document) { Nokogiri::HTML5::Document.parse("
") } + + it "parses the fragment in no-quirks mode" do + context_node = document.at_css("div") + fragment = context_node.fragment(input) + + assert_equal(Nokogiri::HTML5::QuirksMode::NO_QUIRKS, fragment.quirks_mode) + assert_equal(no_quirks_output, fragment.to_html) + end + end + + describe "document does not have a doctype" do + let(:document) { Nokogiri::HTML5::Document.parse("
") } + + it "parses the fragment in quirks mode" do + context_node = document.at_css("div") + fragment = context_node.fragment(input) + + assert_equal(Nokogiri::HTML5::QuirksMode::QUIRKS, fragment.quirks_mode) + assert_equal(quirks_output, fragment.to_html) + end + end + end + + describe "no context" do + describe "document did not invoke the parser" do + let(:document) { Nokogiri::HTML5::Document.new } + + it "parses the fragment in no-quirks mode" do + fragment = document.fragment(input) + + assert_equal(Nokogiri::HTML5::QuirksMode::NO_QUIRKS, fragment.quirks_mode) + assert_equal(no_quirks_output, fragment.to_html) + end + end + + describe "document has a doctype" do + let(:document) { Nokogiri::HTML5::Document.parse("
") } + + it "parses the fragment in no-quirks mode" do + fragment = document.fragment(input) + + assert_equal(Nokogiri::HTML5::QuirksMode::NO_QUIRKS, fragment.quirks_mode) + assert_equal(no_quirks_output, fragment.to_html) + end + end + + describe "document does not have a doctype" do + let(:document) { Nokogiri::HTML5::Document.parse("
") } + + it "parses the fragment in no-quirks mode" do + fragment = document.fragment(input) + + assert_equal(Nokogiri::HTML5::QuirksMode::NO_QUIRKS, fragment.quirks_mode) + assert_equal(no_quirks_output, fragment.to_html) + end + end + end + end end if Nokogiri.uses_gumbo?