Skip to content

Commit

Permalink
feat: HTML5::DocumentFragment#quirks_mode
Browse files Browse the repository at this point in the history
and make sure the behavior of quirks mode matches the discussion at
issue #2646.
  • Loading branch information
flavorjones committed Dec 20, 2022
1 parent 1fd1aa9 commit 3edd6c8
Show file tree
Hide file tree
Showing 3 changed files with 99 additions and 1 deletion.
6 changes: 5 additions & 1 deletion ext/nokogiri/gumbo.c
Original file line number Diff line number Diff line change
Expand Up @@ -518,8 +518,11 @@ fragment(
// Quirks mode.
VALUE doc = rb_funcall(doc_fragment, rb_intern_const("document"), 0);
VALUE dtd = rb_funcall(doc, internal_subset, 0);
if (NIL_P(dtd)) {
VALUE doc_quirks_mode = rb_iv_get(doc, "@quirks_mode");
if (NIL_P(ctx) || NIL_P(doc_quirks_mode)) {
quirks_mode = GUMBO_DOCTYPE_NO_QUIRKS;
} else if (NIL_P(dtd)) {
quirks_mode = GUMBO_DOCTYPE_QUIRKS;
} else {
VALUE dtd_name = rb_funcall(dtd, name, 0);
VALUE pubid = rb_funcall(dtd, rb_intern_const("external_id"), 0);
Expand Down Expand Up @@ -566,6 +569,7 @@ fragment_continue(VALUE parse_args)
args->doc = NULL; // The Ruby runtime owns doc so make sure we don't delete it.
xmlNodePtr xml_frag = extract_xml_node(doc_fragment);
build_tree(xml_doc, xml_frag, output->root);
rb_iv_set(doc_fragment, "@quirks_mode", INT2NUM(output->document->v.document.doc_type_quirks_mode));
add_errors(output, doc_fragment, args->input, rb_utf8_str_new_static("#fragment", 9));
return Qnil;
}
Expand Down
7 changes: 7 additions & 0 deletions lib/nokogiri/html5/document_fragment.rb
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,13 @@ class DocumentFragment < Nokogiri::HTML4::DocumentFragment
attr_accessor :document
attr_accessor :errors

# Get the parser's quirks mode value. See HTML5::QuirksMode.
#
# This method returns `nil` if the parser was not invoked (e.g., `Nokogiri::HTML5::DocumentFragment.new(doc)`).
#
# Since v1.14.0
attr_reader :quirks_mode

# Create a document fragment.
def initialize(doc, tags = nil, ctx = nil, options = {})
self.document = doc
Expand Down
87 changes: 87 additions & 0 deletions test/html5/test_quirks_mode.rb
Original file line number Diff line number Diff line change
Expand Up @@ -29,4 +29,91 @@
end
end
end

# Specs for HTML5::DocumentFragment#quirks_mode.
#
# Every parsing example feeds the same markup to the fragment parser and then
# checks two things: the value reported by #quirks_mode and the serialized
# tree. The examples vary (a) whether the fragment is created from a context
# node or directly from the document, and (b) the state of the owning document
# (never parsed, parsed with a doctype, parsed without a doctype).
describe "DocumentFragment#quirks_mode" do
# Markup whose resulting tree differs between the two expectations below:
# in the no-quirks expectation <table> is a sibling of <p>, while in the
# quirks expectation <table> is nested inside <p>.
let(:input) { "<p><table>" }
let(:no_quirks_output) { "<p></p><table></table>" }
let(:quirks_output) { "<p><table></table></p>" }

# A fragment built with .new and no markup is expected to report nil.
describe "without parsing anything" do
let(:fragment) { Nokogiri::HTML5::DocumentFragment.new(Nokogiri::HTML5::Document.new) }

it "returns nil" do
assert_nil(fragment.quirks_mode)
end
end

# Fragments created through a node's #fragment method (the node is the context).
describe "in context" do
# Document created with .new, i.e. never produced by parsing markup.
describe "document did not invoke the parser" do
let(:document) { Nokogiri::HTML5::Document.new }

it "parses the fragment in no-quirks mode" do
context_node = document.create_element("div")
fragment = context_node.fragment(input)

assert_equal(Nokogiri::HTML5::QuirksMode::NO_QUIRKS, fragment.quirks_mode)
assert_equal(no_quirks_output, fragment.to_html)
end
end

describe "document has a doctype" do
let(:document) { Nokogiri::HTML5::Document.parse("<!DOCTYPE html><div>") }

it "parses the fragment in no-quirks mode" do
context_node = document.at_css("div")
fragment = context_node.fragment(input)

assert_equal(Nokogiri::HTML5::QuirksMode::NO_QUIRKS, fragment.quirks_mode)
assert_equal(no_quirks_output, fragment.to_html)
end
end

# The only parsing example in this group that expects QUIRKS: a parsed
# document with no doctype, with the fragment created from a context node.
describe "document does not have a doctype" do
let(:document) { Nokogiri::HTML5::Document.parse("<div>") }

it "parses the fragment in quirks mode" do
context_node = document.at_css("div")
fragment = context_node.fragment(input)

assert_equal(Nokogiri::HTML5::QuirksMode::QUIRKS, fragment.quirks_mode)
assert_equal(quirks_output, fragment.to_html)
end
end
end

# Fragments created through Document#fragment rather than from a node.
# All three examples expect NO_QUIRKS, including the document parsed
# without a doctype.
describe "no context" do
describe "document did not invoke the parser" do
let(:document) { Nokogiri::HTML5::Document.new }

it "parses the fragment in no-quirks mode" do
fragment = document.fragment(input)

assert_equal(Nokogiri::HTML5::QuirksMode::NO_QUIRKS, fragment.quirks_mode)
assert_equal(no_quirks_output, fragment.to_html)
end
end

describe "document has a doctype" do
let(:document) { Nokogiri::HTML5::Document.parse("<!DOCTYPE html><div>") }

it "parses the fragment in no-quirks mode" do
fragment = document.fragment(input)

assert_equal(Nokogiri::HTML5::QuirksMode::NO_QUIRKS, fragment.quirks_mode)
assert_equal(no_quirks_output, fragment.to_html)
end
end

describe "document does not have a doctype" do
let(:document) { Nokogiri::HTML5::Document.parse("<div>") }

it "parses the fragment in no-quirks mode" do
fragment = document.fragment(input)

assert_equal(Nokogiri::HTML5::QuirksMode::NO_QUIRKS, fragment.quirks_mode)
assert_equal(no_quirks_output, fragment.to_html)
end
end
end
end
end if Nokogiri.uses_gumbo?

0 comments on commit 3edd6c8

Please sign in to comment.