From d0f14d1b58f814b61162f8aee393ff1c77bf551e Mon Sep 17 00:00:00 2001 From: Mike Dalessio Date: Tue, 25 May 2021 23:06:58 -0400 Subject: [PATCH] fix: DocumentFragment#path checks for error case in libxml 2.9.11+ Fixes #2250 --- ext/nokogiri/xml_node.c | 24 ++++++++++++++++-------- test/xml/test_document.rb | 12 ++++++++++++ test/xml/test_document_fragment.rb | 19 +++++++++++++++++++ 3 files changed, 47 insertions(+), 8 deletions(-) diff --git a/ext/nokogiri/xml_node.c b/ext/nokogiri/xml_node.c index 93e37c35a67..5117a200539 100644 --- a/ext/nokogiri/xml_node.c +++ b/ext/nokogiri/xml_node.c @@ -1292,17 +1292,25 @@ get_name(VALUE self) * Returns the path associated with this Node */ static VALUE -path(VALUE self) +noko_xml_node_path(VALUE rb_node) { - xmlNodePtr node; - xmlChar *path ; + xmlNodePtr c_node; + xmlChar *c_path ; VALUE rval; - Data_Get_Struct(self, xmlNode, node); + Data_Get_Struct(rb_node, xmlNode, c_node); + + c_path = xmlGetNodePath(c_node); + if (c_path == NULL) { + // see https://github.com/sparklemotion/nokogiri/issues/2250 + // this behavior is clearly undesirable, but is what libxml <= 2.9.10 returned, and so we + // do this for now to preserve the behavior across libxml2 versions. + rval = NOKOGIRI_STR_NEW2("?"); + } else { + rval = NOKOGIRI_STR_NEW2(c_path); + xmlFree(c_path); + } - path = xmlGetNodePath(node); - rval = NOKOGIRI_STR_NEW2(path); - xmlFree(path); return rval ; } @@ -1779,7 +1787,7 @@ noko_init_xml_node() rb_define_method(cNokogiriXmlNode, "next_element", next_element, 0); rb_define_method(cNokogiriXmlNode, "previous_element", previous_element, 0); rb_define_method(cNokogiriXmlNode, "node_type", node_type, 0); - rb_define_method(cNokogiriXmlNode, "path", path, 0); + rb_define_method(cNokogiriXmlNode, "path", noko_xml_node_path, 0); rb_define_method(cNokogiriXmlNode, "key?", key_eh, 1); rb_define_method(cNokogiriXmlNode, "namespaced_key?", namespaced_key_eh, 2); rb_define_method(cNokogiriXmlNode, "blank?", blank_eh, 0); diff --git a/test/xml/test_document.rb b/test/xml/test_document.rb index 2462ebab8be..95780c43ef0 100644 --- a/test/xml/test_document.rb +++ b/test/xml/test_document.rb @@ -1167,6 +1167,18 @@ def initialize(*args) assert_equal("expected Nokogiri::XML::Node but received Nokogiri::XML::NodeSet", e.message); end end + + describe "#path" do + it "should return '/'" do + xml = <<~EOF + + EOF + + doc = Nokogiri::XML::Document.parse(xml) + assert_equal("/", doc.path) + assert_equal(doc, doc.at_xpath(doc.path)) # make sure we can round-trip + end + end end end end diff --git a/test/xml/test_document_fragment.rb b/test/xml/test_document_fragment.rb index b7de6f34446..348c7d7939f 100644 --- a/test/xml/test_document_fragment.rb +++ b/test/xml/test_document_fragment.rb @@ -363,6 +363,25 @@ def initialize(*args) end end end + + describe "#path" do + it "should return '?'" do + # see https://github.com/sparklemotion/nokogiri/issues/2250 + # this behavior is clearly undesirable, but is what libxml <= 2.9.10 returned, and so we + # do this for now to preserve the behavior across libxml2 versions. + xml = <<~EOF + + + EOF + + frag = Nokogiri::XML::DocumentFragment.parse(xml) + assert_equal "?", frag.path + + # # TODO: we should circle back and fix both the `#path` behavior and the `#xpath` + # # behavior so we can round-trip and get the DocumentFragment back again. + # assert_equal(frag, frag.at_xpath(doc.path)) # make sure we can round-trip + end + end end end end