diff --git a/CHANGELOG.md b/CHANGELOG.md index f674775a58b..0bcbb99ccd8 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -10,6 +10,8 @@ * `XML::Attr#value=` allows HTML node attribute values to be set to either a blank string or an empty boolean attribute. [#1800] * Introduce `XML::Node#wrap` which does what `XML::NodeSet#wrap` has always done, but for a single node. [#1531] (Thanks, @ethirajsrinivasan!) * [MRI] Improve installation experience on macOS High Sierra (Darwin). [#1812, #1813] (Thanks, @gpakosz and @nurse!) +* [MRI] Node#dup supports copying a node directly to a new document. See the method documentation for details. +* [MRI] DocumentFragment#dup is now more memory-efficient, avoiding making unnecessary copies. [#1063] * [JRuby] NodeSet has been rewritten to improve performance! [#1795] diff --git a/ext/nokogiri/xml_node.c b/ext/nokogiri/xml_node.c index 91e0351e863..8c637c1b6b0 100644 --- a/ext/nokogiri/xml_node.c +++ b/ext/nokogiri/xml_node.c @@ -113,7 +113,7 @@ static void relink_namespace(xmlNodePtr reparented) && ns != reparented->ns && xmlStrEqual(ns->prefix, reparented->ns->prefix) && xmlStrEqual(ns->href, reparented->ns->href) - ) { + ) { xmlSetNs(reparented, ns); } } @@ -532,22 +532,39 @@ static VALUE internal_subset(VALUE self) /* * call-seq: * dup + * dup(depth) + * dup(depth, new_parent_doc) * - * Copy this node. An optional depth may be passed in, but it defaults - * to a deep copy. 0 is a shallow copy, 1 is a deep copy. + * Copy this node. + * An optional depth may be passed in. 0 is a shallow copy, 1 (the default) is a deep copy. + * An optional new_parent_doc may also be passed in, which will be the new + * node's parent document. Defaults to the current node's + * document. 
*/ static VALUE duplicate_node(int argc, VALUE *argv, VALUE self) { - VALUE level; + VALUE r_level, r_new_parent_doc; + int level; + int n_args; + xmlDocPtr new_parent_doc; xmlNodePtr node, dup; - if(rb_scan_args(argc, argv, "01", &level) == 0) { - level = INT2NUM((long)1); + Data_Get_Struct(self, xmlNode, node); + + n_args = rb_scan_args(argc, argv, "02", &r_level, &r_new_parent_doc); + + if (n_args < 1) { + r_level = INT2NUM((long)1); } + level = (int)NUM2INT(r_level); - Data_Get_Struct(self, xmlNode, node); + if (n_args < 2) { + new_parent_doc = node->doc; + } else { + Data_Get_Struct(r_new_parent_doc, xmlDoc, new_parent_doc); + } - dup = xmlDocCopyNode(node, node->doc, (int)NUM2INT(level)); + dup = xmlDocCopyNode(node, new_parent_doc, level); if(dup == NULL) { return Qnil; } nokogiri_root_node(dup); diff --git a/lib/nokogiri/xml/document_fragment.rb b/lib/nokogiri/xml/document_fragment.rb index c2c5bec7350..cfa826b4216 100644 --- a/lib/nokogiri/xml/document_fragment.rb +++ b/lib/nokogiri/xml/document_fragment.rb @@ -25,6 +25,17 @@ def initialize document, tags = nil, ctx = nil children.each { |child| child.parent = self } end + if Nokogiri.uses_libxml? + def dup + new_document = document.dup + new_fragment = XML::DocumentFragment.new(new_document) + children.each do |child| + child.dup(1, new_document).parent = new_fragment + end + new_fragment + end + end + ### # return the name for DocumentFragment def name diff --git a/test/xml/test_document_fragment.rb b/test/xml/test_document_fragment.rb index c919ee2068f..3ff801e847d 100644 --- a/test/xml/test_document_fragment.rb +++ b/test/xml/test_document_fragment.rb @@ -263,6 +263,20 @@ def test_issue_1077_parsing_of_frozen_strings Nokogiri::XML::DocumentFragment.parse(input) # assert_nothing_raised end + def test_dup_creates_tree_with_identical_structure + original = Nokogiri::XML::DocumentFragment.parse("
hello
hello
hello
hello
", node.children.first.to_html + end + + def test_dup_deep_copy + doc = XML::Document.parse "hello
hello
", node.children.first.to_html + end + + def test_dup_shallow_copy + doc = XML::Document.parse "hello
hello
hello
", duplicate_div.children.first.to_html + end + end + def test_subclass_dup subclass = Class.new(Nokogiri::XML::Node) node = subclass.new('foo', @xml).dup