Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

fix #1063: poor memory performance when duping DocumentFragment #1834

Merged
merged 5 commits into from
Dec 10, 2018
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,8 @@
* `XML::Attr#value=` allows HTML node attribute values to be set to either a blank string or an empty boolean attribute. [#1800]
* Introduce `XML::Node#wrap` which does what `XML::NodeSet#wrap` has always done, but for a single node. [#1531] (Thanks, @ethirajsrinivasan!)
* [MRI] Improve installation experience on macOS High Sierra (Darwin). [#1812, #1813] (Thanks, @gpakosz and @nurse!)
* [MRI] Node#dup supports copying a node directly to a new document. See the method documentation for details.
* [MRI] DocumentFragment#dup is now more memory-efficient, avoiding making unnecessary copies. [#1063]
* [JRuby] NodeSet has been rewritten to improve performance! [#1795]


Expand Down
33 changes: 25 additions & 8 deletions ext/nokogiri/xml_node.c
Original file line number Diff line number Diff line change
Expand Up @@ -113,7 +113,7 @@ static void relink_namespace(xmlNodePtr reparented)
&& ns != reparented->ns
&& xmlStrEqual(ns->prefix, reparented->ns->prefix)
&& xmlStrEqual(ns->href, reparented->ns->href)
) {
) {
xmlSetNs(reparented, ns);
}
}
Expand Down Expand Up @@ -532,22 +532,39 @@ static VALUE internal_subset(VALUE self)
/*
* call-seq:
* dup
* dup(depth)
* dup(depth, new_parent_doc)
*
* Copy this node. An optional depth may be passed in, but it defaults
* to a deep copy. 0 is a shallow copy, 1 is a deep copy.
* Copy this node.
* An optional depth may be passed in. 0 is a shallow copy, 1 (the default) is a deep copy.
* An optional new_parent_doc may also be passed in, which will be the new
* node's parent document. Defaults to the current node's document.
* current document.
*/
static VALUE duplicate_node(int argc, VALUE *argv, VALUE self)
{
VALUE level;
VALUE r_level, r_new_parent_doc;
int level;
int n_args;
xmlDocPtr new_parent_doc;
xmlNodePtr node, dup;

if(rb_scan_args(argc, argv, "01", &level) == 0) {
level = INT2NUM((long)1);
Data_Get_Struct(self, xmlNode, node);

n_args = rb_scan_args(argc, argv, "02", &r_level, &r_new_parent_doc);

if (n_args < 1) {
r_level = INT2NUM((long)1);
}
level = (int)NUM2INT(r_level);

Data_Get_Struct(self, xmlNode, node);
if (n_args < 2) {
new_parent_doc = node->doc;
} else {
Data_Get_Struct(r_new_parent_doc, xmlDoc, new_parent_doc);
}

dup = xmlDocCopyNode(node, node->doc, (int)NUM2INT(level));
dup = xmlDocCopyNode(node, new_parent_doc, level);
if(dup == NULL) { return Qnil; }

nokogiri_root_node(dup);
Expand Down
11 changes: 11 additions & 0 deletions lib/nokogiri/xml/document_fragment.rb
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,17 @@ def initialize document, tags = nil, ctx = nil
children.each { |child| child.parent = self }
end

if Nokogiri.uses_libxml?
def dup
new_document = document.dup
new_fragment = XML::DocumentFragment.new(new_document)
children.each do |child|
child.dup(1, new_document).parent = new_fragment
end
new_fragment
end
end

###
# return the name for DocumentFragment
def name
Expand Down
14 changes: 14 additions & 0 deletions test/xml/test_document_fragment.rb
Original file line number Diff line number Diff line change
Expand Up @@ -263,6 +263,20 @@ def test_issue_1077_parsing_of_frozen_strings
Nokogiri::XML::DocumentFragment.parse(input) # assert_nothing_raised
end

def test_dup_creates_tree_with_identical_structure
original = Nokogiri::XML::DocumentFragment.parse("<div><p>hello</p></div>")
duplicate = original.dup
assert_equal original.to_html, duplicate.to_html
end

def test_dup_creates_mutable_tree
original = Nokogiri::XML::DocumentFragment.parse("<div><p>hello</p></div>")
duplicate = original.dup
duplicate.at_css("div").add_child("<b>hello there</b>")
assert_nil original.at_css("b")
assert_not_nil duplicate.at_css("b")
end

if Nokogiri.uses_libxml?
def test_for_libxml_in_context_fragment_parsing_bug_workaround
10.times do
Expand Down
38 changes: 38 additions & 0 deletions test/xml/test_node.rb
Original file line number Diff line number Diff line change
Expand Up @@ -165,6 +165,44 @@ def test_parse_with_unparented_html_fragment_text_context_node
assert_equal x.first.name, "span"
end

def test_dup_is_deep_copy_by_default
doc = XML::Document.parse "<root><div><p>hello</p></div></root>"
div = doc.at_css "div"
node = div.dup
assert_equal 1, node.children.length
assert_equal "<p>hello</p>", node.children.first.to_html
end

def test_dup_deep_copy
doc = XML::Document.parse "<root><div><p>hello</p></div></root>"
div = doc.at_css "div"
node = div.dup(1)
assert_equal 1, node.children.length
assert_equal "<p>hello</p>", node.children.first.to_html
end

def test_dup_shallow_copy
doc = XML::Document.parse "<root><div><p>hello</p></div></root>"
div = doc.at_css "div"
node = div.dup(0)
assert_equal 0, node.children.length
end

if Nokogiri.uses_libxml?
def test_dup_to_another_document
doc1 = HTML::Document.parse "<root><div><p>hello</p></div></root>"
doc2 = HTML::Document.parse "<div></div>"

div = doc1.at_css "div"
duplicate_div = div.dup(1, doc2)

assert_not_nil doc1.at_css("div")
assert_equal doc2, duplicate_div.document
assert_equal 1, duplicate_div.children.length
assert_equal "<p>hello</p>", duplicate_div.children.first.to_html
end
end

def test_subclass_dup
subclass = Class.new(Nokogiri::XML::Node)
node = subclass.new('foo', @xml).dup
Expand Down