From 21d1ab21c7ab87fd1bea27a9549e07f08c0ae3ae Mon Sep 17 00:00:00 2001 From: Lars Kanis Date: Sat, 21 Nov 2015 23:47:05 +0100 Subject: [PATCH] Revert capturing of errors while xmlCopyDoc()/xmlDocCopyNode() and sync behavior with JRuby. This was introduced due to https://github.com/sparklemotion/nokogiri/issues/1196 and https://github.com/sparklemotion/nokogiri/issues/1208 . However it turned out, that the change in libxml-2.9.2 was a regression, that was fixed in: https://bugzilla.gnome.org/show_bug.cgi?id=737840 and libxml-2.9.3. If I read the libxml sources right, it seems, that xmlDocCopyNode() is not intended to emit any such warnings at all. Only errors leading to a failure of the function are emitted. However these errors should be reported to ruby space in the form of exceptions (this is not yet implemented - currently either nil is returned or a generic error text is raised). This patch also synchronizes the behavior on MRI to that of JRuby, so that the error list is filled from the parser only and that it is shared after Document#dup . --- ext/nokogiri/xml_document.c | 6 ++---- ext/nokogiri/xml_node.c | 10 +--------- test/html/test_document.rb | 17 +++++++++-------- test/html/test_document_fragment.rb | 21 +++++++++++---------- 4 files changed, 23 insertions(+), 31 deletions(-) diff --git a/ext/nokogiri/xml_document.c b/ext/nokogiri/xml_document.c index d8adbbface..d5f2733fb6 100644 --- a/ext/nokogiri/xml_document.c +++ b/ext/nokogiri/xml_document.c @@ -322,22 +322,20 @@ static VALUE duplicate_document(int argc, VALUE *argv, VALUE self) xmlDocPtr doc, dup; VALUE copy; VALUE level; - VALUE error_list = rb_ary_new(); + VALUE error_list; if(rb_scan_args(argc, argv, "01", &level) == 0) level = INT2NUM((long)1); Data_Get_Struct(self, xmlDoc, doc); - xmlResetLastError(); - xmlSetStructuredErrorFunc((void *)error_list, Nokogiri_error_array_pusher); dup = xmlCopyDoc(doc, (int)NUM2INT(level)); - xmlSetStructuredErrorFunc(NULL, NULL); if(dup == NULL) return Qnil; dup->type = doc->type; copy = Nokogiri_wrap_xml_document(rb_obj_class(self), dup); + error_list = rb_iv_get(self, "@errors"); rb_iv_set(copy, "@errors", error_list); return copy ; } diff --git a/ext/nokogiri/xml_node.c b/ext/nokogiri/xml_node.c index 75d51559a8..e82108af27 100644 --- a/ext/nokogiri/xml_node.c +++ b/ext/nokogiri/xml_node.c @@ -234,15 +234,7 @@ static VALUE reparent_node_with(VALUE pivot_obj, VALUE reparentee_obj, pivot_rep * reparent the actual reparentee, so we reparent a duplicate. */ nokogiri_root_node(reparentee); - - xmlResetLastError(); - xmlSetStructuredErrorFunc((void *)rb_iv_get(DOC_RUBY_OBJECT(pivot->doc), "@errors"), Nokogiri_error_array_pusher); - - reparentee = xmlDocCopyNode(reparentee, pivot->doc, 1) ; - - xmlSetStructuredErrorFunc(NULL, NULL); - - if (! reparentee) { + if (!(reparentee = xmlDocCopyNode(reparentee, pivot->doc, 1))) { rb_raise(rb_eRuntimeError, "Could not reparent node (xmlDocCopyNode)"); } } diff --git a/test/html/test_document.rb b/test/html/test_document.rb index b013f35a9e..69a0ea7ea0 100644 --- a/test/html/test_document.rb +++ b/test/html/test_document.rb @@ -628,19 +628,20 @@ def test_capturing_nonparse_errors_during_document_clone end def test_capturing_nonparse_errors_during_node_copy_between_docs - skip("JRuby HTML parse errors are different than libxml2's") if Nokogiri.jruby? - - doc1 = Nokogiri::HTML("
one
") - doc2 = Nokogiri::HTML("
two
") + # Errors should be emitted while parsing only, and should not change when moving nodes. + doc1 = Nokogiri::HTML("one") + doc2 = Nokogiri::HTML("two") node1 = doc1.at_css("#unique") node2 = doc2.at_css("#unique") - - original_errors = doc1.errors.dup + original_errors1 = doc1.errors.dup + original_errors2 = doc2.errors.dup + assert original_errors1.any?{|e| e.to_s =~ /Tag diva invalid/ }, "it should complain about the tag name" + assert original_errors2.any?{|e| e.to_s =~ /Tag dive invalid/ }, "it should complain about the tag name" node1.add_child node2 - assert_equal original_errors.length+1, doc1.errors.length - assert_match(/ID unique already defined/, doc1.errors.last.to_s) + assert_equal original_errors1, doc1.errors + assert_equal original_errors2, doc2.errors end def test_silencing_nonparse_errors_during_attribute_insertion_1262 diff --git a/test/html/test_document_fragment.rb b/test/html/test_document_fragment.rb index ed166806a9..9c60e1fd30 100644 --- a/test/html/test_document_fragment.rb +++ b/test/html/test_document_fragment.rb @@ -270,7 +270,7 @@ def test_error_propagation_on_fragment_parse_in_node_context_should_not_include_ def test_capturing_nonparse_errors_during_fragment_clone # see https://github.com/sparklemotion/nokogiri/issues/1196 for background - original = Nokogiri::HTML.fragment("
") + original = Nokogiri::HTML.fragment("
") original_errors = original.errors.dup copy = original.dup @@ -278,19 +278,20 @@ def test_capturing_nonparse_errors_during_fragment_clone end def test_capturing_nonparse_errors_during_node_copy_between_fragments - skip("JRuby HTML parse errors are different than libxml2's") if Nokogiri.jruby? - - frag1 = Nokogiri::HTML.fragment("
one
") - frag2 = Nokogiri::HTML.fragment("
two
") + # Errors should be emitted while parsing only, and should not change when moving nodes. + frag1 = Nokogiri::HTML.fragment("one") + frag2 = Nokogiri::HTML.fragment("two") node1 = frag1.at_css("#unique") node2 = frag2.at_css("#unique") + original_errors1 = frag1.errors.dup + original_errors2 = frag2.errors.dup + assert original_errors1.any?{|e| e.to_s =~ /Tag diva invalid/ }, "it should complain about the tag name" + assert original_errors2.any?{|e| e.to_s =~ /Tag dive invalid/ }, "it should complain about the tag name" - original_errors = frag1.errors.dup - - node1.add_child node2 # we should also not see an error on stderr + node1.add_child node2 - assert_equal original_errors.length+1, frag1.errors.length - assert_match(/ID unique already defined/, frag1.errors.last.to_s) + assert_equal original_errors1, frag1.errors + assert_equal original_errors2, frag2.errors end end end