Skip to content

Commit

Permalink
Rewrite XmlNodeSet
Browse files Browse the repository at this point in the history
The current implementation suffers from having poor performance.  More
specifically, `op_or' is order of magnitude slower than the MRI version slows
down XMLNodeSet::xpath and XMLNodeSet::css significantly.  This
commit rewrites the class to manage its own internal array, similar to what
libxml does.

fixes #1795
  • Loading branch information
jvshahid committed Sep 15, 2018
1 parent 7b8cd0f commit 9f69b84
Show file tree
Hide file tree
Showing 7 changed files with 347 additions and 154 deletions.
36 changes: 20 additions & 16 deletions ext/java/nokogiri/XmlDtd.java
Original file line number Diff line number Diff line change
Expand Up @@ -35,9 +35,11 @@
import static nokogiri.internals.NokogiriHelpers.getNokogiriClass;
import static nokogiri.internals.NokogiriHelpers.nonEmptyStringOrNil;
import static nokogiri.internals.NokogiriHelpers.stringOrNil;
import static org.jruby.javasupport.util.RuntimeHelpers.invoke;
import nokogiri.internals.NokogiriHelpers;
import nokogiri.internals.SaveContextVisitor;
import static org.jruby.runtime.Helpers.invoke;

import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;

import org.apache.xerces.xni.QName;
import org.cyberneko.dtd.DTDConfiguration;
Expand All @@ -54,6 +56,9 @@
import org.w3c.dom.Element;
import org.w3c.dom.Node;

import nokogiri.internals.NokogiriHelpers;
import nokogiri.internals.SaveContextVisitor;

/**
* Class for Nokogiri::XML::DTD
*
Expand All @@ -64,8 +69,6 @@

@JRubyClass(name="Nokogiri::XML::DTD", parent="Nokogiri::XML::Node")
public class XmlDtd extends XmlNode {
protected RubyArray allDecls = null;

/** cache of children, Nokogiri::XML::NodeSet */
protected IRubyObject children = null;

Expand Down Expand Up @@ -369,7 +372,6 @@ protected void extractDecls(ThreadContext context) {
Ruby runtime = context.getRuntime();

// initialize data structures
allDecls = RubyArray.newArray(runtime);
attributes = RubyHash.newHash(runtime);
elements = RubyHash.newHash(runtime);
entities = RubyHash.newHash(runtime);
Expand All @@ -379,10 +381,9 @@ protected void extractDecls(ThreadContext context) {

// recursively extract decls
if (node == null) return; // leave all the decl hash's empty
extractDecls(context, node.getFirstChild());

// convert allDecls to a NodeSet
children = XmlNodeSet.newXmlNodeSet(context, allDecls);
children = XmlNodeSet.newXmlNodeSet(context, extractDecls(context, node.getFirstChild()));

// add attribute decls as attributes to the matching element decl
RubyArray keys = attributes.keys();
Expand Down Expand Up @@ -425,25 +426,26 @@ protected void extractDecls(ThreadContext context) {
* subset it extracts everything and assumess <code>node</code>
* and all children are part of the external subset.
*/
protected void extractDecls(ThreadContext context, Node node) {
protected IRubyObject[] extractDecls(ThreadContext context, Node node) {
List<IRubyObject> decls = new ArrayList<IRubyObject>();
while (node != null) {
if (isExternalSubset(node)) {
return;
break;
} else if (isAttributeDecl(node)) {
XmlAttributeDecl decl = (XmlAttributeDecl)
XmlAttributeDecl.create(context, node);
attributes.op_aset(context, decl.attribute_name(context), decl);
allDecls.append(decl);
decls.add(decl);
} else if (isElementDecl(node)) {
XmlElementDecl decl = (XmlElementDecl)
XmlElementDecl.create(context, node);
elements.op_aset(context, decl.element_name(context), decl);
allDecls.append(decl);
decls.add(decl);
} else if (isEntityDecl(node)) {
XmlEntityDecl decl = (XmlEntityDecl)
XmlEntityDecl.create(context, node);
entities.op_aset(context, decl.node_name(context), decl);
allDecls.append(decl);
decls.add(decl);
} else if (isNotationDecl(node)) {
XmlNode tmp = (XmlNode)
NokogiriHelpers.constructNode(context.getRuntime(), node);
Expand All @@ -453,7 +455,7 @@ protected void extractDecls(ThreadContext context, Node node) {
tmp.getAttribute(context, "sysid"));
notations.op_aset(context,
tmp.getAttribute(context, "name"), decl);
allDecls.append(decl);
decls.add(decl);
} else if (isContentModel(node)) {
XmlElementContent cm =
new XmlElementContent(context.getRuntime(),
Expand All @@ -462,13 +464,15 @@ protected void extractDecls(ThreadContext context, Node node) {
contentModels.op_aset(context, cm.element_name(context), cm);
} else {
// recurse
extractDecls(context, node.getFirstChild());
decls.addAll(Arrays.asList(extractDecls(context, node.getFirstChild())));
}

node = node.getNextSibling();
}

return decls.toArray(new IRubyObject[decls.size()]);
}

@Override
public void accept(ThreadContext context, SaveContextVisitor visitor) {
// since we use nekoDTD to parse dtd, node might be ElementImpl type
Expand Down
12 changes: 6 additions & 6 deletions ext/java/nokogiri/XmlElement.java
Original file line number Diff line number Diff line change
Expand Up @@ -32,16 +32,16 @@

package nokogiri;

import nokogiri.internals.SaveContextVisitor;

import org.jruby.Ruby;
import org.jruby.RubyArray;
import org.jruby.RubyClass;
import org.jruby.anno.JRubyClass;
import org.jruby.runtime.ThreadContext;
import org.jruby.runtime.builtin.IRubyObject;
import org.w3c.dom.Element;
import org.w3c.dom.Node;

import nokogiri.internals.SaveContextVisitor;

/**
* Class for Nokogiri::XML::Element
*
Expand Down Expand Up @@ -71,9 +71,9 @@ public void accept(ThreadContext context, SaveContextVisitor visitor) {
visitor.enter((Element) node);
XmlNodeSet xmlNodeSet = (XmlNodeSet) children(context);
if (xmlNodeSet.length() > 0) {
RubyArray nodes = xmlNodeSet.nodes;
for( int i = 0; i < nodes.size(); i++ ) {
Object item = nodes.eltInternal(i);
IRubyObject[] nodes = XmlNodeSet.getNodes(context, xmlNodeSet);
for( int i = 0; i < nodes.length; i++ ) {
Object item = nodes[i];
if (item instanceof XmlNode) {
((XmlNode) item).accept(context, visitor);
}
Expand Down
38 changes: 23 additions & 15 deletions ext/java/nokogiri/XmlNode.java
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,16 @@
package nokogiri;

import static java.lang.Math.max;
import static nokogiri.internals.NokogiriHelpers.*;
import static nokogiri.internals.NokogiriHelpers.clearXpathContext;
import static nokogiri.internals.NokogiriHelpers.convertEncoding;
import static nokogiri.internals.NokogiriHelpers.convertString;
import static nokogiri.internals.NokogiriHelpers.getCachedNodeOrCreate;
import static nokogiri.internals.NokogiriHelpers.getNokogiriClass;
import static nokogiri.internals.NokogiriHelpers.isBlank;
import static nokogiri.internals.NokogiriHelpers.nodeArrayToRubyArray;
import static nokogiri.internals.NokogiriHelpers.nonEmptyStringOrNil;
import static nokogiri.internals.NokogiriHelpers.rubyStringToString;
import static nokogiri.internals.NokogiriHelpers.stringOrNil;

import java.io.ByteArrayInputStream;
import java.io.InputStream;
Expand All @@ -43,18 +52,12 @@
import java.util.Iterator;
import java.util.List;

import nokogiri.internals.HtmlDomParserContext;
import nokogiri.internals.NokogiriHelpers;
import nokogiri.internals.NokogiriNamespaceCache;
import nokogiri.internals.SaveContextVisitor;
import nokogiri.internals.XmlDomParserContext;

import org.apache.xerces.dom.CoreDocumentImpl;
import org.jruby.Ruby;
import org.jruby.RubyArray;
import org.jruby.RubyClass;
import org.jruby.RubyInteger;
import org.jruby.RubyFixnum;
import org.jruby.RubyInteger;
import org.jruby.RubyModule;
import org.jruby.RubyObject;
import org.jruby.RubyString;
Expand All @@ -76,6 +79,12 @@
import org.w3c.dom.NodeList;
import org.w3c.dom.Text;

import nokogiri.internals.HtmlDomParserContext;
import nokogiri.internals.NokogiriHelpers;
import nokogiri.internals.NokogiriNamespaceCache;
import nokogiri.internals.SaveContextVisitor;
import nokogiri.internals.XmlDomParserContext;

/**
* Class for Nokogiri::XML::Node
*
Expand Down Expand Up @@ -695,7 +704,7 @@ public IRubyObject child(ThreadContext context) {

@JRubyMethod
public IRubyObject children(ThreadContext context) {
XmlNodeSet xmlNodeSet = XmlNodeSet.create(context.runtime);
XmlNodeSet xmlNodeSet = XmlNodeSet.newEmptyNodeSet(context);

NodeList nodeList = node.getChildNodes();
if (nodeList.getLength() > 0) {
Expand Down Expand Up @@ -728,8 +737,8 @@ public IRubyObject last_element_child(ThreadContext context) {
public IRubyObject element_children(ThreadContext context) {
List<Node> elementNodes = new ArrayList<Node>();
addElements(node, elementNodes, false);
if (elementNodes.size() == 0) return XmlNodeSet.newEmptyNodeSet(context);
RubyArray array = NokogiriHelpers.nodeArrayToRubyArray(context.getRuntime(), elementNodes.toArray(new Node[0]));
IRubyObject[] array = NokogiriHelpers.nodeArrayToArray(context.runtime,
elementNodes.toArray(new Node[0]));
XmlNodeSet xmlNodeSet = XmlNodeSet.newXmlNodeSet(context, array);
return xmlNodeSet;
}
Expand Down Expand Up @@ -833,7 +842,7 @@ public IRubyObject in_context(ThreadContext context,
documentErrors.add(docErrors.get(i));
}
document.setInstanceVariable("@errors", documentErrors);
XmlNodeSet xmlNodeSet = XmlNodeSet.newXmlNodeSet(context, RubyArray.newArray(runtime));
XmlNodeSet xmlNodeSet = XmlNodeSet.newXmlNodeSet(context, new IRubyObject[0]);
return xmlNodeSet;
}

Expand All @@ -845,10 +854,9 @@ public IRubyObject in_context(ThreadContext context,
} else {
first = doc.node.getFirstChild();
}
RubyArray nodeArray = RubyArray.newArray(runtime);
nodeArray.add(NokogiriHelpers.getCachedNodeOrCreate(runtime, first));

XmlNodeSet xmlNodeSet = XmlNodeSet.newXmlNodeSet(context, nodeArray);
IRubyObject[] nodes = new IRubyObject[]{NokogiriHelpers.getCachedNodeOrCreate(runtime, first)};
XmlNodeSet xmlNodeSet = XmlNodeSet.newXmlNodeSet(context, nodes);
return xmlNodeSet;
}

Expand Down
Loading

0 comments on commit 9f69b84

Please sign in to comment.