Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

LUCENE-8746: Refactor EdgeTree #878

Merged
merged 21 commits into from
Oct 14, 2019
Merged
Show file tree
Hide file tree
Changes from 16 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@
import java.io.IOException;
import java.util.Arrays;

import org.apache.lucene.geo.Component2D;
import org.apache.lucene.geo.GeoEncodingUtils;
import org.apache.lucene.geo.Polygon;
import org.apache.lucene.geo.Polygon2D;
Expand Down Expand Up @@ -103,8 +104,8 @@ public Weight createWeight(IndexSearcher searcher, ScoreMode scoreMode, float bo

return new ConstantScoreWeight(this, boost) {

final Polygon2D tree = Polygon2D.create(polygons);
final GeoEncodingUtils.PolygonPredicate polygonPredicate = GeoEncodingUtils.createPolygonPredicate(polygons, tree);
final Component2D tree = Polygon2D.create(polygons);
final GeoEncodingUtils.PolygonPredicate polygonPredicate = GeoEncodingUtils.createComponentPredicate(tree);

@Override
public Scorer scorer(LeafReaderContext context) throws IOException {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -19,10 +19,10 @@
import java.io.IOException;
import java.util.Arrays;

import org.apache.lucene.geo.Component2D;
import org.apache.lucene.geo.GeoEncodingUtils;
import org.apache.lucene.geo.Polygon;
import org.apache.lucene.geo.Polygon2D;
import org.apache.lucene.geo.Rectangle;
import org.apache.lucene.index.FieldInfo;
import org.apache.lucene.index.LeafReader;
import org.apache.lucene.index.LeafReaderContext;
Expand Down Expand Up @@ -84,7 +84,7 @@ public void visit(QueryVisitor visitor) {
}
}

private IntersectVisitor getIntersectVisitor(DocIdSetBuilder result, Polygon2D tree, GeoEncodingUtils.PolygonPredicate polygonPredicate,
private IntersectVisitor getIntersectVisitor(DocIdSetBuilder result, Component2D tree, GeoEncodingUtils.PolygonPredicate polygonPredicate,
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
private IntersectVisitor getIntersectVisitor(DocIdSetBuilder result, Component2D tree, GeoEncodingUtils.PolygonPredicate polygonPredicate,
private IntersectVisitor getIntersectVisitor(DocIdSetBuilder result) {

byte[] minLat, byte[] maxLat, byte[] minLon, byte[] maxLon) {
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
byte[] minLat, byte[] maxLat, byte[] minLon, byte[] maxLon) {
final Component2D tree = Polygon2D.create(polygons);
final GeoEncodingUtils.PolygonPredicate polygonPredicate = GeoEncodingUtils.createComponentPredicate(tree);
// bounding box over all polygons, this can speed up tree intersection/cheaply improve approximation for complex multi-polygons
final byte minLat[] = new byte[Integer.BYTES];
final byte maxLat[] = new byte[Integer.BYTES];
final byte minLon[] = new byte[Integer.BYTES];
final byte maxLon[] = new byte[Integer.BYTES];
NumericUtils.intToSortableBytes(encodeLatitude(tree.getMinY()), minLat, 0);
NumericUtils.intToSortableBytes(encodeLatitude(tree.getMaxY()), maxLat, 0);
NumericUtils.intToSortableBytes(encodeLongitude(tree.getMinX()), minLon, 0);
NumericUtils.intToSortableBytes(encodeLongitude(tree.getMaxX()), maxLon, 0);

return new IntersectVisitor() {
DocIdSetBuilder.BulkAdder adder;
Expand Down Expand Up @@ -133,28 +133,25 @@ public Relation compare(byte[] minPackedValue, byte[] maxPackedValue) {
double cellMaxLat = decodeLatitude(maxPackedValue, 0);
double cellMaxLon = decodeLongitude(maxPackedValue, Integer.BYTES);

return tree.relate(cellMinLat, cellMaxLat, cellMinLon, cellMaxLon);
return tree.relate(cellMinLon, cellMaxLon, cellMinLat, cellMaxLat);
}
};
}

@Override
public Weight createWeight(IndexSearcher searcher, ScoreMode scoreMode, float boost) throws IOException {

final Component2D tree = Polygon2D.create(polygons);
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Can we push these variables into getIntersectVisitor? That would delay creating the bounding box, Component2D, and PolygonPredicate objects until the visitor is needed and save unnecessary computation when no docs contain point fields.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

See above suggestion: I think it makes the code here a bit cleaner?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Not sure if we should do that. I think we create one ScorerSupplier per segment, so moving that logic into getIntersectVisitor means that we would be creating these objects once per segment.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

👍 Thanks for reminding me. I ran into that same problem ages ago as well.

final GeoEncodingUtils.PolygonPredicate polygonPredicate = GeoEncodingUtils.createComponentPredicate(tree);
// bounding box over all polygons, this can speed up tree intersection/cheaply improve approximation for complex multi-polygons
// these are pre-encoded with LatLonPoint's encoding
final Rectangle box = Rectangle.fromPolygon(polygons);
final byte minLat[] = new byte[Integer.BYTES];
final byte maxLat[] = new byte[Integer.BYTES];
final byte minLon[] = new byte[Integer.BYTES];
final byte maxLon[] = new byte[Integer.BYTES];
NumericUtils.intToSortableBytes(encodeLatitude(box.minLat), minLat, 0);
NumericUtils.intToSortableBytes(encodeLatitude(box.maxLat), maxLat, 0);
NumericUtils.intToSortableBytes(encodeLongitude(box.minLon), minLon, 0);
NumericUtils.intToSortableBytes(encodeLongitude(box.maxLon), maxLon, 0);

final Polygon2D tree = Polygon2D.create(polygons);
final GeoEncodingUtils.PolygonPredicate polygonPredicate = GeoEncodingUtils.createPolygonPredicate(polygons, tree);
NumericUtils.intToSortableBytes(encodeLatitude(tree.getMinY()), minLat, 0);
NumericUtils.intToSortableBytes(encodeLatitude(tree.getMaxY()), maxLat, 0);
NumericUtils.intToSortableBytes(encodeLongitude(tree.getMinX()), minLon, 0);
NumericUtils.intToSortableBytes(encodeLongitude(tree.getMaxX()), maxLon, 0);

return new ConstantScoreWeight(this, boost) {

Expand Down
96 changes: 96 additions & 0 deletions lucene/core/src/java/org/apache/lucene/geo/Component2D.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,96 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

package org.apache.lucene.geo;

import org.apache.lucene.index.PointValues;

import static org.apache.lucene.geo.GeoUtils.orient;

/**
* 2D Geometry object that supports spatial relationships with bounding boxes,
* triangles and points.
*
* @lucene.internal
**/
public interface Component2D {

/** min X value for the component **/
double getMinX();

/** max X value for the component **/
double getMaxX();

/** min Y value for the component **/
double getMinY();

/** max Y value for the component **/
double getMaxY();

/** relates this component2D with a point **/
boolean contains(double x, double y);

/** relates this component2D with a bounding box **/
PointValues.Relation relate(double minX, double maxX, double minY, double maxY);

/** relates this component2D with a triangle **/
PointValues.Relation relateTriangle(double minX, double maxX, double minY, double maxY,
double aX, double aY, double bX, double bY, double cX, double cY);

/** relates this component2D with a triangle **/
default PointValues.Relation relateTriangle(double aX, double aY, double bX, double bY, double cX, double cY) {
double minY = StrictMath.min(StrictMath.min(aY, bY), cY);
double minX = StrictMath.min(StrictMath.min(aX, bX), cX);
double maxY = StrictMath.max(StrictMath.max(aY, bY), cY);
double maxX = StrictMath.max(StrictMath.max(aX, bX), cX);
return relateTriangle(minX, maxX, minY, maxY, aX, aY, bX, bY, cX, cY);
}

/** Compute whether the bounding boxes are disjoint **/
static boolean disjoint(double minX1, double maxX1, double minY1, double maxY1, double minX2, double maxX2, double minY2, double maxY2) {
return (maxY1 < minY2 || minY1 > maxY2 || maxX1 < minX2 || minX1 > maxX2);
}

/** Compute whether the first bounding box 1 is within the second bounding box **/
static boolean within(double minX1, double maxX1, double minY1, double maxY1, double minX2, double maxX2, double minY2, double maxY2) {
return (minY2 < minY1 && maxY2 > maxY1&& minX2 < minX1 && maxX2 > maxX1);
iverase marked this conversation as resolved.
Show resolved Hide resolved
}

/** returns true if rectangle (defined by minX, maxX, minY, maxY) contains the X Y point */
static boolean containsPoint(final double x, final double y, final double minX, final double maxX, final double minY, final double maxY) {
return x >= minX && x <= maxX && y >= minY && y <= maxY;
}

/**
* Compute whether the given x, y point is in a triangle; uses the winding order method */
static boolean pointInTriangle(double minX, double maxX, double minY, double maxY, double x, double y, double aX, double aY, double bX, double bY, double cX, double cY) {
//check the bounding box because if the triangle is degenerated, e.g points and lines, we need to filter out
//coplanar points that are not part of the triangle.
if (x >= minX && x <= maxX && y >= minY && y <= maxY) {
int a = orient(x, y, aX, aY, bX, bY);
int b = orient(x, y, bX, bY, cX, cY);
if (a == 0 || b == 0 || a < 0 == b < 0) {
int c = orient(x, y, cX, cY, aX, aY);
return c == 0 || (c < 0 == (b < 0 || a < 0));
}
return false;
} else {
return false;
}
}

}
206 changes: 206 additions & 0 deletions lucene/core/src/java/org/apache/lucene/geo/ComponentTree.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,206 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.lucene.geo;

import java.util.Comparator;

import org.apache.lucene.index.PointValues.Relation;
import org.apache.lucene.util.ArrayUtil;

/**
 * 2D multi-component geometry implementation represented as an interval tree of components.
 * <p>
 * Every node holds one component plus optional left and right subtrees. Levels
 * alternate between splitting on Y and splitting on X, starting with Y at the root.
 * <p>
 * Construction takes {@code O(n log n)} time for sorting and tree construction.
 *
 * @lucene.internal
 */
final class ComponentTree implements Component2D {

  /** orders components by min X and breaks ties on max X; used by levels that split on X */
  private static final Comparator<Component2D> BY_X =
      Comparator.comparingDouble(Component2D::getMinX).thenComparingDouble(Component2D::getMaxX);

  /** orders components by min Y and breaks ties on max Y; used by levels that split on Y */
  private static final Comparator<Component2D> BY_Y =
      Comparator.comparingDouble(Component2D::getMinY).thenComparingDouble(Component2D::getMaxY);

  /** minimum Y value; taken from this node's component, and pulled up over all components only for the root */
  private double minY;
  /** maximum Y value over this node's component and both of its subtrees */
  private double maxY;
  /** minimum X value; taken from this node's component, and pulled up over all components only for the root */
  private double minX;
  /** maximum X value over this node's component and both of its subtrees */
  private double maxX;
  // child components, or null. Note internal nodes might not have
  // a consistent bounding box. Internal nodes should not be accessed
  // outside of this class.
  private Component2D left;
  private Component2D right;
  /** which dimension was this node split on */
  // TODO: its implicit based on level, but boolean keeps code simple
  private final boolean splitX;
  /** the component stored at this node */
  private final Component2D component;

  /**
   * Creates a leaf node for the given component; the bounding box starts out as the
   * component's own box and the children are attached afterwards by {@code createTree}.
   */
  private ComponentTree(Component2D component, boolean splitX) {
    this.minY = component.getMinY();
    this.maxY = component.getMaxY();
    this.minX = component.getMinX();
    this.maxX = component.getMaxX();
    this.component = component;
    this.splitX = splitX;
  }

  @Override
  public double getMinX() {
    return minX;
  }

  @Override
  public double getMaxX() {
    return maxX;
  }

  @Override
  public double getMinY() {
    return minY;
  }

  @Override
  public double getMaxY() {
    return maxY;
  }

  /**
   * Returns true if the right subtree still has to be visited for a query whose upper
   * bounds are {@code maxX} / {@code maxY}: the upper bound on this node's split
   * dimension must reach the minimum of this node's component on that dimension.
   */
  private boolean rightMayMatch(double maxX, double maxY) {
    return splitX ? maxX >= component.getMinX() : maxY >= component.getMinY();
  }

  /** Returns true if this node's component or any component in its subtrees contains the point */
  @Override
  public boolean contains(double x, double y) {
    // nothing at or below this node reaches the point
    if ((y <= this.maxY && x <= this.maxX) == false) {
      return false;
    }
    if (component.contains(x, y)) {
      return true;
    }
    if (left != null && left.contains(x, y)) {
      return true;
    }
    return right != null && rightMayMatch(x, y) && right.contains(x, y);
  }

  /**
   * Returns relation to the provided triangle: the first relation other than
   * {@code CELL_OUTSIDE_QUERY} reported by this node's component, the left subtree or
   * the right subtree (checked in that order), or {@code CELL_OUTSIDE_QUERY} if there is none.
   */
  @Override
  public Relation relateTriangle(double minX, double maxX, double minY, double maxY,
                                 double ax, double ay, double bx, double by, double cx, double cy) {
    // nothing at or below this node reaches the triangle's bounding box
    if ((minY <= this.maxY && minX <= this.maxX) == false) {
      return Relation.CELL_OUTSIDE_QUERY;
    }
    Relation relation = component.relateTriangle(minX, maxX, minY, maxY, ax, ay, bx, by, cx, cy);
    if (relation == Relation.CELL_OUTSIDE_QUERY && left != null) {
      relation = left.relateTriangle(minX, maxX, minY, maxY, ax, ay, bx, by, cx, cy);
    }
    if (relation == Relation.CELL_OUTSIDE_QUERY && right != null && rightMayMatch(maxX, maxY)) {
      relation = right.relateTriangle(minX, maxX, minY, maxY, ax, ay, bx, by, cx, cy);
    }
    return relation;
  }

  /**
   * Returns relation to the provided rectangle: the first relation other than
   * {@code CELL_OUTSIDE_QUERY} reported by this node's component, the left subtree or
   * the right subtree (checked in that order), or {@code CELL_OUTSIDE_QUERY} if there is none.
   */
  @Override
  public Relation relate(double minX, double maxX, double minY, double maxY) {
    // nothing at or below this node reaches the rectangle
    if ((minY <= this.maxY && minX <= this.maxX) == false) {
      return Relation.CELL_OUTSIDE_QUERY;
    }
    Relation relation = component.relate(minX, maxX, minY, maxY);
    if (relation == Relation.CELL_OUTSIDE_QUERY && left != null) {
      relation = left.relate(minX, maxX, minY, maxY);
    }
    if (relation == Relation.CELL_OUTSIDE_QUERY && right != null && rightMayMatch(maxX, maxY)) {
      relation = right.relate(minX, maxX, minY, maxY);
    }
    return relation;
  }

  /**
   * Creates tree from provided components.
   * <p>
   * A single component is returned as is, without a wrapping tree node. For an empty
   * array no node is built and {@code null} is returned.
   * NOTE: the array is handed to {@code ArrayUtil.select} while the tree is built, so
   * callers should expect its elements to be reordered.
   *
   * @param components the components to index
   * @return the only component, the root of the new tree, or {@code null} for an empty array
   */
  public static Component2D create(Component2D[] components) {
    if (components.length == 1) {
      return components[0];
    }
    final ComponentTree root = createTree(components, 0, components.length - 1, false);
    // pull up min values for the root node so it contains a consistent bounding box
    for (Component2D each : components) {
      root.minY = Math.min(root.minY, each.getMinY());
      root.minX = Math.min(root.minX, each.getMinX());
    }
    return root;
  }

  /**
   * Creates tree from the components in the range {@code low} to {@code high}
   * (both inclusive); returns {@code null} for an empty range.
   *
   * @param splitX true if this level is ordered on X, false if it is ordered on Y
   */
  private static ComponentTree createTree(Component2D[] components, int low, int high, boolean splitX) {
    if (low > high) {
      return null;
    }
    final int mid = (low + high) >>> 1;
    if (low < high) {
      // move the median (by this level's ordering) of the range to position mid
      ArrayUtil.select(components, low, high + 1, mid, splitX ? BY_X : BY_Y);
    }
    final ComponentTree node = new ComponentTree(components[mid], splitX);
    // find children, alternating the split dimension on the next level
    node.left = createTree(components, low, mid - 1, !splitX);
    node.right = createTree(components, mid + 1, high, !splitX);

    // pull up max values to this node
    if (node.left != null) {
      node.maxX = Math.max(node.maxX, node.left.getMaxX());
      node.maxY = Math.max(node.maxY, node.left.getMaxY());
    }
    if (node.right != null) {
      node.maxX = Math.max(node.maxX, node.right.getMaxX());
      node.maxY = Math.max(node.maxY, node.right.getMaxY());
    }
    return node;
  }
}
Loading