Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

LUCENE-8477: Automatically rewrite disjunctions when internal gaps matter #620

Merged
merged 7 commits into from
Mar 27, 2019
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
@@ -0,0 +1,142 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

package org.apache.lucene.search.intervals;

import java.io.IOException;
import java.util.ArrayList;
import java.util.Collection;
import java.util.Collections;
import java.util.List;
import java.util.Objects;
import java.util.stream.Collectors;

class BlockIntervalsSource extends ConjunctionIntervalsSource {

static IntervalsSource build(List<IntervalsSource> subSources) {
if (subSources.size() == 1) {
return subSources.get(0);
}
return Intervals.or(Disjunctions.pullUp(subSources, BlockIntervalsSource::new));
}

private static List<IntervalsSource> flatten(List<IntervalsSource> sources) {
List<IntervalsSource> flattened = new ArrayList<>();
for (IntervalsSource s : sources) {
if (s instanceof BlockIntervalsSource) {
flattened.addAll(((BlockIntervalsSource)s).subSources);
}
else {
flattened.add(s);
}
}
return flattened;
}

private BlockIntervalsSource(List<IntervalsSource> sources) {
super(flatten(sources), true);
}

@Override
protected IntervalIterator combine(List<IntervalIterator> iterators) {
return new BlockIntervalIterator(iterators);
}

@Override
public int minExtent() {
int minExtent = 0;
for (IntervalsSource subSource : subSources) {
minExtent += subSource.minExtent();
}
return minExtent;
}

@Override
public Collection<IntervalsSource> pullUpDisjunctions() {
return Collections.singletonList(this); // Disjunctions already pulled up in build()
}

@Override
public int hashCode() {
return Objects.hash(subSources);
}

@Override
public boolean equals(Object other) {
if (other instanceof BlockIntervalsSource == false) return false;
BlockIntervalsSource b = (BlockIntervalsSource) other;
return Objects.equals(this.subSources, b.subSources);
}

@Override
public String toString() {
return "BLOCK(" + subSources.stream().map(IntervalsSource::toString).collect(Collectors.joining(",")) + ")";
}

private static class BlockIntervalIterator extends ConjunctionIntervalIterator {

int start = -1, end = -1;

BlockIntervalIterator(List<IntervalIterator> subIterators) {
super(subIterators);
}

@Override
public int start() {
return start;
}

@Override
public int end() {
return end;
}

@Override
public int gaps() {
return 0;
}

@Override
public int nextInterval() throws IOException {
if (subIterators.get(0).nextInterval() == IntervalIterator.NO_MORE_INTERVALS)
return start = end = IntervalIterator.NO_MORE_INTERVALS;
int i = 1;
while (i < subIterators.size()) {
while (subIterators.get(i).start() <= subIterators.get(i - 1).end()) {
if (subIterators.get(i).nextInterval() == IntervalIterator.NO_MORE_INTERVALS)
return start = end = IntervalIterator.NO_MORE_INTERVALS;
}
if (subIterators.get(i).start() == subIterators.get(i - 1).end() + 1) {
i = i + 1;
}
else {
if (subIterators.get(0).nextInterval() == IntervalIterator.NO_MORE_INTERVALS)
return start = end = IntervalIterator.NO_MORE_INTERVALS;
i = 1;
}
}
start = subIterators.get(0).start();
end = subIterators.get(subIterators.size() - 1).end();
return start;
}

@Override
protected void reset() {
start = end = -1;
}
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,6 @@
import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
import java.util.Objects;
import java.util.stream.Collectors;

import org.apache.lucene.index.LeafReaderContext;
Expand All @@ -31,28 +30,15 @@
import org.apache.lucene.search.Query;
import org.apache.lucene.search.QueryVisitor;

class ConjunctionIntervalsSource extends IntervalsSource {
abstract class ConjunctionIntervalsSource extends IntervalsSource {

protected final List<IntervalsSource> subSources;
protected final IntervalFunction function;
protected final boolean isMinimizing;

ConjunctionIntervalsSource(List<IntervalsSource> subSources, IntervalFunction function) {
protected ConjunctionIntervalsSource(List<IntervalsSource> subSources, boolean isMinimizing) {
assert subSources.size() > 1;
this.subSources = subSources;
this.function = function;
}

@Override
public boolean equals(Object o) {
if (this == o) return true;
if (o == null || getClass() != o.getClass()) return false;
ConjunctionIntervalsSource that = (ConjunctionIntervalsSource) o;
return Objects.equals(subSources, that.subSources) &&
Objects.equals(function, that.function);
}

@Override
public String toString() {
return function + subSources.stream().map(Object::toString).collect(Collectors.joining(",", "(", ")"));
this.isMinimizing = isMinimizing;
}

@Override
Expand All @@ -65,49 +51,40 @@ public void visit(String field, QueryVisitor visitor) {
}

@Override
public int minExtent() {
int minExtent = 0;
for (IntervalsSource source : subSources) {
minExtent += source.minExtent();
}
return minExtent;
}

@Override
public IntervalIterator intervals(String field, LeafReaderContext ctx) throws IOException {
public final IntervalIterator intervals(String field, LeafReaderContext ctx) throws IOException {
List<IntervalIterator> subIntervals = new ArrayList<>();
for (IntervalsSource source : subSources) {
IntervalIterator it = source.intervals(field, ctx);
if (it == null)
return null;
subIntervals.add(it);
}
return function.apply(subIntervals);
return combine(subIntervals);
}

protected abstract IntervalIterator combine(List<IntervalIterator> iterators);

@Override
public MatchesIterator matches(String field, LeafReaderContext ctx, int doc) throws IOException {
public final MatchesIterator matches(String field, LeafReaderContext ctx, int doc) throws IOException {
List<MatchesIterator> subs = new ArrayList<>();
for (IntervalsSource source : subSources) {
MatchesIterator mi = source.matches(field, ctx, doc);
if (mi == null) {
return null;
}
if (isMinimizing) {
mi = new CachingMatchesIterator(mi);
}
subs.add(mi);
}
IntervalIterator it = function.apply(subs.stream().map(m -> IntervalMatches.wrapMatches(m, doc)).collect(Collectors.toList()));
IntervalIterator it = combine(subs.stream().map(m -> IntervalMatches.wrapMatches(m, doc)).collect(Collectors.toList()));
if (it.advance(doc) != doc) {
return null;
}
if (it.nextInterval() == IntervalIterator.NO_MORE_INTERVALS) {
return null;
}
return new ConjunctionMatchesIterator(it, subs);
}

@Override
public int hashCode() {
return Objects.hash(subSources, function);
return isMinimizing ? new MinimizingConjunctionMatchesIterator(it, subs) : new ConjunctionMatchesIterator(it, subs);
}

private static class ConjunctionMatchesIterator implements MatchesIterator {
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,93 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

package org.apache.lucene.search.intervals;

import java.io.IOException;
import java.util.Arrays;
import java.util.Collection;
import java.util.List;
import java.util.Objects;

class ContainedByIntervalsSource extends ConjunctionIntervalsSource {

static IntervalsSource build(IntervalsSource small, IntervalsSource big) {
return Intervals.or(Disjunctions.pullUp(big, s -> new ContainedByIntervalsSource(small, s)));
}

private final IntervalsSource small;
private final IntervalsSource big;

private ContainedByIntervalsSource(IntervalsSource small, IntervalsSource big) {
super(Arrays.asList(small, big), false);
this.small = small;
this.big = big;
}

@Override
protected IntervalIterator combine(List<IntervalIterator> iterators) {
assert iterators.size() == 2;
IntervalIterator a = iterators.get(0);
IntervalIterator b = iterators.get(1);
return new FilteringIntervalIterator(a, b) {
@Override
public int nextInterval() throws IOException {
if (bpos == false)
return IntervalIterator.NO_MORE_INTERVALS;
while (a.nextInterval() != IntervalIterator.NO_MORE_INTERVALS) {
while (b.end() < a.end()) {
if (b.nextInterval() == IntervalIterator.NO_MORE_INTERVALS) {
bpos = false;
return IntervalIterator.NO_MORE_INTERVALS;
}
}
if (b.start() <= a.start())
return a.start();
}
bpos = false;
return IntervalIterator.NO_MORE_INTERVALS;
}
};
}

@Override
public int minExtent() {
return small.minExtent();
}

@Override
public Collection<IntervalsSource> pullUpDisjunctions() {
return Disjunctions.pullUp(big, s -> new ContainedByIntervalsSource(small, s));
}

@Override
public int hashCode() {
return Objects.hashCode(subSources);
}

@Override
public boolean equals(Object other) {
if (other instanceof ContainedByIntervalsSource == false) return false;
ContainedByIntervalsSource o = (ContainedByIntervalsSource) other;
return Objects.equals(this.subSources, o.subSources);
}

@Override
public String toString() {
return "CONTAINED_BY(" + small + "," + big + ")";
}
}
Loading