Skip to content

Commit

Permalink
LUCENE-8292: Make TermsEnum fully abstract (#574)
Browse files Browse the repository at this point in the history
  • Loading branch information
s1monw committed Feb 15, 2019
1 parent 03945a9 commit fd1fc26
Show file tree
Hide file tree
Showing 31 changed files with 142 additions and 132 deletions.
2 changes: 2 additions & 0 deletions lucene/CHANGES.txt
Original file line number Diff line number Diff line change
Expand Up @@ -108,6 +108,8 @@ API Changes
* LUCENE-8609: Remove IndexWriter#numDocs() and IndexWriter#maxDoc() in favor
of IndexWriter#getDocStats(). (Simon Willnauer)

* LUCENE-8292: Make TermsEnum fully abstract. (Simon Willnauer)

Changes in Runtime Behavior

* LUCENE-8333: Switch MoreLikeThis.setMaxDocFreqPct to use maxDoc instead of
Expand Down
10 changes: 5 additions & 5 deletions lucene/MIGRATE.txt
Original file line number Diff line number Diff line change
@@ -1,11 +1,11 @@
# Apache Lucene Migration Guide

## TermsEnum.seekExact(BytesRef) is abstract (LUCENE-8662) ##
## TermsEnum is now fully abstract (LUCENE-8292) ##

TermsEnum.seekExact has been changed to abstract, so non-abstract subclass must implement it.
The default implementation can be seekCeil(text) == SeekStatus.FOUND.
This method is performance critical, so subclass SHOULD have its own implementation
if possible instead of using the default implementation.
TermsEnum has been changed to be fully abstract, so non-abstract subclasses must implement all of its methods.
Non-performance-critical TermsEnums can use BaseTermsEnum as a base class instead. The change was motivated
by several performance issues with FilterTermsEnum that caused significant slowdowns and massive memory consumption due
to not delegating all methods from TermsEnum. See LUCENE-8292 and LUCENE-8662.

## Similarity.SimScorer.computeXXXFactor methods removed (LUCENE-8014) ##

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@

import org.apache.lucene.codecs.CodecUtil;
import org.apache.lucene.codecs.DocValuesProducer;
import org.apache.lucene.index.BaseTermsEnum;
import org.apache.lucene.index.BinaryDocValues;
import org.apache.lucene.index.CorruptIndexException;
import org.apache.lucene.index.DocValues;
Expand Down Expand Up @@ -984,7 +985,7 @@ public TermsEnum termsEnum() throws IOException {
}
}

private static class TermsDict extends TermsEnum {
private static class TermsDict extends BaseTermsEnum {

final TermsDictEntry entry;
final LongValues blockAddresses;
Expand Down Expand Up @@ -1031,11 +1032,6 @@ public BytesRef next() throws IOException {
return term;
}

@Override
public boolean seekExact(BytesRef text) throws IOException {
return seekCeil(text) == SeekStatus.FOUND;
}

@Override
public void seekExact(long ord) throws IOException {
if (ord < 0 || ord >= entry.termsDictSize) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@
import org.apache.lucene.codecs.CodecUtil;
import org.apache.lucene.codecs.FieldsProducer;
import org.apache.lucene.codecs.PostingsReaderBase;
import org.apache.lucene.index.BaseTermsEnum;
import org.apache.lucene.index.CorruptIndexException;
import org.apache.lucene.index.FieldInfo;
import org.apache.lucene.index.ImpactsEnum;
Expand Down Expand Up @@ -286,7 +287,7 @@ public int getDocCount() throws IOException {
}

// Iterates through terms in this field
private final class SegmentTermsEnum extends TermsEnum {
private final class SegmentTermsEnum extends BaseTermsEnum {
private final IndexInput in;
private final BlockTermState state;
private final boolean doOrd;
Expand Down Expand Up @@ -685,11 +686,6 @@ public TermState termState() throws IOException {
return ts;
}

@Override
public boolean seekExact(BytesRef text) throws IOException {
return seekCeil(text) == SeekStatus.FOUND;
}

@Override
public void seekExact(long ord) throws IOException {
//System.out.println("BTR.seek by ord ord=" + ord);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -20,10 +20,10 @@
import java.io.IOException;

import org.apache.lucene.codecs.blocktreeords.FSTOrdsOutputs.Output;
import org.apache.lucene.index.BaseTermsEnum;
import org.apache.lucene.index.ImpactsEnum;
import org.apache.lucene.index.PostingsEnum;
import org.apache.lucene.index.TermState;
import org.apache.lucene.index.TermsEnum;
import org.apache.lucene.store.IndexInput;
import org.apache.lucene.util.ArrayUtil;
import org.apache.lucene.util.BytesRef;
Expand All @@ -34,7 +34,7 @@
import org.apache.lucene.util.fst.FST;

// NOTE: cannot seek!
final class OrdsIntersectTermsEnum extends TermsEnum {
final class OrdsIntersectTermsEnum extends BaseTermsEnum {
final IndexInput in;

private OrdsIntersectTermsEnumFrame[] stack;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -25,10 +25,10 @@

import org.apache.lucene.codecs.BlockTermState;
import org.apache.lucene.codecs.blocktreeords.FSTOrdsOutputs.Output;
import org.apache.lucene.index.BaseTermsEnum;
import org.apache.lucene.index.ImpactsEnum;
import org.apache.lucene.index.PostingsEnum;
import org.apache.lucene.index.TermState;
import org.apache.lucene.index.TermsEnum;
import org.apache.lucene.store.ByteArrayDataInput;
import org.apache.lucene.store.IndexInput;
import org.apache.lucene.util.ArrayUtil;
Expand All @@ -41,7 +41,7 @@
import org.apache.lucene.util.fst.Util;

/** Iterates through terms in this field. */
public final class OrdsSegmentTermsEnum extends TermsEnum {
public final class OrdsSegmentTermsEnum extends BaseTermsEnum {

// Lazy init:
IndexInput in;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,7 @@
import org.apache.lucene.codecs.NormsProducer;
import org.apache.lucene.codecs.PostingsFormat;
import org.apache.lucene.codecs.bloom.FuzzySet.ContainsResult;
import org.apache.lucene.index.BaseTermsEnum;
import org.apache.lucene.index.FieldInfo;
import org.apache.lucene.index.Fields;
import org.apache.lucene.index.ImpactsEnum;
Expand Down Expand Up @@ -291,7 +292,7 @@ public BytesRef getMax() throws IOException {
}
}

static final class BloomFilteredTermsEnum extends TermsEnum {
static final class BloomFilteredTermsEnum extends BaseTermsEnum {
private Terms delegateTerms;
private TermsEnum delegateTermsEnum;
private final FuzzySet filter;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@
import org.apache.lucene.codecs.FieldsProducer;
import org.apache.lucene.codecs.PostingsFormat;
import org.apache.lucene.codecs.lucene50.Lucene50PostingsFormat;
import org.apache.lucene.index.BaseTermsEnum;
import org.apache.lucene.index.FieldInfo;
import org.apache.lucene.index.Fields;
import org.apache.lucene.index.ImpactsEnum;
Expand Down Expand Up @@ -707,7 +708,7 @@ public boolean hasPayloads() {
return hasPayloads;
}

private final class DirectTermsEnum extends TermsEnum {
private final class DirectTermsEnum extends BaseTermsEnum {

private final BytesRef scratch = new BytesRef();
private int termOrd;
Expand Down Expand Up @@ -952,7 +953,7 @@ public ImpactsEnum impacts(int flags) throws IOException {
}
}

private final class DirectIntersectTermsEnum extends TermsEnum {
private final class DirectIntersectTermsEnum extends BaseTermsEnum {
private final RunAutomaton runAutomaton;
private final CompiledAutomaton compiledAutomaton;
private int termOrd;
Expand Down Expand Up @@ -1516,10 +1517,6 @@ public void seekExact(long ord) {
throw new UnsupportedOperationException();
}

@Override
public boolean seekExact(BytesRef text) throws IOException {
return seekCeil(text) == SeekStatus.FOUND;
}
}
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -305,7 +305,7 @@ public String toString() {
}

// Only wraps common operations for PBF interact
abstract class BaseTermsEnum extends TermsEnum {
abstract class BaseTermsEnum extends org.apache.lucene.index.BaseTermsEnum {

/* Current term's ord, starts from 0 */
long ord;
Expand Down Expand Up @@ -626,11 +626,6 @@ void decodeStats() throws IOException {
super.decodeStats();
}

@Override
public boolean seekExact(BytesRef text) throws IOException {
return seekCeil(text) == SeekStatus.FOUND;
}

@Override
public SeekStatus seekCeil(BytesRef target) throws IOException {
throw new UnsupportedOperationException();
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -259,7 +259,7 @@ public TermsEnum intersect(CompiledAutomaton compiled, BytesRef startTerm) throw
}

// Only wraps common operations for PBF interact
abstract class BaseTermsEnum extends TermsEnum {
abstract class BaseTermsEnum extends org.apache.lucene.index.BaseTermsEnum {

/* Current term stats + decoded metadata (customized by PBF) */
final BlockTermState state;
Expand Down Expand Up @@ -519,11 +519,6 @@ void loadMetaData() throws IOException {
state.totalTermFreq = meta.totalTermFreq;
}

@Override
public boolean seekExact(BytesRef text) throws IOException {
return seekCeil(text) == SeekStatus.FOUND;
}

@Override
public SeekStatus seekCeil(BytesRef target) throws IOException {
decoded = false;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@
import java.util.TreeMap;

import org.apache.lucene.codecs.FieldsProducer;
import org.apache.lucene.index.BaseTermsEnum;
import org.apache.lucene.index.FieldInfo;
import org.apache.lucene.index.FieldInfos;
import org.apache.lucene.index.ImpactsEnum;
Expand Down Expand Up @@ -111,7 +112,7 @@ private TreeMap<String,Long> readFields(IndexInput in) throws IOException {
}
}

private class SimpleTextTermsEnum extends TermsEnum {
private class SimpleTextTermsEnum extends BaseTermsEnum {
private final IndexOptions indexOptions;
private int docFreq;
private long totalTermFreq;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@
import java.util.TreeMap;

import org.apache.lucene.codecs.TermVectorsReader;
import org.apache.lucene.index.BaseTermsEnum;
import org.apache.lucene.index.Fields;
import org.apache.lucene.index.ImpactsEnum;
import org.apache.lucene.index.IndexFileNames;
Expand Down Expand Up @@ -338,7 +339,7 @@ private static class SimpleTVPostings {
private BytesRef payloads[];
}

private static class SimpleTVTermsEnum extends TermsEnum {
private static class SimpleTVTermsEnum extends BaseTermsEnum {
SortedMap<BytesRef,SimpleTVPostings> terms;
Iterator<Map.Entry<BytesRef,SimpleTextTermVectorsReader.SimpleTVPostings>> iterator;
Map.Entry<BytesRef,SimpleTextTermVectorsReader.SimpleTVPostings> current;
Expand All @@ -358,11 +359,6 @@ public SeekStatus seekCeil(BytesRef text) throws IOException {
}
}

@Override
public boolean seekExact(BytesRef text) throws IOException {
return seekCeil(text) == SeekStatus.FOUND;
}

@Override
public void seekExact(long ord) throws IOException {
throw new UnsupportedOperationException();
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -19,11 +19,11 @@

import java.io.IOException;

import org.apache.lucene.index.BaseTermsEnum;
import org.apache.lucene.index.ImpactsEnum;
import org.apache.lucene.index.PostingsEnum;
import org.apache.lucene.index.TermState;
import org.apache.lucene.index.Terms;
import org.apache.lucene.index.TermsEnum;
import org.apache.lucene.store.IndexInput;
import org.apache.lucene.util.ArrayUtil;
import org.apache.lucene.util.BytesRef;
Expand All @@ -44,7 +44,7 @@
* Likewise, in next it scans until it finds a term that matches the
* current automaton transition. */

final class IntersectTermsEnum extends TermsEnum {
final class IntersectTermsEnum extends BaseTermsEnum {

//static boolean DEBUG = BlockTreeTermsWriter.DEBUG;

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -21,10 +21,10 @@
import java.io.PrintStream;

import org.apache.lucene.codecs.BlockTermState;
import org.apache.lucene.index.BaseTermsEnum;
import org.apache.lucene.index.ImpactsEnum;
import org.apache.lucene.index.PostingsEnum;
import org.apache.lucene.index.TermState;
import org.apache.lucene.index.TermsEnum;
import org.apache.lucene.store.ByteArrayDataInput;
import org.apache.lucene.store.IndexInput;
import org.apache.lucene.util.ArrayUtil;
Expand All @@ -36,7 +36,7 @@

/** Iterates through terms in this field. */

final class SegmentTermsEnum extends TermsEnum {
final class SegmentTermsEnum extends BaseTermsEnum {

// Lazy init:
IndexInput in;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@

import org.apache.lucene.codecs.CodecUtil;
import org.apache.lucene.codecs.TermVectorsReader;
import org.apache.lucene.index.BaseTermsEnum;
import org.apache.lucene.index.CorruptIndexException;
import org.apache.lucene.index.SlowImpactsEnum;
import org.apache.lucene.index.PostingsEnum;
Expand Down Expand Up @@ -825,7 +826,7 @@ public boolean hasPayloads() {

}

private static class TVTermsEnum extends TermsEnum {
private static class TVTermsEnum extends BaseTermsEnum {

private int numTerms, startPos, ord;
private int[] prefixLengths, suffixLengths, termFreqs, positionIndex, positions, startOffsets, lengths, payloadIndex;
Expand Down Expand Up @@ -906,11 +907,6 @@ public SeekStatus seekCeil(BytesRef text)
}
}

@Override
public boolean seekExact(BytesRef text) throws IOException {
return seekCeil(text) == SeekStatus.FOUND;
}

@Override
public void seekExact(long ord) throws IOException {
throw new UnsupportedOperationException();
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@

import org.apache.lucene.codecs.CodecUtil;
import org.apache.lucene.codecs.DocValuesProducer;
import org.apache.lucene.index.BaseTermsEnum;
import org.apache.lucene.index.BinaryDocValues;
import org.apache.lucene.index.CorruptIndexException;
import org.apache.lucene.index.DocValues;
Expand Down Expand Up @@ -926,7 +927,7 @@ public TermsEnum termsEnum() throws IOException {
}
}

private static class TermsDict extends TermsEnum {
private static class TermsDict extends BaseTermsEnum {

final TermsDictEntry entry;
final LongValues blockAddresses;
Expand Down Expand Up @@ -973,11 +974,6 @@ public BytesRef next() throws IOException {
return term;
}

@Override
public boolean seekExact(BytesRef text) throws IOException {
return seekCeil(text) == SeekStatus.FOUND;
}

@Override
public void seekExact(long ord) throws IOException {
if (ord < 0 || ord >= entry.termsDictSize) {
Expand Down
Loading

0 comments on commit fd1fc26

Please sign in to comment.