Skip to content

Commit

Permalink
Enhances exists queries to reduce need for _field_names (elastic#26930
Browse files Browse the repository at this point in the history
)

* Enhances exists queries to reduce need for `_field_names`

Before this change we wrote the names of all the fields in a document to a `_field_names` field and then implemented exists queries as a term query on this field. The problem with this approach is that it bloats the index and also affects indexing performance.

This change adds a new method `existsQuery()` to `MappedFieldType` which is implemented by each sub-class. For most field types if doc values are available a `DocValuesFieldExistsQuery` is used, falling back to using `_field_names` if doc values are disabled. Note that only fields where no doc values are available are written to `_field_names`.

Closes elastic#26770

* Addresses review comments

* Addresses more review comments

* implements existsQuery explicitly on every mapper

* Reinstates ability to perform term query on `_field_names`

* Added bwc depending on index created version

* Review Comments

* Skips tests that are not supported in 6.1.0

These values will need to be changed after backporting this PR to 6.x
  • Loading branch information
colings86 authored Nov 1, 2017
1 parent d805c41 commit 99aca9c
Show file tree
Hide file tree
Showing 45 changed files with 1,880 additions and 190 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -20,10 +20,14 @@
package org.elasticsearch.index.mapper;

import com.carrotsearch.hppc.ObjectArrayList;

import org.apache.lucene.document.Field;
import org.apache.lucene.index.IndexOptions;
import org.apache.lucene.index.IndexableField;
import org.apache.lucene.index.Term;
import org.apache.lucene.search.DocValuesFieldExistsQuery;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.TermQuery;
import org.apache.lucene.store.ByteArrayDataOutput;
import org.apache.lucene.util.BytesRef;
import org.elasticsearch.ElasticsearchException;
Expand Down Expand Up @@ -126,6 +130,15 @@ public IndexFieldData.Builder fielddataBuilder(String fullyQualifiedIndexName) {
return new BytesBinaryDVIndexFieldData.Builder();
}

/**
 * Builds the query that matches documents containing a value for this field.
 * When the field has doc values a {@link DocValuesFieldExistsQuery} is used;
 * otherwise the field's name is looked up in the {@code _field_names} field.
 */
@Override
public Query existsQuery(QueryShardContext context) {
    return hasDocValues()
        ? new DocValuesFieldExistsQuery(name())
        : new TermQuery(new Term(FieldNamesFieldMapper.NAME, name()));
}

@Override
public Query termQuery(Object value, QueryShardContext context) {
throw new QueryShardException(context, "Binary fields do not support searching");
Expand Down Expand Up @@ -165,6 +178,11 @@ protected void parseCreateField(ParseContext context, List<IndexableField> field
} else {
field.add(value);
}
} else {
// Only add an entry to the field names field if the field is stored
// but has no doc values so exists query will work on a field with
// no doc values
createFieldNamesField(context, fields);
}

}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,10 @@
import org.apache.lucene.document.SortedNumericDocValuesField;
import org.apache.lucene.index.IndexOptions;
import org.apache.lucene.index.IndexableField;
import org.apache.lucene.index.Term;
import org.apache.lucene.search.DocValuesFieldExistsQuery;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.TermQuery;
import org.apache.lucene.search.TermRangeQuery;
import org.apache.lucene.util.BytesRef;
import org.elasticsearch.Version;
Expand Down Expand Up @@ -136,6 +139,15 @@ public String typeName() {
return CONTENT_TYPE;
}

/**
 * Returns the query used to find documents that have a value for this field.
 * Doc values are preferred when present; without them the query falls back to
 * a term lookup of this field's name in {@code _field_names}.
 */
@Override
public Query existsQuery(QueryShardContext context) {
    if (hasDocValues()) {
        return new DocValuesFieldExistsQuery(name());
    }
    // No doc values: rely on the entry written to the _field_names field.
    return new TermQuery(new Term(FieldNamesFieldMapper.NAME, name()));
}

@Override
public Boolean nullValue() {
return (Boolean)super.nullValue();
Expand Down Expand Up @@ -253,6 +265,8 @@ protected void parseCreateField(ParseContext context, List<IndexableField> field
}
if (fieldType().hasDocValues()) {
fields.add(new SortedNumericDocValuesField(fieldType().name(), value ? 1 : 0));
} else {
createFieldNamesField(context, fields);
}
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,8 @@
import org.apache.lucene.codecs.PostingsFormat;
import org.apache.lucene.index.IndexableField;
import org.apache.lucene.index.Term;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.TermQuery;
import org.apache.lucene.search.suggest.document.Completion50PostingsFormat;
import org.apache.lucene.search.suggest.document.CompletionAnalyzer;
import org.apache.lucene.search.suggest.document.CompletionQuery;
Expand All @@ -40,11 +42,13 @@
import org.elasticsearch.common.xcontent.XContentParser.Token;
import org.elasticsearch.index.analysis.AnalyzerScope;
import org.elasticsearch.index.analysis.NamedAnalyzer;
import org.elasticsearch.index.query.QueryShardContext;
import org.elasticsearch.search.suggest.completion.CompletionSuggester;
import org.elasticsearch.search.suggest.completion.context.ContextMapping;
import org.elasticsearch.search.suggest.completion.context.ContextMappings;

import java.io.IOException;
import java.util.ArrayList;
import java.util.Collections;
import java.util.HashMap;
import java.util.HashSet;
Expand Down Expand Up @@ -257,6 +261,11 @@ public static synchronized PostingsFormat postingsFormat() {
return postingsFormat;
}

/**
 * Returns a term query on the {@code _field_names} field for this field's name.
 * This type always resolves exists queries through {@code _field_names}.
 */
@Override
public Query existsQuery(QueryShardContext context) {
    final Term fieldNameTerm = new Term(FieldNamesFieldMapper.NAME, name());
    return new TermQuery(fieldNameTerm);
}

/**
* Completion prefix query
*/
Expand Down Expand Up @@ -456,6 +465,11 @@ public Mapper parse(ParseContext context) throws IOException {
context.doc().add(new SuggestField(fieldType().name(), input, metaData.weight));
}
}
List<IndexableField> fields = new ArrayList<>(1);
createFieldNamesField(context, fields);
for (IndexableField field : fields) {
context.doc().add(field);
}
multiFields.parse(this, context);
return null;
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -26,9 +26,12 @@
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexableField;
import org.apache.lucene.index.PointValues;
import org.apache.lucene.index.Term;
import org.apache.lucene.search.BoostQuery;
import org.apache.lucene.search.DocValuesFieldExistsQuery;
import org.apache.lucene.search.IndexOrDocValuesQuery;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.TermQuery;
import org.apache.lucene.util.BytesRef;
import org.elasticsearch.Version;
import org.elasticsearch.common.Explicit;
Expand Down Expand Up @@ -245,6 +248,15 @@ long parse(String value) {
return dateTimeFormatter().parser().parseMillis(value);
}

/**
 * Creates the exists query for this field: a term lookup in
 * {@code _field_names} when doc values are disabled, and a
 * {@link DocValuesFieldExistsQuery} otherwise.
 */
@Override
public Query existsQuery(QueryShardContext context) {
    if (hasDocValues() == false) {
        return new TermQuery(new Term(FieldNamesFieldMapper.NAME, name()));
    }
    return new DocValuesFieldExistsQuery(name());
}

@Override
public Query termQuery(Object value, @Nullable QueryShardContext context) {
Query query = rangeQuery(value, value, true, true, ShapeRelation.INTERSECTS, null, null, context);
Expand Down Expand Up @@ -451,6 +463,8 @@ protected void parseCreateField(ParseContext context, List<IndexableField> field
}
if (fieldType().hasDocValues()) {
fields.add(new SortedNumericDocValuesField(fieldType().name(), timestamp));
} else if (fieldType().stored() || fieldType().indexOptions() != IndexOptions.NONE) {
createFieldNamesField(context, fields);
}
if (fieldType().stored()) {
fields.add(new StoredField(fieldType().name(), timestamp));
Expand Down
12 changes: 12 additions & 0 deletions core/src/main/java/org/elasticsearch/index/mapper/FieldMapper.java
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@
import com.carrotsearch.hppc.cursors.ObjectCursor;
import com.carrotsearch.hppc.cursors.ObjectObjectCursor;

import org.apache.lucene.document.Field;
import org.apache.lucene.document.FieldType;
import org.apache.lucene.index.IndexOptions;
import org.apache.lucene.index.IndexableField;
Expand All @@ -33,6 +34,7 @@
import org.elasticsearch.common.settings.Settings;
import org.elasticsearch.common.xcontent.XContentBuilder;
import org.elasticsearch.index.analysis.NamedAnalyzer;
import org.elasticsearch.index.mapper.FieldNamesFieldMapper.FieldNamesFieldType;
import org.elasticsearch.index.similarity.SimilarityProvider;
import org.elasticsearch.index.similarity.SimilarityService;

Expand Down Expand Up @@ -285,6 +287,16 @@ public Mapper parse(ParseContext context) throws IOException {
*/
protected abstract void parseCreateField(ParseContext context, List<IndexableField> fields) throws IOException;

/**
 * Appends {@code _field_names} entries for this mapper's field to {@code fields}.
 * Nothing is added when the {@code _field_names} field type is missing or disabled.
 *
 * @param context parse context used to look up the {@code _field_names} metadata mapper
 * @param fields  list that receives one {@link Field} per name returned by
 *                {@code FieldNamesFieldMapper.extractFieldNames}
 */
protected void createFieldNamesField(ParseContext context, List<IndexableField> fields) {
    FieldNamesFieldType namesType = (FieldNamesFieldType) context.docMapper()
        .metadataMapper(FieldNamesFieldMapper.class).fieldType();
    if (namesType == null || namesType.isEnabled() == false) {
        return;
    }
    for (String extractedName : FieldNamesFieldMapper.extractFieldNames(fieldType().name())) {
        fields.add(new Field(FieldNamesFieldMapper.NAME, extractedName, namesType));
    }
}

@Override
public Iterator<Mapper> iterator() {
return multiFields.iterator();
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,10 @@
import org.apache.lucene.index.IndexOptions;
import org.apache.lucene.index.IndexableField;
import org.apache.lucene.search.Query;
import org.elasticsearch.Version;
import org.elasticsearch.cluster.metadata.IndexMetaData;
import org.elasticsearch.common.logging.DeprecationLogger;
import org.elasticsearch.common.logging.ESLoggerFactory;
import org.elasticsearch.common.lucene.Lucene;
import org.elasticsearch.common.settings.Settings;
import org.elasticsearch.common.xcontent.XContentBuilder;
Expand All @@ -44,6 +48,9 @@
*/
public class FieldNamesFieldMapper extends MetadataFieldMapper {

private static final DeprecationLogger DEPRECATION_LOGGER = new DeprecationLogger(
ESLoggerFactory.getLogger(FieldNamesFieldMapper.class));

public static final String NAME = "_field_names";

public static final String CONTENT_TYPE = "_field_names";
Expand Down Expand Up @@ -178,11 +185,18 @@ public boolean isEnabled() {
return enabled;
}

/**
 * Exists queries are not supported on the {@code _field_names} field itself.
 *
 * @throws UnsupportedOperationException always
 */
@Override
public Query existsQuery(QueryShardContext context) {
    final String reason = "Cannot run exists query on _field_names";
    throw new UnsupportedOperationException(reason);
}

/**
 * Runs a term query against the {@code _field_names} field. The call is rejected
 * when the field is disabled; otherwise a deprecation warning is logged before
 * delegating to the parent implementation.
 *
 * @throws IllegalStateException if the {@code _field_names} field is not enabled
 */
@Override
public Query termQuery(Object value, QueryShardContext context) {
    if (!isEnabled()) {
        throw new IllegalStateException("Cannot run [exists] queries if the [_field_names] field is disabled");
    }
    final String warning =
        "terms query on the _field_names field is deprecated and will be removed, use exists query instead";
    DEPRECATION_LOGGER.deprecated(warning);
    return super.termQuery(value, context);
}
}
Expand All @@ -206,12 +220,14 @@ public void preParse(ParseContext context) throws IOException {

/**
 * Populates {@code _field_names} after the document has been parsed, but only for
 * indices created before 6.1.0. For newer indices the individual field mappers add
 * their own {@code _field_names} entries, so nothing is done here.
 *
 * The parent {@code parse} is deliberately invoked only inside the version check:
 * calling it unconditionally as well would bypass the gate and parse twice on
 * older indices.
 */
@Override
public void postParse(ParseContext context) throws IOException {
    Version indexCreated = context.indexSettings()
        .getAsVersion(IndexMetaData.SETTING_VERSION_CREATED, Version.CURRENT);
    if (indexCreated.before(Version.V_6_1_0)) {
        super.parse(context);
    }
}

/**
 * Intentionally a no-op that returns {@code null}.
 */
@Override
public Mapper parse(ParseContext context) throws IOException {
// Nothing to do here: adding values to the _field_names field is handled by the
// mappers for each field type (postParse covers indices created before 6.1.0)
return null;
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,10 @@
import org.apache.lucene.document.StoredField;
import org.apache.lucene.index.IndexOptions;
import org.apache.lucene.index.IndexableField;
import org.apache.lucene.index.Term;
import org.apache.lucene.search.DocValuesFieldExistsQuery;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.TermQuery;
import org.elasticsearch.ElasticsearchParseException;
import org.elasticsearch.common.Explicit;
import org.elasticsearch.common.geo.GeoPoint;
Expand All @@ -37,6 +40,7 @@
import org.elasticsearch.index.query.QueryShardException;

import java.io.IOException;
import java.util.ArrayList;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
Expand Down Expand Up @@ -180,6 +184,15 @@ public IndexFieldData.Builder fielddataBuilder(String fullyQualifiedIndexName) {
return new AbstractLatLonPointDVIndexFieldData.Builder();
}

/**
 * Builds the exists query for this field, choosing the implementation by
 * whether doc values are available: a {@link DocValuesFieldExistsQuery} if so,
 * otherwise a term query for this field's name on {@code _field_names}.
 */
@Override
public Query existsQuery(QueryShardContext context) {
    final Query exists;
    if (hasDocValues()) {
        exists = new DocValuesFieldExistsQuery(name());
    } else {
        exists = new TermQuery(new Term(FieldNamesFieldMapper.NAME, name()));
    }
    return exists;
}

@Override
public Query termQuery(Object value, QueryShardContext context) {
throw new QueryShardException(context, "Geo fields do not support exact searching, use dedicated geo queries instead: ["
Expand Down Expand Up @@ -207,6 +220,12 @@ protected void parse(ParseContext context, GeoPoint point) throws IOException {
}
if (fieldType.hasDocValues()) {
context.doc().add(new LatLonDocValuesField(fieldType().name(), point.lat(), point.lon()));
} else if (fieldType().stored() || fieldType().indexOptions() != IndexOptions.NONE) {
List<IndexableField> fields = new ArrayList<>(1);
createFieldNamesField(context, fields);
for (IndexableField field : fields) {
context.doc().add(field);
}
}
// if the mapping contains multifields then use the geohash string
if (multiFields.iterator().hasNext()) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -18,17 +18,20 @@
*/
package org.elasticsearch.index.mapper;

import org.apache.lucene.document.Field;
import org.apache.lucene.index.IndexOptions;
import org.apache.lucene.index.IndexableField;
import org.apache.lucene.index.Term;
import org.apache.lucene.search.DocValuesFieldExistsQuery;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.TermQuery;
import org.apache.lucene.spatial.prefix.PrefixTreeStrategy;
import org.apache.lucene.spatial.prefix.RecursivePrefixTreeStrategy;
import org.apache.lucene.spatial.prefix.TermQueryPrefixTreeStrategy;
import org.apache.lucene.spatial.prefix.tree.GeohashPrefixTree;
import org.apache.lucene.spatial.prefix.tree.PackedQuadPrefixTree;
import org.apache.lucene.spatial.prefix.tree.QuadPrefixTree;
import org.apache.lucene.spatial.prefix.tree.SpatialPrefixTree;
import org.elasticsearch.Version;
import org.elasticsearch.common.Explicit;
import org.elasticsearch.common.geo.GeoUtils;
import org.elasticsearch.common.geo.SpatialStrategy;
Expand All @@ -44,6 +47,8 @@
import org.locationtech.spatial4j.shape.jts.JtsGeometry;

import java.io.IOException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
Expand Down Expand Up @@ -125,6 +130,11 @@ public Builder coerce(boolean coerce) {
return builder;
}

/**
 * Always returns {@code false}; the {@code indexCreated} version is not consulted.
 * NOTE(review): presumably this means doc values are off by default for this
 * builder's fields — confirm against the parent builder's contract.
 */
@Override
protected boolean defaultDocValues(Version indexCreated) {
return false;
}

protected Explicit<Boolean> coerce(BuilderContext context) {
if (coerce != null) {
return new Explicit<>(coerce, true);
Expand Down Expand Up @@ -406,6 +416,11 @@ public PrefixTreeStrategy resolveStrategy(String strategyName) {
throw new IllegalArgumentException("Unknown prefix tree strategy [" + strategyName + "]");
}

/**
 * Returns a term query matching this field's name in the {@code _field_names}
 * field; that is the only exists-query path this type uses.
 */
@Override
public Query existsQuery(QueryShardContext context) {
    final Term nameInFieldNames = new Term(FieldNamesFieldMapper.NAME, name());
    return new TermQuery(nameInFieldNames);
}

@Override
public Query termQuery(Object value, QueryShardContext context) {
throw new QueryShardException(context, "Geo fields do not support exact searching, use dedicated geo queries instead");
Expand Down Expand Up @@ -440,11 +455,9 @@ public Mapper parse(ParseContext context) throws IOException {
throw new MapperParsingException("[{" + fieldType().name() + "}] is configured for points only but a " +
((shape instanceof JtsGeometry) ? ((JtsGeometry)shape).getGeom().getGeometryType() : shape.getClass()) + " was found");
}
Field[] fields = fieldType().defaultStrategy().createIndexableFields(shape);
if (fields == null || fields.length == 0) {
return null;
}
for (Field field : fields) {
List<IndexableField> fields = new ArrayList<>(Arrays.asList(fieldType().defaultStrategy().createIndexableFields(shape)));
createFieldNamesField(context, fields);
for (IndexableField field : fields) {
context.doc().add(field);
}
} catch (Exception e) {
Expand Down
Loading

0 comments on commit 99aca9c

Please sign in to comment.