Skip to content

Commit

Permalink
New queryable "_tier" metadata field (elastic#69288)
Browse files Browse the repository at this point in the history
New _tier metadata field that supports term, terms, exists and wildcard queries on the first data tier preference stated for an index.

Closes elastic#68135
  • Loading branch information
markharwood authored Mar 31, 2021
1 parent 693807a commit 3aee4c1
Show file tree
Hide file tree
Showing 5 changed files with 260 additions and 1 deletion.
7 changes: 7 additions & 0 deletions docs/reference/mapping/fields.asciidoc
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,11 @@ fields can be customized when a mapping is created.

The index to which the document belongs.

<<mapping-tier-field,`_tier`>>::

The current data tier preference of the index to which the document belongs.


<<mapping-id-field,`_id`>>::

The document's ID.
Expand Down Expand Up @@ -72,6 +77,8 @@ include::fields/id-field.asciidoc[]

include::fields/index-field.asciidoc[]

include::fields/tier-field.asciidoc[]

include::fields/meta-field.asciidoc[]

include::fields/routing-field.asciidoc[]
Expand Down
44 changes: 44 additions & 0 deletions docs/reference/mapping/fields/tier-field.asciidoc
Original file line number Diff line number Diff line change
@@ -0,0 +1,44 @@
[[mapping-tier-field]]
=== `_tier` field

When performing queries across multiple indexes, it is sometimes desirable to
target indexes held on nodes of a given data tier (`data_hot`, `data_warm`, `data_cold` or `data_frozen`).
The `_tier` field allows matching on the `tier_preference` setting of the index a document was indexed into.
The preferred value is accessible in certain queries:

[source,console]
--------------------------
PUT index_1/_doc/1
{
"text": "Document in index 1"
}
PUT index_2/_doc/2?refresh=true
{
"text": "Document in index 2"
}
GET index_1,index_2/_search
{
"query": {
"terms": {
"_tier": ["data_hot", "data_warm"] <1>
}
}
}
--------------------------

<1> Querying on the `_tier` field


Typically a query will use a `terms` query to list the tiers of interest but you can use
the `_tier` field in any query that is rewritten to a `term` query, such as the
`match`, `query_string`, `term`, `terms`, or `simple_query_string` query, as well as `prefix`
and `wildcard` queries. However, it does not support `regexp` and `fuzzy`
queries.

The `tier_preference` setting of the index is a comma-delimited list of tier names
in order of preference, i.e. the preferred tier for hosting an index is listed first, followed
by potentially many fall-back options. Query matching only considers the first preference
(the first value of the list).

Original file line number Diff line number Diff line change
@@ -0,0 +1,88 @@
/*
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
* or more contributor license agreements. Licensed under the Elastic License
* 2.0; you may not use this file except in compliance with the Elastic License
* 2.0.
*/

package org.elasticsearch.xpack.cluster.routing.allocation.mapper;

import org.apache.lucene.search.MatchAllDocsQuery;
import org.apache.lucene.search.MatchNoDocsQuery;
import org.apache.lucene.search.Query;
import org.elasticsearch.common.Strings;
import org.elasticsearch.common.regex.Regex;
import org.elasticsearch.index.mapper.ConstantFieldType;
import org.elasticsearch.index.mapper.KeywordFieldMapper;
import org.elasticsearch.index.mapper.MetadataFieldMapper;
import org.elasticsearch.index.mapper.ValueFetcher;
import org.elasticsearch.index.query.SearchExecutionContext;
import org.elasticsearch.xpack.cluster.routing.allocation.DataTierAllocationDecider;

import java.util.Collections;

/**
 * Metadata field mapper for the queryable {@code _tier} field.
 *
 * <p>The field holds no per-document value. Queries against it are answered for the whole
 * index by comparing the query pattern with the first entry of the index's tier preference
 * setting.
 */
public class DataTierFieldMapper extends MetadataFieldMapper {

    /** Name of the metadata field as it is referenced in queries. */
    public static final String NAME = "_tier";

    /** Type name reported for the field. */
    public static final String CONTENT_TYPE = "_tier";

    /** Parser that always produces a new {@link DataTierFieldMapper}. */
    public static final TypeParser PARSER = new FixedTypeParser(c -> new DataTierFieldMapper());

    /**
     * Constant field type whose single value is the first tier preference of the index
     * being searched.
     */
    static final class DataTierFieldType extends ConstantFieldType {

        /** Shared instance; the constructor is private so this is the only one. */
        static final DataTierFieldType INSTANCE = new DataTierFieldType();

        private DataTierFieldType() {
            super(NAME, Collections.emptyMap());
        }

        @Override
        public String typeName() {
            return CONTENT_TYPE;
        }

        /** Reports the keyword content type as this field's type family. */
        @Override
        public String familyTypeName() {
            return KeywordFieldMapper.CONTENT_TYPE;
        }

        /**
         * Tests a simple (wildcard-style) pattern against the first tier preference of the index.
         *
         * @param pattern         the pattern to test
         * @param caseInsensitive when {@code true} the pattern is lower-cased (ASCII) before matching
         * @param context         the search context supplying the index settings
         * @return {@code true} if the index declares a tier preference and its first entry matches
         *         the pattern; {@code false} otherwise, including when no preference is configured
         */
        @Override
        protected boolean matches(String pattern, boolean caseInsensitive, SearchExecutionContext context) {
            final String firstPreference = firstTierPreference(context);
            if (firstPreference == null) {
                return false;
            }
            // Only the pattern is lower-cased; this relies on tier names themselves being lower case
            // (as in data_hot, data_warm, ...) - NOTE(review): confirm no mixed-case tier names exist.
            final String effectivePattern = caseInsensitive ? Strings.toLowercaseAscii(pattern) : pattern;
            return Regex.simpleMatch(effectivePattern, firstPreference);
        }

        /**
         * Builds the query answering "does this field exist" for the index.
         *
         * @param context the search context supplying the index settings
         * @return a match-all query when the index declares a tier preference, a match-none query otherwise
         */
        @Override
        public Query existsQuery(SearchExecutionContext context) {
            if (firstTierPreference(context) == null) {
                return new MatchNoDocsQuery();
            }
            return new MatchAllDocsQuery();
        }

        /**
         * Always fails: values of this field cannot be fetched.
         *
         * @throws UnsupportedOperationException on every call
         */
        @Override
        public ValueFetcher valueFetcher(SearchExecutionContext context, String format) {
            throw new UnsupportedOperationException("Cannot fetch values for internal field [" + name() + "].");
        }

        /**
         * Resolves the first entry of the index's tier preference setting.
         *
         * @param context the search context supplying the index settings
         * @return the trimmed first preference, or {@code null} when the setting is absent or blank
         */
        private static String firstTierPreference(SearchExecutionContext context) {
            final String tierPreference = DataTierAllocationDecider.INDEX_ROUTING_PREFER_SETTING.get(
                context.getIndexSettings().getSettings()
            );
            // An unset string setting is expected to come back as an empty string rather than null
            // (NOTE(review): confirm against the setting's declaration), so a null check alone would
            // treat "no preference" as a real, empty tier name.
            if (Strings.hasText(tierPreference) == false) {
                return null;
            }
            // Tier preference can be a comma-delimited list of tiers, ordered by preference.
            // It was decided we should only test the first of these potentially multiple preferences.
            // Taking the text before the first comma (instead of split(",")[0]) also copes with a
            // value that consists solely of commas, for which split returns an empty array.
            final int firstComma = tierPreference.indexOf(',');
            final String first = (firstComma < 0 ? tierPreference : tierPreference.substring(0, firstComma)).trim();
            return first.isEmpty() ? null : first;
        }
    }

    /** Creates the mapper backed by the shared {@link DataTierFieldType#INSTANCE}. */
    public DataTierFieldMapper() {
        super(DataTierFieldType.INSTANCE);
    }

    @Override
    protected String contentType() {
        return CONTENT_TYPE;
    }
}
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,7 @@
import org.elasticsearch.env.NodeEnvironment;
import org.elasticsearch.index.IndexSettings;
import org.elasticsearch.index.engine.EngineFactory;
import org.elasticsearch.index.mapper.MetadataFieldMapper;
import org.elasticsearch.index.shard.IndexSettingProvider;
import org.elasticsearch.indices.recovery.RecoverySettings;
import org.elasticsearch.license.LicenseService;
Expand All @@ -48,6 +49,7 @@
import org.elasticsearch.plugins.ClusterPlugin;
import org.elasticsearch.plugins.EnginePlugin;
import org.elasticsearch.plugins.ExtensiblePlugin;
import org.elasticsearch.plugins.MapperPlugin;
import org.elasticsearch.plugins.RepositoryPlugin;
import org.elasticsearch.protocol.xpack.XPackInfoRequest;
import org.elasticsearch.protocol.xpack.XPackInfoResponse;
Expand All @@ -61,6 +63,7 @@
import org.elasticsearch.threadpool.ThreadPool;
import org.elasticsearch.watcher.ResourceWatcherService;
import org.elasticsearch.xpack.cluster.routing.allocation.DataTierAllocationDecider;
import org.elasticsearch.xpack.cluster.routing.allocation.mapper.DataTierFieldMapper;
import org.elasticsearch.xpack.core.action.ReloadAnalyzerAction;
import org.elasticsearch.xpack.core.action.TransportReloadAnalyzersAction;
import org.elasticsearch.xpack.core.action.TransportXPackInfoAction;
Expand Down Expand Up @@ -102,7 +105,13 @@
import java.util.stream.Collectors;
import java.util.stream.StreamSupport;

public class XPackPlugin extends XPackClientPlugin implements ExtensiblePlugin, RepositoryPlugin, EnginePlugin, ClusterPlugin {
public class XPackPlugin extends XPackClientPlugin
implements
ExtensiblePlugin,
RepositoryPlugin,
EnginePlugin,
ClusterPlugin,
MapperPlugin {
private static final DeprecationLogger deprecationLogger = DeprecationLogger.getLogger(XPackPlugin.class);

public static final String ASYNC_RESULTS_INDEX = ".async-search";
Expand Down Expand Up @@ -237,6 +246,12 @@ private static boolean alreadyContainsXPackCustomMetadata(ClusterState clusterSt
metadata.custom(WatcherMetadata.TYPE) != null ||
clusterState.custom(TokenMetadata.TYPE) != null;
}

/**
 * Supplies the metadata field mappers contributed by this plugin.
 *
 * @return a single-entry map from {@link DataTierFieldMapper#NAME} to {@link DataTierFieldMapper#PARSER}
 */
@Override
public Map<String, MetadataFieldMapper.TypeParser> getMetadataMappers() {
    final Map<String, MetadataFieldMapper.TypeParser> tierMapper = Map.of(
        DataTierFieldMapper.NAME,
        DataTierFieldMapper.PARSER
    );
    return tierMapper;
}


@Override
public Settings additionalSettings() {
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,105 @@
/*
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
* or more contributor license agreements. Licensed under the Elastic License
* 2.0; you may not use this file except in compliance with the Elastic License
* 2.0.
*/

package org.elasticsearch.xpack.cluster.routing.allocation.mapper;

import org.apache.lucene.search.MatchAllDocsQuery;
import org.apache.lucene.search.MatchNoDocsQuery;
import org.elasticsearch.Version;
import org.elasticsearch.cluster.metadata.IndexMetadata;
import org.elasticsearch.common.regex.Regex;
import org.elasticsearch.common.settings.Settings;
import org.elasticsearch.index.IndexSettings;
import org.elasticsearch.index.mapper.MappedFieldType;
import org.elasticsearch.index.mapper.MapperServiceTestCase;
import org.elasticsearch.index.query.QueryShardException;
import org.elasticsearch.index.query.SearchExecutionContext;
import org.elasticsearch.xpack.cluster.routing.allocation.DataTierAllocationDecider;

import java.io.IOException;
import java.util.Arrays;
import java.util.function.Predicate;

import static java.util.Collections.emptyMap;
import static org.hamcrest.Matchers.containsString;

/**
 * Unit tests for the {@code _tier} field type. Every test runs against a search context whose
 * index declares the tier preference {@code data_warm,data_hot}, so only {@code data_warm}
 * (the first preference) is expected to match.
 */
public class DataTierFieldTypeTests extends MapperServiceTestCase {

    /** Prefix queries match on a prefix of the first preference only. */
    public void testPrefixQuery() throws IOException {
        final MappedFieldType fieldType = DataTierFieldMapper.DataTierFieldType.INSTANCE;
        final SearchExecutionContext context = createContext();

        assertEquals(new MatchAllDocsQuery(), fieldType.prefixQuery("data_w", null, context));
        assertEquals(new MatchNoDocsQuery(), fieldType.prefixQuery("noSuchRole", null, context));
    }

    /** Wildcard queries honour the pattern and the case-insensitivity flag. */
    public void testWildcardQuery() {
        final MappedFieldType fieldType = DataTierFieldMapper.DataTierFieldType.INSTANCE;
        final SearchExecutionContext context = createContext();

        assertEquals(new MatchAllDocsQuery(), fieldType.wildcardQuery("data_w*", null, context));
        assertEquals(new MatchAllDocsQuery(), fieldType.wildcardQuery("data_warm", null, context));
        assertEquals(new MatchAllDocsQuery(), fieldType.wildcardQuery("Data_Warm", null, true, context));
        assertEquals(new MatchNoDocsQuery(), fieldType.wildcardQuery("Data_Warm", null, false, context));
        assertEquals(new MatchNoDocsQuery(), fieldType.wildcardQuery("noSuchRole", null, context));
    }

    /** Term queries match the first preference but not the fall-back preference. */
    public void testTermQuery() {
        final MappedFieldType fieldType = DataTierFieldMapper.DataTierFieldType.INSTANCE;
        final SearchExecutionContext context = createContext();

        assertEquals(new MatchAllDocsQuery(), fieldType.termQuery("data_warm", context));
        assertEquals(new MatchNoDocsQuery(), fieldType.termQuery("data_hot", context));
        assertEquals(new MatchNoDocsQuery(), fieldType.termQuery("noSuchRole", context));
    }

    /** Terms queries match when any listed value equals the first preference. */
    public void testTermsQuery() {
        final MappedFieldType fieldType = DataTierFieldMapper.DataTierFieldType.INSTANCE;
        final SearchExecutionContext context = createContext();

        assertEquals(new MatchAllDocsQuery(), fieldType.termsQuery(Arrays.asList("data_warm"), context));
        assertEquals(new MatchNoDocsQuery(), fieldType.termsQuery(Arrays.asList("data_cold", "data_frozen"), context));
    }

    /** Regexp queries are rejected with a {@link QueryShardException}. */
    public void testRegexpQuery() {
        final MappedFieldType fieldType = DataTierFieldMapper.DataTierFieldType.INSTANCE;
        final SearchExecutionContext context = createContext();

        final QueryShardException failure = expectThrows(QueryShardException.class, () -> {
            assertEquals(new MatchAllDocsQuery(), fieldType.regexpQuery("ind.x", 0, 0, 10, null, context));
        });
        assertThat(failure.getMessage(), containsString("Can only use regexp queries on keyword and text fields"));
    }

    /**
     * Builds a search context for an index named {@code index} with one shard, no replicas and
     * the tier preference {@code data_warm,data_hot}.
     */
    private SearchExecutionContext createContext() {
        final Settings.Builder settingsBuilder = Settings.builder()
            .put(IndexMetadata.SETTING_VERSION_CREATED, Version.CURRENT)
            // Tier can be an ordered list of preferences - starting with primary and followed by fallbacks.
            .put(DataTierAllocationDecider.INDEX_ROUTING_PREFER, "data_warm,data_hot");

        final IndexMetadata metadata = IndexMetadata.builder("index")
            .settings(settingsBuilder)
            .numberOfShards(1)
            .numberOfReplicas(0)
            .build();
        final IndexSettings settingsForIndex = new IndexSettings(metadata, Settings.EMPTY);

        final Predicate<String> nameMatcher = candidate -> Regex.simpleMatch(candidate, "index");

        return new SearchExecutionContext(
            0,
            0,
            settingsForIndex,
            null,
            null,
            null,
            null,
            null,
            null,
            xContentRegistry(),
            writableRegistry(),
            null,
            null,
            System::currentTimeMillis,
            null,
            nameMatcher,
            () -> true,
            null,
            emptyMap()
        );
    }
}

0 comments on commit 3aee4c1

Please sign in to comment.