New _tier metadata field that supports term, terms, exists and wildcard queries on the first data tier preference stated for an index. Backport of 3aee4c1 Closes #68135
elastic · Apr 7, 2021 · 08a188d · 08a188d
1 parent b4e19a2
commit 08a188d
Show file tree

Hide file tree

Showing 5 changed files with 259 additions and 1 deletion.
diff --git a/docs/reference/mapping/fields.asciidoc b/docs/reference/mapping/fields.asciidoc
@@ -17,6 +17,10 @@ some of these metadata fields can be customized when a mapping type is created.
 
     The document's mapping type.
 
+<<mapping-tier-field,`_tier`>>::
+
+    The current data tier preference of the index to which the document belongs.
+
 <<mapping-id-field,`_id`>>::
 
     The document's ID.
@@ -76,6 +80,8 @@ include::fields/id-field.asciidoc[]
 
 include::fields/index-field.asciidoc[]
 
+include::fields/tier-field.asciidoc[]
+
 include::fields/meta-field.asciidoc[]
 
 include::fields/routing-field.asciidoc[]

diff --git a/docs/reference/mapping/fields/tier-field.asciidoc b/docs/reference/mapping/fields/tier-field.asciidoc
@@ -0,0 +1,44 @@
+[[mapping-tier-field]]
+=== `_tier` field
+
+When performing queries across multiple indexes, it is sometimes desirable to
+target indexes held on nodes of a given data tier (`data_hot`, `data_warm`, `data_cold` or `data_frozen`).
+The `_tier` field allows matching on the `tier_preference` setting of the index a document was indexed into.
+The preferred value is accessible in certain queries:
+
+[source,console]
+--------------------------
+PUT index_1/_doc/1
+{
+  "text": "Document in index 1"
+}
+
+PUT index_2/_doc/2?refresh=true
+{
+  "text": "Document in index 2"
+}
+
+GET index_1,index_2/_search
+{
+  "query": {
+    "terms": {
+      "_tier": ["data_hot", "data_warm"] <1>
+    }
+  }
+}
+--------------------------
+
+<1> Querying on the `_tier` field
+
+
+Typically a query will use a `terms` query to list the tiers of interest, but you can use
+the `_tier` field in any query that is rewritten to a `term` query, such as the
+`match`, `query_string`, `term`, `terms`, or `simple_query_string` query, as well as `prefix`
+and `wildcard` queries. However, it does not support `regexp` or `fuzzy`
+queries.
+
+The `tier_preference` setting of the index is a comma-delimited list of tier names
+in order of preference, i.e. the preferred tier for hosting an index is listed first, followed
+by potentially many fall-back options. Query matching only considers the first preference
+(the first value of the list).
+
diff --git a/...n/java/org/elasticsearch/xpack/cluster/routing/allocation/mapper/DataTierFieldMapper.java b/...n/java/org/elasticsearch/xpack/cluster/routing/allocation/mapper/DataTierFieldMapper.java
@@ -0,0 +1,88 @@
+/*
+ * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
+ * or more contributor license agreements. Licensed under the Elastic License
+ * 2.0; you may not use this file except in compliance with the Elastic License
+ * 2.0.
+ */
+
+package org.elasticsearch.xpack.cluster.routing.allocation.mapper;
+
+import org.apache.lucene.search.MatchAllDocsQuery;
+import org.apache.lucene.search.MatchNoDocsQuery;
+import org.apache.lucene.search.Query;
+import org.elasticsearch.common.Strings;
+import org.elasticsearch.common.regex.Regex;
+import org.elasticsearch.index.mapper.ConstantFieldType;
+import org.elasticsearch.index.mapper.KeywordFieldMapper;
+import org.elasticsearch.index.mapper.MetadataFieldMapper;
+import org.elasticsearch.index.mapper.ValueFetcher;
+import org.elasticsearch.index.query.SearchExecutionContext;
+import org.elasticsearch.xpack.cluster.routing.allocation.DataTierAllocationDecider;
+
+import java.util.Collections;
+
+public class DataTierFieldMapper extends MetadataFieldMapper {
+
+    public static final String NAME = "_tier";
+
+    public static final String CONTENT_TYPE = "_tier";
+
+    public static final TypeParser PARSER = new FixedTypeParser(c -> new DataTierFieldMapper());
+
+    static final class DataTierFieldType extends ConstantFieldType {
+
+        static final DataTierFieldType INSTANCE = new DataTierFieldType();
+
+        private DataTierFieldType() {
+            super(NAME, Collections.emptyMap());
+        }
+
+        @Override
+        public String typeName() {
+            return CONTENT_TYPE;
+        }
+
+        @Override
+        public String familyTypeName() {
+            return KeywordFieldMapper.CONTENT_TYPE;
+        }
+
+        @Override
+        protected boolean matches(String pattern, boolean caseInsensitive, SearchExecutionContext context) {
+            if (caseInsensitive) {
+                pattern = Strings.toLowercaseAscii(pattern);
+            }
+            String tierPreference = DataTierAllocationDecider.INDEX_ROUTING_PREFER_SETTING.get(context.getIndexSettings().getSettings());
+            if (tierPreference == null) {
+                return false;
+            }
+            // Tier preference can be a comma-delimited list of tiers, ordered by preference
+            // It was decided we should only test the first of these potentially multiple preferences.
+            String firstPreference = tierPreference.split(",")[0].trim();
+            return Regex.simpleMatch(pattern, firstPreference);
+        }
+
+        @Override
+        public Query existsQuery(SearchExecutionContext context) {
+            String tierPreference = DataTierAllocationDecider.INDEX_ROUTING_PREFER_SETTING.get(context.getIndexSettings().getSettings());
+            if (tierPreference == null) {
+                return new MatchNoDocsQuery();
+            }
+            return new MatchAllDocsQuery();
+        }
+
+        @Override
+        public ValueFetcher valueFetcher(SearchExecutionContext context, String format) {
+            throw new UnsupportedOperationException("Cannot fetch values for internal field [" + name() + "].");
+        }
+    }
+
+    public DataTierFieldMapper() {
+        super(DataTierFieldType.INSTANCE);
+    }
+
+    @Override
+    protected String contentType() {
+        return CONTENT_TYPE;
+    }
+}
diff --git a/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/XPackPlugin.java b/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/XPackPlugin.java
@@ -41,6 +41,7 @@
 import org.elasticsearch.env.NodeEnvironment;
 import org.elasticsearch.index.IndexSettings;
 import org.elasticsearch.index.engine.EngineFactory;
+import org.elasticsearch.index.mapper.MetadataFieldMapper;
 import org.elasticsearch.index.shard.IndexSettingProvider;
 import org.elasticsearch.indices.recovery.RecoverySettings;
 import org.elasticsearch.license.LicenseService;
@@ -51,6 +52,7 @@
 import org.elasticsearch.plugins.ClusterPlugin;
 import org.elasticsearch.plugins.EnginePlugin;
 import org.elasticsearch.plugins.ExtensiblePlugin;
+import org.elasticsearch.plugins.MapperPlugin;
 import org.elasticsearch.plugins.RepositoryPlugin;
 import org.elasticsearch.repositories.RepositoriesService;
 import org.elasticsearch.repositories.Repository;
@@ -61,6 +63,7 @@
 import org.elasticsearch.threadpool.ThreadPool;
 import org.elasticsearch.watcher.ResourceWatcherService;
 import org.elasticsearch.xpack.cluster.routing.allocation.DataTierAllocationDecider;
+import org.elasticsearch.xpack.cluster.routing.allocation.mapper.DataTierFieldMapper;
 import org.elasticsearch.xpack.core.action.ReloadAnalyzerAction;
 import org.elasticsearch.xpack.core.action.TransportReloadAnalyzersAction;
 import org.elasticsearch.xpack.core.action.TransportXPackInfoAction;
@@ -96,7 +99,13 @@
 import java.util.stream.Collectors;
 import java.util.stream.StreamSupport;
 
-public class XPackPlugin extends XPackClientPlugin implements ExtensiblePlugin, RepositoryPlugin, EnginePlugin, ClusterPlugin {
+public class XPackPlugin extends XPackClientPlugin
+    implements
+        ExtensiblePlugin,
+        RepositoryPlugin,
+        EnginePlugin,
+        ClusterPlugin,
+        MapperPlugin {
     private static final DeprecationLogger deprecationLogger = DeprecationLogger.getLogger(XPackPlugin.class);
 
     public static final String ASYNC_RESULTS_INDEX = ".async-search";
@@ -237,6 +246,12 @@ private static boolean alreadyContainsXPackCustomMetadata(ClusterState clusterSt
             metadata.custom(WatcherMetadata.TYPE) != null ||
             clusterState.custom(TokenMetadata.TYPE) != null;
     }
+
+    /**
+     * Registers the {@code _tier} metadata field so it is available on every index.
+     *
+     * @return a single-entry map from the field name to its type parser
+     */
+    @Override
+    public Map<String, MetadataFieldMapper.TypeParser> getMetadataMappers() {
+        final String tierFieldName = DataTierFieldMapper.NAME;
+        final MetadataFieldMapper.TypeParser tierFieldParser = DataTierFieldMapper.PARSER;
+        return Map.of(tierFieldName, tierFieldParser);
+    }
+
 
     @Override
     public Settings additionalSettings() {

diff --git a/...ava/org/elasticsearch/xpack/cluster/routing/allocation/mapper/DataTierFieldTypeTests.java b/...ava/org/elasticsearch/xpack/cluster/routing/allocation/mapper/DataTierFieldTypeTests.java
@@ -0,0 +1,105 @@
+/*
+ * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
+ * or more contributor license agreements. Licensed under the Elastic License
+ * 2.0; you may not use this file except in compliance with the Elastic License
+ * 2.0.
+ */
+
+package org.elasticsearch.xpack.cluster.routing.allocation.mapper;
+
+import org.apache.lucene.search.MatchAllDocsQuery;
+import org.apache.lucene.search.MatchNoDocsQuery;
+import org.elasticsearch.Version;
+import org.elasticsearch.cluster.metadata.IndexMetadata;
+import org.elasticsearch.common.regex.Regex;
+import org.elasticsearch.common.settings.Settings;
+import org.elasticsearch.index.IndexSettings;
+import org.elasticsearch.index.mapper.MappedFieldType;
+import org.elasticsearch.index.mapper.MapperServiceTestCase;
+import org.elasticsearch.index.query.QueryShardException;
+import org.elasticsearch.index.query.SearchExecutionContext;
+import org.elasticsearch.xpack.cluster.routing.allocation.DataTierAllocationDecider;
+
+import java.io.IOException;
+import java.util.Arrays;
+import java.util.function.Predicate;
+
+import static java.util.Collections.emptyMap;
+import static org.hamcrest.Matchers.containsString;
+
+public class DataTierFieldTypeTests extends MapperServiceTestCase {
+
+    public void testPrefixQuery() throws IOException {
+        MappedFieldType ft = DataTierFieldMapper.DataTierFieldType.INSTANCE;
+        assertEquals(new MatchAllDocsQuery(), ft.prefixQuery("data_w", null, createContext()));
+        assertEquals(new MatchNoDocsQuery(), ft.prefixQuery("noSuchRole", null, createContext()));
+    }
+
+    public void testWildcardQuery() {
+        MappedFieldType ft = DataTierFieldMapper.DataTierFieldType.INSTANCE;
+        assertEquals(new MatchAllDocsQuery(), ft.wildcardQuery("data_w*", null, createContext()));
+        assertEquals(new MatchAllDocsQuery(), ft.wildcardQuery("data_warm", null, createContext()));
+        assertEquals(new MatchAllDocsQuery(), ft.wildcardQuery("Data_Warm", null, true, createContext()));
+        assertEquals(new MatchNoDocsQuery(), ft.wildcardQuery("Data_Warm", null, false, createContext()));
+        assertEquals(new MatchNoDocsQuery(), ft.wildcardQuery("noSuchRole", null, createContext()));
+    }
+
+    public void testTermQuery() {
+        MappedFieldType ft = DataTierFieldMapper.DataTierFieldType.INSTANCE;
+        assertEquals(new MatchAllDocsQuery(), ft.termQuery("data_warm", createContext()));
+        assertEquals(new MatchNoDocsQuery(), ft.termQuery("data_hot", createContext()));
+        assertEquals(new MatchNoDocsQuery(), ft.termQuery("noSuchRole", createContext()));
+    }
+
+    public void testTermsQuery() {
+        MappedFieldType ft = DataTierFieldMapper.DataTierFieldType.INSTANCE;
+        assertEquals(new MatchAllDocsQuery(), ft.termsQuery(Arrays.asList("data_warm"), createContext()));
+        assertEquals(new MatchNoDocsQuery(), ft.termsQuery(Arrays.asList("data_cold", "data_frozen"), createContext()));
+    }
+
+    public void testRegexpQuery() {
+        MappedFieldType ft = DataTierFieldMapper.DataTierFieldType.INSTANCE;
+        QueryShardException e = expectThrows(
+            QueryShardException.class,
+            () -> assertEquals(new MatchAllDocsQuery(), ft.regexpQuery("ind.x", 0, 0, 10, null, createContext()))
+        );
+        assertThat(e.getMessage(), containsString("Can only use regexp queries on keyword and text fields"));
+    }
+
+    private SearchExecutionContext createContext() {
+        IndexMetadata indexMetadata = IndexMetadata.builder("index")
+            .settings(
+                Settings.builder()
+                    .put(IndexMetadata.SETTING_VERSION_CREATED, Version.CURRENT)
+                    // Tier can be an ordered list of preferences - starting with primary and followed by fallbacks.
+                    .put(DataTierAllocationDecider.INDEX_ROUTING_PREFER, "data_warm,data_hot")
+            )
+            .numberOfShards(1)
+            .numberOfReplicas(0)
+            .build();
+        IndexSettings indexSettings = new IndexSettings(indexMetadata, Settings.EMPTY);
+
+        Predicate<String> indexNameMatcher = pattern -> Regex.simpleMatch(pattern, "index");
+        return new SearchExecutionContext(
+            0,
+            0,
+            indexSettings,
+            null,
+            null,
+            null,
+            null,
+            null,
+            null,
+            xContentRegistry(),
+            writableRegistry(),
+            null,
+            null,
+            System::currentTimeMillis,
+            null,
+            indexNameMatcher,
+            () -> true,
+            null,
+            emptyMap()
+        );
+    }
+}