Switches Elasticsearch to only write single-type indexes (openzipkin#1698)

Multi-type indexes are no longer supported starting with Elasticsearch
6. We added support for single-type writes in Zipkin 1.29 as opt-in
functionality via `ES_EXPERIMENTAL_SPAN2=true`. This makes single-type
writes the only means to ingest spans, even though we still read
both models.

To turn off dual-reads, set `ES_LEGACY_READS_ENABLED=false` once your
collectors have been updated to Zipkin 1.31 and old indexes are no longer
valuable.
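
As a quick sketch (not part of this change set), assuming the usual `java -jar` launch
and the standard `STORAGE_TYPE`/`ES_HOSTS` settings for Elasticsearch, disabling dual
reads from the environment could look like this; the host and jar path are illustrative:
```bash
# Sketch: start a Zipkin server against Elasticsearch with dual reads disabled.
# Adjust the host and jar path for your deployment.
STORAGE_TYPE=elasticsearch \
ES_HOSTS=http://elasticsearch:9200 \
ES_LEGACY_READS_ENABLED=false \
java -jar zipkin.jar
```
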
adriancole authored and abesto committed Sep 10, 2019
1 parent 6443ae0 commit 9adcf55
Showing 19 changed files with 138 additions and 979 deletions.
@@ -50,6 +50,8 @@ public class ZipkinElasticsearchHttpStorageProperties implements Serializable {
private String password;
/** When set, controls the volume of HTTP logging of the Elasticsearch API. Options are BASIC, HEADERS, BODY */
private HttpLoggingInterceptor.Level httpLogging;
/** When true, redundantly queries indexes made with pre-v1.31 collectors. Defaults to true. */
private boolean legacyReadsEnabled = true;

public String getPipeline() {
return pipeline;
@@ -151,6 +153,14 @@ public void setHttpLogging(HttpLoggingInterceptor.Level httpLogging) {
this.httpLogging = httpLogging;
}

public boolean isLegacyReadsEnabled() {
return legacyReadsEnabled;
}

public void setLegacyReadsEnabled(boolean legacyReadsEnabled) {
this.legacyReadsEnabled = legacyReadsEnabled;
}

public ElasticsearchHttpStorage.Builder toBuilder(OkHttpClient client) {
ElasticsearchHttpStorage.Builder builder = ElasticsearchHttpStorage.builder(client);
if (hosts != null) builder.hosts(hosts);
@@ -160,6 +170,7 @@ public ElasticsearchHttpStorage.Builder toBuilder(OkHttpClient client) {
.pipeline(pipeline)
.maxRequests(maxRequests)
.indexShards(indexShards)
.indexReplicas(indexReplicas);
.indexReplicas(indexReplicas)
.legacyReadsEnabled(legacyReadsEnabled);
}
}
@@ -316,7 +316,7 @@ public void doesntProvideBasicAuthInterceptor_whenBasicAuthUserNameandPasswordNo
}

@Test
public void providesBasicAuthInterceptor_whenBasicAuthUserNameandPasswordConfigured() {
public void providesBasicAuthInterceptor_whenBasicAuthUserNameAndPasswordConfigured() {
context = new AnnotationConfigApplicationContext();
addEnvironment(context,
"zipkin.storage.type:elasticsearch",
@@ -335,6 +335,35 @@ public void providesBasicAuthInterceptor_whenBasicAuthUserNameandPasswordConfigu
.contains((Class) BasicAuthInterceptor.class);
}

@Test
public void legacyReadsEnabled() {
context = new AnnotationConfigApplicationContext();
addEnvironment(context,
"zipkin.storage.type:elasticsearch",
"zipkin.storage.elasticsearch.hosts:http://host1:9200");
context.register(PropertyPlaceholderAutoConfiguration.class,
ZipkinElasticsearchOkHttpAutoConfiguration.class,
ZipkinElasticsearchHttpStorageAutoConfiguration.class);
context.refresh();

assertThat(es().legacyReadsEnabled()).isTrue();
}

@Test
public void legacyReadsEnabled_false() {
context = new AnnotationConfigApplicationContext();
addEnvironment(context,
"zipkin.storage.type:elasticsearch",
"zipkin.storage.elasticsearch.hosts:http://host1:9200",
"zipkin.storage.elasticsearch.legacy-reads-enabled:false");
context.register(PropertyPlaceholderAutoConfiguration.class,
ZipkinElasticsearchOkHttpAutoConfiguration.class,
ZipkinElasticsearchHttpStorageAutoConfiguration.class);
context.refresh();

assertThat(es().legacyReadsEnabled()).isFalse();
}

ElasticsearchHttpStorage es() {
return context.getBean(ElasticsearchHttpStorage.class);
}
2 changes: 2 additions & 0 deletions zipkin-server/README.md
@@ -196,6 +196,8 @@ The following apply when `STORAGE_TYPE` is set to `elasticsearch`:
Use when X-Pack security (formerly Shield) is in place.
* `ES_HTTP_LOGGING`: When set, controls the volume of HTTP logging of the Elasticsearch API.
Options are BASIC, HEADERS, BODY
* `ES_LEGACY_READS_ENABLED`: When true, redundantly queries indexes made with pre-v1.31 collectors.
Defaults to true.
Example usage:

To connect normally:
1 change: 1 addition & 0 deletions zipkin-server/src/main/resources/zipkin-server-shared.yml
@@ -102,6 +102,7 @@ zipkin:
username: ${ES_USERNAME:}
password: ${ES_PASSWORD:}
http-logging: ${ES_HTTP_LOGGING:}
legacy-reads-enabled: ${ES_LEGACY_READS_ENABLED:true}
mysql:
host: ${MYSQL_HOST:localhost}
port: ${MYSQL_TCP_PORT:3306}
83 changes: 32 additions & 51 deletions zipkin-storage/elasticsearch-http/README.md
@@ -21,9 +21,19 @@ Here are some examples:
* http://elasticsearch:9200,http://1.2.3.4:9200
* http://elasticsearch-1:9200,http://elasticsearch-2:9200

## Format
Spans are stored in version 2 format, which is the same as the [v2 POST endpoint](http://zipkin.io/zipkin-api/#/default/post_spans)
with one difference described below. We add a "timestamp_millis" field
to aid in integration with other tools.

### Timestamps
Zipkin's timestamps are in epoch microseconds, which is not a supported date type in Elasticsearch.
In consideration of tools like Kibana, this component adds "timestamp_millis" when writing
spans. This is mapped to the Elasticsearch date type, so it can be used in date-based queries.
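
As a sketch of the kind of date-based query this enables (the index name and time window
below are illustrative), a range query against `timestamp_millis` might look like:
```bash
# Sketch: find spans in a one-hour window using the timestamp_millis date field.
curl -s -H 'Content-Type: application/json' \
  localhost:9200/zipkin:span-2017-08-11/_search -d '{
  "query": {
    "range": {
      "timestamp_millis": {
        "gte": "2017-08-11T00:00:00",
        "lt":  "2017-08-11T01:00:00"
      }
    }
  }
}'
```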

## Indexes
Spans are stored into daily indices, for example spans with a timestamp
falling on 2016/03/19 will be stored in the index named 'zipkin-2016-03-19'.
falling on 2016/03/19 will be stored in the index named 'zipkin:span-2016-03-19'.
There is no support for TTL through this SpanStore. It is recommended
instead to use [Elastic Curator](https://www.elastic.co/guide/en/elasticsearch/client/curator/current/about.html)
to remove indices older than the point you are interested in.
@@ -36,19 +46,30 @@ the date separator from '-' to something else.
control the daily index format.

For example, spans with a timestamp falling on 2016/03/19 end up in the
index 'zipkin-2016-03-19'. When the date separator is '.', the index
would be 'zipkin-2016.03.19'.
index 'zipkin:span-2016-03-19'. When the date separator is '.', the index
would be 'zipkin:span-2016.03.19'.
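
If you want to see which daily indexes exist, or drop an old one by hand rather than via
Curator, the standard cat-indices and delete-index APIs work; the index names and dates
below are illustrative:
```bash
# Sketch: list the daily span indexes, then delete one that is no longer needed.
curl -s 'localhost:9200/_cat/indices/zipkin:span-*?v'
curl -s -XDELETE 'localhost:9200/zipkin:span-2016-03-19'
```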

### String Mapping
The Zipkin api implies aggregation and exact match (keyword) on string
fields named `traceId` and `name`, as well nested fields named
`serviceName`, `key` and `value`. Indexing on these fields is limited to
256 characters eventhough storage is currently unbounded.
fields named `traceId`, `name` and `serviceName`. Indexing on these
fields is limited to 256 characters, even though storage is currently
unbounded.

### Timestamps
Zipkin's timestamps are in epoch microseconds, which is not a supported date type in Elasticsearch.
In consideration of tools like like Kibana, this component adds "timestamp_millis" when writing
spans. This is mapped to the Elasticsearch date type, so can be used to any date-based queries.
### Query indexing
To support the zipkin query api, a special index field named `_q` is
added to documents, containing annotation values and tag entry pairs.
Ex: the tag `"error": "500"` results in `"_q":["error", "error=500"]`.
The values in `_q` are limited to 256 characters and searched as keywords.

You can check these manually like so:
```bash
$ curl -s localhost:9200/zipkin:span-2017-08-11/_search?q=_q:error=500
```

The reason for special casing is around dotted name constraints. Tags
are stored as a dictionary. Some keys include an inconsistent number of dots
(ex "error" and "error.message"). Elasticsearch cannot index these as it
interprets them as fields, and dots in fields imply an object path.

### Trace Identifiers
Unless `ElasticsearchHttpStorage.Builder.strictTraceId` is set to false,
@@ -82,54 +103,14 @@ your indexes:

```bash
# the output below shows which tokens will match on the trace id supplied.
$ curl -s localhost:9200/test_zipkin_http-2016-10-26/_analyze -d '{
$ curl -s localhost:9200/zipkin:span-2017-08-22/_analyze -d '{
"text": "48485a3953bb61246b221d5bc9e6496c",
"analyzer": "traceId_analyzer"
}'|jq '.tokens|.[]|.token'
"48485a3953bb61246b221d5bc9e6496c"
"6b221d5bc9e6496c"
```

### Span and Service Names
Zipkin defines span and service names as lowercase. At write time, any
mixed case span or service names are downcased. If writing a custom
collector in a different language, make sure you write span and service
names in lowercase. Also, if there are any custom query tools, ensure
inputs are downcased.

Span and service name queries default to look back 24hrs (2 index days).
This can be controlled by `ElasticsearchHttpStorage.Builder.namesLookback`

#### Index format
Starting with Zipkin 1.23, service and span names are written to the
same daily indexes as spans and dependency links as the document type
"servicespan". This was added for performance reasons as formerly search
was using relatively expensive nested queries.

The documents themselves represent service and span name pairs. Only one
document is present per daily index. This is to keep the documents from
repeating at a multiplier of span count, which also simplifies query.
This deduplication is enforced at write time by using an ID convention
of the service and span name. Ex. `id = MyServiceName|MySpanName`

The document is a simple structure, like:
```json
{
"serviceName": "MyServiceName",
"spanName": "MySpanName",
}
```

The document does replicate data in the ID, but it is needed as you
cannot query based on an ID expression.

#### Notes for data written prior to Zipkin 1.23
Before Zipkin 1.23, service and span names were nested queries against
the span type. This was an expensive operation, which resulted in high
latency particularly when the UI loads. When the "servicespan" type is
missing from an index, or there's no results returned, a fallback nested
query is invoked.

## Customizing the ingest pipeline

When using Elasticsearch 5.x, you can setup an [ingest pipeline](https://www.elastic.co/guide/en/elasticsearch/reference/master/pipeline.html)
@@ -36,7 +36,6 @@
import zipkin.storage.elasticsearch.http.internal.LenientDoubleCallbackAsyncSpanStore;
import zipkin.storage.elasticsearch.http.internal.client.HttpCall;

import static zipkin.internal.Util.checkArgument;
import static zipkin.internal.Util.checkNotNull;
import static zipkin.moshi.JsonReaders.enterPath;
import static zipkin.storage.elasticsearch.http.ElasticsearchHttpSpanStore.DEPENDENCY;
@@ -68,9 +67,7 @@ public static Builder builder(OkHttpClient client) {
.namesLookback(86400000)
.shutdownClientOnClose(false)
.flushOnWrites(false)
.singleTypeIndexingEnabled(
Boolean.valueOf(System.getenv("ES_EXPERIMENTAL_SPAN2"))
);
.legacyReadsEnabled(true);
}

public static Builder builder() {
@@ -127,6 +124,9 @@ public final Builder hosts(final List<String> hosts) {
*/
public abstract Builder namesLookback(int namesLookback);

/** When true, redundantly queries indexes made with pre-v1.31 collectors. Defaults to true. */
public abstract Builder legacyReadsEnabled(boolean legacyReadsEnabled);

/** Visible for testing */
abstract Builder flushOnWrites(boolean flushOnWrites);

@@ -171,9 +171,6 @@ public final Builder dateSeparator(char dateSeparator) {
*/
public abstract Builder indexReplicas(int indexReplicas);

/** intentionally hidden for now */
abstract Builder singleTypeIndexingEnabled(boolean singleTypeIndexingEnabled);

@Override public abstract Builder strictTraceId(boolean strictTraceId);

@Override public abstract ElasticsearchHttpStorage build();
@@ -206,18 +203,16 @@ public final Builder dateSeparator(char dateSeparator) {

abstract int namesLookback();

abstract boolean singleTypeIndexingEnabled();
abstract boolean legacyReadsEnabled();

@Override public SpanStore spanStore() {
return StorageAdapters.asyncToBlocking(asyncSpanStore());
}

@Override public AsyncSpanStore asyncSpanStore() {
float version = ensureIndexTemplates().version();
if (version >= 6) { // then multi-type (legacy) index isn't possible
if (version >= 6 /* multi-type (legacy) index isn't possible */ || !legacyReadsEnabled()) {
return new ElasticsearchHttpSpanStore(this);
} else if (version < 2 || !singleTypeIndexingEnabled()) { // don't fan out queries unnecessarily
return new LegacyElasticsearchHttpSpanStore(this);
} else { // fan out queries as we don't know if old legacy collectors are in use
return new LenientDoubleCallbackAsyncSpanStore(
new ElasticsearchHttpSpanStore(this),
Expand All @@ -227,23 +222,15 @@ public final Builder dateSeparator(char dateSeparator) {
}

@Override public AsyncSpanConsumer asyncSpanConsumer() {
// We only write once, so we detect which approach we should take
if (shouldUseSingleTypeIndexing(ensureIndexTemplates())) {
return new ElasticsearchHttpSpanConsumer(this);
} else {
return new LegacyElasticsearchHttpSpanConsumer(this);
}
ensureIndexTemplates();
return new ElasticsearchHttpSpanConsumer(this);
}

/** This is a blocking call, only used in tests. */
void clear() throws IOException {
Set<String> toClear = new LinkedHashSet<>();
if (shouldUseSingleTypeIndexing(ensureIndexTemplates())) {
toClear.add(indexNameFormatter().formatType(SPAN));
toClear.add(indexNameFormatter().formatType(DEPENDENCY));
} else {
toClear.add(indexNameFormatter().formatType(null));
}
toClear.add(indexNameFormatter().formatType(SPAN));
toClear.add(indexNameFormatter().formatType(DEPENDENCY));
for (String index : toClear) clear(index);
}

@@ -298,24 +285,12 @@ CheckResult ensureClusterReady(String index) {
IndexTemplates ensureIndexTemplates() {
String index = indexNameFormatter().index();
IndexTemplates templates = new VersionSpecificTemplates(this).get(http());
if (shouldUseSingleTypeIndexing(templates)) {
EnsureIndexTemplate.apply(http(), index + ":" + SPAN + "_template", templates.span());
EnsureIndexTemplate.apply(http(), index + ":" + DEPENDENCY + "_template",
templates.dependency());
} else { // TODO: remove when we stop writing span1 format
checkArgument(templates.legacy() != null,
"multiple type template is null: version=%s, singleTypeIndexingEnabled=%s",
templates.version(), singleTypeIndexingEnabled());
EnsureIndexTemplate.apply(http(), index + "_template", templates.legacy());
}
EnsureIndexTemplate.apply(http(), index + ":" + SPAN + "_template", templates.span());
EnsureIndexTemplate.apply(http(), index + ":" + DEPENDENCY + "_template",
templates.dependency());
return templates;
}

private boolean shouldUseSingleTypeIndexing(IndexTemplates templates) {
return (templates.span() != null && singleTypeIndexingEnabled())
|| templates.version() >= 6;
}

@Memoized // hosts resolution might imply a network call, and we might make a new okhttp instance
HttpCall.Factory http() {
List<String> hosts = hostsSupplier().get();
@@ -14,7 +14,6 @@
package zipkin.storage.elasticsearch.http;

import com.google.auto.value.AutoValue;
import zipkin.internal.Nullable;

@AutoValue
abstract class IndexTemplates {
@@ -24,18 +23,13 @@ static Builder builder() {

abstract float version();

/** null when multi-type indexes are not supported */
@Nullable abstract String legacy();

abstract String span();

abstract String dependency();

@AutoValue.Builder interface Builder {
Builder version(float version);

Builder legacy(@Nullable String legacy);

Builder span(String span);

Builder dependency(String dependency);