Switches Elasticsearch to only write single-type indexes (openzipkin#1698)

Multi-type indexes are no longer supported starting with Elasticsearch
6. We added support for single-type writes in Zipkin 1.29 as opt-in
functionality via `ES_EXPERIMENTAL_SPAN2=true`. This makes single-type
writes the only means to ingest spans, even though we still read
both models.

To turn off dual-reads, set `ES_LEGACY_READS_ENABLED=false` once your
collectors have been updated to Zipkin 1.31 and old indexes are no longer
valuable.
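
As a quick sketch (not part of this change set), assuming the usual `java -jar` launch
and the standard `STORAGE_TYPE`/`ES_HOSTS` settings for Elasticsearch, disabling dual
reads from the environment could look like this; the host and jar path are illustrative:
```bash
# Sketch: start a Zipkin server against Elasticsearch with dual reads disabled.
# Adjust the host and jar path for your deployment.
STORAGE_TYPE=elasticsearch \
ES_HOSTS=http://elasticsearch:9200 \
ES_LEGACY_READS_ENABLED=false \
java -jar zipkin.jar
```
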
adriancole authored and abesto committed Sep 10, 2019
1 parent 6443ae0 commit 9adcf55
Showing 19 changed files with 138 additions and 979 deletions.
@@ -50,6 +50,8 @@ public class ZipkinElasticsearchHttpStorageProperties implements Serializable {
private String password;
/** When set, controls the volume of HTTP logging of the Elasticsearch API. Options are BASIC, HEADERS, BODY */
private HttpLoggingInterceptor.Level httpLogging;
/** When true, redundantly queries indexes made with pre-v1.31 collectors. Defaults to true. */
private boolean legacyReadsEnabled = true;

public String getPipeline() {
return pipeline;
@@ -151,6 +153,14 @@ public void setHttpLogging(HttpLoggingInterceptor.Level httpLogging) {
this.httpLogging = httpLogging;
}

public boolean isLegacyReadsEnabled() {
return legacyReadsEnabled;
}

public void setLegacyReadsEnabled(boolean legacyReadsEnabled) {
this.legacyReadsEnabled = legacyReadsEnabled;
}

public ElasticsearchHttpStorage.Builder toBuilder(OkHttpClient client) {
ElasticsearchHttpStorage.Builder builder = ElasticsearchHttpStorage.builder(client);
if (hosts != null) builder.hosts(hosts);
@@ -160,6 +170,7 @@ public ElasticsearchHttpStorage.Builder toBuilder(OkHttpClient client) {
.pipeline(pipeline)
.maxRequests(maxRequests)
.indexShards(indexShards)
.indexReplicas(indexReplicas);
.indexReplicas(indexReplicas)
.legacyReadsEnabled(legacyReadsEnabled);
}
}
@@ -316,7 +316,7 @@ public void doesntProvideBasicAuthInterceptor_whenBasicAuthUserNameandPasswordNo
}

@Test
public void providesBasicAuthInterceptor_whenBasicAuthUserNameandPasswordConfigured() {
public void providesBasicAuthInterceptor_whenBasicAuthUserNameAndPasswordConfigured() {
context = new AnnotationConfigApplicationContext();
addEnvironment(context,
"zipkin.storage.type:elasticsearch",
@@ -335,6 +335,35 @@ public void providesBasicAuthInterceptor_whenBasicAuthUserNameandPasswordConfigu
.contains((Class) BasicAuthInterceptor.class);
}

@Test
public void legacyReadsEnabled() {
context = new AnnotationConfigApplicationContext();
addEnvironment(context,
"zipkin.storage.type:elasticsearch",
"zipkin.storage.elasticsearch.hosts:http://host1:9200");
context.register(PropertyPlaceholderAutoConfiguration.class,
ZipkinElasticsearchOkHttpAutoConfiguration.class,
ZipkinElasticsearchHttpStorageAutoConfiguration.class);
context.refresh();

assertThat(es().legacyReadsEnabled()).isTrue();
}

@Test
public void legacyReadsEnabled_false() {
context = new AnnotationConfigApplicationContext();
addEnvironment(context,
"zipkin.storage.type:elasticsearch",
"zipkin.storage.elasticsearch.hosts:http://host1:9200",
"zipkin.storage.elasticsearch.legacy-reads-enabled:false");
context.register(PropertyPlaceholderAutoConfiguration.class,
ZipkinElasticsearchOkHttpAutoConfiguration.class,
ZipkinElasticsearchHttpStorageAutoConfiguration.class);
context.refresh();

assertThat(es().legacyReadsEnabled()).isFalse();
}

ElasticsearchHttpStorage es() {
return context.getBean(ElasticsearchHttpStorage.class);
}
2 changes: 2 additions & 0 deletions zipkin-server/README.md
@@ -196,6 +196,8 @@ The following apply when `STORAGE_TYPE` is set to `elasticsearch`:
Use when X-Pack security (formerly Shield) is in place.
* `ES_HTTP_LOGGING`: When set, controls the volume of HTTP logging of the Elasticsearch API.
Options are BASIC, HEADERS, BODY
* `ES_LEGACY_READS_ENABLED`: When true, redundantly queries indexes made with pre-v1.31 collectors.
Defaults to true.
Example usage:

To connect normally:
1 change: 1 addition & 0 deletions zipkin-server/src/main/resources/zipkin-server-shared.yml
@@ -102,6 +102,7 @@ zipkin:
username: ${ES_USERNAME:}
password: ${ES_PASSWORD:}
http-logging: ${ES_HTTP_LOGGING:}
legacy-reads-enabled: ${ES_LEGACY_READS_ENABLED:true}
mysql:
host: ${MYSQL_HOST:localhost}
port: ${MYSQL_TCP_PORT:3306}
83 changes: 32 additions & 51 deletions zipkin-storage/elasticsearch-http/README.md
@@ -21,9 +21,19 @@ Here are some examples:
* http://elasticsearch:9200,http://1.2.3.4:9200
* http://elasticsearch-1:9200,http://elasticsearch-2:9200

## Format
Spans are stored in version 2 format, which is the same as the [v2 POST endpoint](http://zipkin.io/zipkin-api/#/default/post_spans)
with one difference described below. We add a "timestamp_millis" field
to aid in integration with other tools.

### Timestamps
Zipkin's timestamps are in epoch microseconds, which is not a supported date type in Elasticsearch.
In consideration of tools like Kibana, this component adds "timestamp_millis" when writing
spans. This is mapped to the Elasticsearch date type, so it can be used in date-based queries.
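
As a sketch of the kind of date-based query this enables (the index name and time window
below are illustrative), a range query against `timestamp_millis` might look like:
```bash
# Sketch: find spans in a one-hour window using the timestamp_millis date field.
curl -s -H 'Content-Type: application/json' \
  localhost:9200/zipkin:span-2017-08-11/_search -d '{
  "query": {
    "range": {
      "timestamp_millis": {
        "gte": "2017-08-11T00:00:00",
        "lt":  "2017-08-11T01:00:00"
      }
    }
  }
}'
```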

## Indexes
Spans are stored into daily indices, for example spans with a timestamp
falling on 2016/03/19 will be stored in the index named 'zipkin-2016-03-19'.
falling on 2016/03/19 will be stored in the index named 'zipkin:span-2016-03-19'.
There is no support for TTL through this SpanStore. It is recommended
instead to use [Elastic Curator](https://www.elastic.co/guide/en/elasticsearch/client/curator/current/about.html)
to remove indices older than the point you are interested in.
@@ -36,19 +46,30 @@ the date separator from '-' to something else.
control the daily index format.

For example, spans with a timestamp falling on 2016/03/19 end up in the
index 'zipkin-2016-03-19'. When the date separator is '.', the index
would be 'zipkin-2016.03.19'.
index 'zipkin:span-2016-03-19'. When the date separator is '.', the index
would be 'zipkin:span-2016.03.19'.
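
If you want to see which daily indexes exist, or drop an old one by hand rather than via
Curator, the standard cat-indices and delete-index APIs work; the index names and dates
below are illustrative:
```bash
# Sketch: list the daily span indexes, then delete one that is no longer needed.
curl -s 'localhost:9200/_cat/indices/zipkin:span-*?v'
curl -s -XDELETE 'localhost:9200/zipkin:span-2016-03-19'
```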

### String Mapping
The Zipkin api implies aggregation and exact match (keyword) on string
fields named `traceId` and `name`, as well nested fields named
`serviceName`, `key` and `value`. Indexing on these fields is limited to
256 characters eventhough storage is currently unbounded.
fields named `traceId`, `name` and `serviceName`. Indexing on these
fields is limited to 256 characters, even though storage is currently
unbounded.

### Timestamps
Zipkin's timestamps are in epoch microseconds, which is not a supported date type in Elasticsearch.
In consideration of tools like like Kibana, this component adds "timestamp_millis" when writing
spans. This is mapped to the Elasticsearch date type, so can be used to any date-based queries.
### Query indexing
To support the zipkin query api, a special index field named `_q` is
added to documents, containing annotation values and tag entry pairs.
Ex: the tag `"error": "500"` results in `"_q":["error", "error=500"]`.
The values in `_q` are limited to 256 characters and searched as keywords.

You can check these manually like so:
```bash
$ curl -s localhost:9200/zipkin:span-2017-08-11/_search?q=_q:error=500
```

The reason for special casing is around dotted name constraints. Tags
are stored as a dictionary. Some keys include an inconsistent number of dots
(ex "error" and "error.message"). Elasticsearch cannot index these as it
interprets them as fields, and dots in fields imply an object path.

### Trace Identifiers
Unless `ElasticsearchHttpStorage.Builder.strictTraceId` is set to false,
@@ -82,54 +103,14 @@ your indexes:

```bash
# the output below shows which tokens will match on the trace id supplied.
$ curl -s localhost:9200/test_zipkin_http-2016-10-26/_analyze -d '{
$ curl -s localhost:9200/zipkin:span-2017-08-22/_analyze -d '{
"text": "48485a3953bb61246b221d5bc9e6496c",
"analyzer": "traceId_analyzer"
}'|jq '.tokens|.[]|.token'
"48485a3953bb61246b221d5bc9e6496c"
"6b221d5bc9e6496c"
```

### Span and Service Names
Zipkin defines span and service names as lowercase. At write time, any
mixed case span or service names are downcased. If writing a custom
collector in a different language, make sure you write span and service
names in lowercase. Also, if there are any custom query tools, ensure
inputs are downcased.

Span and service name queries default to look back 24hrs (2 index days).
This can be controlled by `ElasticsearchHttpStorage.Builder.namesLookback`

#### Index format
Starting with Zipkin 1.23, service and span names are written to the
same daily indexes as spans and dependency links as the document type
"servicespan". This was added for performance reasons as formerly search
was using relatively expensive nested queries.

The documents themselves represent service and span name pairs. Only one
document is present per daily index. This is to keep the documents from
repeating at a multiplier of span count, which also simplifies query.
This deduplication is enforced at write time by using an ID convention
of the service and span name. Ex. `id = MyServiceName|MySpanName`

The document is a simple structure, like:
```json
{
"serviceName": "MyServiceName",
"spanName": "MySpanName",
}
```

The document does replicate data in the ID, but it is needed as you
cannot query based on an ID expression.

#### Notes for data written prior to Zipkin 1.23
Before Zipkin 1.23, service and span names were nested queries against
the span type. This was an expensive operation, which resulted in high
latency particularly when the UI loads. When the "servicespan" type is
missing from an index, or there's no results returned, a fallback nested
query is invoked.

## Customizing the ingest pipeline

When using Elasticsearch 5.x, you can setup an [ingest pipeline](https://www.elastic.co/guide/en/elasticsearch/reference/master/pipeline.html)
@@ -36,7 +36,6 @@
import zipkin.storage.elasticsearch.http.internal.LenientDoubleCallbackAsyncSpanStore;
import zipkin.storage.elasticsearch.http.internal.client.HttpCall;

import static zipkin.internal.Util.checkArgument;
import static zipkin.internal.Util.checkNotNull;
import static zipkin.moshi.JsonReaders.enterPath;
import static zipkin.storage.elasticsearch.http.ElasticsearchHttpSpanStore.DEPENDENCY;
@@ -68,9 +67,7 @@ public static Builder builder(OkHttpClient client) {
.namesLookback(86400000)
.shutdownClientOnClose(false)
.flushOnWrites(false)
.singleTypeIndexingEnabled(
Boolean.valueOf(System.getenv("ES_EXPERIMENTAL_SPAN2"))
);
.legacyReadsEnabled(true);
}

public static Builder builder() {
@@ -127,6 +124,9 @@ public final Builder hosts(final List<String> hosts) {
*/
public abstract Builder namesLookback(int namesLookback);

/** When true, redundantly queries indexes made with pre-v1.31 collectors. Defaults to true. */
public abstract Builder legacyReadsEnabled(boolean legacyReadsEnabled);

/** Visible for testing */
abstract Builder flushOnWrites(boolean flushOnWrites);

@@ -171,9 +171,6 @@ public final Builder dateSeparator(char dateSeparator) {
*/
public abstract Builder indexReplicas(int indexReplicas);

/** intentionally hidden for now */
abstract Builder singleTypeIndexingEnabled(boolean singleTypeIndexingEnabled);

@Override public abstract Builder strictTraceId(boolean strictTraceId);

@Override public abstract ElasticsearchHttpStorage build();
@@ -206,18 +203,16 @@ public final Builder dateSeparator(char dateSeparator) {

abstract int namesLookback();

abstract boolean singleTypeIndexingEnabled();
abstract boolean legacyReadsEnabled();

@Override public SpanStore spanStore() {
return StorageAdapters.asyncToBlocking(asyncSpanStore());
}

@Override public AsyncSpanStore asyncSpanStore() {
float version = ensureIndexTemplates().version();
if (version >= 6) { // then multi-type (legacy) index isn't possible
if (version >= 6 /* multi-type (legacy) index isn't possible */ || !legacyReadsEnabled()) {
return new ElasticsearchHttpSpanStore(this);
} else if (version < 2 || !singleTypeIndexingEnabled()) { // don't fan out queries unnecessarily
return new LegacyElasticsearchHttpSpanStore(this);
} else { // fan out queries as we don't know if old legacy collectors are in use
return new LenientDoubleCallbackAsyncSpanStore(
new ElasticsearchHttpSpanStore(this),
Expand All @@ -227,23 +222,15 @@ public final Builder dateSeparator(char dateSeparator) {
}

@Override public AsyncSpanConsumer asyncSpanConsumer() {
// We only write once, so we detect which approach we should take
if (shouldUseSingleTypeIndexing(ensureIndexTemplates())) {
return new ElasticsearchHttpSpanConsumer(this);
} else {
return new LegacyElasticsearchHttpSpanConsumer(this);
}
ensureIndexTemplates();
return new ElasticsearchHttpSpanConsumer(this);
}

/** This is a blocking call, only used in tests. */
void clear() throws IOException {
Set<String> toClear = new LinkedHashSet<>();
if (shouldUseSingleTypeIndexing(ensureIndexTemplates())) {
toClear.add(indexNameFormatter().formatType(SPAN));
toClear.add(indexNameFormatter().formatType(DEPENDENCY));
} else {
toClear.add(indexNameFormatter().formatType(null));
}
toClear.add(indexNameFormatter().formatType(SPAN));
toClear.add(indexNameFormatter().formatType(DEPENDENCY));
for (String index : toClear) clear(index);
}

@@ -298,24 +285,12 @@ CheckResult ensureClusterReady(String index) {
IndexTemplates ensureIndexTemplates() {
String index = indexNameFormatter().index();
IndexTemplates templates = new VersionSpecificTemplates(this).get(http());
if (shouldUseSingleTypeIndexing(templates)) {
EnsureIndexTemplate.apply(http(), index + ":" + SPAN + "_template", templates.span());
EnsureIndexTemplate.apply(http(), index + ":" + DEPENDENCY + "_template",
templates.dependency());
} else { // TODO: remove when we stop writing span1 format
checkArgument(templates.legacy() != null,
"multiple type template is null: version=%s, singleTypeIndexingEnabled=%s",
templates.version(), singleTypeIndexingEnabled());
EnsureIndexTemplate.apply(http(), index + "_template", templates.legacy());
}
EnsureIndexTemplate.apply(http(), index + ":" + SPAN + "_template", templates.span());
EnsureIndexTemplate.apply(http(), index + ":" + DEPENDENCY + "_template",
templates.dependency());
return templates;
}

private boolean shouldUseSingleTypeIndexing(IndexTemplates templates) {
return (templates.span() != null && singleTypeIndexingEnabled())
|| templates.version() >= 6;
}

@Memoized // hosts resolution might imply a network call, and we might make a new okhttp instance
HttpCall.Factory http() {
List<String> hosts = hostsSupplier().get();
@@ -14,7 +14,6 @@
package zipkin.storage.elasticsearch.http;

import com.google.auto.value.AutoValue;
import zipkin.internal.Nullable;

@AutoValue
abstract class IndexTemplates {
@@ -24,18 +23,13 @@ static Builder builder() {

abstract float version();

/** null when multi-type indexes are not supported */
@Nullable abstract String legacy();

abstract String span();

abstract String dependency();

@AutoValue.Builder interface Builder {
Builder version(float version);

Builder legacy(@Nullable String legacy);

Builder span(String span);

Builder dependency(String dependency);