Skip to content

Commit

Permalink
Allow dimension fields to have multiple values in standard and logsdb…
Browse files Browse the repository at this point in the history
… index mode (elastic#112345)

Fixes elastic#112232 Fixes
elastic#112239
  • Loading branch information
felixbarny authored and cbuescher committed Sep 4, 2024
1 parent 413ba7a commit 1f95d4b
Show file tree
Hide file tree
Showing 12 changed files with 194 additions and 65 deletions.
8 changes: 8 additions & 0 deletions docs/changelog/112345.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
pr: 112345
summary: Allow dimension fields to have multiple values in standard and logsdb index
mode
area: Mapping
type: enhancement
issues:
- 112232
- 112239
4 changes: 2 additions & 2 deletions server/src/main/java/org/elasticsearch/index/IndexMode.java
Original file line number Diff line number Diff line change
Expand Up @@ -107,7 +107,7 @@ public IdFieldMapper buildIdFieldMapper(BooleanSupplier fieldDataEnabled) {

@Override
public DocumentDimensions buildDocumentDimensions(IndexSettings settings) {
return new DocumentDimensions.OnlySingleValueAllowed();
return DocumentDimensions.Noop.INSTANCE;
}

@Override
Expand Down Expand Up @@ -281,7 +281,7 @@ public MetadataFieldMapper timeSeriesRoutingHashFieldMapper() {

@Override
public DocumentDimensions buildDocumentDimensions(IndexSettings settings) {
return new DocumentDimensions.OnlySingleValueAllowed();
return DocumentDimensions.Noop.INSTANCE;
}

@Override
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -12,8 +12,6 @@
import org.elasticsearch.index.IndexSettings;

import java.net.InetAddress;
import java.util.HashSet;
import java.util.Set;

/**
* Collects dimensions from documents.
Expand Down Expand Up @@ -49,59 +47,45 @@ default DocumentDimensions addString(String fieldName, String value) {
DocumentDimensions validate(IndexSettings settings);

/**
* Makes sure that each dimension only appears on time.
* Noop implementation that doesn't perform validations on dimension fields
*/
class OnlySingleValueAllowed implements DocumentDimensions {
private final Set<String> names = new HashSet<>();
enum Noop implements DocumentDimensions {

INSTANCE;

@Override
public DocumentDimensions addString(String fieldName, BytesRef value) {
add(fieldName);
public DocumentDimensions addString(String fieldName, BytesRef utf8Value) {
return this;
}

// Override to skip the UTF-8 conversion that happens in the default implementation
@Override
public DocumentDimensions addString(String fieldName, String value) {
add(fieldName);
return this;
}

@Override
public DocumentDimensions addIp(String fieldName, InetAddress value) {
add(fieldName);
return this;
}

@Override
public DocumentDimensions addLong(String fieldName, long value) {
add(fieldName);
return this;
}

@Override
public DocumentDimensions addUnsignedLong(String fieldName, long value) {
add(fieldName);
return this;
}

@Override
public DocumentDimensions addBoolean(String fieldName, boolean value) {
add(fieldName);
return this;
}

@Override
public DocumentDimensions validate(final IndexSettings settings) {
// DO NOTHING
public DocumentDimensions validate(IndexSettings settings) {
return this;
}

private void add(String fieldName) {
boolean isNew = names.add(fieldName);
if (false == isNew) {
throw new IllegalArgumentException("Dimension field [" + fieldName + "] cannot be a multi-valued field.");
}
}
};
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@
import org.elasticsearch.common.Strings;
import org.elasticsearch.common.settings.Settings;
import org.elasticsearch.core.Tuple;
import org.elasticsearch.index.IndexMode;
import org.elasticsearch.index.IndexSettings;
import org.elasticsearch.index.IndexVersion;
import org.elasticsearch.script.BooleanFieldScript;
Expand All @@ -25,11 +26,14 @@
import org.elasticsearch.xcontent.XContentFactory;

import java.io.IOException;
import java.time.Instant;
import java.util.List;
import java.util.function.Function;

import static org.hamcrest.Matchers.containsString;
import static org.hamcrest.Matchers.equalTo;
import static org.hamcrest.Matchers.greaterThan;
import static org.hamcrest.Matchers.hasSize;

public class BooleanFieldMapperTests extends MapperTestCase {

Expand Down Expand Up @@ -257,17 +261,29 @@ public void testDimensionIndexedAndDocvalues() {
}
}

public void testDimensionMultiValuedField() throws IOException {
XContentBuilder mapping = fieldMapping(b -> {
public void testDimensionMultiValuedFieldTSDB() throws IOException {
DocumentMapper mapper = createDocumentMapper(fieldMapping(b -> {
minimalMapping(b);
b.field("time_series_dimension", true);
});
DocumentMapper mapper = randomBoolean() ? createDocumentMapper(mapping) : createTimeSeriesModeDocumentMapper(mapping);
}), IndexMode.TIME_SERIES);

Exception e = expectThrows(DocumentParsingException.class, () -> mapper.parse(source(b -> b.array("field", true, false))));
assertThat(e.getCause().getMessage(), containsString("Dimension field [field] cannot be a multi-valued field"));
}

public void testDimensionMultiValuedFieldNonTSDB() throws IOException {
DocumentMapper mapper = createDocumentMapper(fieldMapping(b -> {
minimalMapping(b);
b.field("time_series_dimension", true);
}), randomFrom(IndexMode.STANDARD, IndexMode.LOGSDB));

ParsedDocument doc = mapper.parse(source(b -> {
b.array("field", true, false);
b.field("@timestamp", Instant.now());
}));
assertThat(doc.docs().get(0).getFields("field"), hasSize(greaterThan(1)));
}

public void testDimensionInRoutingPath() throws IOException {
MapperService mapper = createMapperService(fieldMapping(b -> b.field("type", "keyword").field("time_series_dimension", true)));
IndexSettings settings = createIndexSettings(
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@
import org.elasticsearch.common.network.InetAddresses;
import org.elasticsearch.common.network.NetworkAddress;
import org.elasticsearch.core.Tuple;
import org.elasticsearch.index.IndexMode;
import org.elasticsearch.index.IndexVersion;
import org.elasticsearch.index.IndexVersions;
import org.elasticsearch.script.IpFieldScript;
Expand All @@ -26,6 +27,7 @@

import java.io.IOException;
import java.net.InetAddress;
import java.time.Instant;
import java.util.ArrayList;
import java.util.List;
import java.util.function.Function;
Expand All @@ -35,6 +37,8 @@
import static org.hamcrest.Matchers.containsString;
import static org.hamcrest.Matchers.empty;
import static org.hamcrest.Matchers.equalTo;
import static org.hamcrest.Matchers.greaterThan;
import static org.hamcrest.Matchers.hasSize;

public class IpFieldMapperTests extends MapperTestCase {

Expand Down Expand Up @@ -255,11 +259,11 @@ public void testDimensionIndexedAndDocvalues() {
}
}

public void testDimensionMultiValuedField() throws IOException {
public void testDimensionMultiValuedFieldTSDB() throws IOException {
DocumentMapper mapper = createDocumentMapper(fieldMapping(b -> {
minimalMapping(b);
b.field("time_series_dimension", true);
}));
}), IndexMode.TIME_SERIES);

Exception e = expectThrows(
DocumentParsingException.class,
Expand All @@ -268,6 +272,19 @@ public void testDimensionMultiValuedField() throws IOException {
assertThat(e.getCause().getMessage(), containsString("Dimension field [field] cannot be a multi-valued field"));
}

public void testDimensionMultiValuedFieldNonTSDB() throws IOException {
DocumentMapper mapper = createDocumentMapper(fieldMapping(b -> {
minimalMapping(b);
b.field("time_series_dimension", true);
}), randomFrom(IndexMode.STANDARD, IndexMode.LOGSDB));

ParsedDocument doc = mapper.parse(source(b -> {
b.array("field", "192.168.1.1", "192.168.1.1");
b.field("@timestamp", Instant.now());
}));
assertThat(doc.docs().get(0).getFields("field"), hasSize(greaterThan(1)));
}

@Override
protected String generateRandomInputValue(MappedFieldType ft) {
return NetworkAddress.format(randomIp(randomBoolean()));
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@
import org.elasticsearch.common.Strings;
import org.elasticsearch.common.lucene.Lucene;
import org.elasticsearch.common.settings.Settings;
import org.elasticsearch.index.IndexMode;
import org.elasticsearch.index.IndexSettings;
import org.elasticsearch.index.IndexVersion;
import org.elasticsearch.index.analysis.AnalyzerScope;
Expand All @@ -44,6 +45,7 @@
import org.elasticsearch.xcontent.XContentBuilder;

import java.io.IOException;
import java.time.Instant;
import java.util.Arrays;
import java.util.Collection;
import java.util.List;
Expand All @@ -57,6 +59,8 @@
import static org.hamcrest.Matchers.containsString;
import static org.hamcrest.Matchers.empty;
import static org.hamcrest.Matchers.equalTo;
import static org.hamcrest.Matchers.greaterThan;
import static org.hamcrest.Matchers.hasSize;
import static org.hamcrest.Matchers.instanceOf;

public class KeywordFieldMapperTests extends MapperTestCase {
Expand Down Expand Up @@ -373,17 +377,29 @@ public void testDimensionIndexedAndDocvalues() {
}
}

public void testDimensionMultiValuedField() throws IOException {
XContentBuilder mapping = fieldMapping(b -> {
public void testDimensionMultiValuedFieldTSDB() throws IOException {
DocumentMapper mapper = createDocumentMapper(fieldMapping(b -> {
minimalMapping(b);
b.field("time_series_dimension", true);
});
DocumentMapper mapper = randomBoolean() ? createDocumentMapper(mapping) : createTimeSeriesModeDocumentMapper(mapping);
}), IndexMode.TIME_SERIES);

Exception e = expectThrows(DocumentParsingException.class, () -> mapper.parse(source(b -> b.array("field", "1234", "45678"))));
assertThat(e.getCause().getMessage(), containsString("Dimension field [field] cannot be a multi-valued field"));
}

public void testDimensionMultiValuedFieldNonTSDB() throws IOException {
DocumentMapper mapper = createDocumentMapper(fieldMapping(b -> {
minimalMapping(b);
b.field("time_series_dimension", true);
}), randomFrom(IndexMode.STANDARD, IndexMode.LOGSDB));

ParsedDocument doc = mapper.parse(source(b -> {
b.array("field", "1234", "45678");
b.field("@timestamp", Instant.now());
}));
assertThat(doc.docs().get(0).getFields("field"), hasSize(greaterThan(1)));
}

public void testDimensionExtraLongKeyword() throws IOException {
DocumentMapper mapper = createTimeSeriesModeDocumentMapper(fieldMapping(b -> {
minimalMapping(b);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@
import org.elasticsearch.common.bytes.BytesArray;
import org.elasticsearch.common.bytes.BytesReference;
import org.elasticsearch.common.settings.Settings;
import org.elasticsearch.index.IndexMode;
import org.elasticsearch.index.IndexSettings;
import org.elasticsearch.index.IndexVersion;
import org.elasticsearch.index.mapper.DocumentMapper;
Expand All @@ -34,6 +35,7 @@
import org.junit.AssumptionViolatedException;

import java.io.IOException;
import java.time.Instant;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
Expand All @@ -46,6 +48,8 @@
import static org.apache.lucene.tests.analysis.BaseTokenStreamTestCase.assertTokenStreamContents;
import static org.hamcrest.Matchers.containsString;
import static org.hamcrest.Matchers.equalTo;
import static org.hamcrest.Matchers.greaterThan;
import static org.hamcrest.Matchers.hasSize;

public class FlattenedFieldMapperTests extends MapperTestCase {

Expand Down Expand Up @@ -189,12 +193,11 @@ public void testDimensionIndexedAndDocvalues() {
}
}

public void testDimensionMultiValuedField() throws IOException {
XContentBuilder mapping = fieldMapping(b -> {
public void testDimensionMultiValuedFieldTSDB() throws IOException {
DocumentMapper mapper = createDocumentMapper(fieldMapping(b -> {
minimalMapping(b);
b.field("time_series_dimensions", List.of("key1", "key2", "field3.key3"));
});
DocumentMapper mapper = randomBoolean() ? createDocumentMapper(mapping) : createTimeSeriesModeDocumentMapper(mapping);
}), IndexMode.TIME_SERIES);

Exception e = expectThrows(
DocumentParsingException.class,
Expand All @@ -203,6 +206,19 @@ public void testDimensionMultiValuedField() throws IOException {
assertThat(e.getCause().getMessage(), containsString("Dimension field [field.key1] cannot be a multi-valued field"));
}

public void testDimensionMultiValuedFieldNonTSDB() throws IOException {
DocumentMapper mapper = createDocumentMapper(fieldMapping(b -> {
minimalMapping(b);
b.field("time_series_dimensions", List.of("key1", "key2", "field3.key3"));
}), randomFrom(IndexMode.STANDARD, IndexMode.LOGSDB));

ParsedDocument doc = mapper.parse(source(b -> {
b.array("field.key1", "value1", "value2");
b.field("@timestamp", Instant.now());
}));
assertThat(doc.docs().get(0).getFields("field"), hasSize(greaterThan(1)));
}

public void testDisableIndex() throws Exception {

DocumentMapper mapper = createDocumentMapper(fieldMapping(b -> {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -139,6 +139,14 @@ protected static String randomIndexOptions() {
return randomFrom("docs", "freqs", "positions", "offsets");
}

protected final DocumentMapper createDocumentMapper(XContentBuilder mappings, IndexMode indexMode) throws IOException {
return switch (indexMode) {
case STANDARD -> createDocumentMapper(mappings);
case TIME_SERIES -> createTimeSeriesModeDocumentMapper(mappings);
case LOGSDB -> createLogsModeDocumentMapper(mappings);
};
}

protected final DocumentMapper createDocumentMapper(XContentBuilder mappings) throws IOException {
return createMapperService(mappings).documentMapper();
}
Expand Down
Loading

0 comments on commit 1f95d4b

Please sign in to comment.