Skip to content

Commit

Permalink
semantic_text - extract Index Metadata inference information to separate class (#106328)
Browse files Browse the repository at this point in the history
  • Loading branch information
carlosdelest authored Mar 19, 2024
1 parent db67976 commit 3ca808b
Show file tree
Hide file tree
Showing 16 changed files with 347 additions and 180 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -54,14 +54,14 @@

import java.util.Collections;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Set;

import static java.util.Collections.emptyList;
import static java.util.Collections.emptySet;
import static org.elasticsearch.cluster.metadata.AliasMetadata.newAliasMetadataBuilder;
import static org.elasticsearch.cluster.metadata.IndexMetadataTests.randomFieldInferenceMetadata;
import static org.elasticsearch.cluster.routing.RandomShardRoutingMutator.randomChange;
import static org.elasticsearch.cluster.routing.TestShardRouting.shardRoutingBuilder;
import static org.elasticsearch.cluster.routing.UnassignedInfoTests.randomUnassignedInfo;
Expand Down Expand Up @@ -587,33 +587,13 @@ public IndexMetadata randomChange(IndexMetadata part) {
builder.settings(Settings.builder().put(part.getSettings()).put(IndexMetadata.SETTING_NUMBER_OF_REPLICAS, 0));
break;
case 3:
builder.fieldsForModels(randomFieldsForModels());
builder.fieldInferenceMetadata(randomFieldInferenceMetadata(true));
break;
default:
throw new IllegalArgumentException("Shouldn't be here");
}
return builder.build();
}

/**
 * Generates a random fieldsForModels map.
 * <p>
 * Returns {@code null} when {@code randomBoolean()} yields {@code true}. Otherwise builds a map with up to five
 * entries (fewer if random keys collide), each mapping a random key to a set of up to four random field names.
 *
 * @return a random map of field-name sets keyed by random strings, or {@code null}
 */
private Map<String, Set<String>> randomFieldsForModels() {
    if (randomBoolean()) {
        return null;
    }

    Map<String, Set<String>> fieldsForModels = new HashMap<>();
    // Draw each bound exactly once. Calling randomIntBetween() inside the loop condition re-rolls the
    // bound on every iteration, which skews the resulting sizes towards small values.
    final int modelCount = randomIntBetween(0, 5);
    for (int i = 0; i < modelCount; i++) {
        Set<String> fields = new HashSet<>();
        final int fieldCount = randomIntBetween(1, 4);
        for (int j = 0; j < fieldCount; j++) {
            fields.add(randomAlphaOfLengthBetween(4, 10));
        }
        fieldsForModels.put(randomAlphaOfLengthBetween(4, 10), fields);
    }

    return fieldsForModels;
}
});
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@
import org.elasticsearch.action.support.RefCountingRunnable;
import org.elasticsearch.action.update.UpdateRequest;
import org.elasticsearch.cluster.ClusterState;
import org.elasticsearch.cluster.metadata.FieldInferenceMetadata;
import org.elasticsearch.common.TriConsumer;
import org.elasticsearch.core.Releasable;
import org.elasticsearch.index.shard.ShardId;
Expand Down Expand Up @@ -75,11 +76,13 @@ public static void getInstance(
Set<ShardId> shardIds,
ActionListener<BulkShardRequestInferenceProvider> listener
) {
Set<String> inferenceIds = new HashSet<>();
shardIds.stream().map(ShardId::getIndex).collect(Collectors.toSet()).stream().forEach(index -> {
var fieldsForModels = clusterState.metadata().index(index).getFieldsForModels();
inferenceIds.addAll(fieldsForModels.keySet());
});
Set<String> inferenceIds = shardIds.stream()
.map(ShardId::getIndex)
.collect(Collectors.toSet())
.stream()
.map(index -> clusterState.metadata().index(index).getFieldInferenceMetadata().getFieldInferenceOptions().values())
.flatMap(o -> o.stream().map(FieldInferenceMetadata.FieldInferenceOptions::inferenceId))
.collect(Collectors.toSet());
final Map<String, InferenceProvider> inferenceProviderMap = new ConcurrentHashMap<>();
Runnable onModelLoadingComplete = () -> listener.onResponse(
new BulkShardRequestInferenceProvider(clusterState, inferenceProviderMap)
Expand Down Expand Up @@ -134,11 +137,11 @@ public void processBulkShardRequest(
BiConsumer<BulkItemRequest, Exception> onBulkItemFailure
) {

Map<String, Set<String>> fieldsForModels = clusterState.metadata()
.index(bulkShardRequest.shardId().getIndex())
.getFieldsForModels();
Map<String, Set<String>> fieldsForInferenceIds = getFieldsForInferenceIds(
clusterState.metadata().index(bulkShardRequest.shardId().getIndex()).getFieldInferenceMetadata().getFieldInferenceOptions()
);
// No inference fields? Terminate early
if (fieldsForModels.isEmpty()) {
if (fieldsForInferenceIds.isEmpty()) {
listener.onResponse(bulkShardRequest);
return;
}
Expand Down Expand Up @@ -176,7 +179,7 @@ public void processBulkShardRequest(
if (bulkItemRequest != null) {
performInferenceOnBulkItemRequest(
bulkItemRequest,
fieldsForModels,
fieldsForInferenceIds,
i,
onBulkItemFailureWithIndex,
bulkItemReqRef.acquire()
Expand All @@ -186,6 +189,22 @@ public void processBulkShardRequest(
}
}

/**
 * Inverts a field-name to inference-options map into an inference-id to field-names map.
 *
 * @param fieldInferenceMap inference options keyed by field name
 * @return a new mutable map whose keys are the inference ids found in {@code fieldInferenceMap} and whose
 *         values are the sets of field names that use each id
 */
private static Map<String, Set<String>> getFieldsForInferenceIds(
    Map<String, FieldInferenceMetadata.FieldInferenceOptions> fieldInferenceMap
) {
    final Map<String, Set<String>> fieldsByInferenceId = new HashMap<>();
    // Lazily create the bucket for each inference id, then record the field under it
    fieldInferenceMap.forEach(
        (field, options) -> fieldsByInferenceId.computeIfAbsent(options.inferenceId(), id -> new HashSet<>()).add(field)
    );
    return fieldsByInferenceId;
}

@SuppressWarnings("unchecked")
private void performInferenceOnBulkItemRequest(
BulkItemRequest bulkItemRequest,
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,190 @@
/*
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
* or more contributor license agreements. Licensed under the Elastic License
* 2.0 and the Server Side Public License, v 1; you may not use this file except
* in compliance with, at your election, the Elastic License 2.0 or the Server
* Side Public License, v 1.
*/

package org.elasticsearch.cluster.metadata;

import org.elasticsearch.cluster.Diff;
import org.elasticsearch.cluster.Diffable;
import org.elasticsearch.cluster.DiffableUtils;
import org.elasticsearch.cluster.SimpleDiffable;
import org.elasticsearch.common.collect.ImmutableOpenMap;
import org.elasticsearch.common.io.stream.StreamInput;
import org.elasticsearch.common.io.stream.StreamOutput;
import org.elasticsearch.index.mapper.MappingLookup;
import org.elasticsearch.xcontent.ConstructingObjectParser;
import org.elasticsearch.xcontent.ParseField;
import org.elasticsearch.xcontent.ToXContentFragment;
import org.elasticsearch.xcontent.XContentBuilder;
import org.elasticsearch.xcontent.XContentParser;

import java.io.IOException;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Objects;
import java.util.Set;

/**
 * Contains field inference information. This needs to be added to the cluster state because inference can be
 * calculated on the coordinator node, which does not necessarily have mapping information.
 * <p>
 * The information is held as a map from field name to {@link FieldInferenceOptions}.
 */
public class FieldInferenceMetadata implements Diffable<FieldInferenceMetadata>, ToXContentFragment {

    /** Inference options keyed by field name. */
    private final ImmutableOpenMap<String, FieldInferenceOptions> fieldInferenceOptions;

    /** Shared instance that holds no field inference options. */
    public static final FieldInferenceMetadata EMPTY = new FieldInferenceMetadata(ImmutableOpenMap.of());

    /**
     * Builds the metadata from a mapping lookup: every entry of {@code getInferenceIdsForFields()} becomes a
     * {@link FieldInferenceOptions} made of the entry value and {@code sourcePaths(field)} for the entry key.
     *
     * @param mappingLookup the lookup to extract inference ids and source paths from
     */
    public FieldInferenceMetadata(MappingLookup mappingLookup) {
        ImmutableOpenMap.Builder<String, FieldInferenceOptions> builder = ImmutableOpenMap.builder();
        mappingLookup.getInferenceIdsForFields().entrySet().forEach(entry -> {
            builder.put(entry.getKey(), new FieldInferenceOptions(entry.getValue(), mappingLookup.sourcePaths(entry.getKey())));
        });
        fieldInferenceOptions = builder.build();
    }

    /**
     * Reads the metadata from a stream, as a map with string keys and {@link FieldInferenceOptions} values.
     *
     * @param in the stream to read from
     * @throws IOException if reading from the stream fails
     */
    public FieldInferenceMetadata(StreamInput in) throws IOException {
        fieldInferenceOptions = in.readImmutableOpenMap(StreamInput::readString, FieldInferenceOptions::new);
    }

    /**
     * Builds the metadata from an existing map of field name to inference options.
     *
     * @param fieldsToInferenceMap inference options keyed by field name
     */
    public FieldInferenceMetadata(Map<String, FieldInferenceOptions> fieldsToInferenceMap) {
        fieldInferenceOptions = ImmutableOpenMap.builder(fieldsToInferenceMap).build();
    }

    /**
     * @return the inference options keyed by field name
     */
    public ImmutableOpenMap<String, FieldInferenceOptions> getFieldInferenceOptions() {
        return fieldInferenceOptions;
    }

    /**
     * @return {@code true} if there are no inference options for any field
     */
    public boolean isEmpty() {
        return fieldInferenceOptions.isEmpty();
    }

    /**
     * Writes the field name to inference options map to the stream.
     */
    @Override
    public void writeTo(StreamOutput out) throws IOException {
        out.writeMap(fieldInferenceOptions, (o, v) -> v.writeTo(o));
    }

    /**
     * Writes the field name to inference options map through {@code builder.map(...)}.
     */
    @Override
    public XContentBuilder toXContent(XContentBuilder builder, Params params) throws IOException {
        builder.map(fieldInferenceOptions);
        return builder;
    }

    /**
     * Parses the metadata from a map whose values are parsed by {@link FieldInferenceOptions#fromXContent}.
     *
     * @param parser the parser to read from
     * @return the parsed metadata
     * @throws IOException if parsing fails
     */
    public static FieldInferenceMetadata fromXContent(XContentParser parser) throws IOException {
        return new FieldInferenceMetadata(parser.map(HashMap::new, FieldInferenceOptions::fromXContent));
    }

    @Override
    public boolean equals(Object o) {
        if (this == o) return true;
        if (o == null || getClass() != o.getClass()) return false;
        FieldInferenceMetadata that = (FieldInferenceMetadata) o;
        return Objects.equals(fieldInferenceOptions, that.fieldInferenceOptions);
    }

    @Override
    public int hashCode() {
        return Objects.hash(fieldInferenceOptions);
    }

    /**
     * Creates a diff between {@code previousState} and this instance.
     *
     * @param previousState the state to diff against; {@code null} is treated as {@link #EMPTY}
     * @return a diff that can be applied to {@code previousState}
     */
    @Override
    public Diff<FieldInferenceMetadata> diff(FieldInferenceMetadata previousState) {
        if (previousState == null) {
            previousState = EMPTY;
        }
        return new FieldInferenceMetadataDiff(previousState, this);
    }

    /**
     * Diff of two {@link FieldInferenceMetadata} instances, expressed as a diff of their field name to
     * inference options maps.
     */
    static class FieldInferenceMetadataDiff implements Diff<FieldInferenceMetadata> {

        // Declared before EMPTY on purpose: static initializers run in textual order and the constructor that
        // builds EMPTY reads this reader, so declaring it later would hand a null reader to DiffableUtils.diff.
        private static final DiffableUtils.DiffableValueReader<String, FieldInferenceOptions> FIELD_INFERENCE_DIFF_VALUE_READER =
            new DiffableUtils.DiffableValueReader<>(FieldInferenceOptions::new, FieldInferenceMetadataDiff::readDiffFrom);

        /** Diff between two empty metadata instances. */
        public static final FieldInferenceMetadataDiff EMPTY = new FieldInferenceMetadataDiff(
            FieldInferenceMetadata.EMPTY,
            FieldInferenceMetadata.EMPTY
        );

        private final Diff<ImmutableOpenMap<String, FieldInferenceOptions>> fieldInferenceMapDiff;

        /**
         * Computes the diff between the maps held by {@code before} and {@code after}.
         */
        FieldInferenceMetadataDiff(FieldInferenceMetadata before, FieldInferenceMetadata after) {
            fieldInferenceMapDiff = DiffableUtils.diff(
                before.fieldInferenceOptions,
                after.fieldInferenceOptions,
                DiffableUtils.getStringKeySerializer(),
                FIELD_INFERENCE_DIFF_VALUE_READER
            );
        }

        /**
         * Reads a previously written diff from the stream.
         */
        FieldInferenceMetadataDiff(StreamInput in) throws IOException {
            fieldInferenceMapDiff = DiffableUtils.readImmutableOpenMapDiff(
                in,
                DiffableUtils.getStringKeySerializer(),
                FIELD_INFERENCE_DIFF_VALUE_READER
            );
        }

        /**
         * Reads the diff of a single {@link FieldInferenceOptions} value from the stream.
         */
        public static Diff<FieldInferenceOptions> readDiffFrom(StreamInput in) throws IOException {
            return SimpleDiffable.readDiffFrom(FieldInferenceOptions::new, in);
        }

        @Override
        public void writeTo(StreamOutput out) throws IOException {
            fieldInferenceMapDiff.writeTo(out);
        }

        /**
         * Applies the map diff to the map held by {@code part} and wraps the result in a new instance.
         */
        @Override
        public FieldInferenceMetadata apply(FieldInferenceMetadata part) {
            return new FieldInferenceMetadata(fieldInferenceMapDiff.apply(part.fieldInferenceOptions));
        }
    }

    /**
     * Inference options of a single field.
     *
     * @param inferenceId  the inference id associated with the field
     * @param sourceFields the source fields associated with the field
     */
    public record FieldInferenceOptions(String inferenceId, Set<String> sourceFields)
        implements
            SimpleDiffable<FieldInferenceOptions>,
            ToXContentFragment {

        public static final ParseField INFERENCE_ID_FIELD = new ParseField("inference_id");
        public static final ParseField SOURCE_FIELDS_FIELD = new ParseField("source_fields");

        /**
         * Reads the options from a stream: a string followed by a collection of strings.
         */
        FieldInferenceOptions(StreamInput in) throws IOException {
            this(in.readString(), in.readCollectionAsImmutableSet(StreamInput::readString));
        }

        /**
         * Writes the inference id followed by the source fields, mirroring the stream constructor.
         */
        @Override
        public void writeTo(StreamOutput out) throws IOException {
            out.writeString(inferenceId);
            out.writeStringCollection(sourceFields);
        }

        /**
         * Writes the options as an object with the {@code inference_id} and {@code source_fields} fields.
         */
        @Override
        public XContentBuilder toXContent(XContentBuilder builder, Params params) throws IOException {
            builder.startObject();
            builder.field(INFERENCE_ID_FIELD.getPreferredName(), inferenceId);
            builder.field(SOURCE_FIELDS_FIELD.getPreferredName(), sourceFields);
            builder.endObject();
            return builder;
        }

        /**
         * Parses the options using {@code PARSER}.
         */
        public static FieldInferenceOptions fromXContent(XContentParser parser) throws IOException {
            return PARSER.parse(parser, null);
        }

        // The unchecked cast is on args[1], which is declared below as a string array constructor argument.
        @SuppressWarnings("unchecked")
        private static final ConstructingObjectParser<FieldInferenceOptions, Void> PARSER = new ConstructingObjectParser<>(
            "field_inference_parser",
            false,
            (args, unused) -> new FieldInferenceOptions((String) args[0], new HashSet<>((List<String>) args[1]))
        );

        static {
            PARSER.declareString(ConstructingObjectParser.constructorArg(), INFERENCE_ID_FIELD);
            PARSER.declareStringArray(ConstructingObjectParser.constructorArg(), SOURCE_FIELDS_FIELD);
        }
    }
}
Loading

0 comments on commit 3ca808b

Please sign in to comment.