Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[feature/semantic-text] Simplify the integration of the field inference metadata in IndexMetadata #106743

Merged
merged 25 commits into from
Mar 28, 2024
Merged
Show file tree
Hide file tree
Changes from 19 commits
Commits
Show all changes
25 commits
Select commit Hold shift + click to select a range
7aaa3b6
Revert "Extract interface from ModelRegistry so it can be used from s…
jimczi Mar 7, 2024
ebc26d2
inference as an action filter
jimczi Mar 14, 2024
86ddc9d
add more tests
jimczi Mar 18, 2024
dd73d01
Merge branch 'feature/semantic-text' into carlosdelest/semantic-text-…
carlosdelest Mar 19, 2024
c5de0da
Merge from feature branch
carlosdelest Mar 19, 2024
b2b8635
Refactor the semantic_text field so that it can registers all the sub…
jimczi Mar 19, 2024
64e8e43
Merge branch 'bulk_inference_ref' into register_semantic_text
jimczi Mar 19, 2024
1c18fbc
Refatcor the semantic_text to register its sub fields in the mapping …
jimczi Mar 20, 2024
38f82fd
Merge remote-tracking branch 'upstream/feature/semantic-text' into re…
jimczi Mar 20, 2024
7b578d1
add task_type validation
jimczi Mar 20, 2024
2be50d7
address review comments
jimczi Mar 20, 2024
eb4731f
remove unused
jimczi Mar 20, 2024
b3fb5d3
address review comments
jimczi Mar 21, 2024
8ddc37f
Fix the mapper builder context when updating the semantic text field …
jimczi Mar 21, 2024
b3ae284
string formatting error
jimczi Mar 21, 2024
2e7fc7f
results => chunks renaming
jimczi Mar 22, 2024
b3e7813
Simplify the inference metadata support in IndexMetadata
jimczi Mar 26, 2024
5a50b7d
Merge remote-tracking branch 'upstream/feature/semantic-text' into in…
jimczi Mar 26, 2024
11112c4
checkstyle
jimczi Mar 26, 2024
8fe9ed4
fix compilation
jimczi Mar 27, 2024
3c878bb
fix double writeTo
jimczi Mar 27, 2024
254a797
Merge remote-tracking branch 'upstream/feature/semantic-text' into in…
jimczi Mar 27, 2024
1689da4
Merge remote-tracking branch 'upstream/feature/semantic-text' into in…
jimczi Mar 28, 2024
c66af79
fix wrong assertion
jimczi Mar 28, 2024
9f04e6b
restore batching lost in the previous refactor
jimczi Mar 28, 2024
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -209,8 +209,8 @@ private void executeBulkRequestsByShard(Map<ShardId, List<BulkItemRequest>> requ
requests.toArray(new BulkItemRequest[0])
);
var indexMetadata = clusterState.getMetadata().index(shardId.getIndexName());
if (indexMetadata != null && indexMetadata.getFieldInferenceMetadata().isEmpty() == false) {
bulkShardRequest.setFieldInferenceMetadata(indexMetadata.getFieldInferenceMetadata());
if (indexMetadata != null && indexMetadata.getInferenceFields().isEmpty() == false) {
bulkShardRequest.setInferenceFieldMap(indexMetadata.getInferenceFields());
}
bulkShardRequest.waitForActiveShards(bulkRequest.waitForActiveShards());
bulkShardRequest.timeout(bulkRequest.timeout());
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -15,14 +15,15 @@
import org.elasticsearch.action.support.replication.ReplicatedWriteRequest;
import org.elasticsearch.action.support.replication.ReplicationRequest;
import org.elasticsearch.action.update.UpdateRequest;
import org.elasticsearch.cluster.metadata.FieldInferenceMetadata;
import org.elasticsearch.cluster.metadata.InferenceFieldMetadata;
import org.elasticsearch.common.io.stream.StreamInput;
import org.elasticsearch.common.io.stream.StreamOutput;
import org.elasticsearch.common.util.set.Sets;
import org.elasticsearch.index.shard.ShardId;
import org.elasticsearch.transport.RawIndexingDataTransportRequest;

import java.io.IOException;
import java.util.Map;
import java.util.Set;

public final class BulkShardRequest extends ReplicatedWriteRequest<BulkShardRequest>
Expand All @@ -34,7 +35,7 @@ public final class BulkShardRequest extends ReplicatedWriteRequest<BulkShardRequ

private final BulkItemRequest[] items;

private transient FieldInferenceMetadata fieldsInferenceMetadataMap = null;
private transient Map<String, InferenceFieldMetadata> inferenceFieldMap = null;

public BulkShardRequest(StreamInput in) throws IOException {
super(in);
Expand All @@ -51,24 +52,24 @@ public BulkShardRequest(ShardId shardId, RefreshPolicy refreshPolicy, BulkItemRe
* Public for test
* Set the transient metadata indicating that this request requires running inference before proceeding.
*/
public void setFieldInferenceMetadata(FieldInferenceMetadata fieldsInferenceMetadata) {
this.fieldsInferenceMetadataMap = fieldsInferenceMetadata;
public void setInferenceFieldMap(Map<String, InferenceFieldMetadata> fieldInferenceMap) {
this.inferenceFieldMap = fieldInferenceMap;
}

/**
* Consumes the inference metadata to execute inference on the bulk items just once.
*/
public FieldInferenceMetadata consumeFieldInferenceMetadata() {
FieldInferenceMetadata ret = fieldsInferenceMetadataMap;
fieldsInferenceMetadataMap = null;
public Map<String, InferenceFieldMetadata> consumeInferenceFieldMap() {
Map<String, InferenceFieldMetadata> ret = inferenceFieldMap;
inferenceFieldMap = null;
return ret;
}

/**
* Public for test
*/
public FieldInferenceMetadata getFieldsInferenceMetadataMap() {
return fieldsInferenceMetadataMap;
public Map<String, InferenceFieldMetadata> getInferenceFieldMap() {
return inferenceFieldMap;
}

public long totalSizeInBytes() {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,7 @@
import org.elasticsearch.index.IndexNotFoundException;
import org.elasticsearch.index.IndexService;
import org.elasticsearch.index.engine.VersionConflictEngineException;
import org.elasticsearch.index.mapper.InferenceFieldMapper;
import org.elasticsearch.index.shard.IndexShard;
import org.elasticsearch.index.shard.ShardId;
import org.elasticsearch.indices.IndicesService;
Expand Down Expand Up @@ -184,7 +185,7 @@ protected void shardOperation(final UpdateRequest request, final ActionListener<
final UpdateHelper.Result result = updateHelper.prepare(request, indexShard, threadPool::absoluteTimeInMillis);
switch (result.getResponseResult()) {
case CREATED -> {
IndexRequest upsertRequest = result.action();
IndexRequest upsertRequest = removeInferenceMetadataField(indexService, result.action());
// we fetch it from the index request so we don't generate the bytes twice; it's already done in the index request
final BytesReference upsertSourceBytes = upsertRequest.source();
client.bulk(
Expand Down Expand Up @@ -226,7 +227,7 @@ protected void shardOperation(final UpdateRequest request, final ActionListener<
);
}
case UPDATED -> {
IndexRequest indexRequest = result.action();
IndexRequest indexRequest = removeInferenceMetadataField(indexService, result.action());
// we fetch it from the index request so we don't generate the bytes twice; it's already done in the index request
final BytesReference indexSourceBytes = indexRequest.source();
client.bulk(
Expand Down Expand Up @@ -335,4 +336,15 @@ private void handleUpdateFailureWithRetry(
}
listener.onFailure(cause instanceof Exception ? (Exception) cause : new NotSerializableExceptionWrapper(cause));
}

/**
 * Removes the {@link InferenceFieldMapper#NAME} entry from the source of the given index request.
 * <p>
 * The request is returned untouched when the index declares no inference fields, or when the
 * document does not contain the entry at all; in both cases the already-serialized source bytes
 * are kept as they are. When the entry is present, the source is rebuilt without it using the
 * request's own content type, so the document is not silently converted to the default format.
 *
 * @param service the index service used to look up the inference fields of the target index
 * @param request the index request whose source should be cleaned; it is modified in place
 * @return the same {@code request} instance, for call chaining
 */
private IndexRequest removeInferenceMetadataField(IndexService service, IndexRequest request) {
    var inferenceMetadata = service.getIndexSettings().getIndexMetadata().getInferenceFields();
    if (inferenceMetadata.isEmpty()) {
        return request;
    }
    Map<String, Object> docMap = request.sourceAsMap();
    if (docMap.containsKey(InferenceFieldMapper.NAME) == false) {
        // Nothing to strip: avoid re-serializing a source that would come out unchanged.
        return request;
    }
    docMap.remove(InferenceFieldMapper.NAME);
    var contentType = request.getContentType();
    if (contentType != null) {
        // Keep the format the source was originally written in.
        request.source(docMap, contentType);
    } else {
        // No content type recorded on the request: fall back to the default one.
        request.source(docMap);
    }
    return request;
}
}

This file was deleted.

Loading