Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

support question answering model #2208

Merged
merged 5 commits into from
Mar 19, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@ public enum FunctionName {
SPARSE_ENCODING,
SPARSE_TOKENIZE,
TEXT_SIMILARITY,
QUESTION_ANSWERING,
AGENT;

public static FunctionName from(String value) {
Expand All @@ -42,7 +43,8 @@ public static FunctionName from(String value) {
TEXT_EMBEDDING,
TEXT_SIMILARITY,
SPARSE_ENCODING,
SPARSE_TOKENIZE
SPARSE_TOKENIZE,
QUESTION_ANSWERING
));

/**
Expand Down
5 changes: 5 additions & 0 deletions common/src/main/java/org/opensearch/ml/common/MLModel.java
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@
import org.opensearch.ml.common.controller.MLRateLimiter;
import org.opensearch.ml.common.model.MLModelFormat;
import org.opensearch.ml.common.model.MLModelState;
import org.opensearch.ml.common.model.QuestionAnsweringModelConfig;
import org.opensearch.ml.common.model.TextEmbeddingModelConfig;
import org.opensearch.ml.common.model.MetricsCorrelationModelConfig;

Expand Down Expand Up @@ -219,6 +220,8 @@ public MLModel(StreamInput input) throws IOException {
if (input.readBoolean()) {
if (algorithm.equals(FunctionName.METRICS_CORRELATION)) {
modelConfig = new MetricsCorrelationModelConfig(input);
} else if (algorithm.equals(FunctionName.QUESTION_ANSWERING)) {
modelConfig = new QuestionAnsweringModelConfig(input);
} else {
modelConfig = new TextEmbeddingModelConfig(input);
}
Expand Down Expand Up @@ -527,6 +530,8 @@ public static MLModel parse(XContentParser parser, String algorithmName) throws
case MODEL_CONFIG_FIELD:
if (FunctionName.METRICS_CORRELATION.name().equals(algorithmName)) {
modelConfig = MetricsCorrelationModelConfig.parse(parser);
} else if (FunctionName.QUESTION_ANSWERING.name().equals(algorithmName)) {
modelConfig = QuestionAnsweringModelConfig.parse(parser);
} else {
modelConfig = TextEmbeddingModelConfig.parse(parser);
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -13,5 +13,6 @@ public enum MLInputDataType {
DATA_FRAME,
TEXT_DOCS,
REMOTE,
TEXT_SIMILARITY
TEXT_SIMILARITY,
QUESTION_ANSWERING
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,51 @@
/*
* Copyright OpenSearch Contributors
* SPDX-License-Identifier: Apache-2.0
*/
package org.opensearch.ml.common.dataset;

import lombok.AccessLevel;
import lombok.Builder;
import lombok.Getter;
import lombok.experimental.FieldDefaults;
import org.opensearch.core.common.io.stream.StreamInput;
import org.opensearch.core.common.io.stream.StreamOutput;
import org.opensearch.ml.common.annotation.InputDataSet;

import java.io.IOException;

/**
 * Input dataset for the question answering function: holds one question
 * string and one context string, both mandatory.
 */
@Getter
@FieldDefaults(makeFinal = true, level = AccessLevel.PRIVATE)
@InputDataSet(MLInputDataType.QUESTION_ANSWERING)
public class QuestionAnsweringInputDataSet extends MLInputDataset {

    String question;

    String context;

    /**
     * Builds a dataset from explicit values.
     *
     * @param question the question text; must not be null
     * @param context  the context text; must not be null
     * @throws IllegalArgumentException if either argument is null
     */
    @Builder(toBuilder = true)
    public QuestionAnsweringInputDataSet(String question, String context) {
        super(MLInputDataType.QUESTION_ANSWERING);
        // The question is validated before the context, so a call with both
        // arguments missing reports the question first.
        this.question = requireProvided(question, "Question is not provided");
        this.context = requireProvided(context, "Context is not provided");
    }

    /**
     * Reads a dataset from a stream: the question string first, then the
     * context string (the same order {@link #writeTo(StreamOutput)} emits them).
     *
     * @param in stream positioned at the question string
     * @throws IOException if reading from the stream fails
     */
    public QuestionAnsweringInputDataSet(StreamInput in) throws IOException {
        super(MLInputDataType.QUESTION_ANSWERING);
        this.question = in.readString();
        this.context = in.readString();
    }

    /**
     * Writes whatever the superclass writes, followed by the question and
     * then the context.
     *
     * @param out stream to write to
     * @throws IOException if writing to the stream fails
     */
    @Override
    public void writeTo(StreamOutput out) throws IOException {
        super.writeTo(out);
        out.writeString(question);
        out.writeString(context);
    }

    /** Returns {@code value} unchanged, or fails with {@code message} when it is null. */
    private static String requireProvided(String value, String message) {
        if (value != null) {
            return value;
        }
        throw new IllegalArgumentException(message);
    }
}
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@
import org.opensearch.ml.common.dataframe.DataFrame;
import org.opensearch.ml.common.dataframe.DefaultDataFrame;
import org.opensearch.ml.common.dataset.DataFrameInputDataset;
import org.opensearch.ml.common.dataset.QuestionAnsweringInputDataSet;
import org.opensearch.ml.common.dataset.remote.RemoteInferenceInputDataSet;
import org.opensearch.ml.common.output.model.ModelResultFilter;
import org.opensearch.ml.common.dataset.MLInputDataset;
Expand Down Expand Up @@ -63,6 +64,12 @@ public class MLInput implements Input {
public static final String QUERY_TEXT_FIELD = "query_text";
public static final String PARAMETERS_FIELD = "parameters";

// Input question in question answering model
public static final String QUESTION_FIELD = "question";

// Input context in question answering model
public static final String CONTEXT_FIELD = "context";

// Algorithm name
protected FunctionName algorithm;
// ML algorithm parameters
Expand Down Expand Up @@ -178,6 +185,13 @@ public XContentBuilder toXContent(XContentBuilder builder, Params params) throws
builder.endArray();
}
break;
case QUESTION_ANSWERING:
QuestionAnsweringInputDataSet qaInputDataSet = (QuestionAnsweringInputDataSet) this.inputDataset;
String question = qaInputDataSet.getQuestion();
String context = qaInputDataSet.getContext();
builder.field(QUESTION_FIELD, question);
builder.field(CONTEXT_FIELD, context);
break;
case REMOTE:
RemoteInferenceInputDataSet remoteInferenceInputDataSet = (RemoteInferenceInputDataSet) this.inputDataset;
Map<String, String> parameters = remoteInferenceInputDataSet.getParameters();
Expand Down Expand Up @@ -213,6 +227,8 @@ public static MLInput parse(XContentParser parser, String inputAlgoName) throws
List<Integer> targetResponsePositions = new ArrayList<>();
List<String> textDocs = new ArrayList<>();
String queryText = null;
String question = null;
String context = null;

ensureExpectedToken(XContentParser.Token.START_OBJECT, parser.currentToken(), parser);
while (parser.nextToken() != XContentParser.Token.END_OBJECT) {
Expand Down Expand Up @@ -263,6 +279,12 @@ public static MLInput parse(XContentParser parser, String inputAlgoName) throws
case QUERY_TEXT_FIELD:
queryText = parser.text();
break;
case QUESTION_FIELD:
question = parser.text();
break;
case CONTEXT_FIELD:
context = parser.text();
break;
default:
parser.skipChildren();
break;
Expand All @@ -272,9 +294,10 @@ public static MLInput parse(XContentParser parser, String inputAlgoName) throws
if (algorithm == FunctionName.TEXT_EMBEDDING || algorithm == FunctionName.SPARSE_ENCODING || algorithm == FunctionName.SPARSE_TOKENIZE) {
ModelResultFilter filter = new ModelResultFilter(returnBytes, returnNumber, targetResponse, targetResponsePositions);
inputDataSet = new TextDocsInputDataSet(textDocs, filter);
}
if (algorithm == FunctionName.TEXT_SIMILARITY) {
} else if (algorithm == FunctionName.TEXT_SIMILARITY) {
inputDataSet = new TextSimilarityInputDataSet(queryText, textDocs);
} else if (algorithm == FunctionName.QUESTION_ANSWERING) {
inputDataSet = new QuestionAnsweringInputDataSet(question, context);
}
return new MLInput(algorithm, mlParameters, searchSourceBuilder, sourceIndices, dataFrame, inputDataSet);
}
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,91 @@
/*
* Copyright OpenSearch Contributors
* SPDX-License-Identifier: Apache-2.0
*/
package org.opensearch.ml.common.input.nlp;

import org.opensearch.core.common.io.stream.StreamInput;
import org.opensearch.core.common.io.stream.StreamOutput;
import org.opensearch.core.xcontent.XContentBuilder;
import org.opensearch.core.xcontent.XContentParser;
import org.opensearch.ml.common.FunctionName;
import org.opensearch.ml.common.dataset.MLInputDataset;
import org.opensearch.ml.common.dataset.QuestionAnsweringInputDataSet;
import org.opensearch.ml.common.input.MLInput;

import java.io.IOException;

import static org.opensearch.core.xcontent.XContentParserUtils.ensureExpectedToken;


/**
* MLInput which supports a question answering algorithm.
* Inputs are a question and a context; the output is the answer.
*/
@org.opensearch.ml.common.annotation.MLInput(functionNames = {FunctionName.QUESTION_ANSWERING})
public class QuestionAnsweringMLInput extends MLInput {

/**
 * Creates a question answering input from an already-built dataset.
 *
 * @param algorithm function name forwarded to the super constructor
 * @param dataset input dataset forwarded to the super constructor
 */
public QuestionAnsweringMLInput(FunctionName algorithm, MLInputDataset dataset) {
// The middle argument is passed as null; presumably this is the ML parameters slot — confirm against MLInput.
super(algorithm, null, dataset);
}

/**
 * Stream constructor; all reading is delegated to the super constructor.
 *
 * @param in stream to read this input from
 * @throws IOException propagated from the super constructor
 */
public QuestionAnsweringMLInput(StreamInput in) throws IOException {
super(in);
}

/**
 * Writes this input to the stream by delegating to the superclass;
 * no additional fields are written here.
 *
 * @param out stream to write to
 * @throws IOException propagated from the superclass
 */
@Override
public void writeTo(StreamOutput out) throws IOException {
super.writeTo(out);
}

/**
 * Serializes this input as a single object containing the algorithm name,
 * the parameters (only when non-null), and the question and context taken
 * from the input dataset (only when a dataset is present).
 *
 * @param builder builder to write into
 * @param params not read by this method
 * @return the same builder, after the object has been closed
 * @throws IOException declared for the builder calls
 */
@Override
public XContentBuilder toXContent(XContentBuilder builder, Params params) throws IOException {
builder.startObject();
builder.field(ALGORITHM_FIELD, algorithm.name());
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

should we put algorithm field or function name field?

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It is algorithm since the super class MLInput has algorithm. Within the model index, it is stored as algorithm rather than function name

if(parameters != null) {
builder.field(ML_PARAMETERS_FIELD, parameters);
}
if(inputDataset != null) {
// The dataset is cast unconditionally, so it is expected to be a QuestionAnsweringInputDataSet.
QuestionAnsweringInputDataSet ds = (QuestionAnsweringInputDataSet) this.inputDataset;
String question = ds.getQuestion();
String context = ds.getContext();
builder.field(QUESTION_FIELD, question);
builder.field(CONTEXT_FIELD, context);
}
builder.endObject();
return builder;
}

/**
 * Builds the input by parsing an object from {@code parser}. The parser's
 * current token must be START_OBJECT. The question and context fields are
 * read as text; every other field is skipped.
 *
 * @param parser parser positioned on the START_OBJECT token of the input
 * @param functionName stored as this input's algorithm
 * @throws IOException declared for the parser calls
 * @throws IllegalArgumentException if the question or the context is still
 *         null once the object has been consumed
 */
public QuestionAnsweringMLInput(XContentParser parser, FunctionName functionName) throws IOException {
super();
this.algorithm = functionName;
String question = null;
String context = null;

ensureExpectedToken(XContentParser.Token.START_OBJECT, parser.currentToken(), parser);
while (parser.nextToken() != XContentParser.Token.END_OBJECT) {
String fieldName = parser.currentName();
// Advance from the field name to its value before dispatching on the name.
parser.nextToken();

switch (fieldName) {
case QUESTION_FIELD:
question = parser.text();
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

add break;?

break;
case CONTEXT_FIELD:
context = parser.text();
break;
default:
parser.skipChildren();
break;
}
}
// Both values are checked here before the dataset is constructed.
if(question == null) {
throw new IllegalArgumentException("Question is not provided");
}
if(context == null) {
throw new IllegalArgumentException("Context is not provided");
}
inputDataset = new QuestionAnsweringInputDataSet(question, context);
}

}
Original file line number Diff line number Diff line change
Expand Up @@ -99,6 +99,7 @@ public TextSimilarityMLInput(XContentParser parser, FunctionName functionName) t
break;
case QUERY_TEXT_FIELD:
queryText = parser.text();
break;
default:
parser.skipChildren();
break;
Expand Down
Loading
Loading