Skip to content

Commit

Permalink
change input output formats for question answering model
Browse files Browse the repository at this point in the history
Signed-off-by: Bhavana Ramaram <[email protected]>
  • Loading branch information
rbhavna committed Mar 18, 2024
1 parent 0f3fe61 commit 3ec4e8c
Show file tree
Hide file tree
Showing 11 changed files with 390 additions and 62 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -13,5 +13,6 @@ public enum MLInputDataType {
DATA_FRAME,
TEXT_DOCS,
REMOTE,
TEXT_SIMILARITY
TEXT_SIMILARITY,
QUESTION_ANSWERING
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,65 @@
/*
* Copyright 2023 Aryn
* Copyright OpenSearch Contributors
* SPDX-License-Identifier: Apache-2.0
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.opensearch.ml.common.dataset;

import lombok.AccessLevel;
import lombok.Builder;
import lombok.Getter;
import lombok.experimental.FieldDefaults;
import org.opensearch.core.common.io.stream.StreamInput;
import org.opensearch.core.common.io.stream.StreamOutput;
import org.opensearch.ml.common.annotation.InputDataSet;

import java.io.IOException;
import java.util.Objects;

/**
 * Input dataset for a question answering model.
 * Holds the question and the context that are sent to the model; both are
 * written to and read from the transport stream as plain strings.
 */
@Getter
@FieldDefaults(makeFinal = true, level = AccessLevel.PRIVATE)
@InputDataSet(MLInputDataType.QUESTION_ANSWERING)
public class QuestionAnsweringInputDataSet extends MLInputDataset {

    // Input question in question answering model
    String question;

    // Input context in question answering model
    String context;

    /**
     * Creates a dataset from an explicit question and context.
     *
     * @param question the question; must not be null
     * @param context  the context; must not be null
     * @throws IllegalArgumentException if either argument is null
     */
    @Builder(toBuilder = true)
    public QuestionAnsweringInputDataSet(String question, String context) {
        super(MLInputDataType.QUESTION_ANSWERING);
        if (question == null) {
            throw new IllegalArgumentException("Question is not provided");
        }
        if (context == null) {
            throw new IllegalArgumentException("Context is not provided");
        }
        this.question = question;
        this.context = context;
    }

    /**
     * Reads a dataset from a stream, in the order {@link #writeTo(StreamOutput)} writes it:
     * question first, then context.
     *
     * @param in stream positioned at the question string
     * @throws IOException if reading from the stream fails
     */
    public QuestionAnsweringInputDataSet(StreamInput in) throws IOException {
        // Must match the type used by the builder constructor and the @InputDataSet
        // annotation; the previous TEXT_SIMILARITY value mislabelled deserialized instances.
        super(MLInputDataType.QUESTION_ANSWERING);
        this.question = in.readString();
        this.context = in.readString();
    }

    /**
     * Writes the parent's data followed by the question and the context.
     *
     * @param out stream to write to
     * @throws IOException if writing to the stream fails
     */
    @Override
    public void writeTo(StreamOutput out) throws IOException {
        super.writeTo(out);
        out.writeString(question);
        out.writeString(context);
    }
}
27 changes: 26 additions & 1 deletion common/src/main/java/org/opensearch/ml/common/input/MLInput.java
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@
import org.opensearch.ml.common.dataframe.DataFrame;
import org.opensearch.ml.common.dataframe.DefaultDataFrame;
import org.opensearch.ml.common.dataset.DataFrameInputDataset;
import org.opensearch.ml.common.dataset.QuestionAnsweringInputDataSet;
import org.opensearch.ml.common.dataset.remote.RemoteInferenceInputDataSet;
import org.opensearch.ml.common.output.model.ModelResultFilter;
import org.opensearch.ml.common.dataset.MLInputDataset;
Expand Down Expand Up @@ -63,6 +64,12 @@ public class MLInput implements Input {
public static final String QUERY_TEXT_FIELD = "query_text";
public static final String PARAMETERS_FIELD = "parameters";

// Input question in question answering model
public static final String QUESTION_FIELD = "question";

// Input context in question answering model
public static final String CONTEXT_FIELD = "context";

// Algorithm name
protected FunctionName algorithm;
// ML algorithm parameters
Expand Down Expand Up @@ -178,6 +185,13 @@ public XContentBuilder toXContent(XContentBuilder builder, Params params) throws
builder.endArray();
}
break;
case QUESTION_ANSWERING:
QuestionAnsweringInputDataSet qaInputDataSet = (QuestionAnsweringInputDataSet) this.inputDataset;
String question = qaInputDataSet.getQuestion();
String context = qaInputDataSet.getContext();
builder.field(QUESTION_FIELD, question);
builder.field(CONTEXT_FIELD, context);
break;

Check warning on line 194 in common/src/main/java/org/opensearch/ml/common/input/MLInput.java

View check run for this annotation

Codecov / codecov/patch

common/src/main/java/org/opensearch/ml/common/input/MLInput.java#L189-L194

Added lines #L189 - L194 were not covered by tests
case REMOTE:
RemoteInferenceInputDataSet remoteInferenceInputDataSet = (RemoteInferenceInputDataSet) this.inputDataset;
Map<String, String> parameters = remoteInferenceInputDataSet.getParameters();
Expand Down Expand Up @@ -213,6 +227,8 @@ public static MLInput parse(XContentParser parser, String inputAlgoName) throws
List<Integer> targetResponsePositions = new ArrayList<>();
List<String> textDocs = new ArrayList<>();
String queryText = null;
String question = null;
String context = null;

ensureExpectedToken(XContentParser.Token.START_OBJECT, parser.currentToken(), parser);
while (parser.nextToken() != XContentParser.Token.END_OBJECT) {
Expand Down Expand Up @@ -263,19 +279,28 @@ public static MLInput parse(XContentParser parser, String inputAlgoName) throws
case QUERY_TEXT_FIELD:
queryText = parser.text();
break;
case QUESTION_FIELD:
question = parser.text();
break;

Check warning on line 284 in common/src/main/java/org/opensearch/ml/common/input/MLInput.java

View check run for this annotation

Codecov / codecov/patch

common/src/main/java/org/opensearch/ml/common/input/MLInput.java#L283-L284

Added lines #L283 - L284 were not covered by tests
case CONTEXT_FIELD:
context = parser.text();
break;

Check warning on line 287 in common/src/main/java/org/opensearch/ml/common/input/MLInput.java

View check run for this annotation

Codecov / codecov/patch

common/src/main/java/org/opensearch/ml/common/input/MLInput.java#L286-L287

Added lines #L286 - L287 were not covered by tests
default:
parser.skipChildren();
break;
}
}
MLInputDataset inputDataSet = null;
if (algorithm == FunctionName.TEXT_EMBEDDING || algorithm == FunctionName.SPARSE_ENCODING || algorithm == FunctionName.SPARSE_TOKENIZE || algorithm == FunctionName.QUESTION_ANSWERING) {
if (algorithm == FunctionName.TEXT_EMBEDDING || algorithm == FunctionName.SPARSE_ENCODING || algorithm == FunctionName.SPARSE_TOKENIZE) {
ModelResultFilter filter = new ModelResultFilter(returnBytes, returnNumber, targetResponse, targetResponsePositions);
inputDataSet = new TextDocsInputDataSet(textDocs, filter);
}
if (algorithm == FunctionName.TEXT_SIMILARITY) {
inputDataSet = new TextSimilarityInputDataSet(queryText, textDocs);
}
if (algorithm == FunctionName.QUESTION_ANSWERING) {
inputDataSet = new QuestionAnsweringInputDataSet(question, context);

Check warning on line 302 in common/src/main/java/org/opensearch/ml/common/input/MLInput.java

View check run for this annotation

Codecov / codecov/patch

common/src/main/java/org/opensearch/ml/common/input/MLInput.java#L302

Added line #L302 was not covered by tests
}
return new MLInput(algorithm, mlParameters, searchSourceBuilder, sourceIndices, dataFrame, inputDataSet);
}

Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,103 @@
/*
* Copyright 2023 Aryn
* Copyright OpenSearch Contributors
* SPDX-License-Identifier: Apache-2.0
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.opensearch.ml.common.input.nlp;

import org.opensearch.core.common.io.stream.StreamInput;
import org.opensearch.core.common.io.stream.StreamOutput;
import org.opensearch.core.xcontent.XContentBuilder;
import org.opensearch.core.xcontent.XContentParser;
import org.opensearch.ml.common.FunctionName;
import org.opensearch.ml.common.dataset.MLInputDataset;
import org.opensearch.ml.common.dataset.QuestionAnsweringInputDataSet;
import org.opensearch.ml.common.input.MLInput;

import java.io.IOException;

import static org.opensearch.core.xcontent.XContentParserUtils.ensureExpectedToken;


/**
* MLInput which supports a text similarity algorithm
* Inputs are a query and a list of texts. Outputs are real numbers
* Use this for Cross Encoder models
*/
@org.opensearch.ml.common.annotation.MLInput(functionNames = {FunctionName.QUESTION_ANSWERING})
public class QuestionAnsweringMLInput extends MLInput {

public QuestionAnsweringMLInput(FunctionName algorithm, MLInputDataset dataset) {
super(algorithm, null, dataset);
}

public QuestionAnsweringMLInput(StreamInput in) throws IOException {
super(in);
}

@Override
public void writeTo(StreamOutput out) throws IOException {
super.writeTo(out);
}

@Override
public XContentBuilder toXContent(XContentBuilder builder, Params params) throws IOException {
builder.startObject();
builder.field(ALGORITHM_FIELD, algorithm.name());
if(parameters != null) {
builder.field(ML_PARAMETERS_FIELD, parameters);

Check warning on line 60 in common/src/main/java/org/opensearch/ml/common/input/nlp/QuestionAnsweringMLInput.java

View check run for this annotation

Codecov / codecov/patch

common/src/main/java/org/opensearch/ml/common/input/nlp/QuestionAnsweringMLInput.java#L60

Added line #L60 was not covered by tests
}
if(inputDataset != null) {
QuestionAnsweringInputDataSet ds = (QuestionAnsweringInputDataSet) this.inputDataset;
String question = ds.getQuestion();
String context = ds.getContext();
builder.field(QUESTION_FIELD, question);
builder.field(CONTEXT_FIELD, context);
}
builder.endObject();
return builder;
}

public QuestionAnsweringMLInput(XContentParser parser, FunctionName functionName) throws IOException {
super();
this.algorithm = functionName;
String question = null;
String context = null;

ensureExpectedToken(XContentParser.Token.START_OBJECT, parser.currentToken(), parser);
while (parser.nextToken() != XContentParser.Token.END_OBJECT) {
String fieldName = parser.currentName();
parser.nextToken();

switch (fieldName) {
case QUESTION_FIELD:
question = parser.text();
case CONTEXT_FIELD:
context = parser.text();
default:
parser.skipChildren();
break;
}
}
if(question == null) {
throw new IllegalArgumentException("Question is not provided");

Check warning on line 95 in common/src/main/java/org/opensearch/ml/common/input/nlp/QuestionAnsweringMLInput.java

View check run for this annotation

Codecov / codecov/patch

common/src/main/java/org/opensearch/ml/common/input/nlp/QuestionAnsweringMLInput.java#L95

Added line #L95 was not covered by tests
}
if(context == null) {
throw new IllegalArgumentException("Context is not provided");

Check warning on line 98 in common/src/main/java/org/opensearch/ml/common/input/nlp/QuestionAnsweringMLInput.java

View check run for this annotation

Codecov / codecov/patch

common/src/main/java/org/opensearch/ml/common/input/nlp/QuestionAnsweringMLInput.java#L98

Added line #L98 was not covered by tests
}
inputDataset = new QuestionAnsweringInputDataSet(question, context);
}

}
Original file line number Diff line number Diff line change
Expand Up @@ -171,7 +171,6 @@ private void testClassLoader_MLInput_DlModel(FunctionName functionName) throws I
// Runs the shared testClassLoader_MLInput_DlModel check once for each function name listed below.
@Test
public void testClassLoader_MLInput() throws IOException {
testClassLoader_MLInput_DlModel(FunctionName.TEXT_EMBEDDING);
testClassLoader_MLInput_DlModel(FunctionName.QUESTION_ANSWERING);
testClassLoader_MLInput_DlModel(FunctionName.SPARSE_ENCODING);
testClassLoader_MLInput_DlModel(FunctionName.SPARSE_TOKENIZE);
}
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,63 @@
/*
* Copyright 2023 Aryn
* Copyright OpenSearch Contributors
* SPDX-License-Identifier: Apache-2.0
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.opensearch.ml.common.dataset;

import org.junit.Test;
import org.opensearch.common.io.stream.BytesStreamOutput;
import org.opensearch.core.common.bytes.BytesReference;
import org.opensearch.core.common.io.stream.BytesStreamInput;
import org.opensearch.core.common.io.stream.OutputStreamStreamOutput;
import org.opensearch.core.common.io.stream.StreamInput;
import org.opensearch.core.common.io.stream.StreamOutput;

import java.io.IOException;
import java.util.List;

import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertThrows;

/**
 * Tests for {@link QuestionAnsweringInputDataSet}: stream round-trip and
 * rejection of a missing question or context.
 */
public class QuestionAnsweringInputDatasetTest {

    /** A dataset written to a stream must come back with the same question and context. */
    @Test
    public void testStreaming() throws IOException {
        String question = "What color is apple";
        String context = "I like Apples. They are red";
        QuestionAnsweringInputDataSet dataset = QuestionAnsweringInputDataSet.builder().question(question).context(context).build();
        BytesStreamOutput outbytes = new BytesStreamOutput();
        StreamOutput osso = new OutputStreamStreamOutput(outbytes);
        dataset.writeTo(osso);
        StreamInput in = new BytesStreamInput(BytesReference.toBytes(outbytes.bytes()));
        QuestionAnsweringInputDataSet newDs = (QuestionAnsweringInputDataSet) MLInputDataset.fromStream(in);
        // Check the deserialized object, not the local inputs, so a broken round-trip fails.
        assertEquals(question, newDs.getQuestion());
        assertEquals(context, newDs.getContext());
    }

    /** Building without a context must fail with the context-specific message. */
    @Test
    public void noContext_ThenFail() {
        String question = "What color is apple";
        IllegalArgumentException e = assertThrows(IllegalArgumentException.class,
            () -> QuestionAnsweringInputDataSet.builder().question(question).build());
        assertEquals("Context is not provided", e.getMessage());
    }

    /** Building without a question must fail with the question-specific message. */
    @Test
    public void noQuestion_ThenFail() {
        String context = "I like Apples. They are red";
        IllegalArgumentException e = assertThrows(IllegalArgumentException.class,
            () -> QuestionAnsweringInputDataSet.builder().context(context).build());
        assertEquals("Question is not provided", e.getMessage());
    }
}
Loading

0 comments on commit 3ec4e8c

Please sign in to comment.