-
Notifications
You must be signed in to change notification settings - Fork 140
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
change input output formats for question answering model
Signed-off-by: Bhavana Ramaram <[email protected]>
- Loading branch information
Showing
11 changed files
with
336 additions
and
62 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
51 changes: 51 additions & 0 deletions
51
common/src/main/java/org/opensearch/ml/common/dataset/QuestionAnsweringInputDataSet.java
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,51 @@ | ||
/* | ||
* Copyright OpenSearch Contributors | ||
* SPDX-License-Identifier: Apache-2.0 | ||
*/ | ||
package org.opensearch.ml.common.dataset; | ||
|
||
import lombok.AccessLevel; | ||
import lombok.Builder; | ||
import lombok.Getter; | ||
import lombok.experimental.FieldDefaults; | ||
import org.opensearch.core.common.io.stream.StreamInput; | ||
import org.opensearch.core.common.io.stream.StreamOutput; | ||
import org.opensearch.ml.common.annotation.InputDataSet; | ||
|
||
import java.io.IOException; | ||
|
||
@Getter | ||
@FieldDefaults(makeFinal = true, level = AccessLevel.PRIVATE) | ||
@InputDataSet(MLInputDataType.QUESTION_ANSWERING) | ||
public class QuestionAnsweringInputDataSet extends MLInputDataset { | ||
|
||
String question; | ||
|
||
String context; | ||
|
||
@Builder(toBuilder = true) | ||
public QuestionAnsweringInputDataSet(String question, String context) { | ||
super(MLInputDataType.QUESTION_ANSWERING); | ||
if(question == null) { | ||
throw new IllegalArgumentException("Question is not provided"); | ||
} | ||
if(context == null) { | ||
throw new IllegalArgumentException("Context is not provided"); | ||
} | ||
this.question = question; | ||
this.context = context; | ||
} | ||
|
||
public QuestionAnsweringInputDataSet(StreamInput in) throws IOException { | ||
super(MLInputDataType.TEXT_SIMILARITY); | ||
this.question = in.readString(); | ||
this.context = in.readString(); | ||
} | ||
|
||
@Override | ||
public void writeTo(StreamOutput out) throws IOException { | ||
super.writeTo(out); | ||
out.writeString(question); | ||
out.writeString(context); | ||
} | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
89 changes: 89 additions & 0 deletions
89
common/src/main/java/org/opensearch/ml/common/input/nlp/QuestionAnsweringMLInput.java
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,89 @@ | ||
/* | ||
* Copyright OpenSearch Contributors | ||
* SPDX-License-Identifier: Apache-2.0 | ||
*/ | ||
package org.opensearch.ml.common.input.nlp; | ||
|
||
import org.opensearch.core.common.io.stream.StreamInput; | ||
import org.opensearch.core.common.io.stream.StreamOutput; | ||
import org.opensearch.core.xcontent.XContentBuilder; | ||
import org.opensearch.core.xcontent.XContentParser; | ||
import org.opensearch.ml.common.FunctionName; | ||
import org.opensearch.ml.common.dataset.MLInputDataset; | ||
import org.opensearch.ml.common.dataset.QuestionAnsweringInputDataSet; | ||
import org.opensearch.ml.common.input.MLInput; | ||
|
||
import java.io.IOException; | ||
|
||
import static org.opensearch.core.xcontent.XContentParserUtils.ensureExpectedToken; | ||
|
||
|
||
/** | ||
* MLInput which supports a question answering algorithm | ||
* Inputs are question and context. Output is the answer | ||
*/ | ||
@org.opensearch.ml.common.annotation.MLInput(functionNames = {FunctionName.QUESTION_ANSWERING}) | ||
public class QuestionAnsweringMLInput extends MLInput { | ||
|
||
public QuestionAnsweringMLInput(FunctionName algorithm, MLInputDataset dataset) { | ||
super(algorithm, null, dataset); | ||
} | ||
|
||
public QuestionAnsweringMLInput(StreamInput in) throws IOException { | ||
super(in); | ||
} | ||
|
||
@Override | ||
public void writeTo(StreamOutput out) throws IOException { | ||
super.writeTo(out); | ||
} | ||
|
||
@Override | ||
public XContentBuilder toXContent(XContentBuilder builder, Params params) throws IOException { | ||
builder.startObject(); | ||
builder.field(ALGORITHM_FIELD, algorithm.name()); | ||
if(parameters != null) { | ||
builder.field(ML_PARAMETERS_FIELD, parameters); | ||
} | ||
if(inputDataset != null) { | ||
QuestionAnsweringInputDataSet ds = (QuestionAnsweringInputDataSet) this.inputDataset; | ||
String question = ds.getQuestion(); | ||
String context = ds.getContext(); | ||
builder.field(QUESTION_FIELD, question); | ||
builder.field(CONTEXT_FIELD, context); | ||
} | ||
builder.endObject(); | ||
return builder; | ||
} | ||
|
||
public QuestionAnsweringMLInput(XContentParser parser, FunctionName functionName) throws IOException { | ||
super(); | ||
this.algorithm = functionName; | ||
String question = null; | ||
String context = null; | ||
|
||
ensureExpectedToken(XContentParser.Token.START_OBJECT, parser.currentToken(), parser); | ||
while (parser.nextToken() != XContentParser.Token.END_OBJECT) { | ||
String fieldName = parser.currentName(); | ||
parser.nextToken(); | ||
|
||
switch (fieldName) { | ||
case QUESTION_FIELD: | ||
question = parser.text(); | ||
case CONTEXT_FIELD: | ||
context = parser.text(); | ||
default: | ||
parser.skipChildren(); | ||
break; | ||
} | ||
} | ||
if(question == null) { | ||
throw new IllegalArgumentException("Question is not provided"); | ||
} | ||
if(context == null) { | ||
throw new IllegalArgumentException("Context is not provided"); | ||
} | ||
inputDataset = new QuestionAnsweringInputDataSet(question, context); | ||
} | ||
|
||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
50 changes: 50 additions & 0 deletions
50
common/src/test/java/org/opensearch/ml/common/dataset/QuestionAnsweringInputDatasetTest.java
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,50 @@ | ||
/* | ||
* Copyright OpenSearch Contributors | ||
* SPDX-License-Identifier: Apache-2.0 | ||
*/ | ||
package org.opensearch.ml.common.dataset; | ||
|
||
import org.junit.Test; | ||
import org.opensearch.common.io.stream.BytesStreamOutput; | ||
import org.opensearch.core.common.bytes.BytesReference; | ||
import org.opensearch.core.common.io.stream.BytesStreamInput; | ||
import org.opensearch.core.common.io.stream.OutputStreamStreamOutput; | ||
import org.opensearch.core.common.io.stream.StreamInput; | ||
import org.opensearch.core.common.io.stream.StreamOutput; | ||
|
||
import java.io.IOException; | ||
import java.util.List; | ||
|
||
import static org.junit.Assert.assertThrows; | ||
|
||
public class QuestionAnsweringInputDatasetTest { | ||
|
||
@Test | ||
public void testStreaming() throws IOException { | ||
String question = "What color is apple"; | ||
String context = "I like Apples. They are red"; | ||
QuestionAnsweringInputDataSet dataset = QuestionAnsweringInputDataSet.builder().question(question).context(context).build(); | ||
BytesStreamOutput outbytes = new BytesStreamOutput(); | ||
StreamOutput osso = new OutputStreamStreamOutput(outbytes); | ||
dataset.writeTo(osso); | ||
StreamInput in = new BytesStreamInput(BytesReference.toBytes(outbytes.bytes())); | ||
QuestionAnsweringInputDataSet newDs = (QuestionAnsweringInputDataSet) MLInputDataset.fromStream(in); | ||
assert (question.equals("What color is apple")); | ||
assert (context.equals("I like Apples. They are red")); | ||
} | ||
|
||
@Test | ||
public void noContext_ThenFail() { | ||
String question = "What color is apple"; | ||
IllegalArgumentException e = assertThrows(IllegalArgumentException.class, | ||
() -> QuestionAnsweringInputDataSet.builder().question(question).build()); | ||
assert (e.getMessage().equals("Context is not provided")); | ||
} | ||
|
||
@Test | ||
public void noQuestion_ThenFail() { | ||
String context = "I like Apples. They are red"; | ||
assertThrows(IllegalArgumentException.class, | ||
() -> QuestionAnsweringInputDataSet.builder().context(context).build()); | ||
} | ||
} |
104 changes: 104 additions & 0 deletions
104
common/src/test/java/org/opensearch/ml/common/input/nlp/QuestionAnsweringMLInputTest.java
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,104 @@ | ||
/* | ||
* Copyright OpenSearch Contributors | ||
* SPDX-License-Identifier: Apache-2.0 | ||
*/ | ||
package org.opensearch.ml.common.input.nlp; | ||
|
||
import org.junit.Before; | ||
import org.junit.Test; | ||
import org.opensearch.common.io.stream.BytesStreamOutput; | ||
import org.opensearch.common.settings.Settings; | ||
import org.opensearch.common.xcontent.XContentType; | ||
import org.opensearch.core.common.bytes.BytesReference; | ||
import org.opensearch.core.common.io.stream.BytesStreamInput; | ||
import org.opensearch.core.common.io.stream.OutputStreamStreamOutput; | ||
import org.opensearch.core.common.io.stream.StreamInput; | ||
import org.opensearch.core.common.io.stream.StreamOutput; | ||
import org.opensearch.core.xcontent.MediaTypeRegistry; | ||
import org.opensearch.core.xcontent.NamedXContentRegistry; | ||
import org.opensearch.core.xcontent.ToXContent; | ||
import org.opensearch.core.xcontent.XContentBuilder; | ||
import org.opensearch.core.xcontent.XContentParser; | ||
import org.opensearch.ml.common.FunctionName; | ||
import org.opensearch.ml.common.dataset.MLInputDataset; | ||
import org.opensearch.ml.common.dataset.QuestionAnsweringInputDataSet; | ||
import org.opensearch.ml.common.input.MLInput; | ||
import org.opensearch.search.SearchModule; | ||
|
||
import java.io.IOException; | ||
import java.util.Collections; | ||
import java.util.List; | ||
|
||
import static org.junit.Assert.assertThrows; | ||
|
||
public class QuestionAnsweringMLInputTest { | ||
|
||
MLInput input; | ||
|
||
private final FunctionName algorithm = FunctionName.QUESTION_ANSWERING; | ||
|
||
@Before | ||
public void setup() { | ||
String question = "What color is apple"; | ||
String context = "I like Apples. They are red"; | ||
MLInputDataset dataset = QuestionAnsweringInputDataSet.builder().question(question).context(context).build(); | ||
input = new QuestionAnsweringMLInput(algorithm, dataset); | ||
} | ||
|
||
@Test | ||
public void testXContent_IsInternallyConsistent() throws IOException { | ||
XContentBuilder builder = MediaTypeRegistry.contentBuilder(XContentType.JSON); | ||
input.toXContent(builder, ToXContent.EMPTY_PARAMS); | ||
String jsonStr = builder.toString(); | ||
XContentParser parser = XContentType.JSON.xContent() | ||
.createParser(new NamedXContentRegistry(new SearchModule(Settings.EMPTY, | ||
Collections.emptyList()).getNamedXContents()), null, jsonStr); | ||
parser.nextToken(); | ||
|
||
MLInput parsedInput = MLInput.parse(parser, input.getFunctionName().name()); | ||
assert (parsedInput instanceof QuestionAnsweringMLInput); | ||
QuestionAnsweringMLInput parsedQAMLI = (QuestionAnsweringMLInput) parsedInput; | ||
String question = ((QuestionAnsweringInputDataSet) parsedQAMLI.getInputDataset()).getQuestion(); | ||
String context = ((QuestionAnsweringInputDataSet) parsedQAMLI.getInputDataset()).getContext(); | ||
assert (question.equals("What color is apple")); | ||
assert (context.equals("I like Apples. They are red")); | ||
} | ||
|
||
@Test | ||
public void testXContent_String() throws IOException { | ||
XContentBuilder builder = MediaTypeRegistry.contentBuilder(XContentType.JSON); | ||
input.toXContent(builder, ToXContent.EMPTY_PARAMS); | ||
String jsonStr = builder.toString(); | ||
assert (jsonStr.equals("{\"algorithm\":\"QUESTION_ANSWERING\",\"question\":\"What color is apple\",\"context\":\"I like Apples. They are red\"}")); | ||
} | ||
|
||
@Test | ||
public void testParseJson() throws IOException { | ||
String json = "{\"algorithm\":\"QUESTION_ANSWERING\",\"question\":\"What color is apple\",\"context\":\"I like Apples. They are red\"}"; | ||
XContentParser parser = XContentType.JSON.xContent() | ||
.createParser(new NamedXContentRegistry(new SearchModule(Settings.EMPTY, | ||
Collections.emptyList()).getNamedXContents()), null, json); | ||
parser.nextToken(); | ||
|
||
MLInput parsedInput = MLInput.parse(parser, input.getFunctionName().name()); | ||
assert (parsedInput instanceof QuestionAnsweringMLInput); | ||
QuestionAnsweringMLInput parsedQAMLI = (QuestionAnsweringMLInput) parsedInput; | ||
String question = ((QuestionAnsweringInputDataSet) parsedQAMLI.getInputDataset()).getQuestion(); | ||
String context = ((QuestionAnsweringInputDataSet) parsedQAMLI.getInputDataset()).getContext(); | ||
assert (question.equals("What color is apple")); | ||
assert (context.equals("I like Apples. They are red")); | ||
} | ||
|
||
@Test | ||
public void testStreaming() throws IOException { | ||
BytesStreamOutput outbytes = new BytesStreamOutput(); | ||
StreamOutput osso = new OutputStreamStreamOutput(outbytes); | ||
input.writeTo(osso); | ||
StreamInput in = new BytesStreamInput(BytesReference.toBytes(outbytes.bytes())); | ||
QuestionAnsweringMLInput newInput = new QuestionAnsweringMLInput(in); | ||
String newQuestion = ((QuestionAnsweringInputDataSet) newInput.getInputDataset()).getQuestion(); | ||
String oldQuestion = ((QuestionAnsweringInputDataSet) input.getInputDataset()).getQuestion(); | ||
assert (newQuestion.equals(oldQuestion)); | ||
} | ||
|
||
} |
Oops, something went wrong.