support step size for embedding model which outputs less embeddings (opensearch-project#1586)

* support step size for embedding model which outputs less embeddings

  Signed-off-by: Yaliang Wu <[email protected]>

* tune parameter name

  Signed-off-by: Yaliang Wu <[email protected]>

* fine tune processed doc to always respect step size

  Signed-off-by: Yaliang Wu <[email protected]>

---------

Signed-off-by: Yaliang Wu <[email protected]>
austintlee · Feb 29, 2024 · 82a2845 · 82a2845
1 parent 0778afc
commit 82a2845
Show file tree

Hide file tree

Showing 2 changed files with 70 additions and 24 deletions.
diff --git a/...hms/src/main/java/org/opensearch/ml/engine/algorithms/remote/RemoteConnectorExecutor.java b/...hms/src/main/java/org/opensearch/ml/engine/algorithms/remote/RemoteConnectorExecutor.java
@@ -5,13 +5,6 @@
 
 package org.opensearch.ml.engine.algorithms.remote;
 
-import static org.opensearch.ml.engine.algorithms.remote.ConnectorUtils.processInput;
-
-import java.util.ArrayList;
-import java.util.HashMap;
-import java.util.List;
-import java.util.Map;
-
 import org.opensearch.client.Client;
 import org.opensearch.cluster.service.ClusterService;
 import org.opensearch.core.xcontent.NamedXContentRegistry;
@@ -25,6 +18,13 @@
 import org.opensearch.ml.common.output.model.ModelTensors;
 import org.opensearch.script.ScriptService;
 
+import java.util.ArrayList;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+
+import static org.opensearch.ml.engine.algorithms.remote.ConnectorUtils.processInput;
+
 public interface RemoteConnectorExecutor {
 
     default ModelTensorOutput executePredict(MLInput mlInput) {
@@ -48,7 +48,14 @@ default ModelTensorOutput executePredict(MLInput mlInput) {
                 if (tempTensorOutputs.size() > 0 && tempTensorOutputs.get(0).getMlModelTensors() != null) {
                     tensorCount = tempTensorOutputs.get(0).getMlModelTensors().size();
                 }
-                processedDocs += Math.max(tensorCount, 1);
+                // Some models take N text docs per run but return fewer than N embeddings, so the tensor count
+                // cannot tell the executor how far to advance; an optional connector parameter supplies the step size.
+                Map<String, String> parameters = getConnector().getParameters();
+                if (parameters != null && parameters.containsKey("input_docs_processed_step_size")) {
+                    processedDocs += Integer.parseInt(parameters.get("input_docs_processed_step_size"));
+                } else {
+                    processedDocs += Math.max(tensorCount, 1);
+                }
                 tensorOutputs.addAll(tempTensorOutputs);
             }
         } else {

diff --git a/...c/test/java/org/opensearch/ml/engine/algorithms/remote/HttpJsonConnectorExecutorTest.java b/...c/test/java/org/opensearch/ml/engine/algorithms/remote/HttpJsonConnectorExecutorTest.java
@@ -6,6 +6,8 @@
 package org.opensearch.ml.engine.algorithms.remote;
 
 import static org.mockito.ArgumentMatchers.any;
+import static org.mockito.Mockito.mock;
+import static org.mockito.Mockito.mockStatic;
 import static org.mockito.Mockito.spy;
 import static org.mockito.Mockito.when;
 
@@ -42,6 +44,7 @@
 import org.opensearch.script.ScriptService;
 
 import com.google.common.collect.ImmutableMap;
+import java.util.Map;
 
 public class HttpJsonConnectorExecutorTest {
     @Rule
@@ -192,22 +195,15 @@ public void executePredict_TextDocsInput() throws IOException {
             .then(invocation -> new TestTemplateService.MockTemplateScript.Factory(preprocessResult1))
             .then(invocation -> new TestTemplateService.MockTemplateScript.Factory(preprocessResult2));
 
-        ConnectorAction predictAction = ConnectorAction
-            .builder()
-            .actionType(ConnectorAction.ActionType.PREDICT)
-            .method("POST")
-            .url("http://test.com/mock")
-            .preProcessFunction(MLPreProcessFunction.TEXT_DOCS_TO_OPENAI_EMBEDDING_INPUT)
-            .postProcessFunction(MLPostProcessFunction.OPENAI_EMBEDDING)
-            .requestBody("{\"input\": ${parameters.input}}")
-            .build();
-        Connector connector = HttpConnector
-            .builder()
-            .name("test connector")
-            .version("1")
-            .protocol("http")
-            .actions(Arrays.asList(predictAction))
-            .build();
+        ConnectorAction predictAction = ConnectorAction.builder()
+                .actionType(ConnectorAction.ActionType.PREDICT)
+                .method("POST")
+                .url("http://test.com/mock")
+                .preProcessFunction(MLPreProcessFunction.TEXT_DOCS_TO_OPENAI_EMBEDDING_INPUT)
+                .postProcessFunction(MLPostProcessFunction.OPENAI_EMBEDDING)
+                .requestBody("{\"input\": ${parameters.input}}")
+                .build();
+        HttpConnector connector = HttpConnector.builder().name("test connector").version("1").protocol("http").actions(Arrays.asList(predictAction)).build();
         HttpJsonConnectorExecutor executor = spy(new HttpJsonConnectorExecutor(connector));
         executor.setScriptService(scriptService);
         when(httpClient.execute(any())).thenReturn(response);
@@ -244,6 +240,7 @@ public void executePredict_TextDocsInput() throws IOException {
         HttpEntity entity = new StringEntity(modelResponse);
         when(response.getEntity()).thenReturn(entity);
         when(executor.getHttpClient()).thenReturn(httpClient);
+        when(executor.getConnector()).thenReturn(connector);
         MLInputDataset inputDataSet = TextDocsInputDataSet.builder().docs(Arrays.asList("test doc1", "test doc2")).build();
         ModelTensorOutput modelTensorOutput = executor
             .executePredict(MLInput.builder().algorithm(FunctionName.REMOTE).inputDataset(inputDataSet).build());
@@ -261,4 +258,46 @@ public void executePredict_TextDocsInput() throws IOException {
                 modelTensorOutput.getMlModelOutputs().get(0).getMlModelTensors().get(1).getData()
             );
     }
+
+    @Test
+    public void executePredict_TextDocsInput_LessEmbeddingThanInputDocs() throws IOException {
+        String preprocessResult1 = "{\"parameters\": { \"input\": \"test doc1\" } }";
+        String preprocessResult2 = "{\"parameters\": { \"input\": \"test doc2\" } }";
+        when(scriptService.compile(any(), any()))
+                .then(invocation -> new TestTemplateService.MockTemplateScript.Factory(preprocessResult1))
+                .then(invocation -> new TestTemplateService.MockTemplateScript.Factory(preprocessResult2));
+
+        ConnectorAction predictAction = ConnectorAction.builder()
+                .actionType(ConnectorAction.ActionType.PREDICT)
+                .method("POST")
+                .url("http://test.com/mock")
+                .preProcessFunction(MLPreProcessFunction.TEXT_DOCS_TO_OPENAI_EMBEDDING_INPUT)
+                .postProcessFunction(MLPostProcessFunction.OPENAI_EMBEDDING)
+                .requestBody("{\"input\": ${parameters.input}}")
+                .build();
+        Map<String, String> parameters = ImmutableMap.of("input_docs_processed_step_size", "2");
+        HttpConnector connector = HttpConnector.builder().name("test connector").version("1").protocol("http").parameters(parameters).actions(Arrays.asList(predictAction)).build();
+        HttpJsonConnectorExecutor executor = spy(new HttpJsonConnectorExecutor(connector));
+        executor.setScriptService(scriptService);
+        when(httpClient.execute(any())).thenReturn(response);
+        // The model takes 2 input docs but outputs only 1 embedding.
+        String modelResponse = "{\n" + "    \"object\": \"list\",\n" + "    \"data\": [\n" + "        {\n"
+                + "            \"object\": \"embedding\",\n" + "            \"index\": 0,\n" + "            \"embedding\": [\n"
+                + "                -0.014555434,\n" + "                -0.002135904,\n" + "                0.0035105038\n" + "            ]\n"
+                + "        }    ],\n"
+                + "    \"model\": \"text-embedding-ada-002-v2\",\n" + "    \"usage\": {\n" + "        \"prompt_tokens\": 5,\n"
+                + "        \"total_tokens\": 5\n" + "    }\n" + "}";
+        StatusLine statusLine = new BasicStatusLine(new ProtocolVersion("HTTP", 1, 1), 200, "OK");
+        when(response.getStatusLine()).thenReturn(statusLine);
+        HttpEntity entity = new StringEntity(modelResponse);
+        when(response.getEntity()).thenReturn(entity);
+        when(executor.getHttpClient()).thenReturn(httpClient);
+        when(executor.getConnector()).thenReturn(connector);
+        MLInputDataset inputDataSet = TextDocsInputDataSet.builder().docs(Arrays.asList("test doc1", "test doc2")).build();
+        ModelTensorOutput modelTensorOutput = executor.executePredict(MLInput.builder().algorithm(FunctionName.REMOTE).inputDataset(inputDataSet).build());
+        Assert.assertEquals(1, modelTensorOutput.getMlModelOutputs().size());
+        Assert.assertEquals(1, modelTensorOutput.getMlModelOutputs().get(0).getMlModelTensors().size());
+        Assert.assertEquals("sentence_embedding", modelTensorOutput.getMlModelOutputs().get(0).getMlModelTensors().get(0).getName());
+        Assert.assertArrayEquals(new Number[] {-0.014555434, -0.002135904, 0.0035105038}, modelTensorOutput.getMlModelOutputs().get(0).getMlModelTensors().get(0).getData());
+    }
 }