diff --git a/src/main/java/org/opensearch/neuralsearch/processor/NLPProcessor.java b/src/main/java/org/opensearch/neuralsearch/processor/NLPProcessor.java index 52962c5bb..4ac63d419 100644 --- a/src/main/java/org/opensearch/neuralsearch/processor/NLPProcessor.java +++ b/src/main/java/org/opensearch/neuralsearch/processor/NLPProcessor.java @@ -26,9 +26,11 @@ import com.google.common.annotations.VisibleForTesting; import com.google.common.collect.ImmutableMap; -// The abstract class for text processing use cases. Users provide a field name map -// and a model id. During ingestion, the processor will use the corresponding model -// to inference the input texts, and set the target fields according to the field name map. +/** + * The abstract class for text processing use cases. Users provide a field name map and a model id. + * During ingestion, the processor will use the corresponding model to run inference on the input texts, + * and set the target fields according to the field name map. + */ @Log4j2 public abstract class NLPProcessor extends AbstractProcessor { diff --git a/src/main/java/org/opensearch/neuralsearch/processor/SparseEncodingProcessor.java b/src/main/java/org/opensearch/neuralsearch/processor/SparseEncodingProcessor.java index 217d551c4..62857541e 100644 --- a/src/main/java/org/opensearch/neuralsearch/processor/SparseEncodingProcessor.java +++ b/src/main/java/org/opensearch/neuralsearch/processor/SparseEncodingProcessor.java @@ -17,6 +17,10 @@ import org.opensearch.neuralsearch.ml.MLCommonsClientAccessor; import org.opensearch.neuralsearch.util.TokenWeightUtil; +/** + * This processor is used for user input data text sparse encoding processing, model_id can be used to indicate which model the user uses, + * and field_map can be used to indicate which fields need text embedding and the corresponding keys for the sparse encoding results. 
+ */ @Log4j2 public class SparseEncodingProcessor extends NLPProcessor { diff --git a/src/main/java/org/opensearch/neuralsearch/query/SparseEncodingQueryBuilder.java b/src/main/java/org/opensearch/neuralsearch/query/SparseEncodingQueryBuilder.java index 2f0647d68..430e6a1f6 100644 --- a/src/main/java/org/opensearch/neuralsearch/query/SparseEncodingQueryBuilder.java +++ b/src/main/java/org/opensearch/neuralsearch/query/SparseEncodingQueryBuilder.java @@ -42,6 +42,12 @@ import com.google.common.annotations.VisibleForTesting; +/** + * SparseEncodingQueryBuilder is responsible for handling "sparse_encoding" query types. It uses an ML SPARSE_ENCODING model + * or SPARSE_TOKENIZE model to produce a Map with String keys and Float values for input text. Then it will be transformed + * to Lucene FeatureQuery wrapped by Lucene BooleanQuery. + */ + @Log4j2 @Getter @Setter @@ -66,6 +72,12 @@ public static void initialize(MLCommonsClientAccessor mlClient) { private String modelId; private Supplier> queryTokensSupplier; + /** + * Constructor from stream input + * + * @param in StreamInput to initialize object from + * @throws IOException thrown if unable to read from input stream + */ public SparseEncodingQueryBuilder(StreamInput in) throws IOException { super(in); this.fieldName = in.readString(); @@ -98,6 +110,9 @@ protected void doXContent(XContentBuilder xContentBuilder, Params params) throws * "model_id": "string" * } * + * @param parser XContentParser + * @return SparseEncodingQueryBuilder + * @throws IOException can be thrown by parser */ public static SparseEncodingQueryBuilder fromXContent(XContentParser parser) throws IOException { SparseEncodingQueryBuilder sparseEncodingQueryBuilder = new SparseEncodingQueryBuilder(); diff --git a/src/main/java/org/opensearch/neuralsearch/util/TokenWeightUtil.java b/src/main/java/org/opensearch/neuralsearch/util/TokenWeightUtil.java index 2b9613be3..db249de0f 100644 --- a/src/main/java/org/opensearch/neuralsearch/util/TokenWeightUtil.java 
+++ b/src/main/java/org/opensearch/neuralsearch/util/TokenWeightUtil.java @@ -11,6 +11,12 @@ import java.util.Map; import java.util.stream.Collectors; +/** + * Utility class for working with sparse_encoding queries and ingest processor. + * Used to fetch the (token, weight) Map from the response returned by {@link org.opensearch.neuralsearch.ml.MLCommonsClientAccessor} + * + */ + public class TokenWeightUtil { public static String RESPONSE_KEY = "response"; @@ -36,6 +42,8 @@ public class TokenWeightUtil { * { TOKEN_WEIGHT_MAP} * ] * }] + * + * @param mapResultList {@link List} of {@link Map} which is the response from {@link org.opensearch.neuralsearch.ml.MLCommonsClientAccessor} */ public static List> fetchListOfTokenWeightMap(List> mapResultList) { List results = new ArrayList<>();