diff --git a/bin b/bin new file mode 160000 index 0000000..1b79cf4 --- /dev/null +++ b/bin @@ -0,0 +1 @@ +Subproject commit 1b79cf4c0a3d26d64a06bb68765f7d016dc61d5c diff --git a/pom.xml b/pom.xml index f70a52b..b26e9b3 100644 --- a/pom.xml +++ b/pom.xml @@ -6,7 +6,7 @@ 4.0.0 com.mulesoft.connectors mule4-vectors-connector - 0.1.124-SNAPSHOT + 0.2.0 mule-extension MuleSoft Vectors Connector - Mule 4 MuleSoft Vectors Connector provides access to a broad number of external Vector Stores. @@ -236,12 +236,6 @@ ${langchain4jVersion} - - dev.langchain4j - langchain4j-weaviate - ${langchain4jVersion} - - dev.langchain4j langchain4j-document-transformer-jsoup @@ -254,6 +248,30 @@ ${langchain4jVersion} + + dev.langchain4j + langchain4j-qdrant + ${langchain4jVersion} + + + + io.grpc + grpc-protobuf + 1.65.1 + + + + io.grpc + grpc-netty-shaded + 1.65.1 + + + + com.google.protobuf + protobuf-java-util + 3.25.5 + + commons-io commons-io diff --git a/src/main/java/org/mule/extension/mulechain/vectors/internal/constant/Constants.java b/src/main/java/org/mule/extension/mulechain/vectors/internal/constant/Constants.java index 14007e0..036e16f 100644 --- a/src/main/java/org/mule/extension/mulechain/vectors/internal/constant/Constants.java +++ b/src/main/java/org/mule/extension/mulechain/vectors/internal/constant/Constants.java @@ -37,8 +37,8 @@ private Constants() {} public static final String VECTOR_STORE_MILVUS = "MILVUS"; public static final String VECTOR_STORE_CHROMA = "CHROMA"; public static final String VECTOR_STORE_PINECONE = "PINECONE"; - public static final String VECTOR_STORE_WEAVIATE = "WEAVIATE"; public static final String VECTOR_STORE_AI_SEARCH = "AI_SEARCH"; + public static final String VECTOR_STORE_QDRANT = "QDRANT"; public static final String STORE_SCHEMA_METADATA_FIELD_NAME = "metadata"; public static final String STORE_SCHEMA_VECTOR_FIELD_NAME = "vector"; @@ -74,7 +74,6 @@ private Constants() {} public static final String JSON_KEY_TEXT = "text"; public static final String JSON_KEY_STATUS = "status"; public static final String JSON_KEY_EMBEDDING = "embedding"; - public static final String JSON_KEY_EMBEDDINGS = "embeddings"; public static final String JSON_KEY_DIMENSIONS = "dimensions"; public static final String JSON_KEY_RESPONSE = "response"; public static final String JSON_KEY_QUESTION = "question"; diff --git a/src/main/java/org/mule/extension/mulechain/vectors/internal/helper/EmbeddingOperationValidator.java b/src/main/java/org/mule/extension/mulechain/vectors/internal/helper/EmbeddingOperationValidator.java index 98265a6..c2e250b 100644 --- a/src/main/java/org/mule/extension/mulechain/vectors/internal/helper/EmbeddingOperationValidator.java +++ b/src/main/java/org/mule/extension/mulechain/vectors/internal/helper/EmbeddingOperationValidator.java @@ -24,7 +24,6 @@ * Constants.VECTOR_STORE_MILVUS, * Constants.VECTOR_STORE_CHROMA, * Constants.VECTOR_STORE_PINECONE, - * Constants.VECTOR_STORE_WEAVIATE, * Constants.VECTOR_STORE_AI_SEARCH * ))); * @@ -47,11 +46,10 @@ public class EmbeddingOperationValidator { Constants.VECTOR_STORE_MILVUS, Constants.VECTOR_STORE_CHROMA, Constants.VECTOR_STORE_PINECONE, - Constants.VECTOR_STORE_WEAVIATE, - Constants.VECTOR_STORE_AI_SEARCH + Constants.VECTOR_STORE_AI_SEARCH, + Constants.VECTOR_STORE_QDRANT ))); - // Weaviate not supported for FILTER_BY_METADATA operation EMBEDDING_OPERATION_TYPE_TO_SUPPORTED_VECTOR_STORES.put(Constants.EMBEDDING_OPERATION_TYPE_FILTER_BY_METADATA, new HashSet<>(Arrays.asList( Constants.VECTOR_STORE_PGVECTOR, @@ -60,7 +58,8 @@ public class EmbeddingOperationValidator { Constants.VECTOR_STORE_MILVUS, Constants.VECTOR_STORE_CHROMA, Constants.VECTOR_STORE_PINECONE, - Constants.VECTOR_STORE_AI_SEARCH + Constants.VECTOR_STORE_AI_SEARCH, + Constants.VECTOR_STORE_QDRANT ))); EMBEDDING_OPERATION_TYPE_TO_SUPPORTED_VECTOR_STORES.put(Constants.EMBEDDING_OPERATION_TYPE_REMOVE_EMBEDDINGS, @@ -71,7 +70,6 @@ public class EmbeddingOperationValidator { Constants.VECTOR_STORE_MILVUS, Constants.VECTOR_STORE_CHROMA, // Constants.VECTOR_STORE_PINECONE, - Constants.VECTOR_STORE_WEAVIATE, Constants.VECTOR_STORE_AI_SEARCH ))); @@ -83,8 +81,8 @@ public class EmbeddingOperationValidator { Constants.VECTOR_STORE_MILVUS, Constants.VECTOR_STORE_CHROMA, // Constants.VECTOR_STORE_PINECONE, // Do not support GTE with strings. - // Constants.VECTOR_STORE_WEAVIATE, // Not Supported - Constants.VECTOR_STORE_AI_SEARCH + Constants.VECTOR_STORE_AI_SEARCH, + Constants.VECTOR_STORE_QDRANT ))); } diff --git a/src/main/java/org/mule/extension/mulechain/vectors/internal/helper/provider/VectorStoreProvider.java b/src/main/java/org/mule/extension/mulechain/vectors/internal/helper/provider/VectorStoreProvider.java index 22c4e27..042a109 100644 --- a/src/main/java/org/mule/extension/mulechain/vectors/internal/helper/provider/VectorStoreProvider.java +++ b/src/main/java/org/mule/extension/mulechain/vectors/internal/helper/provider/VectorStoreProvider.java @@ -16,13 +16,13 @@ public Set resolve() throws ValueResolvingException { return ValueBuilder.getValuesFor( Constants.VECTOR_STORE_PGVECTOR, Constants.VECTOR_STORE_ELASTICSEARCH, - Constants.VECTOR_STORE_OPENSEARCH, + Constants.VECTOR_STORE_OPENSEARCH, Constants.VECTOR_STORE_MILVUS, Constants.VECTOR_STORE_CHROMA, Constants.VECTOR_STORE_PINECONE, - Constants.VECTOR_STORE_WEAVIATE, Constants.VECTOR_STORE_AI_SEARCH, - Constants.VECTOR_STORE_OPENSEARCH + Constants.VECTOR_STORE_OPENSEARCH, + Constants.VECTOR_STORE_QDRANT ); // MuleChainVectorsConstants.VECTOR_STORE_NEO4J } diff --git a/src/main/java/org/mule/extension/mulechain/vectors/internal/store/BaseStore.java b/src/main/java/org/mule/extension/mulechain/vectors/internal/store/BaseStore.java index 8d842a6..44ff8c8 100644 --- a/src/main/java/org/mule/extension/mulechain/vectors/internal/store/BaseStore.java +++ b/src/main/java/org/mule/extension/mulechain/vectors/internal/store/BaseStore.java @@ -15,7 +15,7 @@ import org.mule.extension.mulechain.vectors.internal.store.opensearch.OpenSearchStore; import org.mule.extension.mulechain.vectors.internal.store.pgvector.PGVectorStore; import org.mule.extension.mulechain.vectors.internal.store.pinecone.PineconeStore; -import org.mule.extension.mulechain.vectors.internal.store.weviate.WeaviateStore; +import org.mule.extension.mulechain.vectors.internal.store.qdrant.QdrantStore; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -274,11 +274,6 @@ public BaseStore build() { baseStore = new AISearchStore(storeName, configuration, queryParams, dimension); break; - case Constants.VECTOR_STORE_WEAVIATE: - - baseStore = new WeaviateStore(storeName, configuration, queryParams, dimension); - break; - case Constants.VECTOR_STORE_CHROMA: baseStore = new ChromaStore(storeName, configuration, queryParams, dimension); @@ -299,6 +294,11 @@ public BaseStore build() { baseStore = new OpenSearchStore(storeName, configuration, queryParams, dimension); break; + case Constants.VECTOR_STORE_QDRANT: + + baseStore = new QdrantStore(storeName, configuration, queryParams, dimension); + break; + default: //throw new IllegalOperationException("Unsupported Vector Store: " + configuration.getVectorStore()); baseStore = null; diff --git a/src/main/java/org/mule/extension/mulechain/vectors/internal/store/qdrant/QdrantStore.java b/src/main/java/org/mule/extension/mulechain/vectors/internal/store/qdrant/QdrantStore.java new file mode 100644 index 0000000..65472ae --- /dev/null +++ b/src/main/java/org/mule/extension/mulechain/vectors/internal/store/qdrant/QdrantStore.java @@ -0,0 +1,155 @@ +package org.mule.extension.mulechain.vectors.internal.store.qdrant; + +import com.google.protobuf.InvalidProtocolBufferException; +import com.google.protobuf.Struct; +import com.google.protobuf.Value; +import com.google.protobuf.util.JsonFormat; +import dev.langchain4j.data.segment.TextSegment; +import dev.langchain4j.store.embedding.EmbeddingStore; +import dev.langchain4j.store.embedding.qdrant.QdrantEmbeddingStore; +import io.qdrant.client.QdrantClient; +import io.qdrant.client.QdrantGrpcClient; +import io.qdrant.client.grpc.Collections; +import io.qdrant.client.grpc.JsonWithInt; +import io.qdrant.client.grpc.Points; +import org.json.JSONObject; +import org.mule.extension.mulechain.vectors.internal.config.Configuration; +import org.mule.extension.mulechain.vectors.internal.constant.Constants; +import org.mule.extension.mulechain.vectors.internal.helper.parameter.QueryParameters; +import org.mule.extension.mulechain.vectors.internal.store.BaseStore; +import org.mule.extension.mulechain.vectors.internal.util.JsonUtils; + +import java.util.*; +import java.util.concurrent.ExecutionException; + +public class QdrantStore extends BaseStore { + + private final QdrantClient client; + private final String payloadTextKey; + + public QdrantStore(String storeName, Configuration configuration, QueryParameters queryParams, int dimension) { + + super(storeName, configuration, queryParams, dimension); + + JSONObject config = JsonUtils.readConfigFile(configuration.getConfigFilePath()); + JSONObject vectorStoreConfig = config.getJSONObject(Constants.VECTOR_STORE_QDRANT); + String host = vectorStoreConfig.getString("QDRANT_HOST"); + String apiKey = vectorStoreConfig.getString("QDRANT_API_KEY"); + int port = vectorStoreConfig.getInt("QDRANT_GRPC_PORT"); + boolean useTls = vectorStoreConfig.getBoolean("QDRANT_USE_TLS"); + this.client = new QdrantClient(QdrantGrpcClient.newBuilder(host, port, useTls).withApiKey(apiKey).build()); + this.payloadTextKey = vectorStoreConfig.getString("QDRANT_TEXT_KEY"); + + try { + if (!this.client.collectionExistsAsync(this.storeName).get() && dimension > 0) { + this.client.createCollectionAsync(storeName, + Collections.VectorParams.newBuilder().setDistance(Collections.Distance.Cosine) + .setSize(dimension).build()) + .get(); + } + } catch (ExecutionException | InterruptedException e) { + throw new RuntimeException(e); + } + } + + public EmbeddingStore buildEmbeddingStore() { + + return QdrantEmbeddingStore.builder() + .client(client) + .payloadTextKey(payloadTextKey) + .collectionName(storeName) + .build(); + } + + @Override + public JSONObject listSources() { + try { + // Optional max limit of 100k points. + int MAX_POINTS = 10000; + + HashMap sourceObjectMap = new HashMap(); + JSONObject jsonObject = new JSONObject(); + + boolean keepScrolling = true; + Points.PointId nextOffset = null; + List points = new ArrayList<>(MAX_POINTS); + while (keepScrolling && points.size() < MAX_POINTS) { + Points.ScrollPoints.Builder request = Points.ScrollPoints.newBuilder() + .setCollectionName(storeName) + .setLimit(Math.min(queryParams.embeddingPageSize(), MAX_POINTS - points.size())); + if (nextOffset != null) { + request.setOffset(nextOffset); + } + + Points.ScrollResponse response = client.scrollAsync(request.build()).get(); + + points.addAll(response.getResultList()); + nextOffset = response.getNextPageOffset(); + keepScrolling = nextOffset.hasNum() || nextOffset.hasUuid(); + } + + for (Points.RetrievedPoint point : points) { + JSONObject metadataObject = new JSONObject(JsonFactory.toJson(point.getPayloadMap())); + JSONObject sourceObject = getSourceObject(metadataObject); + addOrUpdateSourceObjectIntoSourceObjectMap(sourceObjectMap, sourceObject); + } + + jsonObject.put(Constants.JSON_KEY_SOURCES, + JsonUtils.jsonObjectCollectionToJsonArray(sourceObjectMap.values())); + jsonObject.put(Constants.JSON_KEY_SOURCE_COUNT, sourceObjectMap.size()); + + return jsonObject; + } catch (ExecutionException | InterruptedException | InvalidProtocolBufferException e) { + throw new RuntimeException(e); + } + } +} + +final class JsonFactory { + public static String toJson(Map map) + throws InvalidProtocolBufferException { + + Struct.Builder structBuilder = Struct.newBuilder(); + map.forEach((key, value) -> structBuilder.putFields(key, toProtobufValue(value))); + return JsonFormat.printer().print(structBuilder.build()); + } + + private static Value toProtobufValue(io.qdrant.client.grpc.JsonWithInt.Value value) { + switch (value.getKindCase()) { + case NULL_VALUE: + return Value.newBuilder().setNullValueValue(0).build(); + + case BOOL_VALUE: + return Value.newBuilder().setBoolValue(value.getBoolValue()).build(); + + case STRING_VALUE: + return Value.newBuilder().setStringValue(value.getStringValue()).build(); + + case INTEGER_VALUE: + return Value.newBuilder().setNumberValue(value.getIntegerValue()).build(); + + case DOUBLE_VALUE: + return Value.newBuilder().setNumberValue(value.getDoubleValue()).build(); + + case STRUCT_VALUE: + Struct.Builder structBuilder = Struct.newBuilder(); + value.getStructValue() + .getFieldsMap() + .forEach( + (key, val) -> { + structBuilder.putFields(key, toProtobufValue(val)); + }); + return Value.newBuilder().setStructValue(structBuilder).build(); + + case LIST_VALUE: + Value.Builder listBuilder = Value.newBuilder(); + value.getListValue().getValuesList().stream() + .map(JsonFactory::toProtobufValue) + .forEach(listBuilder.getListValueBuilder()::addValues); + return listBuilder.build(); + + default: + throw new IllegalArgumentException("Unsupported payload value type: " + value.getKindCase()); + } + } +} diff --git a/src/main/java/org/mule/extension/mulechain/vectors/internal/store/weviate/WeaviateStore.java b/src/main/java/org/mule/extension/mulechain/vectors/internal/store/weviate/WeaviateStore.java deleted file mode 100644 index 5738078..0000000 --- a/src/main/java/org/mule/extension/mulechain/vectors/internal/store/weviate/WeaviateStore.java +++ /dev/null @@ -1,46 +0,0 @@ -package org.mule.extension.mulechain.vectors.internal.store.weviate; - -import dev.langchain4j.data.segment.TextSegment; -import dev.langchain4j.store.embedding.EmbeddingStore; -import dev.langchain4j.store.embedding.weaviate.WeaviateEmbeddingStore; -import org.json.JSONObject; -import org.mule.extension.mulechain.vectors.internal.config.Configuration; -import org.mule.extension.mulechain.vectors.internal.constant.Constants; -import org.mule.extension.mulechain.vectors.internal.helper.parameter.QueryParameters; -import org.mule.extension.mulechain.vectors.internal.store.BaseStore; - -import static org.mule.extension.mulechain.vectors.internal.util.JsonUtils.readConfigFile; - -public class WeaviateStore extends BaseStore { - - private String host; - private String protocol; - private String apiKey; - - public WeaviateStore(String storeName, Configuration configuration, QueryParameters queryParams, int dimension) { - - super(storeName, configuration, queryParams, dimension); - - JSONObject config = readConfigFile(configuration.getConfigFilePath()); - JSONObject vectorStoreConfig = config.getJSONObject(Constants.VECTOR_STORE_WEAVIATE); - this.host = vectorStoreConfig.getString("WEAVIATE_HOST"); - this.protocol = vectorStoreConfig.getString("WEAVIATE_PROTOCOL"); - this.apiKey = vectorStoreConfig.getString("WEAVIATE_APIKEY"); - } - - public EmbeddingStore buildEmbeddingStore() { - - return WeaviateEmbeddingStore.builder() - .scheme(protocol) - .host(host) - // "Default" class is used if not specified. Must start from an uppercase letter! - .objectClass(storeName) - // If true (default), then WeaviateEmbeddingStore will generate a hashed ID based on provided - // text segment, which avoids duplicated entries in DB. If false, then random ID will be generated. - .avoidDups(true) - // Consistency level: ONE, QUORUM (default) or ALL. - .consistencyLevel("ALL") - .apiKey(apiKey) - .build(); - } -}