diff --git a/docs/api_refs/typedoc.json b/docs/api_refs/typedoc.json
index fa0fda258ea8..d92ae9104b00 100644
--- a/docs/api_refs/typedoc.json
+++ b/docs/api_refs/typedoc.json
@@ -230,6 +230,7 @@
"./langchain/src/retrievers/self_query/pinecone.ts",
"./langchain/src/retrievers/self_query/supabase.ts",
"./langchain/src/retrievers/self_query/weaviate.ts",
+ "./langchain/src/retrievers/self_query/vectara.ts",
"./langchain/src/retrievers/vespa.ts",
"./langchain/src/cache/index.ts",
"./langchain/src/cache/cloudflare_kv.ts",
diff --git a/docs/core_docs/docs/modules/data_connection/retrievers/how_to/self_query/vectara-self-query.mdx b/docs/core_docs/docs/modules/data_connection/retrievers/how_to/self_query/vectara-self-query.mdx
new file mode 100644
index 000000000000..4afdef301f6b
--- /dev/null
+++ b/docs/core_docs/docs/modules/data_connection/retrievers/how_to/self_query/vectara-self-query.mdx
@@ -0,0 +1,39 @@
+# Vectara Self Query Retriever
+
+This example shows how to use a self query retriever with a [Vectara](https://vectara.com/) vector store.
+
+If you haven't already set up Vectara, please [follow the instructions here](/docs/integrations/vectorstores/vectara.mdx).
+
+## Usage
+
+This example shows how to initialize a `SelfQueryRetriever` with a vector store:
+
+import CodeBlock from "@theme/CodeBlock";
+import Example from "@examples/retrievers/vectara_self_query.ts";
+
+<CodeBlock language="typescript">{Example}</CodeBlock>
+
+You can also initialize the retriever with default search parameters that apply in
+addition to the generated query:
+
+```typescript
+const selfQueryRetriever = await SelfQueryRetriever.fromLLM({
+ llm,
+ vectorStore,
+ documentContents,
+ attributeInfo,
+ /**
+ * We need to use a translator that translates the queries into a
+ * filter format that the vector store can understand. LangChain provides one here.
+ */
+ structuredQueryTranslator: new VectaraTranslator(),
+ searchParams: {
+ filter: {
+ filter: "( doc.genre = 'science fiction' ) and ( doc.rating > 8.5 )",
+ },
+ mergeFiltersOperator: "and",
+ },
+});
+```
+
+See the [official docs](https://docs.vectara.com/) for more on how to construct metadata filters.
diff --git a/examples/src/retrievers/vectara_self_query.ts b/examples/src/retrievers/vectara_self_query.ts
new file mode 100644
index 000000000000..53e3bd6ec760
--- /dev/null
+++ b/examples/src/retrievers/vectara_self_query.ts
@@ -0,0 +1,137 @@
+import { AttributeInfo } from "langchain/schema/query_constructor";
+import { Document } from "langchain/document";
+import { SelfQueryRetriever } from "langchain/retrievers/self_query";
+
+import { OpenAI } from "langchain/llms/openai";
+import { VectaraStore } from "langchain/vectorstores/vectara";
+import { VectaraTranslator } from "langchain/retrievers/self_query/vectara";
+import { FakeEmbeddings } from "langchain/embeddings/fake";
+/**
+ * First, we create a handful of sample documents. You can load your own documents here instead.
+ * Each document has a pageContent string and a metadata object. Not every document sets every
+ * metadata key (some omit director, genre, or rating), so keep the keys you do use consistent
+ * with the AttributeInfo entries defined below.
+ */
+const docs = [
+ new Document({
+ pageContent:
+ "A bunch of scientists bring back dinosaurs and mayhem breaks loose",
+ metadata: { year: 1993, rating: 7.7, genre: "science fiction" },
+ }),
+ new Document({
+ pageContent:
+ "Leo DiCaprio gets lost in a dream within a dream within a dream within a ...",
+ metadata: { year: 2010, director: "Christopher Nolan", rating: 8.2 },
+ }),
+ new Document({
+ pageContent:
+ "A psychologist / detective gets lost in a series of dreams within dreams within dreams and Inception reused the idea",
+ metadata: { year: 2006, director: "Satoshi Kon", rating: 8.6 },
+ }),
+ new Document({
+ pageContent:
+ "A bunch of normal-sized women are supremely wholesome and some men pine after them",
+ metadata: { year: 2019, director: "Greta Gerwig", rating: 8.3 },
+ }),
+ new Document({
+ pageContent: "Toys come alive and have a blast doing so",
+ metadata: { year: 1995, genre: "animated" },
+ }),
+ new Document({
+ pageContent: "Three men walk into the Zone, three men walk out of the Zone",
+ metadata: {
+ year: 1979,
+ rating: 9.9,
+ director: "Andrei Tarkovsky",
+ genre: "science fiction",
+ },
+ }),
+];
+
+/**
+ * Next, we define the attributes we want to be able to query on.
+ * In this case, we want to be able to query on the genre, year, director, and rating of the movie.
+ * We also provide a description of each attribute and the type of the attribute.
+ * This is used to generate the query prompts.
+ *
+ * The same attributes must also be set up as filters in Vectara, otherwise filtering won't work.
+ * To set up the filters in Vectara, go to Data -> {your_created_corpus} -> overview.
+ * In the overview section, edit the filters section and add all of the following attributes
+ * to the filters.
+ */
+const attributeInfo: AttributeInfo[] = [
+ {
+ name: "genre",
+ description: "The genre of the movie",
+ type: "string or array of strings",
+ },
+ {
+ name: "year",
+ description: "The year the movie was released",
+ type: "number",
+ },
+ {
+ name: "director",
+ description: "The director of the movie",
+ type: "string",
+ },
+ {
+ name: "rating",
+ description: "The rating of the movie (1-10)",
+ type: "number",
+ },
+];
+
+/**
+ * Next, we instantiate a vector store from the documents above.
+ * VectaraStore.fromDocuments also takes an embeddings object; this example passes FakeEmbeddings.
+ * NOTE(review): presumably Vectara computes embeddings itself, which would make this a
+ * placeholder only — confirm against the VectaraStore documentation.
+ *
+ * The connection settings are read from the VECTARA_CUSTOMER_ID, VECTARA_CORPUS_ID and
+ * VECTARA_API_KEY environment variables. Make sure they are set: Number(undefined) is NaN and
+ * String(undefined) is the literal text "undefined", so a missing variable is not reported here.
+ */
+
+const config = {
+ customerId: Number(process.env.VECTARA_CUSTOMER_ID),
+ corpusId: Number(process.env.VECTARA_CORPUS_ID),
+ apiKey: String(process.env.VECTARA_API_KEY),
+ verbose: true,
+};
+
+const vectorStore = await VectaraStore.fromDocuments(
+ docs,
+ new FakeEmbeddings(),
+ config
+);
+
+const llm = new OpenAI();
+const documentContents = "Brief summary of a movie";
+
+const selfQueryRetriever = await SelfQueryRetriever.fromLLM({
+ llm,
+ vectorStore,
+ documentContents,
+ attributeInfo,
+ /**
+ * The structured query has to be translated into a filter format that the
+ * vector store can understand. VectaraTranslator is the translator used for
+ * Vectara here, but you can create your own translator by extending the BaseTranslator
+ * abstract class. Note that the vector store needs to support filtering on the metadata
+ * attributes you want to query on.
+ */
+ structuredQueryTranslator: new VectaraTranslator(),
+});
+
+/**
+ * Now we can query the vector store.
+ * We can ask questions like "Which movies were released after 2000?" or "Which movies are rated higher than 8.5?".
+ * We can also ask questions like "Which movies are either comedy or science fiction and are rated higher than 8.5?".
+ * The retriever will automatically convert these questions into queries that can be used to retrieve documents.
+ */
+const query1 = await selfQueryRetriever.getRelevantDocuments(
+ "What are some movies about dinosaurs"
+);
+const query2 = await selfQueryRetriever.getRelevantDocuments(
+ "I want to watch a movie rated higher than 8.5"
+);
+const query3 = await selfQueryRetriever.getRelevantDocuments(
+ "Which movies are directed by Greta Gerwig?"
+);
+const query4 = await selfQueryRetriever.getRelevantDocuments(
+ "Which movies are either comedy or science fiction and are rated higher than 8.5?"
+);
+console.log(query1, query2, query3, query4);
diff --git a/langchain/.gitignore b/langchain/.gitignore
index fc2be83f13e7..6b112fee5412 100644
--- a/langchain/.gitignore
+++ b/langchain/.gitignore
@@ -634,6 +634,9 @@ retrievers/self_query/supabase.d.ts
retrievers/self_query/weaviate.cjs
retrievers/self_query/weaviate.js
retrievers/self_query/weaviate.d.ts
+retrievers/self_query/vectara.cjs
+retrievers/self_query/vectara.js
+retrievers/self_query/vectara.d.ts
retrievers/vespa.cjs
retrievers/vespa.js
retrievers/vespa.d.ts
diff --git a/langchain/package.json b/langchain/package.json
index f6aaedad24b1..cdecf18d9256 100644
--- a/langchain/package.json
+++ b/langchain/package.json
@@ -646,6 +646,9 @@
"retrievers/self_query/weaviate.cjs",
"retrievers/self_query/weaviate.js",
"retrievers/self_query/weaviate.d.ts",
+ "retrievers/self_query/vectara.cjs",
+ "retrievers/self_query/vectara.js",
+ "retrievers/self_query/vectara.d.ts",
"retrievers/vespa.cjs",
"retrievers/vespa.js",
"retrievers/vespa.d.ts",
@@ -2468,6 +2471,11 @@
"import": "./retrievers/self_query/weaviate.js",
"require": "./retrievers/self_query/weaviate.cjs"
},
+ "./retrievers/self_query/vectara": {
+ "types": "./retrievers/self_query/vectara.d.ts",
+ "import": "./retrievers/self_query/vectara.js",
+ "require": "./retrievers/self_query/vectara.cjs"
+ },
"./retrievers/vespa": {
"types": "./retrievers/vespa.d.ts",
"import": "./retrievers/vespa.js",
diff --git a/langchain/scripts/create-entrypoints.js b/langchain/scripts/create-entrypoints.js
index 98200151910d..d29b2151219f 100644
--- a/langchain/scripts/create-entrypoints.js
+++ b/langchain/scripts/create-entrypoints.js
@@ -249,6 +249,7 @@ const entrypoints = {
"retrievers/self_query/pinecone": "retrievers/self_query/pinecone",
"retrievers/self_query/supabase": "retrievers/self_query/supabase",
"retrievers/self_query/weaviate": "retrievers/self_query/weaviate",
+ "retrievers/self_query/vectara": "retrievers/self_query/vectara",
"retrievers/vespa": "retrievers/vespa",
// cache
cache: "cache/index",
@@ -459,6 +460,7 @@ const requiresOptionalDependency = [
"retrievers/self_query/pinecone",
"retrievers/self_query/supabase",
"retrievers/self_query/weaviate",
+ "retrievers/self_query/vectara",
"output_parsers/expression",
"chains/query_constructor",
"chains/query_constructor/ir",
diff --git a/langchain/src/load/import_constants.ts b/langchain/src/load/import_constants.ts
index a214f4fed76c..c98adf9c6451 100644
--- a/langchain/src/load/import_constants.ts
+++ b/langchain/src/load/import_constants.ts
@@ -131,6 +131,7 @@ export const optionalImportEntrypoints = [
"langchain/retrievers/self_query/pinecone",
"langchain/retrievers/self_query/supabase",
"langchain/retrievers/self_query/weaviate",
+ "langchain/retrievers/self_query/vectara",
"langchain/cache/cloudflare_kv",
"langchain/cache/momento",
"langchain/cache/redis",
diff --git a/langchain/src/load/import_type.d.ts b/langchain/src/load/import_type.d.ts
index 248bc5f9eafb..ff96bdc124d3 100644
--- a/langchain/src/load/import_type.d.ts
+++ b/langchain/src/load/import_type.d.ts
@@ -391,6 +391,9 @@ export interface OptionalImportMap {
"langchain/retrievers/self_query/weaviate"?:
| typeof import("../retrievers/self_query/weaviate.js")
| Promise;
+ "langchain/retrievers/self_query/vectara"?:
+ | typeof import("../retrievers/self_query/vectara.js")
+ | Promise;
"langchain/cache/cloudflare_kv"?:
| typeof import("../cache/cloudflare_kv.js")
| Promise;
diff --git a/langchain/src/retrievers/self_query/tests/vectara_self_query.int.test.ts b/langchain/src/retrievers/self_query/tests/vectara_self_query.int.test.ts
new file mode 100644
index 000000000000..22a20532237d
--- /dev/null
+++ b/langchain/src/retrievers/self_query/tests/vectara_self_query.int.test.ts
@@ -0,0 +1,113 @@
+/* eslint-disable no-process-env */
+import { test } from "@jest/globals";
+import { Document } from "../../../document.js";
+import { AttributeInfo } from "../../../schema/query_constructor.js";
+import { SelfQueryRetriever } from "../index.js";
+import { OpenAI } from "../../../llms/openai.js";
+import { VectaraTranslator } from "../vectara.js";
+import { FakeEmbeddings } from "../../../embeddings/fake.js";
+import { VectaraStore } from "../../../vectorstores/vectara.js";
+
+test.skip("Vectara Self Query Retriever Test", async () => { // Skipped by default; builds a VectaraStore from the VECTARA_* environment variables and queries it through an OpenAI LLM.
+ const docs = [
+ new Document({
+ pageContent:
+ "A bunch of scientists bring back dinosaurs and mayhem breaks loose",
+ metadata: { year: 1993, rating: 7.7, genre: "science fiction" },
+ }),
+ new Document({
+ pageContent:
+ "Leo DiCaprio gets lost in a dream within a dream within a dream within a ...",
+ metadata: { year: 2010, director: "Christopher Nolan", rating: 8.2 },
+ }),
+ new Document({
+ pageContent:
+ "A psychologist / detective gets lost in a series of dreams within dreams within dreams and Inception reused the idea",
+ metadata: { year: 2006, director: "Satoshi Kon", rating: 8.6 },
+ }),
+ new Document({
+ pageContent:
+ "A bunch of normal-sized women are supremely wholesome and some men pine after them",
+ metadata: { year: 2019, director: "Greta Gerwig", rating: 8.3 },
+ }),
+ new Document({
+ pageContent: "Toys come alive and have a blast doing so",
+ metadata: { year: 1995, genre: "animated" },
+ }),
+ new Document({
+ pageContent:
+ "Three men walk into the Zone, three men walk out of the Zone",
+ metadata: {
+ year: 1979,
+ rating: 9.9,
+ director: "Andrei Tarkovsky",
+ genre: "science fiction",
+ },
+ }),
+ ];
+
+ const attributeInfo: AttributeInfo[] = [
+ {
+ name: "genre",
+ description: "The genre of the movie",
+ type: "string or array of strings",
+ },
+ {
+ name: "year",
+ description: "The year the movie was released",
+ type: "number",
+ },
+ {
+ name: "director",
+ description: "The director of the movie",
+ type: "string",
+ },
+ {
+ name: "rating",
+ description: "The rating of the movie (1-10)",
+ type: "number",
+ },
+ ];
+ const config = {
+ customerId: Number(process.env.VECTARA_CUSTOMER_ID),
+ corpusId: Number(process.env.VECTARA_CORPUS_ID),
+ apiKey: String(process.env.VECTARA_API_KEY),
+ verbose: true,
+ };
+
+ const vectorStore = await VectaraStore.fromDocuments(
+ docs,
+ new FakeEmbeddings(), // NOTE(review): presumably a placeholder because Vectara embeds on its side — confirm.
+ config
+ );
+
+ const llm = new OpenAI();
+ const documentContents = "Brief summary of a movie";
+
+ const selfQueryRetriever = await SelfQueryRetriever.fromLLM({
+ llm,
+ vectorStore,
+ documentContents,
+ attributeInfo,
+
+ structuredQueryTranslator: new VectaraTranslator(),
+ });
+
+ const query1 = await selfQueryRetriever.getRelevantDocuments(
+ "I want to watch a movie rated higher than 8.5"
+ );
+ const query2 = await selfQueryRetriever.getRelevantDocuments(
+ "Which movies are directed by Greta Gerwig?"
+ );
+ const query3 = await selfQueryRetriever.getRelevantDocuments(
+ "Which movies are either comedy or science fiction and are rated higher than 8.5?"
+ );
+ const query4 = await selfQueryRetriever.getRelevantDocuments(
+ "Wau wau wau wau hello gello hello?"
+ );
+ console.log(query1, query2, query3, query4);
+ expect(query1.length).toBe(2); // Two documents above have rating > 8.5 (8.6 and 9.9); the counts assume the corpus holds only these documents.
+ expect(query2.length).toBe(1); // Exactly one document lists Greta Gerwig as director.
+ expect(query3.length).toBe(1); // No document has genre "comedy"; only the 9.9-rated "science fiction" document passes rating > 8.5.
+ expect(query4.length).toBe(0); // The nonsense query is expected to match nothing.
+});
diff --git a/langchain/src/retrievers/self_query/vectara.ts b/langchain/src/retrievers/self_query/vectara.ts
new file mode 100644
index 000000000000..8dec60c8a17a
--- /dev/null
+++ b/langchain/src/retrievers/self_query/vectara.ts
@@ -0,0 +1,181 @@
+import {
+ Comparator,
+ Comparators,
+ Comparison,
+ NOT,
+ Operation,
+ Operator,
+ Operators,
+ StructuredQuery,
+ Visitor,
+} from "../../chains/query_constructor/ir.js";
+import { VectaraFilter, VectaraStore } from "../../vectorstores/vectara.js";
+import { BaseTranslator } from "./base.js";
+import { isFilterEmpty } from "./utils.js";
+
+// Every operator except `not`; together with `Comparator` this is the key type
+// used to index the operator/comparator-to-text table in `formatFunction`.
+type AllowedOperator = Exclude<Operator, NOT>;
+
+/** Union of the values the translator's visit methods can produce. */
+export type VectaraVisitorResult =
+  | VectaraOperationResult
+  | VectaraComparisonResult
+  | VectaraVisitorStructuredQueryResult;
+/**
+ * Filter-expression text produced for a logical operation.
+ * NOTE(review): the boxed `String` looks deliberate (the lint rule is
+ * explicitly disabled for it) — presumably the base visitor's output slot does
+ * not accept the primitive `string`; confirm before changing it.
+ */
+// eslint-disable-next-line @typescript-eslint/ban-types
+export type VectaraOperationResult = String;
+/** Filter-expression text produced for a single comparison (same caveat as above). */
+// eslint-disable-next-line @typescript-eslint/ban-types
+export type VectaraComparisonResult = String;
+/**
+ * Result of visiting a whole structured query: the filter expression nested
+ * under `filter.filter`, or an empty object when the query has no filter.
+ */
+export type VectaraVisitorStructuredQueryResult = {
+  filter?: { filter?: VectaraOperationResult | VectaraComparisonResult };
+};
+
+type Value = number | string;
+
+/**
+ * Renders a comparison value as filter-expression text: a string is wrapped in
+ * single quotes, anything else is passed through `String()`.
+ * NOTE(review): quote characters inside a string value are not escaped —
+ * confirm whether such values can reach this function.
+ */
+function processValue(value: Value): string {
+  return typeof value === "string" ? `'${value}'` : String(value);
+}
+
+/**
+ * Translates a structured query into the textual filter-expression form used
+ * with a Vectara vector store, e.g. `( doc.rating > 8.5 )`.
+ *
+ * Only the `and` / `or` operators and the six comparators listed in
+ * `allowedComparators` are accepted.
+ */
+export class VectaraTranslator<
+  T extends VectaraStore
+> extends BaseTranslator<T> {
+  declare VisitOperationOutput: VectaraOperationResult;
+
+  declare VisitComparisonOutput: VectaraComparisonResult;
+
+  allowedOperators: Operator[] = [Operators.and, Operators.or];
+
+  allowedComparators: Comparator[] = [
+    Comparators.eq,
+    Comparators.ne,
+    Comparators.lt,
+    Comparators.lte,
+    Comparators.gt,
+    Comparators.gte,
+  ];
+
+  /**
+   * Converts an operator or comparator into its filter-expression text.
+   * @param func The operator or comparator to format.
+   * @returns The text used for it in a filter expression (operators are
+   * padded with spaces so they can be used directly as a join separator).
+   * @throws Error if `func` is not in the allowed lists, or is neither a
+   * known comparator nor a known operator.
+   */
+  formatFunction(func: Operator | Comparator): string {
+    if (func in Comparators) {
+      if (
+        this.allowedComparators.length > 0 &&
+        !this.allowedComparators.includes(func as Comparator)
+      ) {
+        throw new Error(
+          `Comparator ${func} not allowed. Allowed comparators: ${this.allowedComparators.join(
+            ", "
+          )}`
+        );
+      }
+    } else if (func in Operators) {
+      if (
+        this.allowedOperators.length > 0 &&
+        !this.allowedOperators.includes(func as Operator)
+      ) {
+        throw new Error(
+          `Operator ${func} not allowed. Allowed operators: ${this.allowedOperators.join(
+            ", "
+          )}`
+        );
+      }
+    } else {
+      throw new Error("Unknown comparator or operator");
+    }
+
+    const mapDict = {
+      and: " and ",
+      or: " or ",
+      eq: "=",
+      ne: "!=",
+      lt: "<",
+      lte: "<=",
+      gt: ">",
+      gte: ">=",
+    };
+    return mapDict[func as Comparator | AllowedOperator];
+  }
+
+  /**
+   * Visits an operation and returns a VectaraOperationResult. The
+   * operation's arguments are visited, joined with the formatted operator
+   * and wrapped in parentheses.
+   * @param operation The operation to visit.
+   * @returns A VectaraOperationResult.
+   */
+  visitOperation(operation: Operation): this["VisitOperationOutput"] {
+    const args = operation.args?.map((arg) =>
+      arg.accept(this as Visitor)
+    ) as VectaraVisitorResult[];
+    const operator = this.formatFunction(operation.operator);
+    return `( ${args.join(operator)} )`;
+  }
+
+  /**
+   * Visits a comparison and returns a VectaraComparisonResult of the form
+   * `( doc.<attribute> <comparator> <value> )`. String values are wrapped in
+   * single quotes by `processValue`.
+   * @param comparison The comparison to visit.
+   * @returns A VectaraComparisonResult.
+   * @throws Error if the comparator is not allowed (see `formatFunction`).
+   */
+  visitComparison(comparison: Comparison): this["VisitComparisonOutput"] {
+    const comparator = this.formatFunction(comparison.comparator);
+    return `( doc.${comparison.attribute} ${comparator} ${processValue(
+      comparison.value
+    )} )`;
+  }
+
+  /**
+   * Visits a structured query and returns a
+   * VectaraVisitorStructuredQueryResult. If the query has a filter, it is
+   * visited and stored under `filter.filter`; otherwise an empty object is
+   * returned.
+   * @param query The structured query to visit.
+   * @returns A VectaraVisitorStructuredQueryResult.
+   */
+  visitStructuredQuery(
+    query: StructuredQuery
+  ): this["VisitStructuredQueryOutput"] {
+    let nextArg = {};
+    if (query.filter) {
+      nextArg = {
+        filter: { filter: query.filter.accept(this as Visitor) },
+      };
+    }
+    return nextArg;
+  }
+
+  /**
+   * Merges the default search filter with the filter generated from the query.
+   * @param defaultFilter The filter configured as a default search parameter.
+   * @param generatedFilter The filter generated from the structured query.
+   * @param mergeType `"and"` / `"or"` combine the two filter expressions;
+   * `"replace"` uses the generated filter only.
+   * @param forceDefaultFilter When true, the default filter is returned if
+   * the generated filter is empty.
+   * @returns The merged filter, or undefined when no filter applies.
+   * @throws Error if both filters are non-empty and `mergeType` is not one of
+   * the values above.
+   */
+  mergeFilters(
+    defaultFilter: VectaraFilter | undefined,
+    generatedFilter: VectaraFilter | undefined,
+    mergeType = "and",
+    forceDefaultFilter = false
+  ): VectaraFilter | undefined {
+    const defaultEmpty = isFilterEmpty(defaultFilter);
+    const generatedEmpty = isFilterEmpty(generatedFilter);
+
+    if (defaultEmpty && generatedEmpty) {
+      return undefined;
+    }
+    if (defaultEmpty || mergeType === "replace") {
+      return generatedEmpty ? undefined : generatedFilter;
+    }
+    if (generatedEmpty) {
+      if (forceDefaultFilter) {
+        return defaultFilter;
+      }
+      if (mergeType === "and") {
+        return undefined;
+      }
+      return defaultFilter;
+    }
+    if (mergeType !== "and" && mergeType !== "or") {
+      throw new Error("Unknown merge type");
+    }
+
+    // Both arguments are filter objects, so combine their `filter` expression
+    // strings; interpolating the objects themselves would yield
+    // "[object Object]".
+    const expressions = [defaultFilter?.filter, generatedFilter?.filter].filter(
+      (expr): expr is string =>
+        typeof expr === "string" && expr.trim().length > 0
+    );
+    // Keep the remaining search parameters of both filters; generated values
+    // take precedence over defaults.
+    const merged = { ...defaultFilter, ...generatedFilter };
+    if (expressions.length === 0) {
+      return merged;
+    }
+    return {
+      ...merged,
+      // Parenthesise each side so a user-supplied default expression cannot
+      // change how the combined expression groups.
+      filter: expressions.map((expr) => `( ${expr} )`).join(` ${mergeType} `),
+    };
+  }
+}