From b6e96dc211d126ef16c8e84caafddff552951413 Mon Sep 17 00:00:00 2001
From: Chase McDougall
Date: Thu, 30 Nov 2023 21:17:11 -0500
Subject: [PATCH 01/10] initial gradient embeddings implementation

---
 langchain/src/embeddings/gradient_ai.ts | 120 ++++++++++++++++++++++++
 1 file changed, 120 insertions(+)
 create mode 100644 langchain/src/embeddings/gradient_ai.ts

diff --git a/langchain/src/embeddings/gradient_ai.ts b/langchain/src/embeddings/gradient_ai.ts
new file mode 100644
index 000000000000..703799e5b48e
--- /dev/null
+++ b/langchain/src/embeddings/gradient_ai.ts
@@ -0,0 +1,120 @@
+import { Gradient } from "@gradientai/nodejs-sdk";
+import { getEnvironmentVariable } from "../util/env.js";
+import { chunkArray } from "../util/chunk.js";
+import { Embeddings, EmbeddingsParams } from "./base.js";
+
+/**
+ * Interface for GradientEmbeddings parameters. Extends EmbeddingsParams and
+ * defines additional parameters specific to the GradientEmbeddings class.
+ */
+export interface GradientEmbeddingsParams extends EmbeddingsParams {
+  /**
+   * Gradient AI Access Token.
+   * Provide Access Token if you do not wish to automatically pull from env.
+   */
+  gradientAccessKey?: string;
+  /**
+   * Gradient Workspace Id.
+   * Provide workspace id if you do not wish to automatically pull from env.
+   */
+  workspaceId?: string;
+  /**
+   * Gradient AI Model Slug.
+   */
+  modelSlug?: string;
+}
+
+/**
+ * Class for generating embeddings using the Gradient AI API. Extends the
+ * Embeddings class and implements GradientEmbeddingsParams.
+ */
+export class GradientEmbeddings
+  extends Embeddings
+  implements GradientEmbeddingsParams
+{
+  gradientAccessKey?: string;
+
+  workspaceId?: string;
+
+  batchSize = 128;
+
+  model: any;
+
+  constructor(fields: GradientEmbeddingsParams) {
+    super(fields);
+
+    this.gradientAccessKey =
+      fields?.gradientAccessKey ??
+      getEnvironmentVariable("GRADIENT_ACCESS_TOKEN");
+    this.workspaceId =
+      fields?.workspaceId ?? getEnvironmentVariable("GRADIENT_WORKSPACE_ID");
+
+    if (!this.gradientAccessKey) {
+      throw new Error("Missing Gradient AI Access Token");
+    }
+
+    if (!this.workspaceId) {
+      throw new Error("Missing Gradient AI Workspace ID");
+    }
+  }
+
+  /**
+   * Method to generate embeddings for an array of documents. Splits the
+   * documents into batches and makes requests to the Gradient API to generate
+   * embeddings.
+   * @param texts Array of documents to generate embeddings for.
+   * @returns Promise that resolves to a 2D array of embeddings for each document.
+   */
+  async embedDocuments(texts: string[]): Promise<number[][]> {
+    await this.setModel();
+
+    const batches = chunkArray(
+      texts,
+      this.batchSize
+    );
+
+    const batchRequests = batches.map((batch) =>
+      this.model.generateEmbeddings({
+        inputs: batch,
+      })
+    );
+    const batchResponses = await Promise.all(batchRequests);
+
+    const embeddings: number[][] = [];
+    for (let i = 0; i < batchResponses.length; i += 1) {
+      const batch = batches[i];
+      const { data: batchResponse } = batchResponses[i];
+      for (let j = 0; j < batch.length; j += 1) {
+        embeddings.push(batchResponse[j].embedding);
+      }
+    }
+    return embeddings;
+  }
+
+  /**
+   * Method to generate an embedding for a single document. Calls the
+   * embedDocuments method with the document as the input.
+   * @param text Document to generate an embedding for.
+   * @returns Promise that resolves to an embedding for the document.
+   */
+  async embedQuery(text: string): Promise<number[]> {
+    const data = await this.embedDocuments([text]);
+    return data[0];
+  }
+
+  /**
+   * Method to set the model to use for generating embeddings.
+   * @sets the class' `model` value to that of the retrieved Embeddings Model.
+   */
+  async setModel() {
+    if (this.model) return;
+
+    const gradient = new Gradient({
+      accessToken: this.gradientAccessKey,
+      workspaceId: this.workspaceId,
+    });
+    this.model = await gradient.getEmbeddingsModel({
+      slug: "bge-large",
+    });
+  }
+}
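The class added above resolves credentials from the constructor fields first, falls back to the `GRADIENT_ACCESS_TOKEN` and `GRADIENT_WORKSPACE_ID` environment variables, and lazily binds the `bge-large` embeddings model. A minimal usage sketch follows; it is illustrative only, assuming the `langchain/embeddings/gradient_ai` entrypoint registered later in this series and valid Gradient credentials (the values shown here are placeholders).

```typescript
import { GradientEmbeddings } from "langchain/embeddings/gradient_ai";

// Both fields are optional; omitting them falls back to the environment variables.
const embeddings = new GradientEmbeddings({
  gradientAccessKey: "my-gradient-access-token", // placeholder value
  workspaceId: "my-workspace-id", // placeholder value
});

// Texts are embedded in batches of at most `batchSize` (128) inputs per request.
const vectors = await embeddings.embedDocuments(["Hello world", "Colorful socks"]);
console.log(vectors.length); // 2

// embedQuery simply delegates to embedDocuments with a single-element array.
const queryVector = await embeddings.embedQuery("Hello world");
console.log(queryVector.length); // dimensionality reported by the bge-large model
```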
From c0951d59b76b89d638cbb7d37d6991b853537e99 Mon Sep 17 00:00:00 2001
From: Chase McDougall
Date: Thu, 30 Nov 2023 21:19:23 -0500
Subject: [PATCH 02/10] format

---
 langchain/src/embeddings/gradient_ai.ts | 7 ++-----
 1 file changed, 2 insertions(+), 5 deletions(-)

diff --git a/langchain/src/embeddings/gradient_ai.ts b/langchain/src/embeddings/gradient_ai.ts
index 703799e5b48e..1a7ae9430674 100644
--- a/langchain/src/embeddings/gradient_ai.ts
+++ b/langchain/src/embeddings/gradient_ai.ts
@@ -67,11 +67,8 @@ export class GradientEmbeddings
    */
   async embedDocuments(texts: string[]): Promise<number[][]> {
     await this.setModel();
-
-    const batches = chunkArray(
-      texts,
-      this.batchSize
-    );
+
+    const batches = chunkArray(texts, this.batchSize);
 
     const batchRequests = batches.map((batch) =>
       this.model.generateEmbeddings({

From 78ce39b0b25759d8362c8a8cc79dcd985f2abee8 Mon Sep 17 00:00:00 2001
From: Chase McDougall
Date: Thu, 30 Nov 2023 21:20:38 -0500
Subject: [PATCH 03/10] remove modelslug

---
 langchain/src/embeddings/gradient_ai.ts | 4 ----
 1 file changed, 4 deletions(-)

diff --git a/langchain/src/embeddings/gradient_ai.ts b/langchain/src/embeddings/gradient_ai.ts
index 1a7ae9430674..ae2f3d27b315 100644
--- a/langchain/src/embeddings/gradient_ai.ts
+++ b/langchain/src/embeddings/gradient_ai.ts
@@ -18,10 +18,6 @@ export interface GradientEmbeddingsParams extends EmbeddingsParams {
    * Provide workspace id if you do not wish to automatically pull from env.
    */
   workspaceId?: string;
-  /**
-   * Gradient AI Model Slug.
-   */
-  modelSlug?: string;
 }
 
 /**
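The call reformatted above relies on `chunkArray` from the package's internal `util/chunk.js`, which is not shown in this diff. The following standalone sketch only mirrors what that helper is assumed to do, namely split an array into sub-arrays of at most `chunkSize` elements.

```typescript
// Standalone sketch of the assumed chunking behavior; the real helper lives in
// langchain's internal util/chunk.js and is not part of this patch.
const chunkArray = <T>(arr: T[], chunkSize: number): T[][] =>
  arr.reduce<T[][]>((chunks, item, index) => {
    const chunkIndex = Math.floor(index / chunkSize);
    chunks[chunkIndex] = [...(chunks[chunkIndex] ?? []), item];
    return chunks;
  }, []);

console.log(chunkArray([1, 2, 3, 4, 5], 2)); // [[1, 2], [3, 4], [5]]
```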
From 105ecbc3d74d6143ba2ee84a0d71001d9e2a7193 Mon Sep 17 00:00:00 2001
From: Chase McDougall
Date: Thu, 30 Nov 2023 21:24:28 -0500
Subject: [PATCH 04/10] update package and entrypoint -> yarn build

---
 docs/api_refs/typedoc.json              | 1 +
 langchain/.gitignore                    | 3 +++
 langchain/package.json                  | 8 ++++++++
 langchain/scripts/create-entrypoints.js | 2 ++
 langchain/src/load/import_constants.ts  | 1 +
 langchain/src/load/import_type.d.ts     | 3 +++
 6 files changed, 18 insertions(+)

diff --git a/docs/api_refs/typedoc.json b/docs/api_refs/typedoc.json
index 7ff44c31fe65..c03de0c396a3 100644
--- a/docs/api_refs/typedoc.json
+++ b/docs/api_refs/typedoc.json
@@ -67,6 +67,7 @@
     "./langchain/src/embeddings/minimax.ts",
     "./langchain/src/embeddings/voyage.ts",
     "./langchain/src/embeddings/llama_cpp.ts",
+    "./langchain/src/embeddings/gradient_ai.ts",
     "./langchain/src/llms/load.ts",
     "./langchain/src/llms/base.ts",
     "./langchain/src/llms/openai.ts",
diff --git a/langchain/.gitignore b/langchain/.gitignore
index 886cf43390bd..ca07764e707e 100644
--- a/langchain/.gitignore
+++ b/langchain/.gitignore
@@ -145,6 +145,9 @@ embeddings/voyage.d.ts
 embeddings/llama_cpp.cjs
 embeddings/llama_cpp.js
 embeddings/llama_cpp.d.ts
+embeddings/gradient_ai.cjs
+embeddings/gradient_ai.js
+embeddings/gradient_ai.d.ts
 llms/load.cjs
 llms/load.js
 llms/load.d.ts
diff --git a/langchain/package.json b/langchain/package.json
index c855ae367c8f..cb20fe22f55b 100644
--- a/langchain/package.json
+++ b/langchain/package.json
@@ -157,6 +157,9 @@
     "embeddings/llama_cpp.cjs",
     "embeddings/llama_cpp.js",
     "embeddings/llama_cpp.d.ts",
+    "embeddings/gradient_ai.cjs",
+    "embeddings/gradient_ai.js",
+    "embeddings/gradient_ai.d.ts",
     "llms/load.cjs",
     "llms/load.js",
     "llms/load.d.ts",
@@ -1698,6 +1701,11 @@
       "import": "./embeddings/llama_cpp.js",
       "require": "./embeddings/llama_cpp.cjs"
     },
+    "./embeddings/gradient_ai": {
+      "types": "./embeddings/gradient_ai.d.ts",
+      "import": "./embeddings/gradient_ai.js",
+      "require": "./embeddings/gradient_ai.cjs"
+    },
     "./llms/load": {
       "types": "./llms/load.d.ts",
       "import": "./llms/load.js",
diff --git a/langchain/scripts/create-entrypoints.js b/langchain/scripts/create-entrypoints.js
index 446eaac7856b..a8cf7cfaa321 100644
--- a/langchain/scripts/create-entrypoints.js
+++ b/langchain/scripts/create-entrypoints.js
@@ -63,6 +63,7 @@ const entrypoints = {
   "embeddings/minimax": "embeddings/minimax",
   "embeddings/voyage": "embeddings/voyage",
   "embeddings/llama_cpp": "embeddings/llama_cpp",
+  "embeddings/gradient_ai": "embeddings/gradient_ai",
   // llms
   "llms/load": "llms/load",
   "llms/base": "llms/base",
@@ -364,6 +365,7 @@ const requiresOptionalDependency = [
   "embeddings/hf",
   "embeddings/hf_transformers",
   "embeddings/llama_cpp",
+  "embeddings/gradient_ai",
   "llms/load",
   "llms/cohere",
   "llms/googlevertexai",
diff --git a/langchain/src/load/import_constants.ts b/langchain/src/load/import_constants.ts
index a7d4a74ee1f8..9bf8f2b4cb46 100644
--- a/langchain/src/load/import_constants.ts
+++ b/langchain/src/load/import_constants.ts
@@ -24,6 +24,7 @@ export const optionalImportEntrypoints = [
   "langchain/embeddings/googlevertexai",
   "langchain/embeddings/googlepalm",
   "langchain/embeddings/llama_cpp",
+  "langchain/embeddings/gradient_ai",
   "langchain/llms/load",
   "langchain/llms/cohere",
   "langchain/llms/hf",
diff --git a/langchain/src/load/import_type.d.ts b/langchain/src/load/import_type.d.ts
index 00fb99f9d953..052c975320f8 100644
--- a/langchain/src/load/import_type.d.ts
+++ b/langchain/src/load/import_type.d.ts
@@ -70,6 +70,9 @@ export interface OptionalImportMap {
   "langchain/embeddings/llama_cpp"?:
     | typeof import("../embeddings/llama_cpp.js")
    | Promise<typeof import("../embeddings/llama_cpp.js")>;
+  "langchain/embeddings/gradient_ai"?:
+    | typeof import("../embeddings/gradient_ai.js")
+    | Promise<typeof import("../embeddings/gradient_ai.js")>;
   "langchain/llms/load"?:
     | typeof import("../llms/load.js")
     | Promise<typeof import("../llms/load.js")>;
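Because `embeddings/gradient_ai` is registered under `requiresOptionalDependency`, the `@gradientai/nodejs-sdk` package stays an optional peer dependency rather than a hard requirement of `langchain`. A hedged sketch of what that means for consumers: the entrypoint only resolves when the SDK is installed, so guarding a dynamic import is one way to degrade gracefully.

```typescript
// Sketch only: the new entrypoint is usable solely when the optional peer
// dependency @gradientai/nodejs-sdk has been installed alongside langchain.
try {
  const { GradientEmbeddings } = await import("langchain/embeddings/gradient_ai");
  console.log("Gradient embeddings available:", typeof GradientEmbeddings); // "function"
} catch (e) {
  console.error("Install @gradientai/nodejs-sdk to use this entrypoint", e);
}
```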
From 6ab4ea63ffd7ac86c3e7ca7ead85bc0c69e11050 Mon Sep 17 00:00:00 2001
From: Chase McDougall
Date: Thu, 30 Nov 2023 21:43:11 -0500
Subject: [PATCH 05/10] map texts and change response reading

---
 langchain/src/embeddings/gradient_ai.ts | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/langchain/src/embeddings/gradient_ai.ts b/langchain/src/embeddings/gradient_ai.ts
index ae2f3d27b315..6f654df17045 100644
--- a/langchain/src/embeddings/gradient_ai.ts
+++ b/langchain/src/embeddings/gradient_ai.ts
@@ -64,7 +64,9 @@ export class GradientEmbeddings
   async embedDocuments(texts: string[]): Promise<number[][]> {
     await this.setModel();
 
-    const batches = chunkArray(texts, this.batchSize);
+    const mappedTexts = texts.map((text) => ({ input: text }));
+
+    const batches = chunkArray(mappedTexts, this.batchSize);
 
     const batchRequests = batches.map((batch) =>
       this.model.generateEmbeddings({
@@ -76,7 +78,7 @@ export class GradientEmbeddings
     const embeddings: number[][] = [];
     for (let i = 0; i < batchResponses.length; i += 1) {
       const batch = batches[i];
-      const { data: batchResponse } = batchResponses[i];
+      const { embeddings: batchResponse } = batchResponses[i];
       for (let j = 0; j < batch.length; j += 1) {
         embeddings.push(batchResponse[j].embedding);
       }

From 5b3b005316f1b9f2d9d2ec9c9f625e48c6050658 Mon Sep 17 00:00:00 2001
From: Chase McDougall
Date: Thu, 30 Nov 2023 21:43:29 -0500
Subject: [PATCH 06/10] add example

---
 examples/src/embeddings/gradient_ai.ts | 9 +++++++++
 1 file changed, 9 insertions(+)
 create mode 100644 examples/src/embeddings/gradient_ai.ts

diff --git a/examples/src/embeddings/gradient_ai.ts b/examples/src/embeddings/gradient_ai.ts
new file mode 100644
index 000000000000..d6f2e65475a7
--- /dev/null
+++ b/examples/src/embeddings/gradient_ai.ts
@@ -0,0 +1,9 @@
+import { GradientEmbeddings } from "langchain/embeddings/gradient_ai";
+
+export const run = async () => {
+  const model = new GradientEmbeddings();
+  const res = await model.embedQuery(
+    "What would be a good company name for a company that makes colorful socks?"
+  );
+  console.log({ res });
+};

From aa9c3575f39055512f536879b1e240e8e6a5acd1 Mon Sep 17 00:00:00 2001
From: Chase McDougall
Date: Fri, 1 Dec 2023 09:59:23 -0500
Subject: [PATCH 07/10] add `caller.call` wrapper

---
 langchain/src/embeddings/gradient_ai.ts | 8 +++++---
 1 file changed, 5 insertions(+), 3 deletions(-)

diff --git a/langchain/src/embeddings/gradient_ai.ts b/langchain/src/embeddings/gradient_ai.ts
index 6f654df17045..74375ad438b7 100644
--- a/langchain/src/embeddings/gradient_ai.ts
+++ b/langchain/src/embeddings/gradient_ai.ts
@@ -69,9 +69,11 @@ export class GradientEmbeddings
     const batches = chunkArray(mappedTexts, this.batchSize);
 
     const batchRequests = batches.map((batch) =>
-      this.model.generateEmbeddings({
-        inputs: batch,
-      })
+      this.caller.call(async () =>
+        this.model.generateEmbeddings({
+          inputs: batch,
+        })
+      )
     );
     const batchResponses = await Promise.all(batchRequests);
 

From 31b257433583142751a2fad6b027778c01a56a43 Mon Sep 17 00:00:00 2001
From: Chase McDougall
Date: Fri, 1 Dec 2023 10:09:43 -0500
Subject: [PATCH 08/10] add docs

---
 .../text_embedding/gradient_ai.mdx | 33 +++++++++++++++++++
 1 file changed, 33 insertions(+)
 create mode 100644 docs/core_docs/docs/integrations/text_embedding/gradient_ai.mdx

diff --git a/docs/core_docs/docs/integrations/text_embedding/gradient_ai.mdx b/docs/core_docs/docs/integrations/text_embedding/gradient_ai.mdx
new file mode 100644
index 000000000000..2e12d9a61f08
--- /dev/null
+++ b/docs/core_docs/docs/integrations/text_embedding/gradient_ai.mdx
@@ -0,0 +1,33 @@
+# Gradient AI
+
+The `GradientEmbeddings` class uses the Gradient AI API to generate embeddings for a given text.
+
+## Setup
+
+You'll need to install the official Gradient Node SDK as a peer dependency:
+
+```bash npm2yarn
+npm i @gradientai/nodejs-sdk
+```
+
+You will need to set the following environment variables to use the Gradient AI API.
+
+1. `GRADIENT_ACCESS_TOKEN`
+2. `GRADIENT_WORKSPACE_ID`
+
+Alternatively, these can be set during the `GradientEmbeddings` class instantiation as `gradientAccessKey` and `workspaceId` respectively.
+For example:
+
+```typescript
+const model = new GradientEmbeddings({
+  gradientAccessKey: "My secret Access Token",
+  workspaceId: "My secret workspace id",
+});
+```
+
+## Usage
+
+import CodeBlock from "@theme/CodeBlock";
+import GradientEmbeddingsExample from "@examples/embeddings/gradient_ai.ts";
+
+<CodeBlock language="typescript">{GradientEmbeddingsExample}</CodeBlock>

From 795b0d8bb9faa94192b6fdcf64d9ba47abc28224 Mon Sep 17 00:00:00 2001
From: Chase McDougall
Date: Fri, 1 Dec 2023 10:12:59 -0500
Subject: [PATCH 09/10] remove run from example

---
 examples/src/embeddings/gradient_ai.ts | 12 +++++-------
 1 file changed, 5 insertions(+), 7 deletions(-)

diff --git a/examples/src/embeddings/gradient_ai.ts b/examples/src/embeddings/gradient_ai.ts
index d6f2e65475a7..a749c2dc494e 100644
--- a/examples/src/embeddings/gradient_ai.ts
+++ b/examples/src/embeddings/gradient_ai.ts
@@ -1,9 +1,7 @@
 import { GradientEmbeddings } from "langchain/embeddings/gradient_ai";
 
-export const run = async () => {
-  const model = new GradientEmbeddings();
-  const res = await model.embedQuery(
-    "What would be a good company name for a company that makes colorful socks?"
-  );
-  console.log({ res });
-};
+const model = new GradientEmbeddings();
+const res = await model.embedQuery(
+  "What would be a good company name for a company that makes colorful socks?"
+);
+console.log({ res });

From f469ec00d945a3f8421b32f4be78bce3f66a74bb Mon Sep 17 00:00:00 2001
From: Jacob Lee
Date: Fri, 1 Dec 2023 18:41:27 -0800
Subject: [PATCH 10/10] Update gradient_ai.mdx

---
 .../docs/integrations/text_embedding/gradient_ai.mdx | 10 ++++++++--
 1 file changed, 8 insertions(+), 2 deletions(-)

diff --git a/docs/core_docs/docs/integrations/text_embedding/gradient_ai.mdx b/docs/core_docs/docs/integrations/text_embedding/gradient_ai.mdx
index 2e12d9a61f08..a8f9cda8daa2 100644
--- a/docs/core_docs/docs/integrations/text_embedding/gradient_ai.mdx
+++ b/docs/core_docs/docs/integrations/text_embedding/gradient_ai.mdx
@@ -1,3 +1,7 @@
+---
+sidebar_class_name: node-only
+---
+
 # Gradient AI
 
 The `GradientEmbeddings` class uses the Gradient AI API to generate embeddings for a given text.
@@ -12,8 +16,10 @@ npm i @gradientai/nodejs-sdk
 
 You will need to set the following environment variables to use the Gradient AI API.
 
-1. `GRADIENT_ACCESS_TOKEN`
-2. `GRADIENT_WORKSPACE_ID`
+```
+export GRADIENT_ACCESS_TOKEN=<YOUR_ACCESS_TOKEN>
+export GRADIENT_WORKSPACE_ID=<YOUR_WORKSPACE_ID>
+```
 
 Alternatively, these can be set during the `GradientEmbeddings` class instantiation as `gradientAccessKey` and `workspaceId` respectively.
 For example:
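To close the loop on the documentation above, here is an end-to-end sketch of the new class working with a LangChain vector store. It assumes `GRADIENT_ACCESS_TOKEN` and `GRADIENT_WORKSPACE_ID` are exported and `@gradientai/nodejs-sdk` is installed; the in-memory store is used only to keep the example self-contained, and the texts are arbitrary placeholders.

```typescript
import { GradientEmbeddings } from "langchain/embeddings/gradient_ai";
import { MemoryVectorStore } from "langchain/vectorstores/memory";

// Credentials are read from the environment; see the setup section above.
const embeddings = new GradientEmbeddings({});

// Embed a couple of sample texts and index them in the in-memory store.
const vectorStore = await MemoryVectorStore.fromTexts(
  ["Gradient hosts and serves embedding models", "Socks come in many colors"],
  [{ id: 1 }, { id: 2 }],
  embeddings
);

// Query embeddings come from the same class via embedQuery.
const results = await vectorStore.similaritySearch("embedding models", 1);
console.log(results[0].pageContent);
```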