From 52603b609355506474f3235880bd0158af871541 Mon Sep 17 00:00:00 2001 From: jacoblee93 Date: Wed, 6 Dec 2023 15:44:20 -0800 Subject: [PATCH 01/22] Initial langchain-community commit --- langchain/package.json | 1 + langchain/src/util/event-source-parse.ts | 288 +--- libs/langchain-community/.eslintrc.cjs | 66 + libs/langchain-community/.gitignore | 3 + libs/langchain-community/LICENSE | 21 + libs/langchain-community/jest.config.cjs | 19 + libs/langchain-community/jest.env.cjs | 12 + libs/langchain-community/package.json | 608 +++++++ .../scripts/check-tree-shaking.js | 80 + .../scripts/create-entrypoints.js | 100 ++ .../scripts/identify-secrets.js | 77 + .../scripts/move-cjs-to-dist.js | 38 + .../scripts/release-branch.sh | 6 + libs/langchain-community/src/chat_models.ts | 88 + .../src/chat_models/anthropic.ts | 451 ++++++ .../src/chat_models/baiduwenxin.ts | 546 +++++++ .../src/chat_models/bedrock/index.ts | 40 + .../src/chat_models/bedrock/web.ts | 434 +++++ .../src/chat_models/cloudflare_workersai.ts | 250 +++ .../src/chat_models/fireworks.ts | 139 ++ .../src/chat_models/googlepalm.ts | 342 ++++ .../src/chat_models/googlevertexai/common.ts | 405 +++++ .../src/chat_models/googlevertexai/index.ts | 64 + .../src/chat_models/googlevertexai/web.ts | 66 + .../src/chat_models/iflytek_xinghuo/common.ts | 490 ++++++ .../src/chat_models/iflytek_xinghuo/index.ts | 43 + .../src/chat_models/iflytek_xinghuo/web.ts | 49 + .../src/chat_models/llama_cpp.ts | 327 ++++ .../src/chat_models/minimax.ts | 881 +++++++++++ .../src/chat_models/ollama.ts | 301 ++++ .../src/chat_models/portkey.ts | 187 +++ .../tests/chatanthropic.int.test.ts | 313 ++++ .../tests/chatbaiduwenxin.int.test.ts | 136 ++ .../chat_models/tests/chatbedrock.int.test.ts | 186 +++ .../chatcloudflare_workersai.int.test.ts | 131 ++ .../tests/chatfireworks.int.test.ts | 73 + .../tests/chatgooglepalm.int.test.ts | 138 ++ .../chat_models/tests/chatgooglepalm.test.ts | 252 +++ .../tests/chatgooglevertexai.int.test.ts | 
145 ++ .../tests/chatgooglevertexai.test.ts | 116 ++ .../tests/chatgooglevertexai_web.int.test.ts | 146 ++ .../tests/chatgooglevertexai_web.test.ts | 149 ++ .../tests/chatiflytekxinghuo.int.test.ts | 12 + .../tests/chatllama_cpp.int.test.ts | 118 ++ .../chat_models/tests/chatollama.int.test.ts | 156 ++ .../tests/chatopenai-extended.int.test.ts | 176 ++ .../tests/chatopenai-vision.int.test.ts | 54 + .../chat_models/tests/chatopenai.int.test.ts | 777 +++++++++ .../src/chat_models/tests/data/hotdog.jpg | Bin 0 -> 28191 bytes .../src/chat_models/tests/minimax.int.test.ts | 341 ++++ .../src/chat_models/yandex.ts | 141 ++ libs/langchain-community/src/index.ts | 3 + libs/langchain-community/src/llms.ts | 73 + .../src/load/import_constants.ts | 176 ++ .../src/load/import_map.ts | 108 ++ .../src/load/import_type.d.ts | 580 +++++++ libs/langchain-community/src/load/index.ts | 18 + libs/langchain-community/src/load/map_keys.ts | 4 + .../src/load/serializable.ts | 1 + .../src/tests/chat_models.test.ts | 5 + .../src/tests/integration.int.test.ts | 5 + .../src/tests/llms.test.ts | 5 + .../src/tests/vectorstores.test.ts | 5 + .../src/types/assemblyai-types.ts | 6 + .../src/types/expression-parser.d.ts | 91 ++ .../src/types/googlevertexai-types.ts | 89 ++ .../src/types/openai-types.ts | 172 ++ .../src/types/pdf-parse.d.ts | 1409 +++++++++++++++++ .../src/types/type-utils.ts | 3 + libs/langchain-community/src/util/bedrock.ts | 134 ++ .../src/util/event-source-parse.ts | 287 ++++ .../src/util/googlevertexai-connection.ts | 423 +++++ .../src/util/googlevertexai-gauth.ts | 38 + .../src/util/googlevertexai-webauth.ts | 119 ++ .../src/util/iflytek_websocket_stream.ts | 95 ++ .../langchain-community/src/util/llama_cpp.ts | 79 + libs/langchain-community/src/util/ollama.ts | 146 ++ libs/langchain-community/src/vectorstores.ts | 80 + libs/langchain-community/tsconfig.cjs.json | 8 + libs/langchain-community/tsconfig.json | 23 + yarn.lock | 459 +++++- 81 files changed, 14338 insertions(+), 
288 deletions(-) create mode 100644 libs/langchain-community/.eslintrc.cjs create mode 100644 libs/langchain-community/.gitignore create mode 100644 libs/langchain-community/LICENSE create mode 100644 libs/langchain-community/jest.config.cjs create mode 100644 libs/langchain-community/jest.env.cjs create mode 100644 libs/langchain-community/package.json create mode 100644 libs/langchain-community/scripts/check-tree-shaking.js create mode 100644 libs/langchain-community/scripts/create-entrypoints.js create mode 100644 libs/langchain-community/scripts/identify-secrets.js create mode 100644 libs/langchain-community/scripts/move-cjs-to-dist.js create mode 100644 libs/langchain-community/scripts/release-branch.sh create mode 100644 libs/langchain-community/src/chat_models.ts create mode 100644 libs/langchain-community/src/chat_models/anthropic.ts create mode 100644 libs/langchain-community/src/chat_models/baiduwenxin.ts create mode 100644 libs/langchain-community/src/chat_models/bedrock/index.ts create mode 100644 libs/langchain-community/src/chat_models/bedrock/web.ts create mode 100644 libs/langchain-community/src/chat_models/cloudflare_workersai.ts create mode 100644 libs/langchain-community/src/chat_models/fireworks.ts create mode 100644 libs/langchain-community/src/chat_models/googlepalm.ts create mode 100644 libs/langchain-community/src/chat_models/googlevertexai/common.ts create mode 100644 libs/langchain-community/src/chat_models/googlevertexai/index.ts create mode 100644 libs/langchain-community/src/chat_models/googlevertexai/web.ts create mode 100644 libs/langchain-community/src/chat_models/iflytek_xinghuo/common.ts create mode 100644 libs/langchain-community/src/chat_models/iflytek_xinghuo/index.ts create mode 100644 libs/langchain-community/src/chat_models/iflytek_xinghuo/web.ts create mode 100644 libs/langchain-community/src/chat_models/llama_cpp.ts create mode 100644 libs/langchain-community/src/chat_models/minimax.ts create mode 100644 
libs/langchain-community/src/chat_models/ollama.ts create mode 100644 libs/langchain-community/src/chat_models/portkey.ts create mode 100644 libs/langchain-community/src/chat_models/tests/chatanthropic.int.test.ts create mode 100644 libs/langchain-community/src/chat_models/tests/chatbaiduwenxin.int.test.ts create mode 100644 libs/langchain-community/src/chat_models/tests/chatbedrock.int.test.ts create mode 100644 libs/langchain-community/src/chat_models/tests/chatcloudflare_workersai.int.test.ts create mode 100644 libs/langchain-community/src/chat_models/tests/chatfireworks.int.test.ts create mode 100644 libs/langchain-community/src/chat_models/tests/chatgooglepalm.int.test.ts create mode 100644 libs/langchain-community/src/chat_models/tests/chatgooglepalm.test.ts create mode 100644 libs/langchain-community/src/chat_models/tests/chatgooglevertexai.int.test.ts create mode 100644 libs/langchain-community/src/chat_models/tests/chatgooglevertexai.test.ts create mode 100644 libs/langchain-community/src/chat_models/tests/chatgooglevertexai_web.int.test.ts create mode 100644 libs/langchain-community/src/chat_models/tests/chatgooglevertexai_web.test.ts create mode 100644 libs/langchain-community/src/chat_models/tests/chatiflytekxinghuo.int.test.ts create mode 100644 libs/langchain-community/src/chat_models/tests/chatllama_cpp.int.test.ts create mode 100644 libs/langchain-community/src/chat_models/tests/chatollama.int.test.ts create mode 100644 libs/langchain-community/src/chat_models/tests/chatopenai-extended.int.test.ts create mode 100644 libs/langchain-community/src/chat_models/tests/chatopenai-vision.int.test.ts create mode 100644 libs/langchain-community/src/chat_models/tests/chatopenai.int.test.ts create mode 100644 libs/langchain-community/src/chat_models/tests/data/hotdog.jpg create mode 100644 libs/langchain-community/src/chat_models/tests/minimax.int.test.ts create mode 100644 libs/langchain-community/src/chat_models/yandex.ts create mode 100644 
libs/langchain-community/src/index.ts create mode 100644 libs/langchain-community/src/llms.ts create mode 100644 libs/langchain-community/src/load/import_constants.ts create mode 100644 libs/langchain-community/src/load/import_map.ts create mode 100644 libs/langchain-community/src/load/import_type.d.ts create mode 100644 libs/langchain-community/src/load/index.ts create mode 100644 libs/langchain-community/src/load/map_keys.ts create mode 100644 libs/langchain-community/src/load/serializable.ts create mode 100644 libs/langchain-community/src/tests/chat_models.test.ts create mode 100644 libs/langchain-community/src/tests/integration.int.test.ts create mode 100644 libs/langchain-community/src/tests/llms.test.ts create mode 100644 libs/langchain-community/src/tests/vectorstores.test.ts create mode 100644 libs/langchain-community/src/types/assemblyai-types.ts create mode 100644 libs/langchain-community/src/types/expression-parser.d.ts create mode 100644 libs/langchain-community/src/types/googlevertexai-types.ts create mode 100644 libs/langchain-community/src/types/openai-types.ts create mode 100644 libs/langchain-community/src/types/pdf-parse.d.ts create mode 100644 libs/langchain-community/src/types/type-utils.ts create mode 100644 libs/langchain-community/src/util/bedrock.ts create mode 100644 libs/langchain-community/src/util/event-source-parse.ts create mode 100644 libs/langchain-community/src/util/googlevertexai-connection.ts create mode 100644 libs/langchain-community/src/util/googlevertexai-gauth.ts create mode 100644 libs/langchain-community/src/util/googlevertexai-webauth.ts create mode 100644 libs/langchain-community/src/util/iflytek_websocket_stream.ts create mode 100644 libs/langchain-community/src/util/llama_cpp.ts create mode 100644 libs/langchain-community/src/util/ollama.ts create mode 100644 libs/langchain-community/src/vectorstores.ts create mode 100644 libs/langchain-community/tsconfig.cjs.json create mode 100644 
libs/langchain-community/tsconfig.json diff --git a/langchain/package.json b/langchain/package.json index 94fd29bb9156..56209eca2bc0 100644 --- a/langchain/package.json +++ b/langchain/package.json @@ -1432,6 +1432,7 @@ }, "dependencies": { "@anthropic-ai/sdk": "^0.9.1", + "@langchain/community": "workspace:*", "@langchain/core": "~0.0.9", "binary-extensions": "^2.2.0", "expr-eval": "^2.0.2", diff --git a/langchain/src/util/event-source-parse.ts b/langchain/src/util/event-source-parse.ts index 5f43f25a0271..93e538d555a9 100644 --- a/langchain/src/util/event-source-parse.ts +++ b/langchain/src/util/event-source-parse.ts @@ -1,287 +1 @@ -/* eslint-disable prefer-template */ -/* eslint-disable default-case */ -/* eslint-disable no-plusplus */ -// Adapted from https://github.com/gfortaine/fetch-event-source/blob/main/src/parse.ts -// due to a packaging issue in the original. -// MIT License -import { type Readable } from "stream"; -import { IterableReadableStream } from "./stream.js"; - -export const EventStreamContentType = "text/event-stream"; - -/** - * Represents a message sent in an event stream - * https://developer.mozilla.org/en-US/docs/Web/API/Server-sent_events/Using_server-sent_events#Event_stream_format - */ -export interface EventSourceMessage { - /** The event ID to set the EventSource object's last event ID value. */ - id: string; - /** A string identifying the type of event described. */ - event: string; - /** The event data */ - data: string; - /** The reconnection interval (in milliseconds) to wait before retrying the connection */ - retry?: number; -} - -function isNodeJSReadable(x: unknown): x is Readable { - return x != null && typeof x === "object" && "on" in x; -} - -/** - * Converts a ReadableStream into a callback pattern. - * @param stream The input ReadableStream. - * @param onChunk A function that will be called on each new byte chunk in the stream. - * @returns {Promise} A promise that will be resolved when the stream closes. 
- */ -export async function getBytes( - stream: ReadableStream, - onChunk: (arr: Uint8Array, flush?: boolean) => void -) { - // stream is a Node.js Readable / PassThrough stream - // this can happen if node-fetch is polyfilled - if (isNodeJSReadable(stream)) { - return new Promise((resolve) => { - stream.on("readable", () => { - let chunk; - // eslint-disable-next-line no-constant-condition - while (true) { - chunk = stream.read(); - if (chunk == null) { - onChunk(new Uint8Array(), true); - break; - } - onChunk(chunk); - } - - resolve(); - }); - }); - } - - const reader = stream.getReader(); - // CHANGED: Introduced a "flush" mechanism to process potential pending messages when the stream ends. - // This change is essential to ensure that we capture every last piece of information from streams, - // such as those from Azure OpenAI, which may not terminate with a blank line. Without this - // mechanism, we risk ignoring a possibly significant last message. - // See https://github.com/langchain-ai/langchainjs/issues/1299 for details. - // eslint-disable-next-line no-constant-condition - while (true) { - const result = await reader.read(); - if (result.done) { - onChunk(new Uint8Array(), true); - break; - } - onChunk(result.value); - } -} - -const enum ControlChars { - NewLine = 10, - CarriageReturn = 13, - Space = 32, - Colon = 58, -} - -/** - * Parses arbitary byte chunks into EventSource line buffers. - * Each line should be of the format "field: value" and ends with \r, \n, or \r\n. - * @param onLine A function that will be called on each new EventSource line. - * @returns A function that should be called for each incoming byte chunk. 
- */ -export function getLines( - onLine: (line: Uint8Array, fieldLength: number, flush?: boolean) => void -) { - let buffer: Uint8Array | undefined; - let position: number; // current read position - let fieldLength: number; // length of the `field` portion of the line - let discardTrailingNewline = false; - - // return a function that can process each incoming byte chunk: - return function onChunk(arr: Uint8Array, flush?: boolean) { - if (flush) { - onLine(arr, 0, true); - return; - } - - if (buffer === undefined) { - buffer = arr; - position = 0; - fieldLength = -1; - } else { - // we're still parsing the old line. Append the new bytes into buffer: - buffer = concat(buffer, arr); - } - - const bufLength = buffer.length; - let lineStart = 0; // index where the current line starts - while (position < bufLength) { - if (discardTrailingNewline) { - if (buffer[position] === ControlChars.NewLine) { - lineStart = ++position; // skip to next char - } - - discardTrailingNewline = false; - } - - // start looking forward till the end of line: - let lineEnd = -1; // index of the \r or \n char - for (; position < bufLength && lineEnd === -1; ++position) { - switch (buffer[position]) { - case ControlChars.Colon: - if (fieldLength === -1) { - // first colon in line - fieldLength = position - lineStart; - } - break; - // eslint-disable-next-line @typescript-eslint/ban-ts-comment - // @ts-ignore:7029 \r case below should fallthrough to \n: - case ControlChars.CarriageReturn: - discardTrailingNewline = true; - // eslint-disable-next-line no-fallthrough - case ControlChars.NewLine: - lineEnd = position; - break; - } - } - - if (lineEnd === -1) { - // We reached the end of the buffer but the line hasn't ended. 
- // Wait for the next arr and then continue parsing: - break; - } - - // we've reached the line end, send it out: - onLine(buffer.subarray(lineStart, lineEnd), fieldLength); - lineStart = position; // we're now on the next line - fieldLength = -1; - } - - if (lineStart === bufLength) { - buffer = undefined; // we've finished reading it - } else if (lineStart !== 0) { - // Create a new view into buffer beginning at lineStart so we don't - // need to copy over the previous lines when we get the new arr: - buffer = buffer.subarray(lineStart); - position -= lineStart; - } - }; -} - -/** - * Parses line buffers into EventSourceMessages. - * @param onId A function that will be called on each `id` field. - * @param onRetry A function that will be called on each `retry` field. - * @param onMessage A function that will be called on each message. - * @returns A function that should be called for each incoming line buffer. - */ -export function getMessages( - onMessage?: (msg: EventSourceMessage) => void, - onId?: (id: string) => void, - onRetry?: (retry: number) => void -) { - let message = newMessage(); - const decoder = new TextDecoder(); - - // return a function that can process each incoming line buffer: - return function onLine( - line: Uint8Array, - fieldLength: number, - flush?: boolean - ) { - if (flush) { - if (!isEmpty(message)) { - onMessage?.(message); - message = newMessage(); - } - return; - } - - if (line.length === 0) { - // empty line denotes end of message. Trigger the callback and start a new message: - onMessage?.(message); - message = newMessage(); - } else if (fieldLength > 0) { - // exclude comments and lines with no values - // line is of format ":" or ": " - // https://html.spec.whatwg.org/multipage/server-sent-events.html#event-stream-interpretation - const field = decoder.decode(line.subarray(0, fieldLength)); - const valueOffset = - fieldLength + (line[fieldLength + 1] === ControlChars.Space ? 
2 : 1); - const value = decoder.decode(line.subarray(valueOffset)); - - switch (field) { - case "data": - // if this message already has data, append the new value to the old. - // otherwise, just set to the new value: - message.data = message.data ? message.data + "\n" + value : value; // otherwise, - break; - case "event": - message.event = value; - break; - case "id": - onId?.((message.id = value)); - break; - case "retry": { - const retry = parseInt(value, 10); - if (!Number.isNaN(retry)) { - // per spec, ignore non-integers - onRetry?.((message.retry = retry)); - } - break; - } - } - } - }; -} - -function concat(a: Uint8Array, b: Uint8Array) { - const res = new Uint8Array(a.length + b.length); - res.set(a); - res.set(b, a.length); - return res; -} - -function newMessage(): EventSourceMessage { - // data, event, and id must be initialized to empty strings: - // https://html.spec.whatwg.org/multipage/server-sent-events.html#event-stream-interpretation - // retry should be initialized to undefined so we return a consistent shape - // to the js engine all the time: https://mathiasbynens.be/notes/shapes-ics#takeaways - return { - data: "", - event: "", - id: "", - retry: undefined, - }; -} - -export function convertEventStreamToIterableReadableDataStream( - stream: ReadableStream -) { - const dataStream = new ReadableStream({ - async start(controller) { - const enqueueLine = getMessages((msg) => { - if (msg.data) controller.enqueue(msg.data); - }); - const onLine = ( - line: Uint8Array, - fieldLength: number, - flush?: boolean - ) => { - enqueueLine(line, fieldLength, flush); - if (flush) controller.close(); - }; - await getBytes(stream, getLines(onLine)); - }, - }); - return IterableReadableStream.fromReadableStream(dataStream); -} - -function isEmpty(message: EventSourceMessage): boolean { - return ( - message.data === "" && - message.event === "" && - message.id === "" && - message.retry === undefined - ); -} +export * from 
"@langchain/community/utils/event-source-parse"; \ No newline at end of file diff --git a/libs/langchain-community/.eslintrc.cjs b/libs/langchain-community/.eslintrc.cjs new file mode 100644 index 000000000000..344f8a9d6cd9 --- /dev/null +++ b/libs/langchain-community/.eslintrc.cjs @@ -0,0 +1,66 @@ +module.exports = { + extends: [ + "airbnb-base", + "eslint:recommended", + "prettier", + "plugin:@typescript-eslint/recommended", + ], + parserOptions: { + ecmaVersion: 12, + parser: "@typescript-eslint/parser", + project: "./tsconfig.json", + sourceType: "module", + }, + plugins: ["@typescript-eslint", "no-instanceof"], + ignorePatterns: [ + ".eslintrc.cjs", + "scripts", + "node_modules", + "dist", + "dist-cjs", + "*.js", + "*.cjs", + "*.d.ts", + ], + rules: { + "no-process-env": 2, + "no-instanceof/no-instanceof": 2, + "@typescript-eslint/explicit-module-boundary-types": 0, + "@typescript-eslint/no-empty-function": 0, + "@typescript-eslint/no-shadow": 0, + "@typescript-eslint/no-empty-interface": 0, + "@typescript-eslint/no-use-before-define": ["error", "nofunc"], + "@typescript-eslint/no-unused-vars": ["warn", { args: "none" }], + "@typescript-eslint/no-floating-promises": "error", + "@typescript-eslint/no-misused-promises": "error", + camelcase: 0, + "class-methods-use-this": 0, + "import/extensions": [2, "ignorePackages"], + "import/no-extraneous-dependencies": [ + "error", + { devDependencies: ["**/*.test.ts"] }, + ], + "import/no-unresolved": 0, + "import/prefer-default-export": 0, + "keyword-spacing": "error", + "max-classes-per-file": 0, + "max-len": 0, + "no-await-in-loop": 0, + "no-bitwise": 0, + "no-console": 0, + "no-restricted-syntax": 0, + "no-shadow": 0, + "no-continue": 0, + "no-void": 0, + "no-underscore-dangle": 0, + "no-use-before-define": 0, + "no-useless-constructor": 0, + "no-return-await": 0, + "consistent-return": 0, + "no-else-return": 0, + "func-names": 0, + "no-lonely-if": 0, + "prefer-rest-params": 0, + "new-cap": ["error", { properties: 
false, capIsNew: false }], + }, +}; diff --git a/libs/langchain-community/.gitignore b/libs/langchain-community/.gitignore new file mode 100644 index 000000000000..ae701c7fe705 --- /dev/null +++ b/libs/langchain-community/.gitignore @@ -0,0 +1,3 @@ +node_modules +dist +.yarn diff --git a/libs/langchain-community/LICENSE b/libs/langchain-community/LICENSE new file mode 100644 index 000000000000..8cd8f501eb49 --- /dev/null +++ b/libs/langchain-community/LICENSE @@ -0,0 +1,21 @@ +The MIT License + +Copyright (c) 2023 LangChain + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. 
\ No newline at end of file diff --git a/libs/langchain-community/jest.config.cjs b/libs/langchain-community/jest.config.cjs new file mode 100644 index 000000000000..5cc0b1ab72c6 --- /dev/null +++ b/libs/langchain-community/jest.config.cjs @@ -0,0 +1,19 @@ +/** @type {import('ts-jest').JestConfigWithTsJest} */ +module.exports = { + preset: "ts-jest/presets/default-esm", + testEnvironment: "./jest.env.cjs", + modulePathIgnorePatterns: ["dist/", "docs/"], + moduleNameMapper: { + "^(\\.{1,2}/.*)\\.js$": "$1", + }, + transform: { + "^.+\\.tsx?$": ["@swc/jest"], + }, + transformIgnorePatterns: [ + "/node_modules/", + "\\.pnp\\.[^\\/]+$", + "./scripts/jest-setup-after-env.js", + ], + setupFiles: ["dotenv/config"], + testTimeout: 20_000, +}; diff --git a/libs/langchain-community/jest.env.cjs b/libs/langchain-community/jest.env.cjs new file mode 100644 index 000000000000..2ccedccb8672 --- /dev/null +++ b/libs/langchain-community/jest.env.cjs @@ -0,0 +1,12 @@ +const { TestEnvironment } = require("jest-environment-node"); + +class AdjustedTestEnvironmentToSupportFloat32Array extends TestEnvironment { + constructor(config, context) { + // Make `instanceof Float32Array` return true in tests + // to avoid https://github.com/xenova/transformers.js/issues/57 and https://github.com/jestjs/jest/issues/2549 + super(config, context); + this.global.Float32Array = Float32Array; + } +} + +module.exports = AdjustedTestEnvironmentToSupportFloat32Array; diff --git a/libs/langchain-community/package.json b/libs/langchain-community/package.json new file mode 100644 index 000000000000..09ee8ae750cd --- /dev/null +++ b/libs/langchain-community/package.json @@ -0,0 +1,608 @@ +{ + "name": "@langchain/community", + "version": "0.0.0", + "description": "Sample integration for LangChain.js", + "type": "module", + "engines": { + "node": ">=18" + }, + "main": "./index.js", + "types": "./index.d.ts", + "repository": { + "type": "git", + "url": "git@github.com:langchain-ai/langchainjs.git" + }, + 
"scripts": { + "build": "yarn clean && yarn build:esm && yarn build:cjs && yarn build:scripts", + "build:esm": "NODE_OPTIONS=--max-old-space-size=4096 tsc --outDir dist/ && rm -rf dist/tests dist/**/tests", + "build:cjs": "NODE_OPTIONS=--max-old-space-size=4096 tsc --outDir dist-cjs/ -p tsconfig.cjs.json && node scripts/move-cjs-to-dist.js && rm -rf dist-cjs", + "build:watch": "node scripts/create-entrypoints.js && tsc --outDir dist/ --watch", + "build:scripts": "node scripts/create-entrypoints.js && node scripts/check-tree-shaking.js", + "lint": "NODE_OPTIONS=--max-old-space-size=4096 eslint src && dpdm --exit-code circular:1 --no-warning --no-tree src/*.ts src/**/*.ts", + "lint:fix": "yarn lint --fix", + "clean": "rm -rf dist/ && NODE_OPTIONS=--max-old-space-size=4096 node scripts/create-entrypoints.js pre", + "prepack": "yarn build", + "release": "release-it --only-version --config .release-it.json", + "test": "NODE_OPTIONS=--experimental-vm-modules jest --testPathIgnorePatterns=\\.int\\.test.ts --testTimeout 30000 --maxWorkers=50%", + "test:watch": "NODE_OPTIONS=--experimental-vm-modules jest --watch --testPathIgnorePatterns=\\.int\\.test.ts", + "test:single": "NODE_OPTIONS=--experimental-vm-modules yarn run jest --config jest.config.cjs --testTimeout 100000", + "test:int": "NODE_OPTIONS=--experimental-vm-modules jest --testPathPattern=\\.int\\.test.ts --testTimeout 100000 --maxWorkers=50%", + "format": "prettier --write \"src\"", + "format:check": "prettier --check \"src\"" + }, + "author": "LangChain", + "license": "MIT", + "dependencies": { + "@langchain/core": "~0.0.9", + "@langchain/openai": "~0.0.1" + }, + "devDependencies": { + "@aws-crypto/sha256-js": "^5.0.0", + "@aws-sdk/client-bedrock-runtime": "^3.422.0", + "@aws-sdk/client-dynamodb": "^3.310.0", + "@aws-sdk/client-kendra": "^3.352.0", + "@aws-sdk/client-lambda": "^3.310.0", + "@aws-sdk/client-s3": "^3.310.0", + "@aws-sdk/client-sagemaker-runtime": "^3.414.0", + "@aws-sdk/client-sfn": "^3.362.0", + 
"@aws-sdk/credential-provider-node": "^3.388.0", + "@aws-sdk/types": "^3.357.0", + "@azure/storage-blob": "^12.15.0", + "@clickhouse/client": "^0.2.5", + "@cloudflare/ai": "^1.0.12", + "@cloudflare/workers-types": "^4.20230922.0", + "@elastic/elasticsearch": "^8.4.0", + "@faker-js/faker": "^7.6.0", + "@getmetal/metal-sdk": "^4.0.0", + "@getzep/zep-js": "^0.9.0", + "@gomomento/sdk": "^1.51.1", + "@gomomento/sdk-core": "^1.51.1", + "@google-ai/generativelanguage": "^0.2.1", + "@google-cloud/storage": "^6.10.1", + "@gradientai/nodejs-sdk": "^1.2.0", + "@huggingface/inference": "^2.6.4", + "@jest/globals": "^29.5.0", + "@mozilla/readability": "^0.4.4", + "@notionhq/client": "^2.2.10", + "@opensearch-project/opensearch": "^2.2.0", + "@pinecone-database/pinecone": "^1.1.0", + "@planetscale/database": "^1.8.0", + "@qdrant/js-client-rest": "^1.2.0", + "@raycast/api": "^1.55.2", + "@rockset/client": "^0.9.1", + "@smithy/eventstream-codec": "^2.0.5", + "@smithy/protocol-http": "^3.0.6", + "@smithy/signature-v4": "^2.0.10", + "@smithy/util-utf8": "^2.0.0", + "@supabase/postgrest-js": "^1.1.1", + "@supabase/supabase-js": "^2.10.0", + "@swc/core": "^1.3.90", + "@swc/jest": "^0.2.29", + "@tensorflow-models/universal-sentence-encoder": "^1.3.3", + "@tensorflow/tfjs-backend-cpu": "^3", + "@tensorflow/tfjs-converter": "^3.6.0", + "@tensorflow/tfjs-core": "^3.6.0", + "@tsconfig/recommended": "^1.0.2", + "@types/d3-dsv": "^2", + "@types/decamelize": "^1.2.0", + "@types/flat": "^5.0.2", + "@types/html-to-text": "^9", + "@types/js-yaml": "^4", + "@types/jsdom": "^21.1.1", + "@types/lodash": "^4", + "@types/mozilla-readability": "^0.2.1", + "@types/pdf-parse": "^1.1.1", + "@types/pg": "^8", + "@types/pg-copy-streams": "^1.2.2", + "@types/uuid": "^9", + "@types/ws": "^8", + "@typescript-eslint/eslint-plugin": "^5.58.0", + "@typescript-eslint/parser": "^5.58.0", + "@upstash/redis": "^1.20.6", + "@vercel/kv": "^0.2.3", + "@vercel/postgres": "^0.5.0", + "@writerai/writer-sdk": "^0.40.2", + 
"@xata.io/client": "^0.28.0", + "@xenova/transformers": "^2.5.4", + "@zilliz/milvus2-sdk-node": ">=2.2.11", + "apify-client": "^2.7.1", + "assemblyai": "^2.0.2", + "axios": "^0.26.0", + "cassandra-driver": "^4.7.2", + "cheerio": "^1.0.0-rc.12", + "chromadb": "^1.5.3", + "closevector-common": "0.1.0-alpha.1", + "closevector-node": "0.1.0-alpha.10", + "closevector-web": "0.1.0-alpha.15", + "cohere-ai": ">=6.0.0", + "convex": "^1.3.1", + "d3-dsv": "^2.0.0", + "dotenv": "^16.0.3", + "dpdm": "^3.12.0", + "epub2": "^3.0.1", + "eslint": "^8.33.0", + "eslint-config-airbnb-base": "^15.0.0", + "eslint-config-prettier": "^8.6.0", + "eslint-plugin-import": "^2.27.5", + "eslint-plugin-jest": "^27.6.0", + "eslint-plugin-no-instanceof": "^1.0.1", + "eslint-plugin-prettier": "^4.2.1", + "faiss-node": "^0.5.1", + "fast-xml-parser": "^4.2.7", + "firebase-admin": "^11.9.0", + "google-auth-library": "^8.9.0", + "googleapis": "^126.0.1", + "graphql": "^16.6.0", + "hnswlib-node": "^1.4.2", + "html-to-text": "^9.0.5", + "ignore": "^5.2.0", + "ioredis": "^5.3.2", + "jest": "^29.5.0", + "jest-environment-node": "^29.6.4", + "jsdom": "^22.1.0", + "llmonitor": "^0.5.9", + "lodash": "^4.17.21", + "mammoth": "^1.5.1", + "ml-matrix": "^6.10.4", + "mongodb": "^5.2.0", + "mysql2": "^3.3.3", + "neo4j-driver": "^5.12.0", + "node-llama-cpp": "2.7.3", + "notion-to-md": "^3.1.0", + "officeparser": "^4.0.4", + "pdf-parse": "1.1.1", + "peggy": "^3.0.2", + "pg": "^8.11.0", + "pg-copy-streams": "^6.0.5", + "pickleparser": "^0.2.1", + "playwright": "^1.32.1", + "portkey-ai": "^0.1.11", + "prettier": "^2.8.3", + "puppeteer": "^19.7.2", + "pyodide": "^0.24.1", + "redis": "^4.6.6", + "release-it": "^15.10.1", + "replicate": "^0.18.0", + "rimraf": "^5.0.1", + "rollup": "^3.19.1", + "sonix-speech-recognition": "^2.1.1", + "sqlite3": "^5.1.4", + "srt-parser-2": "^1.2.2", + "ts-jest": "^29.1.0", + "typeorm": "^0.3.12", + "typescript": "~5.1.6", + "typesense": "^1.5.3", + "usearch": "^1.1.1", + "vectordb": 
"^0.1.4", + "voy-search": "0.6.2", + "weaviate-ts-client": "^1.4.0", + "web-auth-library": "^1.0.3", + "youtube-transcript": "^1.0.6", + "youtubei.js": "^5.8.0" + }, + "peerDependencies": { + "@aws-crypto/sha256-js": "^5.0.0", + "@aws-sdk/client-bedrock-runtime": "^3.422.0", + "@aws-sdk/client-dynamodb": "^3.310.0", + "@aws-sdk/client-kendra": "^3.352.0", + "@aws-sdk/client-lambda": "^3.310.0", + "@aws-sdk/client-s3": "^3.310.0", + "@aws-sdk/client-sagemaker-runtime": "^3.310.0", + "@aws-sdk/client-sfn": "^3.310.0", + "@aws-sdk/credential-provider-node": "^3.388.0", + "@azure/storage-blob": "^12.15.0", + "@clickhouse/client": "^0.2.5", + "@cloudflare/ai": "^1.0.12", + "@elastic/elasticsearch": "^8.4.0", + "@getmetal/metal-sdk": "*", + "@getzep/zep-js": "^0.9.0", + "@gomomento/sdk": "^1.51.1", + "@gomomento/sdk-core": "^1.51.1", + "@gomomento/sdk-web": "^1.51.1", + "@google-ai/generativelanguage": "^0.2.1", + "@google-cloud/storage": "^6.10.1", + "@gradientai/nodejs-sdk": "^1.2.0", + "@huggingface/inference": "^2.6.4", + "@mozilla/readability": "*", + "@notionhq/client": "^2.2.10", + "@opensearch-project/opensearch": "*", + "@pinecone-database/pinecone": "^1.1.0", + "@planetscale/database": "^1.8.0", + "@qdrant/js-client-rest": "^1.2.0", + "@raycast/api": "^1.55.2", + "@rockset/client": "^0.9.1", + "@smithy/eventstream-codec": "^2.0.5", + "@smithy/protocol-http": "^3.0.6", + "@smithy/signature-v4": "^2.0.10", + "@smithy/util-utf8": "^2.0.0", + "@supabase/postgrest-js": "^1.1.1", + "@supabase/supabase-js": "^2.10.0", + "@tensorflow-models/universal-sentence-encoder": "*", + "@tensorflow/tfjs-converter": "*", + "@tensorflow/tfjs-core": "*", + "@upstash/redis": "^1.20.6", + "@vercel/kv": "^0.2.3", + "@vercel/postgres": "^0.5.0", + "@writerai/writer-sdk": "^0.40.2", + "@xata.io/client": "^0.28.0", + "@xenova/transformers": "^2.5.4", + "@zilliz/milvus2-sdk-node": ">=2.2.7", + "apify-client": "^2.7.1", + "assemblyai": "^2.0.2", + "axios": "*", + "cassandra-driver": 
"^4.7.2", + "cheerio": "^1.0.0-rc.12", + "chromadb": "*", + "closevector-common": "0.1.0-alpha.1", + "closevector-node": "0.1.0-alpha.10", + "closevector-web": "0.1.0-alpha.16", + "cohere-ai": ">=6.0.0", + "convex": "^1.3.1", + "d3-dsv": "^2.0.0", + "epub2": "^3.0.1", + "faiss-node": "^0.5.1", + "fast-xml-parser": "^4.2.7", + "firebase-admin": "^11.9.0", + "google-auth-library": "^8.9.0", + "googleapis": "^126.0.1", + "hnswlib-node": "^1.4.2", + "html-to-text": "^9.0.5", + "ignore": "^5.2.0", + "ioredis": "^5.3.2", + "jsdom": "*", + "llmonitor": "^0.5.9", + "lodash": "^4.17.21", + "mammoth": "*", + "mongodb": "^5.2.0", + "mysql2": "^3.3.3", + "neo4j-driver": "*", + "node-llama-cpp": "*", + "notion-to-md": "^3.1.0", + "officeparser": "^4.0.4", + "pdf-parse": "1.1.1", + "peggy": "^3.0.2", + "pg": "^8.11.0", + "pg-copy-streams": "^6.0.5", + "pickleparser": "^0.2.1", + "playwright": "^1.32.1", + "portkey-ai": "^0.1.11", + "puppeteer": "^19.7.2", + "pyodide": "^0.24.1", + "redis": "^4.6.4", + "replicate": "^0.18.0", + "sonix-speech-recognition": "^2.1.1", + "srt-parser-2": "^1.2.2", + "typeorm": "^0.3.12", + "typesense": "^1.5.3", + "usearch": "^1.1.1", + "vectordb": "^0.1.4", + "voy-search": "0.6.2", + "weaviate-ts-client": "^1.4.0", + "web-auth-library": "^1.0.3", + "ws": "^8.14.2", + "youtube-transcript": "^1.0.6", + "youtubei.js": "^5.8.0" + }, + "peerDependenciesMeta": { + "@aws-crypto/sha256-js": { + "optional": true + }, + "@aws-sdk/client-bedrock-runtime": { + "optional": true + }, + "@aws-sdk/client-dynamodb": { + "optional": true + }, + "@aws-sdk/client-kendra": { + "optional": true + }, + "@aws-sdk/client-lambda": { + "optional": true + }, + "@aws-sdk/client-s3": { + "optional": true + }, + "@aws-sdk/client-sagemaker-runtime": { + "optional": true + }, + "@aws-sdk/client-sfn": { + "optional": true + }, + "@aws-sdk/credential-provider-node": { + "optional": true + }, + "@azure/storage-blob": { + "optional": true + }, + "@clickhouse/client": { + "optional": 
true + }, + "@cloudflare/ai": { + "optional": true + }, + "@elastic/elasticsearch": { + "optional": true + }, + "@getmetal/metal-sdk": { + "optional": true + }, + "@getzep/zep-js": { + "optional": true + }, + "@gomomento/sdk": { + "optional": true + }, + "@gomomento/sdk-core": { + "optional": true + }, + "@gomomento/sdk-web": { + "optional": true + }, + "@google-ai/generativelanguage": { + "optional": true + }, + "@google-cloud/storage": { + "optional": true + }, + "@gradientai/nodejs-sdk": { + "optional": true + }, + "@huggingface/inference": { + "optional": true + }, + "@mozilla/readability": { + "optional": true + }, + "@notionhq/client": { + "optional": true + }, + "@opensearch-project/opensearch": { + "optional": true + }, + "@pinecone-database/pinecone": { + "optional": true + }, + "@planetscale/database": { + "optional": true + }, + "@qdrant/js-client-rest": { + "optional": true + }, + "@raycast/api": { + "optional": true + }, + "@rockset/client": { + "optional": true + }, + "@smithy/eventstream-codec": { + "optional": true + }, + "@smithy/protocol-http": { + "optional": true + }, + "@smithy/signature-v4": { + "optional": true + }, + "@smithy/util-utf8": { + "optional": true + }, + "@supabase/postgrest-js": { + "optional": true + }, + "@supabase/supabase-js": { + "optional": true + }, + "@tensorflow-models/universal-sentence-encoder": { + "optional": true + }, + "@tensorflow/tfjs-converter": { + "optional": true + }, + "@tensorflow/tfjs-core": { + "optional": true + }, + "@upstash/redis": { + "optional": true + }, + "@vercel/kv": { + "optional": true + }, + "@vercel/postgres": { + "optional": true + }, + "@writerai/writer-sdk": { + "optional": true + }, + "@xata.io/client": { + "optional": true + }, + "@xenova/transformers": { + "optional": true + }, + "@zilliz/milvus2-sdk-node": { + "optional": true + }, + "apify-client": { + "optional": true + }, + "assemblyai": { + "optional": true + }, + "axios": { + "optional": true + }, + "cassandra-driver": { + 
"optional": true + }, + "cheerio": { + "optional": true + }, + "chromadb": { + "optional": true + }, + "closevector-common": { + "optional": true + }, + "closevector-node": { + "optional": true + }, + "closevector-web": { + "optional": true + }, + "cohere-ai": { + "optional": true + }, + "convex": { + "optional": true + }, + "d3-dsv": { + "optional": true + }, + "epub2": { + "optional": true + }, + "faiss-node": { + "optional": true + }, + "fast-xml-parser": { + "optional": true + }, + "firebase-admin": { + "optional": true + }, + "google-auth-library": { + "optional": true + }, + "googleapis": { + "optional": true + }, + "hnswlib-node": { + "optional": true + }, + "html-to-text": { + "optional": true + }, + "ignore": { + "optional": true + }, + "ioredis": { + "optional": true + }, + "jsdom": { + "optional": true + }, + "llmonitor": { + "optional": true + }, + "lodash": { + "optional": true + }, + "mammoth": { + "optional": true + }, + "mongodb": { + "optional": true + }, + "mysql2": { + "optional": true + }, + "neo4j-driver": { + "optional": true + }, + "node-llama-cpp": { + "optional": true + }, + "notion-to-md": { + "optional": true + }, + "officeparser": { + "optional": true + }, + "pdf-parse": { + "optional": true + }, + "peggy": { + "optional": true + }, + "pg": { + "optional": true + }, + "pg-copy-streams": { + "optional": true + }, + "pickleparser": { + "optional": true + }, + "playwright": { + "optional": true + }, + "portkey-ai": { + "optional": true + }, + "puppeteer": { + "optional": true + }, + "pyodide": { + "optional": true + }, + "redis": { + "optional": true + }, + "replicate": { + "optional": true + }, + "sonix-speech-recognition": { + "optional": true + }, + "srt-parser-2": { + "optional": true + }, + "typeorm": { + "optional": true + }, + "typesense": { + "optional": true + }, + "usearch": { + "optional": true + }, + "vectordb": { + "optional": true + }, + "voy-search": { + "optional": true + }, + "weaviate-ts-client": { + "optional": true + }, 
+ "web-auth-library": { + "optional": true + }, + "ws": { + "optional": true + }, + "youtube-transcript": { + "optional": true + }, + "youtubei.js": { + "optional": true + } + }, + "publishConfig": { + "access": "public" + }, + "exports": { + ".": { + "types": "./index.d.ts", + "import": "./index.js", + "require": "./index.cjs" + }, + "./package.json": "./package.json" + }, + "files": [ + "dist/", + "index.cjs", + "index.js", + "index.d.ts" + ] +} diff --git a/libs/langchain-community/scripts/check-tree-shaking.js b/libs/langchain-community/scripts/check-tree-shaking.js new file mode 100644 index 000000000000..8073e3d5507b --- /dev/null +++ b/libs/langchain-community/scripts/check-tree-shaking.js @@ -0,0 +1,80 @@ +import fs from "fs/promises"; +import { rollup } from "rollup"; + +const packageJson = JSON.parse(await fs.readFile("package.json", "utf-8")); + +export function listEntrypoints() { + const exports = packageJson.exports; + const entrypoints = []; + + for (const [key, value] of Object.entries(exports)) { + if (key === "./package.json") { + continue; + } + if (typeof value === "string") { + entrypoints.push(value); + } else if (typeof value === "object" && value.import) { + entrypoints.push(value.import); + } + } + + return entrypoints; +} + +export function listExternals() { + return [ + ...Object.keys(packageJson.dependencies), + ...Object.keys(packageJson.peerDependencies ?? {}), + /node\:/, + /@langchain\/core\//, + ]; +} + +export async function checkTreeShaking() { + const externals = listExternals(); + const entrypoints = listEntrypoints(); + const consoleLog = console.log; + const reportMap = new Map(); + + for (const entrypoint of entrypoints) { + let sideEffects = ""; + + console.log = function (...args) { + const line = args.length ? 
args.join(" ") : ""; + if (line.trim().startsWith("First side effect in")) { + sideEffects += line + "\n"; + } + }; + + await rollup({ + external: externals, + input: entrypoint, + experimentalLogSideEffects: true, + }); + + reportMap.set(entrypoint, { + log: sideEffects, + hasSideEffects: sideEffects.length > 0, + }); + } + + console.log = consoleLog; + + let failed = false; + for (const [entrypoint, report] of reportMap) { + if (report.hasSideEffects) { + failed = true; + console.log("---------------------------------"); + console.log(`Tree shaking failed for ${entrypoint}`); + console.log(report.log); + } + } + + if (failed) { + process.exit(1); + } else { + console.log("Tree shaking checks passed!"); + } +} + +checkTreeShaking(); diff --git a/libs/langchain-community/scripts/create-entrypoints.js b/libs/langchain-community/scripts/create-entrypoints.js new file mode 100644 index 000000000000..01a4daeb25ce --- /dev/null +++ b/libs/langchain-community/scripts/create-entrypoints.js @@ -0,0 +1,100 @@ +import * as fs from "fs"; +import * as path from "path"; + +// .gitignore +const DEFAULT_GITIGNORE_PATHS = ["node_modules", "dist", ".yarn"]; + +// This lists all the entrypoints for the library. Each key corresponds to an +// importable path, eg. `import { AgentExecutor } from "langchain/agents"`. +// The value is the path to the file in `src/` that exports the entrypoint. +// This is used to generate the `exports` field in package.json. +// Order is not important. +const entrypoints = { + index: "index", +}; + +// Entrypoints in this list require an optional dependency to be installed. +// Therefore they are not tested in the generated test-exports-* packages. 
+const requiresOptionalDependency = []; + +const updateJsonFile = (relativePath, updateFunction) => { + const contents = fs.readFileSync(relativePath).toString(); + const res = updateFunction(JSON.parse(contents)); + fs.writeFileSync(relativePath, JSON.stringify(res, null, 2) + "\n"); +}; + +const generateFiles = () => { + const files = [...Object.entries(entrypoints), ["index", "index"]].flatMap( + ([key, value]) => { + const nrOfDots = key.split("/").length - 1; + const relativePath = "../".repeat(nrOfDots) || "./"; + const compiledPath = `${relativePath}dist/${value}.js`; + return [ + [ + `${key}.cjs`, + `module.exports = require('${relativePath}dist/${value}.cjs');`, + ], + [`${key}.js`, `export * from '${compiledPath}'`], + [`${key}.d.ts`, `export * from '${compiledPath}'`], + ]; + } + ); + + return Object.fromEntries(files); +}; + +const updateConfig = () => { + const generatedFiles = generateFiles(); + const filenames = Object.keys(generatedFiles); + + // Update package.json `exports` and `files` fields + updateJsonFile("./package.json", (json) => ({ + ...json, + exports: Object.assign( + Object.fromEntries( + [...Object.keys(entrypoints)].map((key) => { + let entryPoint = { + types: `./${key}.d.ts`, + import: `./${key}.js`, + require: `./${key}.cjs`, + }; + + return [key === "index" ? "." 
: `./${key}`, entryPoint]; + }) + ), + { "./package.json": "./package.json" } + ), + files: ["dist/", ...filenames], + })); + + // Write generated files + Object.entries(generatedFiles).forEach(([filename, content]) => { + fs.mkdirSync(path.dirname(filename), { recursive: true }); + fs.writeFileSync(filename, content); + }); + + // Update .gitignore + fs.writeFileSync( + "./.gitignore", + filenames.join("\n") + "\n" + DEFAULT_GITIGNORE_PATHS.join("\n") + "\n" + ); +}; + +const cleanGenerated = () => { + const filenames = Object.keys(generateFiles()); + filenames.forEach((fname) => { + try { + fs.unlinkSync(fname); + } catch { + // ignore error + } + }); +}; + +const command = process.argv[2]; + +if (command === "pre") { + cleanGenerated(); +} else { + updateConfig(); +} diff --git a/libs/langchain-community/scripts/identify-secrets.js b/libs/langchain-community/scripts/identify-secrets.js new file mode 100644 index 000000000000..c54bdd97c870 --- /dev/null +++ b/libs/langchain-community/scripts/identify-secrets.js @@ -0,0 +1,77 @@ +import ts from "typescript"; +import * as fs from "fs"; + +export function identifySecrets() { + const secrets = new Set(); + + const tsConfig = ts.parseJsonConfigFileContent( + ts.readJsonConfigFile("./tsconfig.json", (p) => + fs.readFileSync(p, "utf-8") + ), + ts.sys, + "./src/" + ); + + for (const fileName of tsConfig.fileNames.filter( + (fn) => !fn.endsWith("test.ts") + )) { + const sourceFile = ts.createSourceFile( + fileName, + fs.readFileSync(fileName, "utf-8"), + tsConfig.options.target, + true + ); + sourceFile.forEachChild((node) => { + switch (node.kind) { + case ts.SyntaxKind.ClassDeclaration: + case ts.SyntaxKind.ClassExpression: { + node.forEachChild((node) => { + // look for get lc_secrets() + switch (node.kind) { + case ts.SyntaxKind.GetAccessor: { + const property = node; + if (property.name.getText() === "lc_secrets") { + // look for return { ... 
} + property.body.statements.forEach((stmt) => { + if ( + stmt.kind === ts.SyntaxKind.ReturnStatement && + stmt.expression.kind === + ts.SyntaxKind.ObjectLiteralExpression + ) { + // collect secret identifier + stmt.expression.properties.forEach((element) => { + if ( + element.initializer.kind === + ts.SyntaxKind.StringLiteral + ) { + const secret = element.initializer.text; + + if (secret.toUpperCase() !== secret) { + throw new Error( + `Secret identifier must be uppercase: ${secret} at ${fileName}` + ); + } + if (/\s/.test(secret)) { + throw new Error( + `Secret identifier must not contain whitespace: ${secret} at ${fileName}` + ); + } + + secrets.add(secret); + } + }); + } + }); + } + break; + } + } + }); + break; + } + } + }); + } + + return secrets; +} diff --git a/libs/langchain-community/scripts/move-cjs-to-dist.js b/libs/langchain-community/scripts/move-cjs-to-dist.js new file mode 100644 index 000000000000..1e89ccca88e9 --- /dev/null +++ b/libs/langchain-community/scripts/move-cjs-to-dist.js @@ -0,0 +1,38 @@ +import { resolve, dirname, parse, format } from "node:path"; +import { readdir, readFile, writeFile } from "node:fs/promises"; +import { fileURLToPath } from "node:url"; + +function abs(relativePath) { + return resolve(dirname(fileURLToPath(import.meta.url)), relativePath); +} + +async function moveAndRename(source, dest) { + for (const file of await readdir(abs(source), { withFileTypes: true })) { + if (file.isDirectory()) { + await moveAndRename(`${source}/${file.name}`, `${dest}/${file.name}`); + } else if (file.isFile()) { + const parsed = parse(file.name); + + // Ignore anything that's not a .js file + if (parsed.ext !== ".js") { + continue; + } + + // Rewrite any require statements to use .cjs + const content = await readFile(abs(`${source}/${file.name}`), "utf8"); + const rewritten = content.replace(/require\("(\..+?).js"\)/g, (_, p1) => { + return `require("${p1}.cjs")`; + }); + + // Rename the file to .cjs + const renamed = format({ name: 
parsed.name, ext: ".cjs" }); + + await writeFile(abs(`${dest}/${renamed}`), rewritten, "utf8"); + } + } +} + +moveAndRename("../dist-cjs", "../dist").catch((err) => { + console.error(err); + process.exit(1); +}); diff --git a/libs/langchain-community/scripts/release-branch.sh b/libs/langchain-community/scripts/release-branch.sh new file mode 100644 index 000000000000..7504238c5561 --- /dev/null +++ b/libs/langchain-community/scripts/release-branch.sh @@ -0,0 +1,6 @@ +#!/usr/bin/env bash + +if [[ $(git branch --show-current) == "main" ]]; then + git checkout -B release + git push -u origin release +fi diff --git a/libs/langchain-community/src/chat_models.ts b/libs/langchain-community/src/chat_models.ts new file mode 100644 index 000000000000..81683647a0bd --- /dev/null +++ b/libs/langchain-community/src/chat_models.ts @@ -0,0 +1,88 @@ +import { type BaseMessage } from "@langchain/core/messages"; +import { type BaseLanguageModelCallOptions } from "@langchain/core/language_models/base"; + +import { CallbackManagerForLLMRun } from "@langchain/core/callbacks/manager"; +import { + type BaseChatModelParams, + SimpleChatModel, +} from "@langchain/core/language_models/chat_models"; + +// Uncomment if implementing streaming + +// import { +// ChatGenerationChunk, +// } from "@langchain/core/outputs"; +// import { +// AIMessageChunk, +// } from "@langchain/core/messages"; + +/** + * Input to chat model class. + */ +export interface ChatIntegrationInput extends BaseChatModelParams {} + +/** + * Integration with a chat model. + */ +export class ChatIntegration< + CallOptions extends BaseLanguageModelCallOptions = BaseLanguageModelCallOptions + > + extends SimpleChatModel + implements ChatIntegrationInput +{ + // Used for tracing, replace with the same name as your class + static lc_name() { + return "ChatIntegration"; + } + + lc_serializable = true; + + constructor(fields?: ChatIntegrationInput) { + super(fields ?? 
{}); + } + + // Replace + _llmType() { + return "chat_integration"; + } + + /** + * For some given input messages and options, return a string output. + */ + _call( + _messages: BaseMessage[], + _options: this["ParsedCallOptions"], + _runManager?: CallbackManagerForLLMRun + ): Promise { + throw new Error("Not implemented."); + } + + /** + * Implement to support streaming. + * Should yield chunks iteratively. + */ + // async *_streamResponseChunks( + // messages: BaseMessage[], + // options: this["ParsedCallOptions"], + // runManager?: CallbackManagerForLLMRun + // ): AsyncGenerator { + // // All models have a built in `this.caller` property for retries + // const stream = await this.caller.call(async () => + // createStreamMethod() + // ); + // for await (const chunk of stream) { + // if (!chunk.done) { + // yield new ChatGenerationChunk({ + // text: chunk.response, + // message: new AIMessageChunk({ content: chunk.response }), + // }); + // await runManager?.handleLLMNewToken(chunk.response ?? 
""); + // } + // } + // } + + /** @ignore */ + _combineLLMOutput() { + return []; + } +} diff --git a/libs/langchain-community/src/chat_models/anthropic.ts b/libs/langchain-community/src/chat_models/anthropic.ts new file mode 100644 index 000000000000..a06de018ee42 --- /dev/null +++ b/libs/langchain-community/src/chat_models/anthropic.ts @@ -0,0 +1,451 @@ +import { + Anthropic, + AI_PROMPT, + HUMAN_PROMPT, + ClientOptions, +} from "@anthropic-ai/sdk"; +import type { CompletionCreateParams } from "@anthropic-ai/sdk/resources/completions"; +import type { Stream } from "@anthropic-ai/sdk/streaming"; + +import { CallbackManagerForLLMRun } from "@langchain/core/callbacks/manager"; +import { + AIMessage, + AIMessageChunk, + type BaseMessage, + ChatMessage, +} from "@langchain/core/messages"; +import { + type ChatGeneration, + ChatGenerationChunk, + type ChatResult, +} from "@langchain/core/outputs"; +import { getEnvironmentVariable } from "@langchain/core/utils/env"; +import { + BaseChatModel, + type BaseChatModelParams, +} from "@langchain/core/language_models/chat_models"; +import { type BaseLanguageModelCallOptions } from "@langchain/core/language_models/base"; + +export { AI_PROMPT, HUMAN_PROMPT }; + +/** + * Extracts the custom role of a generic chat message. + * @param message The chat message from which to extract the custom role. + * @returns The custom role of the chat message. + */ +function extractGenericMessageCustomRole(message: ChatMessage) { + if ( + message.role !== AI_PROMPT && + message.role !== HUMAN_PROMPT && + message.role !== "" + ) { + console.warn(`Unknown message role: ${message.role}`); + } + + return message.role; +} + +/** + * Gets the Anthropic prompt from a base message. + * @param message The base message from which to get the Anthropic prompt. + * @returns The Anthropic prompt from the base message. 
+ */ +function getAnthropicPromptFromMessage(message: BaseMessage): string { + const type = message._getType(); + switch (type) { + case "ai": + return AI_PROMPT; + case "human": + return HUMAN_PROMPT; + case "system": + return ""; + case "generic": { + if (!ChatMessage.isInstance(message)) + throw new Error("Invalid generic chat message"); + return extractGenericMessageCustomRole(message); + } + default: + throw new Error(`Unknown message type: ${type}`); + } +} + +export const DEFAULT_STOP_SEQUENCES = [HUMAN_PROMPT]; + +/** + * Input to AnthropicChat class. + */ +export interface AnthropicInput { + /** Amount of randomness injected into the response. Ranges + * from 0 to 1. Use temp closer to 0 for analytical / + * multiple choice, and temp closer to 1 for creative + * and generative tasks. + */ + temperature?: number; + + /** Only sample from the top K options for each subsequent + * token. Used to remove "long tail" low probability + * responses. Defaults to -1, which disables it. + */ + topK?: number; + + /** Does nucleus sampling, in which we compute the + * cumulative distribution over all the options for each + * subsequent token in decreasing probability order and + * cut it off once it reaches a particular probability + * specified by top_p. Defaults to -1, which disables it. + * Note that you should either alter temperature or top_p, + * but not both. + */ + topP?: number; + + /** A maximum number of tokens to generate before stopping. */ + maxTokensToSample: number; + + /** A list of strings upon which to stop generating. + * You probably want `["\n\nHuman:"]`, as that's the cue for + * the next turn in the dialog agent. 
+ */ + stopSequences?: string[]; + + /** Whether to stream the results or not */ + streaming?: boolean; + + /** Anthropic API key */ + anthropicApiKey?: string; + + /** Anthropic API URL */ + anthropicApiUrl?: string; + + /** Model name to use */ + modelName: string; + + /** Overridable Anthropic ClientOptions */ + clientOptions: ClientOptions; + + /** Holds any additional parameters that are valid to pass to {@link + * https://console.anthropic.com/docs/api/reference | + * `anthropic.complete`} that are not explicitly specified on this class. + */ + invocationKwargs?: Kwargs; +} + +/** + * A type representing additional parameters that can be passed to the + * Anthropic API. + */ +// eslint-disable-next-line @typescript-eslint/no-explicit-any +type Kwargs = Record; + +/** + * Wrapper around Anthropic large language models. + * + * To use you should have the `@anthropic-ai/sdk` package installed, with the + * `ANTHROPIC_API_KEY` environment variable set. + * + * @remarks + * Any parameters that are valid to be passed to {@link + * https://console.anthropic.com/docs/api/reference | + * `anthropic.complete`} can be passed through {@link invocationKwargs}, + * even if not explicitly available on this class. + * @example + * ```typescript + * const model = new ChatAnthropic({ + * temperature: 0.9, + * anthropicApiKey: 'YOUR-API-KEY', + * }); + * const res = await model.invoke({ input: 'Hello!' 
}); + * console.log(res); + * ``` + */ +export class ChatAnthropic< + CallOptions extends BaseLanguageModelCallOptions = BaseLanguageModelCallOptions + > + extends BaseChatModel + implements AnthropicInput +{ + static lc_name() { + return "ChatAnthropic"; + } + + get lc_secrets(): { [key: string]: string } | undefined { + return { + anthropicApiKey: "ANTHROPIC_API_KEY", + }; + } + + get lc_aliases(): Record { + return { + modelName: "model", + }; + } + + lc_namespace = ["langchain-community", "chat_models", this._llmType()]; + + lc_serializable = true; + + anthropicApiKey?: string; + + apiUrl?: string; + + temperature = 1; + + topK = -1; + + topP = -1; + + maxTokensToSample = 2048; + + modelName = "claude-2"; + + invocationKwargs?: Kwargs; + + stopSequences?: string[]; + + streaming = false; + + clientOptions: ClientOptions; + + // Used for non-streaming requests + protected batchClient: Anthropic; + + // Used for streaming requests + protected streamingClient: Anthropic; + + constructor(fields?: Partial & BaseChatModelParams) { + super(fields ?? {}); + + this.anthropicApiKey = + fields?.anthropicApiKey ?? getEnvironmentVariable("ANTHROPIC_API_KEY"); + if (!this.anthropicApiKey) { + throw new Error("Anthropic API key not found"); + } + + // Support overriding the default API URL (i.e., https://api.anthropic.com) + this.apiUrl = fields?.anthropicApiUrl; + + this.modelName = fields?.modelName ?? this.modelName; + this.invocationKwargs = fields?.invocationKwargs ?? {}; + + this.temperature = fields?.temperature ?? this.temperature; + this.topK = fields?.topK ?? this.topK; + this.topP = fields?.topP ?? this.topP; + this.maxTokensToSample = + fields?.maxTokensToSample ?? this.maxTokensToSample; + this.stopSequences = fields?.stopSequences ?? this.stopSequences; + + this.streaming = fields?.streaming ?? false; + this.clientOptions = fields?.clientOptions ?? 
{}; + } + + /** + * Get the parameters used to invoke the model + */ + invocationParams( + options?: this["ParsedCallOptions"] + ): Omit & Kwargs { + return { + model: this.modelName, + temperature: this.temperature, + top_k: this.topK, + top_p: this.topP, + stop_sequences: + options?.stop?.concat(DEFAULT_STOP_SEQUENCES) ?? + this.stopSequences ?? + DEFAULT_STOP_SEQUENCES, + max_tokens_to_sample: this.maxTokensToSample, + stream: this.streaming, + ...this.invocationKwargs, + }; + } + + /** @ignore */ + _identifyingParams() { + return { + model_name: this.modelName, + ...this.invocationParams(), + }; + } + + /** + * Get the identifying parameters for the model + */ + identifyingParams() { + return { + model_name: this.modelName, + ...this.invocationParams(), + }; + } + + async *_streamResponseChunks( + messages: BaseMessage[], + options: this["ParsedCallOptions"], + runManager?: CallbackManagerForLLMRun + ): AsyncGenerator { + const params = this.invocationParams(options); + const stream = await this.createStreamWithRetry({ + ...params, + prompt: this.formatMessagesAsPrompt(messages), + }); + let modelSent = false; + let stopReasonSent = false; + for await (const data of stream) { + if (options.signal?.aborted) { + stream.controller.abort(); + throw new Error("AbortError: User aborted the request."); + } + const additional_kwargs: Record = {}; + if (data.model && !modelSent) { + additional_kwargs.model = data.model; + modelSent = true; + } else if (data.stop_reason && !stopReasonSent) { + additional_kwargs.stop_reason = data.stop_reason; + stopReasonSent = true; + } + const delta = data.completion ?? ""; + yield new ChatGenerationChunk({ + message: new AIMessageChunk({ + content: delta, + additional_kwargs, + }), + text: delta, + }); + await runManager?.handleLLMNewToken(delta); + if (data.stop_reason) { + break; + } + } + } + + /** + * Formats messages as a prompt for the model. + * @param messages The base messages to format as a prompt. 
+ * @returns The formatted prompt. + */ + protected formatMessagesAsPrompt(messages: BaseMessage[]): string { + return ( + messages + .map((message) => { + const messagePrompt = getAnthropicPromptFromMessage(message); + return `${messagePrompt} ${message.content}`; + }) + .join("") + AI_PROMPT + ); + } + + /** @ignore */ + async _generate( + messages: BaseMessage[], + options: this["ParsedCallOptions"], + runManager?: CallbackManagerForLLMRun + ): Promise { + if (this.stopSequences && options.stop) { + throw new Error( + `"stopSequence" parameter found in input and default params` + ); + } + + const params = this.invocationParams(options); + let response; + if (params.stream) { + response = { + completion: "", + model: "", + stop_reason: "", + }; + const stream = await this._streamResponseChunks( + messages, + options, + runManager + ); + for await (const chunk of stream) { + response.completion += chunk.message.content; + response.model = + (chunk.message.additional_kwargs.model as string) ?? response.model; + response.stop_reason = + (chunk.message.additional_kwargs.stop_reason as string) ?? + response.stop_reason; + } + } else { + response = await this.completionWithRetry( + { + ...params, + prompt: this.formatMessagesAsPrompt(messages), + }, + { signal: options.signal } + ); + } + + const generations: ChatGeneration[] = (response.completion ?? "") + .split(AI_PROMPT) + .map((message) => ({ + text: message, + message: new AIMessage(message), + })); + + return { + generations, + }; + } + + /** + * Creates a streaming request with retry. + * @param request The parameters for creating a completion. + * @returns A streaming request. + */ + protected async createStreamWithRetry( + request: CompletionCreateParams & Kwargs + ): Promise> { + if (!this.streamingClient) { + const options = this.apiUrl ? 
{ baseURL: this.apiUrl } : undefined; + this.streamingClient = new Anthropic({ + ...this.clientOptions, + ...options, + apiKey: this.anthropicApiKey, + maxRetries: 0, + }); + } + const makeCompletionRequest = async () => + this.streamingClient.completions.create( + { ...request, stream: true }, + { headers: request.headers } + ); + return this.caller.call(makeCompletionRequest); + } + + /** @ignore */ + protected async completionWithRetry( + request: CompletionCreateParams & Kwargs, + options: { signal?: AbortSignal } + ): Promise { + if (!this.anthropicApiKey) { + throw new Error("Missing Anthropic API key."); + } + if (!this.batchClient) { + const options = this.apiUrl ? { baseURL: this.apiUrl } : undefined; + this.batchClient = new Anthropic({ + ...this.clientOptions, + ...options, + apiKey: this.anthropicApiKey, + maxRetries: 0, + }); + } + const makeCompletionRequest = async () => + this.batchClient.completions.create( + { ...request, stream: false }, + { headers: request.headers } + ); + return this.caller.callWithOptions( + { signal: options.signal }, + makeCompletionRequest + ); + } + + _llmType() { + return "anthropic"; + } + + /** @ignore */ + _combineLLMOutput() { + return []; + } +} diff --git a/libs/langchain-community/src/chat_models/baiduwenxin.ts b/libs/langchain-community/src/chat_models/baiduwenxin.ts new file mode 100644 index 000000000000..46d0a6d952ca --- /dev/null +++ b/libs/langchain-community/src/chat_models/baiduwenxin.ts @@ -0,0 +1,546 @@ +import { BaseChatModel, type BaseChatModelParams } from "@langchain/core/language_models/chat_models"; +import { + AIMessage, + BaseMessage, + ChatMessage, +} from "@langchain/core/messages"; +import { + ChatGeneration, + ChatResult +} from "@langchain/core/outputs"; +import { CallbackManagerForLLMRun } from "@langchain/core/callbacks/manager"; +import { getEnvironmentVariable } from "@langchain/core/utils/env"; + +/** + * Type representing the role of a message in the Wenxin chat model. 
+ */ +export type WenxinMessageRole = "assistant" | "user"; + +/** + * Interface representing a message in the Wenxin chat model. + */ +interface WenxinMessage { + role: WenxinMessageRole; + content: string; +} + +/** + * Interface representing the usage of tokens in a chat completion. + */ +interface TokenUsage { + completionTokens?: number; + promptTokens?: number; + totalTokens?: number; +} + +/** + * Interface representing a request for a chat completion. + */ +interface ChatCompletionRequest { + messages: WenxinMessage[]; + stream?: boolean; + user_id?: string; + temperature?: number; + top_p?: number; + penalty_score?: number; + system?: string; +} + +/** + * Interface representing a response from a chat completion. + */ +interface ChatCompletionResponse { + id: string; + object: string; + created: number; + result: string; + need_clear_history: boolean; + usage: TokenUsage; +} + +/** + * Interface defining the input to the ChatBaiduWenxin class. + */ +declare interface BaiduWenxinChatInput { + /** Model name to use. Available options are: ERNIE-Bot, ERNIE-Bot-turbo, ERNIE-Bot-4 + * @default "ERNIE-Bot-turbo" + */ + modelName: string; + + /** Whether to stream the results or not. Defaults to false. */ + streaming?: boolean; + + /** Messages to pass as a prefix to the prompt */ + prefixMessages?: WenxinMessage[]; + + /** + * ID of the end-user who made requests. + */ + userId?: string; + + /** + * API key to use when making requests. Defaults to the value of + * `BAIDU_API_KEY` environment variable. + */ + baiduApiKey?: string; + + /** + * Secret key to use when making requests. Defaults to the value of + * `BAIDU_SECRET_KEY` environment variable. + */ + baiduSecretKey?: string; + + /** Amount of randomness injected into the response. Ranges + * from 0 to 1 (0 is not included). Use temp closer to 0 for analytical / + * multiple choice, and temp closer to 1 for creative + * and generative tasks. Defaults to 0.95. 
+ */ + temperature?: number; + + /** Total probability mass of tokens to consider at each step. Range + * from 0 to 1.0. Defaults to 0.8. + */ + topP?: number; + + /** Penalizes repeated tokens according to frequency. Range + * from 1.0 to 2.0. Defaults to 1.0. + */ + penaltyScore?: number; +} + +/** + * Function that extracts the custom role of a generic chat message. + * @param message Chat message from which to extract the custom role. + * @returns The custom role of the chat message. + */ +function extractGenericMessageCustomRole(message: ChatMessage) { + if (message.role !== "assistant" && message.role !== "user") { + console.warn(`Unknown message role: ${message.role}`); + } + + return message.role as WenxinMessageRole; +} + +/** + * Function that converts a base message to a Wenxin message role. + * @param message Base message to convert. + * @returns The Wenxin message role. + */ +function messageToWenxinRole(message: BaseMessage): WenxinMessageRole { + const type = message._getType(); + switch (type) { + case "ai": + return "assistant"; + case "human": + return "user"; + case "system": + throw new Error("System messages should not be here"); + case "function": + throw new Error("Function messages not supported"); + case "generic": { + if (!ChatMessage.isInstance(message)) + throw new Error("Invalid generic chat message"); + return extractGenericMessageCustomRole(message); + } + default: + throw new Error(`Unknown message type: ${type}`); + } +} + +/** + * Wrapper around Baidu ERNIE large language models that use the Chat endpoint. + * + * To use you should have the `BAIDU_API_KEY` and `BAIDU_SECRET_KEY` + * environment variable set. 
+ * + * @augments BaseLLM + * @augments BaiduERNIEInput + * @example + * ```typescript + * const ernieTurbo = new ChatBaiduWenxin({ + * baiduApiKey: "YOUR-API-KEY", + * baiduSecretKey: "YOUR-SECRET-KEY", + * }); + * + * const ernie = new ChatBaiduWenxin({ + * modelName: "ERNIE-Bot", + * temperature: 1, + * baiduApiKey: "YOUR-API-KEY", + * baiduSecretKey: "YOUR-SECRET-KEY", + * }); + * + * const messages = [new HumanMessage("Hello")]; + * + * let res = await ernieTurbo.call(messages); + * + * res = await ernie.call(messages); + * ``` + */ +export class ChatBaiduWenxin + extends BaseChatModel + implements BaiduWenxinChatInput +{ + static lc_name() { + return "ChatBaiduWenxin"; + } + + get callKeys(): string[] { + return ["stop", "signal", "options"]; + } + + get lc_secrets(): { [key: string]: string } | undefined { + return { + baiduApiKey: "BAIDU_API_KEY", + baiduSecretKey: "BAIDU_SECRET_KEY", + }; + } + + lc_namespace = ["langchain-community", "chat_models", this._llmType()]; + + get lc_aliases(): { [key: string]: string } | undefined { + return undefined; + } + + lc_serializable = true; + + baiduApiKey?: string; + + baiduSecretKey?: string; + + accessToken: string; + + streaming = false; + + prefixMessages?: WenxinMessage[]; + + userId?: string; + + modelName = "ERNIE-Bot-turbo"; + + apiUrl: string; + + temperature?: number | undefined; + + topP?: number | undefined; + + penaltyScore?: number | undefined; + + constructor(fields?: Partial & BaseChatModelParams) { + super(fields ?? {}); + + this.baiduApiKey = + fields?.baiduApiKey ?? getEnvironmentVariable("BAIDU_API_KEY"); + if (!this.baiduApiKey) { + throw new Error("Baidu API key not found"); + } + + this.baiduSecretKey = + fields?.baiduSecretKey ?? getEnvironmentVariable("BAIDU_SECRET_KEY"); + if (!this.baiduSecretKey) { + throw new Error("Baidu Secret key not found"); + } + + this.streaming = fields?.streaming ?? this.streaming; + this.prefixMessages = fields?.prefixMessages ?? 
this.prefixMessages; + this.userId = fields?.userId ?? this.userId; + this.temperature = fields?.temperature ?? this.temperature; + this.topP = fields?.topP ?? this.topP; + this.penaltyScore = fields?.penaltyScore ?? this.penaltyScore; + + this.modelName = fields?.modelName ?? this.modelName; + + if (this.modelName === "ERNIE-Bot") { + this.apiUrl = + "https://aip.baidubce.com/rpc/2.0/ai_custom/v1/wenxinworkshop/chat/completions"; + } else if (this.modelName === "ERNIE-Bot-turbo") { + this.apiUrl = + "https://aip.baidubce.com/rpc/2.0/ai_custom/v1/wenxinworkshop/chat/eb-instant"; + } else if (this.modelName === "ERNIE-Bot-4") { + this.apiUrl = + "https://aip.baidubce.com/rpc/2.0/ai_custom/v1/wenxinworkshop/chat/completions_pro"; + } else { + throw new Error(`Invalid model name: ${this.modelName}`); + } + } + + /** + * Method that retrieves the access token for making requests to the Baidu + * API. + * @param options Optional parsed call options. + * @returns The access token for making requests to the Baidu API. 
+ */ + async getAccessToken(options?: this["ParsedCallOptions"]) { + const url = `https://aip.baidubce.com/oauth/2.0/token?grant_type=client_credentials&client_id=${this.baiduApiKey}&client_secret=${this.baiduSecretKey}`; + const response = await fetch(url, { + method: "POST", + headers: { + "Content-Type": "application/json", + Accept: "application/json", + }, + signal: options?.signal, + }); + if (!response.ok) { + const text = await response.text(); + const error = new Error( + `Baidu get access token failed with status code ${response.status}, response: ${text}` + ); + // eslint-disable-next-line @typescript-eslint/no-explicit-any + (error as any).response = response; + throw error; + } + const json = await response.json(); + return json.access_token; + } + + /** + * Get the parameters used to invoke the model + */ + invocationParams(): Omit { + return { + stream: this.streaming, + user_id: this.userId, + temperature: this.temperature, + top_p: this.topP, + penalty_score: this.penaltyScore, + }; + } + + /** + * Get the identifying parameters for the model + */ + identifyingParams() { + return { + model_name: this.modelName, + ...this.invocationParams(), + }; + } + + /** @ignore */ + async _generate( + messages: BaseMessage[], + options?: this["ParsedCallOptions"], + runManager?: CallbackManagerForLLMRun + ): Promise { + const tokenUsage: TokenUsage = {}; + + const params = this.invocationParams(); + + // Wenxin requires the system message to be put in the params, not messages array + const systemMessage = messages.find( + (message) => message._getType() === "system" + ); + if (systemMessage) { + // eslint-disable-next-line no-param-reassign + messages = messages.filter((message) => message !== systemMessage); + params.system = systemMessage.text; + } + const messagesMapped: WenxinMessage[] = messages.map((message) => ({ + role: messageToWenxinRole(message), + content: message.text, + })); + + const data = params.stream + ? 
await new Promise((resolve, reject) => { + let response: ChatCompletionResponse; + let rejected = false; + let resolved = false; + this.completionWithRetry( + { + ...params, + messages: messagesMapped, + }, + true, + options?.signal, + (event) => { + const data = JSON.parse(event.data); + + if (data?.error_code) { + if (rejected) { + return; + } + rejected = true; + reject(new Error(data?.error_msg)); + return; + } + + const message = data as { + id: string; + object: string; + created: number; + sentence_id?: number; + is_end: boolean; + result: string; + need_clear_history: boolean; + usage: TokenUsage; + }; + + // on the first message set the response properties + if (!response) { + response = { + id: message.id, + object: message.object, + created: message.created, + result: message.result, + need_clear_history: message.need_clear_history, + usage: message.usage, + }; + } else { + response.result += message.result; + response.created = message.created; + response.need_clear_history = message.need_clear_history; + response.usage = message.usage; + } + + // TODO this should pass part.index to the callback + // when that's supported there + // eslint-disable-next-line no-void + void runManager?.handleLLMNewToken(message.result ?? ""); + + if (message.is_end) { + if (resolved || rejected) { + return; + } + resolved = true; + resolve(response); + } + } + ).catch((error) => { + if (!rejected) { + rejected = true; + reject(error); + } + }); + }) + : await this.completionWithRetry( + { + ...params, + messages: messagesMapped, + }, + false, + options?.signal + ).then((data) => { + if (data?.error_code) { + throw new Error(data?.error_msg); + } + return data; + }); + + const { + completion_tokens: completionTokens, + prompt_tokens: promptTokens, + total_tokens: totalTokens, + } = data.usage ?? {}; + + if (completionTokens) { + tokenUsage.completionTokens = + (tokenUsage.completionTokens ?? 
0) + completionTokens; + } + + if (promptTokens) { + tokenUsage.promptTokens = (tokenUsage.promptTokens ?? 0) + promptTokens; + } + + if (totalTokens) { + tokenUsage.totalTokens = (tokenUsage.totalTokens ?? 0) + totalTokens; + } + + const generations: ChatGeneration[] = []; + const text = data.result ?? ""; + generations.push({ + text, + message: new AIMessage(text), + }); + return { + generations, + llmOutput: { tokenUsage }, + }; + } + + /** @ignore */ + async completionWithRetry( + request: ChatCompletionRequest, + stream: boolean, + signal?: AbortSignal, + onmessage?: (event: MessageEvent) => void + ) { + // The first run will get the accessToken + if (!this.accessToken) { + this.accessToken = await this.getAccessToken(); + } + + const makeCompletionRequest = async () => { + const url = `${this.apiUrl}?access_token=${this.accessToken}`; + const response = await fetch(url, { + method: "POST", + headers: { + "Content-Type": "application/json", + }, + body: JSON.stringify(request), + signal, + }); + + if (!stream) { + return response.json(); + } else { + if (response.body) { + // response will not be a stream if an error occurred + if ( + !response.headers + .get("content-type") + ?.startsWith("text/event-stream") + ) { + onmessage?.( + new MessageEvent("message", { + data: await response.text(), + }) + ); + return; + } + + const reader = response.body.getReader(); + + const decoder = new TextDecoder("utf-8"); + let data = ""; + + let continueReading = true; + while (continueReading) { + const { done, value } = await reader.read(); + if (done) { + continueReading = false; + break; + } + data += decoder.decode(value); + + let continueProcessing = true; + while (continueProcessing) { + const newlineIndex = data.indexOf("\n"); + if (newlineIndex === -1) { + continueProcessing = false; + break; + } + const line = data.slice(0, newlineIndex); + data = data.slice(newlineIndex + 1); + + if (line.startsWith("data:")) { + const event = new MessageEvent("message", { + data: 
line.slice("data:".length).trim(), + }); + onmessage?.(event); + } + } + } + } + } + }; + return this.caller.call(makeCompletionRequest); + } + + _llmType() { + return "baiduwenxin"; + } + + /** @ignore */ + _combineLLMOutput() { + return []; + } +} diff --git a/libs/langchain-community/src/chat_models/bedrock/index.ts b/libs/langchain-community/src/chat_models/bedrock/index.ts new file mode 100644 index 000000000000..07b5ab549ace --- /dev/null +++ b/libs/langchain-community/src/chat_models/bedrock/index.ts @@ -0,0 +1,40 @@ +import { defaultProvider } from "@aws-sdk/credential-provider-node"; + +import type { BaseChatModelParams } from "@langchain/core/language_models/chat_models"; + +import { BaseBedrockInput } from "../../util/bedrock.js"; +import { BedrockChat as BaseBedrockChat } from "./web.js"; + +/** + * @example + * ```typescript + * const model = new BedrockChat({ + * model: "anthropic.claude-v2", + * region: "us-east-1", + * }); + * const res = await model.invoke([{ content: "Tell me a joke" }]); + * console.log(res); + * ``` + */ +export class BedrockChat extends BaseBedrockChat { + static lc_name() { + return "BedrockChat"; + } + + constructor(fields?: Partial & BaseChatModelParams) { + super({ + ...fields, + credentials: fields?.credentials ?? defaultProvider(), + }); + } +} + +export { + convertMessagesToPromptAnthropic, + convertMessagesToPrompt, +} from "./web.js"; + +/** + * @deprecated Use `BedrockChat` instead. 
+ */ +export const ChatBedrock = BedrockChat; diff --git a/libs/langchain-community/src/chat_models/bedrock/web.ts b/libs/langchain-community/src/chat_models/bedrock/web.ts new file mode 100644 index 000000000000..f76518d00cd5 --- /dev/null +++ b/libs/langchain-community/src/chat_models/bedrock/web.ts @@ -0,0 +1,434 @@ +import { SignatureV4 } from "@smithy/signature-v4"; +import { HttpRequest } from "@smithy/protocol-http"; +import { EventStreamCodec } from "@smithy/eventstream-codec"; +import { fromUtf8, toUtf8 } from "@smithy/util-utf8"; +import { Sha256 } from "@aws-crypto/sha256-js"; + +import { CallbackManagerForLLMRun } from "@langchain/core/callbacks/manager"; +import { SimpleChatModel, type BaseChatModelParams } from "@langchain/core/language_models/chat_models"; +import { getEnvironmentVariable } from "@langchain/core/utils/env"; +import { + BaseBedrockInput, + BedrockLLMInputOutputAdapter, + type CredentialType, +} from "../../util/bedrock.js"; +import { + AIMessageChunk, + BaseMessage, + AIMessage, + ChatMessage, +} from "@langchain/core/messages"; +import { ChatGenerationChunk } from "@langchain/core/outputs"; + +import type { SerializedFields } from "../../load/map_keys.js"; + +function convertOneMessageToText( + message: BaseMessage, + humanPrompt: string, + aiPrompt: string +): string { + if (message._getType() === "human") { + return `${humanPrompt} ${message.content}`; + } else if (message._getType() === "ai") { + return `${aiPrompt} ${message.content}`; + } else if (message._getType() === "system") { + return `${humanPrompt} ${message.content}`; + } else if (ChatMessage.isInstance(message)) { + return `\n\n${ + message.role[0].toUpperCase() + message.role.slice(1) + }: {message.content}`; + } + throw new Error(`Unknown role: ${message._getType()}`); +} + +export function convertMessagesToPromptAnthropic( + messages: BaseMessage[], + humanPrompt = "\n\nHuman:", + aiPrompt = "\n\nAssistant:" +): string { + const messagesCopy = [...messages]; + + if 
( + messagesCopy.length === 0 || + messagesCopy[messagesCopy.length - 1]._getType() !== "ai" + ) { + messagesCopy.push(new AIMessage({ content: "" })); + } + + return messagesCopy + .map((message) => convertOneMessageToText(message, humanPrompt, aiPrompt)) + .join(""); +} + +/** + * Function that converts an array of messages into a single string prompt + * that can be used as input for a chat model. It delegates the conversion + * logic to the appropriate provider-specific function. + * @param messages Array of messages to be converted. + * @param options Options to be used during the conversion. + * @returns A string prompt that can be used as input for a chat model. + */ +export function convertMessagesToPrompt( + messages: BaseMessage[], + provider: string +): string { + if (provider === "anthropic") { + return convertMessagesToPromptAnthropic(messages); + } + throw new Error(`Provider ${provider} does not support chat.`); +} + +/** + * A type of Large Language Model (LLM) that interacts with the Bedrock + * service. It extends the base `LLM` class and implements the + * `BaseBedrockInput` interface. The class is designed to authenticate and + * interact with the Bedrock service, which is a part of Amazon Web + * Services (AWS). It uses AWS credentials for authentication and can be + * configured with various parameters such as the model to use, the AWS + * region, and the maximum number of tokens to generate. 
+ * @example + * ```typescript + * const model = new BedrockChat({ + * model: "anthropic.claude-v2", + * region: "us-east-1", + * }); + * const res = await model.invoke([{ content: "Tell me a joke" }]); + * console.log(res); + * ``` + */ +export class BedrockChat extends SimpleChatModel implements BaseBedrockInput { + model = "amazon.titan-tg1-large"; + + region: string; + + credentials: CredentialType; + + temperature?: number | undefined = undefined; + + maxTokens?: number | undefined = undefined; + + fetchFn: typeof fetch; + + endpointHost?: string; + + /** @deprecated */ + stopSequences?: string[]; + + modelKwargs?: Record; + + codec: EventStreamCodec = new EventStreamCodec(toUtf8, fromUtf8); + + streaming = false; + + lc_serializable = true; + + lc_namespace = ["langchain-community", "chat_models", this._llmType()]; + + get lc_aliases(): Record { + return { + model: "model_id", + region: "region_name", + }; + } + + get lc_secrets(): { [key: string]: string } | undefined { + return { + "credentials.accessKeyId": "BEDROCK_AWS_ACCESS_KEY_ID", + "credentials.secretAccessKey": "BEDROCK_AWS_SECRET_ACCESS_KEY", + }; + } + + get lc_attributes(): SerializedFields | undefined { + return { region: this.region }; + } + + _llmType() { + return "bedrock"; + } + + static lc_name() { + return "BedrockChat"; + } + + constructor(fields?: Partial & BaseChatModelParams) { + super(fields ?? {}); + + this.model = fields?.model ?? this.model; + const allowedModels = ["ai21", "anthropic", "amazon", "cohere", "meta"]; + if (!allowedModels.includes(this.model.split(".")[0])) { + throw new Error( + `Unknown model: '${this.model}', only these are supported: ${allowedModels}` + ); + } + const region = + fields?.region ?? getEnvironmentVariable("AWS_DEFAULT_REGION"); + if (!region) { + throw new Error( + "Please set the AWS_DEFAULT_REGION environment variable or pass it to the constructor as the region field." 
+ ); + } + this.region = region; + + const credentials = fields?.credentials; + if (!credentials) { + throw new Error( + "Please set the AWS credentials in the 'credentials' field." + ); + } + this.credentials = credentials; + + this.temperature = fields?.temperature ?? this.temperature; + this.maxTokens = fields?.maxTokens ?? this.maxTokens; + this.fetchFn = fields?.fetchFn ?? fetch.bind(globalThis); + this.endpointHost = fields?.endpointHost ?? fields?.endpointUrl; + this.stopSequences = fields?.stopSequences; + this.modelKwargs = fields?.modelKwargs; + this.streaming = fields?.streaming ?? this.streaming; + } + + /** Call out to Bedrock service model. + Arguments: + prompt: The prompt to pass into the model. + + Returns: + The string generated by the model. + + Example: + response = model.call("Tell me a joke.") + */ + async _call( + messages: BaseMessage[], + options: this["ParsedCallOptions"], + runManager?: CallbackManagerForLLMRun + ): Promise { + const service = "bedrock-runtime"; + const endpointHost = + this.endpointHost ?? `${service}.${this.region}.amazonaws.com`; + const provider = this.model.split(".")[0]; + if (this.streaming) { + const stream = this._streamResponseChunks(messages, options, runManager); + let finalResult: ChatGenerationChunk | undefined; + for await (const chunk of stream) { + if (finalResult === undefined) { + finalResult = chunk; + } else { + finalResult = finalResult.concat(chunk); + } + } + const messageContent = finalResult?.message.content; + if (messageContent && typeof messageContent !== "string") { + throw new Error( + "Non-string output for ChatBedrock is currently not supported." + ); + } + return messageContent ?? ""; + } + + const response = await this._signedFetch(messages, options, { + bedrockMethod: "invoke", + endpointHost, + provider, + }); + const json = await response.json(); + if (!response.ok) { + throw new Error( + `Error ${response.status}: ${json.message ?? 
JSON.stringify(json)}` + ); + } + const text = BedrockLLMInputOutputAdapter.prepareOutput(provider, json); + return text; + } + + async _signedFetch( + messages: BaseMessage[], + options: this["ParsedCallOptions"], + fields: { + bedrockMethod: "invoke" | "invoke-with-response-stream"; + endpointHost: string; + provider: string; + } + ) { + const { bedrockMethod, endpointHost, provider } = fields; + const inputBody = BedrockLLMInputOutputAdapter.prepareInput( + provider, + convertMessagesToPromptAnthropic(messages), + this.maxTokens, + this.temperature, + options.stop ?? this.stopSequences, + this.modelKwargs, + fields.bedrockMethod + ); + + const url = new URL( + `https://${endpointHost}/model/${this.model}/${bedrockMethod}` + ); + + const request = new HttpRequest({ + hostname: url.hostname, + path: url.pathname, + protocol: url.protocol, + method: "POST", // method must be uppercase + body: JSON.stringify(inputBody), + query: Object.fromEntries(url.searchParams.entries()), + headers: { + // host is required by AWS Signature V4: https://docs.aws.amazon.com/general/latest/gr/sigv4-create-canonical-request.html + host: url.host, + accept: "application/json", + "content-type": "application/json", + }, + }); + + const signer = new SignatureV4({ + credentials: this.credentials, + service: "bedrock", + region: this.region, + sha256: Sha256, + }); + + const signedRequest = await signer.sign(request); + + // Send request to AWS using the low-level fetch API + const response = await this.caller.callWithOptions( + { signal: options.signal }, + async () => + this.fetchFn(url, { + headers: signedRequest.headers, + body: signedRequest.body, + method: signedRequest.method, + }) + ); + return response; + } + + async *_streamResponseChunks( + messages: BaseMessage[], + options: this["ParsedCallOptions"], + runManager?: CallbackManagerForLLMRun + ): AsyncGenerator { + const provider = this.model.split(".")[0]; + const service = "bedrock-runtime"; + + const endpointHost = + 
this.endpointHost ?? `${service}.${this.region}.amazonaws.com`; + + const bedrockMethod = + provider === "anthropic" || provider === "cohere" || provider === "meta" + ? "invoke-with-response-stream" + : "invoke"; + + const response = await this._signedFetch(messages, options, { + bedrockMethod, + endpointHost, + provider, + }); + + if (response.status < 200 || response.status >= 300) { + throw Error( + `Failed to access underlying url '${endpointHost}': got ${ + response.status + } ${response.statusText}: ${await response.text()}` + ); + } + + if ( + provider === "anthropic" || + provider === "cohere" || + provider === "meta" + ) { + const reader = response.body?.getReader(); + const decoder = new TextDecoder(); + for await (const chunk of this._readChunks(reader)) { + const event = this.codec.decode(chunk); + if ( + (event.headers[":event-type"] !== undefined && + event.headers[":event-type"].value !== "chunk") || + event.headers[":content-type"].value !== "application/json" + ) { + throw Error(`Failed to get event chunk: got ${chunk}`); + } + const body = JSON.parse(decoder.decode(event.body)); + if (body.message) { + throw new Error(body.message); + } + if (body.bytes !== undefined) { + const chunkResult = JSON.parse( + decoder.decode( + Uint8Array.from(atob(body.bytes), (m) => m.codePointAt(0) ?? 
0) + ) + ); + const text = BedrockLLMInputOutputAdapter.prepareOutput( + provider, + chunkResult + ); + yield new ChatGenerationChunk({ + text, + message: new AIMessageChunk({ content: text }), + }); + // eslint-disable-next-line no-void + void runManager?.handleLLMNewToken(text); + } + } + } else { + const json = await response.json(); + const text = BedrockLLMInputOutputAdapter.prepareOutput(provider, json); + yield new ChatGenerationChunk({ + text, + message: new AIMessageChunk({ content: text }), + }); + // eslint-disable-next-line no-void + void runManager?.handleLLMNewToken(text); + } + } + + // eslint-disable-next-line @typescript-eslint/no-explicit-any + _readChunks(reader: any) { + function _concatChunks(a: Uint8Array, b: Uint8Array) { + const newBuffer = new Uint8Array(a.length + b.length); + newBuffer.set(a); + newBuffer.set(b, a.length); + return newBuffer; + } + + function getMessageLength(buffer: Uint8Array) { + if (buffer.byteLength === 0) return 0; + const view = new DataView( + buffer.buffer, + buffer.byteOffset, + buffer.byteLength + ); + + return view.getUint32(0, false); + } + + return { + async *[Symbol.asyncIterator]() { + let readResult = await reader.read(); + + let buffer: Uint8Array = new Uint8Array(0); + while (!readResult.done) { + const chunk: Uint8Array = readResult.value; + + buffer = _concatChunks(buffer, chunk); + let messageLength = getMessageLength(buffer); + + while (buffer.byteLength > 0 && buffer.byteLength >= messageLength) { + yield buffer.slice(0, messageLength); + buffer = buffer.slice(messageLength); + messageLength = getMessageLength(buffer); + } + + readResult = await reader.read(); + } + }, + }; + } + + _combineLLMOutput() { + return {}; + } +} + +/** + * @deprecated Use `BedrockChat` instead. 
+ */ +export const ChatBedrock = BedrockChat; diff --git a/libs/langchain-community/src/chat_models/cloudflare_workersai.ts b/libs/langchain-community/src/chat_models/cloudflare_workersai.ts new file mode 100644 index 000000000000..dd34ecf2d177 --- /dev/null +++ b/libs/langchain-community/src/chat_models/cloudflare_workersai.ts @@ -0,0 +1,250 @@ +import { SimpleChatModel, type BaseChatModelParams } from "@langchain/core/language_models/chat_models"; +import type { BaseLanguageModelCallOptions } from "@langchain/core/language_models/base"; +import { + AIMessageChunk, + BaseMessage, + ChatMessage, +} from "@langchain/core/messages"; +import { ChatGenerationChunk } from "@langchain/core/outputs"; +import { getEnvironmentVariable } from "@langchain/core/utils/env"; +import { CallbackManagerForLLMRun } from "@langchain/core/callbacks/manager"; + +import type { CloudflareWorkersAIInput } from "../llms/cloudflare_workersai.js"; +import { convertEventStreamToIterableReadableDataStream } from "../util/event-source-parse.js"; + +/** + * An interface defining the options for a Cloudflare Workers AI call. It extends + * the BaseLanguageModelCallOptions interface. + */ +export interface ChatCloudflareWorkersAICallOptions + extends BaseLanguageModelCallOptions {} + +/** + * A class that enables calls to the Cloudflare Workers AI API to access large language + * models in a chat-like fashion. It extends the SimpleChatModel class and + * implements the CloudflareWorkersAIInput interface. 
+ * @example + * ```typescript + * const model = new ChatCloudflareWorkersAI({ + * model: "@cf/meta/llama-2-7b-chat-int8", + * cloudflareAccountId: process.env.CLOUDFLARE_ACCOUNT_ID, + * cloudflareApiToken: process.env.CLOUDFLARE_API_TOKEN + * }); + * + * const response = await model.invoke([ + * ["system", "You are a helpful assistant that translates English to German."], + * ["human", `Translate "I love programming".`] + * ]); + * + * console.log(response); + * ``` + */ +export class ChatCloudflareWorkersAI + extends SimpleChatModel + implements CloudflareWorkersAIInput +{ + static lc_name() { + return "ChatCloudflareWorkersAI"; + } + + lc_serializable = true; + + lc_namespace = ["langchain-community", "chat_models", this._llmType()]; + + model = "@cf/meta/llama-2-7b-chat-int8"; + + cloudflareAccountId?: string; + + cloudflareApiToken?: string; + + baseUrl: string; + + streaming = false; + + constructor(fields?: CloudflareWorkersAIInput & BaseChatModelParams) { + super(fields ?? {}); + + this.model = fields?.model ?? this.model; + this.streaming = fields?.streaming ?? this.streaming; + this.cloudflareAccountId = + fields?.cloudflareAccountId ?? + getEnvironmentVariable("CLOUDFLARE_ACCOUNT_ID"); + this.cloudflareApiToken = + fields?.cloudflareApiToken ?? + getEnvironmentVariable("CLOUDFLARE_API_TOKEN"); + this.baseUrl = + fields?.baseUrl ?? + `https://api.cloudflare.com/client/v4/accounts/${this.cloudflareAccountId}/ai/run`; + if (this.baseUrl.endsWith("/")) { + this.baseUrl = this.baseUrl.slice(0, -1); + } + } + + get lc_secrets(): { [key: string]: string } | undefined { + return { + cloudflareApiToken: "CLOUDFLARE_API_TOKEN", + }; + } + + _llmType() { + return "cloudflare"; + } + + /** Get the identifying parameters for this LLM. 
*/ + get identifyingParams() { + return { model: this.model }; + } + + /** + * Get the parameters used to invoke the model + */ + invocationParams(_options?: this["ParsedCallOptions"]) { + return { + model: this.model, + }; + } + + _combineLLMOutput() { + return {}; + } + + /** + * Method to validate the environment. + */ + validateEnvironment() { + if (!this.cloudflareAccountId) { + throw new Error( + `No Cloudflare account ID found. Please provide it when instantiating the CloudflareWorkersAI class, or set it as "CLOUDFLARE_ACCOUNT_ID" in your environment variables.` + ); + } + if (!this.cloudflareApiToken) { + throw new Error( + `No Cloudflare API key found. Please provide it when instantiating the CloudflareWorkersAI class, or set it as "CLOUDFLARE_API_KEY" in your environment variables.` + ); + } + } + + async _request( + messages: BaseMessage[], + options: this["ParsedCallOptions"], + stream?: boolean + ) { + this.validateEnvironment(); + const url = `${this.baseUrl}/${this.model}`; + const headers = { + Authorization: `Bearer ${this.cloudflareApiToken}`, + "Content-Type": "application/json", + }; + + const formattedMessages = this._formatMessages(messages); + + const data = { messages: formattedMessages, stream }; + return this.caller.call(async () => { + const response = await fetch(url, { + method: "POST", + headers, + body: JSON.stringify(data), + signal: options.signal, + }); + if (!response.ok) { + const error = new Error( + `Cloudflare LLM call failed with status code ${response.status}` + ); + // eslint-disable-next-line @typescript-eslint/no-explicit-any + (error as any).response = response; + throw error; + } + return response; + }); + } + + async *_streamResponseChunks( + messages: BaseMessage[], + options: this["ParsedCallOptions"], + runManager?: CallbackManagerForLLMRun + ): AsyncGenerator { + const response = await this._request(messages, options, true); + if (!response.body) { + throw new Error("Empty response from Cloudflare. 
Please try again."); + } + const stream = convertEventStreamToIterableReadableDataStream( + response.body + ); + for await (const chunk of stream) { + if (chunk !== "[DONE]") { + const parsedChunk = JSON.parse(chunk); + const generationChunk = new ChatGenerationChunk({ + message: new AIMessageChunk({ content: parsedChunk.response }), + text: parsedChunk.response, + }); + yield generationChunk; + // eslint-disable-next-line no-void + void runManager?.handleLLMNewToken(generationChunk.text ?? ""); + } + } + } + + protected _formatMessages( + messages: BaseMessage[] + ): { role: string; content: string }[] { + const formattedMessages = messages.map((message) => { + let role; + if (message._getType() === "human") { + role = "user"; + } else if (message._getType() === "ai") { + role = "assistant"; + } else if (message._getType() === "system") { + role = "system"; + } else if (ChatMessage.isInstance(message)) { + role = message.role; + } else { + console.warn( + `Unsupported message type passed to Cloudflare: "${message._getType()}"` + ); + role = "user"; + } + if (typeof message.content !== "string") { + throw new Error( + "ChatCloudflareWorkersAI currently does not support non-string message content." 
+ ); + } + return { + role, + content: message.content, + }; + }); + return formattedMessages; + } + + /** @ignore */ + async _call( + messages: BaseMessage[], + options: this["ParsedCallOptions"], + runManager?: CallbackManagerForLLMRun + ): Promise { + if (!this.streaming) { + const response = await this._request(messages, options); + + const responseData = await response.json(); + + return responseData.result.response; + } else { + const stream = this._streamResponseChunks(messages, options, runManager); + let finalResult: ChatGenerationChunk | undefined; + for await (const chunk of stream) { + if (finalResult === undefined) { + finalResult = chunk; + } else { + finalResult = finalResult.concat(chunk); + } + } + const messageContent = finalResult?.message.content; + if (messageContent && typeof messageContent !== "string") { + throw new Error( + "Non-string output for ChatCloudflareWorkersAI is currently not supported." + ); + } + return messageContent ?? ""; + } + } +} diff --git a/libs/langchain-community/src/chat_models/fireworks.ts b/libs/langchain-community/src/chat_models/fireworks.ts new file mode 100644 index 000000000000..dfb225c74c4e --- /dev/null +++ b/libs/langchain-community/src/chat_models/fireworks.ts @@ -0,0 +1,139 @@ +import type { BaseChatModelParams } from "@langchain/core/language_models/chat_models"; +import type { OpenAIClient, ChatOpenAICallOptions, OpenAIChatInput } from "@langchain/openai"; + +import type { OpenAICoreRequestOptions } from "../types/openai-types.js"; +import { ChatOpenAI } from "@langchain/openai"; +import { getEnvironmentVariable } from "@langchain/core/utils/env"; + +type FireworksUnsupportedArgs = + | "frequencyPenalty" + | "presencePenalty" + | "logitBias" + | "functions"; + +type FireworksUnsupportedCallOptions = "functions" | "function_call" | "tools"; + +export type ChatFireworksCallOptions = Partial< + Omit +>; + +/** + * Wrapper around Fireworks API for large language models fine-tuned for chat + * + * Fireworks 
API is compatible to the OpenAI API with some limitations described in + * https://readme.fireworks.ai/docs/openai-compatibility. + * + * To use, you should have the `openai` package installed and + * the `FIREWORKS_API_KEY` environment variable set. + * @example + * ```typescript + * const model = new ChatFireworks({ + * temperature: 0.9, + * fireworksApiKey: "YOUR-API-KEY", + * }); + * + * const response = await model.invoke("Hello, how are you?"); + * console.log(response); + * ``` + */ +export class ChatFireworks extends ChatOpenAI { + static lc_name() { + return "ChatFireworks"; + } + + _llmType() { + return "fireworks"; + } + + get lc_secrets(): { [key: string]: string } | undefined { + return { + fireworksApiKey: "FIREWORKS_API_KEY", + }; + } + + lc_serializable = true; + + lc_namespace = ["langchain-community", "chat_models", this._llmType()]; + + fireworksApiKey?: string; + + constructor( + fields?: Partial< + Omit + > & + BaseChatModelParams & { fireworksApiKey?: string } + ) { + const fireworksApiKey = + fields?.fireworksApiKey || getEnvironmentVariable("FIREWORKS_API_KEY"); + + if (!fireworksApiKey) { + throw new Error( + `Fireworks API key not found. 
Please set the FIREWORKS_API_KEY environment variable or provide the key into "fireworksApiKey"` + ); + } + + super({ + ...fields, + modelName: + fields?.modelName || "accounts/fireworks/models/llama-v2-13b-chat", + openAIApiKey: fireworksApiKey, + configuration: { + baseURL: "https://api.fireworks.ai/inference/v1", + }, + }); + + this.fireworksApiKey = fireworksApiKey; + } + + toJSON() { + const result = super.toJSON(); + + if ( + "kwargs" in result && + typeof result.kwargs === "object" && + result.kwargs != null + ) { + delete result.kwargs.openai_api_key; + delete result.kwargs.configuration; + } + + return result; + } + + async completionWithRetry( + request: OpenAIClient.Chat.ChatCompletionCreateParamsStreaming, + options?: OpenAICoreRequestOptions + ): Promise>; + + async completionWithRetry( + request: OpenAIClient.Chat.ChatCompletionCreateParamsNonStreaming, + options?: OpenAICoreRequestOptions + ): Promise; + + /** + * Calls the Fireworks API with retry logic in case of failures. + * @param request The request to send to the Fireworks API. + * @param options Optional configuration for the API call. + * @returns The response from the Fireworks API. 
+ */ + async completionWithRetry( + request: + | OpenAIClient.Chat.ChatCompletionCreateParamsStreaming + | OpenAIClient.Chat.ChatCompletionCreateParamsNonStreaming, + options?: OpenAICoreRequestOptions + ): Promise< + | AsyncIterable + | OpenAIClient.Chat.Completions.ChatCompletion + > { + delete request.frequency_penalty; + delete request.presence_penalty; + delete request.logit_bias; + delete request.functions; + + if (request.stream === true) { + return super.completionWithRetry(request, options); + } + + return super.completionWithRetry(request, options); + } +} diff --git a/libs/langchain-community/src/chat_models/googlepalm.ts b/libs/langchain-community/src/chat_models/googlepalm.ts new file mode 100644 index 000000000000..0d05b306531e --- /dev/null +++ b/libs/langchain-community/src/chat_models/googlepalm.ts @@ -0,0 +1,342 @@ +import { DiscussServiceClient } from "@google-ai/generativelanguage"; +import type { protos } from "@google-ai/generativelanguage"; +import { GoogleAuth } from "google-auth-library"; +import { CallbackManagerForLLMRun } from "@langchain/core/callbacks/manager"; +import { + AIMessage, + BaseMessage, + ChatMessage, + isBaseMessage, +} from "@langchain/core/messages"; +import { ChatResult } from "@langchain/core/outputs"; +import { getEnvironmentVariable } from "@langchain/core/utils/env"; +import { BaseChatModel, type BaseChatModelParams } from "@langchain/core/language_models/chat_models"; + +export type BaseMessageExamplePair = { + input: BaseMessage; + output: BaseMessage; +}; + +/** + * An interface defining the input to the ChatGooglePaLM class. + */ +export interface GooglePaLMChatInput extends BaseChatModelParams { + /** + * Model Name to use + * + * Note: The format must follow the pattern - `models/{model}` + */ + modelName?: string; + + /** + * Controls the randomness of the output. + * + * Values can range from [0.0,1.0], inclusive. 
A value closer to 1.0 + * will produce responses that are more varied and creative, while + * a value closer to 0.0 will typically result in less surprising + * responses from the model. + * + * Note: The default value varies by model + */ + temperature?: number; + + /** + * Top-p changes how the model selects tokens for output. + * + * Tokens are selected from most probable to least until the sum + * of their probabilities equals the top-p value. + * + * For example, if tokens A, B, and C have a probability of + * .3, .2, and .1 and the top-p value is .5, then the model will + * select either A or B as the next token (using temperature). + * + * Note: The default value varies by model + */ + topP?: number; + + /** + * Top-k changes how the model selects tokens for output. + * + * A top-k of 1 means the selected token is the most probable among + * all tokens in the model’s vocabulary (also called greedy decoding), + * while a top-k of 3 means that the next token is selected from + * among the 3 most probable tokens (using temperature). + * + * Note: The default value varies by model + */ + topK?: number; + + examples?: + | protos.google.ai.generativelanguage.v1beta2.IExample[] + | BaseMessageExamplePair[]; + + /** + * Google Palm API key to use + */ + apiKey?: string; +} + +function getMessageAuthor(message: BaseMessage) { + const type = message._getType(); + if (ChatMessage.isInstance(message)) { + return message.role; + } + return message.name ?? type; +} + +/** + * A class that wraps the Google Palm chat model. 
+ * @example + * ```typescript + * const model = new ChatGooglePaLM({ + * apiKey: "", + * temperature: 0.7, + * modelName: "models/chat-bison-001", + * topK: 40, + * topP: 1, + * examples: [ + * { + * input: new HumanMessage("What is your favorite sock color?"), + * output: new AIMessage("My favorite sock color be arrrr-ange!"), + * }, + * ], + * }); + * const questions = [ + * new SystemMessage( + * "You are a funny assistant that answers in pirate language." + * ), + * new HumanMessage("What is your favorite food?"), + * ]; + * const res = await model.call(questions); + * console.log({ res }); + * ``` + */ +export class ChatGooglePaLM + extends BaseChatModel + implements GooglePaLMChatInput +{ + static lc_name() { + return "ChatGooglePaLM"; + } + + lc_serializable = true; + + lc_namespace = ["langchain-community", "chat_models", this._llmType()]; + + get lc_secrets(): { [key: string]: string } | undefined { + return { + apiKey: "GOOGLE_PALM_API_KEY", + }; + } + + modelName = "models/chat-bison-001"; + + temperature?: number; // default value chosen based on model + + topP?: number; // default value chosen based on model + + topK?: number; // default value chosen based on model + + examples: protos.google.ai.generativelanguage.v1beta2.IExample[] = []; + + apiKey?: string; + + private client: DiscussServiceClient; + + constructor(fields?: GooglePaLMChatInput) { + super(fields ?? {}); + + this.modelName = fields?.modelName ?? this.modelName; + + this.temperature = fields?.temperature ?? this.temperature; + if (this.temperature && (this.temperature < 0 || this.temperature > 1)) { + throw new Error("`temperature` must be in the range of [0.0,1.0]"); + } + + this.topP = fields?.topP ?? this.topP; + if (this.topP && this.topP < 0) { + throw new Error("`topP` must be a positive integer"); + } + + this.topK = fields?.topK ?? 
this.topK; + if (this.topK && this.topK < 0) { + throw new Error("`topK` must be a positive integer"); + } + + this.examples = + fields?.examples?.map((example) => { + if ( + (isBaseMessage(example.input) && + typeof example.input.content !== "string") || + (isBaseMessage(example.output) && + typeof example.output.content !== "string") + ) { + throw new Error( + "GooglePaLM example messages may only have string content." + ); + } + return { + input: { + ...example.input, + content: example.input?.content as string, + }, + output: { + ...example.output, + content: example.output?.content as string, + }, + }; + }) ?? this.examples; + + this.apiKey = + fields?.apiKey ?? getEnvironmentVariable("GOOGLE_PALM_API_KEY"); + if (!this.apiKey) { + throw new Error( + "Please set an API key for Google Palm 2 in the environment variable GOOGLE_PALM_API_KEY or in the `apiKey` field of the GooglePalm constructor" + ); + } + + this.client = new DiscussServiceClient({ + authClient: new GoogleAuth().fromAPIKey(this.apiKey), + }); + } + + _combineLLMOutput() { + return []; + } + + _llmType() { + return "googlepalm"; + } + + async _generate( + messages: BaseMessage[], + options: this["ParsedCallOptions"], + runManager?: CallbackManagerForLLMRun + ): Promise { + const palmMessages = await this.caller.callWithOptions( + { signal: options.signal }, + this._generateMessage.bind(this), + this._mapBaseMessagesToPalmMessages(messages), + this._getPalmContextInstruction(messages), + this.examples + ); + const chatResult = this._mapPalmMessagesToChatResult(palmMessages); + + // Google Palm doesn't provide streaming as of now. But to support streaming handlers + // we call the handler with entire response text + void runManager?.handleLLMNewToken( + chatResult.generations.length > 0 ? 
chatResult.generations[0].text : "" + ); + + return chatResult; + } + + protected async _generateMessage( + messages: protos.google.ai.generativelanguage.v1beta2.IMessage[], + context?: string, + examples?: protos.google.ai.generativelanguage.v1beta2.IExample[] + ): Promise { + const [palmMessages] = await this.client.generateMessage({ + candidateCount: 1, + model: this.modelName, + temperature: this.temperature, + topK: this.topK, + topP: this.topP, + prompt: { + context, + examples, + messages, + }, + }); + return palmMessages; + } + + protected _getPalmContextInstruction( + messages: BaseMessage[] + ): string | undefined { + // get the first message and checks if it's a system 'system' messages + const systemMessage = + messages.length > 0 && getMessageAuthor(messages[0]) === "system" + ? messages[0] + : undefined; + if ( + systemMessage?.content !== undefined && + typeof systemMessage.content !== "string" + ) { + throw new Error("Non-string system message content is not supported."); + } + return systemMessage?.content; + } + + protected _mapBaseMessagesToPalmMessages( + messages: BaseMessage[] + ): protos.google.ai.generativelanguage.v1beta2.IMessage[] { + // remove all 'system' messages + const nonSystemMessages = messages.filter( + (m) => getMessageAuthor(m) !== "system" + ); + + // requires alternate human & ai messages. Throw error if two messages are consecutive + nonSystemMessages.forEach((msg, index) => { + if (index < 1) return; + if ( + getMessageAuthor(msg) === getMessageAuthor(nonSystemMessages[index - 1]) + ) { + throw new Error( + `Google PaLM requires alternate messages between authors` + ); + } + }); + + return nonSystemMessages.map((m) => { + if (typeof m.content !== "string") { + throw new Error( + "ChatGooglePaLM does not support non-string message content." 
+ ); + } + return { + author: getMessageAuthor(m), + content: m.content, + citationMetadata: { + citationSources: m.additional_kwargs.citationSources as + | protos.google.ai.generativelanguage.v1beta2.ICitationSource[] + | undefined, + }, + }; + }); + } + + protected _mapPalmMessagesToChatResult( + msgRes: protos.google.ai.generativelanguage.v1beta2.IGenerateMessageResponse + ): ChatResult { + if ( + msgRes.candidates && + msgRes.candidates.length > 0 && + msgRes.candidates[0] + ) { + const message = msgRes.candidates[0]; + return { + generations: [ + { + text: message.content ?? "", + message: new AIMessage({ + content: message.content ?? "", + name: message.author === null ? undefined : message.author, + additional_kwargs: { + citationSources: message.citationMetadata?.citationSources, + filters: msgRes.filters, // content filters applied + }, + }), + }, + ], + }; + } + // if rejected or error, return empty generations with reason in filters + return { + generations: [], + llmOutput: { + filters: msgRes.filters, + }, + }; + } +} diff --git a/libs/langchain-community/src/chat_models/googlevertexai/common.ts b/libs/langchain-community/src/chat_models/googlevertexai/common.ts new file mode 100644 index 000000000000..86cbec62f2d5 --- /dev/null +++ b/libs/langchain-community/src/chat_models/googlevertexai/common.ts @@ -0,0 +1,405 @@ +import type { BaseLanguageModelCallOptions } from "@langchain/core/language_models/base"; +import { BaseChatModel } from "@langchain/core/language_models/chat_models"; +import { CallbackManagerForLLMRun } from "@langchain/core/callbacks/manager"; +import { + AIMessage, + AIMessageChunk, + BaseMessage, + ChatMessage, +} from "@langchain/core/messages"; +import { + ChatGeneration, + ChatGenerationChunk, + ChatResult, + LLMResult +} from "@langchain/core/outputs"; + +import { + GoogleVertexAILLMConnection, + GoogleVertexAIStream, +} from "../../util/googlevertexai-connection.js"; +import { + GoogleVertexAIBaseLLMInput, + 
GoogleVertexAIBasePrediction, + GoogleVertexAILLMPredictions, + GoogleVertexAIModelParams, +} from "../../types/googlevertexai-types.js"; + +/** + * Represents a single "example" exchange that can be provided to + * help illustrate what a model response should look like. + */ +export interface ChatExample { + input: BaseMessage; + output: BaseMessage; +} + +/** + * Represents a single example exchange in the Google Vertex AI chat + * model. + */ +interface GoogleVertexAIChatExample { + input: GoogleVertexAIChatMessage; + output: GoogleVertexAIChatMessage; +} + +/** + * Represents the author of a chat message in the Google Vertex AI chat + * model. + */ +export type GoogleVertexAIChatAuthor = + | "user" // Represents the human for Code and CodeChat models + | "bot" // Represents the AI for Code models + | "system" // Represents the AI for CodeChat models + | "context"; // Represents contextual instructions + +export type GoogleVertexAIChatMessageFields = { + author?: GoogleVertexAIChatAuthor; + content: string; + name?: string; +}; + +/** + * Represents a chat message in the Google Vertex AI chat model. + */ +export class GoogleVertexAIChatMessage { + public author?: GoogleVertexAIChatAuthor; + + public content: string; + + public name?: string; + + constructor(fields: GoogleVertexAIChatMessageFields) { + this.author = fields.author; + this.content = fields.content; + this.name = fields.name; + } + + /** + * Extracts the role of a generic message and maps it to a Google Vertex + * AI chat author. + * @param message The chat message to extract the role from. + * @returns The role of the message mapped to a Google Vertex AI chat author. 
+ */ + static extractGenericMessageCustomRole(message: ChatMessage) { + if ( + message.role !== "system" && + message.role !== "bot" && + message.role !== "user" && + message.role !== "context" + ) { + console.warn(`Unknown message role: ${message.role}`); + } + + return message.role as GoogleVertexAIChatAuthor; + } + + /** + * Maps a message type to a Google Vertex AI chat author. + * @param message The message to map. + * @param model The model to use for mapping. + * @returns The message type mapped to a Google Vertex AI chat author. + */ + static mapMessageTypeToVertexChatAuthor( + message: BaseMessage, + model: string + ): GoogleVertexAIChatAuthor { + const type = message._getType(); + switch (type) { + case "ai": + return model.startsWith("codechat-") ? "system" : "bot"; + case "human": + return "user"; + case "system": + throw new Error( + `System messages are only supported as the first passed message for Google Vertex AI.` + ); + case "generic": { + if (!ChatMessage.isInstance(message)) + throw new Error("Invalid generic chat message"); + return GoogleVertexAIChatMessage.extractGenericMessageCustomRole( + message + ); + } + default: + throw new Error(`Unknown / unsupported message type: ${message}`); + } + } + + /** + * Creates a new Google Vertex AI chat message from a base message. + * @param message The base message to convert. + * @param model The model to use for conversion. + * @returns A new Google Vertex AI chat message. + */ + static fromChatMessage(message: BaseMessage, model: string) { + if (typeof message.content !== "string") { + throw new Error( + "ChatGoogleVertexAI does not support non-string message content." + ); + } + return new GoogleVertexAIChatMessage({ + author: GoogleVertexAIChatMessage.mapMessageTypeToVertexChatAuthor( + message, + model + ), + content: message.content, + }); + } +} + +/** + * Represents an instance of the Google Vertex AI chat model. 
+ */ +export interface GoogleVertexAIChatInstance { + context?: string; + examples?: GoogleVertexAIChatExample[]; + messages: GoogleVertexAIChatMessage[]; +} + +/** + * Defines the prediction output of the Google Vertex AI chat model. + */ +export interface GoogleVertexAIChatPrediction + extends GoogleVertexAIBasePrediction { + candidates: GoogleVertexAIChatMessage[]; +} + +/** + * Defines the input to the Google Vertex AI chat model. + */ +export interface GoogleVertexAIChatInput + extends GoogleVertexAIBaseLLMInput { + /** Instructions how the model should respond */ + context?: string; + + /** Help the model understand what an appropriate response is */ + examples?: ChatExample[]; +} + +/** + * Base class for Google Vertex AI chat models. + * Implemented subclasses must provide a GoogleVertexAILLMConnection + * with appropriate auth client. + */ +export class BaseChatGoogleVertexAI + extends BaseChatModel + implements GoogleVertexAIChatInput +{ + lc_serializable = true; + + model = "chat-bison"; + + temperature = 0.2; + + maxOutputTokens = 1024; + + topP = 0.8; + + topK = 40; + + examples: ChatExample[] = []; + + connection: GoogleVertexAILLMConnection< + BaseLanguageModelCallOptions, + GoogleVertexAIChatInstance, + GoogleVertexAIChatPrediction, + AuthOptions + >; + + streamedConnection: GoogleVertexAILLMConnection< + BaseLanguageModelCallOptions, + GoogleVertexAIChatInstance, + GoogleVertexAIChatPrediction, + AuthOptions + >; + + lc_namespace = ["langchain-community", "chat_models", this._llmType()]; + + get lc_aliases(): Record { + return { + model: "model_name", + }; + } + + constructor(fields?: GoogleVertexAIChatInput) { + super(fields ?? {}); + + this.model = fields?.model ?? this.model; + this.temperature = fields?.temperature ?? this.temperature; + this.maxOutputTokens = fields?.maxOutputTokens ?? this.maxOutputTokens; + this.topP = fields?.topP ?? this.topP; + this.topK = fields?.topK ?? this.topK; + this.examples = fields?.examples ?? 
this.examples; + } + + _combineLLMOutput(): LLMResult["llmOutput"] { + // TODO: Combine the safetyAttributes + return []; + } + + async *_streamResponseChunks( + _messages: BaseMessage[], + _options: this["ParsedCallOptions"], + _runManager?: CallbackManagerForLLMRun + ): AsyncGenerator { + // Make the call as a streaming request + const instance: GoogleVertexAIChatInstance = this.createInstance(_messages); + const parameters = this.formatParameters(); + const result = await this.streamedConnection.request( + [instance], + parameters, + _options + ); + + // Get the streaming parser of the response + const stream = result.data as GoogleVertexAIStream; + + // Loop until the end of the stream + // During the loop, yield each time we get a chunk from the streaming parser + // that is either available or added to the queue + while (!stream.streamDone) { + const output = await stream.nextChunk(); + const chunk = + output !== null + ? BaseChatGoogleVertexAI.convertPredictionChunk(output) + : new ChatGenerationChunk({ + text: "", + message: new AIMessageChunk(""), + generationInfo: { finishReason: "stop" }, + }); + yield chunk; + } + } + + async _generate( + messages: BaseMessage[], + options: this["ParsedCallOptions"] + ): Promise { + const instance: GoogleVertexAIChatInstance = this.createInstance(messages); + const parameters: GoogleVertexAIModelParams = this.formatParameters(); + + const result = await this.connection.request( + [instance], + parameters, + options + ); + + const generations = + ( + result?.data as GoogleVertexAILLMPredictions + )?.predictions?.map((prediction) => + BaseChatGoogleVertexAI.convertPrediction(prediction) + ) ?? []; + return { + generations, + }; + } + + _llmType(): string { + return "vertexai"; + } + + /** + * Creates an instance of the Google Vertex AI chat model. + * @param messages The messages for the model instance. + * @returns A new instance of the Google Vertex AI chat model. 
+ */ + createInstance(messages: BaseMessage[]): GoogleVertexAIChatInstance { + let context = ""; + let conversationMessages = messages; + if (messages[0]?._getType() === "system") { + if (typeof messages[0].content !== "string") { + throw new Error( + "ChatGoogleVertexAI does not support non-string message content." + ); + } + context = messages[0].content; + conversationMessages = messages.slice(1); + } + // https://cloud.google.com/vertex-ai/docs/generative-ai/chat/test-chat-prompts + if (conversationMessages.length % 2 === 0) { + throw new Error( + `Google Vertex AI requires an odd number of messages to generate a response.` + ); + } + const vertexChatMessages = conversationMessages.map((baseMessage, i) => { + const currMessage = GoogleVertexAIChatMessage.fromChatMessage( + baseMessage, + this.model + ); + const prevMessage = + i > 0 + ? GoogleVertexAIChatMessage.fromChatMessage( + conversationMessages[i - 1], + this.model + ) + : null; + + // https://cloud.google.com/vertex-ai/docs/generative-ai/chat/chat-prompts#messages + if (prevMessage && currMessage.author === prevMessage.author) { + throw new Error( + `Google Vertex AI requires AI and human messages to alternate.` + ); + } + return currMessage; + }); + + const examples = this.examples.map((example) => ({ + input: GoogleVertexAIChatMessage.fromChatMessage( + example.input, + this.model + ), + output: GoogleVertexAIChatMessage.fromChatMessage( + example.output, + this.model + ), + })); + + const instance: GoogleVertexAIChatInstance = { + context, + examples, + messages: vertexChatMessages, + }; + + return instance; + } + + formatParameters(): GoogleVertexAIModelParams { + return { + temperature: this.temperature, + topK: this.topK, + topP: this.topP, + maxOutputTokens: this.maxOutputTokens, + }; + } + + /** + * Converts a prediction from the Google Vertex AI chat model to a chat + * generation. + * @param prediction The prediction to convert. + * @returns The converted chat generation. 
+ */ + static convertPrediction( + prediction: GoogleVertexAIChatPrediction + ): ChatGeneration { + const message = prediction?.candidates[0]; + return { + text: message?.content, + message: new AIMessage(message.content), + generationInfo: prediction, + }; + } + + // eslint-disable-next-line @typescript-eslint/no-explicit-any + static convertPredictionChunk(output: any): ChatGenerationChunk { + const generation: ChatGeneration = BaseChatGoogleVertexAI.convertPrediction( + output.outputs[0] + ); + return new ChatGenerationChunk({ + text: generation.text, + message: new AIMessageChunk(generation.message), + generationInfo: generation.generationInfo, + }); + } +} diff --git a/libs/langchain-community/src/chat_models/googlevertexai/index.ts b/libs/langchain-community/src/chat_models/googlevertexai/index.ts new file mode 100644 index 000000000000..e8a3a07da320 --- /dev/null +++ b/libs/langchain-community/src/chat_models/googlevertexai/index.ts @@ -0,0 +1,64 @@ +import { GoogleAuthOptions } from "google-auth-library"; +import { BaseChatGoogleVertexAI, GoogleVertexAIChatInput } from "./common.js"; +import { GoogleVertexAILLMConnection } from "../../util/googlevertexai-connection.js"; +import { GAuthClient } from "../../util/googlevertexai-gauth.js"; + +/** + * Enables calls to the Google Cloud's Vertex AI API to access + * Large Language Models in a chat-like fashion. + * + * To use, you will need to have one of the following authentication + * methods in place: + * - You are logged into an account permitted to the Google Cloud project + * using Vertex AI. + * - You are running this on a machine using a service account permitted to + * the Google Cloud project using Vertex AI. + * - The `GOOGLE_APPLICATION_CREDENTIALS` environment variable is set to the + * path of a credentials file for a service account permitted to the + * Google Cloud project using Vertex AI. 
+ * @example + * ```typescript + * const model = new ChatGoogleVertexAI({ + * temperature: 0.7, + * }); + * const result = await model.invoke("What is the capital of France?"); + * ``` + */ +export class ChatGoogleVertexAI extends BaseChatGoogleVertexAI { + static lc_name() { + return "ChatVertexAI"; + } + + constructor(fields?: GoogleVertexAIChatInput) { + super(fields); + + const client = new GAuthClient({ + scopes: "https://www.googleapis.com/auth/cloud-platform", + ...fields?.authOptions, + }); + + this.connection = new GoogleVertexAILLMConnection( + { ...fields, ...this }, + this.caller, + client, + false + ); + + this.streamedConnection = new GoogleVertexAILLMConnection( + { ...fields, ...this }, + this.caller, + client, + true + ); + } +} + +export type { + ChatExample, + GoogleVertexAIChatAuthor, + GoogleVertexAIChatInput, + GoogleVertexAIChatInstance, + GoogleVertexAIChatMessage, + GoogleVertexAIChatMessageFields, + GoogleVertexAIChatPrediction, +} from "./common.js"; diff --git a/libs/langchain-community/src/chat_models/googlevertexai/web.ts b/libs/langchain-community/src/chat_models/googlevertexai/web.ts new file mode 100644 index 000000000000..acbaa9144f4c --- /dev/null +++ b/libs/langchain-community/src/chat_models/googlevertexai/web.ts @@ -0,0 +1,66 @@ +import { GoogleVertexAILLMConnection } from "../../util/googlevertexai-connection.js"; +import { + WebGoogleAuthOptions, + WebGoogleAuth, +} from "../../util/googlevertexai-webauth.js"; +import { BaseChatGoogleVertexAI, GoogleVertexAIChatInput } from "./common.js"; + +/** + * Enables calls to the Google Cloud's Vertex AI API to access + * Large Language Models in a chat-like fashion. + * + * This entrypoint and class are intended to be used in web environments like Edge + * functions where you do not have access to the file system. It supports passing + * service account credentials directly as a "GOOGLE_VERTEX_AI_WEB_CREDENTIALS" + * environment variable or directly as "authOptions.credentials". 
+ * @example + * ```typescript + * const model = new ChatGoogleVertexAI({ + * temperature: 0.7, + * }); + * const result = await model.invoke( + * "How do I implement a binary search algorithm in Python?", + * ); + * ``` + */ +export class ChatGoogleVertexAI extends BaseChatGoogleVertexAI { + static lc_name() { + return "ChatVertexAI"; + } + + get lc_secrets(): { [key: string]: string } { + return { + "authOptions.credentials": "GOOGLE_VERTEX_AI_WEB_CREDENTIALS", + }; + } + + constructor(fields?: GoogleVertexAIChatInput) { + super(fields); + + const client = new WebGoogleAuth(fields?.authOptions); + + this.connection = new GoogleVertexAILLMConnection( + { ...fields, ...this }, + this.caller, + client, + false + ); + + this.streamedConnection = new GoogleVertexAILLMConnection( + { ...fields, ...this }, + this.caller, + client, + true + ); + } +} + +export type { + ChatExample, + GoogleVertexAIChatAuthor, + GoogleVertexAIChatInput, + GoogleVertexAIChatInstance, + GoogleVertexAIChatMessage, + GoogleVertexAIChatMessageFields, + GoogleVertexAIChatPrediction, +} from "./common.js"; diff --git a/libs/langchain-community/src/chat_models/iflytek_xinghuo/common.ts b/libs/langchain-community/src/chat_models/iflytek_xinghuo/common.ts new file mode 100644 index 000000000000..7596c64a954a --- /dev/null +++ b/libs/langchain-community/src/chat_models/iflytek_xinghuo/common.ts @@ -0,0 +1,490 @@ +import { CallbackManagerForLLMRun } from "@langchain/core/callbacks/manager"; +import { + AIMessage, + BaseMessage, + ChatMessage, +} from "@langchain/core/messages"; +import { + ChatGeneration, + ChatResult +} from "@langchain/core/outputs"; +import { getEnvironmentVariable } from "@langchain/core/utils/env"; +import { IterableReadableStream } from "@langchain/core/utils/stream"; +import { BaseChatModel, type BaseChatModelParams } from "@langchain/core/language_models/chat_models"; +import { + BaseWebSocketStream, + WebSocketStreamOptions, +} from "../../util/iflytek_websocket_stream.js"; 
+ +/** + * Type representing the role of a message in the Xinghuo chat model. + */ +export type XinghuoMessageRole = "assistant" | "user"; + +/** + * Interface representing a message in the Xinghuo chat model. + */ +interface XinghuoMessage { + role: XinghuoMessageRole; + content: string; +} + +/** + * Interface representing the usage of tokens in a chat completion. + */ +interface TokenUsage { + completionTokens?: number; + promptTokens?: number; + totalTokens?: number; +} + +/** + * Interface representing a request for a chat completion. + */ +interface ChatCompletionRequest { + messages: XinghuoMessage[]; + temperature?: number; + max_tokens?: number; + top_k?: number; + chat_id?: string; +} + +export interface ChatCompletionChunk { + header: { + code: number; + message: string; + sid: string; + status: number; + }; + payload: { + choices: { + status: number; + seq: number; + text: { + content: string; + role: XinghuoMessageRole; + index: number; + }[]; + }; + usage?: { + text: { + question_tokens: number; + prompt_tokens: number; + completion_tokens: number; + total_tokens: number; + }; + }; + }; +} + +/** + * Interface representing a response from a chat completion. + */ +interface ChatCompletionResponse { + result: string; + usage?: { + completion_tokens: number; + prompt_tokens: number; + total_tokens: number; + }; +} + +/** + * Function that extracts the custom role of a generic chat message. + * @param message Chat message from which to extract the custom role. + * @returns The custom role of the chat message. + */ +function extractGenericMessageCustomRole(message: ChatMessage) { + if (message.role !== "assistant" && message.role !== "user") { + console.warn(`Unknown message role: ${message.role}`); + } + return message.role as XinghuoMessageRole; +} + +/** + * Function that converts a base message to a Xinghuo message role. + * @param message Base message to convert. + * @returns The Xinghuo message role. 
+ */ +function messageToXinghuoRole(message: BaseMessage): XinghuoMessageRole { + const type = message._getType(); + switch (type) { + case "ai": + return "assistant"; + case "human": + return "user"; + case "system": + throw new Error("System messages should not be here"); + case "function": + throw new Error("Function messages not supported"); + case "generic": { + if (!ChatMessage.isInstance(message)) + throw new Error("Invalid generic chat message"); + return extractGenericMessageCustomRole(message); + } + default: + throw new Error(`Unknown message type: ${type}`); + } +} + +declare interface IflytekXinghuoChatInput { + /** Model version to use. Available options are: v1.1, v2.1, v3.1 + * @default "v2.1" + */ + version: string; + + /** + * ID of the end-user who made requests. + */ + userId?: string; + + /** + * APPID to use when making requests. Defaults to the value of + * `IFLYTEK_APPID` environment variable. + */ + iflytekAppid?: string; + + /** + * API key to use when making requests. Defaults to the value of + * `IFLYTEK_API_KEY` environment variable. + */ + iflytekApiKey?: string; + + /** + * API Secret to use when making requests. Defaults to the value of + * `IFLYTEK_API_SECRET` environment variable. + */ + iflytekApiSecret?: string; + + /** Amount of randomness injected into the response. Ranges + * from 0 to 1 (0 is not included). Use temp closer to 0 for analytical / + * multiple choice, and temp closer to 1 for creative + * and generative tasks. Defaults to 0.5. + */ + temperature?: number; + + max_tokens?: number; + + top_k?: number; + + streaming?: boolean; +} + +/** + * Wrapper around IflytekXingHuo large language models that use the Chat endpoint. + * + * To use you should have the `IFLYTEK_API_KEY` and `IFLYTEK_API_SECRET` and `IFLYTEK_APPID` + * environment variable set. 
/**
 * Wrapper around IflytekXingHuo large language models that use the Chat endpoint.
 *
 * To use you should have the `IFLYTEK_API_KEY` and `IFLYTEK_API_SECRET` and `IFLYTEK_APPID`
 * environment variable set.
 *
 * @augments BaseChatModel
 * @augments IflytekXinghuoChatInput
 */
export abstract class BaseChatIflytekXinghuo
  extends BaseChatModel
  implements IflytekXinghuoChatInput
{
  static lc_name() {
    return "ChatIflytekXinghuo";
  }

  get callKeys(): string[] {
    return ["stop", "signal", "options"];
  }

  lc_namespace = ["langchain-community", "chat_models", this._llmType()];

  // Maps constructor fields to the environment variables they fall back to.
  get lc_secrets(): { [key: string]: string } | undefined {
    return {
      iflytekApiKey: "IFLYTEK_API_KEY",
      iflytekApiSecret: "IFLYTEK_API_SECRET",
    };
  }

  get lc_aliases(): { [key: string]: string } | undefined {
    return undefined;
  }

  lc_serializable = true;

  // Spark API version; validated against v1.1/v2.1/v3.1 in the constructor.
  version = "v2.1";

  iflytekAppid: string;

  iflytekApiKey: string;

  iflytekApiSecret: string;

  userId?: string;

  // Websocket endpoint; derived from `version` in the constructor.
  apiUrl: string;

  // Model "domain" string required by the API; derived from `version`.
  domain: string;

  temperature = 0.5;

  max_tokens = 2048;

  top_k = 4;

  streaming = false;

  constructor(fields?: Partial<IflytekXinghuoChatInput> & BaseChatModelParams) {
    super(fields ?? {});

    const iflytekAppid =
      fields?.iflytekAppid ?? getEnvironmentVariable("IFLYTEK_APPID");
    if (!iflytekAppid) {
      throw new Error("Iflytek APPID not found");
    } else {
      this.iflytekAppid = iflytekAppid;
    }

    const iflytekApiKey =
      fields?.iflytekApiKey ?? getEnvironmentVariable("IFLYTEK_API_KEY");
    if (!iflytekApiKey) {
      throw new Error("Iflytek API key not found");
    } else {
      this.iflytekApiKey = iflytekApiKey;
    }

    const iflytekApiSecret =
      fields?.iflytekApiSecret ?? getEnvironmentVariable("IFLYTEK_API_SECRET");
    if (!iflytekApiSecret) {
      throw new Error("Iflytek API secret not found");
    } else {
      this.iflytekApiSecret = iflytekApiSecret;
    }

    this.userId = fields?.userId ?? this.userId;
    this.streaming = fields?.streaming ?? this.streaming;
    this.temperature = fields?.temperature ?? this.temperature;
    this.max_tokens = fields?.max_tokens ?? this.max_tokens;
    this.top_k = fields?.top_k ?? this.top_k;

    this.version = fields?.version ?? this.version;
    // Each API version maps onto a fixed model "domain" and websocket path.
    if (["v1.1", "v2.1", "v3.1"].includes(this.version)) {
      switch (this.version) {
        case "v1.1":
          this.domain = "general";
          break;
        case "v2.1":
          this.domain = "generalv2";
          break;
        case "v3.1":
          this.domain = "generalv3";
          break;
        default:
          // Unreachable given the includes() guard above; keeps TS satisfied.
          this.domain = "generalv2";
      }
      this.apiUrl = `wss://spark-api.xf-yun.com/${this.version}/chat`;
    } else {
      throw new Error(`Invalid model version: ${this.version}`);
    }
  }

  /**
   * Get the identifying parameters for the model
   */
  identifyingParams() {
    return {
      version: this.version,
      ...this.invocationParams(),
    };
  }

  /**
   * Get the parameters used to invoke the model.
   * NOTE(review): `max_tokens` is configured on the instance but is not
   * surfaced here — confirm the omission is intentional.
   */
  invocationParams(): Omit<ChatCompletionRequest, "messages"> & {
    streaming: boolean;
  } {
    return {
      streaming: this.streaming,
      temperature: this.temperature,
      top_k: this.top_k,
    };
  }

  /**
   * Method that retrieves the auth websocketStream for making requests to the Iflytek Xinghuo API.
   * @returns The auth websocketStream for making requests to the Iflytek Xinghuo API.
   */
  abstract openWebSocketStream<
    WebSocketStream extends BaseWebSocketStream<string>
  >(options: WebSocketStreamOptions): Promise<WebSocketStream>;

  /**
   * Calls the Xinghuo API completion.
   * @param request The request to send to the Xinghuo API.
   * @param signal The signal for the API call.
   * @returns The response from the Xinghuo API.
   */
  async completion(
    request: ChatCompletionRequest,
    stream: true,
    signal?: AbortSignal
  ): Promise<IterableReadableStream<string>>;

  async completion(
    request: ChatCompletionRequest,
    stream: false,
    signal?: AbortSignal
  ): Promise<ChatCompletionResponse>;

  async completion(
    request: ChatCompletionRequest,
    stream: boolean,
    signal?: AbortSignal
  ): Promise<IterableReadableStream<string> | ChatCompletionResponse> {
    const webSocketStream = await this.openWebSocketStream({
      signal,
    });
    const connection = await webSocketStream.connection;
    // Frame layout required by the Spark protocol: header / parameter / payload.
    const header = {
      app_id: this.iflytekAppid,
      uid: this.userId,
    };
    const parameter = {
      chat: {
        domain: this.domain,
        temperature: request.temperature ?? this.temperature,
        max_tokens: request.max_tokens ?? this.max_tokens,
        top_k: request.top_k ?? this.top_k,
      },
    };
    const payload = {
      message: {
        text: request.messages,
      },
    };
    const message = JSON.stringify({
      header,
      parameter,
      payload,
    });
    const { writable, readable } = connection;
    const writer = writable.getWriter();
    await writer.write(message);
    const streams = IterableReadableStream.fromReadableStream(readable);
    if (stream) {
      // Streaming: hand the raw frame stream to the caller.
      // NOTE(review): the websocket is not closed in this branch — the
      // consumer appears responsible for cancelling the stream; confirm.
      return streams;
    } else {
      // Accumulate frames until the terminal frame (status === 2) arrives.
      let response: ChatCompletionResponse = { result: "" };
      for await (const chunk of streams) {
        const data = JSON.parse(chunk) as ChatCompletionChunk;
        const { header, payload } = data;
        if (header.code === 0) {
          if (header.status === 0) {
            // First frame: start the reply text.
            response.result = payload.choices?.text[0]?.content ?? "";
          } else if (header.status === 1) {
            // Intermediate frame: append to the reply.
            response.result += payload.choices?.text[0]?.content ?? "";
          } else if (header.status === 2) {
            // Final frame carries token usage; stop reading.
            response = { ...response, usage: payload.usage?.text };
            break;
          }
        } else {
          // Non-zero code signals an API error.
          // NOTE(review): the error code/message is silently discarded here.
          break;
        }
      }
      void streams.cancel();
      void webSocketStream.close();
      return response;
    }
  }

  async _generate(
    messages: BaseMessage[],
    options: this["ParsedCallOptions"],
    runManager?: CallbackManagerForLLMRun | undefined
  ): Promise<ChatResult> {
    const tokenUsage: TokenUsage = {};
    const params = this.invocationParams();
    // NOTE(review): system messages are not filtered out here and
    // messageToXinghuoRole throws on them — confirm callers never pass one.
    const messagesMapped: XinghuoMessage[] = messages.map((message) => {
      if (typeof message.content !== "string") {
        throw new Error(
          "ChatIflytekXinghuo does not support non-string message content."
        );
      }
      return {
        role: messageToXinghuoRole(message),
        content: message.content,
      };
    });
    const data = params.streaming
      ? await (async () => {
          // Streaming path: consume frames ourselves so we can emit
          // new-token callbacks while accumulating the full reply.
          const streams = await this.completion(
            { messages: messagesMapped, ...params },
            true,
            options.signal
          );
          let response: ChatCompletionResponse = { result: "" };
          for await (const chunk of streams) {
            const data = JSON.parse(chunk) as ChatCompletionChunk;
            const { header, payload } = data;
            if (header.code === 0) {
              if (header.status === 0) {
                response.result = payload.choices?.text[0]?.content ?? "";
              } else if (header.status === 1) {
                response.result += payload.choices?.text[0]?.content ?? "";
              } else if (header.status === 2) {
                response = { ...response, usage: payload.usage?.text };
                break;
              }
              // Fires for status 0/1 frames only (status 2 breaks above).
              void runManager?.handleLLMNewToken(
                payload.choices?.text[0]?.content
              );
            } else {
              break;
            }
          }
          void streams.cancel();
          return response;
        })()
      : await this.completion(
          { messages: messagesMapped, ...params },
          false,
          options.signal
        );

    const {
      completion_tokens: completionTokens,
      prompt_tokens: promptTokens,
      total_tokens: totalTokens,
    } = data.usage ?? {};

    if (completionTokens) {
      tokenUsage.completionTokens =
        (tokenUsage.completionTokens ?? 0) + completionTokens;
    }

    if (promptTokens) {
      tokenUsage.promptTokens = (tokenUsage.promptTokens ?? 0) + promptTokens;
    }

    if (totalTokens) {
      tokenUsage.totalTokens = (tokenUsage.totalTokens ?? 0) + totalTokens;
    }

    const generations: ChatGeneration[] = [];
    const text = data.result ?? "";
    generations.push({
      text,
      message: new AIMessage(text),
    });

    return {
      generations,
      llmOutput: { tokenUsage },
    };
  }

  /** @ignore */
  // eslint-disable-next-line @typescript-eslint/no-explicit-any
  _combineLLMOutput(): Record<string, any> | undefined {
    return [];
  }

  _llmType(): string {
    return "iflytek_xinghuo";
  }
}
diff --git a/libs/langchain-community/src/chat_models/iflytek_xinghuo/index.ts b/libs/langchain-community/src/chat_models/iflytek_xinghuo/index.ts
new file mode 100644
index 000000000000..ac54461be18a
--- /dev/null
+++ b/libs/langchain-community/src/chat_models/iflytek_xinghuo/index.ts
@@ -0,0 +1,43 @@
import WebSocket from "ws";
import { BaseChatIflytekXinghuo } from "./common.js";
import {
  BaseWebSocketStream,
  WebSocketStreamOptions,
} from "../../util/iflytek_websocket_stream.js";

// Node.js websocket transport backed by the `ws` package.
class WebSocketStream extends BaseWebSocketStream<string> {
  // eslint-disable-next-line @typescript-eslint/ban-ts-comment
  // @ts-ignore
  openWebSocket(url: string, options: WebSocketStreamOptions): WebSocket {
    return new WebSocket(url, options.protocols ??
[]); + } +} + +/** + * @example + * ```typescript + * const model = new ChatIflytekXinghuo(); + * const response = await model.call([new HumanMessage("Nice to meet you!")]); + * console.log(response); + * ``` + */ +export class ChatIflytekXinghuo extends BaseChatIflytekXinghuo { + async openWebSocketStream( + options: WebSocketStreamOptions + ): Promise { + const host = "spark-api.xf-yun.com"; + const date = new Date().toUTCString(); + const url = `GET /${this.version}/chat HTTP/1.1`; + const { createHmac } = await import("node:crypto"); + const hash = createHmac("sha256", this.iflytekApiSecret) + .update(`host: ${host}\ndate: ${date}\n${url}`) + .digest("base64"); + const authorization_origin = `api_key="${this.iflytekApiKey}", algorithm="hmac-sha256", headers="host date request-line", signature="${hash}"`; + const authorization = Buffer.from(authorization_origin).toString("base64"); + let authWebSocketUrl = this.apiUrl; + authWebSocketUrl += `?authorization=${authorization}`; + authWebSocketUrl += `&host=${encodeURIComponent(host)}`; + authWebSocketUrl += `&date=${encodeURIComponent(date)}`; + return new WebSocketStream(authWebSocketUrl, options) as WebSocketStream; + } +} diff --git a/libs/langchain-community/src/chat_models/iflytek_xinghuo/web.ts b/libs/langchain-community/src/chat_models/iflytek_xinghuo/web.ts new file mode 100644 index 000000000000..87b372b802ad --- /dev/null +++ b/libs/langchain-community/src/chat_models/iflytek_xinghuo/web.ts @@ -0,0 +1,49 @@ +import { BaseChatIflytekXinghuo } from "./common.js"; +import { + WebSocketStreamOptions, + BaseWebSocketStream, +} from "../../util/iflytek_websocket_stream.js"; + +class WebSocketStream extends BaseWebSocketStream { + openWebSocket(url: string, options: WebSocketStreamOptions): WebSocket { + return new WebSocket(url, options.protocols ?? 
[]); + } +} + +/** + * @example + * ```typescript + * const model = new ChatIflytekXinghuo(); + * const response = await model.call([new HumanMessage("Nice to meet you!")]); + * console.log(response); + * ``` + */ +export class ChatIflytekXinghuo extends BaseChatIflytekXinghuo { + async openWebSocketStream( + options: WebSocketStreamOptions + ): Promise { + const host = "spark-api.xf-yun.com"; + const date = new Date().toUTCString(); + const url = `GET /${this.version}/chat HTTP/1.1`; + const keyBuffer = new TextEncoder().encode(this.iflytekApiSecret); + const dataBuffer = new TextEncoder().encode( + `host: ${host}\ndate: ${date}\n${url}` + ); + const cryptoKey = await crypto.subtle.importKey( + "raw", + keyBuffer, + { name: "HMAC", hash: "SHA-256" }, + false, + ["sign"] + ); + const signature = await crypto.subtle.sign("HMAC", cryptoKey, dataBuffer); + const hash = window.btoa(String.fromCharCode(...new Uint8Array(signature))); + const authorization_origin = `api_key="${this.iflytekApiKey}", algorithm="hmac-sha256", headers="host date request-line", signature="${hash}"`; + const authorization = window.btoa(authorization_origin); + let authWebSocketUrl = this.apiUrl; + authWebSocketUrl += `?authorization=${authorization}`; + authWebSocketUrl += `&host=${encodeURIComponent(host)}`; + authWebSocketUrl += `&date=${encodeURIComponent(date)}`; + return new WebSocketStream(authWebSocketUrl, options) as WebSocketStream; + } +} diff --git a/libs/langchain-community/src/chat_models/llama_cpp.ts b/libs/langchain-community/src/chat_models/llama_cpp.ts new file mode 100644 index 000000000000..0579e8b39204 --- /dev/null +++ b/libs/langchain-community/src/chat_models/llama_cpp.ts @@ -0,0 +1,327 @@ +import { + LlamaModel, + LlamaContext, + LlamaChatSession, + type ConversationInteraction, +} from "node-llama-cpp"; + +import { SimpleChatModel, type BaseChatModelParams } from "@langchain/core/language_models/chat_models"; +import { + LlamaBaseCppInputs, + createLlamaModel, + 
createLlamaContext,
} from "../util/llama_cpp.js";
import type { BaseLanguageModelCallOptions } from "@langchain/core/language_models/base";
import { CallbackManagerForLLMRun } from "@langchain/core/callbacks/manager";
import {
  BaseMessage,
  AIMessageChunk,
  ChatMessage,
} from "@langchain/core/messages";
import { ChatGenerationChunk } from "@langchain/core/outputs";

/**
 * Constructor inputs for ChatLlamaCpp.
 * Note that the modelPath is the only required parameter. For testing you
 * can set this in the environment variable `LLAMA_PATH`.
 */
export interface LlamaCppInputs
  extends LlamaBaseCppInputs,
    BaseChatModelParams {}

/**
 * Per-call options accepted by ChatLlamaCpp.
 */
export interface LlamaCppCallOptions extends BaseLanguageModelCallOptions {
  /** The maximum number of tokens the response should contain. */
  maxTokens?: number;
  /** A function called when matching the provided token array */
  onToken?: (tokens: number[]) => void;
}

/**
 * To use this model you need to have the `node-llama-cpp` module installed.
 * This can be installed using `npm install -S node-llama-cpp`; the minimum
 * supported version is 2.0.0.
 * This also requires that you have a locally built version of Llama2 installed.
 * @example
 * ```typescript
 * // Initialize the ChatLlamaCpp model with the path to the model binary file.
 * const model = new ChatLlamaCpp({
 *   modelPath: "/Replace/with/path/to/your/model/gguf-llama2-q4_0.bin",
 *   temperature: 0.5,
 * });
 *
 * // Call the model with a message and await the response.
 * const response = await model.call([
 *   new HumanMessage({ content: "My name is John." }),
 * ]);
 *
 * // Log the response to the console.
 * console.log({ response });
 *
 * ```
 */
export class ChatLlamaCpp extends SimpleChatModel {
  declare CallOptions: LlamaCppCallOptions;

  // NOTE(review): static and apparently never read in this class — confirm
  // before removing.
  static inputs: LlamaCppInputs;

  maxTokens?: number;

  temperature?: number;

  topK?: number;

  topP?: number;

  trimWhitespaceSuffix?: boolean;

  _model: LlamaModel;

  _context: LlamaContext;

  // Lazily (re)built chat session; null until the first call.
  _session: LlamaChatSession | null;

  lc_serializable = true;

  lc_namespace = ["langchain-community", "chat_models", this._llmType()];

  static lc_name() {
    return "ChatLlamaCpp";
  }

  constructor(inputs: LlamaCppInputs) {
    super(inputs);
    this.maxTokens = inputs?.maxTokens;
    this.temperature = inputs?.temperature;
    this.topK = inputs?.topK;
    this.topP = inputs?.topP;
    this.trimWhitespaceSuffix = inputs?.trimWhitespaceSuffix;
    this._model = createLlamaModel(inputs);
    this._context = createLlamaContext(this._model, inputs);
    this._session = null;
  }

  _llmType() {
    return "llama2_cpp";
  }

  /** @ignore */
  _combineLLMOutput() {
    return {};
  }

  invocationParams() {
    return {
      maxTokens: this.maxTokens,
      temperature: this.temperature,
      topK: this.topK,
      topP: this.topP,
      trimWhitespaceSuffix: this.trimWhitespaceSuffix,
    };
  }

  /** @ignore */
  async _call(
    messages: BaseMessage[],
    options: this["ParsedCallOptions"]
  ): Promise<string> {
    let prompt = "";

    if (messages.length > 1) {
      // We need to build a new _session
      prompt = this._buildSession(messages);
    } else if (!this._session) {
      prompt = this._buildSession(messages);
    } else {
      if (typeof messages[0].content !== "string") {
        throw new Error(
          "ChatLlamaCpp does not support non-string message content in sessions."
        );
      }
      // If we already have a session then we should just have a single prompt
      prompt = messages[0].content;
    }

    try {
      const promptOptions = {
        onToken: options.onToken,
        maxTokens: this?.maxTokens,
        temperature: this?.temperature,
        topK: this?.topK,
        topP: this?.topP,
        trimWhitespaceSuffix: this?.trimWhitespaceSuffix,
      };
      // @ts-expect-error - TS2531: Object is possibly 'null'.
      const completion = await this._session.prompt(prompt, promptOptions);
      return completion;
    } catch (e) {
      // NOTE(review): the underlying error is discarded here; consider
      // rethrowing with the original error attached as `cause`.
      throw new Error("Error getting prompt completion.");
    }
  }

  async *_streamResponseChunks(
    input: BaseMessage[],
    _options: this["ParsedCallOptions"],
    runManager?: CallbackManagerForLLMRun
  ): AsyncGenerator<ChatGenerationChunk> {
    const promptOptions = {
      temperature: this?.temperature,
      topK: this?.topK,
      topP: this?.topP,
    };

    const prompt = this._buildPrompt(input);

    const stream = await this.caller.call(async () =>
      this._context.evaluate(this._context.encode(prompt), promptOptions)
    );

    // Decode one token per chunk and forward it to the callback manager.
    for await (const chunk of stream) {
      yield new ChatGenerationChunk({
        text: this._context.decode([chunk]),
        message: new AIMessageChunk({
          content: this._context.decode([chunk]),
        }),
        generationInfo: {},
      });
      await runManager?.handleLLMNewToken(this._context.decode([chunk]) ?? "");
    }
  }

  // Constructs a new session, adding in any system message or previous chats.
  // Returns the prompt (final human message) to send next, if any.
  protected _buildSession(messages: BaseMessage[]): string {
    let prompt = "";
    let sysMessage = "";
    let noSystemMessages: BaseMessage[] = [];
    let interactions: ConversationInteraction[] = [];

    // Let's see if we have a system message
    if (messages.findIndex((msg) => msg._getType() === "system") !== -1) {
      const sysMessages = messages.filter(
        (message) => message._getType() === "system"
      );

      const systemMessageContent = sysMessages[sysMessages.length - 1].content;

      if (typeof systemMessageContent !== "string") {
        throw new Error(
          "ChatLlamaCpp does not support non-string message content in sessions."
        );
      }
      // Only use the last provided system message
      sysMessage = systemMessageContent;

      // Now filter out the system messages
      noSystemMessages = messages.filter(
        (message) => message._getType() !== "system"
      );
    } else {
      noSystemMessages = messages;
    }

    // Is there just a prompt left, or are there previous interactions?
    if (noSystemMessages.length > 1) {
      // Is the last message a prompt?
      if (
        noSystemMessages[noSystemMessages.length - 1]._getType() === "human"
      ) {
        const finalMessageContent =
          noSystemMessages[noSystemMessages.length - 1].content;
        if (typeof finalMessageContent !== "string") {
          throw new Error(
            "ChatLlamaCpp does not support non-string message content in sessions."
          );
        }
        prompt = finalMessageContent;
        interactions = this._convertMessagesToInteractions(
          noSystemMessages.slice(0, noSystemMessages.length - 1)
        );
      } else {
        interactions = this._convertMessagesToInteractions(noSystemMessages);
      }
    } else {
      if (typeof noSystemMessages[0].content !== "string") {
        throw new Error(
          "ChatLlamaCpp does not support non-string message content in sessions."
        );
      }
      // If there was only a single message we assume it's a prompt
      prompt = noSystemMessages[0].content;
    }

    // Now construct a session according to what we got
    if (sysMessage !== "" && interactions.length > 0) {
      this._session = new LlamaChatSession({
        context: this._context,
        conversationHistory: interactions,
        systemPrompt: sysMessage,
      });
    } else if (sysMessage !== "" && interactions.length === 0) {
      this._session = new LlamaChatSession({
        context: this._context,
        systemPrompt: sysMessage,
      });
    } else if (sysMessage === "" && interactions.length > 0) {
      this._session = new LlamaChatSession({
        context: this._context,
        conversationHistory: interactions,
      });
    } else {
      this._session = new LlamaChatSession({
        context: this._context,
      });
    }

    return prompt;
  }

  // Builds an array of (prompt, response) interactions from alternating
  // messages; a trailing unpaired message is dropped.
  protected _convertMessagesToInteractions(
    messages: BaseMessage[]
  ): ConversationInteraction[] {
    const result: ConversationInteraction[] = [];

    for (let i = 0; i < messages.length; i += 2) {
      if (i + 1 < messages.length) {
        const prompt = messages[i].content;
        const response = messages[i + 1].content;
        if (typeof prompt !== "string" || typeof response !== "string") {
          throw new Error(
            "ChatLlamaCpp does not support non-string message content."
          );
        }
        result.push({
          prompt,
          response,
        });
      }
    }

    return result;
  }

  // Flattens messages into a single Llama-2-style prompt string.
  protected _buildPrompt(input: BaseMessage[]): string {
    const prompt = input
      .map((message) => {
        let messageText;
        if (message._getType() === "human") {
          messageText = `[INST] ${message.content} [/INST]`;
        } else if (message._getType() === "ai") {
          messageText = message.content;
        } else if (message._getType() === "system") {
          // Llama-2 system tags. NOTE(review): reconstructed from a mangled
          // source — confirm the upstream literal is `<<SYS>> ... <</SYS>>`.
          messageText = `<<SYS>> ${message.content} <</SYS>>`;
        } else if (ChatMessage.isInstance(message)) {
          messageText = `\n\n${message.role[0].toUpperCase()}${message.role.slice(
            1
          )}: ${message.content}`;
        } else {
          console.warn(
            `Unsupported message type passed to llama_cpp: "${message._getType()}"`
          );
          messageText = "";
        }
        return messageText;
      })
      .join("\n");

    return prompt;
  }
}
diff --git a/libs/langchain-community/src/chat_models/minimax.ts b/libs/langchain-community/src/chat_models/minimax.ts
new file mode 100644
index 000000000000..fb5d9c422010
--- /dev/null
+++ b/libs/langchain-community/src/chat_models/minimax.ts
@@ -0,0 +1,881 @@
import type { OpenAIClient } from "@langchain/openai";

import { BaseChatModel, type BaseChatModelParams } from "@langchain/core/language_models/chat_models";
import {
  AIMessage,
  BaseMessage,
  ChatMessage,
  HumanMessage,
} from "@langchain/core/messages";
import { ChatResult, ChatGeneration } from "@langchain/core/outputs";
import { CallbackManagerForLLMRun } from "@langchain/core/callbacks/manager";
import { getEnvironmentVariable } from "@langchain/core/utils/env";
import { StructuredTool } from "@langchain/core/tools";
import { BaseFunctionCallOptions } from "@langchain/core/language_models/base";
import { formatToOpenAIFunction } from "@langchain/openai";

/**
 * Type representing the sender_type of a message in the Minimax chat model.
 */
export type MinimaxMessageRole = "BOT" | "USER" | "FUNCTION";

/**
 * Interface representing a message in the Minimax chat model.
 */
interface MinimaxChatCompletionRequestMessage {
  sender_type: MinimaxMessageRole;
  sender_name?: string;
  text: string;
}

/**
 * Interface representing a request for a chat completion.
 */
interface MinimaxChatCompletionRequest {
  model: string;
  messages: MinimaxChatCompletionRequestMessage[];
  stream?: boolean;
  prompt?: string;
  temperature?: number;
  top_p?: number;
  tokens_to_generate?: number;
  skip_info_mask?: boolean;
  mask_sensitive_info?: boolean;
  beam_width?: number;
  use_standard_sse?: boolean;
  role_meta?: RoleMeta;
  bot_setting?: BotSetting[];
  reply_constraints?: ReplyConstraints;
  sample_messages?: MinimaxChatCompletionRequestMessage[];
  /**
   * A list of functions the model may generate JSON inputs for.
   */
  functions?: OpenAIClient.Chat.ChatCompletionCreateParams.Function[];
  plugins?: string[];
}

interface RoleMeta {
  role_meta: string;
  bot_name: string;
}

/** Reply glyph supplied verbatim as a raw template string. */
interface RawGlyph {
  type: "raw";
  raw_glyph: string;
}

/** Reply glyph constraining the reply to a JSON value. */
interface JsonGlyph {
  type: "json_value";
  // eslint-disable-next-line @typescript-eslint/no-explicit-any
  json_properties: any;
}

type ReplyConstraintsGlyph = RawGlyph | JsonGlyph;

/** Constraints on who replies and in which format. */
interface ReplyConstraints {
  sender_type: string;
  sender_name: string;
  glyph?: ReplyConstraintsGlyph;
}

interface BotSetting {
  content: string;
  bot_name: string;
}

export declare interface ConfigurationParameters {
  basePath?: string;
  // NOTE(review): type args reconstructed from a mangled source — confirm
  // upstream declares Record<string, string>.
  headers?: Record<string, string>;
}

/**
 * Interface defining the input to the ChatMinimax class.
 */
declare interface MinimaxChatInputBase {
  /** Model name to use
   * @default "abab5.5-chat"
   */
  modelName: string;

  /** Whether to stream the results or not. Defaults to false. */
  streaming?: boolean;

  prefixMessages?: MinimaxChatCompletionRequestMessage[];

  /**
   * Group ID to use when making requests. Defaults to the value of
   * `MINIMAX_GROUP_ID` environment variable.
   */
  minimaxGroupId?: string;

  /**
   * Secret key to use when making requests. Defaults to the value of
   * `MINIMAX_API_KEY` environment variable.
   */
  minimaxApiKey?: string;

  /** Amount of randomness injected into the response. Ranges
   * from 0 to 1 (0 is not included). Use temp closer to 0 for analytical /
   * multiple choice, and temp closer to 1 for creative
   * and generative tasks. Defaults to 0.95.
   */
  temperature?: number;

  /**
   * The smaller the sampling method, the more determinate the result;
   * the larger the number, the more random the result.
   */
  topP?: number;

  /**
   * Enable Chatcompletion pro
   */
  proVersion?: boolean;

  /**
   * Pay attention to the maximum number of tokens generated,
   * this parameter does not affect the generation effect of the model itself,
   * but only realizes the function by truncating the tokens exceeding the limit.
   * It is necessary to ensure that the number of tokens of the input context plus this value is less than 6144 or 16384,
   * otherwise the request will fail.
   */
  tokensToGenerate?: number;
}

declare interface MinimaxChatInputNormal {
  /**
   * Dialogue setting, characters, or functionality setting.
   */
  prompt?: string;
  /**
   * Sensitize text information in the output that may involve privacy issues,
   * currently including but not limited to emails, domain names,
   * links, ID numbers, home addresses, etc. Default false, ie. enable sensitization.
   */
  skipInfoMask?: boolean;

  /**
   * Whether to use the standard SSE format, when set to true,
   * the streaming results will be separated by two line breaks.
   * This parameter only takes effect when stream is set to true.
   */
  useStandardSse?: boolean;

  /**
   * If it is true, this indicates that the current request is set to continuation mode,
   * and the response is a continuation of the last sentence in the incoming messages;
   * at this time, the last sender is not limited to USER, it can also be BOT.
   * Assuming the last sentence of incoming messages is {"sender_type": "USER", "text": "天生我材"},
   * the completion of the reply may be "It must be useful."
   */
  continueLastMessage?: boolean;

  /**
   * How many results to generate; the default is 1 and the maximum is not more than 4.
   * Because beamWidth generates multiple results, it will consume more tokens.
   */
  beamWidth?: number;

  /**
   * Dialogue Metadata
   */
  roleMeta?: RoleMeta;
}

declare interface MinimaxChatInputPro extends MinimaxChatInputBase {
  /**
   * For the text information in the output that may involve privacy issues,
   * code masking is currently included but not limited to emails, domains, links, ID numbers, home addresses, etc.,
   * with the default being true, that is, code masking is enabled.
   */
  maskSensitiveInfo?: boolean;

  /**
   * Default bot name
   */
  defaultBotName?: string;

  /**
   * Default user name
   */
  defaultUserName?: string;

  /**
   * Setting for each robot, only available for pro version.
   */
  botSetting?: BotSetting[];

  replyConstraints?: ReplyConstraints;
}

type MinimaxChatInput = MinimaxChatInputNormal & MinimaxChatInputPro;

/**
 * Function that extracts the custom sender_type of a generic chat message.
 * Warns — but still passes the raw role through — when it is neither
 * "ai" nor "user".
 * @param message Chat message from which to extract the custom sender_type.
 * @returns The custom sender_type of the chat message.
 */
function extractGenericMessageCustomRole(message: ChatMessage) {
  if (message.role !== "ai" && message.role !== "user") {
    console.warn(`Unknown message role: ${message.role}`);
  }
  if (message.role === "ai") {
    return "BOT" as MinimaxMessageRole;
  }
  if (message.role === "user") {
    return "USER" as MinimaxMessageRole;
  }
  return message.role as MinimaxMessageRole;
}

/**
 * Function that converts a base message to a Minimax message sender_type.
 * @param message Base message to convert.
 * @returns The Minimax message sender_type.
 */
+ */ +function messageToMinimaxRole(message: BaseMessage): MinimaxMessageRole { + const type = message._getType(); + switch (type) { + case "ai": + return "BOT"; + case "human": + return "USER"; + case "system": + throw new Error("System messages not supported"); + case "function": + return "FUNCTION"; + case "generic": { + if (!ChatMessage.isInstance(message)) + throw new Error("Invalid generic chat message"); + return extractGenericMessageCustomRole(message); + } + default: + throw new Error(`Unknown message type: ${type}`); + } +} + +export interface ChatMinimaxCallOptions extends BaseFunctionCallOptions { + tools?: StructuredTool[]; + defaultUserName?: string; + defaultBotName?: string; + plugins?: string[]; + botSetting?: BotSetting[]; + replyConstraints?: ReplyConstraints; + sampleMessages?: BaseMessage[]; +} + +/** + * Wrapper around Minimax large language models that use the Chat endpoint. + * + * To use you should have the `MINIMAX_GROUP_ID` and `MINIMAX_API_KEY` + * environment variable set. 
+ * @example + * ```typescript + * // Define a chat prompt with a system message setting the context for translation + * const chatPrompt = ChatPromptTemplate.fromMessages([ + * SystemMessagePromptTemplate.fromTemplate( + * "You are a helpful assistant that translates {input_language} to {output_language}.", + * ), + * HumanMessagePromptTemplate.fromTemplate("{text}"), + * ]); + * + * // Create a new LLMChain with the chat model and the defined prompt + * const chainB = new LLMChain({ + * prompt: chatPrompt, + * llm: new ChatMinimax({ temperature: 0.01 }), + * }); + * + * // Call the chain with the input language, output language, and the text to translate + * const resB = await chainB.call({ + * input_language: "English", + * output_language: "Chinese", + * text: "I love programming.", + * }); + * + * // Log the result + * console.log({ resB }); + * + * ``` + */ +export class ChatMinimax + extends BaseChatModel + implements MinimaxChatInput +{ + static lc_name() { + return "ChatMinimax"; + } + + get callKeys(): (keyof ChatMinimaxCallOptions)[] { + return [ + ...(super.callKeys as (keyof ChatMinimaxCallOptions)[]), + "functions", + "tools", + "defaultBotName", + "defaultUserName", + "plugins", + "replyConstraints", + "botSetting", + "sampleMessages", + ]; + } + + get lc_secrets(): { [key: string]: string } | undefined { + return { + minimaxApiKey: "MINIMAX_API_KEY", + minimaxGroupId: "MINIMAX_GROUP_ID", + }; + } + + lc_serializable = true; + + lc_namespace = ["langchain-community", "chat_models", this._llmType()]; + + minimaxGroupId?: string; + + minimaxApiKey?: string; + + streaming = false; + + prompt?: string; + + modelName = "abab5.5-chat"; + + defaultBotName?: string = "Assistant"; + + defaultUserName?: string = "I"; + + prefixMessages?: MinimaxChatCompletionRequestMessage[]; + + apiUrl: string; + + basePath?: string = "https://api.minimax.chat/v1"; + + headers?: Record; + + temperature?: number = 0.9; + + topP?: number = 0.8; + + tokensToGenerate?: number; + 
+ skipInfoMask?: boolean; + + proVersion?: boolean = true; + + beamWidth?: number; + + botSetting?: BotSetting[]; + + continueLastMessage?: boolean; + + maskSensitiveInfo?: boolean; + + roleMeta?: RoleMeta; + + useStandardSse?: boolean; + + replyConstraints?: ReplyConstraints; + + constructor( + fields?: Partial & + BaseChatModelParams & { + configuration?: ConfigurationParameters; + } + ) { + super(fields ?? {}); + + this.minimaxGroupId = + fields?.minimaxGroupId ?? getEnvironmentVariable("MINIMAX_GROUP_ID"); + if (!this.minimaxGroupId) { + throw new Error("Minimax GroupID not found"); + } + + this.minimaxApiKey = + fields?.minimaxApiKey ?? getEnvironmentVariable("MINIMAX_API_KEY"); + + if (!this.minimaxApiKey) { + throw new Error("Minimax ApiKey not found"); + } + + this.streaming = fields?.streaming ?? this.streaming; + this.prompt = fields?.prompt ?? this.prompt; + this.temperature = fields?.temperature ?? this.temperature; + this.topP = fields?.topP ?? this.topP; + this.skipInfoMask = fields?.skipInfoMask ?? this.skipInfoMask; + this.prefixMessages = fields?.prefixMessages ?? this.prefixMessages; + this.maskSensitiveInfo = + fields?.maskSensitiveInfo ?? this.maskSensitiveInfo; + this.beamWidth = fields?.beamWidth ?? this.beamWidth; + this.continueLastMessage = + fields?.continueLastMessage ?? this.continueLastMessage; + this.tokensToGenerate = fields?.tokensToGenerate ?? this.tokensToGenerate; + this.roleMeta = fields?.roleMeta ?? this.roleMeta; + this.botSetting = fields?.botSetting ?? this.botSetting; + this.useStandardSse = fields?.useStandardSse ?? this.useStandardSse; + this.replyConstraints = fields?.replyConstraints ?? this.replyConstraints; + this.defaultBotName = fields?.defaultBotName ?? this.defaultBotName; + + this.modelName = fields?.modelName ?? this.modelName; + this.basePath = fields?.configuration?.basePath ?? this.basePath; + this.headers = fields?.configuration?.headers ?? this.headers; + this.proVersion = fields?.proVersion ?? 
this.proVersion; + + const modelCompletion = this.proVersion + ? "chatcompletion_pro" + : "chatcompletion"; + this.apiUrl = `${this.basePath}/text/${modelCompletion}`; + } + + fallbackBotName(options?: this["ParsedCallOptions"]) { + let botName = options?.defaultBotName ?? this.defaultBotName ?? "Assistant"; + if (this.botSetting) { + botName = this.botSetting[0].bot_name; + } + return botName; + } + + defaultReplyConstraints(options?: this["ParsedCallOptions"]) { + const constraints = options?.replyConstraints ?? this.replyConstraints; + if (!constraints) { + let botName = + options?.defaultBotName ?? this.defaultBotName ?? "Assistant"; + if (this.botSetting) { + botName = this.botSetting[0].bot_name; + } + + return { + sender_type: "BOT", + sender_name: botName, + }; + } + return constraints; + } + + /** + * Get the parameters used to invoke the model + */ + invocationParams( + options?: this["ParsedCallOptions"] + ): Omit { + return { + model: this.modelName, + stream: this.streaming, + prompt: this.prompt, + temperature: this.temperature, + top_p: this.topP, + tokens_to_generate: this.tokensToGenerate, + skip_info_mask: this.skipInfoMask, + mask_sensitive_info: this.maskSensitiveInfo, + beam_width: this.beamWidth, + use_standard_sse: this.useStandardSse, + role_meta: this.roleMeta, + bot_setting: options?.botSetting ?? this.botSetting, + reply_constraints: this.defaultReplyConstraints(options), + sample_messages: this.messageToMinimaxMessage( + options?.sampleMessages, + options + ), + functions: + options?.functions ?? + (options?.tools + ? options?.tools.map(formatToOpenAIFunction) + : undefined), + plugins: options?.plugins, + }; + } + + /** + * Get the identifying parameters for the model + */ + identifyingParams() { + return { + ...this.invocationParams(), + }; + } + + /** + * Convert a list of messages to the format expected by the model. 
+ * @param messages + * @param options + */ + messageToMinimaxMessage( + messages?: BaseMessage[], + options?: this["ParsedCallOptions"] + ): MinimaxChatCompletionRequestMessage[] | undefined { + return messages + ?.filter((message) => { + if (ChatMessage.isInstance(message)) { + return message.role !== "system"; + } + return message._getType() !== "system"; + }) + ?.map((message) => { + const sender_type = messageToMinimaxRole(message); + if (typeof message.content !== "string") { + throw new Error( + "ChatMinimax does not support non-string message content." + ); + } + return { + sender_type, + text: message.content, + sender_name: + message.name ?? + (sender_type === "BOT" + ? this.fallbackBotName() + : options?.defaultUserName ?? this.defaultUserName), + }; + }); + } + + /** @ignore */ + async _generate( + messages: BaseMessage[], + options?: this["ParsedCallOptions"], + runManager?: CallbackManagerForLLMRun + ): Promise { + const tokenUsage = { totalTokens: 0 }; + this.botSettingFallback(options, messages); + + const params = this.invocationParams(options); + const messagesMapped: MinimaxChatCompletionRequestMessage[] = [ + ...(this.messageToMinimaxMessage(messages, options) ?? []), + ...(this.prefixMessages ?? []), + ]; + + const data = params.stream + ? await new Promise((resolve, reject) => { + let response: ChatCompletionResponse; + let rejected = false; + let resolved = false; + this.completionWithRetry( + { + ...params, + messages: messagesMapped, + }, + true, + options?.signal, + (event) => { + const data = JSON.parse(event.data); + + if (data?.error_code) { + if (rejected) { + return; + } + rejected = true; + reject(data); + return; + } + + const message = data as ChatCompletionResponse; + // on the first message set the response properties + + if (!message.choices[0].finish_reason) { + // the last stream message + let streamText; + if (this.proVersion) { + const messages = message.choices[0].messages ?? 
[]; + streamText = messages[0].text; + } else { + streamText = message.choices[0].delta; + } + + // TODO this should pass part.index to the callback + // when that's supported there + // eslint-disable-next-line no-void + void runManager?.handleLLMNewToken(streamText ?? ""); + return; + } + + response = message; + if (!this.proVersion) { + response.choices[0].text = message.reply; + } + + if (resolved || rejected) { + return; + } + resolved = true; + resolve(response); + } + ).catch((error) => { + if (!rejected) { + rejected = true; + reject(error); + } + }); + }) + : await this.completionWithRetry( + { + ...params, + messages: messagesMapped, + }, + false, + options?.signal + ); + + const { total_tokens: totalTokens } = data.usage ?? {}; + + if (totalTokens) { + tokenUsage.totalTokens = totalTokens; + } + + if (data.base_resp?.status_code !== 0) { + throw new Error(`Minimax API error: ${data.base_resp?.status_msg}`); + } + const generations: ChatGeneration[] = []; + + if (this.proVersion) { + for (const choice of data.choices) { + const messages = choice.messages ?? []; + // 取最后一条消息 + if (messages) { + const message = messages[messages.length - 1]; + const text = message?.text ?? ""; + generations.push({ + text, + message: minimaxResponseToChatMessage(message), + }); + } + } + } else { + for (const choice of data.choices) { + const text = choice?.text ?? ""; + generations.push({ + text, + message: minimaxResponseToChatMessage({ + sender_type: "BOT", + sender_name: + options?.defaultBotName ?? this.defaultBotName ?? 
"Assistant", + text, + }), + }); + } + } + return { + generations, + llmOutput: { tokenUsage }, + }; + } + + /** @ignore */ + async completionWithRetry( + request: MinimaxChatCompletionRequest, + stream: boolean, + signal?: AbortSignal, + onmessage?: (event: MessageEvent) => void + ) { + // The first run will get the accessToken + const makeCompletionRequest = async () => { + const url = `${this.apiUrl}?GroupId=${this.minimaxGroupId}`; + const response = await fetch(url, { + method: "POST", + headers: { + "Content-Type": "application/json", + Authorization: `Bearer ${this.minimaxApiKey}`, + ...this.headers, + }, + body: JSON.stringify(request), + signal, + }); + + if (!stream) { + const json = await response.json(); + return json as ChatCompletionResponse; + } else { + if (response.body) { + const reader = response.body.getReader(); + + const decoder = new TextDecoder("utf-8"); + let data = ""; + + let continueReading = true; + while (continueReading) { + const { done, value } = await reader.read(); + if (done) { + continueReading = false; + break; + } + data += decoder.decode(value); + + let continueProcessing = true; + while (continueProcessing) { + const newlineIndex = data.indexOf("\n"); + if (newlineIndex === -1) { + continueProcessing = false; + break; + } + const line = data.slice(0, newlineIndex); + data = data.slice(newlineIndex + 1); + + if (line.startsWith("data:")) { + const event = new MessageEvent("message", { + data: line.slice("data:".length).trim(), + }); + onmessage?.(event); + } + } + } + return {} as ChatCompletionResponse; + } + return {} as ChatCompletionResponse; + } + }; + return this.caller.call(makeCompletionRequest); + } + + _llmType() { + return "minimax"; + } + + /** @ignore */ + _combineLLMOutput() { + return []; + } + + private botSettingFallback( + options?: this["ParsedCallOptions"], + messages?: BaseMessage[] + ) { + const botSettings = options?.botSetting ?? 
this.botSetting; + if (!botSettings) { + const systemMessages = messages?.filter((message) => { + if (ChatMessage.isInstance(message)) { + return message.role === "system"; + } + return message._getType() === "system"; + }); + + // get the last system message + if (!systemMessages?.length) { + return; + } + const lastSystemMessage = systemMessages[systemMessages.length - 1]; + + if (typeof lastSystemMessage.content !== "string") { + throw new Error( + "ChatMinimax does not support non-string message content." + ); + } + + // setting the default botSetting. + this.botSetting = [ + { + content: lastSystemMessage.content, + bot_name: + options?.defaultBotName ?? this.defaultBotName ?? "Assistant", + }, + ]; + } + } +} + +function minimaxResponseToChatMessage( + message: ChatCompletionResponseMessage +): BaseMessage { + switch (message.sender_type) { + case "USER": + return new HumanMessage(message.text || ""); + case "BOT": + return new AIMessage(message.text || "", { + function_call: message.function_call, + }); + case "FUNCTION": + return new AIMessage(message.text || ""); + default: + return new ChatMessage( + message.text || "", + message.sender_type ?? "unknown" + ); + } +} + +/** ---Response Model---* */ +/** + * Interface representing a message responsed in the Minimax chat model. + */ +interface ChatCompletionResponseMessage { + sender_type: MinimaxMessageRole; + sender_name?: string; + text: string; + function_call?: ChatCompletionResponseMessageFunctionCall; +} + +/** + * Interface representing the usage of tokens in a chat completion. + */ +interface TokenUsage { + total_tokens?: number; +} + +interface BaseResp { + status_code?: number; + status_msg?: string; +} + +/** + * The name and arguments of a function that should be called, as generated by the model. + * @export + * @interface ChatCompletionResponseMessageFunctionCall + */ +export interface ChatCompletionResponseMessageFunctionCall { + /** + * The name of the function to call. 
+ * @type {string} + * @memberof ChatCompletionResponseMessageFunctionCall + */ + name?: string; + /** + * The arguments to call the function with, as generated by the model in JSON format. Note that the model does not always generate valid JSON, and may hallucinate parameters not defined by your function schema. Validate the arguments in your code before calling your function. + * @type {string} + * @memberof ChatCompletionResponseMessageFunctionCall + */ + arguments?: string; +} + +/** + * + * @export + * @interface ChatCompletionResponseChoices + */ +export interface ChatCompletionResponseChoicesPro { + /** + * + * @type {string} + * @memberof ChatCompletionResponseChoices + */ + messages?: ChatCompletionResponseMessage[]; + + /** + * + * @type {string} + * @memberof ChatCompletionResponseChoices + */ + finish_reason?: string; +} + +interface ChatCompletionResponseChoices { + delta?: string; + text?: string; + index?: number; + finish_reason?: string; +} + +/** + * Interface representing a response from a chat completion. 
+ */ +interface ChatCompletionResponse { + model: string; + created: number; + reply: string; + input_sensitive?: boolean; + input_sensitive_type?: number; + output_sensitive?: boolean; + output_sensitive_type?: number; + usage?: TokenUsage; + base_resp?: BaseResp; + choices: Array< + ChatCompletionResponseChoicesPro & ChatCompletionResponseChoices + >; +} diff --git a/libs/langchain-community/src/chat_models/ollama.ts b/libs/langchain-community/src/chat_models/ollama.ts new file mode 100644 index 000000000000..a7a1c8cda3b7 --- /dev/null +++ b/libs/langchain-community/src/chat_models/ollama.ts @@ -0,0 +1,301 @@ +import { SimpleChatModel, type BaseChatModelParams } from "@langchain/core/language_models/chat_models"; +import type { BaseLanguageModelCallOptions } from "@langchain/core/language_models/base"; +import { CallbackManagerForLLMRun } from "@langchain/core/callbacks/manager"; +import { + AIMessageChunk, + BaseMessage, + ChatMessage, +} from "@langchain/core/messages"; +import { ChatGenerationChunk } from "@langchain/core/outputs"; +import type { StringWithAutocomplete } from "@langchain/core/utils/types"; + +import { createOllamaStream, OllamaInput } from "../util/ollama.js"; + +/** + * An interface defining the options for an Ollama API call. It extends + * the BaseLanguageModelCallOptions interface. + */ +export interface OllamaCallOptions extends BaseLanguageModelCallOptions {} + +/** + * A class that enables calls to the Ollama API to access large language + * models in a chat-like fashion. It extends the SimpleChatModel class and + * implements the OllamaInput interface. + * @example + * ```typescript + * const prompt = ChatPromptTemplate.fromMessages([ + * [ + * "system", + * `You are an expert translator. 
Format all responses as JSON objects with two keys: "original" and "translated".`, + * ], + * ["human", `Translate "{input}" into {language}.`], + * ]); + * + * const model = new ChatOllama({ + * baseUrl: "http://api.example.com", + * model: "llama2", + * format: "json", + * }); + * + * const chain = prompt.pipe(model); + * + * const result = await chain.invoke({ + * input: "I love programming", + * language: "German", + * }); + * + * ``` + */ +export class ChatOllama + extends SimpleChatModel + implements OllamaInput +{ + static lc_name() { + return "ChatOllama"; + } + + lc_serializable = true; + + lc_namespace = ["langchain-community", "chat_models", this._llmType()]; + + model = "llama2"; + + baseUrl = "http://localhost:11434"; + + embeddingOnly?: boolean; + + f16KV?: boolean; + + frequencyPenalty?: number; + + logitsAll?: boolean; + + lowVram?: boolean; + + mainGpu?: number; + + mirostat?: number; + + mirostatEta?: number; + + mirostatTau?: number; + + numBatch?: number; + + numCtx?: number; + + numGpu?: number; + + numGqa?: number; + + numKeep?: number; + + numThread?: number; + + penalizeNewline?: boolean; + + presencePenalty?: number; + + repeatLastN?: number; + + repeatPenalty?: number; + + ropeFrequencyBase?: number; + + ropeFrequencyScale?: number; + + temperature?: number; + + stop?: string[]; + + tfsZ?: number; + + topK?: number; + + topP?: number; + + typicalP?: number; + + useMLock?: boolean; + + useMMap?: boolean; + + vocabOnly?: boolean; + + format?: StringWithAutocomplete<"json">; + + constructor(fields: OllamaInput & BaseChatModelParams) { + super(fields); + this.model = fields.model ?? this.model; + this.baseUrl = fields.baseUrl?.endsWith("/") + ? fields.baseUrl.slice(0, -1) + : fields.baseUrl ?? 
this.baseUrl; + this.embeddingOnly = fields.embeddingOnly; + this.f16KV = fields.f16KV; + this.frequencyPenalty = fields.frequencyPenalty; + this.logitsAll = fields.logitsAll; + this.lowVram = fields.lowVram; + this.mainGpu = fields.mainGpu; + this.mirostat = fields.mirostat; + this.mirostatEta = fields.mirostatEta; + this.mirostatTau = fields.mirostatTau; + this.numBatch = fields.numBatch; + this.numCtx = fields.numCtx; + this.numGpu = fields.numGpu; + this.numGqa = fields.numGqa; + this.numKeep = fields.numKeep; + this.numThread = fields.numThread; + this.penalizeNewline = fields.penalizeNewline; + this.presencePenalty = fields.presencePenalty; + this.repeatLastN = fields.repeatLastN; + this.repeatPenalty = fields.repeatPenalty; + this.ropeFrequencyBase = fields.ropeFrequencyBase; + this.ropeFrequencyScale = fields.ropeFrequencyScale; + this.temperature = fields.temperature; + this.stop = fields.stop; + this.tfsZ = fields.tfsZ; + this.topK = fields.topK; + this.topP = fields.topP; + this.typicalP = fields.typicalP; + this.useMLock = fields.useMLock; + this.useMMap = fields.useMMap; + this.vocabOnly = fields.vocabOnly; + this.format = fields.format; + } + + _llmType() { + return "ollama"; + } + + /** + * A method that returns the parameters for an Ollama API call. It + * includes model and options parameters. + * @param options Optional parsed call options. + * @returns An object containing the parameters for an Ollama API call. 
+ */ + invocationParams(options?: this["ParsedCallOptions"]) { + return { + model: this.model, + format: this.format, + options: { + embedding_only: this.embeddingOnly, + f16_kv: this.f16KV, + frequency_penalty: this.frequencyPenalty, + logits_all: this.logitsAll, + low_vram: this.lowVram, + main_gpu: this.mainGpu, + mirostat: this.mirostat, + mirostat_eta: this.mirostatEta, + mirostat_tau: this.mirostatTau, + num_batch: this.numBatch, + num_ctx: this.numCtx, + num_gpu: this.numGpu, + num_gqa: this.numGqa, + num_keep: this.numKeep, + num_thread: this.numThread, + penalize_newline: this.penalizeNewline, + presence_penalty: this.presencePenalty, + repeat_last_n: this.repeatLastN, + repeat_penalty: this.repeatPenalty, + rope_frequency_base: this.ropeFrequencyBase, + rope_frequency_scale: this.ropeFrequencyScale, + temperature: this.temperature, + stop: options?.stop ?? this.stop, + tfs_z: this.tfsZ, + top_k: this.topK, + top_p: this.topP, + typical_p: this.typicalP, + use_mlock: this.useMLock, + use_mmap: this.useMMap, + vocab_only: this.vocabOnly, + }, + }; + } + + _combineLLMOutput() { + return {}; + } + + async *_streamResponseChunks( + input: BaseMessage[], + options: this["ParsedCallOptions"], + runManager?: CallbackManagerForLLMRun + ): AsyncGenerator { + const stream = await this.caller.call(async () => + createOllamaStream( + this.baseUrl, + { + ...this.invocationParams(options), + prompt: this._formatMessagesAsPrompt(input), + }, + options + ) + ); + for await (const chunk of stream) { + if (!chunk.done) { + yield new ChatGenerationChunk({ + text: chunk.response, + message: new AIMessageChunk({ content: chunk.response }), + }); + await runManager?.handleLLMNewToken(chunk.response ?? 
""); + } else { + yield new ChatGenerationChunk({ + text: "", + message: new AIMessageChunk({ content: "" }), + generationInfo: { + model: chunk.model, + total_duration: chunk.total_duration, + load_duration: chunk.load_duration, + prompt_eval_count: chunk.prompt_eval_count, + prompt_eval_duration: chunk.prompt_eval_duration, + eval_count: chunk.eval_count, + eval_duration: chunk.eval_duration, + }, + }); + } + } + } + + protected _formatMessagesAsPrompt(messages: BaseMessage[]): string { + const formattedMessages = messages + .map((message) => { + let messageText; + if (message._getType() === "human") { + messageText = `[INST] ${message.content} [/INST]`; + } else if (message._getType() === "ai") { + messageText = message.content; + } else if (message._getType() === "system") { + messageText = `<> ${message.content} <>`; + } else if (ChatMessage.isInstance(message)) { + messageText = `\n\n${message.role[0].toUpperCase()}${message.role.slice( + 1 + )}: ${message.content}`; + } else { + console.warn( + `Unsupported message type passed to Ollama: "${message._getType()}"` + ); + messageText = ""; + } + return messageText; + }) + .join("\n"); + return formattedMessages; + } + + /** @ignore */ + async _call( + messages: BaseMessage[], + options: this["ParsedCallOptions"], + runManager?: CallbackManagerForLLMRun + ): Promise { + const chunks = []; + for await (const chunk of this._streamResponseChunks( + messages, + options, + runManager + )) { + chunks.push(chunk.message.content); + } + return chunks.join(""); + } +} diff --git a/libs/langchain-community/src/chat_models/portkey.ts b/libs/langchain-community/src/chat_models/portkey.ts new file mode 100644 index 000000000000..e026a5a16ded --- /dev/null +++ b/libs/langchain-community/src/chat_models/portkey.ts @@ -0,0 +1,187 @@ +import { LLMOptions } from "portkey-ai"; +import { CallbackManagerForLLMRun } from "@langchain/core/callbacks/manager"; +import { + AIMessage, + AIMessageChunk, + BaseMessage, + ChatMessage, + 
ChatMessageChunk, + FunctionMessageChunk, + HumanMessage, + HumanMessageChunk, + SystemMessage, + SystemMessageChunk, +} from "@langchain/core/messages"; +import { + ChatResult, + ChatGeneration, + ChatGenerationChunk +} from "@langchain/core/outputs"; +import { BaseChatModel } from "@langchain/core/language_models/chat_models"; + +import { PortkeySession, getPortkeySession } from "../llms/portkey.js"; + +interface Message { + role?: string; + content?: string; +} + +function portkeyResponseToChatMessage(message: Message): BaseMessage { + switch (message.role) { + case "user": + return new HumanMessage(message.content || ""); + case "assistant": + return new AIMessage(message.content || ""); + case "system": + return new SystemMessage(message.content || ""); + default: + return new ChatMessage(message.content || "", message.role ?? "unknown"); + } +} + +function _convertDeltaToMessageChunk( + // eslint-disable-next-line @typescript-eslint/no-explicit-any + delta: Record +) { + const { role } = delta; + const content = delta.content ?? 
""; + let additional_kwargs; + if (delta.function_call) { + additional_kwargs = { + function_call: delta.function_call, + }; + } else { + additional_kwargs = {}; + } + if (role === "user") { + return new HumanMessageChunk({ content }); + } else if (role === "assistant") { + return new AIMessageChunk({ content, additional_kwargs }); + } else if (role === "system") { + return new SystemMessageChunk({ content }); + } else if (role === "function") { + return new FunctionMessageChunk({ + content, + additional_kwargs, + name: delta.name, + }); + } else { + return new ChatMessageChunk({ content, role }); + } +} + +export class PortkeyChat extends BaseChatModel { + apiKey?: string = undefined; + + baseURL?: string = undefined; + + mode?: string = undefined; + + llms?: [LLMOptions] | null = undefined; + + session: PortkeySession; + + lc_namespace = ["langchain-community", "chat_models", this._llmType()]; + + constructor(init?: Partial) { + super(init ?? {}); + this.apiKey = init?.apiKey; + this.baseURL = init?.baseURL; + this.mode = init?.mode; + this.llms = init?.llms; + this.session = getPortkeySession({ + apiKey: this.apiKey, + baseURL: this.baseURL, + llms: this.llms, + mode: this.mode, + }); + } + + _llmType() { + return "portkey"; + } + + async _generate( + messages: BaseMessage[], + options: this["ParsedCallOptions"], + _?: CallbackManagerForLLMRun + ): Promise { + const messagesList = messages.map((message) => { + if (typeof message.content !== "string") { + throw new Error( + "PortkeyChat does not support non-string message content." + ); + } + return { + role: message._getType() as string, + content: message.content, + }; + }); + const response = await this.session.portkey.chatCompletions.create({ + messages: messagesList, + ...options, + stream: false, + }); + const generations: ChatGeneration[] = []; + for (const data of response.choices ?? []) { + const text = data.message?.content ?? 
""; + const generation: ChatGeneration = { + text, + message: portkeyResponseToChatMessage(data.message ?? {}), + }; + if (data.finish_reason) { + generation.generationInfo = { finish_reason: data.finish_reason }; + } + generations.push(generation); + } + + return { + generations, + }; + } + + async *_streamResponseChunks( + messages: BaseMessage[], + options: this["ParsedCallOptions"], + runManager?: CallbackManagerForLLMRun + ): AsyncGenerator { + const messagesList = messages.map((message) => { + if (typeof message.content !== "string") { + throw new Error( + "PortkeyChat does not support non-string message content." + ); + } + return { + role: message._getType() as string, + content: message.content, + }; + }); + const response = await this.session.portkey.chatCompletions.create({ + messages: messagesList, + ...options, + stream: true, + }); + for await (const data of response) { + const choice = data?.choices[0]; + if (!choice) { + continue; + } + const chunk = new ChatGenerationChunk({ + message: _convertDeltaToMessageChunk(choice.delta ?? {}), + text: choice.message?.content ?? "", + generationInfo: { + finishReason: choice.finish_reason, + }, + }); + yield chunk; + void runManager?.handleLLMNewToken(chunk.text ?? 
""); + } + if (options.signal?.aborted) { + throw new Error("AbortError"); + } + } + + _combineLLMOutput() { + return {}; + } +} diff --git a/libs/langchain-community/src/chat_models/tests/chatanthropic.int.test.ts b/libs/langchain-community/src/chat_models/tests/chatanthropic.int.test.ts new file mode 100644 index 000000000000..cfc572c9584d --- /dev/null +++ b/libs/langchain-community/src/chat_models/tests/chatanthropic.int.test.ts @@ -0,0 +1,313 @@ +/* eslint-disable no-process-env */ + +import { expect, test } from "@jest/globals"; +import { HUMAN_PROMPT } from "@anthropic-ai/sdk"; +import { ChatMessage, HumanMessage } from "../../schema/index.js"; +import { ChatPromptValue } from "../../prompts/chat.js"; +import { + PromptTemplate, + ChatPromptTemplate, + AIMessagePromptTemplate, + HumanMessagePromptTemplate, + SystemMessagePromptTemplate, +} from "../../prompts/index.js"; +import { ChatAnthropic } from "../anthropic.js"; +import { CallbackManager } from "../../callbacks/index.js"; + +test("Test ChatAnthropic", async () => { + const chat = new ChatAnthropic({ modelName: "claude-instant-v1" }); + const message = new HumanMessage("Hello!"); + const res = await chat.call([message]); + console.log({ res }); +}); + +test("Test ChatAnthropic Generate", async () => { + const chat = new ChatAnthropic({ + modelName: "claude-instant-v1", + }); + const message = new HumanMessage("Hello!"); + const res = await chat.generate([[message], [message]]); + expect(res.generations.length).toBe(2); + for (const generation of res.generations) { + expect(generation.length).toBe(1); + for (const message of generation) { + console.log(message.text); + } + } + console.log({ res }); +}); + +test("Test ChatAnthropic Generate w/ ClientOptions", async () => { + const chat = new ChatAnthropic({ + modelName: "claude-instant-v1", + clientOptions: { + defaultHeaders: { + "Helicone-Auth": "HELICONE_API_KEY", + }, + }, + }); + const message = new HumanMessage("Hello!"); + const res = await 
chat.generate([[message], [message]]); + expect(res.generations.length).toBe(2); + for (const generation of res.generations) { + expect(generation.length).toBe(1); + for (const message of generation) { + console.log(message.text); + } + } + console.log({ res }); +}); + +test("Test ChatAnthropic Generate with a signal in call options", async () => { + const chat = new ChatAnthropic({ + modelName: "claude-instant-v1", + }); + const controller = new AbortController(); + const message = new HumanMessage( + "How is your day going? Be extremely verbose!" + ); + await expect(() => { + const res = chat.generate([[message], [message]], { + signal: controller.signal, + }); + setTimeout(() => { + controller.abort(); + }, 1000); + return res; + }).rejects.toThrow(); +}, 10000); + +test("Test ChatAnthropic tokenUsage with a batch", async () => { + const model = new ChatAnthropic({ + temperature: 0, + modelName: "claude-instant-v1", + }); + const res = await model.generate([ + [new HumanMessage(`Hello!`)], + [new HumanMessage(`Hi!`)], + ]); + console.log({ res }); +}); + +test("Test ChatAnthropic in streaming mode", async () => { + let nrNewTokens = 0; + let streamedCompletion = ""; + + const model = new ChatAnthropic({ + modelName: "claude-instant-v1", + streaming: true, + callbacks: CallbackManager.fromHandlers({ + async handleLLMNewToken(token: string) { + nrNewTokens += 1; + streamedCompletion += token; + }, + }), + }); + const message = new HumanMessage("Hello!"); + const res = await model.call([message]); + console.log({ res }); + + expect(nrNewTokens > 0).toBe(true); + expect(res.content).toBe(streamedCompletion); +}); + +test("Test ChatAnthropic in streaming mode with a signal", async () => { + let nrNewTokens = 0; + let streamedCompletion = ""; + + const model = new ChatAnthropic({ + modelName: "claude-instant-v1", + streaming: true, + callbacks: CallbackManager.fromHandlers({ + async handleLLMNewToken(token: string) { + nrNewTokens += 1; + streamedCompletion += token; 
+ }, + }), + }); + const controller = new AbortController(); + const message = new HumanMessage( + "Hello! Give me an extremely verbose response" + ); + await expect(() => { + const res = model.call([message], { + signal: controller.signal, + }); + setTimeout(() => { + controller.abort(); + }, 500); + return res; + }).rejects.toThrow(); + + console.log({ nrNewTokens, streamedCompletion }); +}, 5000); + +test("Test ChatAnthropic prompt value", async () => { + const chat = new ChatAnthropic({ + modelName: "claude-instant-v1", + }); + const message = new HumanMessage("Hello!"); + const res = await chat.generatePrompt([new ChatPromptValue([message])]); + expect(res.generations.length).toBe(1); + for (const generation of res.generations) { + for (const g of generation) { + console.log(g.text); + } + } + console.log({ res }); +}); + +test("ChatAnthropic, docs, prompt templates", async () => { + const chat = new ChatAnthropic({ + modelName: "claude-instant-v1", + temperature: 0, + }); + + const systemPrompt = PromptTemplate.fromTemplate( + "You are a helpful assistant that translates {input_language} to {output_language}." 
+ ); + + const chatPrompt = ChatPromptTemplate.fromMessages([ + new SystemMessagePromptTemplate(systemPrompt), + HumanMessagePromptTemplate.fromTemplate("{text}"), + ]); + + const responseA = await chat.generatePrompt([ + await chatPrompt.formatPromptValue({ + input_language: "English", + output_language: "French", + text: "I love programming.", + }), + ]); + + console.log(responseA.generations); +}); + +test("ChatAnthropic, longer chain of messages", async () => { + const chat = new ChatAnthropic({ + modelName: "claude-v1", + temperature: 0, + }); + + const chatPrompt = ChatPromptTemplate.fromMessages([ + HumanMessagePromptTemplate.fromTemplate(`Hi, my name is Joe!`), + AIMessagePromptTemplate.fromTemplate(`Nice to meet you, Joe!`), + HumanMessagePromptTemplate.fromTemplate("{text}"), + ]); + + const responseA = await chat.generatePrompt([ + await chatPrompt.formatPromptValue({ + text: "What did I just say my name was?", + }), + ]); + + console.log(responseA.generations); +}); + +test("ChatAnthropic, Anthropic apiUrl set manually via constructor", async () => { + // Pass the default URL through (should use this, and work as normal) + const anthropicApiUrl = "https://api.anthropic.com"; + const chat = new ChatAnthropic({ + modelName: "claude-instant-v1", + anthropicApiUrl, + }); + const message = new HumanMessage("Hello!"); + const res = await chat.call([message]); + console.log({ res }); +}); + +test("ChatAnthropic, Claude V2", async () => { + const chat = new ChatAnthropic({ + modelName: "claude-2", + temperature: 0, + }); + + const chatPrompt = ChatPromptTemplate.fromMessages([ + HumanMessagePromptTemplate.fromTemplate(`Hi, my name is Joe!`), + AIMessagePromptTemplate.fromTemplate(`Nice to meet you, Joe!`), + HumanMessagePromptTemplate.fromTemplate("{text}"), + ]); + + const responseA = await chat.generatePrompt([ + await chatPrompt.formatPromptValue({ + text: "What did I just say my name was?", + }), + ]); + + console.log(responseA.generations); +}); + 
+test("ChatAnthropic with specific roles in ChatMessage", async () => { + const chat = new ChatAnthropic({ + modelName: "claude-instant-v1", + maxTokensToSample: 10, + }); + const user_message = new ChatMessage("Hello!", HUMAN_PROMPT); + const res = await chat.call([user_message]); + console.log({ res }); +}); + +test("Test ChatAnthropic stream method", async () => { + const model = new ChatAnthropic({ + maxTokensToSample: 50, + modelName: "claude-instant-v1", + }); + const stream = await model.stream("Print hello world."); + const chunks = []; + for await (const chunk of stream) { + console.log(chunk); + chunks.push(chunk); + } + expect(chunks.length).toBeGreaterThan(1); +}); + +test("Test ChatAnthropic stream method with abort", async () => { + await expect(async () => { + const model = new ChatAnthropic({ + maxTokensToSample: 500, + modelName: "claude-instant-v1", + }); + const stream = await model.stream( + "How is your day going? Be extremely verbose.", + { + signal: AbortSignal.timeout(1000), + } + ); + for await (const chunk of stream) { + console.log(chunk); + } + }).rejects.toThrow(); +}); + +test("Test ChatAnthropic stream method with early break", async () => { + const model = new ChatAnthropic({ + maxTokensToSample: 50, + modelName: "claude-instant-v1", + }); + const stream = await model.stream( + "How is your day going? Be extremely verbose." 
+ ); + let i = 0; + for await (const chunk of stream) { + console.log(chunk); + i += 1; + if (i > 10) { + break; + } + } +}); + +test("Test ChatAnthropic headers passed through", async () => { + const chat = new ChatAnthropic({ + modelName: "claude-instant-v1", + anthropicApiKey: "NOT_REAL", + invocationKwargs: { + headers: { + "X-Api-Key": process.env.ANTHROPIC_API_KEY, + }, + }, + }); + const message = new HumanMessage("Hello!"); + const res = await chat.call([message]); + console.log({ res }); +}); diff --git a/libs/langchain-community/src/chat_models/tests/chatbaiduwenxin.int.test.ts b/libs/langchain-community/src/chat_models/tests/chatbaiduwenxin.int.test.ts new file mode 100644 index 000000000000..3608fd273c15 --- /dev/null +++ b/libs/langchain-community/src/chat_models/tests/chatbaiduwenxin.int.test.ts @@ -0,0 +1,136 @@ +import { test, expect } from "@jest/globals"; +import { ChatBaiduWenxin } from "../baiduwenxin.js"; +import { SystemMessage, HumanMessage } from "../../schema/index.js"; + +interface TestConfig { + modelName: string | undefined; + config: { + description?: string; + temperature?: number; + topP?: number; + penaltyScore?: number; + streaming?: boolean; + callbacks?: Array<{ + nrNewTokens?: number; + streamedCompletion?: string; + handleLLMNewToken?: (token: string) => Promise; + }>; + }; + system?: string; + message?: string; + shouldThrow?: boolean; +} + +const runTest = async ({ + modelName, + config, + system = "", + message = "Hello!", + shouldThrow = false, +}: TestConfig) => { + const description = `Test ChatBaiduWenxin ${modelName || "default model"} ${ + config.description || "" + }`.trim(); + let nrNewTokens = 0; + let streamedCompletion = ""; + if (config.streaming) { + // eslint-disable-next-line no-param-reassign + config.callbacks = [ + { + async handleLLMNewToken(token: string) { + nrNewTokens += 1; + streamedCompletion += token; + }, + }, + ]; + } + test.skip(description, async () => { + const chat = new ChatBaiduWenxin({ + 
modelName, + ...config, + }); + + const messages = []; + if (system) { + messages.push(new SystemMessage(system)); + } + messages.push(new HumanMessage(message)); + + if (shouldThrow) { + await expect(chat.call(messages)).rejects.toThrow(); + return; + } + + const res = await chat.call(messages); + console.log({ res }); + + if (config.streaming) { + expect(nrNewTokens > 0).toBe(true); + expect(res.text).toBe(streamedCompletion); + } + }); +}; + +const testConfigs: TestConfig[] = [ + { modelName: undefined, config: {} }, + { modelName: "ERNIE-Bot", config: {} }, + { + modelName: "ERNIE-Bot", + config: { description: "with temperature", temperature: 1 }, + }, + { modelName: "ERNIE-Bot", config: { description: "with topP", topP: 1 } }, + { + modelName: "ERNIE-Bot", + config: { description: "with penaltyScore", penaltyScore: 1 }, + }, + { + modelName: "ERNIE-Bot", + config: { + description: "in streaming mode", + streaming: true, + }, + message: "您好,请讲个长笑话", + }, + { + modelName: "ERNIE-Bot", + config: { + description: "illegal input should throw an error", + temperature: 0, + }, + shouldThrow: true, + }, + { + modelName: "ERNIE-Bot", + config: { + description: "illegal input in streaming mode should throw an error", + streaming: true, + temperature: 0, + }, + shouldThrow: true, + }, + { modelName: "ERNIE-Bot-turbo", config: {} }, + { + modelName: "ERNIE-Bot-turbo", + config: { + description: "in streaming mode", + streaming: true, + }, + message: "您好,请讲个长笑话", + }, + { + modelName: "ERNIE-Bot-turbo", + config: { + description: "with system message", + }, + system: "你是一个说文言文的人", + }, + { + modelName: "ERNIE-Bot-4", + config: {}, + }, +]; + +testConfigs.forEach((testConfig) => { + // eslint-disable-next-line no-void + void runTest(testConfig); +}); diff --git a/libs/langchain-community/src/chat_models/tests/chatbedrock.int.test.ts b/libs/langchain-community/src/chat_models/tests/chatbedrock.int.test.ts new file mode 100644 index 000000000000..0c5db6bacb15 --- /dev/null 
+++ b/libs/langchain-community/src/chat_models/tests/chatbedrock.int.test.ts @@ -0,0 +1,186 @@ +/* eslint-disable no-process-env */ +/* eslint-disable @typescript-eslint/no-non-null-assertion */ + +import { test, expect } from "@jest/globals"; +import { BedrockChat } from "../bedrock/web.js"; +import { HumanMessage } from "../../schema/index.js"; + +// void testChatModel( +// "Test Bedrock chat model: Llama2 13B v1", +// "us-east-1", +// "meta.llama2-13b-chat-v1", +// "What is your name?" +// ); +// void testChatStreamingModel( +// "Test Bedrock streaming chat model: Llama2 13B v1", +// "us-east-1", +// "meta.llama2-13b-chat-v1", +// "What is your name and something about yourself?" +// ); + +void testChatModel( + "Test Bedrock chat model: Claude-v2", + "us-east-1", + "anthropic.claude-v2", + "What is your name?" +); +void testChatStreamingModel( + "Test Bedrock chat model streaming: Claude-v2", + "us-east-1", + "anthropic.claude-v2", + "What is your name and something about yourself?" +); + +void testChatHandleLLMNewToken( + "Test Bedrock chat model HandleLLMNewToken: Claude-v2", + "us-east-1", + "anthropic.claude-v2", + "What is your name and something about yourself?" +); +// void testChatHandleLLMNewToken( +// "Test Bedrock chat model HandleLLMNewToken: Llama2 13B v1", +// "us-east-1", +// "meta.llama2-13b-chat-v1", +// "What is your name and something about yourself?" +// ); + +/** + * Tests a BedrockChat model + * @param title The name of the test to run + * @param defaultRegion The AWS region to default back to if not set via environment + * @param model The model string to test + * @param message The prompt test to send to the LLM + */ +async function testChatModel( + title: string, + defaultRegion: string, + model: string, + message: string +) { + test(title, async () => { + const region = process.env.BEDROCK_AWS_REGION ?? 
defaultRegion; + + const bedrock = new BedrockChat({ + maxTokens: 20, + region, + model, + maxRetries: 0, + credentials: { + secretAccessKey: process.env.BEDROCK_AWS_SECRET_ACCESS_KEY!, + accessKeyId: process.env.BEDROCK_AWS_ACCESS_KEY_ID!, + sessionToken: process.env.BEDROCK_AWS_SESSION_TOKEN, + }, + }); + + const res = await bedrock.call([new HumanMessage(message)]); + console.log(res); + }); +} +/** + * Tests a BedrockChat model with a streaming response + * @param title The name of the test to run + * @param defaultRegion The AWS region to default back to if not set via environment + * @param model The model string to test + * @param message The prompt test to send to the LLM + */ +async function testChatStreamingModel( + title: string, + defaultRegion: string, + model: string, + message: string +) { + test(title, async () => { + const region = process.env.BEDROCK_AWS_REGION ?? defaultRegion; + + const bedrock = new BedrockChat({ + maxTokens: 200, + region, + model, + maxRetries: 0, + credentials: { + secretAccessKey: process.env.BEDROCK_AWS_SECRET_ACCESS_KEY!, + accessKeyId: process.env.BEDROCK_AWS_ACCESS_KEY_ID!, + sessionToken: process.env.BEDROCK_AWS_SESSION_TOKEN, + }, + }); + + const stream = await bedrock.stream([ + new HumanMessage({ + content: message, + }), + ]); + const chunks = []; + for await (const chunk of stream) { + console.log(chunk); + chunks.push(chunk); + } + expect(chunks.length).toBeGreaterThan(1); + }); +} +/** + * Tests a BedrockChat model with a streaming response using a new token callback + * @param title The name of the test to run + * @param defaultRegion The AWS region to default back to if not set via environment + * @param model The model string to test + * @param message The prompt test to send to the LLM + */ +async function testChatHandleLLMNewToken( + title: string, + defaultRegion: string, + model: string, + message: string +) { + test(title, async () => { + const region = process.env.BEDROCK_AWS_REGION ?? 
defaultRegion; + const tokens: string[] = []; + + const bedrock = new BedrockChat({ + maxTokens: 200, + region, + model, + maxRetries: 0, + credentials: { + secretAccessKey: process.env.BEDROCK_AWS_SECRET_ACCESS_KEY!, + accessKeyId: process.env.BEDROCK_AWS_ACCESS_KEY_ID!, + sessionToken: process.env.BEDROCK_AWS_SESSION_TOKEN, + }, + streaming: true, + callbacks: [ + { + handleLLMNewToken: (token) => { + tokens.push(token); + }, + }, + ], + }); + const stream = await bedrock.call([new HumanMessage(message)]); + expect(tokens.length).toBeGreaterThan(1); + expect(stream.content).toEqual(tokens.join("")); + }); +} + +test.skip.each([ + "amazon.titan-text-express-v1", + // These models should be supported in the future + // "amazon.titan-text-lite-v1", + // "amazon.titan-text-agile-v1", +])("Test Bedrock base chat model: %s", async (model) => { + const region = process.env.BEDROCK_AWS_REGION ?? "us-east-1"; + + const bedrock = new BedrockChat({ + region, + model, + maxRetries: 0, + modelKwargs: {}, + credentials: { + secretAccessKey: process.env.BEDROCK_AWS_SECRET_ACCESS_KEY!, + accessKeyId: process.env.BEDROCK_AWS_ACCESS_KEY_ID!, + sessionToken: process.env.BEDROCK_AWS_SESSION_TOKEN, + }, + }); + + const res = await bedrock.call([new HumanMessage("What is your name?")]); + console.log(res); + + expect(res.content.length).toBeGreaterThan(1); +}); diff --git a/libs/langchain-community/src/chat_models/tests/chatcloudflare_workersai.int.test.ts b/libs/langchain-community/src/chat_models/tests/chatcloudflare_workersai.int.test.ts new file mode 100644 index 000000000000..7d0b3357add5 --- /dev/null +++ b/libs/langchain-community/src/chat_models/tests/chatcloudflare_workersai.int.test.ts @@ -0,0 +1,131 @@ +import { describe, test } from "@jest/globals"; +import { ChatMessage, HumanMessage } from "../../schema/index.js"; +import { + PromptTemplate, + ChatPromptTemplate, + AIMessagePromptTemplate, + HumanMessagePromptTemplate, + SystemMessagePromptTemplate, +} from 
"../../prompts/index.js"; +import { ChatCloudflareWorkersAI } from "../cloudflare_workersai.js"; +import { getEnvironmentVariable } from "../../util/env.js"; + +describe("ChatCloudflareWorkersAI", () => { + test("call", async () => { + const chat = new ChatCloudflareWorkersAI(); + const message = new HumanMessage("Hello!"); + const res = await chat.call([message]); + console.log({ res }); + }); + + test("generate", async () => { + const chat = new ChatCloudflareWorkersAI(); + const message = new HumanMessage("Hello!"); + const res = await chat.generate([[message]]); + console.log(JSON.stringify(res, null, 2)); + }); + + test("generate with streaming true", async () => { + const chat = new ChatCloudflareWorkersAI({ + streaming: true, + }); + const message = new HumanMessage("What is 2 + 2?"); + const tokens: string[] = []; + const res = await chat.generate([[message]], { + callbacks: [ + { + handleLLMNewToken: (token) => { + tokens.push(token); + }, + }, + ], + }); + expect(tokens.length).toBeGreaterThan(1); + expect(tokens.join("")).toEqual(res.generations[0][0].text); + }); + + test("stream", async () => { + const chat = new ChatCloudflareWorkersAI(); + const message = new HumanMessage("What is 2 + 2?"); + const stream = await chat.stream([message]); + const chunks = []; + for await (const chunk of stream) { + console.log(chunk.content); + chunks.push(chunk); + } + expect(chunks.length).toBeGreaterThan(1); + console.log(chunks.map((chunk) => chunk.content).join("")); + expect( + chunks.map((chunk) => chunk.content).join("").length + ).toBeGreaterThan(1); + }); + + test("custom messages", async () => { + const chat = new ChatCloudflareWorkersAI(); + const res = await chat.call([new ChatMessage("Hello!", "user")]); + console.log(JSON.stringify(res, null, 2)); + }); + + test("prompt templates", async () => { + const chat = new ChatCloudflareWorkersAI(); + + // PaLM doesn't support translation yet + const systemPrompt = PromptTemplate.fromTemplate( + "You are a 
helpful assistant who must always respond like a {job}." + ); + + const chatPrompt = ChatPromptTemplate.fromMessages([ + new SystemMessagePromptTemplate(systemPrompt), + HumanMessagePromptTemplate.fromTemplate("{text}"), + ]); + + const responseA = await chat.generatePrompt([ + await chatPrompt.formatPromptValue({ + job: "pirate", + text: "What would be a good company name a company that makes colorful socks?", + }), + ]); + + console.log(responseA.generations); + }); + + test("longer chain of messages", async () => { + const chat = new ChatCloudflareWorkersAI(); + + const chatPrompt = ChatPromptTemplate.fromMessages([ + HumanMessagePromptTemplate.fromTemplate(`Hi, my name is Joe!`), + AIMessagePromptTemplate.fromTemplate(`Nice to meet you, Joe!`), + HumanMessagePromptTemplate.fromTemplate("{text}"), + ]); + + const responseA = await chat.generatePrompt([ + await chatPrompt.formatPromptValue({ + text: "What did I just say my name was?", + }), + ]); + + console.log(responseA.generations); + }); + + test.skip("custom base url", async () => { + const chat = new ChatCloudflareWorkersAI({ + baseUrl: `https://gateway.ai.cloudflare.com/v1/${getEnvironmentVariable( + "CLOUDFLARE_ACCOUNT_ID" + )}/lang-chainjs/workers-ai/`, + }); + + const chatPrompt = ChatPromptTemplate.fromMessages([ + HumanMessagePromptTemplate.fromTemplate(`Hi, my name is Joe!`), + AIMessagePromptTemplate.fromTemplate(`Nice to meet you, Joe!`), + HumanMessagePromptTemplate.fromTemplate("{text}"), + ]); + + const responseA = await chat.generatePrompt([ + await chatPrompt.formatPromptValue({ + text: "What did I just say my name was?", + }), + ]); + + console.log(responseA.generations); + }); +}); diff --git a/libs/langchain-community/src/chat_models/tests/chatfireworks.int.test.ts b/libs/langchain-community/src/chat_models/tests/chatfireworks.int.test.ts new file mode 100644 index 000000000000..7a0e268d90dd --- /dev/null +++ b/libs/langchain-community/src/chat_models/tests/chatfireworks.int.test.ts @@ -0,0 
+1,73 @@ +import { describe, test } from "@jest/globals"; +import { ChatMessage, HumanMessage } from "../../schema/index.js"; +import { + PromptTemplate, + ChatPromptTemplate, + AIMessagePromptTemplate, + HumanMessagePromptTemplate, + SystemMessagePromptTemplate, +} from "../../prompts/index.js"; +import { ChatFireworks } from "../fireworks.js"; + +describe("ChatFireworks", () => { + test("call", async () => { + const chat = new ChatFireworks(); + const message = new HumanMessage("Hello!"); + const res = await chat.call([message]); + console.log({ res }); + }); + + test("generate", async () => { + const chat = new ChatFireworks(); + const message = new HumanMessage("Hello!"); + const res = await chat.generate([[message]]); + console.log(JSON.stringify(res, null, 2)); + }); + + test("custom messages", async () => { + const chat = new ChatFireworks(); + const res = await chat.call([new ChatMessage("Hello!", "user")]); + console.log(JSON.stringify(res, null, 2)); + }); + + test("prompt templates", async () => { + const chat = new ChatFireworks(); + + // PaLM doesn't support translation yet + const systemPrompt = PromptTemplate.fromTemplate( + "You are a helpful assistant who must always respond like a {job}." 
+ ); + + const chatPrompt = ChatPromptTemplate.fromMessages([ + new SystemMessagePromptTemplate(systemPrompt), + HumanMessagePromptTemplate.fromTemplate("{text}"), + ]); + + const responseA = await chat.generatePrompt([ + await chatPrompt.formatPromptValue({ + job: "pirate", + text: "What would be a good company name a company that makes colorful socks?", + }), + ]); + + console.log(responseA.generations); + }); + + test("longer chain of messages", async () => { + const chat = new ChatFireworks(); + + const chatPrompt = ChatPromptTemplate.fromMessages([ + HumanMessagePromptTemplate.fromTemplate(`Hi, my name is Joe!`), + AIMessagePromptTemplate.fromTemplate(`Nice to meet you, Joe!`), + HumanMessagePromptTemplate.fromTemplate("{text}"), + ]); + + const responseA = await chat.generatePrompt([ + await chatPrompt.formatPromptValue({ + text: "What did I just say my name was?", + }), + ]); + + console.log(responseA.generations); + }); +}); diff --git a/libs/langchain-community/src/chat_models/tests/chatgooglepalm.int.test.ts b/libs/langchain-community/src/chat_models/tests/chatgooglepalm.int.test.ts new file mode 100644 index 000000000000..967335eb1072 --- /dev/null +++ b/libs/langchain-community/src/chat_models/tests/chatgooglepalm.int.test.ts @@ -0,0 +1,138 @@ +import { test } from "@jest/globals"; +import { HumanMessage, AIMessage } from "../../schema/index.js"; +import { + PromptTemplate, + ChatPromptTemplate, + MessagesPlaceholder, + AIMessagePromptTemplate, + HumanMessagePromptTemplate, + SystemMessagePromptTemplate, +} from "../../prompts/index.js"; +import { ConversationChain } from "../../chains/conversation.js"; +import { BufferMemory } from "../../memory/buffer_memory.js"; +import { ChatGooglePaLM } from "../googlepalm.js"; + +test.skip("Test ChatGooglePalm", async () => { + const chat = new ChatGooglePaLM({ + maxRetries: 1, + }); + const message = new HumanMessage("Hello!"); + const res = await chat.call([message]); + console.log({ res }); +}); + 
+test.skip("Test ChatGooglePalm generate", async () => { + const chat = new ChatGooglePaLM({ + maxRetries: 1, + }); + const message = new HumanMessage("Hello!"); + const res = await chat.generate([[message]]); + console.log(JSON.stringify(res, null, 2)); +}); + +test.skip("ChatGooglePalm, prompt templates", async () => { + const chat = new ChatGooglePaLM({ + maxRetries: 1, + examples: [ + { + input: new HumanMessage("What is your favorite sock color?"), + output: new AIMessage("My favorite sock color be arrrr-ange!"), + }, + ], + }); + + // PaLM doesn't support translation yet + const systemPrompt = PromptTemplate.fromTemplate( + "You are a helpful assistant who must always respond like a {job}." + ); + + const chatPrompt = ChatPromptTemplate.fromMessages([ + new SystemMessagePromptTemplate(systemPrompt), + HumanMessagePromptTemplate.fromTemplate("{text}"), + ]); + + const responseA = await chat.generatePrompt([ + await chatPrompt.formatPromptValue({ + job: "pirate", + text: "What would be a good company name a company that makes colorful socks?", + }), + ]); + + console.log(responseA.generations); +}); + +test.skip("ChatGooglePalm, longer chain of messages", async () => { + const chat = new ChatGooglePaLM({ + maxRetries: 1, + }); + + const chatPrompt = ChatPromptTemplate.fromMessages([ + AIMessagePromptTemplate.fromTemplate( + `Hello there! I'm Droid, your personal assistant.` + ), + HumanMessagePromptTemplate.fromTemplate(`Hi, my name is Joe!`), + AIMessagePromptTemplate.fromTemplate( + `Nice to meet you, Joe! 
How can I help you today?` + ), + HumanMessagePromptTemplate.fromTemplate("{text}"), + ]); + + const responseA = await chat.generatePrompt([ + await chatPrompt.formatPromptValue({ + text: "What did I just say my name was?", + }), + ]); + + console.log(responseA.generations); +}); + +test.skip("ChatGooglePalm, with a memory in a chain", async () => { + const chatPrompt = ChatPromptTemplate.fromMessages([ + SystemMessagePromptTemplate.fromTemplate( + "You are a helpful assistant who must always respond like a pirate" + ), + new MessagesPlaceholder("history"), + HumanMessagePromptTemplate.fromTemplate("{input}"), + ]); + + const chain = new ConversationChain({ + memory: new BufferMemory({ returnMessages: true, memoryKey: "history" }), + prompt: chatPrompt, + llm: new ChatGooglePaLM({ + maxRetries: 1, + }), + }); + + const response = await chain.call({ + input: "Hi, my name is afirstenberg!", + }); + + console.log(response); + + const response2 = await chain.call({ + input: "What did I say my name was?", + }); + + console.log(response2); +}); + +test.skip("ChatGooglePalm, chain of messages on code", async () => { + const chat = new ChatGooglePaLM({ + maxRetries: 1, + }); + + const chatPrompt = ChatPromptTemplate.fromMessages([ + SystemMessagePromptTemplate.fromTemplate( + `Answer all questions using Python and just show the code without an explanation.` + ), + HumanMessagePromptTemplate.fromTemplate("{text}"), + ]); + + const responseA = await chat.generatePrompt([ + await chatPrompt.formatPromptValue({ + text: "How can I write a for loop counting to 10?", + }), + ]); + + console.log(JSON.stringify(responseA.generations, null, 1)); +}); diff --git a/libs/langchain-community/src/chat_models/tests/chatgooglepalm.test.ts b/libs/langchain-community/src/chat_models/tests/chatgooglepalm.test.ts new file mode 100644 index 000000000000..11d99dd77f01 --- /dev/null +++ b/libs/langchain-community/src/chat_models/tests/chatgooglepalm.test.ts @@ -0,0 +1,252 @@ +import { protos } 
from "@google-ai/generativelanguage"; +import { expect, test } from "@jest/globals"; +import { + AIMessage, + BaseMessage, + HumanMessage, + SystemMessage, +} from "../../schema/index.js"; +import { ChatGooglePaLM } from "../googlepalm.js"; + +// Test class extending actual class to test private & protected methods +class ChatGooglePaLMTest extends ChatGooglePaLM { + public _getPalmContextInstruction(messages: BaseMessage[]) { + return super._getPalmContextInstruction(messages); + } + + public _mapBaseMessagesToPalmMessages(messages: BaseMessage[]) { + return super._mapBaseMessagesToPalmMessages(messages); + } + + public _mapPalmMessagesToChatResult( + msgRes: protos.google.ai.generativelanguage.v1beta2.IGenerateMessageResponse + ) { + return super._mapPalmMessagesToChatResult(msgRes); + } +} + +test("Google Palm Chat - `temperature` must be in range [0.0,1.0]", async () => { + expect( + () => + new ChatGooglePaLMTest({ + temperature: -1.0, + }) + ).toThrow(); + expect( + () => + new ChatGooglePaLMTest({ + temperature: 1.1, + }) + ).toThrow(); +}); + +test("Google Palm Chat - `topP` must be positive", async () => { + expect( + () => + new ChatGooglePaLMTest({ + topP: -1, + }) + ).toThrow(); +}); + +test("Google Palm Chat - `topK` must be positive", async () => { + expect( + () => + new ChatGooglePaLMTest({ + topK: -1, + }) + ).toThrow(); +}); + +test("Google Palm Chat - gets the Palm prompt context from 'system' messages", async () => { + const messages: BaseMessage[] = [ + new SystemMessage("system-1"), + new AIMessage("ai-1"), + new HumanMessage("human-1"), + new SystemMessage("system-2"), + ]; + const model = new ChatGooglePaLMTest({ + apiKey: "GOOGLE_PALM_API_KEY", + }); + + const context = model._getPalmContextInstruction(messages); + expect(context).toBe("system-1"); +}); + +test("Google Palm Chat - maps `BaseMessage` to Palm message", async () => { + const messages: BaseMessage[] = [ + new SystemMessage("system-1"), + new AIMessage("ai-1"), + new 
HumanMessage("human-1"), + new AIMessage({ + content: "ai-2", + name: "droid", + additional_kwargs: { + citationSources: [ + { + startIndex: 0, + endIndex: 5, + uri: "https://example.com", + license: "MIT", + }, + ], + }, + }), + new HumanMessage({ + content: "human-2", + name: "skywalker", + }), + ]; + const model = new ChatGooglePaLMTest({ + apiKey: "GOOGLE_PALM_API_KEY", + }); + + const palmMessages = model._mapBaseMessagesToPalmMessages(messages); + expect(palmMessages.length).toEqual(4); + expect(palmMessages[0]).toEqual({ + author: "ai", + content: "ai-1", + citationMetadata: { + citationSources: undefined, + }, + }); + expect(palmMessages[1]).toEqual({ + author: "human", + content: "human-1", + citationMetadata: { + citationSources: undefined, + }, + }); + expect(palmMessages[2]).toEqual({ + author: "droid", + content: "ai-2", + citationMetadata: { + citationSources: [ + { + startIndex: 0, + endIndex: 5, + uri: "https://example.com", + license: "MIT", + }, + ], + }, + }); + expect(palmMessages[3]).toEqual({ + author: "skywalker", + content: "human-2", + citationMetadata: { + citationSources: undefined, + }, + }); +}); + +test("Google Palm Chat - removes 'system' messages while mapping `BaseMessage` to Palm message", async () => { + const messages: BaseMessage[] = [ + new SystemMessage("system-1"), + new AIMessage("ai-1"), + new HumanMessage("human-1"), + new SystemMessage("system-2"), + ]; + const model = new ChatGooglePaLMTest({ + apiKey: "GOOGLE_PALM_API_KEY", + }); + + const palmMessages = model._mapBaseMessagesToPalmMessages(messages); + expect(palmMessages.length).toEqual(2); + expect(palmMessages[0].content).toEqual("ai-1"); + expect(palmMessages[1].content).toEqual("human-1"); +}); + +test("Google Palm Chat - throws error for consecutive 'ai'/'human' messages while mapping `BaseMessage` to Palm message", async () => { + const messages: BaseMessage[] = [ + new AIMessage("ai-1"), + new HumanMessage("human-1"), + new AIMessage("ai-2"), + new 
HumanMessage("human-2"), + new HumanMessage("human-3"), + ]; + const model = new ChatGooglePaLMTest({ + apiKey: "GOOGLE_PALM_API_KEY", + }); + + expect(() => model._mapBaseMessagesToPalmMessages(messages)).toThrow(); +}); + +test("Google Palm Chat - maps Palm generated message to `AIMessage` chat result", async () => { + const generations: protos.google.ai.generativelanguage.v1beta2.IGenerateMessageResponse = + { + candidates: [ + { + author: "droid", + content: "ai-1", + citationMetadata: { + citationSources: [ + { + startIndex: 0, + endIndex: 5, + uri: "https://example.com", + license: "MIT", + }, + ], + }, + }, + ], + filters: [ + { + message: "potential problem", + reason: "SAFETY", + }, + ], + }; + const model = new ChatGooglePaLMTest({ + apiKey: "GOOGLE_PALM_API_KEY", + }); + + const chatResult = model._mapPalmMessagesToChatResult(generations); + expect(chatResult.generations.length).toEqual(1); + expect(chatResult.generations[0].text).toBe("ai-1"); + expect(chatResult.generations[0].message._getType()).toBe("ai"); + expect(chatResult.generations[0].message.name).toBe("droid"); + expect(chatResult.generations[0].message.content).toBe("ai-1"); + expect( + chatResult.generations[0].message.additional_kwargs.citationSources + ).toEqual([ + { + startIndex: 0, + endIndex: 5, + uri: "https://example.com", + license: "MIT", + }, + ]); + expect(chatResult.generations[0].message.additional_kwargs.filters).toEqual([ + { + message: "potential problem", + reason: "SAFETY", + }, + ]); +}); + +test("Google Palm Chat - gets empty chat result & reason if generation failed", async () => { + const generations: protos.google.ai.generativelanguage.v1beta2.IGenerateMessageResponse = + { + candidates: [], + filters: [ + { + message: "potential problem", + reason: "SAFETY", + }, + ], + }; + const model = new ChatGooglePaLMTest({ + apiKey: "GOOGLE_PALM_API_KEY", + }); + + const chatResult = model._mapPalmMessagesToChatResult(generations); + 
expect(chatResult.generations.length).toEqual(0); + expect(chatResult.llmOutput?.filters).toEqual([ + { + message: "potential problem", + reason: "SAFETY", + }, + ]); +}); diff --git a/libs/langchain-community/src/chat_models/tests/chatgooglevertexai.int.test.ts b/libs/langchain-community/src/chat_models/tests/chatgooglevertexai.int.test.ts new file mode 100644 index 000000000000..f691ae72ce55 --- /dev/null +++ b/libs/langchain-community/src/chat_models/tests/chatgooglevertexai.int.test.ts @@ -0,0 +1,145 @@ +import { describe, expect, test } from "@jest/globals"; +import { ChatMessage, HumanMessage } from "../../schema/index.js"; +import { + PromptTemplate, + ChatPromptTemplate, + MessagesPlaceholder, + AIMessagePromptTemplate, + HumanMessagePromptTemplate, + SystemMessagePromptTemplate, +} from "../../prompts/index.js"; +import { ConversationChain } from "../../chains/conversation.js"; +import { BufferMemory } from "../../memory/buffer_memory.js"; +import { ChatGoogleVertexAI } from "../googlevertexai/index.js"; + +describe("ChatGoogleVertexAI", () => { + test("call", async () => { + const chat = new ChatGoogleVertexAI(); + const message = new HumanMessage("Hello!"); + const res = await chat.call([message]); + console.log({ res }); + }); + + test("32k", async () => { + const chat = new ChatGoogleVertexAI({ + model: "chat-bison-32k", + }); + const message = new HumanMessage("Hello!"); + const res = await chat.call([message]); + console.log({ res }); + }); + + test("generate", async () => { + const chat = new ChatGoogleVertexAI(); + const message = new HumanMessage("Hello!"); + const res = await chat.generate([[message]]); + console.log(JSON.stringify(res, null, 2)); + }); + + test("custom messages", async () => { + const chat = new ChatGoogleVertexAI(); + const res = await chat.call([new ChatMessage("Hello!", "user")]); + console.log(JSON.stringify(res, null, 2)); + }); + + test("prompt templates", async () => { + const chat = new ChatGoogleVertexAI(); + + // PaLM 
doesn't support translation yet + const systemPrompt = PromptTemplate.fromTemplate( + "You are a helpful assistant who must always respond like a {job}." + ); + + const chatPrompt = ChatPromptTemplate.fromMessages([ + new SystemMessagePromptTemplate(systemPrompt), + HumanMessagePromptTemplate.fromTemplate("{text}"), + ]); + + const responseA = await chat.generatePrompt([ + await chatPrompt.formatPromptValue({ + job: "pirate", + text: "What would be a good company name a company that makes colorful socks?", + }), + ]); + + console.log(responseA.generations); + }); + + test("longer chain of messages", async () => { + const chat = new ChatGoogleVertexAI(); + + const chatPrompt = ChatPromptTemplate.fromMessages([ + HumanMessagePromptTemplate.fromTemplate(`Hi, my name is Joe!`), + AIMessagePromptTemplate.fromTemplate(`Nice to meet you, Joe!`), + HumanMessagePromptTemplate.fromTemplate("{text}"), + ]); + + const responseA = await chat.generatePrompt([ + await chatPrompt.formatPromptValue({ + text: "What did I just say my name was?", + }), + ]); + + console.log(responseA.generations); + }); + + test("with a memory in a chain", async () => { + const chatPrompt = ChatPromptTemplate.fromMessages([ + SystemMessagePromptTemplate.fromTemplate( + "You are a helpful assistant who must always respond like a pirate" + ), + new MessagesPlaceholder("history"), + HumanMessagePromptTemplate.fromTemplate("{input}"), + ]); + + const chain = new ConversationChain({ + memory: new BufferMemory({ returnMessages: true, memoryKey: "history" }), + prompt: chatPrompt, + llm: new ChatGoogleVertexAI(), + }); + + const response = await chain.call({ + input: "Hi, my name is afirstenberg!", + }); + + console.log(response); + + const response2 = await chain.call({ + input: "What did I say my name was?", + }); + + console.log(response2); + }); + + test("code, chain of messages", async () => { + const chat = new ChatGoogleVertexAI({ model: "codechat-bison" }); + + const chatPrompt = 
ChatPromptTemplate.fromMessages([ + SystemMessagePromptTemplate.fromTemplate( + `Answer all questions using Python and just show the code without an explanation.` + ), + HumanMessagePromptTemplate.fromTemplate("{text}"), + ]); + + const responseA = await chat.generatePrompt([ + await chatPrompt.formatPromptValue({ + text: "How can I write a for loop counting to 10?", + }), + ]); + + console.log(JSON.stringify(responseA.generations, null, 1)); + }); + + test("stream method", async () => { + const model = new ChatGoogleVertexAI(); + const stream = await model.stream( + "What is the answer to life, the universe, and everything? Be verbose." + ); + const chunks = []; + for await (const chunk of stream) { + console.log("chunk", chunk); + chunks.push(chunk); + } + expect(chunks.length).toBeGreaterThan(1); + }); +}); diff --git a/libs/langchain-community/src/chat_models/tests/chatgooglevertexai.test.ts b/libs/langchain-community/src/chat_models/tests/chatgooglevertexai.test.ts new file mode 100644 index 000000000000..fe05d8092f19 --- /dev/null +++ b/libs/langchain-community/src/chat_models/tests/chatgooglevertexai.test.ts @@ -0,0 +1,116 @@ +import { test, expect } from "@jest/globals"; +import { + BaseMessage, + SystemMessage, + HumanMessage, + AIMessage, +} from "../../schema/index.js"; +import { ChatExample, ChatGoogleVertexAI } from "../googlevertexai/index.js"; + +test("Google messages", async () => { + const messages: BaseMessage[] = [ + new HumanMessage("Human1"), + new AIMessage("AI1"), + new HumanMessage("Human2"), + ]; + const model = new ChatGoogleVertexAI(); + const instance = model.createInstance(messages); + expect(instance.context).toBe(""); + expect(instance.messages[0].author).toBe("user"); + expect(instance.messages[1].author).toBe("bot"); +}); + +test("Google messages with a system message", async () => { + const messages: BaseMessage[] = [ + new SystemMessage("System1"), + new HumanMessage("Human1"), + new AIMessage("AI1"), + new HumanMessage("Human2"), 
+ ]; + const model = new ChatGoogleVertexAI(); + const instance = model.createInstance(messages); + expect(instance.context).toBe("System1"); + expect(instance.messages[0].author).toBe("user"); + expect(instance.messages[1].author).toBe("bot"); +}); + +test("Google examples", async () => { + const messages: BaseMessage[] = [ + new SystemMessage("System1"), + new HumanMessage("Human1"), + new AIMessage("AI1"), + new HumanMessage("Human2"), + ]; + const examples: ChatExample[] = [ + { + input: new HumanMessage("Example Human1"), + output: new AIMessage("Example AI1"), + }, + ]; + const model = new ChatGoogleVertexAI({ + examples, + }); + const instance = model.createInstance(messages); + console.log(JSON.stringify(instance, null, 2)); + expect(instance.examples?.[0].input.author).toBe("user"); + expect(instance.examples?.[0].output.author).toBe("bot"); +}); + +test("Google Throw an error for input messages where SystemMessage is not first", async () => { + const messages: BaseMessage[] = [ + new HumanMessage("Human1"), + new SystemMessage("System1"), + new AIMessage("AI1"), + new HumanMessage("Human2"), + ]; + const model = new ChatGoogleVertexAI(); + expect(() => model.createInstance(messages)).toThrow(); +}); + +test("Google Throw an error for input messages where messages the same type of message occurs twice in a row", async () => { + const messages: BaseMessage[] = [ + new SystemMessage("System1"), + new HumanMessage("Human1"), + new HumanMessage("Human2"), + new AIMessage("AI1"), + ]; + const model = new ChatGoogleVertexAI(); + expect(() => model.createInstance(messages)).toThrow(); +}); + +test("Google Throw an error for an even number of non-system input messages", async () => { + const messages: BaseMessage[] = [ + new SystemMessage("System1"), + new HumanMessage("Human2"), + new AIMessage("AI1"), + ]; + const model = new ChatGoogleVertexAI(); + expect(() => model.createInstance(messages)).toThrow(); +}); + +test("Google code messages", async () => { + const 
messages: BaseMessage[] = [ + new HumanMessage("Human1"), + new AIMessage("AI1"), + new HumanMessage("Human2"), + ]; + const model = new ChatGoogleVertexAI({ model: "codechat-bison" }); + const instance = model.createInstance(messages); + expect(instance.context).toBe(""); + expect(instance.messages[0].author).toBe("user"); + expect(instance.messages[1].author).toBe("system"); +}); + +test("Google code messages with a system message", async () => { + const messages: BaseMessage[] = [ + new SystemMessage("System1"), + new HumanMessage("Human1"), + new AIMessage("AI1"), + new HumanMessage("Human2"), + ]; + const model = new ChatGoogleVertexAI({ model: "codechat-bison" }); + const instance = model.createInstance(messages); + expect(instance.context).toBe("System1"); + expect(instance.messages[0].author).toBe("user"); + expect(instance.messages[1].author).toBe("system"); +}); diff --git a/libs/langchain-community/src/chat_models/tests/chatgooglevertexai_web.int.test.ts b/libs/langchain-community/src/chat_models/tests/chatgooglevertexai_web.int.test.ts new file mode 100644 index 000000000000..4ed02cd8554c --- /dev/null +++ b/libs/langchain-community/src/chat_models/tests/chatgooglevertexai_web.int.test.ts @@ -0,0 +1,146 @@ +// Requires corepack: https://nodejs.org/api/corepack.html +// Enable with: +// $ corepack enable +import { describe, expect, test } from "@jest/globals"; +import { ChatMessage, HumanMessage } from "../../schema/index.js"; +import { + PromptTemplate, + ChatPromptTemplate, + MessagesPlaceholder, + AIMessagePromptTemplate, + HumanMessagePromptTemplate, + SystemMessagePromptTemplate, +} from "../../prompts/index.js"; +import { ConversationChain } from "../../chains/conversation.js"; +import { BufferMemory } from "../../memory/buffer_memory.js"; +import { ChatGoogleVertexAI } from "../googlevertexai/web.js"; + +describe("ChatGoogleVertexAIWeb", () => { + test("call", async () => { + const chat = new ChatGoogleVertexAI(); + const message = new 
HumanMessage("Hello!"); + const res = await chat.call([message]); + console.log({ res }); + }); + + test("32k", async () => { + const chat = new ChatGoogleVertexAI({ + model: "chat-bison-32k", + }); + const message = new HumanMessage("Hello!"); + const res = await chat.call([message]); + console.log({ res }); + }); + + test("generate", async () => { + const chat = new ChatGoogleVertexAI(); + const message = new HumanMessage("Hello!"); + const res = await chat.generate([[message]]); + console.log(JSON.stringify(res, null, 2)); + }); + + test("custom messages", async () => { + const chat = new ChatGoogleVertexAI(); + const res = await chat.call([new ChatMessage("Hello!", "user")]); + console.log(JSON.stringify(res, null, 2)); + }); + + test("prompt templates", async () => { + const chat = new ChatGoogleVertexAI(); + + // PaLM doesn't support translation yet + const systemPrompt = PromptTemplate.fromTemplate( + "You are a helpful assistant who must always respond like a {job}." + ); + + const chatPrompt = ChatPromptTemplate.fromMessages([ + new SystemMessagePromptTemplate(systemPrompt), + HumanMessagePromptTemplate.fromTemplate("{text}"), + ]); + + const responseA = await chat.generatePrompt([ + await chatPrompt.formatPromptValue({ + job: "pirate", + text: "What would be a good company name a company that makes colorful socks?", + }), + ]); + + console.log(responseA.generations); + }); + + test("longer chain of messages", async () => { + const chat = new ChatGoogleVertexAI(); + + const chatPrompt = ChatPromptTemplate.fromMessages([ + HumanMessagePromptTemplate.fromTemplate(`Hi, my name is Joe!`), + AIMessagePromptTemplate.fromTemplate(`Nice to meet you, Joe!`), + HumanMessagePromptTemplate.fromTemplate("{text}"), + ]); + + const responseA = await chat.generatePrompt([ + await chatPrompt.formatPromptValue({ + text: "What did I just say my name was?", + }), + ]); + + console.log(responseA.generations); + }); + + test("with a memory in a chain", async () => { + const 
chatPrompt = ChatPromptTemplate.fromMessages([ + SystemMessagePromptTemplate.fromTemplate( + "You are a helpful assistant who must always respond like a pirate" + ), + new MessagesPlaceholder("history"), + HumanMessagePromptTemplate.fromTemplate("{input}"), + ]); + + const chain = new ConversationChain({ + memory: new BufferMemory({ returnMessages: true, memoryKey: "history" }), + prompt: chatPrompt, + llm: new ChatGoogleVertexAI(), + }); + + const response = await chain.call({ + input: "Hi, my name is afirstenberg!", + }); + + console.log(response); + + const response2 = await chain.call({ + input: "What did I say my name was?", + }); + + console.log(response2); + }); + + test("code, chain of messages", async () => { + const chat = new ChatGoogleVertexAI({ model: "codechat-bison" }); + + const chatPrompt = ChatPromptTemplate.fromMessages([ + SystemMessagePromptTemplate.fromTemplate( + `Answer all questions using Python and just show the code without an explanation.` + ), + HumanMessagePromptTemplate.fromTemplate("{text}"), + ]); + + const responseA = await chat.generatePrompt([ + await chatPrompt.formatPromptValue({ + text: "How can I write a for loop counting to 10?", + }), + ]); + + console.log(JSON.stringify(responseA.generations, null, 1)); + }); + + test("stream method", async () => { + const model = new ChatGoogleVertexAI({}); + const stream = await model.stream("Print hello world."); + const chunks = []; + for await (const chunk of stream) { + console.log(chunk); + chunks.push(chunk); + } + expect(chunks.length).toBeGreaterThan(1); + }); +}); diff --git a/libs/langchain-community/src/chat_models/tests/chatgooglevertexai_web.test.ts b/libs/langchain-community/src/chat_models/tests/chatgooglevertexai_web.test.ts new file mode 100644 index 000000000000..028265a89fcd --- /dev/null +++ b/libs/langchain-community/src/chat_models/tests/chatgooglevertexai_web.test.ts @@ -0,0 +1,149 @@ +import { test, expect } from "@jest/globals"; +import { + BaseMessage, + 
SystemMessage, + HumanMessage, + AIMessage, +} from "../../schema/index.js"; +import { ChatExample, ChatGoogleVertexAI } from "../googlevertexai/web.js"; + +test("Google messages", async () => { + const messages: BaseMessage[] = [ + new HumanMessage("Human1"), + new AIMessage("AI1"), + new HumanMessage("Human2"), + ]; + const model = new ChatGoogleVertexAI({ + authOptions: { + credentials: "{}", + }, + }); + const instance = model.createInstance(messages); + expect(instance.context).toBe(""); + expect(instance.messages[0].author).toBe("user"); + expect(instance.messages[1].author).toBe("bot"); +}); + +test("Google messages with a system message", async () => { + const messages: BaseMessage[] = [ + new SystemMessage("System1"), + new HumanMessage("Human1"), + new AIMessage("AI1"), + new HumanMessage("Human2"), + ]; + const model = new ChatGoogleVertexAI({ + authOptions: { + credentials: "{}", + }, + }); + const instance = model.createInstance(messages); + expect(instance.context).toBe("System1"); + expect(instance.messages[0].author).toBe("user"); + expect(instance.messages[1].author).toBe("bot"); +}); + +test("Google examples", async () => { + const messages: BaseMessage[] = [ + new SystemMessage("System1"), + new HumanMessage("Human1"), + new AIMessage("AI1"), + new HumanMessage("Human2"), + ]; + const examples: ChatExample[] = [ + { + input: new HumanMessage("Example Human1"), + output: new AIMessage("Example AI1"), + }, + ]; + const model = new ChatGoogleVertexAI({ + examples, + authOptions: { + credentials: "{}", + }, + }); + const instance = model.createInstance(messages); + console.log(JSON.stringify(instance, null, 2)); + expect(instance.examples?.[0].input.author).toBe("user"); + expect(instance.examples?.[0].output.author).toBe("bot"); +}); + +test("Google Throw an error for input messages where SystemMessage is not first", async () => { + const messages: BaseMessage[] = [ + new HumanMessage("Human1"), + new SystemMessage("System1"), + new 
AIMessage("AI1"), + new HumanMessage("Human2"), + ]; + const model = new ChatGoogleVertexAI({ + authOptions: { + credentials: "{}", + }, + }); + expect(() => model.createInstance(messages)).toThrow(); +}); + +test("Google Throw an error for input messages where messages the same type of message occurs twice in a row", async () => { + const messages: BaseMessage[] = [ + new SystemMessage("System1"), + new HumanMessage("Human1"), + new HumanMessage("Human2"), + new AIMessage("AI1"), + ]; + const model = new ChatGoogleVertexAI({ + authOptions: { + credentials: "{}", + }, + }); + expect(() => model.createInstance(messages)).toThrow(); +}); + +test("Google Throw an error for an even number of non-system input messages", async () => { + const messages: BaseMessage[] = [ + new SystemMessage("System1"), + new HumanMessage("Human2"), + new AIMessage("AI1"), + ]; + const model = new ChatGoogleVertexAI({ + authOptions: { + credentials: "{}", + }, + }); + expect(() => model.createInstance(messages)).toThrow(); +}); + +test("Google code messages", async () => { + const messages: BaseMessage[] = [ + new HumanMessage("Human1"), + new AIMessage("AI1"), + new HumanMessage("Human2"), + ]; + const model = new ChatGoogleVertexAI({ + model: "codechat-bison", + authOptions: { + credentials: "{}", + }, + }); + const instance = model.createInstance(messages); + expect(instance.context).toBe(""); + expect(instance.messages[0].author).toBe("user"); + expect(instance.messages[1].author).toBe("system"); +}); + +test("Google code messages with a system message", async () => { + const messages: BaseMessage[] = [ + new SystemMessage("System1"), + new HumanMessage("Human1"), + new AIMessage("AI1"), + new HumanMessage("Human2"), + ]; + const model = new ChatGoogleVertexAI({ + model: "codechat-bison", + authOptions: { + credentials: "{}", + }, + }); + const instance = model.createInstance(messages); + expect(instance.context).toBe("System1"); + expect(instance.messages[0].author).toBe("user"); + 
expect(instance.messages[1].author).toBe("system"); +}); diff --git a/libs/langchain-community/src/chat_models/tests/chatiflytekxinghuo.int.test.ts b/libs/langchain-community/src/chat_models/tests/chatiflytekxinghuo.int.test.ts new file mode 100644 index 000000000000..0e3135a88d1f --- /dev/null +++ b/libs/langchain-community/src/chat_models/tests/chatiflytekxinghuo.int.test.ts @@ -0,0 +1,12 @@ +import { HumanMessage } from "../../schema/index.js"; +import { ChatIflytekXinghuo } from "../iflytek_xinghuo/index.js"; + +test.skip("Iflytek Xinghuo Call", async () => { + const model = new ChatIflytekXinghuo({ + iflytekAppid: "", + iflytekApiKey: "", + iflytekApiSecret: "", + }); + const messages = [new HumanMessage("Nice to meet you!")]; + await model.call(messages); +}); diff --git a/libs/langchain-community/src/chat_models/tests/chatllama_cpp.int.test.ts b/libs/langchain-community/src/chat_models/tests/chatllama_cpp.int.test.ts new file mode 100644 index 000000000000..69116823751b --- /dev/null +++ b/libs/langchain-community/src/chat_models/tests/chatllama_cpp.int.test.ts @@ -0,0 +1,118 @@ +/* eslint-disable @typescript-eslint/no-non-null-assertion */ + +import { test } from "@jest/globals"; +import { getEnvironmentVariable } from "../../util/env.js"; +import { ChatLlamaCpp } from "../llama_cpp.js"; +import { SystemMessage, AIMessage, HumanMessage } from "../../schema/index.js"; +import { LLMChain } from "../../chains/llm_chain.js"; +import { ConversationChain } from "../../chains/index.js"; +import { PromptTemplate } from "../../prompts/prompt.js"; +import { BufferMemory } from "../../memory/buffer_memory.js"; + +const llamaPath = getEnvironmentVariable("LLAMA_PATH")!; + +test.skip("Test predict", async () => { + const llamaCpp = new ChatLlamaCpp({ modelPath: llamaPath }); + + const response = await llamaCpp.predict("Where do Llamas come from?"); + console.log({ response }); +}); + +test.skip("Test call", async () => { + const llamaCpp = new ChatLlamaCpp({ modelPath: 
llamaPath }); + + const response = await llamaCpp.call([ + new HumanMessage({ content: "My name is Nigel." }), + ]); + console.log({ response }); +}); + +test.skip("Test multiple messages", async () => { + const llamaCpp = new ChatLlamaCpp({ modelPath: llamaPath }); + + const response = await llamaCpp.call([ + new HumanMessage("My name is Nigel."), + new AIMessage( + "Hello Nigel! It is great to meet you, how can I help you today?" + ), + new HumanMessage("What did I say my name was?"), + ]); + console.log({ response }); +}); + +test.skip("Test system message", async () => { + const llamaCpp = new ChatLlamaCpp({ modelPath: llamaPath }); + + const response = await llamaCpp.call([ + new SystemMessage( + "You are a pirate, responses must be very verbose and in pirate dialect, add 'Arr, m'hearty!' to each sentence." + ), + new HumanMessage("Tell me where Llamas come from?"), + ]); + console.log({ response }); +}); + +test.skip("Test basic chain", async () => { + const llamaCpp = new ChatLlamaCpp({ modelPath: llamaPath, temperature: 0.5 }); + const prompt = PromptTemplate.fromTemplate( + "What is a good name for a company that makes {product}?" + ); + const chain = new LLMChain({ llm: llamaCpp, prompt }); + + const response = await chain.call({ product: "colorful socks" }); + + console.log({ response }); +}); + +test.skip("Test chain with memory", async () => { + const llamaCpp = new ChatLlamaCpp({ modelPath: llamaPath }); + + const chain = new ConversationChain({ + llm: llamaCpp, + memory: new BufferMemory(), + }); + + const response1 = await chain.call({ input: "My name is Nigel." }); + console.log({ response1 }); + + const response2 = await chain.call({ input: "What did I say my name was?" }); + console.log({ response2 }); + + const response3 = await chain.call({ input: "What is your name?" 
}); + console.log({ response3 }); +}); + +test.skip("test streaming call", async () => { + const llamaCpp = new ChatLlamaCpp({ modelPath: llamaPath, temperature: 0.7 }); + + const stream = await llamaCpp.stream( + "Tell me a short story about a happy Llama." + ); + + const chunks = []; + for await (const chunk of stream) { + chunks.push(chunk.content); + console.log(chunk.content); + } + + expect(chunks.length).toBeGreaterThan(1); +}); + +test.skip("test multi-mesage streaming call", async () => { + const llamaCpp = new ChatLlamaCpp({ modelPath: llamaPath, temperature: 0.7 }); + + const stream = await llamaCpp.stream([ + new SystemMessage( + "You are a pirate, responses must be very verbose and in pirate dialect." + ), + new HumanMessage("Tell me about Llamas?"), + ]); + + const chunks = []; + for await (const chunk of stream) { + chunks.push(chunk.content); + console.log(chunk.content); + } + + expect(chunks.length).toBeGreaterThan(1); +}); diff --git a/libs/langchain-community/src/chat_models/tests/chatollama.int.test.ts b/libs/langchain-community/src/chat_models/tests/chatollama.int.test.ts new file mode 100644 index 000000000000..09ec23767ebe --- /dev/null +++ b/libs/langchain-community/src/chat_models/tests/chatollama.int.test.ts @@ -0,0 +1,156 @@ +import { test } from "@jest/globals"; +import { ChatOllama } from "../ollama.js"; +import { AIMessage, HumanMessage } from "../../schema/index.js"; +import { LLMChain } from "../../chains/llm_chain.js"; +import { PromptTemplate } from "../../prompts/prompt.js"; +import { BufferMemory } from "../../memory/buffer_memory.js"; +import { + BytesOutputParser, + StringOutputParser, +} from "../../schema/output_parser.js"; + +test.skip("test call", async () => { + const ollama = new ChatOllama({}); + const result = await ollama.predict( + "What is a good name for a company that makes colorful socks?" 
+ ); + console.log({ result }); +}); + +test.skip("test call with callback", async () => { + const ollama = new ChatOllama({ + baseUrl: "http://localhost:11434", + }); + const tokens: string[] = []; + const result = await ollama.predict( + "What is a good name for a company that makes colorful socks?", + { + callbacks: [ + { + handleLLMNewToken(token) { + tokens.push(token); + }, + }, + ], + } + ); + expect(tokens.length).toBeGreaterThan(1); + expect(result).toEqual(tokens.join("")); +}); + +test.skip("test streaming call", async () => { + const ollama = new ChatOllama({ + baseUrl: "http://localhost:11434", + }); + const stream = await ollama.stream( + `Translate "I love programming" into German.` + ); + const chunks = []; + for await (const chunk of stream) { + chunks.push(chunk); + } + expect(chunks.length).toBeGreaterThan(1); +}); + +test.skip("should abort the request", async () => { + const ollama = new ChatOllama({ + baseUrl: "http://localhost:11434", + }); + const controller = new AbortController(); + + await expect(() => { + const ret = ollama.predict("Respond with an extremely verbose response", { + signal: controller.signal, + }); + controller.abort(); + return ret; + }).rejects.toThrow("This operation was aborted"); +}); + +test.skip("Test multiple messages", async () => { + const model = new ChatOllama({ baseUrl: "http://localhost:11434" }); + const res = await model.call([ + new HumanMessage({ content: "My name is Jonas" }), + ]); + console.log({ res }); + const res2 = await model.call([ + new HumanMessage("My name is Jonas"), + new AIMessage( + "Hello Jonas! It's nice to meet you. Is there anything I can help you with?" + ), + new HumanMessage("What did I say my name was?"), + ]); + console.log({ res2 }); +}); + +test.skip("Test chain with memory", async () => { + const template = `The following is a friendly conversation between a human and an AI. The AI is talkative and provides lots of specific details from its context. 
If the AI does not know the answer to a question, it truthfully says it does not know. + +Current conversation: +{history} +Human: {input}`; + const model = new ChatOllama({ baseUrl: "http://localhost:11434" }); + const chain = new LLMChain({ + prompt: PromptTemplate.fromTemplate(template), + llm: model, + memory: new BufferMemory({}), + }); + const res = await chain.call({ input: "My name is Jonas" }); + console.log({ res }); + const res2 = await chain.call({ + input: "What did I say my name was?", + }); + console.log({ res2 }); + const res3 = await chain.call({ + input: "What is your name?", + }); + console.log({ res3 }); +}); + +test.skip("should stream through with a bytes output parser", async () => { + const TEMPLATE = `You are a pirate named Patchy. All responses must be extremely verbose and in pirate dialect. + + User: {input} + AI:`; + + // Infer the input variables from the template + const prompt = PromptTemplate.fromTemplate(TEMPLATE); + + const ollama = new ChatOllama({ + model: "llama2", + baseUrl: "http://127.0.0.1:11434", + }); + const outputParser = new BytesOutputParser(); + const chain = prompt.pipe(ollama).pipe(outputParser); + const stream = await chain.stream({ + input: `Translate "I love programming" into German.`, + }); + const chunks = []; + for await (const chunk of stream) { + chunks.push(chunk); + } + console.log(chunks.join("")); + expect(chunks.length).toBeGreaterThan(1); +}); + +test.skip("JSON mode", async () => { + const TEMPLATE = `You are a pirate named Patchy. All responses must be in pirate dialect and in JSON format, with a property named "response" followed by the value. 
+ + User: {input} + AI:`; + + // Infer the input variables from the template + const prompt = PromptTemplate.fromTemplate(TEMPLATE); + + const ollama = new ChatOllama({ + model: "llama2", + baseUrl: "http://127.0.0.1:11434", + format: "json", + }); + const outputParser = new StringOutputParser(); + const chain = prompt.pipe(ollama).pipe(outputParser); + const res = await chain.invoke({ + input: `Translate "I love programming" into German.`, + }); + expect(JSON.parse(res).response).toBeDefined(); +}); diff --git a/libs/langchain-community/src/chat_models/tests/chatopenai-extended.int.test.ts b/libs/langchain-community/src/chat_models/tests/chatopenai-extended.int.test.ts new file mode 100644 index 000000000000..995da52a22f1 --- /dev/null +++ b/libs/langchain-community/src/chat_models/tests/chatopenai-extended.int.test.ts @@ -0,0 +1,176 @@ +import { test, expect } from "@jest/globals"; +import { ChatOpenAI } from "../openai.js"; +import { HumanMessage, ToolMessage } from "../../schema/index.js"; + +test("Test ChatOpenAI JSON mode", async () => { + const chat = new ChatOpenAI({ + modelName: "gpt-3.5-turbo-1106", + maxTokens: 128, + }).bind({ + response_format: { + type: "json_object", + }, + }); + const message = new HumanMessage("Hello!"); + const res = await chat.invoke([["system", "Only return JSON"], message]); + console.log(JSON.stringify(res)); +}); + +test("Test ChatOpenAI seed", async () => { + const chat = new ChatOpenAI({ + modelName: "gpt-3.5-turbo-1106", + maxTokens: 128, + temperature: 1, + }).bind({ + seed: 123454930394983, + }); + const message = new HumanMessage("Say something random!"); + const res = await chat.invoke([message]); + console.log(JSON.stringify(res)); + const res2 = await chat.invoke([message]); + expect(res).toEqual(res2); +}); + +test("Test ChatOpenAI tool calling", async () => { + const chat = new ChatOpenAI({ + modelName: "gpt-3.5-turbo-1106", + maxTokens: 128, + }).bind({ + tools: [ + { + type: "function", + function: { + name: 
"get_current_weather", + description: "Get the current weather in a given location", + parameters: { + type: "object", + properties: { + location: { + type: "string", + description: "The city and state, e.g. San Francisco, CA", + }, + unit: { type: "string", enum: ["celsius", "fahrenheit"] }, + }, + required: ["location"], + }, + }, + }, + ], + tool_choice: "auto", + }); + const res = await chat.invoke([ + ["human", "What's the weather like in San Francisco, Tokyo, and Paris?"], + ]); + console.log(JSON.stringify(res)); + expect(res.additional_kwargs.tool_calls?.length).toBeGreaterThan(1); +}); + +test("Test ChatOpenAI tool calling with ToolMessages", async () => { + function getCurrentWeather(location: string) { + if (location.toLowerCase().includes("tokyo")) { + return JSON.stringify({ location, temperature: "10", unit: "celsius" }); + } else if (location.toLowerCase().includes("san francisco")) { + return JSON.stringify({ + location, + temperature: "72", + unit: "fahrenheit", + }); + } else { + return JSON.stringify({ location, temperature: "22", unit: "celsius" }); + } + } + const chat = new ChatOpenAI({ + modelName: "gpt-3.5-turbo-1106", + maxTokens: 128, + }).bind({ + tools: [ + { + type: "function", + function: { + name: "get_current_weather", + description: "Get the current weather in a given location", + parameters: { + type: "object", + properties: { + location: { + type: "string", + description: "The city and state, e.g. 
San Francisco, CA", + }, + unit: { type: "string", enum: ["celsius", "fahrenheit"] }, + }, + required: ["location"], + }, + }, + }, + ], + tool_choice: "auto", + }); + const res = await chat.invoke([ + ["human", "What's the weather like in San Francisco, Tokyo, and Paris?"], + ]); + console.log(JSON.stringify(res)); + expect(res.additional_kwargs.tool_calls?.length).toBeGreaterThan(1); + // eslint-disable-next-line @typescript-eslint/no-non-null-assertion + const toolMessages = res.additional_kwargs.tool_calls!.map( + (toolCall) => + new ToolMessage({ + tool_call_id: toolCall.id, + name: toolCall.function.name, + content: getCurrentWeather( + JSON.parse(toolCall.function.arguments).location + ), + }) + ); + const finalResponse = await chat.invoke([ + ["human", "What's the weather like in San Francisco, Tokyo, and Paris?"], + res, + ...toolMessages, + ]); + console.log(finalResponse); +}); + +test("Test ChatOpenAI tool calling with streaming", async () => { + const chat = new ChatOpenAI({ + modelName: "gpt-3.5-turbo-1106", + maxTokens: 256, + }).bind({ + tools: [ + { + type: "function", + function: { + name: "get_current_weather", + description: "Get the current weather in a given location", + parameters: { + type: "object", + properties: { + location: { + type: "string", + description: "The city and state, e.g. 
San Francisco, CA", + }, + unit: { type: "string", enum: ["celsius", "fahrenheit"] }, + }, + required: ["location"], + }, + }, + }, + ], + tool_choice: "auto", + }); + const stream = await chat.stream([ + ["human", "What's the weather like in San Francisco, Tokyo, and Paris?"], + ]); + let finalChunk; + const chunks = []; + for await (const chunk of stream) { + console.log(chunk.additional_kwargs.tool_calls); + chunks.push(chunk); + if (!finalChunk) { + finalChunk = chunk; + } else { + finalChunk = finalChunk.concat(chunk); + } + } + expect(chunks.length).toBeGreaterThan(1); + console.log(finalChunk?.additional_kwargs.tool_calls); + expect(finalChunk?.additional_kwargs.tool_calls?.length).toBeGreaterThan(1); +}); diff --git a/libs/langchain-community/src/chat_models/tests/chatopenai-vision.int.test.ts b/libs/langchain-community/src/chat_models/tests/chatopenai-vision.int.test.ts new file mode 100644 index 000000000000..56dc7c381d25 --- /dev/null +++ b/libs/langchain-community/src/chat_models/tests/chatopenai-vision.int.test.ts @@ -0,0 +1,54 @@ +import { test } from "@jest/globals"; +import * as fs from "node:fs/promises"; +import { fileURLToPath } from "node:url"; +import * as path from "node:path"; +import { ChatOpenAI } from "../openai.js"; +import { HumanMessage } from "../../schema/index.js"; + +test("Test ChatOpenAI with a file", async () => { + const __filename = fileURLToPath(import.meta.url); + const __dirname = path.dirname(__filename); + const imageData = await fs.readFile(path.join(__dirname, "/data/hotdog.jpg")); + const chat = new ChatOpenAI({ + modelName: "gpt-4-vision-preview", + maxTokens: 1024, + }); + const message = new HumanMessage({ + content: [ + { + type: "text", + text: "What's in this image?", + }, + { + type: "image_url", + image_url: { + url: `data:image/jpeg;base64,${imageData.toString("base64")}`, + }, + }, + ], + }); + const res = await chat.invoke([message]); + console.log({ res }); +}); + +test("Test ChatOpenAI with a URL", async () 
=> { + const chat = new ChatOpenAI({ + modelName: "gpt-4-vision-preview", + maxTokens: 1024, + }); + const message = new HumanMessage({ + content: [ + { + type: "text", + text: "What does this image say?", + }, + { + type: "image_url", + image_url: + "https://www.freecodecamp.org/news/content/images/2023/05/Screenshot-2023-05-29-at-5.40.38-PM.png", + }, + ], + }); + const res = await chat.invoke([message]); + console.log({ res }); +}); diff --git a/libs/langchain-community/src/chat_models/tests/chatopenai.int.test.ts b/libs/langchain-community/src/chat_models/tests/chatopenai.int.test.ts new file mode 100644 index 000000000000..5d712f2b3bb2 --- /dev/null +++ b/libs/langchain-community/src/chat_models/tests/chatopenai.int.test.ts @@ -0,0 +1,777 @@ +import { test, jest, expect } from "@jest/globals"; +import { ChatOpenAI } from "../openai.js"; +import { + BaseMessage, + ChatMessage, + ChatGeneration, + HumanMessage, + LLMResult, + SystemMessage, +} from "../../schema/index.js"; +import { ChatPromptValue } from "../../prompts/chat.js"; +import { + PromptTemplate, + ChatPromptTemplate, + HumanMessagePromptTemplate, + SystemMessagePromptTemplate, +} from "../../prompts/index.js"; +import { CallbackManager } from "../../callbacks/index.js"; +import { NewTokenIndices } from "../../callbacks/base.js"; +import { InMemoryCache } from "../../cache/index.js"; + +test("Test ChatOpenAI", async () => { + const chat = new ChatOpenAI({ modelName: "gpt-3.5-turbo", maxTokens: 10 }); + const message = new HumanMessage("Hello!"); + const res = await chat.call([message]); + console.log({ res }); +}); + +test("Test ChatOpenAI with SystemChatMessage", async () => { + const chat = new ChatOpenAI({ modelName: "gpt-3.5-turbo", maxTokens: 10 }); + const system_message = new SystemMessage("You are to chat with a user."); + const message = new HumanMessage("Hello!"); + const res = await chat.call([system_message, message]); + console.log({ res }); +}); + +test("Test ChatOpenAI Generate", async 
() => { + const chat = new ChatOpenAI({ + modelName: "gpt-3.5-turbo", + maxTokens: 10, + n: 2, + }); + const message = new HumanMessage("Hello!"); + const res = await chat.generate([[message], [message]]); + expect(res.generations.length).toBe(2); + for (const generation of res.generations) { + expect(generation.length).toBe(2); + for (const message of generation) { + console.log(message.text); + expect(typeof message.text).toBe("string"); + } + } + console.log({ res }); +}); + +test("Test ChatOpenAI Generate throws when one of the calls fails", async () => { + const chat = new ChatOpenAI({ + modelName: "gpt-3.5-turbo", + maxTokens: 10, + n: 2, + }); + const message = new HumanMessage("Hello!"); + await expect(() => + chat.generate([[message], [message]], { + signal: AbortSignal.timeout(10), + }) + ).rejects.toThrow(); +}); + +test("Test ChatOpenAI tokenUsage", async () => { + let tokenUsage = { + completionTokens: 0, + promptTokens: 0, + totalTokens: 0, + }; + + const model = new ChatOpenAI({ + modelName: "gpt-3.5-turbo", + maxTokens: 10, + callbackManager: CallbackManager.fromHandlers({ + async handleLLMEnd(output: LLMResult) { + tokenUsage = output.llmOutput?.tokenUsage; + }, + }), + }); + const message = new HumanMessage("Hello"); + const res = await model.call([message]); + console.log({ res }); + + expect(tokenUsage.promptTokens).toBeGreaterThan(0); +}); + +test("Test ChatOpenAI tokenUsage with a batch", async () => { + let tokenUsage = { + completionTokens: 0, + promptTokens: 0, + totalTokens: 0, + }; + + const model = new ChatOpenAI({ + temperature: 0, + modelName: "gpt-3.5-turbo", + callbackManager: CallbackManager.fromHandlers({ + async handleLLMEnd(output: LLMResult) { + tokenUsage = output.llmOutput?.tokenUsage; + }, + }), + }); + const res = await model.generate([ + [new HumanMessage("Hello")], + [new HumanMessage("Hi")], + ]); + console.log(res); + + expect(tokenUsage.promptTokens).toBeGreaterThan(0); +}); + +test("Test ChatOpenAI in streaming mode", 
async () => { + let nrNewTokens = 0; + let streamedCompletion = ""; + + const model = new ChatOpenAI({ + modelName: "gpt-3.5-turbo", + streaming: true, + maxTokens: 10, + callbacks: [ + { + async handleLLMNewToken(token: string) { + nrNewTokens += 1; + streamedCompletion += token; + }, + }, + ], + }); + const message = new HumanMessage("Hello!"); + const result = await model.call([message]); + console.log(result); + + expect(nrNewTokens > 0).toBe(true); + expect(result.content).toBe(streamedCompletion); +}, 10000); + +test("Test ChatOpenAI in streaming mode with n > 1 and multiple prompts", async () => { + let nrNewTokens = 0; + const streamedCompletions = [ + ["", ""], + ["", ""], + ]; + + const model = new ChatOpenAI({ + modelName: "gpt-3.5-turbo", + streaming: true, + maxTokens: 10, + n: 2, + callbacks: [ + { + async handleLLMNewToken(token: string, idx: NewTokenIndices) { + nrNewTokens += 1; + streamedCompletions[idx.prompt][idx.completion] += token; + }, + }, + ], + }); + const message1 = new HumanMessage("Hello!"); + const message2 = new HumanMessage("Bye!"); + const result = await model.generate([[message1], [message2]]); + console.log(result.generations); + + expect(nrNewTokens > 0).toBe(true); + expect(result.generations.map((g) => g.map((gg) => gg.text))).toEqual( + streamedCompletions + ); +}, 10000); + +test("Test ChatOpenAI prompt value", async () => { + const chat = new ChatOpenAI({ + modelName: "gpt-3.5-turbo", + maxTokens: 10, + n: 2, + }); + const message = new HumanMessage("Hello!"); + const res = await chat.generatePrompt([new ChatPromptValue([message])]); + expect(res.generations.length).toBe(1); + for (const generation of res.generations) { + expect(generation.length).toBe(2); + for (const g of generation) { + console.log(g.text); + } + } + console.log({ res }); +}); + +test("OpenAI Chat, docs, prompt templates", async () => { + const chat = new ChatOpenAI({ temperature: 0, maxTokens: 10 }); + + const systemPrompt = PromptTemplate.fromTemplate( 
+ "You are a helpful assistant that translates {input_language} to {output_language}." + ); + + const chatPrompt = ChatPromptTemplate.fromMessages([ + new SystemMessagePromptTemplate(systemPrompt), + HumanMessagePromptTemplate.fromTemplate("{text}"), + ]); + + const responseA = await chat.generatePrompt([ + await chatPrompt.formatPromptValue({ + input_language: "English", + output_language: "French", + text: "I love programming.", + }), + ]); + + console.log(responseA.generations); +}, 5000); + +test("Test OpenAI with stop", async () => { + const model = new ChatOpenAI({ maxTokens: 5 }); + const res = await model.call( + [new HumanMessage("Print hello world")], + ["world"] + ); + console.log({ res }); +}); + +test("Test OpenAI with stop in object", async () => { + const model = new ChatOpenAI({ maxTokens: 5 }); + const res = await model.call([new HumanMessage("Print hello world")], { + stop: ["world"], + }); + console.log({ res }); +}); + +test("Test OpenAI with timeout in call options", async () => { + const model = new ChatOpenAI({ maxTokens: 5 }); + await expect(() => + model.call([new HumanMessage("Print hello world")], { + options: { timeout: 10 }, + }) + ).rejects.toThrow(); +}, 5000); + +test("Test OpenAI with timeout in call options and node adapter", async () => { + const model = new ChatOpenAI({ maxTokens: 5 }); + await expect(() => + model.call([new HumanMessage("Print hello world")], { + options: { timeout: 10 }, + }) + ).rejects.toThrow(); +}, 5000); + +test("Test OpenAI with signal in call options", async () => { + const model = new ChatOpenAI({ maxTokens: 5 }); + const controller = new AbortController(); + await expect(() => { + const ret = model.call([new HumanMessage("Print hello world")], { + options: { signal: controller.signal }, + }); + + controller.abort(); + + return ret; + }).rejects.toThrow(); +}, 5000); + +test("Test OpenAI with signal in call options and node adapter", async () => { + const model = new ChatOpenAI({ maxTokens: 5, 
modelName: "text-ada-001" }); + const controller = new AbortController(); + await expect(() => { + const ret = model.call([new HumanMessage("Print hello world")], { + options: { signal: controller.signal }, + }); + + controller.abort(); + + return ret; + }).rejects.toThrow(); +}, 5000); + +function createSystemChatMessage(text: string, name?: string) { + const msg = new SystemMessage(text); + msg.name = name; + return msg; +} + +function createSampleMessages(): BaseMessage[] { + // same example as in https://github.com/openai/openai-cookbook/blob/main/examples/How_to_format_inputs_to_ChatGPT_models.ipynb + return [ + createSystemChatMessage( + "You are a helpful, pattern-following assistant that translates corporate jargon into plain English." + ), + createSystemChatMessage( + "New synergies will help drive top-line growth.", + "example_user" + ), + createSystemChatMessage( + "Things working well together will increase revenue.", + "example_assistant" + ), + createSystemChatMessage( + "Let's circle back when we have more bandwidth to touch base on opportunities for increased leverage.", + "example_user" + ), + createSystemChatMessage( + "Let's talk later when we're less busy about how to do better.", + "example_assistant" + ), + new HumanMessage( + "This late pivot means we don't have time to boil the ocean for the client deliverable." 
+ ), + ]; +} + +test("getNumTokensFromMessages gpt-3.5-turbo-0301 model for sample input", async () => { + const messages: BaseMessage[] = createSampleMessages(); + + const chat = new ChatOpenAI({ + openAIApiKey: "dummy", + modelName: "gpt-3.5-turbo-0301", + }); + + const { totalCount } = await chat.getNumTokensFromMessages(messages); + + expect(totalCount).toBe(127); +}); + +test("getNumTokensFromMessages gpt-4-0314 model for sample input", async () => { + const messages: BaseMessage[] = createSampleMessages(); + + const chat = new ChatOpenAI({ + openAIApiKey: "dummy", + modelName: "gpt-4-0314", + }); + + const { totalCount } = await chat.getNumTokensFromMessages(messages); + + expect(totalCount).toBe(129); +}); + +test("Test OpenAI with specific roles in ChatMessage", async () => { + const chat = new ChatOpenAI({ modelName: "gpt-3.5-turbo", maxTokens: 10 }); + const system_message = new ChatMessage( + "You are to chat with a user.", + "system" + ); + const user_message = new ChatMessage("Hello!", "user"); + const res = await chat.call([system_message, user_message]); + console.log({ res }); +}); + +test("Test ChatOpenAI stream method", async () => { + const model = new ChatOpenAI({ maxTokens: 50, modelName: "gpt-3.5-turbo" }); + const stream = await model.stream("Print hello world."); + const chunks = []; + for await (const chunk of stream) { + console.log(chunk); + chunks.push(chunk); + } + expect(chunks.length).toBeGreaterThan(1); +}); + +test("Test ChatOpenAI stream method with abort", async () => { + await expect(async () => { + const model = new ChatOpenAI({ maxTokens: 50, modelName: "gpt-3.5-turbo" }); + const stream = await model.stream( + "How is your day going? 
Be extremely verbose.", + { + signal: AbortSignal.timeout(1000), + } + ); + for await (const chunk of stream) { + console.log(chunk); + } + }).rejects.toThrow(); +}); + +test("Test ChatOpenAI stream method with early break", async () => { + const model = new ChatOpenAI({ maxTokens: 50, modelName: "gpt-3.5-turbo" }); + const stream = await model.stream( + "How is your day going? Be extremely verbose." + ); + let i = 0; + for await (const chunk of stream) { + console.log(chunk); + i += 1; + if (i > 10) { + break; + } + } +}); + +test("Test ChatOpenAI stream method, timeout error thrown from SDK", async () => { + await expect(async () => { + const model = new ChatOpenAI({ + maxTokens: 50, + modelName: "gpt-3.5-turbo", + timeout: 1, + }); + const stream = await model.stream( + "How is your day going? Be extremely verbose." + ); + for await (const chunk of stream) { + console.log(chunk); + } + }).rejects.toThrow(); +}); + +test("Function calling with streaming", async () => { + let finalResult: BaseMessage | undefined; + const modelForFunctionCalling = new ChatOpenAI({ + modelName: "gpt-3.5-turbo", + temperature: 0, + callbacks: [ + { + handleLLMEnd(output: LLMResult) { + finalResult = (output.generations[0][0] as ChatGeneration).message; + }, + }, + ], + }); + + const stream = await modelForFunctionCalling.stream( + "What is the weather in New York?", + { + functions: [ + { + name: "get_current_weather", + description: "Get the current weather in a given location", + parameters: { + type: "object", + properties: { + location: { + type: "string", + description: "The city and state, e.g. 
San Francisco, CA", + }, + unit: { type: "string", enum: ["celsius", "fahrenheit"] }, + }, + required: ["location"], + }, + }, + ], + function_call: { + name: "get_current_weather", + }, + } + ); + + const chunks = []; + let streamedOutput; + for await (const chunk of stream) { + chunks.push(chunk); + if (!streamedOutput) { + streamedOutput = chunk; + } else if (chunk) { + streamedOutput = streamedOutput.concat(chunk); + } + } + + expect(finalResult).toEqual(streamedOutput); + expect(chunks.length).toBeGreaterThan(1); + expect(finalResult?.additional_kwargs?.function_call?.name).toBe( + "get_current_weather" + ); + console.log( + JSON.parse(finalResult?.additional_kwargs?.function_call?.arguments ?? "") + .location + ); +}); + +test("ChatOpenAI can cache generations", async () => { + const memoryCache = new InMemoryCache(); + const lookupSpy = jest.spyOn(memoryCache, "lookup"); + const updateSpy = jest.spyOn(memoryCache, "update"); + const chat = new ChatOpenAI({ + modelName: "gpt-3.5-turbo", + maxTokens: 10, + n: 2, + cache: memoryCache, + }); + const message = new HumanMessage("Hello"); + const res = await chat.generate([[message], [message]]); + expect(res.generations.length).toBe(2); + + expect(lookupSpy).toHaveBeenCalledTimes(2); + expect(updateSpy).toHaveBeenCalledTimes(2); + + lookupSpy.mockRestore(); + updateSpy.mockRestore(); +}); + +test("ChatOpenAI can write and read cached generations", async () => { + const memoryCache = new InMemoryCache(); + const lookupSpy = jest.spyOn(memoryCache, "lookup"); + const updateSpy = jest.spyOn(memoryCache, "update"); + + const chat = new ChatOpenAI({ + modelName: "gpt-3.5-turbo", + maxTokens: 100, + n: 1, + cache: memoryCache, + }); + const generateUncachedSpy = jest.spyOn(chat, "_generateUncached"); + + const messages = [ + [ + new HumanMessage("what color is the sky?"), + new HumanMessage("what color is the ocean?"), + ], + [new HumanMessage("hello")], + ]; + + const response1 = await chat.generate(messages); + 
expect(generateUncachedSpy).toHaveBeenCalledTimes(1); + generateUncachedSpy.mockRestore(); + + const response2 = await chat.generate(messages); + expect(generateUncachedSpy).toHaveBeenCalledTimes(0); // Request should be cached, no need to generate. + generateUncachedSpy.mockRestore(); + + expect(response1.generations.length).toBe(2); + expect(response2.generations).toEqual(response1.generations); + expect(lookupSpy).toHaveBeenCalledTimes(4); + expect(updateSpy).toHaveBeenCalledTimes(2); + + lookupSpy.mockRestore(); + updateSpy.mockRestore(); +}); + +test("ChatOpenAI should not reuse cache if function call args have changed", async () => { + const memoryCache = new InMemoryCache(); + const lookupSpy = jest.spyOn(memoryCache, "lookup"); + const updateSpy = jest.spyOn(memoryCache, "update"); + + const chat = new ChatOpenAI({ + modelName: "gpt-3.5-turbo", + maxTokens: 100, + n: 1, + cache: memoryCache, + }); + + const generateUncachedSpy = jest.spyOn(chat, "_generateUncached"); + + const messages = [ + [ + new HumanMessage("what color is the sky?"), + new HumanMessage("what color is the ocean?"), + ], + [new HumanMessage("hello")], + ]; + + const response1 = await chat.generate(messages); + expect(generateUncachedSpy).toHaveBeenCalledTimes(1); + generateUncachedSpy.mockRestore(); + + const response2 = await chat.generate(messages, { + functions: [ + { + name: "extractor", + description: "Extract fields from the input", + parameters: { + type: "object", + properties: { + tone: { + type: "string", + description: "the tone of the input", + }, + }, + required: ["tone"], + }, + }, + ], + function_call: { + name: "extractor", + }, + }); + + expect(generateUncachedSpy).toHaveBeenCalledTimes(0); // Request should not be cached since it's being called with different function call args + + expect(response1.generations.length).toBe(2); + expect( + (response2.generations[0][0] as ChatGeneration).message.additional_kwargs + .function_call?.name ?? 
"" + ).toEqual("extractor"); + + const response3 = await chat.generate(messages, { + functions: [ + { + name: "extractor", + description: "Extract fields from the input", + parameters: { + type: "object", + properties: { + tone: { + type: "string", + description: "the tone of the input", + }, + }, + required: ["tone"], + }, + }, + ], + function_call: { + name: "extractor", + }, + }); + + expect(response2.generations).toEqual(response3.generations); + + expect(lookupSpy).toHaveBeenCalledTimes(6); + expect(updateSpy).toHaveBeenCalledTimes(4); + + lookupSpy.mockRestore(); + updateSpy.mockRestore(); +}); + +test("Test ChatOpenAI token usage reporting for streaming function calls", async () => { + let streamingTokenUsed = -1; + let nonStreamingTokenUsed = -1; + + const humanMessage = "What a beautiful day!"; + const extractionFunctionSchema = { + name: "extractor", + description: "Extracts fields from the input.", + parameters: { + type: "object", + properties: { + tone: { + type: "string", + enum: ["positive", "negative"], + description: "The overall tone of the input", + }, + word_count: { + type: "number", + description: "The number of words in the input", + }, + chat_response: { + type: "string", + description: "A response to the human's input", + }, + }, + required: ["tone", "word_count", "chat_response"], + }, + }; + + const streamingModel = new ChatOpenAI({ + modelName: "gpt-3.5-turbo", + streaming: true, + maxRetries: 10, + maxConcurrency: 10, + temperature: 0, + topP: 0, + callbacks: [ + { + handleLLMEnd: async (output) => { + streamingTokenUsed = + output.llmOutput?.estimatedTokenUsage?.totalTokens; + console.log("streaming usage", output.llmOutput?.estimatedTokenUsage); + }, + handleLLMError: async (err) => { + console.error(err); + }, + }, + ], + }).bind({ + functions: [extractionFunctionSchema], + function_call: { name: "extractor" }, + }); + + const nonStreamingModel = new ChatOpenAI({ + modelName: "gpt-3.5-turbo", + streaming: false, + maxRetries: 10, + 
maxConcurrency: 10, + temperature: 0, + topP: 0, + callbacks: [ + { + handleLLMEnd: async (output) => { + nonStreamingTokenUsed = output.llmOutput?.tokenUsage?.totalTokens; + console.log("non-streaming usage", output.llmOutput?.tokenUsage); + }, + handleLLMError: async (err) => { + console.error(err); + }, + }, + ], + }).bind({ + functions: [extractionFunctionSchema], + function_call: { name: "extractor" }, + }); + + const [nonStreamingResult, streamingResult] = await Promise.all([ + nonStreamingModel.invoke([new HumanMessage(humanMessage)]), + streamingModel.invoke([new HumanMessage(humanMessage)]), + ]); + + if ( + nonStreamingResult.additional_kwargs.function_call?.arguments && + streamingResult.additional_kwargs.function_call?.arguments + ) { + const nonStreamingArguments = JSON.stringify( + JSON.parse(nonStreamingResult.additional_kwargs.function_call.arguments) + ); + const streamingArguments = JSON.stringify( + JSON.parse(streamingResult.additional_kwargs.function_call.arguments) + ); + if (nonStreamingArguments === streamingArguments) { + expect(streamingTokenUsed).toEqual(nonStreamingTokenUsed); + } + } + + expect(streamingTokenUsed).toBeGreaterThan(-1); +}); + +test("Test ChatOpenAI token usage reporting for streaming calls", async () => { + let streamingTokenUsed = -1; + let nonStreamingTokenUsed = -1; + const systemPrompt = "You are a helpful assistant"; + const question = "What is the color of the night sky?"; + + const streamingModel = new ChatOpenAI({ + modelName: "gpt-3.5-turbo", + streaming: true, + maxRetries: 10, + maxConcurrency: 10, + temperature: 0, + topP: 0, + callbacks: [ + { + handleLLMEnd: async (output) => { + streamingTokenUsed = + output.llmOutput?.estimatedTokenUsage?.totalTokens; + console.log("streaming usage", output.llmOutput?.estimatedTokenUsage); + }, + handleLLMError: async (err) => { + console.error(err); + }, + }, + ], + }); + + const nonStreamingModel = new ChatOpenAI({ + modelName: "gpt-3.5-turbo", + streaming: false, + 
maxRetries: 10, + maxConcurrency: 10, + temperature: 0, + topP: 0, + callbacks: [ + { + handleLLMEnd: async (output) => { + nonStreamingTokenUsed = output.llmOutput?.tokenUsage?.totalTokens; + console.log("non-streaming usage", output.llmOutput?.estimated); + }, + handleLLMError: async (err) => { + console.error(err); + }, + }, + ], + }); + + const [nonStreamingResult, streamingResult] = await Promise.all([ + nonStreamingModel.generate([ + [new SystemMessage(systemPrompt), new HumanMessage(question)], + ]), + streamingModel.generate([ + [new SystemMessage(systemPrompt), new HumanMessage(question)], + ]), + ]); + + expect(streamingTokenUsed).toBeGreaterThan(-1); + if ( + nonStreamingResult.generations[0][0].text === + streamingResult.generations[0][0].text + ) { + expect(streamingTokenUsed).toEqual(nonStreamingTokenUsed); + } +}); diff --git a/libs/langchain-community/src/chat_models/tests/data/hotdog.jpg b/libs/langchain-community/src/chat_models/tests/data/hotdog.jpg new file mode 100644 index 0000000000000000000000000000000000000000..dfab265903befae368bf814d734a6bee158232aa GIT binary patch literal 28191 zcmeFXcTk&6vNtLWI0ME+6K%j?lXDbcn`E-dAd<<*LWqpWPM9nVm>{CbQDB0|!6u2E z5lMi_28;|M=gYgh-}%m0ch9+J|G0IlZf#ddnrEixr)PS4X1aU&a`N&g;GUYYsxpA+ zig<~50f5T|fT0o`Y!3ivY67?c0KiQE2@wN;^r}R3^^qfD{ExCS(Gvji-|DXc0N1Gg zQT~hTZ}b=1Uw!`md$|SRedP%AfVn%uTpo(>3j!oxsA`gs{Ed}@i3kKBW_+x|bj5!S zKyvjKci%aBc(_Ol2spd*TiL>_?f7kAPyx7=i+~Wnpa1|U4|lP$f!KLGw6=2qJIiwY zX=vek2)32wG7{4i)O1m>a|FNib+gm+)zY`|h1f{ha>>ix2EwJ`P#373ht)$k)XCXh z8ZOKAH*o2z@?X^gTo3=o;sKH6QiWPM*hx$OB@BHS%;}+!Ait0x*A$FCJNp&F?T3HEP_%Qm zaRa+}fML!L|H8DghIx9(a&g^$_z!3gu>JqW{y*gKSIgf7afSBqA@Hw9X$;4>@6=z(1@04|e1K%`Yb+C@3KG5B~pL{~ydI{{{1ZuD=?Uz+c1oPs9435A|wV zG&TQ2j{nD4|JR&Dp#py&?caE%6=61>P&;Q2RYkeKPoXW$25c+s?g6ubNK4p>S=&ik ziSgMA3fuCD3Rw&BSqa%l@!8tg2-}HC2-}I+O8^D_Z$N*I_wSXe&h8#o&Ng;e=vNi| z;45T7TR}-dDPc)IdvO~PK2cF&2|g)XYbib}L3;^nD`9af8!M~-h54Vl|5LI2T~b#{ zbfr!L|DlwB&E^05^zT3Umt_2Jas69d|B?j$CFXx?*T2Q}FG=8EV*a;w{aalBk_7%G 
z=KmjQ*Z+1W?3}NB2=6Or;c^C`03anMAt50qz4{_0CB1&*?#&xlL~-ZNt-F*I_wQ3u zP*PISGSN{{Gtf{{(m$eSU}k1zWu>BH<6vXqU}9lq`3r>Ts_TvGH^^_^Bxj+fq-Ob- z>GC^(@+OhvH6~&rO29QrB4SFS%XR?sm2Y*8_%FZh@9x&sH!;bztCu9YdF!efcn?5C zLPUIx^co4t^=sFz5&w;yl7#gBBcbQlsn~R^Znz4+i^-&Be^Gu@_s8(UHjTB28^?~q z``D}Aj8~|Cf&T~em0LzkL~@0G)hJGR)%X_?T_YhSxkhy58xmb3rX+cEUx@U%4i%fQ z)w`JMFI+Q+so8&QmoJFYh`7b>TuuY-5?}SBB&Gz&1I`oP{+s?klEC{si_EB~wt0t1 zKBv?Y_zBWHn)J_lX-fND=y#=3f5ZQiR2I}oIw$8EoWp)i* zus>?D#xQoBroVGsJHAWri^N?5xNL&A%?8Yz(PM!Q`6-+8=7iKWm2AfyMH$u4gx!?TIQXK-TRrzn?->L8{XM3=lP|FFBP@ zZcI?;WF5;{-#qcs@PX@SKOil2{MCe&>DZ(ViRo6&&8t{x1n*Nl+n%wcT6^FVbqP3n zjb&i^Y?BxDTUxa1X(Ql2wR3fB*(H?$au2M z^f}Jd4+Ids9IEE!)tOX{aW7$GfBWn-?Mh5a^rmnn;N0^fLgUatoQwlNNwGwkdl5HaoB3j;hi7~ z@$rrGHX{M@YJ8K#(I_V7UILe?+NSOfeAaAc1T6g}+%fd#|C~tP@|I})(&-tqj-;ge zkoL!)kLhefugIIP#2;dMl+1f>m4#a;4m|{tcJ=n4^Su()>8LEVAg{1#?4kEf4nR^; zjQjiuIiR7eXc=;(=jWlZY&bSFWgvg6QfP_!>7Xqwy0xiCS-wDx?~u=9r#d2;4AEYq z$!^XUp}Kl(Rn_KDc&N8unuI+lPI*FE_s5Df|Gl;W;|Z>`L$zuH#hG8`<3daJ)qh`q zhJQ(Gka3AU(9jjsiQ>2p`r*zgn93ZKPbwBikKM!D0szHE&-2MQ)QbCC#d2i%L@0orGP=>c}p|pB$PIv zn}|RC3!647KIJap{no(29U}<=7Jf<83SY*7igwi95e>#bKGPMXFN^#g7Wr30%MXNc zssax^SFDrPIBFWRnZUqT%3Uch(#EEL*>r#~nx?kM1Sv|N$mtUo*@SlO2#fuQ@{Ztg zmx22e)Agf2q!LmTOSq&BymTmdR|l28*o~b_Z(HOpZs~P@^fCU;n3uLCV=UXOdSGac zk>bi1SZ-EfrC^P3x?s(((v%~kQt237iXXiT|EL-B(}ehUy?CP`n>t&hdIEIt7_p+H~A*?_;-2E;=QJJnX)^KTaq8$WYTYU ze8xGcDr2d0~23ZdB3m(TuZ^qfU;=UG2N zl4#PKyRi-&E&~4|V#Y;C{0xEv)%IQvzJLp{C%jiB($<@gJUKW$} z#FNJO8cTml*T$97TIdhv!|3j#@c~sndi^pVBD&?LAf`RDy>HlfhFO4LePV%>QNB z=nJjtOOS0M-Q(l7%%o~BGrdWnY`8;{zKGrk~b z*w9gZE$S6OGx}K;UfyU|+&PNay=ky#zB`t^0Iofs;O(Q=o!q^FhVq3QXyPkzPu@Io zdU0*hZ%Aj$K9ghsU;v72^)!sZ84MrlpcAk(t0}N0?v&-wg9^0rdWlQ66sK;u_#D|7 zxm?Ah>V?_Ghr{3X@(;SU`T9iMrM4jL!!Ibd|1h4= z#jRfg;*~A*Rmz-es1{Mq1Yg&B#JAr_vt6MW{6&^w@UCWlP0rFzVR(JX8wR9}f5o+* zYsTp0B|`d)W#PR6&I+OROf$aU@WtjY7P}mHn@OelT2q>Jl<%>huDyJmK+@pGmOl@t z*F>bNiG^*>H?nc_7sckNx%ky2QrY#WzGko{!>6*Sw{C22HF=Ef;%AJ%YwP-v-mFQ> 
z`j#(t3D8PtuF3zkb>mn5$FCpydSU}q;9v@20AO={24An!im?o6lwdzo>8{!+U)-b5 zZXez06sM-NX`PFVdY7Hw+Fw?hi8-I*>e;Q+?e3QM)Sn7)EVW%cAG06MjbU7;M$EP% z4pNol^WtEnFR&v$o#T-WWm5)p(|?$G`k#mkfDJ%9vb-pd{1|oxFS=`fG^p4e)*8sV z$wA4T=@GM^DziqEj-{)5NnvSe38zJ&CMFJ?Vu~cTJ%RfNnw#n z8QvmiO|pEJ+gMC{)~K|(1deW1;!yk_s1toJJDZgsj$s&AVEGu#VwfI_vL7vQ_nm8D zWcc+HE)O6CUi1#S2kP1bUF*-Z5lH;@qGy1YPY`jnMBb3EfJ|aMB#0~M#{Dmj$?LhF z>jpeoJVb$) zNNxLpHWriN1DAl<2lLKWF|);pUJ)7WR0+fvjFvT}BORcU>U`d-@Di#Dw}sdc=+s=WJ2jkA5^Ye^QNM^-OefRac>+6YgqLu~&p3A{gi1w5 zxSp+6P~|h-QI=P&s(ej>-8ZK!G`^oaxkLpt0 zFw-_DTv-_%TEb8xGEtM3k6byU_3)*bPC`1?R6IL3X{IulXp%ozsKe&fc+**oz2el= zMzImrb2KC6uDP~WxMSMGWX*#`kngJly~g&;PjOm_xFy4YcweXDroEfMlJ-{dG;TUo z-c}|rX;l^l5H^D2ln7st(l?OF2x5~Ea6Hl5)AjEX7V(FE`VHs&7({k6(W%etr}sT+ z7w+g5rKS%bYUdn%!X1bA(X8pct0VCFtcH+O)565!wc(HFzM8?rP$BCk3IFm*l5zksJ?HG8~AO| z8HT3AXKhK#{-9%yp7X3%FP4LM5ThmDBUP2+y{b#Khx#)ditA9Y$aG0?c3Qh`o_+H{ zJ)40W%SbQzX^}99Q?@imEp^ZpRtMuQ42~Z)PyD>79;6ds;9iZDZdacE)wLT*iiN$W zAo1fVpKwdpDqcQh&AWca9H#Jg?cD*+sQxJ95W@K4BAX}I05;t<1!hzRrKciuiIeL7XNum5J|_uBWryVLU0yn-kxl3hzV<+mrp2*1BOK!x#DGTs5pha%*XfNAC4x zdrVbY)A@LO=$J-;Na0*==GcC<3VYGA&z3)_g~l*>KHZW)ESp1t8>ku0f#-{LQTG*O zBm>p-iXRFnVSbL@4V-j6p8iyLraHwAEI;SizXKs>^3M00Zaj;yswqv_J4N4P(?O7fJ2{SsEfaZ++ zW@LcykUv!vi$2QUtGs3+VW*9v>h%TX?${ZBCySgM5Qe+3ls~>Y;0LR)O?o?*0DcVn zL#m@OR9ujAfDpJeZDzI`R1;NQJR;F7nKpysq

aAS<{m zcRc&%tTVnS0&0&YUdbEgVFA78snbcehL)C6!0eO=u<4Tt^VXc$dlS&5DTt2-Og)Rn z{0Z?G)L@0deCbF(!JevC-tl_fuS}8q)jWb_{MxTtpX}#GoxkYHUnxSK+VT^R_@0m0 zK*J9itS6^UpkH?f7VxDx9>ITPF0|cGUQbYvfLHeG`d#I(UZ`oXQ@OSZZW*7HZoYpB zXp%MAJVKWtKUpRiIgW$HtxLb8YgeWnB7@fT_^z2o?XlSxscj2%`@PgdAMeQP!f-EZ5ba zGKTRj7cOAT5_iJ(IyK@ze+ch+KP}n4TzW7K*I%l#?uGPm`D(6H-j5&g*-5noLY}$6Tf;HILvi}#Czo}(la7<;xUCSZYyYJWuy09gNYY5WATP->r;8s zZ5EIGk{kgeyy7RtJ3o<8y>z`SnQ@aph(0|t8<9;^xqFeJ<-oO#*yuHphDr)v*$91NArZ@f1uGFlmdGu6xvuKpUcZb0YWZb@p*4iZ? zKyGC@1cYs>u4ERS=zanuKIJy0ylc+IdRuQj^rJu=BGa%^ZL~{uDL%qdoyP!ly*GLl z9TZXLq@r;*GipN(lWH_0jWp=#%8PYkwKLR+qeOqnA4wf~Kh0WJpLIZ3he8VVQXZwe z=R=W<~JtjXe0O6{6>M<^bF^@&7+oBItH)ydE(;ufuEFZT#%(X!e z8{Un{c0QJ1jSsM9)2Y%p`RHmJBpbO*+|L6(`U2C+v&`}*V@+J(6RINp&Wst9-)Ne@ z-3;_qA^0C=$SlvFo90%H{kMV2Tcw3fw@s2i3W{Oi-*|T~0Z{j2rW2d6323AlWX>!j z6sW}RWWz|)8zmqyaLgMw>*oy*Vz%6^W9lnscZLqQAM?bON#id8&P_I*o|d`SaeGn_ zP&vq_T`_=b@ZrT>Oi^@ekfV;~wf7#~m zqV5cHgXu+ol|+31dAfJO(Kl@*&trb~kR_k>lL8DYU-ls3w-FcXjo)F|&zK}!kx&=Y zt)GaE!Zye1T(sGEmGc91&TItB#in%<`pJOiJN&mh@GW)cn9^L$k^z-_BaY|sDF)69 zYL1H4>VWJYw`cc9LDu$uwiAHo%`eJkghrR+Zm;ynfz%wfBgmO%)un-CGPF)5$h2z7 zEU)WRBl|V_NNpqXxMVzZD*wH}b+q0unZa32{DEEZjfXvPo0bOD1 zq#MU)XlO!Oa{X#2E0d^sg~LQ_QSZ{VZ#>S~g$UIlkm2p>mb|$+VYeRY)EkF7DZ)nT z+$|r6Ic6Af2R?=ApxAba6%HBM%kil2ZrOD$6MBMk2Z8905G(kP>Nn;!t*)8g-VXql zwP193! 
zb;o|`X{k)cCE)HiCR5sQTY=&Y6@y&p;9TxdE`(K`epP`9_L(X$!xPHso8bszcfS|E zt3o_?3+^h;^SLavR4T1_yzCyr|MRi&hIf}9I%^`#x+*CwSR=pFfrRG7*+Y65H&62?8{X0;?R^J0xnR{^{I^~dq$y^maq?=~~g7&gw#4DGJUn{FSalS}3WH8rbUc908Q{6&7X|^WFrNK-xgFeh>J_aKqBAA_} zh4W#gbZ63($}9y^l$!!0MH~qb7avdtS$zUfPPQLPJmvVtJbCW%gD>UnP8>XtyPBDM zgAFwsm8kOO&&}%Pb!v|_OA3ykebc@z0{h`n7-dQ5{s{OSOWrh41e{dvsg z1KuyWnKYhPQrID5W<7iGn2CwO$2R>U8*f>3kp^c~+P;o(*(3apEY$iPSqCCsmcs?o zBEAQ1maFMSSuzB94_^6Zk5ZQ1d@Kv1uS}8_=$xSu6KPo-cPhMULi6!tWlxp^v>r2E z?y}k+cWl9k6|9tj=A zW}>|F^mtMO^?6L@W*BjAXb2UzAgMhoJ%jF5#pa&e6}C5jC0a7yPRxzbe#|QG@!snxg@U}$4l>pQud`|bz=W{Vizvt!to88;r{eucOU8AnVmHmPCEiGo@dCBkS?HWMG zl{mPwA?>_#6c_8Y=ozKogbxyyZA8E=V2NEQhhALU&`#}ily8VcTtP2S$0XMth-OeC zgU4h3oWAT;E<39r7-;fK>ebjh$h0*>p4gU^RApQIZ? zOtfJ4P9vy5KbMr|Hoa+|ltImq!1|A|ofvYbxFNoxfn+K03PU1a(N9||`@5mjYgHMl zSGy=Mcm$ImVEMp}uOr!Zw>ay2ikFP)x0QizhRS?|uLEzXIODVQaOwCsM+J^-Fvsq8 zWlJyBQ_)CZl!H;n$hVw#$OpPNmqfbQ?J~Ss!bKx2hK+SH_ym_y6ZTmVD;vplNAMkT z!>Z2B`WZKRTBT-S9>2;5%JBf3DWP#YA9j)6-mjJX$O5oLojtmSL z7Htw}JJA!}V!h1-^tvA7T>f&T^1z`|6S+i<`$S|g3IjfemrgyoE^u} zSM9nK8hoS6%qh7nUSDgi8WXAb$o&T^kL=^ZXKVK)i}Bf2Ri9KQxJ9H*?!6;i^DSCq zbJZ8FFL|%Tje?q%ssOQL z6ZSsBhv1Cg4$1QZ0V0F!2uC zFY=6<)9+k7nalX{Z+GCKELl0Vg$uGI+`@z`0U36f>!BF>cBut`$1B1dFN4gqf*8GS*mvE z{&8j$Aw;y{Y~UBQtFF6cC$rDFoSf`tf!j7b+^8-KETMMMzXqFiS~=QOUX^{?K}aHK zBdVsYx`gef*8u9O;6on6q|>n)2Z=452;o=S*PV&@8Z|?9H9nsNUWeksjgs}ys?(Pn z4+*Nb>XiKqyS!Et8pbV|c&r1*Kr+v1st@XeJ#-0S2Z*Xp)#TCcW@=7r@(EGPyYP&5 z_^$0Q5T~=py%wif#a@WHkSGjptF{RKkzj=t&WI0 zERUG(eG?Y{a0DVH>Y1xyDjD$@+}( zL5ulI_YOoW*rt1Dd-Z05{1|?AbKx zEF$!btbPN=*k1w`Oem|Z#imPQ&4QaV15{cEtCk9;>3V?u?@x8mPli#M$rjbqD{5TI z@k!)|l5ySVG-yN&(t#egOYQDy5JimrU4v$>vtXf z?mV!suU;=)Xycwx#~_4db%Lu*-qRCO7p^KYHvQm%S1WkwN}r0Y(HYFJktXN4|yK$E*M;o`pplwEcXGxysA|`G+jWX2#J0@~C{9*ETQ#q1OJdwfkJS(lKn3Z|LaSgSrQG zM!Vf$q(fSQ*9o)1Hd@31x&L&bZ62zntm7?n2@uP6ii5e$=E?G?%Z9h~+XMy281RO? 
zSFJZ##(UML$LG4&5Z4zFAT<@Gz6TX-TqDA#qGViOH1_&bUe0ZkUfr~1-4E4Ic9ECc zG@N=A0OlT9jtMNCZ79$(IV%Z&s+zz$MmwPn+Kub>w;3s3!#PP3P2kI^>P)F8s`DkD z)U66G)Oz1;YVUnhyU(cDExtrsuhpb9bz8qQTVuu@ruIUc^ZpP%?_IM&JR)Jx9sQc{ zz%-rz529FlRD?19Y3`%%&K!r8`Xyx=Sx(gqw;nYzKDw1w-sPv!9%6aBD^w*w5OQ?@ zZ;0RI?ooL;?fK+vewFgi%#jfz5ucXob4f4%;0l5{iuwnIS0e{qrgoK|t?d;g#dORuFoTT-@L zn$Yxaf%_aAcIx^QzMw)Jl=+#NoVSer+^ifIIW%zL``U(k^9!kmyW>6@ji>!**3!OC z{-6$TKKEr|;0ODYqAk`!7;E7{&5@0xy}LPMW4@qdkmfil`sUS*%ZzoUkbIr88PfO% zRj-*-Oo!2G`N|tZw%O81&ni4k?^d2sZ`~|$0M@$l%v5_~YlNw<#GTlrY3H}>OT#%H zu?ZAc1kLKX^1;Z)Nu_2Dy;KP`bneQt?@3ucDXwfknhpHjjKeznHEHv2j?Eh=&bw`m zGD8?Xg@`=qlAyBnYA&QfnyxI44LnYGMr?NLCut=JlH(wt>gvj!>pnN(%=rBLD)++E z=2%R4K0xkG(%mkMUz4W0W>hJUH#s;r6`{>i$F93}#yz+|SbbNX@O}#y=df>zD~!PM zjxSs-y9uqPGI%HeGV7KVQ==vD#Qt#==-Hu>$}7a-6LWT(=t-IgOWWtU)^DU&^Jx>a z7kiN@QfyCbid_M5V-wHxc@R^4G$npUDdIrUR3w)U#jvGj;5+cNy;cSa9wIeG2=&&? zR}UZLkm}3AI5$kIE*m{$p@UIij9AKi<_Yki&zFGvmb|9444W-}X{VQf-l=a7zmJIi z4v@Kab7!Y~iL#B6QPW1{*|p}W^_SrWYtR#9nM5PrVLHLy3a4})TpDP$D?HQjA+Opg z(`E^4|8aj_NH z`q+S+yEn2$mI_bR*x!sU-w+-&j5pMXd9!5)E*wE9ewU=`y66RAKXveMS7=|w25|2) zo3S0#c1dE(hX)M}1`zedhF}RhAYGy{7utn7%V8_%qjp~9BYeV(t)F7~Uw`R)R`&%? 
zDy|(nEu(*+n&wu_X~w{;ZY5LdQFWc)&l0Q)H8yBglQmI}HLSt5Led9sF99O1U@*CC zQ`48GK@D%p89Kug@cQV??@GDh6U8-s1Q};HwS0XAn_SYood)yw_sal_^>4SYuK za!LCn8w>b+y`fu{D{~>Pn-%b(vJ< zoh9#tWIWCTHeUiVYD^9mD;qmKD%1lI$JYs;ZGTmZ`KAlL~NeV zVt{<=Zn8@3(4wD*b&v`X09kkhWQX<=U`vRCLoDOb9T#>c_V=A*LwQkcU*+v=11~NC zw>mtFE(|K|zO{D7wzjGOiv?Kn&AAZGr&}dnMF+yEP6*Py+w{98!7%wmC>bHZP3BT!I|G5g%WM2`>pNn9Aq73Q~5-`hBSOGixyy3Ln*2`^gUxK-C&VT z3HFc7d#iND>qD0Kp;2Iwm0<(+efxrIM_BRuu`1lg209+Ay)~{)erL71m`n>MLqQ(n zUs(_sf1tt^wV1=&Mi`qsY6es^>o>F{FdEt6{w~w8}b8!p?FBZ$`X{eCUtChFYnD>p3(e&u3 zxF!%G*GmZ>6}kV(VV)zoC6LCj`8?lPKW$tkl6I&kL~wjni*Pu#7jX?^fGnwgN_j8;?jS;U<{Ngh z#jVnK#B1yaa3*<(h$E1T2aPswFP0h*g4SQ(OfS4QsNs*1PvIk5ESq1^Vlz%pQufC- zjTtlQ%h6hkw_l~x*h5{!NiAo!ctobDq@S-j0}ijEXxTNmx6JPpq74Go&Rj=|XKL*B z+}q|m-$afP*c+W3%)BK1P6HzvcKo$v0&|-bJ`m0ZZ(pPZ^zM0mAsF*{|0cvvKN0-r z32z~sUW%<|8$@8YI-!?ICecjOzV1_Xh_sa<#Vyq4(_ixyKW8M;YK$*R^MFDx$Ma7l zXC2sk`1vIo3hK`$xDpfhHbvy8rk9ooV+tyq8gA%*J3sQ?G_&zRDjHZ|>QWLCBE0Nw zJ%8^8bT-(`&^kseS~GkDS{zWIwc3AEd~PbG9Fa7R5)x;Y{b!4h}T;xNFF>^ z=W{5C&KsnoUPkU9R&oc%`-Bp1=mnl-)WbUKkzU*W0c`$x68ta`!(A4TrRUbviwR_u zLavz*b6vj=Y5*f7bs+sdDA&nVK=RM>$f~Y@Nl7hKogDbG`ZzY^dMu0My&IsbJD|j3 zOpx5(Hga8(4|NGJJZ<&K==_eYXm>SSRGK{IiRoQDvUJ-!le4w!pdv>_&cev94XXb} z1081k1|EIviv%{__I~lAhGjp;UdI?LL&4o$GFT1FAVI}^(dMBt_v(nz0FXG-QnVPz z^YQ4q&$Ku}9}jv1+*H?FOYQBVHfOvSTJqCHc9g5Oc(jJyvSXsiuoe5VzDx#aJoOeE zGC4G_OZE4KkDm0dF*Vyeduj6wTIw8`4fcya^K>2Jw{TWo)8P^P`6jQ6zrykqC1V)` z-B_9!iQJv~8Mj@SQih_L7t9{~6$c-&cyc!-bc1O9jgW1gXa~VpTUPpM|L{nl zS0oVEV~DK}7FJ+#1ZZ=uRpd#VZ0)HnKp$QUvL#ID!pntC*nWs+H&vpzm%FRJeK7a+ z=Ao^^(~dNJDwDPvsxPo)Hqfiaa-)47u)8aO7PT!i?9!XW*R>iT4!tg4?p@#fb3T^E zN!wv%$*tK2QTaMmIc0C{nN!S-`to){XJ0Zl;6mQ+2_mURuTTj%{*)H9@k`s&p-^SI za?O3&5wPXBk{z71*Cw!d=LfGV*{mgX?p<|GJF&SB>tH4T|Celw&uN%972l-W>WX zE)DVdAN#ZG;B`@6V;j6-kr!NYn*HE~5AQAkJRx0}D>cRS%UoEVZZ*BfBDXtO$G?v6 zTX8u1-}QNWPaJWBSrYW1P6tlN(T>Re5V0U#b+9Pr|3~EUX)=>}2R{W@%!*G~)Yj~e z5Q8Xn&DaH+YR=C~&ZV0mp-%Z3V<^`Dw9=$tIgCB#y|(eg2;lp&#*Sj4W~?XJUebfjs^Sqf4aC`MNVS7qibFzRs*iU~6*WXJSZQ_) 
zt?|3`;^M>ZwTEVY11odITyo!;i167oBYrQ|42IY6C1EN}#j5JFLbFq6An6Uk4h=O<> zNsoN6)iuuXo;g<70UGWwC1;aNaXrdlg0_KXVIik|(DxGH{b}%mV`aIdL*Drk(5SMo zZX)H>@loL>*2b?c>e1ru>q zS#1l$s_DQPR|L&+^67a3-_s6*l^~ESD?S^nMoAaGT%y<+_0a!&EaQ-Wi4SH}$g)R5 zTe*8}%4QI6vSZ}`_QOPY{q3eL$*8OKwm~MoV6>$so@4?#FFyY9m3XLVwHFvffrz>_lIB7v@aL zzC6+mHK$~S>K~Ks0$X9=WuhNPCSk$pBI_ihQuC294PRN-{ot4#!F|LDMk%E*=vprbx5I+P0qBx&6dF*8oWi*Pcr(S zugek8o=bq<;9$jpKa0e<+>^2y%vm(gKEYX5VZ2d3SQ8RzDxS!1H_|?Y4F>TJ{j>1m zecyW;2jzK2$&O}ciHEUetQJTy)HQ=Uvr&V3hOu8~Zg>)h@ij6GB7+>DBEq{g%Ns0*P{``BBX4w` zBthHGB5to;V{WllT^SjsANRES1kr<8llo30h95xEI11~n6ckLxQBrOEU74_#$J$DCqIXx*XYWs z?7#|E+Z6Obc-n3V^NEexzFVSWLP@g%?5|5H?fS>gUikSUN;K`qI-$5K|6b?-|2v&? ztrOrYIo(_2;Z*~(3iQ|es?7`5dMK~s7$L}Wej@araeJ#|Lr#xe;7-D5s<|0< z1sNKTMr9Txzc%~T76FSm)GblT>rRYVO!ET)BCj0(74LuXiXH@KHrY7rr9qkM5nd9 z^B`WU5%+Q*;Y@Fv^|g%pd8D#oA>%U`x8@TxbCoq%Jn1izm!ZmMj!euzSN{?GN~sLE zT^A2g2mG^G;)h$Q^sU-TT7Dhis$#~3>{l^Ux|j&cg;gB>JNafr>~8w-%TwpYvUM!X zzO18^uv2~(mH+yo`=mc#P;AMOq3gLgoOycA(wWY5^0i0S^Jdgl^r2!GSJaNR=c);c z7FoHsBBn7+&F^(WH(EbZ0-9AocPwd{u46XpQ_G?{O5III{v7Bbw@ZumjwHu03?ame z{@tdL>kJlaen?T3v~xD6K1T>}xXj3)AtTDnEt}k@+r0Npe}h7G zh$O<>jkZb!4C+ex7Sj-&-qX{bEv`<_V)M))zmRLnwDdF5n343}lah9|hL?xe2y;9r zo=s+`QQ&aP$R=#x6D<{$y;3C*h`Go6K3m+u)Gix*T#v7d&RO5GAyk-G3$9&{C|xeV z^7(|fmP~=)$HiV`o*4bA86lHsdt8rgJn|%a6(SZ>b>jy;dyJxQ+L)!b-N@2HsldGR z%HoCPs+(KTmeoc}##KliQ)ssuI?etXLO-ktfdjZj2=;cE4$QpU)v7R!Pyw0Etl-v% zF`wA0t9_12(Rg%OfO3kEfs}puQOgRX|L~xKUF}a=`*^`|kA+~^=n%1Iib+JDGBOKJRjgV(qAuC|xud2qGy{8&2Wg&lfiL~Gc`V`a_Kd-R0c4Jn(QLpky>(d2!8 z4{&+-q>s0RJ>?;h!gO=5$kE45C8=ev>2E_)z}eg^Bg02TX~}u1isPV@`MLP!%JZhIA^0#CjqgX*rD9;ifJjVJ1T($jpN!k24x{p6(q z*z`QRk+9IHD&Gi_nBSr*>PAbm4p6hW>d%XJ!657CM8EWFbLS@AR8-tqY;^3u)P1Q- z9)TTxU4?(r4aReN>Z;m>$UV0MWZzGQ23x3Vq}|x}GT9DO@%_C!ZEAlW%MVCyAIX)TwEFt=uYHE8dO&=)f}!*|77%e{AY?{xXYng@wDvL zP_8F7xz0|`ev=gm@@nyeDthJuBhx}4X{J%eD|!-VB`B|AUfeuuQ4zGzQ4OvFahp-&$dfdAZGPP{kp19^NO8~PJx2l^8}%)lxy9(SC%`_0nLPD?U$@S zCjJ1Cj^7{2?D}dJz%Q$PTCye2nrkwL9fENXb$DL_^uyalsGh7K2Rua_F2>FL3ODS1 
zIfJ;}(!Pb5EWd(iuMD$sY4o+yH7k?FR?)o@53-YecKq0*)Y1L86mER+lufjU@=GbIc2V3+6Q9!~UL_q(<|y)p9~@Kaq!^Jo@sq#h z&O50*KA4t@Oa{G8XY*Y5f;m7aro8+rEDiwtqRTB&Vt+-L64qRE+e>5Bvb_t-Pc}yC zh*Ynj$f&7}1O9oy{P}giJ3DDFdVR_~i~CAus5jT`pN=O-);_nf7~;KsO4?N|nH2VE zR#J1m+5y$4g zexN}i|-`mBAJi7)m$=0|zt#z{cR@x{m!Q&6F*5Xr8U3dH&w-4=9eEvuUS z6$b6&;0-BW;k1)1VNZBws*{=eG!&Y-6D?%kkAQ4tVPkP>nPq!;O(9HmJY5JE=? zz4u;4x)A9dq<4^#gdU3YPAC%Eq4yRk0RsMW-??|@zW+JzoqNCEFMH2g`^%m^zrEIa zp7pGi?ZDB+EA@?{F^suYIFyOKMbV~?SluH%EvX{Tvy5wV0o%0{OtfgpM9?Y&nFP=Q zhM6l?inzVWCjVHLq0F~25pKuzYc-1Q6@t}&N`{zETvrwqQVxA*vl2u#a@=R7Z@t>ej@;;>2y~;+ZAsmTX*zWC$qKs& zQ{4RX2BLBHG5a7Ejn|hUg-)!A3TxM#oIo3zY|px*4;F{}bFG_{d+HL80oUKlImVkb;@(=Qu-qoMpYxmBSixKI23Z zHJCZ%jG6Hbydgf+soJ}0V*I*&T+yQQs>^%XU>wiA(lt!;av*!3C;h;S+*?o%_VvT9 z7R%!o1ynqn#szDQ3pt)aT(x`@dp8V}#kPqSHhP$9*~~hR$4KAE+mv8Y{7@=23on@z z^w44#yDQKuepMKRMXxKv_Y*yy9(;pb=r{D$T?*&pvjSw^zxWXs?eJdCN zrT*fhR5l9*dQ)8I4i9bO%(BIdn`G%h zSA_9NPHd6+U!+!#*oKpmgH@Qa!$mGj%soea<1Z`(hw zEK_3`gsHg9M9sduw^O*;TlB1xg~rNkQVsZDWG?xolxSs-+5n?hw_1nt!fVM)G}D5A zso_&86MbTvy=|w5BraT6?@Zk&nyft%b2>Ay{ZzhmL9zq1QJrjDObiItT^Qy*dBfCS z;9n-|nI!)c%sBtixBb~QV|h)Qpl{(;iIQIAj=4@?-Uo|RCnSS{!FCa_D$94=@Uf~7 zD`EQA_=)W3DUQ8s-jDVTvxnX>tk{bKJ*ZDYisF9vy}PbVzJn@hsGa)3~@d@#eZhZG-NG3&B$ z?pb)p*0jF|I6=-*vEeQIPZO$crh!e^V3KU@M)94G+93?KR-GwR(L;9#TU)(cK5F{3iH{z zIyi=^uJ^I4ROcbpuY;+o472oX1w^|_{F4Jo&XvB=O+`u1z3&gU+o(y_Rx{vh$jmMO zFH4q5IXNvWCZyvwZqx?oD8DvJWpqK`$J4lHmf9)@_11v*x~6CUM3?=`ch7&{MN(VF zd0Pj?M7|5DvEy6rHTF7_CFSjCF1ALAeW6nnl5OM>R_f&-5NA#@$GI0xKX*>?@YNBI zIR>)sut|{-%t41ozHf3*!{ucB`Bn1kUdfBJ(}~f|&J`(su%KY`(+l$3QPykCpBq}u z3ahk9?O1aZK{UNq9IlQJH#qt3qHmH+h!0&4%dLdh_Sq-{JB=cun!<&3y%fe#0!(QJPnOy&v(rtB+~iXlOQ1O5{D~USs@?FCjr`*Uiyia zBepZ<=3r-$x5D7UX>t)xmB@|1^%L!Wzt|XD|rZ`EsRH)$ zlg|GvCTgtx7QQ&Q&NcI0wMcu+y`!}O;JvqehSQ2nhx-0?#3QN-pJKbCP;shvF)ww1 z6=R@X-LtDeTZO}t*}`8LIwq(y@~>Xin{@yOa9Y(ZDJ&l3)!Ye?`@Q=8^N82#rVIEB z-E5|&Fn0Y3o}GtGUhlnYU|}4Y%O&&NFFU9Sqqc1i+oMP1zs{S+gtExaBd)RzUx?pB z_I29LYXIT(TUDG*IeE+8>RpF!n2WjvpPdQcK-Nc%P`8Cvlt`{Uy7csHnFME13ojbJ 
zNx3NDnDz1&)#&KB`?hzQC*+@z`2R%bq3Kt!ycWabl%miF_x19mb)`h&46cKzMb^ES zE21&i?zgPz1i9&8vEmu!?XpwnEpya~SqsDaO>uq$_yQg3!dRI+*?}&gI3;FySco=9 z85I@*94lRk6e%%XbZ2i_KNv>s7tUk1PlZV(?%P*}_#*uO5-a=_1Cy~g`6HO5oz1VR z-Pj3TveVaefiF1Ta5+)4&vC=^?Ol#-A|Ab*{tYPOq^eo3R=u)c z=A?U(H(1fDnfc>wwbmli<(pk{WQ{fEfT|#A8*}UnAZ)~mQC5iWKaL8-{02P0NP9)x zCYs8W*0X|Gg&`ZwG+F2k_^F=uIMqbK7^grA%dh9DNt>!N>d-&@xN+^Lj(4zNK9jJWCuNMv07$&2xk3RlF>|#tM70tgzk z)bQV5og9{ZFSHJIBO$DgBh`1B7F03ADmVOBmoeEvQI%WqIsr9jnv(*Ln%s`8_bjGm zkR198r^D{JE|-1ggqK9lRQ$)mY!Pbt|d zQf*G%IY1aN_Ej*XYtHS!$^aJEp8+6wg#PEo<^S?BMDd}Z*b7|7m1+9@Z;}dmrtSVJ zQD4%zVvClPcL(xrSfX&ktz!c&VEN^;cQ^I7V}(KC(vD4h>L9`031AR`YfTFbATTT& zK3g2#My5Uc(9DkjZ|1@aLBdM&%sLh~caIyY64ipop`mgiH>n~Zk0@6kpuT}hYR*wnRw90C zqHGE@ApB11jlNG2GInT}2Pql>86b&G@se<_RZwM4i1vctBvULP8Ks00fTO~@fW?p= zW=%Q@@h}_yyKT?^%KTm(tFD6EgNqgKY%26|SzAoRy0dnzNdg#gUL1+#~_QKqTvcV^-22E8OmoKdXPQ*F9ghF^; zsuqgb3XgURriZXJYYxkAL?zp}zVk!0SP&_+Rra2%yzC!@hGtEux~xQe*UaR5t`qeQ zopl~QJt0NxA>LfG8&_Tc0%pP5CzSs5cANfj{OY=-%+ zSC5geSq6Drxr*h!dJd!Y;x#(<`Wz|)PL|nknd;M*&)agdgm1a=ei%R1tu1i+78u=I zIPZXWfqx3tl}q^7s=WVvA&&z@OMVLNK>bJ!Zm@!qY7r0p7#{Zdr5nIE052$Vm)dK_ zF7x&DL~!#%${Gl(6;Rj<)Cw$NECtb>*gPYSC$RAm>)d_{8T6^O*nZvld-3o3J8$fh zb3_>HK_i$=mtrfIDWTl1zPrJMzLHF2E%W4M4XEbssewR(173^A-Gx(c_hpPr-7>q| zuxFldbsP5=k)?CgGYfblGX5ug(wV+o=i0t3N%ywE$?|?EwOYc{0mb}P>j_NpyFOs`lS*ZjBQBgU59q` z>g>cj`mx0j74Nt?YnruR*}7V?9Bi7Wu*F5UA|UT<;1BOndtCE4`pIWD79hn+TiY?? 
zN}J2=dEG=M@r0#A$@CXQ^C_jLCh?6$~kxB*l-Z?=nc7r1AV zvwRra6&$tW$EDWiLj><720c>Ar!(xM7EK8?4;lO+Bfzvp=BEdYBb& z<*wAF+4+n3ts(6mDdJeHeM2?YhSG#;$W8w0ncx3F(E;@Y)oAR2Da<3mZ!!sj04LH# zr7{g=txetS2YHywD!@G(#)@>P2Pd)gz!5La=%WU1>$rqf=#ao?mpg}jHz~_>G5+=C z-3DNev^CFEyA}0Eikz5BeM7}V>ipGXn{bgsW53j~cH@FlKl0D5%rp1JCT@rFN?o1e zx37%TsR&{xdd8@1gac9jr`K2i#Qy)AFH~l2jMU7#zFSXUTI1&qo%0|u-#ai##MbPB zK6=Y}XSiFm>IR(D45GnO<8D>mwDuMPp@*QbR;PkpUtxH9k)5b^ax`|1mUw&R@WDF6 ziq9iW9It5AA;_p&#qf*vSags>ThP(et)cy~O^q`TZ`87ga$qW=IVDctYk|*PG`_{( zjo8n(QO6N|P{pU4-Te^z(PqAt@1K!zcf#t z>17BU5{w<4(f5snZbpCm%j00DE0cD|Fg<3mA%WOHw^GT7{|~_ksZ1ZhxX8IC%zEeS zmpplLV}nTSH}VNR+EbYGCq2Uex*LT8b?|AKtrZ&RCpjzh?Es_jM?(GXJq2k^p6)E- zKOU_uN+`K^R))o2*7cKg{GEbTN6xADGD<)PEq&i4V4*gAFV-YO0(|sl@-q5tD+45k z-F$OIH*W`BG_CB-T^40S8W|pLOMhr}uXcK%+}b;HAs#_w5<^-eUg;pOGF?chFJki7 zH&PmeR&qTsYj~3-_5C$$p!y!@!q?=pKdsu>M&M9A3}{0e>5SdVRNFLeT0xQY-K8~p zv9_W+4L{$nR8xa=8j*LX zQL}UmfR>-{pQ`ezqH9s%G3tF}UiZeRwsKwlDrbHJG}kQ${1cm!;|o!DU(RL7%e>4! z$oC|?OjQ<(P}f0^sV8S3%$j#( zxZ+=UTQlY5bs-qFs@n)z|9tWD(;(iul-oqJn~v;Ci&;kp^`<9^5*04aHoJi5iuFoM zHsw-8f9G&+Fd1(osUZYg`zO6LTqI#bHy`lOc?N@Q&cXfS*P^|&v*bNN=)+1DI z%YZ@wYC&CVJGsW|Yt5uCz6a^dp|gGx8j|=)+rX%)(^%|}J~YmhuF~x{;B6K}v-jdr zO`@BK*k`iYDLl-AVe z;EcVGI8#hkJc9gWn-l_L&QE#B=ii4@aFQsx}Mv7_+p zn|w23BH7j>xQ;T~NG&C3*43d6qcIRxvI0!|Iq?@@UG6ymELUbf(I#8lntC{y$MyN^ z47^sMpNt#%w8${g%;x#ULh1T_nLi8KwH6lvI+wOV8RF?-T$foh2C9u0K<&)5*mw%= zqscaxxWWF3l48k)@nyP-6LOTyRY?+VbfV2~LBg=>LTD<$E-KfJf1!_p7BASO$sm&% zWZlkitb9)I&Ft%!)aWF#TOLocjz@opsJCTL)wyYF5Gq`P5X!7 z6+JqGDLs|$|0D=MZUJ#X$64&bH zn)k(9{46QLBFoA?^;c{^Y|MuTsTDQvb}T$#o(>ZhKaYmMi+T=gZSX5TGY}+C?(!|+sPC}o&%K*Pj1DNVwanon?Ou{XKHy)iP{+8zt@IjlT)f z)W+2k1^|_2V*Olwm+H1N4DO5h^(im_48TP}4I*S>m~c2%5mnxOm9JxuKfPNR6>fCD zE(=T=_8d!G@~2zvmsBsL38f>mSG~(Zf@j5#{02;&pKmv|E>QPKT&2vA$fdXeb(ABe zW`#$?OpAxyp`}2c12eci#6d^p3%AhQ;(x;!IqfO zcHHTNRx?uM(P0AA!Q39>E8*dCGh_F7xUUJ4Vl_vnuP>+7RDaE(DS6n znez++?X*uU?`qj97F{fYQY!UlqUnJ0O+~Y7cnBeptz)IN2#L(SnK#rTeZ+0eE$Z;? 
z{#uW|dt^QYfrKw8Lh_-3a{f*7gic#s8>P%~)|2~*FHm)f#;)S6+`=zt+4G*H zW3KgQqKRJohd$YV4`)DgbXM{gkVFXHmphLF5vIK}k@CUAb(_o`|LIDG|zAaua5 zIZ&$R7Vjh8jiT%3Q^S}Wf7Ktig~R68{n^aB4Q6{M8r7OSBqH=8Rw2ru)Uv;}U+rxE z;1RfR(zw)LaSfz}whG(;>FIH2?oBkW%^mpO^$`apuU+#v;F@fvD|m|Oe;(k4!xVKp z7HjN$&!m(-Ywgr-w67?6rzz*?-px=Yo2tAK_P=_c`Mge0lz-7{V7KT>VV83(LJZNT z>fq_c;SwDHs?a_BQ1pY(U|~~9onE{HWWmwH1>{BaK_iEO+cy3u-N~mk^ zOmDaU;Wwn5&x$qTFsp)U(7N%vvY=a(TgLGZdL4LT&UJe9&j%0OYe?2Qh_QX3M@p8z zcp|9P!=C`GWjz~b!sG7zUlh*&UPZ5E0H^QyxG)91rxN5CDeSndd|2#R5Wj-=f^~r9 zU{wA^Wh2K~y!X?P-q* zHFsKGl1?D#fnUIO`9WV1Dw+zC=zEolUZJlhr8Id|hQm|dYM)?f3h=|s$AQW)1#%<_ zUq=sU!;0|33aDRxMRh)H#r8cHKR!_?FOXbFyfy!dBN}~XdXo%ao0NLnw2lcFvI#rI zxwr%lZWkwyIv}sSf>#|JzXV3RXlo@;?kqj+{Y(;z%1?URG{Ak%0)stx)_KMLvdJXw z!B0_Z`KSLc!S8>Miy)FW`=3NQ`aFoBZ;AhwJ=!6mrO4)|ym<-j$!-r?UHJ`QQ>V1L zOS9eBZ2X)MTEX9(Dn{q-wZa=Yj^B+47 O|Np^%`U1qi=l%;4D{gN9 literal 0 HcmV?d00001 diff --git a/libs/langchain-community/src/chat_models/tests/minimax.int.test.ts b/libs/langchain-community/src/chat_models/tests/minimax.int.test.ts new file mode 100644 index 000000000000..1969ff0b9bb5 --- /dev/null +++ b/libs/langchain-community/src/chat_models/tests/minimax.int.test.ts @@ -0,0 +1,341 @@ +import { expect, test } from "@jest/globals"; +import { ChatMinimax } from "../minimax.js"; +import { + ChatMessage, + HumanMessage, + LLMResult, + SystemMessage, +} from "../../schema/index.js"; +import { CallbackManager } from "../../callbacks/index.js"; +import { + ChatPromptTemplate, + HumanMessagePromptTemplate, + PromptTemplate, + SystemMessagePromptTemplate, +} from "../../prompts/index.js"; + +test.skip("Test ChatMinimax", async () => { + const chat = new ChatMinimax({ + modelName: "abab5.5-chat", + botSetting: [ + { + bot_name: "MM Assistant", + content: "MM Assistant is an AI Assistant developed by minimax.", + }, + ], + }); + const message = new HumanMessage("Hello!"); + const res = await chat.call([message]); + console.log({ res }); +}); + +test.skip("Test ChatMinimax with 
SystemChatMessage", async () => { + const chat = new ChatMinimax(); + const system_message = new SystemMessage("You are to chat with a user."); + const message = new HumanMessage("Hello!"); + const res = await chat.call([system_message, message]); + console.log({ res }); +}); + +test.skip("Test ChatMinimax Generate", async () => { + const chat = new ChatMinimax({ + botSetting: [ + { + bot_name: "MM Assistant", + content: "MM Assistant is an AI Assistant developed by minimax.", + }, + ], + }); + const message = new HumanMessage("Hello!"); + const res = await chat.generate([[message], [message]]); + expect(res.generations.length).toBe(2); + for (const generation of res.generations) { + expect(generation.length).toBe(1); + for (const message of generation) { + console.log(message.text); + expect(typeof message.text).toBe("string"); + } + } + console.log({ res }); +}); + +test.skip("Test ChatMinimax Generate throws when one of the calls fails", async () => { + const chat = new ChatMinimax({ + botSetting: [ + { + bot_name: "MM Assistant", + content: "MM Assistant is an AI Assistant developed by minimax.", + }, + ], + }); + const message = new HumanMessage("Hello!"); + await expect(() => + chat.generate([[message], [message]], { + signal: AbortSignal.timeout(10), + }) + ).rejects.toThrow("TimeoutError: The operation was aborted due to timeout"); +}); + +test.skip("Test ChatMinimax tokenUsage", async () => { + let tokenUsage = { + totalTokens: 0, + }; + + const model = new ChatMinimax({ + botSetting: [ + { + bot_name: "MM Assistant", + content: "MM Assistant is an AI Assistant developed by minimax.", + }, + ], + callbackManager: CallbackManager.fromHandlers({ + async handleLLMEnd(output: LLMResult) { + tokenUsage = output.llmOutput?.tokenUsage; + }, + }), + }); + const message = new HumanMessage("Hello"); + const res = await model.call([message]); + console.log({ res }); + + expect(tokenUsage.totalTokens).toBeGreaterThan(0); +}); + +test.skip("Test ChatMinimax tokenUsage 
with a batch", async () => { + let tokenUsage = { + totalTokens: 0, + }; + + const model = new ChatMinimax({ + temperature: 0.01, + botSetting: [ + { + bot_name: "MM Assistant", + content: "MM Assistant is an AI Assistant developed by minimax.", + }, + ], + callbackManager: CallbackManager.fromHandlers({ + async handleLLMEnd(output: LLMResult) { + tokenUsage = output.llmOutput?.tokenUsage; + }, + }), + }); + const res = await model.generate([ + [new HumanMessage("Hello")], + [new HumanMessage("Hi")], + ]); + console.log({ tokenUsage }); + console.log(res); + + expect(tokenUsage.totalTokens).toBeGreaterThan(0); +}); + +test.skip("Test ChatMinimax in streaming mode", async () => { + let nrNewTokens = 0; + let streamedCompletion = ""; + + const model = new ChatMinimax({ + streaming: true, + tokensToGenerate: 10, + botSetting: [ + { + bot_name: "MM Assistant", + content: "MM Assistant is an AI Assistant developed by minimax.", + }, + ], + callbacks: [ + { + async handleLLMNewToken(token: string) { + nrNewTokens += 1; + streamedCompletion += token; + }, + }, + ], + }); + const message = new HumanMessage("Hello!"); + const result = await model.call([message]); + console.log(result); + + expect(nrNewTokens > 0).toBe(true); + expect(result.content).toBe(streamedCompletion); +}, 10000); + +test.skip("OpenAI Chat, docs, prompt templates", async () => { + const chat = new ChatMinimax({ + temperature: 0.01, + tokensToGenerate: 10, + }); + + const systemPrompt = PromptTemplate.fromTemplate( + "You are a helpful assistant that translates {input_language} to {output_language}." 
+ ); + + const chatPrompt = ChatPromptTemplate.fromMessages([ + new SystemMessagePromptTemplate(systemPrompt), + HumanMessagePromptTemplate.fromTemplate("{text}"), + ]); + + const responseA = await chat.generatePrompt([ + await chatPrompt.formatPromptValue({ + input_language: "English", + output_language: "French", + text: "I love programming.", + }), + ]); + + console.log(responseA.generations); +}, 5000); + +test.skip("Test OpenAI with signal in call options", async () => { + const model = new ChatMinimax({ tokensToGenerate: 5 }); + const controller = new AbortController(); + await expect(() => { + const ret = model.call([new HumanMessage("Print hello world")], { + signal: controller.signal, + }); + + controller.abort(); + + return ret; + }).rejects.toThrow(); +}, 5000); + +test.skip("Test OpenAI with specific roles in ChatMessage", async () => { + const chat = new ChatMinimax({ tokensToGenerate: 10 }); + const system_message = new ChatMessage( + "You are to chat with a user.", + "system" + ); + const user_message = new ChatMessage("Hello!", "user"); + const res = await chat.call([system_message, user_message]); + console.log({ res }); +}); + +test.skip("Function calling ", async () => { + const weatherFunction = { + name: "get_weather", + description: " Get weather information.", + parameters: { + type: "object", + properties: { + location: { + type: "string", + description: " The location to get the weather", + }, + }, + required: ["location"], + }, + }; + + const model = new ChatMinimax({ + botSetting: [ + { + bot_name: "MM Assistant", + content: "MM Assistant is an AI Assistant developed by minimax.", + }, + ], + }).bind({ + functions: [weatherFunction], + }); + + const result = await model.invoke([ + new HumanMessage({ + content: " What is the weather like in NewYork tomorrow?", + name: "I", + }), + ]); + + console.log(result); + expect(result.additional_kwargs.function_call?.name).toBe("get_weather"); +}); +test.skip("Test ChatMinimax Function calling ", 
async () => { + const weatherFunction = { + name: "get_weather", + description: " Get weather information.", + parameters: { + type: "object", + properties: { + location: { + type: "string", + description: " The location to get the weather", + }, + }, + required: ["location"], + }, + }; + + const model = new ChatMinimax({ + botSetting: [ + { + bot_name: "MM Assistant", + content: "MM Assistant is an AI Assistant developed by minimax.", + }, + ], + }).bind({ + functions: [weatherFunction], + }); + + const result = await model.invoke([ + new HumanMessage({ + content: " What is the weather like in NewYork tomorrow?", + name: "I", + }), + ]); + + console.log(result); + expect(result.additional_kwargs.function_call?.name).toBe("get_weather"); +}); + +test.skip("Test ChatMinimax Glyph", async () => { + const model = new ChatMinimax({ + modelName: "abab5.5-chat", + botSetting: [ + { + bot_name: "MM Assistant", + content: "MM Assistant is an AI Assistant developed by minimax.", + }, + ], + }).bind({ + replyConstraints: { + sender_type: "BOT", + sender_name: "MM Assistant", + glyph: { + type: "raw", + raw_glyph: "The translated text:{{gen 'content'}}", + }, + }, + }); + + const messagesTemplate = ChatPromptTemplate.fromMessages([ + HumanMessagePromptTemplate.fromTemplate( + " Please help me translate the following sentence in English: {text}" + ), + ]); + + const messages = await messagesTemplate.formatMessages({ text: "你好" }); + const result = await model.invoke(messages); + + console.log(result); + expect(result.content).toMatch(/The translated text:.*/); +}); +test.skip("Test ChatMinimax Plugins", async () => { + const model = new ChatMinimax({ + modelName: "abab5.5-chat", + botSetting: [ + { + bot_name: "MM Assistant", + content: "MM Assistant is an AI Assistant developed by minimax.", + }, + ], + }).bind({ + plugins: ["plugin_web_search"], + }); + + const result = await model.invoke([ + new HumanMessage({ + content: " What is the weather like in NewYork tomorrow?", + 
}), + ]); + + console.log(result); +}); diff --git a/libs/langchain-community/src/chat_models/yandex.ts b/libs/langchain-community/src/chat_models/yandex.ts new file mode 100644 index 000000000000..df1b164ae049 --- /dev/null +++ b/libs/langchain-community/src/chat_models/yandex.ts @@ -0,0 +1,141 @@ +import { CallbackManagerForLLMRun } from "@langchain/core/callbacks/manager"; +import { + AIMessage, + BaseMessage +} from "@langchain/core/messages"; +import { BaseChatModel } from "@langchain/core/language_models/chat_models"; +import { getEnvironmentVariable } from "@langchain/core/utils/env"; + +import { YandexGPTInputs } from "../llms/yandex.js"; + +const apiUrl = "https://llm.api.cloud.yandex.net/llm/v1alpha/chat"; + +interface ParsedMessage { + role: string; + text: string; +} + +function _parseChatHistory(history: BaseMessage[]): [ParsedMessage[], string] { + const chatHistory: ParsedMessage[] = []; + let instruction = ""; + + for (const message of history) { + if (typeof message.content !== "string") { + throw new Error( + "ChatYandexGPT does not support non-string message content." + ); + } + if ("content" in message) { + if (message._getType() === "human") { + chatHistory.push({ role: "user", text: message.content }); + } else if (message._getType() === "ai") { + chatHistory.push({ role: "assistant", text: message.content }); + } else if (message._getType() === "system") { + instruction = message.content; + } + } + } + + return [chatHistory, instruction]; +} + +/** + * @example + * ```typescript + * const chat = new ChatYandexGPT({}); + * // The assistant is set to translate English to French. + * const res = await chat.call([ + * new SystemMessage( + * "You are a helpful assistant that translates English to French." 
+ * ), + * new HumanMessage("I love programming."), + * ]); + * console.log(res); + * ``` + */ +export class ChatYandexGPT extends BaseChatModel { + apiKey?: string; + + iamToken?: string; + + temperature = 0.6; + + maxTokens = 1700; + + model = "general"; + + constructor(fields?: YandexGPTInputs) { + super(fields ?? {}); + + const apiKey = fields?.apiKey ?? getEnvironmentVariable("YC_API_KEY"); + + const iamToken = fields?.iamToken ?? getEnvironmentVariable("YC_IAM_TOKEN"); + + if (apiKey === undefined && iamToken === undefined) { + throw new Error( + "Please set the YC_API_KEY or YC_IAM_TOKEN environment variable or pass it to the constructor as the apiKey or iamToken field." + ); + } + + this.apiKey = apiKey; + this.iamToken = iamToken; + this.maxTokens = fields?.maxTokens ?? this.maxTokens; + this.temperature = fields?.temperature ?? this.temperature; + this.model = fields?.model ?? this.model; + } + + _llmType() { + return "yandexgpt"; + } + + _combineLLMOutput?() { + return {}; + } + + /** @ignore */ + async _generate( + messages: BaseMessage[], + options: this["ParsedCallOptions"], + _?: CallbackManagerForLLMRun | undefined + ): Promise { + const [messageHistory, instruction] = _parseChatHistory(messages); + const headers = { "Content-Type": "application/json", Authorization: "" }; + if (this.apiKey !== undefined) { + headers.Authorization = `Api-Key ${this.apiKey}`; + } else { + headers.Authorization = `Bearer ${this.iamToken}`; + } + const bodyData = { + model: this.model, + generationOptions: { + temperature: this.temperature, + maxTokens: this.maxTokens, + }, + messages: messageHistory, + instructionText: instruction, + }; + const response = await fetch(apiUrl, { + method: "POST", + headers, + body: JSON.stringify(bodyData), + signal: options?.signal, + }); + if (!response.ok) { + throw new Error( + `Failed to fetch ${apiUrl} from YandexGPT: ${response.status}` + ); + } + const responseData = await response.json(); + const { result } = responseData; + 
const { text } = result.message; + const totalTokens = result.num_tokens; + const generations: ChatGeneration[] = [ + { text, message: new AIMessage(text) }, + ]; + + return { + generations, + llmOutput: { totalTokens }, + }; + } +} diff --git a/libs/langchain-community/src/index.ts b/libs/langchain-community/src/index.ts new file mode 100644 index 000000000000..564fb4a3c181 --- /dev/null +++ b/libs/langchain-community/src/index.ts @@ -0,0 +1,3 @@ +export * from "./chat_models.js"; +export * from "./llms.js"; +export * from "./vectorstores.js"; diff --git a/libs/langchain-community/src/llms.ts b/libs/langchain-community/src/llms.ts new file mode 100644 index 000000000000..c3ede8d1f29b --- /dev/null +++ b/libs/langchain-community/src/llms.ts @@ -0,0 +1,73 @@ +import { CallbackManagerForLLMRun } from "@langchain/core/callbacks/manager"; +import { LLM, type BaseLLMParams } from "@langchain/core/language_models/llms"; +import { type BaseLanguageModelCallOptions } from "@langchain/core/language_models/base"; + +// Uncomment if implementing streaming + +// import { +// GenerationChunk, +// } from "@langchain/core/outputs"; + +/** + * Input to LLM class. + */ +export interface LLMIntegrationInput extends BaseLLMParams {} + +/** + * Integration with an LLM. + */ +export class LLMIntegration + extends LLM + implements LLMIntegrationInput +{ + // Used for tracing, replace with the same name as your class + static lc_name() { + return "LLMIntegration"; + } + + lc_serializable = true; + + constructor(fields: LLMIntegrationInput) { + super(fields); + } + + // Replace + _llmType() { + return "llm_integration"; + } + + /** + * For some given input string and options, return a string output. + */ + async _call( + _prompt: string, + _options: this["ParsedCallOptions"], + _runManager?: CallbackManagerForLLMRun + ): Promise { + throw new Error("Not implemented."); + } + + /** + * Implement to support streaming. + * Should yield chunks iteratively. 
+ */ + // async *_streamResponseChunks( + // prompt: string, + // options: this["ParsedCallOptions"], + // runManager?: CallbackManagerForLLMRun + // ): AsyncGenerator { + // const stream = await this.caller.call(async () => + // createStream() + // ); + // for await (const chunk of stream) { + // yield new GenerationChunk({ + // text: chunk.response, + // generationInfo: { + // ...chunk, + // response: undefined, + // }, + // }); + // await runManager?.handleLLMNewToken(chunk.response ?? ""); + // } + // } +} diff --git a/libs/langchain-community/src/load/import_constants.ts b/libs/langchain-community/src/load/import_constants.ts new file mode 100644 index 000000000000..d45a9c6d6227 --- /dev/null +++ b/libs/langchain-community/src/load/import_constants.ts @@ -0,0 +1,176 @@ +// Auto-generated by `scripts/create-entrypoints.js`. Do not edit manually. + +export const optionalImportEntrypoints = [ + "langchain/agents/load", + "langchain/agents/toolkits/aws_sfn", + "langchain/agents/toolkits/sql", + "langchain/tools/aws_lambda", + "langchain/tools/aws_sfn", + "langchain/tools/calculator", + "langchain/tools/sql", + "langchain/tools/webbrowser", + "langchain/tools/gmail", + "langchain/tools/google_calendar", + "langchain/chains/load", + "langchain/chains/query_constructor", + "langchain/chains/query_constructor/ir", + "langchain/chains/sql_db", + "langchain/chains/graph_qa/cypher", + "langchain/embeddings/bedrock", + "langchain/embeddings/cloudflare_workersai", + "langchain/embeddings/cohere", + "langchain/embeddings/tensorflow", + "langchain/embeddings/hf", + "langchain/embeddings/hf_transformers", + "langchain/embeddings/googlevertexai", + "langchain/embeddings/googlepalm", + "langchain/embeddings/llama_cpp", + "langchain/embeddings/gradient_ai", + "langchain/llms/load", + "langchain/llms/cohere", + "langchain/llms/hf", + "langchain/llms/raycast", + "langchain/llms/replicate", + "langchain/llms/googlevertexai", + "langchain/llms/googlevertexai/web", + 
"langchain/llms/googlepalm", + "langchain/llms/gradient_ai", + "langchain/llms/sagemaker_endpoint", + "langchain/llms/watsonx_ai", + "langchain/llms/bedrock", + "langchain/llms/bedrock/web", + "langchain/llms/llama_cpp", + "langchain/llms/writer", + "langchain/llms/portkey", + "langchain/prompts/load", + "langchain/vectorstores/clickhouse", + "langchain/vectorstores/analyticdb", + "langchain/vectorstores/cassandra", + "langchain/vectorstores/convex", + "langchain/vectorstores/elasticsearch", + "langchain/vectorstores/cloudflare_vectorize", + "langchain/vectorstores/closevector/web", + "langchain/vectorstores/closevector/node", + "langchain/vectorstores/chroma", + "langchain/vectorstores/googlevertexai", + "langchain/vectorstores/hnswlib", + "langchain/vectorstores/faiss", + "langchain/vectorstores/weaviate", + "langchain/vectorstores/lancedb", + "langchain/vectorstores/momento_vector_index", + "langchain/vectorstores/mongo", + "langchain/vectorstores/mongodb_atlas", + "langchain/vectorstores/pinecone", + "langchain/vectorstores/qdrant", + "langchain/vectorstores/supabase", + "langchain/vectorstores/opensearch", + "langchain/vectorstores/pgvector", + "langchain/vectorstores/milvus", + "langchain/vectorstores/neo4j_vector", + "langchain/vectorstores/typeorm", + "langchain/vectorstores/myscale", + "langchain/vectorstores/redis", + "langchain/vectorstores/rockset", + "langchain/vectorstores/typesense", + "langchain/vectorstores/singlestore", + "langchain/vectorstores/tigris", + "langchain/vectorstores/usearch", + "langchain/vectorstores/vercel_postgres", + "langchain/vectorstores/voy", + "langchain/vectorstores/zep", + "langchain/memory/zep", + "langchain/document_loaders/web/apify_dataset", + "langchain/document_loaders/web/assemblyai", + "langchain/document_loaders/web/azure_blob_storage_container", + "langchain/document_loaders/web/azure_blob_storage_file", + "langchain/document_loaders/web/cheerio", + "langchain/document_loaders/web/puppeteer", + 
"langchain/document_loaders/web/playwright", + "langchain/document_loaders/web/college_confidential", + "langchain/document_loaders/web/gitbook", + "langchain/document_loaders/web/hn", + "langchain/document_loaders/web/imsdb", + "langchain/document_loaders/web/figma", + "langchain/document_loaders/web/github", + "langchain/document_loaders/web/notiondb", + "langchain/document_loaders/web/notionapi", + "langchain/document_loaders/web/pdf", + "langchain/document_loaders/web/recursive_url", + "langchain/document_loaders/web/s3", + "langchain/document_loaders/web/sonix_audio", + "langchain/document_loaders/web/confluence", + "langchain/document_loaders/web/youtube", + "langchain/document_loaders/fs/directory", + "langchain/document_loaders/fs/buffer", + "langchain/document_loaders/fs/chatgpt", + "langchain/document_loaders/fs/text", + "langchain/document_loaders/fs/json", + "langchain/document_loaders/fs/srt", + "langchain/document_loaders/fs/pdf", + "langchain/document_loaders/fs/docx", + "langchain/document_loaders/fs/epub", + "langchain/document_loaders/fs/csv", + "langchain/document_loaders/fs/notion", + "langchain/document_loaders/fs/obsidian", + "langchain/document_loaders/fs/unstructured", + "langchain/document_loaders/fs/openai_whisper_audio", + "langchain/document_loaders/fs/pptx", + "langchain/document_transformers/html_to_text", + "langchain/document_transformers/mozilla_readability", + "langchain/chat_models/portkey", + "langchain/chat_models/bedrock", + "langchain/chat_models/bedrock/web", + "langchain/chat_models/googlevertexai", + "langchain/chat_models/googlevertexai/web", + "langchain/chat_models/googlepalm", + "langchain/chat_models/iflytek_xinghuo", + "langchain/chat_models/iflytek_xinghuo/web", + "langchain/chat_models/llama_cpp", + "langchain/sql_db", + "langchain/callbacks/handlers/llmonitor", + "langchain/output_parsers/expression", + "langchain/retrievers/amazon_kendra", + "langchain/retrievers/supabase", + "langchain/retrievers/zep", + 
"langchain/retrievers/metal", + "langchain/retrievers/self_query", + "langchain/retrievers/self_query/chroma", + "langchain/retrievers/self_query/functional", + "langchain/retrievers/self_query/pinecone", + "langchain/retrievers/self_query/supabase", + "langchain/retrievers/self_query/weaviate", + "langchain/retrievers/self_query/vectara", + "langchain/cache/cloudflare_kv", + "langchain/cache/momento", + "langchain/cache/redis", + "langchain/cache/ioredis", + "langchain/cache/file_system", + "langchain/cache/upstash_redis", + "langchain/stores/doc/gcs", + "langchain/stores/file/node", + "langchain/stores/message/cassandra", + "langchain/stores/message/convex", + "langchain/stores/message/cloudflare_d1", + "langchain/stores/message/dynamodb", + "langchain/stores/message/firestore", + "langchain/stores/message/momento", + "langchain/stores/message/mongodb", + "langchain/stores/message/redis", + "langchain/stores/message/ioredis", + "langchain/stores/message/upstash_redis", + "langchain/stores/message/planetscale", + "langchain/stores/message/xata", + "langchain/storage/convex", + "langchain/storage/ioredis", + "langchain/storage/vercel_kv", + "langchain/storage/upstash_redis", + "langchain/storage/file_system", + "langchain/graphs/neo4j_graph", + "langchain/hub", + "langchain/util/convex", + "langchain/experimental/multimodal_embeddings/googlevertexai", + "langchain/experimental/chat_models/anthropic_functions", + "langchain/experimental/llms/bittensor", + "langchain/experimental/hubs/makersuite/googlemakersuitehub", + "langchain/experimental/tools/pyinterpreter", +]; diff --git a/libs/langchain-community/src/load/import_map.ts b/libs/langchain-community/src/load/import_map.ts new file mode 100644 index 000000000000..fa6ef233e228 --- /dev/null +++ b/libs/langchain-community/src/load/import_map.ts @@ -0,0 +1,108 @@ +// Auto-generated by `scripts/create-entrypoints.js`. Do not edit manually. 
+ +export * as load__serializable from "../load/serializable.js"; +export * as agents from "../agents/index.js"; +export * as agents__toolkits from "../agents/toolkits/index.js"; +export * as agents__toolkits__connery from "../agents/toolkits/connery/index.js"; +export * as agents__format_scratchpad from "../agents/format_scratchpad/openai_functions.js"; +export * as agents__format_scratchpad__openai_tools from "../agents/format_scratchpad/openai_tools.js"; +export * as agents__format_scratchpad__log from "../agents/format_scratchpad/log.js"; +export * as agents__format_scratchpad__xml from "../agents/format_scratchpad/xml.js"; +export * as agents__format_scratchpad__log_to_message from "../agents/format_scratchpad/log_to_message.js"; +export * as agents__react__output_parser from "../agents/react/output_parser.js"; +export * as agents__xml__output_parser from "../agents/xml/output_parser.js"; +export * as agents__openai__output_parser from "../agents/openai/output_parser.js"; +export * as base_language from "../base_language/index.js"; +export * as tools from "../tools/index.js"; +export * as tools__connery from "../tools/connery.js"; +export * as tools__render from "../tools/render.js"; +export * as tools__google_places from "../tools/google_places.js"; +export * as chains from "../chains/index.js"; +export * as chains__combine_documents__reduce from "../chains/combine_documents/reduce.js"; +export * as chains__openai_functions from "../chains/openai_functions/index.js"; +export * as embeddings__base from "../embeddings/base.js"; +export * as embeddings__cache_backed from "../embeddings/cache_backed.js"; +export * as embeddings__fake from "../embeddings/fake.js"; +export * as embeddings__ollama from "../embeddings/ollama.js"; +export * as embeddings__openai from "../embeddings/openai.js"; +export * as embeddings__minimax from "../embeddings/minimax.js"; +export * as embeddings__voyage from "../embeddings/voyage.js"; +export * as llms__base from "../llms/base.js"; 
+export * as llms__openai from "../llms/openai.js"; +export * as llms__ai21 from "../llms/ai21.js"; +export * as llms__aleph_alpha from "../llms/aleph_alpha.js"; +export * as llms__cloudflare_workersai from "../llms/cloudflare_workersai.js"; +export * as llms__ollama from "../llms/ollama.js"; +export * as llms__fireworks from "../llms/fireworks.js"; +export * as llms__yandex from "../llms/yandex.js"; +export * as llms__fake from "../llms/fake.js"; +export * as prompts from "../prompts/index.js"; +export * as vectorstores__base from "../vectorstores/base.js"; +export * as vectorstores__memory from "../vectorstores/memory.js"; +export * as vectorstores__prisma from "../vectorstores/prisma.js"; +export * as vectorstores__vectara from "../vectorstores/vectara.js"; +export * as vectorstores__xata from "../vectorstores/xata.js"; +export * as text_splitter from "../text_splitter.js"; +export * as memory from "../memory/index.js"; +export * as document from "../document.js"; +export * as document_loaders__base from "../document_loaders/base.js"; +export * as document_loaders__web__searchapi from "../document_loaders/web/searchapi.js"; +export * as document_loaders__web__serpapi from "../document_loaders/web/serpapi.js"; +export * as document_loaders__web__sort_xyz_blockchain from "../document_loaders/web/sort_xyz_blockchain.js"; +export * as document_transformers__openai_functions from "../document_transformers/openai_functions.js"; +export * as chat_models__base from "../chat_models/base.js"; +export * as chat_models__openai from "../chat_models/openai.js"; +export * as chat_models__anthropic from "../chat_models/anthropic.js"; +export * as chat_models__cloudflare_workersai from "../chat_models/cloudflare_workersai.js"; +export * as chat_models__fireworks from "../chat_models/fireworks.js"; +export * as chat_models__baiduwenxin from "../chat_models/baiduwenxin.js"; +export * as chat_models__ollama from "../chat_models/ollama.js"; +export * as chat_models__minimax from 
"../chat_models/minimax.js"; +export * as chat_models__yandex from "../chat_models/yandex.js"; +export * as chat_models__fake from "../chat_models/fake.js"; +export * as schema from "../schema/index.js"; +export * as schema__document from "../schema/document.js"; +export * as schema__output_parser from "../schema/output_parser.js"; +export * as schema__prompt_template from "../schema/prompt_template.js"; +export * as schema__query_constructor from "../schema/query_constructor.js"; +export * as schema__retriever from "../schema/retriever.js"; +export * as schema__runnable from "../schema/runnable/index.js"; +export * as schema__storage from "../schema/storage.js"; +export * as callbacks from "../callbacks/index.js"; +export * as output_parsers from "../output_parsers/index.js"; +export * as retrievers__remote from "../retrievers/remote/index.js"; +export * as retrievers__chaindesk from "../retrievers/chaindesk.js"; +export * as retrievers__databerry from "../retrievers/databerry.js"; +export * as retrievers__contextual_compression from "../retrievers/contextual_compression.js"; +export * as retrievers__document_compressors from "../retrievers/document_compressors/index.js"; +export * as retrievers__multi_query from "../retrievers/multi_query.js"; +export * as retrievers__multi_vector from "../retrievers/multi_vector.js"; +export * as retrievers__parent_document from "../retrievers/parent_document.js"; +export * as retrievers__tavily_search_api from "../retrievers/tavily_search_api.js"; +export * as retrievers__time_weighted from "../retrievers/time_weighted.js"; +export * as retrievers__document_compressors__chain_extract from "../retrievers/document_compressors/chain_extract.js"; +export * as retrievers__document_compressors__embeddings_filter from "../retrievers/document_compressors/embeddings_filter.js"; +export * as retrievers__hyde from "../retrievers/hyde.js"; +export * as retrievers__score_threshold from "../retrievers/score_threshold.js"; +export * as 
retrievers__vespa from "../retrievers/vespa.js"; +export * as cache from "../cache/index.js"; +export * as stores__doc__in_memory from "../stores/doc/in_memory.js"; +export * as stores__file__in_memory from "../stores/file/in_memory.js"; +export * as stores__message__in_memory from "../stores/message/in_memory.js"; +export * as storage__encoder_backed from "../storage/encoder_backed.js"; +export * as storage__in_memory from "../storage/in_memory.js"; +export * as util__document from "../util/document.js"; +export * as util__math from "../util/math.js"; +export * as util__time from "../util/time.js"; +export * as experimental__autogpt from "../experimental/autogpt/index.js"; +export * as experimental__openai_assistant from "../experimental/openai_assistant/index.js"; +export * as experimental__openai_files from "../experimental/openai_files/index.js"; +export * as experimental__babyagi from "../experimental/babyagi/index.js"; +export * as experimental__generative_agents from "../experimental/generative_agents/index.js"; +export * as experimental__plan_and_execute from "../experimental/plan_and_execute/index.js"; +export * as experimental__chat_models__bittensor from "../experimental/chat_models/bittensor.js"; +export * as experimental__chat_models__ollama_functions from "../experimental/chat_models/ollama_functions.js"; +export * as experimental__chains__violation_of_expectations from "../experimental/chains/violation_of_expectations/index.js"; +export * as evaluation from "../evaluation/index.js"; +export * as runnables from "../runnables/index.js"; +export * as runnables__remote from "../runnables/remote.js"; diff --git a/libs/langchain-community/src/load/import_type.d.ts b/libs/langchain-community/src/load/import_type.d.ts new file mode 100644 index 000000000000..9b2b3abbde95 --- /dev/null +++ b/libs/langchain-community/src/load/import_type.d.ts @@ -0,0 +1,580 @@ +// Auto-generated by `scripts/create-entrypoints.js`. Do not edit manually. 
+ +export interface OptionalImportMap { + "langchain/agents/load"?: + | typeof import("../agents/load.js") + | Promise; + "langchain/agents/toolkits/aws_sfn"?: + | typeof import("../agents/toolkits/aws_sfn.js") + | Promise; + "langchain/agents/toolkits/sql"?: + | typeof import("../agents/toolkits/sql/index.js") + | Promise; + "langchain/tools/aws_lambda"?: + | typeof import("../tools/aws_lambda.js") + | Promise; + "langchain/tools/aws_sfn"?: + | typeof import("../tools/aws_sfn.js") + | Promise; + "langchain/tools/calculator"?: + | typeof import("../tools/calculator.js") + | Promise; + "langchain/tools/sql"?: + | typeof import("../tools/sql.js") + | Promise; + "langchain/tools/webbrowser"?: + | typeof import("../tools/webbrowser.js") + | Promise; + "langchain/tools/gmail"?: + | typeof import("../tools/gmail/index.js") + | Promise; + "langchain/tools/google_calendar"?: + | typeof import("../tools/google_calendar/index.js") + | Promise; + "langchain/chains/load"?: + | typeof import("../chains/load.js") + | Promise; + "langchain/chains/query_constructor"?: + | typeof import("../chains/query_constructor/index.js") + | Promise; + "langchain/chains/query_constructor/ir"?: + | typeof import("../chains/query_constructor/ir.js") + | Promise; + "langchain/chains/sql_db"?: + | typeof import("../chains/sql_db/index.js") + | Promise; + "langchain/chains/graph_qa/cypher"?: + | typeof import("../chains/graph_qa/cypher.js") + | Promise; + "langchain/embeddings/bedrock"?: + | typeof import("../embeddings/bedrock.js") + | Promise; + "langchain/embeddings/cloudflare_workersai"?: + | typeof import("../embeddings/cloudflare_workersai.js") + | Promise; + "langchain/embeddings/cohere"?: + | typeof import("../embeddings/cohere.js") + | Promise; + "langchain/embeddings/tensorflow"?: + | typeof import("../embeddings/tensorflow.js") + | Promise; + "langchain/embeddings/hf"?: + | typeof import("../embeddings/hf.js") + | Promise; + "langchain/embeddings/hf_transformers"?: + | typeof 
import("../embeddings/hf_transformers.js") + | Promise; + "langchain/embeddings/googlevertexai"?: + | typeof import("../embeddings/googlevertexai.js") + | Promise; + "langchain/embeddings/googlepalm"?: + | typeof import("../embeddings/googlepalm.js") + | Promise; + "langchain/embeddings/llama_cpp"?: + | typeof import("../embeddings/llama_cpp.js") + | Promise; + "langchain/embeddings/gradient_ai"?: + | typeof import("../embeddings/gradient_ai.js") + | Promise; + "langchain/llms/load"?: + | typeof import("../llms/load.js") + | Promise; + "langchain/llms/cohere"?: + | typeof import("../llms/cohere.js") + | Promise; + "langchain/llms/hf"?: + | typeof import("../llms/hf.js") + | Promise; + "langchain/llms/raycast"?: + | typeof import("../llms/raycast.js") + | Promise; + "langchain/llms/replicate"?: + | typeof import("../llms/replicate.js") + | Promise; + "langchain/llms/googlevertexai"?: + | typeof import("../llms/googlevertexai/index.js") + | Promise; + "langchain/llms/googlevertexai/web"?: + | typeof import("../llms/googlevertexai/web.js") + | Promise; + "langchain/llms/googlepalm"?: + | typeof import("../llms/googlepalm.js") + | Promise; + "langchain/llms/gradient_ai"?: + | typeof import("../llms/gradient_ai.js") + | Promise; + "langchain/llms/sagemaker_endpoint"?: + | typeof import("../llms/sagemaker_endpoint.js") + | Promise; + "langchain/llms/watsonx_ai"?: + | typeof import("../llms/watsonx_ai.js") + | Promise; + "langchain/llms/bedrock"?: + | typeof import("../llms/bedrock/index.js") + | Promise; + "langchain/llms/bedrock/web"?: + | typeof import("../llms/bedrock/web.js") + | Promise; + "langchain/llms/llama_cpp"?: + | typeof import("../llms/llama_cpp.js") + | Promise; + "langchain/llms/writer"?: + | typeof import("../llms/writer.js") + | Promise; + "langchain/llms/portkey"?: + | typeof import("../llms/portkey.js") + | Promise; + "langchain/prompts/load"?: + | typeof import("../prompts/load.js") + | Promise; + "langchain/vectorstores/clickhouse"?: + | typeof 
import("../vectorstores/clickhouse.js") + | Promise; + "langchain/vectorstores/analyticdb"?: + | typeof import("../vectorstores/analyticdb.js") + | Promise; + "langchain/vectorstores/cassandra"?: + | typeof import("../vectorstores/cassandra.js") + | Promise; + "langchain/vectorstores/convex"?: + | typeof import("../vectorstores/convex.js") + | Promise; + "langchain/vectorstores/elasticsearch"?: + | typeof import("../vectorstores/elasticsearch.js") + | Promise; + "langchain/vectorstores/cloudflare_vectorize"?: + | typeof import("../vectorstores/cloudflare_vectorize.js") + | Promise; + "langchain/vectorstores/closevector/web"?: + | typeof import("../vectorstores/closevector/web.js") + | Promise; + "langchain/vectorstores/closevector/node"?: + | typeof import("../vectorstores/closevector/node.js") + | Promise; + "langchain/vectorstores/chroma"?: + | typeof import("../vectorstores/chroma.js") + | Promise; + "langchain/vectorstores/googlevertexai"?: + | typeof import("../vectorstores/googlevertexai.js") + | Promise; + "langchain/vectorstores/hnswlib"?: + | typeof import("../vectorstores/hnswlib.js") + | Promise; + "langchain/vectorstores/faiss"?: + | typeof import("../vectorstores/faiss.js") + | Promise; + "langchain/vectorstores/weaviate"?: + | typeof import("../vectorstores/weaviate.js") + | Promise; + "langchain/vectorstores/lancedb"?: + | typeof import("../vectorstores/lancedb.js") + | Promise; + "langchain/vectorstores/momento_vector_index"?: + | typeof import("../vectorstores/momento_vector_index.js") + | Promise; + "langchain/vectorstores/mongo"?: + | typeof import("../vectorstores/mongo.js") + | Promise; + "langchain/vectorstores/mongodb_atlas"?: + | typeof import("../vectorstores/mongodb_atlas.js") + | Promise; + "langchain/vectorstores/pinecone"?: + | typeof import("../vectorstores/pinecone.js") + | Promise; + "langchain/vectorstores/qdrant"?: + | typeof import("../vectorstores/qdrant.js") + | Promise; + "langchain/vectorstores/supabase"?: + | typeof 
import("../vectorstores/supabase.js") + | Promise; + "langchain/vectorstores/opensearch"?: + | typeof import("../vectorstores/opensearch.js") + | Promise; + "langchain/vectorstores/pgvector"?: + | typeof import("../vectorstores/pgvector.js") + | Promise; + "langchain/vectorstores/milvus"?: + | typeof import("../vectorstores/milvus.js") + | Promise; + "langchain/vectorstores/neo4j_vector"?: + | typeof import("../vectorstores/neo4j_vector.js") + | Promise; + "langchain/vectorstores/typeorm"?: + | typeof import("../vectorstores/typeorm.js") + | Promise; + "langchain/vectorstores/myscale"?: + | typeof import("../vectorstores/myscale.js") + | Promise; + "langchain/vectorstores/redis"?: + | typeof import("../vectorstores/redis.js") + | Promise; + "langchain/vectorstores/rockset"?: + | typeof import("../vectorstores/rockset.js") + | Promise; + "langchain/vectorstores/typesense"?: + | typeof import("../vectorstores/typesense.js") + | Promise; + "langchain/vectorstores/singlestore"?: + | typeof import("../vectorstores/singlestore.js") + | Promise; + "langchain/vectorstores/tigris"?: + | typeof import("../vectorstores/tigris.js") + | Promise; + "langchain/vectorstores/usearch"?: + | typeof import("../vectorstores/usearch.js") + | Promise; + "langchain/vectorstores/vercel_postgres"?: + | typeof import("../vectorstores/vercel_postgres.js") + | Promise; + "langchain/vectorstores/voy"?: + | typeof import("../vectorstores/voy.js") + | Promise; + "langchain/vectorstores/zep"?: + | typeof import("../vectorstores/zep.js") + | Promise; + "langchain/memory/zep"?: + | typeof import("../memory/zep.js") + | Promise; + "langchain/document_loaders/web/apify_dataset"?: + | typeof import("../document_loaders/web/apify_dataset.js") + | Promise; + "langchain/document_loaders/web/assemblyai"?: + | typeof import("../document_loaders/web/assemblyai.js") + | Promise; + "langchain/document_loaders/web/azure_blob_storage_container"?: + | typeof 
import("../document_loaders/web/azure_blob_storage_container.js") + | Promise; + "langchain/document_loaders/web/azure_blob_storage_file"?: + | typeof import("../document_loaders/web/azure_blob_storage_file.js") + | Promise; + "langchain/document_loaders/web/cheerio"?: + | typeof import("../document_loaders/web/cheerio.js") + | Promise; + "langchain/document_loaders/web/puppeteer"?: + | typeof import("../document_loaders/web/puppeteer.js") + | Promise; + "langchain/document_loaders/web/playwright"?: + | typeof import("../document_loaders/web/playwright.js") + | Promise; + "langchain/document_loaders/web/college_confidential"?: + | typeof import("../document_loaders/web/college_confidential.js") + | Promise; + "langchain/document_loaders/web/gitbook"?: + | typeof import("../document_loaders/web/gitbook.js") + | Promise; + "langchain/document_loaders/web/hn"?: + | typeof import("../document_loaders/web/hn.js") + | Promise; + "langchain/document_loaders/web/imsdb"?: + | typeof import("../document_loaders/web/imsdb.js") + | Promise; + "langchain/document_loaders/web/figma"?: + | typeof import("../document_loaders/web/figma.js") + | Promise; + "langchain/document_loaders/web/github"?: + | typeof import("../document_loaders/web/github.js") + | Promise; + "langchain/document_loaders/web/notiondb"?: + | typeof import("../document_loaders/web/notiondb.js") + | Promise; + "langchain/document_loaders/web/notionapi"?: + | typeof import("../document_loaders/web/notionapi.js") + | Promise; + "langchain/document_loaders/web/pdf"?: + | typeof import("../document_loaders/web/pdf.js") + | Promise; + "langchain/document_loaders/web/recursive_url"?: + | typeof import("../document_loaders/web/recursive_url.js") + | Promise; + "langchain/document_loaders/web/s3"?: + | typeof import("../document_loaders/web/s3.js") + | Promise; + "langchain/document_loaders/web/sonix_audio"?: + | typeof import("../document_loaders/web/sonix_audio.js") + | Promise; + 
"langchain/document_loaders/web/confluence"?: + | typeof import("../document_loaders/web/confluence.js") + | Promise; + "langchain/document_loaders/web/youtube"?: + | typeof import("../document_loaders/web/youtube.js") + | Promise; + "langchain/document_loaders/fs/directory"?: + | typeof import("../document_loaders/fs/directory.js") + | Promise; + "langchain/document_loaders/fs/buffer"?: + | typeof import("../document_loaders/fs/buffer.js") + | Promise; + "langchain/document_loaders/fs/chatgpt"?: + | typeof import("../document_loaders/fs/chatgpt.js") + | Promise; + "langchain/document_loaders/fs/text"?: + | typeof import("../document_loaders/fs/text.js") + | Promise; + "langchain/document_loaders/fs/json"?: + | typeof import("../document_loaders/fs/json.js") + | Promise; + "langchain/document_loaders/fs/srt"?: + | typeof import("../document_loaders/fs/srt.js") + | Promise; + "langchain/document_loaders/fs/pdf"?: + | typeof import("../document_loaders/fs/pdf.js") + | Promise; + "langchain/document_loaders/fs/docx"?: + | typeof import("../document_loaders/fs/docx.js") + | Promise; + "langchain/document_loaders/fs/epub"?: + | typeof import("../document_loaders/fs/epub.js") + | Promise; + "langchain/document_loaders/fs/csv"?: + | typeof import("../document_loaders/fs/csv.js") + | Promise; + "langchain/document_loaders/fs/notion"?: + | typeof import("../document_loaders/fs/notion.js") + | Promise; + "langchain/document_loaders/fs/obsidian"?: + | typeof import("../document_loaders/fs/obsidian.js") + | Promise; + "langchain/document_loaders/fs/unstructured"?: + | typeof import("../document_loaders/fs/unstructured.js") + | Promise; + "langchain/document_loaders/fs/openai_whisper_audio"?: + | typeof import("../document_loaders/fs/openai_whisper_audio.js") + | Promise; + "langchain/document_loaders/fs/pptx"?: + | typeof import("../document_loaders/fs/pptx.js") + | Promise; + "langchain/document_transformers/html_to_text"?: + | typeof 
import("../document_transformers/html_to_text.js") + | Promise; + "langchain/document_transformers/mozilla_readability"?: + | typeof import("../document_transformers/mozilla_readability.js") + | Promise; + "langchain/chat_models/portkey"?: + | typeof import("../chat_models/portkey.js") + | Promise; + "langchain/chat_models/bedrock"?: + | typeof import("../chat_models/bedrock/index.js") + | Promise; + "langchain/chat_models/bedrock/web"?: + | typeof import("../chat_models/bedrock/web.js") + | Promise; + "langchain/chat_models/googlevertexai"?: + | typeof import("../chat_models/googlevertexai/index.js") + | Promise; + "langchain/chat_models/googlevertexai/web"?: + | typeof import("../chat_models/googlevertexai/web.js") + | Promise; + "langchain/chat_models/googlepalm"?: + | typeof import("../chat_models/googlepalm.js") + | Promise; + "langchain/chat_models/iflytek_xinghuo"?: + | typeof import("../chat_models/iflytek_xinghuo/index.js") + | Promise; + "langchain/chat_models/iflytek_xinghuo/web"?: + | typeof import("../chat_models/iflytek_xinghuo/web.js") + | Promise; + "langchain/chat_models/llama_cpp"?: + | typeof import("../chat_models/llama_cpp.js") + | Promise; + "langchain/sql_db"?: + | typeof import("../sql_db.js") + | Promise; + "langchain/callbacks/handlers/llmonitor"?: + | typeof import("../callbacks/handlers/llmonitor.js") + | Promise; + "langchain/output_parsers/expression"?: + | typeof import("../output_parsers/expression.js") + | Promise; + "langchain/retrievers/amazon_kendra"?: + | typeof import("../retrievers/amazon_kendra.js") + | Promise; + "langchain/retrievers/supabase"?: + | typeof import("../retrievers/supabase.js") + | Promise; + "langchain/retrievers/zep"?: + | typeof import("../retrievers/zep.js") + | Promise; + "langchain/retrievers/metal"?: + | typeof import("../retrievers/metal.js") + | Promise; + "langchain/retrievers/self_query"?: + | typeof import("../retrievers/self_query/index.js") + | Promise; + 
"langchain/retrievers/self_query/chroma"?: + | typeof import("../retrievers/self_query/chroma.js") + | Promise; + "langchain/retrievers/self_query/functional"?: + | typeof import("../retrievers/self_query/functional.js") + | Promise; + "langchain/retrievers/self_query/pinecone"?: + | typeof import("../retrievers/self_query/pinecone.js") + | Promise; + "langchain/retrievers/self_query/supabase"?: + | typeof import("../retrievers/self_query/supabase.js") + | Promise; + "langchain/retrievers/self_query/weaviate"?: + | typeof import("../retrievers/self_query/weaviate.js") + | Promise; + "langchain/retrievers/self_query/vectara"?: + | typeof import("../retrievers/self_query/vectara.js") + | Promise; + "langchain/cache/cloudflare_kv"?: + | typeof import("../cache/cloudflare_kv.js") + | Promise; + "langchain/cache/momento"?: + | typeof import("../cache/momento.js") + | Promise; + "langchain/cache/redis"?: + | typeof import("../cache/redis.js") + | Promise; + "langchain/cache/ioredis"?: + | typeof import("../cache/ioredis.js") + | Promise; + "langchain/cache/file_system"?: + | typeof import("../cache/file_system.js") + | Promise; + "langchain/cache/upstash_redis"?: + | typeof import("../cache/upstash_redis.js") + | Promise; + "langchain/stores/doc/gcs"?: + | typeof import("../stores/doc/gcs.js") + | Promise; + "langchain/stores/file/node"?: + | typeof import("../stores/file/node.js") + | Promise; + "langchain/stores/message/cassandra"?: + | typeof import("../stores/message/cassandra.js") + | Promise; + "langchain/stores/message/convex"?: + | typeof import("../stores/message/convex.js") + | Promise; + "langchain/stores/message/cloudflare_d1"?: + | typeof import("../stores/message/cloudflare_d1.js") + | Promise; + "langchain/stores/message/dynamodb"?: + | typeof import("../stores/message/dynamodb.js") + | Promise; + "langchain/stores/message/firestore"?: + | typeof import("../stores/message/firestore.js") + | Promise; + "langchain/stores/message/momento"?: + | typeof 
import("../stores/message/momento.js") + | Promise; + "langchain/stores/message/mongodb"?: + | typeof import("../stores/message/mongodb.js") + | Promise; + "langchain/stores/message/redis"?: + | typeof import("../stores/message/redis.js") + | Promise; + "langchain/stores/message/ioredis"?: + | typeof import("../stores/message/ioredis.js") + | Promise; + "langchain/stores/message/upstash_redis"?: + | typeof import("../stores/message/upstash_redis.js") + | Promise; + "langchain/stores/message/planetscale"?: + | typeof import("../stores/message/planetscale.js") + | Promise; + "langchain/stores/message/xata"?: + | typeof import("../stores/message/xata.js") + | Promise; + "langchain/storage/convex"?: + | typeof import("../storage/convex.js") + | Promise; + "langchain/storage/ioredis"?: + | typeof import("../storage/ioredis.js") + | Promise; + "langchain/storage/vercel_kv"?: + | typeof import("../storage/vercel_kv.js") + | Promise; + "langchain/storage/upstash_redis"?: + | typeof import("../storage/upstash_redis.js") + | Promise; + "langchain/storage/file_system"?: + | typeof import("../storage/file_system.js") + | Promise; + "langchain/graphs/neo4j_graph"?: + | typeof import("../graphs/neo4j_graph.js") + | Promise; + "langchain/hub"?: + | typeof import("../hub.js") + | Promise; + "langchain/util/convex"?: + | typeof import("../util/convex.js") + | Promise; + "langchain/experimental/multimodal_embeddings/googlevertexai"?: + | typeof import("../experimental/multimodal_embeddings/googlevertexai.js") + | Promise; + "langchain/experimental/chat_models/anthropic_functions"?: + | typeof import("../experimental/chat_models/anthropic_functions.js") + | Promise; + "langchain/experimental/llms/bittensor"?: + | typeof import("../experimental/llms/bittensor.js") + | Promise; + "langchain/experimental/hubs/makersuite/googlemakersuitehub"?: + | typeof import("../experimental/hubs/makersuite/googlemakersuitehub.js") + | Promise; + "langchain/experimental/tools/pyinterpreter"?: + | 
typeof import("../experimental/tools/pyinterpreter.js") + | Promise; +} + +export interface SecretMap { + ANTHROPIC_API_KEY?: string; + AWS_ACCESS_KEY_ID?: string; + AWS_SECRETE_ACCESS_KEY?: string; + AWS_SECRET_ACCESS_KEY?: string; + AWS_SESSION_TOKEN?: string; + AZURE_OPENAI_API_KEY?: string; + BAIDU_API_KEY?: string; + BAIDU_SECRET_KEY?: string; + BEDROCK_AWS_ACCESS_KEY_ID?: string; + BEDROCK_AWS_SECRET_ACCESS_KEY?: string; + CLOUDFLARE_API_TOKEN?: string; + COHERE_API_KEY?: string; + DATABERRY_API_KEY?: string; + FIREWORKS_API_KEY?: string; + GOOGLE_API_KEY?: string; + GOOGLE_PALM_API_KEY?: string; + GOOGLE_PLACES_API_KEY?: string; + GOOGLE_VERTEX_AI_WEB_CREDENTIALS?: string; + GRADIENT_ACCESS_TOKEN?: string; + GRADIENT_WORKSPACE_ID?: string; + HUGGINGFACEHUB_API_KEY?: string; + IBM_CLOUD_API_KEY?: string; + IFLYTEK_API_KEY?: string; + IFLYTEK_API_SECRET?: string; + MILVUS_PASSWORD?: string; + MILVUS_SSL?: string; + MILVUS_USERNAME?: string; + MINIMAX_API_KEY?: string; + MINIMAX_GROUP_ID?: string; + OPENAI_API_KEY?: string; + OPENAI_ORGANIZATION?: string; + PLANETSCALE_DATABASE_URL?: string; + PLANETSCALE_HOST?: string; + PLANETSCALE_PASSWORD?: string; + PLANETSCALE_USERNAME?: string; + PROMPTLAYER_API_KEY?: string; + QDRANT_API_KEY?: string; + QDRANT_URL?: string; + REDIS_PASSWORD?: string; + REDIS_URL?: string; + REDIS_USERNAME?: string; + REMOTE_RETRIEVER_AUTH_BEARER?: string; + REPLICATE_API_TOKEN?: string; + SEARXNG_API_BASE?: string; + UPSTASH_REDIS_REST_TOKEN?: string; + UPSTASH_REDIS_REST_URL?: string; + VECTARA_API_KEY?: string; + VECTARA_CORPUS_ID?: string; + VECTARA_CUSTOMER_ID?: string; + WATSONX_PROJECT_ID?: string; + WRITER_API_KEY?: string; + WRITER_ORG_ID?: string; + YC_API_KEY?: string; + YC_IAM_TOKEN?: string; + ZAPIER_NLA_API_KEY?: string; + ZEP_API_KEY?: string; + ZEP_API_URL?: string; +} diff --git a/libs/langchain-community/src/load/index.ts b/libs/langchain-community/src/load/index.ts new file mode 100644 index 000000000000..0f4fe863a1e7 
--- /dev/null +++ b/libs/langchain-community/src/load/index.ts @@ -0,0 +1,18 @@ +import { load as coreLoad } from "@langchain/core/load"; +import { optionalImportEntrypoints } from "./import_constants.js"; +import * as importMap from "./import_map.js"; +import { OptionalImportMap } from "./import_type.js"; + +export async function load( + text: string, + // eslint-disable-next-line @typescript-eslint/no-explicit-any + secretsMap: Record = {}, + optionalImportsMap: OptionalImportMap = {} +): Promise { + return coreLoad(text, { + secretsMap, + optionalImportsMap, + optionalImportEntrypoints, + importMap, + }); +} diff --git a/libs/langchain-community/src/load/map_keys.ts b/libs/langchain-community/src/load/map_keys.ts new file mode 100644 index 000000000000..93a0ea6e4fa7 --- /dev/null +++ b/libs/langchain-community/src/load/map_keys.ts @@ -0,0 +1,4 @@ +export interface SerializedFields { + // eslint-disable-next-line @typescript-eslint/no-explicit-any + [key: string]: any; +} diff --git a/libs/langchain-community/src/load/serializable.ts b/libs/langchain-community/src/load/serializable.ts new file mode 100644 index 000000000000..b196ae7faac5 --- /dev/null +++ b/libs/langchain-community/src/load/serializable.ts @@ -0,0 +1 @@ +export * from "@langchain/core/load/serializable"; diff --git a/libs/langchain-community/src/tests/chat_models.test.ts b/libs/langchain-community/src/tests/chat_models.test.ts new file mode 100644 index 000000000000..5d609f496501 --- /dev/null +++ b/libs/langchain-community/src/tests/chat_models.test.ts @@ -0,0 +1,5 @@ +import { test } from "@jest/globals"; + +test("Test chat model", async () => { + // Your test here +}); diff --git a/libs/langchain-community/src/tests/integration.int.test.ts b/libs/langchain-community/src/tests/integration.int.test.ts new file mode 100644 index 000000000000..7fce4ce53302 --- /dev/null +++ b/libs/langchain-community/src/tests/integration.int.test.ts @@ -0,0 +1,5 @@ +import { test } from "@jest/globals"; + 
+test("Test chat model", async () => { + // Your integration test here +}); diff --git a/libs/langchain-community/src/tests/llms.test.ts b/libs/langchain-community/src/tests/llms.test.ts new file mode 100644 index 000000000000..3428ecaaf599 --- /dev/null +++ b/libs/langchain-community/src/tests/llms.test.ts @@ -0,0 +1,5 @@ +import { test } from "@jest/globals"; + +test("Test LLM", async () => { + // Your test here +}); diff --git a/libs/langchain-community/src/tests/vectorstores.test.ts b/libs/langchain-community/src/tests/vectorstores.test.ts new file mode 100644 index 000000000000..023cfbd8b77c --- /dev/null +++ b/libs/langchain-community/src/tests/vectorstores.test.ts @@ -0,0 +1,5 @@ +import { test } from "@jest/globals"; + +test("Test vectorstore", async () => { + // Your test here +}); diff --git a/libs/langchain-community/src/types/assemblyai-types.ts b/libs/langchain-community/src/types/assemblyai-types.ts new file mode 100644 index 000000000000..583248940a1d --- /dev/null +++ b/libs/langchain-community/src/types/assemblyai-types.ts @@ -0,0 +1,6 @@ +import { BaseServiceParams } from "assemblyai"; +import { Optional } from "./type-utils.js"; + +export type * from "assemblyai"; + +export type AssemblyAIOptions = Optional; diff --git a/libs/langchain-community/src/types/expression-parser.d.ts b/libs/langchain-community/src/types/expression-parser.d.ts new file mode 100644 index 000000000000..87e7e2ef57e7 --- /dev/null +++ b/libs/langchain-community/src/types/expression-parser.d.ts @@ -0,0 +1,91 @@ +declare interface ParseOptions { + filename?: string; + startRule?: "Start"; + tracer?: any; + [key: string]: any; +} +declare type ParseFunction = ( + input: string, + options?: Options +) => Options extends { startRule: infer StartRule } + ? StartRule extends "Start" + ? 
Start + : Start + : Start; + +// These types were autogenerated by ts-pegjs +declare type Start = Program; +declare type Identifier = IdentifierName; +declare type IdentifierName = { type: "Identifier"; name: string }; +declare type Literal = + | NullLiteral + | BooleanLiteral + | NumericLiteral + | StringLiteral; +declare type NullLiteral = { type: "NullLiteral"; value: null }; +declare type BooleanLiteral = + | { type: "BooleanLiteral"; value: true } + | { type: "BooleanLiteral"; value: false }; +declare type NumericLiteral = DecimalLiteral; +declare type DecimalLiteral = { type: "NumericLiteral"; value: number }; +declare type StringLiteral = { type: "StringLiteral"; value: string }; +declare type PrimaryExpression = + | Identifier + | Literal + | ArrayExpression + | ObjectExpression + | Expression; +declare type ArrayExpression = { + type: "ArrayExpression"; + elements: ElementList; +}; +declare type ElementList = PrimaryExpression[]; +declare type ObjectExpression = + | { type: "ObjectExpression"; properties: [] } + | { type: "ObjectExpression"; properties: PropertyNameAndValueList }; +declare type PropertyNameAndValueList = PrimaryExpression[]; +declare type PropertyAssignment = { + type: "PropertyAssignment"; + key: PropertyName; + value: Expression; + kind: "init"; +}; +declare type PropertyName = IdentifierName | StringLiteral | NumericLiteral; +declare type MemberExpression = + | { + type: "MemberExpression"; + property: StringLiteral; + computed: true; + object: MemberExpression | Identifier | StringLiteral; + } + | { + type: "MemberExpression"; + property: Identifier; + computed: false; + object: MemberExpression | Identifier | StringLiteral; + }; +declare type CallExpression = { + type: "CallExpression"; + arguments: Arguments; + callee: MemberExpression | Identifier; +}; +declare type Arguments = PrimaryExpression[]; +declare type Expression = CallExpression | MemberExpression; +declare type ExpressionStatement = { + type: "ExpressionStatement"; + 
expression: Expression; +}; +declare type Program = { type: "Program"; body: ExpressionStatement }; +declare type ExpressionNode = + | Program + | ExpressionStatement + | ArrayExpression + | BooleanLiteral + | CallExpression + | Identifier + | MemberExpression + | NumericLiteral + | ObjectExpression + | PropertyAssignment + | NullLiteral + | StringLiteral; diff --git a/libs/langchain-community/src/types/googlevertexai-types.ts b/libs/langchain-community/src/types/googlevertexai-types.ts new file mode 100644 index 000000000000..f65694cacd49 --- /dev/null +++ b/libs/langchain-community/src/types/googlevertexai-types.ts @@ -0,0 +1,89 @@ +import type { BaseLLMParams } from "@langchain/core/language_models/llms"; + +export interface GoogleConnectionParams { + authOptions?: AuthOptions; +} + +export interface GoogleVertexAIConnectionParams + extends GoogleConnectionParams { + /** Hostname for the API call */ + endpoint?: string; + + /** Region where the LLM is stored */ + location?: string; + + /** The version of the API functions. Part of the path. */ + apiVersion?: string; +} + +export interface GoogleVertexAIModelParams { + /** Model to use */ + model?: string; + + /** Sampling temperature to use */ + temperature?: number; + + /** + * Maximum number of tokens to generate in the completion. + */ + maxOutputTokens?: number; + + /** + * Top-p changes how the model selects tokens for output. + * + * Tokens are selected from most probable to least until the sum + * of their probabilities equals the top-p value. + * + * For example, if tokens A, B, and C have a probability of + * .3, .2, and .1 and the top-p value is .5, then the model will + * select either A or B as the next token (using temperature). + */ + topP?: number; + + /** + * Top-k changes how the model selects tokens for output. 
+ * + * A top-k of 1 means the selected token is the most probable among + * all tokens in the model’s vocabulary (also called greedy decoding), + * while a top-k of 3 means that the next token is selected from + * among the 3 most probable tokens (using temperature). + */ + topK?: number; +} + +export interface GoogleVertexAIBaseLLMInput + extends BaseLLMParams, + GoogleVertexAIConnectionParams, + GoogleVertexAIModelParams {} + +export interface GoogleResponse { + // eslint-disable-next-line @typescript-eslint/no-explicit-any + data: any; +} + +export interface GoogleVertexAIBasePrediction { + // eslint-disable-next-line @typescript-eslint/no-explicit-any + safetyAttributes?: any; +} + +export interface GoogleVertexAILLMPredictions< + PredictionType extends GoogleVertexAIBasePrediction +> { + predictions: PredictionType[]; +} + +export type GoogleAbstractedClientOpsMethod = "GET" | "POST"; + +export type GoogleAbstractedClientOpsResponseType = "json" | "stream"; + +export type GoogleAbstractedClientOps = { + url?: string; + method?: GoogleAbstractedClientOpsMethod; + data?: unknown; + responseType?: GoogleAbstractedClientOpsResponseType; +}; + +export interface GoogleAbstractedClient { + request: (opts: GoogleAbstractedClientOps) => unknown; + getProjectId: () => Promise; +} diff --git a/libs/langchain-community/src/types/openai-types.ts b/libs/langchain-community/src/types/openai-types.ts new file mode 100644 index 000000000000..f3df0278a6a9 --- /dev/null +++ b/libs/langchain-community/src/types/openai-types.ts @@ -0,0 +1,172 @@ +import type { OpenAI as OpenAIClient } from "openai"; + +import { TiktokenModel } from "js-tiktoken/lite"; +import { BaseLanguageModelCallOptions } from "../base_language/index.js"; + +// reexport this type from the included package so we can easily override and extend it if needed in the future +// also makes it easier for folks to import this type without digging around into the dependent packages +export type { TiktokenModel }; + 
+export declare interface OpenAIBaseInput { + /** Sampling temperature to use */ + temperature: number; + + /** + * Maximum number of tokens to generate in the completion. -1 returns as many + * tokens as possible given the prompt and the model's maximum context size. + */ + maxTokens?: number; + + /** Total probability mass of tokens to consider at each step */ + topP: number; + + /** Penalizes repeated tokens according to frequency */ + frequencyPenalty: number; + + /** Penalizes repeated tokens */ + presencePenalty: number; + + /** Number of completions to generate for each prompt */ + n: number; + + /** Dictionary used to adjust the probability of specific tokens being generated */ + logitBias?: Record; + + /** Unique string identifier representing your end-user, which can help OpenAI to monitor and detect abuse. */ + user?: string; + + /** Whether to stream the results or not. Enabling disables tokenUsage reporting */ + streaming: boolean; + + /** Model name to use */ + modelName: string; + + /** Holds any additional parameters that are valid to pass to {@link + * https://platform.openai.com/docs/api-reference/completions/create | + * `openai.createCompletion`} that are not explicitly specified on this class. + */ + // eslint-disable-next-line @typescript-eslint/no-explicit-any + modelKwargs?: Record; + + /** List of stop words to use when generating */ + stop?: string[]; + + /** + * Timeout to use when making requests to OpenAI. + */ + timeout?: number; + + /** + * API key to use when making requests to OpenAI. Defaults to the value of + * `OPENAI_API_KEY` environment variable. 
 + */ + openAIApiKey?: string; +} + +// TODO use OpenAI.Core.RequestOptions when SDK is updated to make it available +export type OpenAICoreRequestOptions< + Req extends object = Record<string, unknown> +> = { + path?: string; + query?: Req | undefined; + body?: Req | undefined; + headers?: Record<string, string | null | undefined> | undefined; + + maxRetries?: number; + stream?: boolean | undefined; + timeout?: number; + // eslint-disable-next-line @typescript-eslint/no-explicit-any + httpAgent?: any; + signal?: AbortSignal | undefined | null; + idempotencyKey?: string; +}; + +export interface OpenAICallOptions extends BaseLanguageModelCallOptions { + /** + * Additional options to pass to the underlying axios request. + */ + options?: OpenAICoreRequestOptions; +} + +/** + * Input to OpenAI class. + */ +export declare interface OpenAIInput extends OpenAIBaseInput { + /** Generates `bestOf` completions server side and returns the "best" */ + bestOf?: number; + + /** Batch size to use when passing multiple documents to generate */ + batchSize: number; +} + +/** + * @deprecated Use "baseURL", "defaultHeaders", and "defaultParams" instead. + */ +export interface LegacyOpenAIInput { + /** @deprecated Use baseURL instead */ + basePath?: string; + /** @deprecated Use defaultHeaders and defaultQuery instead */ + baseOptions?: { + headers?: Record<string, string>; + params?: Record<string, string>; + }; +} + +export interface OpenAIChatInput extends OpenAIBaseInput { + /** ChatGPT messages to pass as a prefix to the prompt */ + prefixMessages?: OpenAIClient.Chat.CreateChatCompletionRequestMessage[]; +} + +export declare interface AzureOpenAIInput { + /** + * API version to use when making requests to Azure OpenAI. + */ + azureOpenAIApiVersion?: string; + + /** + * API key to use when making requests to Azure OpenAI. + */ + azureOpenAIApiKey?: string; + + /** + * Azure OpenAI API instance name to use when making requests to Azure OpenAI. + * this is the name of the instance you created in the Azure portal. + * e.g. 
"my-openai-instance" + * this will be used in the endpoint URL: https://my-openai-instance.openai.azure.com/openai/deployments/{DeploymentName}/ + */ + azureOpenAIApiInstanceName?: string; + + /** + * Azure OpenAI API deployment name to use for completions when making requests to Azure OpenAI. + * This is the name of the deployment you created in the Azure portal. + * e.g. "my-openai-deployment" + * this will be used in the endpoint URL: https://{InstanceName}.openai.azure.com/openai/deployments/my-openai-deployment/ + */ + azureOpenAIApiDeploymentName?: string; + + /** + * Azure OpenAI API deployment name to use for embedding when making requests to Azure OpenAI. + * This is the name of the deployment you created in the Azure portal. + * This will fallback to azureOpenAIApiDeploymentName if not provided. + * e.g. "my-openai-deployment" + * this will be used in the endpoint URL: https://{InstanceName}.openai.azure.com/openai/deployments/my-openai-deployment/ + */ + azureOpenAIApiEmbeddingsDeploymentName?: string; + + /** + * Azure OpenAI API deployment name to use for completions when making requests to Azure OpenAI. + * Completions are only available for gpt-3.5-turbo and text-davinci-003 deployments. + * This is the name of the deployment you created in the Azure portal. + * This will fallback to azureOpenAIApiDeploymentName if not provided. + * e.g. "my-openai-deployment" + * this will be used in the endpoint URL: https://{InstanceName}.openai.azure.com/openai/deployments/my-openai-deployment/ + */ + azureOpenAIApiCompletionsDeploymentName?: string; + + /** + * Custom endpoint for Azure OpenAI API. This is useful in case you have a deployment in another region. + * e.g. 
setting this value to "https://westeurope.api.cognitive.microsoft.com/openai/deployments" + * will be result in the endpoint URL: https://westeurope.api.cognitive.microsoft.com/openai/deployments/{DeploymentName}/ + */ + azureOpenAIBasePath?: string; +} diff --git a/libs/langchain-community/src/types/pdf-parse.d.ts b/libs/langchain-community/src/types/pdf-parse.d.ts new file mode 100644 index 000000000000..675c403a559c --- /dev/null +++ b/libs/langchain-community/src/types/pdf-parse.d.ts @@ -0,0 +1,1409 @@ +/** + * Type definitions adapted from pdfjs-dist + * https://github.com/mozilla/pdfjs-dist/blob/master/types/src/display/api.d.ts + */ + +declare module "pdf-parse/lib/pdf.js/v1.10.100/build/pdf.js" { + export type TypedArray = + | Int8Array + | Uint8Array + | Uint8ClampedArray + | Int16Array + | Uint16Array + | Int32Array + | Uint32Array + | Float32Array + | Float64Array; + export type BinaryData = TypedArray | ArrayBuffer | Array | string; + export type RefProxy = { + num: number; + gen: number; + }; + /** + * Document initialization / loading parameters object. + */ + export type DocumentInitParameters = { + /** + * - The URL of the PDF. + */ + url?: string | URL | undefined; + /** + * - Binary PDF data. + * Use TypedArrays (Uint8Array) to improve the memory usage. If PDF data is + * BASE64-encoded, use `atob()` to convert it to a binary string first. + * + * NOTE: If TypedArrays are used they will generally be transferred to the + * worker-thread. This will help reduce main-thread memory usage, however + * it will take ownership of the TypedArrays. + */ + data?: BinaryData | undefined; + /** + * - Basic authentication headers. + */ + httpHeaders?: Object | undefined; + /** + * - Indicates whether or not + * cross-site Access-Control requests should be made using credentials such + * as cookies or authorization headers. The default is `false`. + */ + withCredentials?: boolean | undefined; + /** + * - For decrypting password-protected PDFs. 
+ */ + password?: string | undefined; + /** + * - The PDF file length. It's used for progress + * reports and range requests operations. + */ + length?: number | undefined; + /** + * - Allows for using a custom range + * transport implementation. + */ + range?: PDFDataRangeTransport | undefined; + /** + * - Specify maximum number of bytes fetched + * per range request. The default value is {@link DEFAULT_RANGE_CHUNK_SIZE }. + */ + rangeChunkSize?: number | undefined; + /** + * - The worker that will be used for loading and + * parsing the PDF data. + */ + worker?: PDFWorker | undefined; + /** + * - Controls the logging level; the constants + * from {@link VerbosityLevel } should be used. + */ + verbosity?: number | undefined; + /** + * - The base URL of the document, used when + * attempting to recover valid absolute URLs for annotations, and outline + * items, that (incorrectly) only specify relative URLs. + */ + docBaseUrl?: string | undefined; + /** + * - The URL where the predefined Adobe CMaps are + * located. Include the trailing slash. + */ + cMapUrl?: string | undefined; + /** + * - Specifies if the Adobe CMaps are binary + * packed or not. The default value is `true`. + */ + cMapPacked?: boolean | undefined; + /** + * - The factory that will be used when + * reading built-in CMap files. Providing a custom factory is useful for + * environments without Fetch API or `XMLHttpRequest` support, such as + * Node.js. The default value is {DOMCMapReaderFactory}. + */ + CMapReaderFactory?: Object | undefined; + /** + * - When `true`, fonts that aren't + * embedded in the PDF document will fallback to a system font. + * The default value is `true` in web environments and `false` in Node.js; + * unless `disableFontFace === true` in which case this defaults to `false` + * regardless of the environment (to prevent completely broken fonts). + */ + useSystemFonts?: boolean | undefined; + /** + * - The URL where the standard font + * files are located. 
Include the trailing slash. + */ + standardFontDataUrl?: string | undefined; + /** + * - The factory that will be used + * when reading the standard font files. Providing a custom factory is useful + * for environments without Fetch API or `XMLHttpRequest` support, such as + * Node.js. The default value is {DOMStandardFontDataFactory}. + */ + StandardFontDataFactory?: Object | undefined; + /** + * - Enable using the Fetch API in the + * worker-thread when reading CMap and standard font files. When `true`, + * the `CMapReaderFactory` and `StandardFontDataFactory` options are ignored. + * The default value is `true` in web environments and `false` in Node.js. + */ + useWorkerFetch?: boolean | undefined; + /** + * - Reject certain promises, e.g. + * `getOperatorList`, `getTextContent`, and `RenderTask`, when the associated + * PDF data cannot be successfully parsed, instead of attempting to recover + * whatever possible of the data. The default value is `false`. + */ + stopAtErrors?: boolean | undefined; + /** + * - The maximum allowed image size in total + * pixels, i.e. width * height. Images above this value will not be rendered. + * Use -1 for no limit, which is also the default value. + */ + maxImageSize?: number | undefined; + /** + * - Determines if we can evaluate strings + * as JavaScript. Primarily used to improve performance of font rendering, and + * when parsing PDF functions. The default value is `true`. + */ + isEvalSupported?: boolean | undefined; + /** + * - Determines if we can use + * `OffscreenCanvas` in the worker. Primarily used to improve performance of + * image conversion/rendering. + * The default value is `true` in web environments and `false` in Node.js. + */ + isOffscreenCanvasSupported?: boolean | undefined; + /** + * - The integer value is used to + * know when an image must be resized (uses `OffscreenCanvas` in the worker). + * If it's -1 then a possibly slow algorithm is used to guess the max value. 
+ */ + canvasMaxAreaInBytes?: boolean | undefined; + /** + * - By default fonts are converted to + * OpenType fonts and loaded via the Font Loading API or `@font-face` rules. + * If disabled, fonts will be rendered using a built-in font renderer that + * constructs the glyphs with primitive path commands. + * The default value is `false` in web environments and `true` in Node.js. + */ + disableFontFace?: boolean | undefined; + /** + * - Include additional properties, + * which are unused during rendering of PDF documents, when exporting the + * parsed font data from the worker-thread. This may be useful for debugging + * purposes (and backwards compatibility), but note that it will lead to + * increased memory usage. The default value is `false`. + */ + fontExtraProperties?: boolean | undefined; + /** + * - Render Xfa forms if any. + * The default value is `false`. + */ + enableXfa?: boolean | undefined; + /** + * - Specify an explicit document + * context to create elements with and to load resources, such as fonts, + * into. Defaults to the current document. + */ + ownerDocument?: HTMLDocument | undefined; + /** + * - Disable range request loading of PDF + * files. When enabled, and if the server supports partial content requests, + * then the PDF will be fetched in chunks. The default value is `false`. + */ + disableRange?: boolean | undefined; + /** + * - Disable streaming of PDF file data. + * By default PDF.js attempts to load PDF files in chunks. The default value + * is `false`. + */ + disableStream?: boolean | undefined; + /** + * - Disable pre-fetching of PDF file + * data. When range requests are enabled PDF.js will automatically keep + * fetching more data even if it isn't needed to display the current page. + * The default value is `false`. + * + * NOTE: It is also necessary to disable streaming, see above, in order for + * disabling of pre-fetching to work correctly. 
+ */ + disableAutoFetch?: boolean | undefined; + /** + * - Enables special hooks for debugging PDF.js + * (see `web/debugger.js`). The default value is `false`. + */ + pdfBug?: boolean | undefined; + /** + * - The factory instance that will be used + * when creating canvases. The default value is {new DOMCanvasFactory()}. + */ + canvasFactory?: Object | undefined; + /** + * - A factory instance that will be used + * to create SVG filters when rendering some images on the main canvas. + */ + filterFactory?: Object | undefined; + }; + export type OnProgressParameters = { + /** + * - Currently loaded number of bytes. + */ + loaded: number; + /** + * - Total number of bytes in the PDF file. + */ + total: number; + }; + /** + * Page getViewport parameters. + */ + export type GetViewportParameters = { + /** + * - The desired scale of the viewport. + */ + scale: number; + /** + * - The desired rotation, in degrees, of + * the viewport. If omitted it defaults to the page rotation. + */ + rotation?: number | undefined; + /** + * - The horizontal, i.e. x-axis, offset. + * The default value is `0`. + */ + offsetX?: number | undefined; + /** + * - The vertical, i.e. y-axis, offset. + * The default value is `0`. + */ + offsetY?: number | undefined; + /** + * - If true, the y-axis will not be + * flipped. The default value is `false`. + */ + dontFlip?: boolean | undefined; + }; + /** + * Page getTextContent parameters. + */ + export type getTextContentParameters = { + /** + * - When true include marked + * content items in the items array of TextContent. The default is `false`. + */ + includeMarkedContent?: boolean | undefined; + }; + /** + * Page text content. + */ + export type TextContent = { + /** + * - Array of + * {@link TextItem } and {@link TextMarkedContent } objects. TextMarkedContent + * items are included when includeMarkedContent is true. + */ + items: Array; + /** + * - {@link TextStyle } objects, + * indexed by font name. 
+ */ + styles: { + [x: string]: TextStyle; + }; + }; + /** + * Page text content part. + */ + export type TextItem = { + /** + * - Text content. + */ + str: string; + /** + * - Text direction: 'ttb', 'ltr' or 'rtl'. + */ + dir: string; + /** + * - Transformation matrix. + */ + transform: Array; + /** + * - Width in device space. + */ + width: number; + /** + * - Height in device space. + */ + height: number; + /** + * - Font name used by PDF.js for converted font. + */ + fontName: string; + /** + * - Indicating if the text content is followed by a + * line-break. + */ + hasEOL: boolean; + }; + /** + * Page text marked content part. + */ + export type TextMarkedContent = { + /** + * - Either 'beginMarkedContent', + * 'beginMarkedContentProps', or 'endMarkedContent'. + */ + type: string; + /** + * - The marked content identifier. Only used for type + * 'beginMarkedContentProps'. + */ + id: string; + }; + /** + * Text style. + */ + export type TextStyle = { + /** + * - Font ascent. + */ + ascent: number; + /** + * - Font descent. + */ + descent: number; + /** + * - Whether or not the text is in vertical mode. + */ + vertical: boolean; + /** + * - The possible font family. + */ + fontFamily: string; + }; + /** + * Page annotation parameters. + */ + export type GetAnnotationsParameters = { + /** + * - Determines the annotations that are fetched, + * can be 'display' (viewable annotations), 'print' (printable annotations), + * or 'any' (all annotations). The default value is 'display'. + */ + intent?: string | undefined; + }; + /** + * Page render parameters. + */ + export type RenderParameters = { + /** + * - A 2D context of a DOM + * Canvas object. + */ + canvasContext: CanvasRenderingContext2D; + /** + * - Rendering viewport obtained by calling + * the `PDFPageProxy.getViewport` method. + */ + viewport: PageViewport; + /** + * - Rendering intent, can be 'display', 'print', + * or 'any'. The default value is 'display'. 
+ */ + intent?: string | undefined; + /** + * Controls which annotations are rendered + * onto the canvas, for annotations with appearance-data; the values from + * {@link AnnotationMode } should be used. The following values are supported: + * - `AnnotationMode.DISABLE`, which disables all annotations. + * - `AnnotationMode.ENABLE`, which includes all possible annotations (thus + * it also depends on the `intent`-option, see above). + * - `AnnotationMode.ENABLE_FORMS`, which excludes annotations that contain + * interactive form elements (those will be rendered in the display layer). + * - `AnnotationMode.ENABLE_STORAGE`, which includes all possible annotations + * (as above) but where interactive form elements are updated with data + * from the {@link AnnotationStorage }-instance; useful e.g. for printing. + * The default value is `AnnotationMode.ENABLE`. + */ + annotationMode?: number | undefined; + /** + * - Additional transform, applied just + * before viewport transform. + */ + transform?: any[] | undefined; + /** + * - Background + * to use for the canvas. + * Any valid `canvas.fillStyle` can be used: a `DOMString` parsed as CSS + * value, a `CanvasGradient` object (a linear or radial gradient) or + * a `CanvasPattern` object (a repetitive image). The default value is + * 'rgb(255,255,255)'. + * + * NOTE: This option may be partially, or completely, ignored when the + * `pageColors`-option is used. + */ + background?: string | CanvasGradient | CanvasPattern | undefined; + /** + * - Overwrites background and foreground colors + * with user defined ones in order to improve readability in high contrast + * mode. + */ + pageColors?: Object | undefined; + /** + * - + * A promise that should resolve with an {@link OptionalContentConfig }created from `PDFDocumentProxy.getOptionalContentConfig`. If `null`, + * the configuration will be fetched automatically with the default visibility + * states set. 
+ */ + optionalContentConfigPromise?: Promise | undefined; + /** + * - Map some + * annotation ids with canvases used to render them. + */ + annotationCanvasMap?: Map | undefined; + printAnnotationStorage?: PrintAnnotationStorage | undefined; + }; + /** + * Page getOperatorList parameters. + */ + export type GetOperatorListParameters = { + /** + * - Rendering intent, can be 'display', 'print', + * or 'any'. The default value is 'display'. + */ + intent?: string | undefined; + /** + * Controls which annotations are included + * in the operatorList, for annotations with appearance-data; the values from + * {@link AnnotationMode } should be used. The following values are supported: + * - `AnnotationMode.DISABLE`, which disables all annotations. + * - `AnnotationMode.ENABLE`, which includes all possible annotations (thus + * it also depends on the `intent`-option, see above). + * - `AnnotationMode.ENABLE_FORMS`, which excludes annotations that contain + * interactive form elements (those will be rendered in the display layer). + * - `AnnotationMode.ENABLE_STORAGE`, which includes all possible annotations + * (as above) but where interactive form elements are updated with data + * from the {@link AnnotationStorage }-instance; useful e.g. for printing. + * The default value is `AnnotationMode.ENABLE`. + */ + annotationMode?: number | undefined; + printAnnotationStorage?: PrintAnnotationStorage | undefined; + }; + /** + * Structure tree node. The root node will have a role "Root". + */ + export type StructTreeNode = { + /** + * - Array of + * {@link StructTreeNode } and {@link StructTreeContent } objects. + */ + children: Array; + /** + * - element's role, already mapped if a role map exists + * in the PDF. + */ + role: string; + }; + /** + * Structure tree content. + */ + export type StructTreeContent = { + /** + * - either "content" for page and stream structure + * elements or "object" for object references. 
+ */ + type: string; + /** + * - unique id that will map to the text layer. + */ + id: string; + }; + /** + * PDF page operator list. + */ + export type PDFOperatorList = { + /** + * - Array containing the operator functions. + */ + fnArray: Array; + /** + * - Array containing the arguments of the + * functions. + */ + argsArray: Array; + }; + export type PDFWorkerParameters = { + /** + * - The name of the worker. + */ + name?: string | undefined; + /** + * - The `workerPort` object. + */ + port?: Worker | undefined; + /** + * - Controls the logging level; + * the constants from {@link VerbosityLevel } should be used. + */ + verbosity?: number | undefined; + }; + /** @type {string} */ + export const build: string; + export let DefaultCanvasFactory: typeof DOMCanvasFactory; + export let DefaultCMapReaderFactory: typeof DOMCMapReaderFactory; + export let DefaultFilterFactory: typeof DOMFilterFactory; + export let DefaultStandardFontDataFactory: typeof DOMStandardFontDataFactory; + /** + * @typedef { Int8Array | Uint8Array | Uint8ClampedArray | + * Int16Array | Uint16Array | + * Int32Array | Uint32Array | Float32Array | + * Float64Array + * } TypedArray + */ + /** + * @typedef { TypedArray | ArrayBuffer | Array | string } BinaryData + */ + /** + * @typedef {Object} RefProxy + * @property {number} num + * @property {number} gen + */ + /** + * Document initialization / loading parameters object. + * + * @typedef {Object} DocumentInitParameters + * @property {string | URL} [url] - The URL of the PDF. + * @property {BinaryData} [data] - Binary PDF data. + * Use TypedArrays (Uint8Array) to improve the memory usage. If PDF data is + * BASE64-encoded, use `atob()` to convert it to a binary string first. + * + * NOTE: If TypedArrays are used they will generally be transferred to the + * worker-thread. This will help reduce main-thread memory usage, however + * it will take ownership of the TypedArrays. + * @property {Object} [httpHeaders] - Basic authentication headers. 
+ * @property {boolean} [withCredentials] - Indicates whether or not + * cross-site Access-Control requests should be made using credentials such + * as cookies or authorization headers. The default is `false`. + * @property {string} [password] - For decrypting password-protected PDFs. + * @property {number} [length] - The PDF file length. It's used for progress + * reports and range requests operations. + * @property {PDFDataRangeTransport} [range] - Allows for using a custom range + * transport implementation. + * @property {number} [rangeChunkSize] - Specify maximum number of bytes fetched + * per range request. The default value is {@link DEFAULT_RANGE_CHUNK_SIZE}. + * @property {PDFWorker} [worker] - The worker that will be used for loading and + * parsing the PDF data. + * @property {number} [verbosity] - Controls the logging level; the constants + * from {@link VerbosityLevel} should be used. + * @property {string} [docBaseUrl] - The base URL of the document, used when + * attempting to recover valid absolute URLs for annotations, and outline + * items, that (incorrectly) only specify relative URLs. + * @property {string} [cMapUrl] - The URL where the predefined Adobe CMaps are + * located. Include the trailing slash. + * @property {boolean} [cMapPacked] - Specifies if the Adobe CMaps are binary + * packed or not. The default value is `true`. + * @property {Object} [CMapReaderFactory] - The factory that will be used when + * reading built-in CMap files. Providing a custom factory is useful for + * environments without Fetch API or `XMLHttpRequest` support, such as + * Node.js. The default value is {DOMCMapReaderFactory}. + * @property {boolean} [useSystemFonts] - When `true`, fonts that aren't + * embedded in the PDF document will fallback to a system font. 
+ * The default value is `true` in web environments and `false` in Node.js; + * unless `disableFontFace === true` in which case this defaults to `false` + * regardless of the environment (to prevent completely broken fonts). + * @property {string} [standardFontDataUrl] - The URL where the standard font + * files are located. Include the trailing slash. + * @property {Object} [StandardFontDataFactory] - The factory that will be used + * when reading the standard font files. Providing a custom factory is useful + * for environments without Fetch API or `XMLHttpRequest` support, such as + * Node.js. The default value is {DOMStandardFontDataFactory}. + * @property {boolean} [useWorkerFetch] - Enable using the Fetch API in the + * worker-thread when reading CMap and standard font files. When `true`, + * the `CMapReaderFactory` and `StandardFontDataFactory` options are ignored. + * The default value is `true` in web environments and `false` in Node.js. + * @property {boolean} [stopAtErrors] - Reject certain promises, e.g. + * `getOperatorList`, `getTextContent`, and `RenderTask`, when the associated + * PDF data cannot be successfully parsed, instead of attempting to recover + * whatever possible of the data. The default value is `false`. + * @property {number} [maxImageSize] - The maximum allowed image size in total + * pixels, i.e. width * height. Images above this value will not be rendered. + * Use -1 for no limit, which is also the default value. + * @property {boolean} [isEvalSupported] - Determines if we can evaluate strings + * as JavaScript. Primarily used to improve performance of font rendering, and + * when parsing PDF functions. The default value is `true`. + * @property {boolean} [isOffscreenCanvasSupported] - Determines if we can use + * `OffscreenCanvas` in the worker. Primarily used to improve performance of + * image conversion/rendering. + * The default value is `true` in web environments and `false` in Node.js. 
+ * @property {boolean} [canvasMaxAreaInBytes] - The integer value is used to + * know when an image must be resized (uses `OffscreenCanvas` in the worker). + * If it's -1 then a possibly slow algorithm is used to guess the max value. + * @property {boolean} [disableFontFace] - By default fonts are converted to + * OpenType fonts and loaded via the Font Loading API or `@font-face` rules. + * If disabled, fonts will be rendered using a built-in font renderer that + * constructs the glyphs with primitive path commands. + * The default value is `false` in web environments and `true` in Node.js. + * @property {boolean} [fontExtraProperties] - Include additional properties, + * which are unused during rendering of PDF documents, when exporting the + * parsed font data from the worker-thread. This may be useful for debugging + * purposes (and backwards compatibility), but note that it will lead to + * increased memory usage. The default value is `false`. + * @property {boolean} [enableXfa] - Render Xfa forms if any. + * The default value is `false`. + * @property {HTMLDocument} [ownerDocument] - Specify an explicit document + * context to create elements with and to load resources, such as fonts, + * into. Defaults to the current document. + * @property {boolean} [disableRange] - Disable range request loading of PDF + * files. When enabled, and if the server supports partial content requests, + * then the PDF will be fetched in chunks. The default value is `false`. + * @property {boolean} [disableStream] - Disable streaming of PDF file data. + * By default PDF.js attempts to load PDF files in chunks. The default value + * is `false`. + * @property {boolean} [disableAutoFetch] - Disable pre-fetching of PDF file + * data. When range requests are enabled PDF.js will automatically keep + * fetching more data even if it isn't needed to display the current page. + * The default value is `false`. 
+ * + * NOTE: It is also necessary to disable streaming, see above, in order for + * disabling of pre-fetching to work correctly. + * @property {boolean} [pdfBug] - Enables special hooks for debugging PDF.js + * (see `web/debugger.js`). The default value is `false`. + * @property {Object} [canvasFactory] - The factory instance that will be used + * when creating canvases. The default value is {new DOMCanvasFactory()}. + * @property {Object} [filterFactory] - A factory instance that will be used + * to create SVG filters when rendering some images on the main canvas. + */ + /** + * This is the main entry point for loading a PDF and interacting with it. + * + * NOTE: If a URL is used to fetch the PDF data a standard Fetch API call (or + * XHR as fallback) is used, which means it must follow same origin rules, + * e.g. no cross-domain requests without CORS. + * + * @param {string | URL | TypedArray | ArrayBuffer | DocumentInitParameters} + * src - Can be a URL where a PDF file is located, a typed array (Uint8Array) + * already populated with data, or a parameter object. + * @returns {PDFDocumentLoadingTask} + */ + export function getDocument( + src: string | URL | TypedArray | ArrayBuffer | DocumentInitParameters + ): PDFDocumentLoadingTask; + export class LoopbackPort { + postMessage(obj: any, transfer: any): void; + addEventListener(name: any, listener: any): void; + removeEventListener(name: any, listener: any): void; + terminate(): void; + #private; + } + /** + * @typedef {Object} OnProgressParameters + * @property {number} loaded - Currently loaded number of bytes. + * @property {number} total - Total number of bytes in the PDF file. + */ + /** + * The loading task controls the operations required to load a PDF document + * (such as network requests) and provides a way to listen for completion, + * after which individual pages can be rendered. 
+ */ + export class PDFDocumentLoadingTask { + static "__#16@#docId": number; + _capability: import("../shared/util.js").PromiseCapability; + _transport: any; + _worker: any; + /** + * Unique identifier for the document loading task. + * @type {string} + */ + docId: string; + /** + * Whether the loading task is destroyed or not. + * @type {boolean} + */ + destroyed: boolean; + /** + * Callback to request a password if a wrong or no password was provided. + * The callback receives two parameters: a function that should be called + * with the new password, and a reason (see {@link PasswordResponses}). + * @type {function} + */ + onPassword: Function; + /** + * Callback to be able to monitor the loading progress of the PDF file + * (necessary to implement e.g. a loading bar). + * The callback receives an {@link OnProgressParameters} argument. + * @type {function} + */ + onProgress: Function; + /** + * Promise for document loading task completion. + * @type {Promise} + */ + get promise(): Promise; + /** + * Abort all network requests and destroy the worker. + * @returns {Promise} A promise that is resolved when destruction is + * completed. + */ + destroy(): Promise; + } + /** + * Proxy to a `PDFDocument` in the worker thread. + */ + export class PDFDocumentProxy { + constructor(pdfInfo: any, transport: any); + _pdfInfo: any; + _transport: any; + /** + * @type {AnnotationStorage} Storage for annotation data in forms. + */ + get annotationStorage(): AnnotationStorage; + /** + * @type {Object} The filter factory instance. + */ + get filterFactory(): Object; + /** + * @type {number} Total number of pages in the PDF file. + */ + get numPages(): number; + /** + * @type {Array} A (not guaranteed to be) unique ID to + * identify the PDF document. + * NOTE: The first element will always be defined for all PDF documents, + * whereas the second element is only defined for *modified* PDF documents. 
+ */ + get fingerprints(): string[]; + /** + * @type {boolean} True if only XFA form. + */ + get isPureXfa(): boolean; + /** + * NOTE: This is (mostly) intended to support printing of XFA forms. + * + * @type {Object | null} An object representing a HTML tree structure + * to render the XFA, or `null` when no XFA form exists. + */ + get allXfaHtml(): Object | null; + /** + * @param {number} pageNumber - The page number to get. The first page is 1. + * @returns {Promise} A promise that is resolved with + * a {@link PDFPageProxy} object. + */ + getPage(pageNumber: number): Promise; + /** + * @param {RefProxy} ref - The page reference. + * @returns {Promise} A promise that is resolved with the page index, + * starting from zero, that is associated with the reference. + */ + getPageIndex(ref: RefProxy): Promise; + /** + * @returns {Promise>>} A promise that is resolved + * with a mapping from named destinations to references. + * + * This can be slow for large documents. Use `getDestination` instead. + */ + getDestinations(): Promise<{ + [x: string]: Array; + }>; + /** + * @param {string} id - The named destination to get. + * @returns {Promise | null>} A promise that is resolved with all + * information of the given named destination, or `null` when the named + * destination is not present in the PDF file. + */ + getDestination(id: string): Promise | null>; + /** + * @returns {Promise | null>} A promise that is resolved with + * an {Array} containing the page labels that correspond to the page + * indexes, or `null` when no page labels are present in the PDF file. + */ + getPageLabels(): Promise | null>; + /** + * @returns {Promise} A promise that is resolved with a {string} + * containing the page layout name. + */ + getPageLayout(): Promise; + /** + * @returns {Promise} A promise that is resolved with a {string} + * containing the page mode name. 
+ */ + getPageMode(): Promise; + /** + * @returns {Promise} A promise that is resolved with an + * {Object} containing the viewer preferences, or `null` when no viewer + * preferences are present in the PDF file. + */ + getViewerPreferences(): Promise; + /** + * @returns {Promise} A promise that is resolved with an {Array} + * containing the destination, or `null` when no open action is present + * in the PDF. + */ + getOpenAction(): Promise; + /** + * @returns {Promise} A promise that is resolved with a lookup table + * for mapping named attachments to their content. + */ + getAttachments(): Promise; + /** + * @returns {Promise | null>} A promise that is resolved with + * an {Array} of all the JavaScript strings in the name tree, or `null` + * if no JavaScript exists. + */ + getJavaScript(): Promise | null>; + /** + * @returns {Promise} A promise that is resolved with + * an {Object} with the JavaScript actions: + * - from the name tree (like getJavaScript); + * - from A or AA entries in the catalog dictionary. + * , or `null` if no JavaScript exists. + */ + getJSActions(): Promise; + /** + * @typedef {Object} OutlineNode + * @property {string} title + * @property {boolean} bold + * @property {boolean} italic + * @property {Uint8ClampedArray} color - The color in RGB format to use for + * display purposes. + * @property {string | Array | null} dest + * @property {string | null} url + * @property {string | undefined} unsafeUrl + * @property {boolean | undefined} newWindow + * @property {number | undefined} count + * @property {Array} items + */ + /** + * @returns {Promise>} A promise that is resolved with an + * {Array} that is a tree outline (if it has one) of the PDF file. + */ + getOutline(): Promise< + { + title: string; + bold: boolean; + italic: boolean; + /** + * - The color in RGB format to use for + * display purposes. 
+ */ + color: Uint8ClampedArray; + dest: string | Array | null; + url: string | null; + unsafeUrl: string | undefined; + newWindow: boolean | undefined; + count: number | undefined; + items: any[]; + }[] + >; + /** + * @returns {Promise} A promise that is resolved with + * an {@link OptionalContentConfig} that contains all the optional content + * groups (assuming that the document has any). + */ + getOptionalContentConfig(): Promise; + /** + * @returns {Promise | null>} A promise that is resolved with + * an {Array} that contains the permission flags for the PDF document, or + * `null` when no permissions are present in the PDF file. + */ + getPermissions(): Promise | null>; + /** + * @returns {Promise<{ info: Object, metadata: Metadata }>} A promise that is + * resolved with an {Object} that has `info` and `metadata` properties. + * `info` is an {Object} filled with anything available in the information + * dictionary and similarly `metadata` is a {Metadata} object with + * information from the metadata section of the PDF. + */ + getMetadata(): Promise<{ + info: Object; + metadata: Metadata; + }>; + /** + * @typedef {Object} MarkInfo + * Properties correspond to Table 321 of the PDF 32000-1:2008 spec. + * @property {boolean} Marked + * @property {boolean} UserProperties + * @property {boolean} Suspects + */ + /** + * @returns {Promise} A promise that is resolved with + * a {MarkInfo} object that contains the MarkInfo flags for the PDF + * document, or `null` when no MarkInfo values are present in the PDF file. + */ + getMarkInfo(): Promise<{ + Marked: boolean; + UserProperties: boolean; + Suspects: boolean; + } | null>; + /** + * @returns {Promise} A promise that is resolved with a + * {Uint8Array} containing the raw data of the PDF document. + */ + getData(): Promise; + /** + * @returns {Promise} A promise that is resolved with a + * {Uint8Array} containing the full data of the saved document. 
+ */ + saveDocument(): Promise; + /** + * @returns {Promise<{ length: number }>} A promise that is resolved when the + * document's data is loaded. It is resolved with an {Object} that contains + * the `length` property that indicates size of the PDF data in bytes. + */ + getDownloadInfo(): Promise<{ + length: number; + }>; + /** + * Cleans up resources allocated by the document on both the main and worker + * threads. + * + * NOTE: Do not, under any circumstances, call this method when rendering is + * currently ongoing since that may lead to rendering errors. + * + * @param {boolean} [keepLoadedFonts] - Let fonts remain attached to the DOM. + * NOTE: This will increase persistent memory usage, hence don't use this + * option unless absolutely necessary. The default value is `false`. + * @returns {Promise} A promise that is resolved when clean-up has finished. + */ + cleanup(keepLoadedFonts?: boolean | undefined): Promise; + /** + * Destroys the current document instance and terminates the worker. + */ + destroy(): Promise; + /** + * @type {DocumentInitParameters} A subset of the current + * {DocumentInitParameters}, which are needed in the viewer. + */ + get loadingParams(): DocumentInitParameters; + /** + * @type {PDFDocumentLoadingTask} The loadingTask for the current document. + */ + get loadingTask(): PDFDocumentLoadingTask; + /** + * @returns {Promise> | null>} A promise that is + * resolved with an {Object} containing /AcroForm field data for the JS + * sandbox, or `null` when no field data is present in the PDF file. + */ + getFieldObjects(): Promise<{ + [x: string]: Array; + } | null>; + /** + * @returns {Promise} A promise that is resolved with `true` + * if some /AcroForm fields have JavaScript actions. + */ + hasJSActions(): Promise; + /** + * @returns {Promise | null>} A promise that is resolved with an + * {Array} containing IDs of annotations that have a calculation + * action, or `null` when no such annotations are present in the PDF file. 
+ */ + getCalculationOrderIds(): Promise | null>; + } + /** + * Page getViewport parameters. + * + * @typedef {Object} GetViewportParameters + * @property {number} scale - The desired scale of the viewport. + * @property {number} [rotation] - The desired rotation, in degrees, of + * the viewport. If omitted it defaults to the page rotation. + * @property {number} [offsetX] - The horizontal, i.e. x-axis, offset. + * The default value is `0`. + * @property {number} [offsetY] - The vertical, i.e. y-axis, offset. + * The default value is `0`. + * @property {boolean} [dontFlip] - If true, the y-axis will not be + * flipped. The default value is `false`. + */ + /** + * Page getTextContent parameters. + * + * @typedef {Object} getTextContentParameters + * @property {boolean} [includeMarkedContent] - When true include marked + * content items in the items array of TextContent. The default is `false`. + */ + /** + * Page text content. + * + * @typedef {Object} TextContent + * @property {Array} items - Array of + * {@link TextItem} and {@link TextMarkedContent} objects. TextMarkedContent + * items are included when includeMarkedContent is true. + * @property {Object} styles - {@link TextStyle} objects, + * indexed by font name. + */ + /** + * Page text content part. + * + * @typedef {Object} TextItem + * @property {string} str - Text content. + * @property {string} dir - Text direction: 'ttb', 'ltr' or 'rtl'. + * @property {Array} transform - Transformation matrix. + * @property {number} width - Width in device space. + * @property {number} height - Height in device space. + * @property {string} fontName - Font name used by PDF.js for converted font. + * @property {boolean} hasEOL - Indicating if the text content is followed by a + * line-break. + */ + /** + * Page text marked content part. + * + * @typedef {Object} TextMarkedContent + * @property {string} type - Either 'beginMarkedContent', + * 'beginMarkedContentProps', or 'endMarkedContent'. 
+ * @property {string} id - The marked content identifier. Only used for type + * 'beginMarkedContentProps'. + */ + /** + * Text style. + * + * @typedef {Object} TextStyle + * @property {number} ascent - Font ascent. + * @property {number} descent - Font descent. + * @property {boolean} vertical - Whether or not the text is in vertical mode. + * @property {string} fontFamily - The possible font family. + */ + /** + * Page annotation parameters. + * + * @typedef {Object} GetAnnotationsParameters + * @property {string} [intent] - Determines the annotations that are fetched, + * can be 'display' (viewable annotations), 'print' (printable annotations), + * or 'any' (all annotations). The default value is 'display'. + */ + /** + * Page render parameters. + * + * @typedef {Object} RenderParameters + * @property {CanvasRenderingContext2D} canvasContext - A 2D context of a DOM + * Canvas object. + * @property {PageViewport} viewport - Rendering viewport obtained by calling + * the `PDFPageProxy.getViewport` method. + * @property {string} [intent] - Rendering intent, can be 'display', 'print', + * or 'any'. The default value is 'display'. + * @property {number} [annotationMode] Controls which annotations are rendered + * onto the canvas, for annotations with appearance-data; the values from + * {@link AnnotationMode} should be used. The following values are supported: + * - `AnnotationMode.DISABLE`, which disables all annotations. + * - `AnnotationMode.ENABLE`, which includes all possible annotations (thus + * it also depends on the `intent`-option, see above). + * - `AnnotationMode.ENABLE_FORMS`, which excludes annotations that contain + * interactive form elements (those will be rendered in the display layer). + * - `AnnotationMode.ENABLE_STORAGE`, which includes all possible annotations + * (as above) but where interactive form elements are updated with data + * from the {@link AnnotationStorage}-instance; useful e.g. for printing. 
+ * The default value is `AnnotationMode.ENABLE`. + * @property {Array} [transform] - Additional transform, applied just + * before viewport transform. + * @property {CanvasGradient | CanvasPattern | string} [background] - Background + * to use for the canvas. + * Any valid `canvas.fillStyle` can be used: a `DOMString` parsed as CSS + * value, a `CanvasGradient` object (a linear or radial gradient) or + * a `CanvasPattern` object (a repetitive image). The default value is + * 'rgb(255,255,255)'. + * + * NOTE: This option may be partially, or completely, ignored when the + * `pageColors`-option is used. + * @property {Object} [pageColors] - Overwrites background and foreground colors + * with user defined ones in order to improve readability in high contrast + * mode. + * @property {Promise} [optionalContentConfigPromise] - + * A promise that should resolve with an {@link OptionalContentConfig} + * created from `PDFDocumentProxy.getOptionalContentConfig`. If `null`, + * the configuration will be fetched automatically with the default visibility + * states set. + * @property {Map} [annotationCanvasMap] - Map some + * annotation ids with canvases used to render them. + * @property {PrintAnnotationStorage} [printAnnotationStorage] + */ + /** + * Page getOperatorList parameters. + * + * @typedef {Object} GetOperatorListParameters + * @property {string} [intent] - Rendering intent, can be 'display', 'print', + * or 'any'. The default value is 'display'. + * @property {number} [annotationMode] Controls which annotations are included + * in the operatorList, for annotations with appearance-data; the values from + * {@link AnnotationMode} should be used. The following values are supported: + * - `AnnotationMode.DISABLE`, which disables all annotations. + * - `AnnotationMode.ENABLE`, which includes all possible annotations (thus + * it also depends on the `intent`-option, see above). 
+ * - `AnnotationMode.ENABLE_FORMS`, which excludes annotations that contain + * interactive form elements (those will be rendered in the display layer). + * - `AnnotationMode.ENABLE_STORAGE`, which includes all possible annotations + * (as above) but where interactive form elements are updated with data + * from the {@link AnnotationStorage}-instance; useful e.g. for printing. + * The default value is `AnnotationMode.ENABLE`. + * @property {PrintAnnotationStorage} [printAnnotationStorage] + */ + /** + * Structure tree node. The root node will have a role "Root". + * + * @typedef {Object} StructTreeNode + * @property {Array} children - Array of + * {@link StructTreeNode} and {@link StructTreeContent} objects. + * @property {string} role - element's role, already mapped if a role map exists + * in the PDF. + */ + /** + * Structure tree content. + * + * @typedef {Object} StructTreeContent + * @property {string} type - either "content" for page and stream structure + * elements or "object" for object references. + * @property {string} id - unique id that will map to the text layer. + */ + /** + * PDF page operator list. + * + * @typedef {Object} PDFOperatorList + * @property {Array} fnArray - Array containing the operator functions. + * @property {Array} argsArray - Array containing the arguments of the + * functions. + */ + /** + * Proxy to a `PDFPage` in the worker thread. + */ + export class PDFPageProxy { + constructor( + pageIndex: any, + pageInfo: any, + transport: any, + pdfBug?: boolean + ); + _pageIndex: any; + _pageInfo: any; + _transport: any; + _stats: StatTimer | null; + _pdfBug: boolean; + /** @type {PDFObjects} */ + commonObjs: PDFObjects; + objs: PDFObjects; + _maybeCleanupAfterRender: boolean; + _intentStates: Map; + destroyed: boolean; + /** + * @type {number} Page number of the page. First page is 1. + */ + get pageNumber(): number; + /** + * @type {number} The number of degrees the page is rotated clockwise. 
+ */ + get rotate(): number; + /** + * @type {RefProxy | null} The reference that points to this page. + */ + get ref(): RefProxy | null; + /** + * @type {number} The default size of units in 1/72nds of an inch. + */ + get userUnit(): number; + /** + * @type {Array} An array of the visible portion of the PDF page in + * user space units [x1, y1, x2, y2]. + */ + get view(): number[]; + /** + * @param {GetViewportParameters} params - Viewport parameters. + * @returns {PageViewport} Contains 'width' and 'height' properties + * along with transforms required for rendering. + */ + getViewport({ + scale, + rotation, + offsetX, + offsetY, + dontFlip, + }?: GetViewportParameters): PageViewport; + /** + * @param {GetAnnotationsParameters} params - Annotation parameters. + * @returns {Promise>} A promise that is resolved with an + * {Array} of the annotation objects. + */ + getAnnotations({ intent }?: GetAnnotationsParameters): Promise>; + /** + * @returns {Promise} A promise that is resolved with an + * {Object} with JS actions. + */ + getJSActions(): Promise; + /** + * @type {boolean} True if only XFA form. + */ + get isPureXfa(): boolean; + /** + * @returns {Promise} A promise that is resolved with + * an {Object} with a fake DOM object (a tree structure where elements + * are {Object} with a name, attributes (class, style, ...), value and + * children, very similar to a HTML DOM tree), or `null` if no XFA exists. + */ + getXfa(): Promise; + /** + * Begins the process of rendering a page to the desired context. + * + * @param {RenderParameters} params - Page render parameters. + * @returns {RenderTask} An object that contains a promise that is + * resolved when the page finishes rendering. 
+ */ + render( + { + canvasContext, + viewport, + intent, + annotationMode, + transform, + background, + optionalContentConfigPromise, + annotationCanvasMap, + pageColors, + printAnnotationStorage, + }: RenderParameters, + ...args: any[] + ): RenderTask; + /** + * @param {GetOperatorListParameters} params - Page getOperatorList + * parameters. + * @returns {Promise} A promise resolved with an + * {@link PDFOperatorList} object that represents the page's operator list. + */ + getOperatorList({ + intent, + annotationMode, + printAnnotationStorage, + }?: GetOperatorListParameters): Promise; + /** + * NOTE: All occurrences of whitespace will be replaced by + * standard spaces (0x20). + * + * @param {getTextContentParameters} params - getTextContent parameters. + * @returns {ReadableStream} Stream for reading text content chunks. + */ + streamTextContent({ + includeMarkedContent, + }?: getTextContentParameters): ReadableStream; + /** + * NOTE: All occurrences of whitespace will be replaced by + * standard spaces (0x20). + * + * @param {getTextContentParameters} params - getTextContent parameters. + * @returns {Promise} A promise that is resolved with a + * {@link TextContent} object that represents the page's text content. + */ + getTextContent(params?: getTextContentParameters): Promise; + /** + * @returns {Promise} A promise that is resolved with a + * {@link StructTreeNode} object that represents the page's structure tree, + * or `null` when no structure tree is present for the current page. + */ + getStructTree(): Promise; + /** + * Destroys the page object. + * @private + */ + private _destroy; + /** + * Cleans up resources allocated by the page. + * + * @param {boolean} [resetStats] - Reset page stats, if enabled. + * The default value is `false`. + * @returns {boolean} Indicates if clean-up was successfully run. 
+ */ + cleanup(resetStats?: boolean | undefined): boolean; + /** + * @private + */ + private _startRenderPage; + /** + * @private + */ + private _renderPageChunk; + /** + * @private + */ + private _pumpOperatorList; + /** + * @private + */ + private _abortOperatorList; + /** + * @type {StatTimer | null} Returns page stats, if enabled; returns `null` + * otherwise. + */ + get stats(): StatTimer | null; + #private; + } + /** + * PDF.js web worker abstraction that controls the instantiation of PDF + * documents. Message handlers are used to pass information from the main + * thread to the worker thread and vice versa. If the creation of a web + * worker is not possible, a "fake" worker will be used instead. + * + * @param {PDFWorkerParameters} params - The worker initialization parameters. + */ + export class PDFWorker { + static "__#19@#workerPorts": WeakMap; + /** + * @param {PDFWorkerParameters} params - The worker initialization parameters. + */ + static fromPort(params: PDFWorkerParameters): any; + /** + * The current `workerSrc`, when it exists. + * @type {string} + */ + static get workerSrc(): string; + static get _mainThreadWorkerMessageHandler(): any; + static get _setupFakeWorkerGlobal(): any; + constructor({ + name, + port, + verbosity, + }?: { + name?: null | undefined; + port?: null | undefined; + verbosity?: number | undefined; + }); + name: any; + destroyed: boolean; + verbosity: number; + _readyCapability: import("../shared/util.js").PromiseCapability; + _port: any; + _webWorker: Worker | null; + _messageHandler: MessageHandler | null; + /** + * Promise for worker initialization completion. + * @type {Promise} + */ + get promise(): Promise; + /** + * The current `workerPort`, when it exists. + * @type {Worker} + */ + get port(): Worker; + /** + * The current MessageHandler-instance. 
+ * @type {MessageHandler} + */ + get messageHandler(): MessageHandler; + _initializeFromPort(port: any): void; + _initialize(): void; + _setupFakeWorker(): void; + /** + * Destroys the worker instance. + */ + destroy(): void; + } + export namespace PDFWorkerUtil { + const isWorkerDisabled: boolean; + const fallbackWorkerSrc: null; + const fakeWorkerId: number; + } + /** + * Allows controlling of the rendering tasks. + */ + export class RenderTask { + constructor(internalRenderTask: any); + /** + * Callback for incremental rendering -- a function that will be called + * each time the rendering is paused. To continue rendering call the + * function that is the first argument to the callback. + * @type {function} + */ + onContinue: Function; + /** + * Promise for rendering task completion. + * @type {Promise} + */ + get promise(): Promise; + /** + * Cancels the rendering task. If the task is currently rendering it will + * not be cancelled until graphics pauses with a timeout. The promise that + * this object extends will be rejected when cancelled. + * + * @param {number} [extraDelay] + */ + cancel(extraDelay?: number | undefined): void; + /** + * Whether form fields are rendered separately from the main operatorList. 
+ * @type {boolean} + */ + get separateAnnots(): boolean; + #private; + } + /** @type {string} */ + export const version: string; +} diff --git a/libs/langchain-community/src/types/type-utils.ts b/libs/langchain-community/src/types/type-utils.ts new file mode 100644 index 000000000000..e2c1e6970a52 --- /dev/null +++ b/libs/langchain-community/src/types/type-utils.ts @@ -0,0 +1,3 @@ +// Utility for marking only some keys of an interface as optional +// Compare to Partial which marks all keys as optional +export type Optional = Omit & Partial>; diff --git a/libs/langchain-community/src/util/bedrock.ts b/libs/langchain-community/src/util/bedrock.ts new file mode 100644 index 000000000000..82a5d21ca4c0 --- /dev/null +++ b/libs/langchain-community/src/util/bedrock.ts @@ -0,0 +1,134 @@ +import type { AwsCredentialIdentity, Provider } from "@aws-sdk/types"; + +export type CredentialType = + | AwsCredentialIdentity + | Provider; + +/** Bedrock models. + To authenticate, the AWS client uses the following methods to automatically load credentials: + https://boto3.amazonaws.com/v1/documentation/api/latest/guide/credentials.html + If a specific credential profile should be used, you must pass the name of the profile from the ~/.aws/credentials file that is to be used. + Make sure the credentials / roles used have the required policies to access the Bedrock service. +*/ +export interface BaseBedrockInput { + /** Model to use. + For example, "amazon.titan-tg1-large", this is equivalent to the modelId property in the list-foundation-models api. + */ + model: string; + + /** The AWS region e.g. `us-west-2`. + Fallback to AWS_DEFAULT_REGION env variable or region specified in ~/.aws/config in case it is not provided here. + */ + region?: string; + + /** AWS Credentials. + If no credentials are provided, the default credentials from `@aws-sdk/credential-provider-node` will be used. + */ + credentials?: CredentialType; + + /** Temperature. 
*/ + temperature?: number; + + /** Max tokens. */ + maxTokens?: number; + + /** A custom fetch function for low-level access to AWS API. Defaults to fetch(). */ + fetchFn?: typeof fetch; + + /** @deprecated Use endpointHost instead Override the default endpoint url. */ + endpointUrl?: string; + + /** Override the default endpoint hostname. */ + endpointHost?: string; + + /** + * Optional additional stop sequences to pass to the model. Currently only supported for Anthropic and AI21. + * @deprecated Use .bind({ "stop": [...] }) instead + * */ + stopSequences?: string[]; + + /** Additional kwargs to pass to the model. */ + modelKwargs?: Record; + + /** Whether or not to stream responses */ + streaming: boolean; +} + +type Dict = { [key: string]: unknown }; + +/** + * A helper class used within the `Bedrock` class. It is responsible for + * preparing the input and output for the Bedrock service. It formats the + * input prompt based on the provider (e.g., "anthropic", "ai21", + * "amazon") and extracts the generated text from the service response. + */ +export class BedrockLLMInputOutputAdapter { + /** Adapter class to prepare the inputs from Langchain to a format + that LLM model expects. Also, provides a helper function to extract + the generated text from the model response. 
*/ + + static prepareInput( + provider: string, + prompt: string, + maxTokens = 50, + temperature = 0, + stopSequences: string[] | undefined = undefined, + modelKwargs: Record = {}, + bedrockMethod: "invoke" | "invoke-with-response-stream" = "invoke" + ): Dict { + const inputBody: Dict = {}; + + if (provider === "anthropic") { + inputBody.prompt = prompt; + inputBody.max_tokens_to_sample = maxTokens; + inputBody.temperature = temperature; + inputBody.stop_sequences = stopSequences; + } else if (provider === "ai21") { + inputBody.prompt = prompt; + inputBody.maxTokens = maxTokens; + inputBody.temperature = temperature; + inputBody.stopSequences = stopSequences; + } else if (provider === "meta") { + inputBody.prompt = prompt; + inputBody.max_gen_len = maxTokens; + inputBody.temperature = temperature; + } else if (provider === "amazon") { + inputBody.inputText = prompt; + inputBody.textGenerationConfig = { + maxTokenCount: maxTokens, + temperature, + }; + } else if (provider === "cohere") { + inputBody.prompt = prompt; + inputBody.max_tokens = maxTokens; + inputBody.temperature = temperature; + inputBody.stop_sequences = stopSequences; + if (bedrockMethod === "invoke-with-response-stream") { + inputBody.stream = true; + } + } + return { ...inputBody, ...modelKwargs }; + } + + /** + * Extracts the generated text from the service response. + * @param provider The provider name. + * @param responseBody The response body from the service. + * @returns The generated text. + */ + // eslint-disable-next-line @typescript-eslint/no-explicit-any + static prepareOutput(provider: string, responseBody: any): string { + if (provider === "anthropic") { + return responseBody.completion; + } else if (provider === "ai21") { + return responseBody?.completions?.[0]?.data?.text ?? ""; + } else if (provider === "cohere") { + return responseBody?.generations?.[0]?.text ?? responseBody?.text ?? 
""; + } else if (provider === "meta") { + return responseBody.generation; + } + + // I haven't been able to get a response with more than one result in it. + return responseBody.results?.[0]?.outputText; + } +} diff --git a/libs/langchain-community/src/util/event-source-parse.ts b/libs/langchain-community/src/util/event-source-parse.ts new file mode 100644 index 000000000000..9a279a2b28d4 --- /dev/null +++ b/libs/langchain-community/src/util/event-source-parse.ts @@ -0,0 +1,287 @@ +/* eslint-disable prefer-template */ +/* eslint-disable default-case */ +/* eslint-disable no-plusplus */ +// Adapted from https://github.com/gfortaine/fetch-event-source/blob/main/src/parse.ts +// due to a packaging issue in the original. +// MIT License +import { type Readable } from "stream"; +import { IterableReadableStream } from "@langchain/core/utils/stream"; + +export const EventStreamContentType = "text/event-stream"; + +/** + * Represents a message sent in an event stream + * https://developer.mozilla.org/en-US/docs/Web/API/Server-sent_events/Using_server-sent_events#Event_stream_format + */ +export interface EventSourceMessage { + /** The event ID to set the EventSource object's last event ID value. */ + id: string; + /** A string identifying the type of event described. */ + event: string; + /** The event data */ + data: string; + /** The reconnection interval (in milliseconds) to wait before retrying the connection */ + retry?: number; +} + +function isNodeJSReadable(x: unknown): x is Readable { + return x != null && typeof x === "object" && "on" in x; +} + +/** + * Converts a ReadableStream into a callback pattern. + * @param stream The input ReadableStream. + * @param onChunk A function that will be called on each new byte chunk in the stream. + * @returns {Promise} A promise that will be resolved when the stream closes. 
+ */ +export async function getBytes( + stream: ReadableStream, + onChunk: (arr: Uint8Array, flush?: boolean) => void +) { + // stream is a Node.js Readable / PassThrough stream + // this can happen if node-fetch is polyfilled + if (isNodeJSReadable(stream)) { + return new Promise((resolve) => { + stream.on("readable", () => { + let chunk; + // eslint-disable-next-line no-constant-condition + while (true) { + chunk = stream.read(); + if (chunk == null) { + onChunk(new Uint8Array(), true); + break; + } + onChunk(chunk); + } + + resolve(); + }); + }); + } + + const reader = stream.getReader(); + // CHANGED: Introduced a "flush" mechanism to process potential pending messages when the stream ends. + // This change is essential to ensure that we capture every last piece of information from streams, + // such as those from Azure OpenAI, which may not terminate with a blank line. Without this + // mechanism, we risk ignoring a possibly significant last message. + // See https://github.com/langchain-ai/langchainjs/issues/1299 for details. + // eslint-disable-next-line no-constant-condition + while (true) { + const result = await reader.read(); + if (result.done) { + onChunk(new Uint8Array(), true); + break; + } + onChunk(result.value); + } +} + +const enum ControlChars { + NewLine = 10, + CarriageReturn = 13, + Space = 32, + Colon = 58, +} + +/** + * Parses arbitrary byte chunks into EventSource line buffers. + * Each line should be of the format "field: value" and ends with \r, \n, or \r\n. + * @param onLine A function that will be called on each new EventSource line. + * @returns A function that should be called for each incoming byte chunk.
+ */ +export function getLines( + onLine: (line: Uint8Array, fieldLength: number, flush?: boolean) => void +) { + let buffer: Uint8Array | undefined; + let position: number; // current read position + let fieldLength: number; // length of the `field` portion of the line + let discardTrailingNewline = false; + + // return a function that can process each incoming byte chunk: + return function onChunk(arr: Uint8Array, flush?: boolean) { + if (flush) { + onLine(arr, 0, true); + return; + } + + if (buffer === undefined) { + buffer = arr; + position = 0; + fieldLength = -1; + } else { + // we're still parsing the old line. Append the new bytes into buffer: + buffer = concat(buffer, arr); + } + + const bufLength = buffer.length; + let lineStart = 0; // index where the current line starts + while (position < bufLength) { + if (discardTrailingNewline) { + if (buffer[position] === ControlChars.NewLine) { + lineStart = ++position; // skip to next char + } + + discardTrailingNewline = false; + } + + // start looking forward till the end of line: + let lineEnd = -1; // index of the \r or \n char + for (; position < bufLength && lineEnd === -1; ++position) { + switch (buffer[position]) { + case ControlChars.Colon: + if (fieldLength === -1) { + // first colon in line + fieldLength = position - lineStart; + } + break; + // eslint-disable-next-line @typescript-eslint/ban-ts-comment + // @ts-ignore:7029 \r case below should fallthrough to \n: + case ControlChars.CarriageReturn: + discardTrailingNewline = true; + // eslint-disable-next-line no-fallthrough + case ControlChars.NewLine: + lineEnd = position; + break; + } + } + + if (lineEnd === -1) { + // We reached the end of the buffer but the line hasn't ended. 
+ // Wait for the next arr and then continue parsing: + break; + } + + // we've reached the line end, send it out: + onLine(buffer.subarray(lineStart, lineEnd), fieldLength); + lineStart = position; // we're now on the next line + fieldLength = -1; + } + + if (lineStart === bufLength) { + buffer = undefined; // we've finished reading it + } else if (lineStart !== 0) { + // Create a new view into buffer beginning at lineStart so we don't + // need to copy over the previous lines when we get the new arr: + buffer = buffer.subarray(lineStart); + position -= lineStart; + } + }; +} + +/** + * Parses line buffers into EventSourceMessages. + * @param onId A function that will be called on each `id` field. + * @param onRetry A function that will be called on each `retry` field. + * @param onMessage A function that will be called on each message. + * @returns A function that should be called for each incoming line buffer. + */ +export function getMessages( + onMessage?: (msg: EventSourceMessage) => void, + onId?: (id: string) => void, + onRetry?: (retry: number) => void +) { + let message = newMessage(); + const decoder = new TextDecoder(); + + // return a function that can process each incoming line buffer: + return function onLine( + line: Uint8Array, + fieldLength: number, + flush?: boolean + ) { + if (flush) { + if (!isEmpty(message)) { + onMessage?.(message); + message = newMessage(); + } + return; + } + + if (line.length === 0) { + // empty line denotes end of message. Trigger the callback and start a new message: + onMessage?.(message); + message = newMessage(); + } else if (fieldLength > 0) { + // exclude comments and lines with no values + // line is of format ":" or ": " + // https://html.spec.whatwg.org/multipage/server-sent-events.html#event-stream-interpretation + const field = decoder.decode(line.subarray(0, fieldLength)); + const valueOffset = + fieldLength + (line[fieldLength + 1] === ControlChars.Space ? 
2 : 1); + const value = decoder.decode(line.subarray(valueOffset)); + + switch (field) { + case "data": + // if this message already has data, append the new value to the old. + // otherwise, just set to the new value: + message.data = message.data ? message.data + "\n" + value : value; // otherwise, + break; + case "event": + message.event = value; + break; + case "id": + onId?.((message.id = value)); + break; + case "retry": { + const retry = parseInt(value, 10); + if (!Number.isNaN(retry)) { + // per spec, ignore non-integers + onRetry?.((message.retry = retry)); + } + break; + } + } + } + }; +} + +function concat(a: Uint8Array, b: Uint8Array) { + const res = new Uint8Array(a.length + b.length); + res.set(a); + res.set(b, a.length); + return res; +} + +function newMessage(): EventSourceMessage { + // data, event, and id must be initialized to empty strings: + // https://html.spec.whatwg.org/multipage/server-sent-events.html#event-stream-interpretation + // retry should be initialized to undefined so we return a consistent shape + // to the js engine all the time: https://mathiasbynens.be/notes/shapes-ics#takeaways + return { + data: "", + event: "", + id: "", + retry: undefined, + }; +} + +export function convertEventStreamToIterableReadableDataStream( + stream: ReadableStream +) { + const dataStream = new ReadableStream({ + async start(controller) { + const enqueueLine = getMessages((msg) => { + if (msg.data) controller.enqueue(msg.data); + }); + const onLine = ( + line: Uint8Array, + fieldLength: number, + flush?: boolean + ) => { + enqueueLine(line, fieldLength, flush); + if (flush) controller.close(); + }; + await getBytes(stream, getLines(onLine)); + }, + }); + return IterableReadableStream.fromReadableStream(dataStream); +} + +function isEmpty(message: EventSourceMessage): boolean { + return ( + message.data === "" && + message.event === "" && + message.id === "" && + message.retry === undefined + ); +} diff --git 
a/libs/langchain-community/src/util/googlevertexai-connection.ts b/libs/langchain-community/src/util/googlevertexai-connection.ts new file mode 100644 index 000000000000..ee4f83de7e59 --- /dev/null +++ b/libs/langchain-community/src/util/googlevertexai-connection.ts @@ -0,0 +1,423 @@ +import { BaseLanguageModelCallOptions } from "../base_language/index.js"; +import { AsyncCaller, AsyncCallerCallOptions } from "./async_caller.js"; +import type { + GoogleVertexAIBaseLLMInput, + GoogleVertexAIBasePrediction, + GoogleVertexAIConnectionParams, + GoogleVertexAILLMPredictions, + GoogleVertexAIModelParams, + GoogleResponse, + GoogleAbstractedClient, + GoogleAbstractedClientOps, + GoogleAbstractedClientOpsMethod, +} from "../types/googlevertexai-types.js"; +import { GenerationChunk } from "../schema/index.js"; + +export abstract class GoogleConnection< + CallOptions extends AsyncCallerCallOptions, + ResponseType extends GoogleResponse +> { + caller: AsyncCaller; + + client: GoogleAbstractedClient; + + streaming: boolean; + + constructor( + caller: AsyncCaller, + client: GoogleAbstractedClient, + streaming?: boolean + ) { + this.caller = caller; + this.client = client; + this.streaming = streaming ?? 
false; + } + + abstract buildUrl(): Promise; + + abstract buildMethod(): GoogleAbstractedClientOpsMethod; + + async _request( + data: unknown | undefined, + options: CallOptions + ): Promise { + const url = await this.buildUrl(); + const method = this.buildMethod(); + + const opts: GoogleAbstractedClientOps = { + url, + method, + }; + if (data && method === "POST") { + opts.data = data; + } + if (this.streaming) { + opts.responseType = "stream"; + } else { + opts.responseType = "json"; + } + + const callResponse = await this.caller.callWithOptions( + { signal: options?.signal }, + async () => this.client.request(opts) + ); + const response: unknown = callResponse; // Done for typecast safety, I guess + return response; + } +} + +export abstract class GoogleVertexAIConnection< + CallOptions extends AsyncCallerCallOptions, + ResponseType extends GoogleResponse, + AuthOptions + > + extends GoogleConnection + implements GoogleVertexAIConnectionParams +{ + endpoint = "us-central1-aiplatform.googleapis.com"; + + location = "us-central1"; + + apiVersion = "v1"; + + constructor( + fields: GoogleVertexAIConnectionParams | undefined, + caller: AsyncCaller, + client: GoogleAbstractedClient, + streaming?: boolean + ) { + super(caller, client, streaming); + this.caller = caller; + + this.endpoint = fields?.endpoint ?? this.endpoint; + this.location = fields?.location ?? this.location; + this.apiVersion = fields?.apiVersion ?? this.apiVersion; + this.client = client; + } + + buildMethod(): GoogleAbstractedClientOpsMethod { + return "POST"; + } +} + +export function complexValue(value: unknown): unknown { + if (value === null || typeof value === "undefined") { + // I dunno what to put here. 
An error, probably + return undefined; + } else if (typeof value === "object") { + if (Array.isArray(value)) { + return { + list_val: value.map((avalue) => complexValue(avalue)), + }; + } else { + const ret: Record = {}; + // eslint-disable-next-line @typescript-eslint/no-explicit-any + const v: Record = value; + Object.keys(v).forEach((key) => { + ret[key] = complexValue(v[key]); + }); + return { struct_val: ret }; + } + } else if (typeof value === "number") { + if (Number.isInteger(value)) { + return { int_val: value }; + } else { + return { float_val: value }; + } + } else { + return { + string_val: [value], + }; + } +} + +export function simpleValue(val: unknown): unknown { + if (val && typeof val === "object" && !Array.isArray(val)) { + // eslint-disable-next-line no-prototype-builtins + if (val.hasOwnProperty("stringVal")) { + return (val as { stringVal: string[] }).stringVal[0]; + + // eslint-disable-next-line no-prototype-builtins + } else if (val.hasOwnProperty("boolVal")) { + return (val as { boolVal: boolean[] }).boolVal[0]; + + // eslint-disable-next-line no-prototype-builtins + } else if (val.hasOwnProperty("listVal")) { + const { listVal } = val as { listVal: unknown[] }; + return listVal.map((aval) => simpleValue(aval)); + + // eslint-disable-next-line no-prototype-builtins + } else if (val.hasOwnProperty("structVal")) { + const ret: Record = {}; + const struct = (val as { structVal: Record }).structVal; + Object.keys(struct).forEach((key) => { + ret[key] = simpleValue(struct[key]); + }); + return ret; + } else { + const ret: Record = {}; + const struct = val as Record; + Object.keys(struct).forEach((key) => { + ret[key] = simpleValue(struct[key]); + }); + return ret; + } + } else if (Array.isArray(val)) { + return val.map((aval) => simpleValue(aval)); + } else { + return val; + } +} + +export class GoogleVertexAILLMConnection< + CallOptions extends BaseLanguageModelCallOptions, + InstanceType, + PredictionType extends GoogleVertexAIBasePrediction, + 
AuthOptions + > + extends GoogleVertexAIConnection< + CallOptions, + GoogleVertexAILLMResponse, + AuthOptions + > + implements GoogleVertexAIBaseLLMInput +{ + model: string; + + client: GoogleAbstractedClient; + + constructor( + fields: GoogleVertexAIBaseLLMInput | undefined, + caller: AsyncCaller, + client: GoogleAbstractedClient, + streaming?: boolean + ) { + super(fields, caller, client, streaming); + this.client = client; + this.model = fields?.model ?? this.model; + } + + async buildUrl(): Promise { + const projectId = await this.client.getProjectId(); + const method = this.streaming ? "serverStreamingPredict" : "predict"; + const url = `https://${this.endpoint}/v1/projects/${projectId}/locations/${this.location}/publishers/google/models/${this.model}:${method}`; + return url; + } + + formatStreamingData( + inputs: InstanceType[], + parameters: GoogleVertexAIModelParams + ): unknown { + return { + inputs: [inputs.map((i) => complexValue(i))], + parameters: complexValue(parameters), + }; + } + + formatStandardData( + instances: InstanceType[], + parameters: GoogleVertexAIModelParams + ): unknown { + return { + instances, + parameters, + }; + } + + formatData( + instances: InstanceType[], + parameters: GoogleVertexAIModelParams + ): unknown { + return this.streaming + ? this.formatStreamingData(instances, parameters) + : this.formatStandardData(instances, parameters); + } + + async request( + instances: InstanceType[], + parameters: GoogleVertexAIModelParams, + options: CallOptions + ): Promise> { + const data = this.formatData(instances, parameters); + const response = await this._request(data, options); + return response; + } +} + +export interface GoogleVertexAILLMResponse< + PredictionType extends GoogleVertexAIBasePrediction +> extends GoogleResponse { + data: GoogleVertexAIStream | GoogleVertexAILLMPredictions; +} + +export class GoogleVertexAIStream { + _buffer = ""; + + _bufferOpen = true; + + _firstRun = true; + + /** + * Add data to the buffer. 
This may cause chunks to be generated, if available. + * @param data + */ + appendBuffer(data: string): void { + this._buffer += data; + // Our first time, skip to the opening of the array + if (this._firstRun) { + this._skipTo("["); + this._firstRun = false; + } + + this._parseBuffer(); + } + + /** + * Indicate there is no more data that will be added to the text buffer. + * This should be called when all the data has been read and added to indicate + * that we should process everything remaining in the buffer. + */ + closeBuffer(): void { + this._bufferOpen = false; + this._parseBuffer(); + } + + /** + * Skip characters in the buffer till we get to the start of an object. + * Then attempt to read a full object. + * If we do read a full object, turn it into a chunk and send it to the chunk handler. + * Repeat this for as much as we can. + */ + _parseBuffer(): void { + let obj = null; + do { + this._skipTo("{"); + obj = this._getFullObject(); + if (obj !== null) { + const chunk = this._simplifyObject(obj); + this._handleChunk(chunk); + } + } while (obj !== null); + + if (!this._bufferOpen) { + // No more data will be added, and we have parsed everything we could, + // so everything else is garbage. + this._handleChunk(null); + this._buffer = ""; + } + } + + /** + * If the string is present, move the start of the buffer to the first occurrence + * of that string. This is useful for skipping over elements or parts that we're not + * really interested in parsing. (ie - the opening characters, comma separators, etc.) + * @param start The string to start the buffer with + */ + _skipTo(start: string): void { + const index = this._buffer.indexOf(start); + if (index > 0) { + this._buffer = this._buffer.slice(index); + } + } + + /** + * Given what is in the buffer, parse a single object out of it. + * If a complete object isn't available, return null. + * Assumes that we are at the start of an object to parse. 
+ */ + _getFullObject(): object | null { + let ret: object | null = null; + + // Loop while we don't have something to return AND we have something in the buffer + let index = 0; + while (ret === null && this._buffer.length > index) { + // Advance to the next close bracket after our current index + index = this._buffer.indexOf("}", index + 1); + + // If we don't find one, exit with null + if (index === -1) { + return null; + } + + // If we have one, try to turn it into an object to return + try { + const objStr = this._buffer.substring(0, index + 1); + ret = JSON.parse(objStr); + + // We only get here if it parsed it ok + // If we did turn it into an object, remove it from the buffer + this._buffer = this._buffer.slice(index + 1); + } catch (xx) { + // It didn't parse it correctly, so we swallow the exception and continue + } + } + + return ret; + } + + _simplifyObject(obj: unknown): object { + return simpleValue(obj) as object; + } + + // Set up a potential Promise that the handler can resolve. + // eslint-disable-next-line @typescript-eslint/no-explicit-any + _chunkResolution: (chunk: any) => void; + + // If there is no Promise (it is null), the handler must add it to the queue + // eslint-disable-next-line @typescript-eslint/no-explicit-any + _chunkPending: Promise | null = null; + + // A queue that will collect chunks while there is no Promise + // eslint-disable-next-line @typescript-eslint/no-explicit-any + _chunkQueue: any[] = []; + + /** + * Register that we have another chunk available for consumption. + * If we are waiting for a chunk, resolve the promise waiting for it immediately. + * If not, then add it to the queue. + * @param chunk + */ + // eslint-disable-next-line @typescript-eslint/no-explicit-any + _handleChunk(chunk: any): void { + if (this._chunkPending) { + this._chunkResolution(chunk); + this._chunkPending = null; + } else { + this._chunkQueue.push(chunk); + } + } + + /** + * Get the next chunk that is coming from the stream. 
+ * This chunk may be null, usually indicating the last chunk in the stream. + */ + // eslint-disable-next-line @typescript-eslint/no-explicit-any + async nextChunk(): Promise { + if (this._chunkQueue.length > 0) { + // If there is data in the queue, return the next queue chunk + return this._chunkQueue.shift() as GenerationChunk; + } else { + // Otherwise, set up a promise that handleChunk will cause to be resolved + this._chunkPending = new Promise((resolve) => { + this._chunkResolution = resolve; + }); + return this._chunkPending; + } + } + + /** + * Is the stream done? + * A stream is only done if all of the following are true: + * - There is no more data to be added to the text buffer + * - There is no more data in the text buffer + * - There are no chunks that are waiting to be consumed + */ + get streamDone(): boolean { + return ( + !this._bufferOpen && + this._buffer.length === 0 && + this._chunkQueue.length === 0 && + this._chunkPending === null + ); + } +} diff --git a/libs/langchain-community/src/util/googlevertexai-gauth.ts b/libs/langchain-community/src/util/googlevertexai-gauth.ts new file mode 100644 index 000000000000..e952391ef398 --- /dev/null +++ b/libs/langchain-community/src/util/googlevertexai-gauth.ts @@ -0,0 +1,38 @@ +import { Readable } from "stream"; +import { GoogleAuth, GoogleAuthOptions } from "google-auth-library"; +import { + GoogleAbstractedClient, + GoogleAbstractedClientOps, +} from "../types/googlevertexai-types.js"; +import { GoogleVertexAIStream } from "./googlevertexai-connection.js"; + +class GoogleVertexAINodeStream extends GoogleVertexAIStream { + constructor(data: Readable) { + super(); + + data.on("data", (data) => this.appendBuffer(data.toString())); + data.on("end", () => this.closeBuffer()); + } +} + +export class GAuthClient implements GoogleAbstractedClient { + gauth: GoogleAuth; + + constructor(options?: GoogleAuthOptions) { + this.gauth = new GoogleAuth(options); + } + + async getProjectId(): Promise { + return 
this.gauth.getProjectId(); + } + + async request(opts: GoogleAbstractedClientOps): Promise { + const ret = await this.gauth.request(opts); + return opts.responseType !== "stream" + ? ret + : { + ...ret, + data: new GoogleVertexAINodeStream(ret.data), + }; + } +} diff --git a/libs/langchain-community/src/util/googlevertexai-webauth.ts b/libs/langchain-community/src/util/googlevertexai-webauth.ts new file mode 100644 index 000000000000..87168fbaa1b0 --- /dev/null +++ b/libs/langchain-community/src/util/googlevertexai-webauth.ts @@ -0,0 +1,119 @@ +import { + getAccessToken, + getCredentials, + Credentials, +} from "web-auth-library/google"; +import { getEnvironmentVariable } from "./env.js"; +import type { + GoogleAbstractedClient, + GoogleAbstractedClientOps, +} from "../types/googlevertexai-types.js"; +import { GoogleVertexAIStream } from "./googlevertexai-connection.js"; + +class GoogleVertexAIResponseStream extends GoogleVertexAIStream { + decoder: TextDecoder; + + constructor(body: ReadableStream | null) { + super(); + this.decoder = new TextDecoder(); + if (body) { + void this.run(body); + } else { + console.error("Unexpected empty body while streaming"); + } + } + + async run(body: ReadableStream) { + const reader = body.getReader(); + let isDone = false; + while (!isDone) { + const { value, done } = await reader.read(); + if (!done) { + const svalue = this.decoder.decode(value); + this.appendBuffer(svalue); + } else { + isDone = done; + this.closeBuffer(); + } + } + } +} + +export type WebGoogleAuthOptions = { + credentials: string | Credentials; + scope?: string | string[]; + accessToken?: string; +}; + +export class WebGoogleAuth implements GoogleAbstractedClient { + options: WebGoogleAuthOptions; + + constructor(options?: WebGoogleAuthOptions) { + const accessToken = options?.accessToken; + + const credentials = + options?.credentials ?? 
+ getEnvironmentVariable("GOOGLE_VERTEX_AI_WEB_CREDENTIALS"); + if (credentials === undefined) + throw new Error( + `Credentials not found. Please set the GOOGLE_VERTEX_AI_WEB_CREDENTIALS environment variable or pass credentials into "authOptions.credentials".` + ); + + const scope = + options?.scope ?? "https://www.googleapis.com/auth/cloud-platform"; + + this.options = { ...options, accessToken, credentials, scope }; + } + + async getProjectId() { + const credentials = getCredentials(this.options.credentials); + return credentials.project_id; + } + + async request(opts: GoogleAbstractedClientOps) { + let { accessToken } = this.options; + + if (accessToken === undefined) { + accessToken = await getAccessToken(this.options); + } + + if (opts.url == null) throw new Error("Missing URL"); + const fetchOptions: { + method?: string; + headers: Record; + body?: string; + } = { + method: opts.method, + headers: { + Authorization: `Bearer ${accessToken}`, + "Content-Type": "application/json", + }, + }; + if (opts.data !== undefined) { + fetchOptions.body = JSON.stringify(opts.data); + } + + const res = await fetch(opts.url, fetchOptions); + + if (!res.ok) { + const error = new Error( + `Could not get access token for Vertex AI with status code: ${res.status}` + ); + // eslint-disable-next-line @typescript-eslint/no-explicit-any + (error as any).response = res; + throw error; + } + + return { + data: + opts.responseType === "json" + ? 
await res.json() + : new GoogleVertexAIResponseStream(res.body), + config: {}, + status: res.status, + statusText: res.statusText, + headers: res.headers, + request: { responseURL: res.url }, + }; + } +} diff --git a/libs/langchain-community/src/util/iflytek_websocket_stream.ts b/libs/langchain-community/src/util/iflytek_websocket_stream.ts new file mode 100644 index 000000000000..85766ad37281 --- /dev/null +++ b/libs/langchain-community/src/util/iflytek_websocket_stream.ts @@ -0,0 +1,95 @@ +export interface WebSocketConnection< + T extends Uint8Array | string = Uint8Array | string +> { + readable: ReadableStream; + writable: WritableStream; + protocol: string; + extensions: string; +} + +export interface WebSocketCloseInfo { + code?: number; + reason?: string; +} + +export interface WebSocketStreamOptions { + protocols?: string[]; + signal?: AbortSignal; +} + +/** + * [WebSocket](https://developer.mozilla.org/en-US/docs/Web/API/WebSocket) with [Streams API](https://developer.mozilla.org/en-US/docs/Web/API/Streams_API) + * + * @see https://web.dev/websocketstream/ + */ +export abstract class BaseWebSocketStream< + T extends Uint8Array | string = Uint8Array | string +> { + readonly url: string; + + readonly connection: Promise>; + + readonly closed: Promise; + + readonly close: (closeInfo?: WebSocketCloseInfo) => void; + + constructor(url: string, options: WebSocketStreamOptions = {}) { + if (options.signal?.aborted) { + throw new DOMException("This operation was aborted", "AbortError"); + } + + this.url = url; + + const ws = this.openWebSocket(url, options); + + const closeWithInfo = ({ code, reason }: WebSocketCloseInfo = {}) => + ws.close(code, reason); + + this.connection = new Promise((resolve, reject) => { + ws.onopen = () => { + resolve({ + readable: new ReadableStream({ + start(controller) { + ws.onmessage = ({ data }) => controller.enqueue(data); + ws.onerror = (e) => controller.error(e); + }, + cancel: closeWithInfo, + }), + writable: new WritableStream({ 
+ write(chunk) { + ws.send(chunk); + }, + abort() { + ws.close(); + }, + close: closeWithInfo, + }), + protocol: ws.protocol, + extensions: ws.extensions, + }); + ws.removeEventListener("error", reject); + }; + ws.addEventListener("error", reject); + }); + + this.closed = new Promise((resolve, reject) => { + ws.onclose = ({ code, reason }) => { + resolve({ code, reason }); + ws.removeEventListener("error", reject); + }; + ws.addEventListener("error", reject); + }); + + if (options.signal) { + // eslint-disable-next-line no-param-reassign + options.signal.onabort = () => ws.close(); + } + + this.close = closeWithInfo; + } + + abstract openWebSocket( + url: string, + options: WebSocketStreamOptions + ): WebSocket; +} diff --git a/libs/langchain-community/src/util/llama_cpp.ts b/libs/langchain-community/src/util/llama_cpp.ts new file mode 100644 index 000000000000..d961ab510942 --- /dev/null +++ b/libs/langchain-community/src/util/llama_cpp.ts @@ -0,0 +1,79 @@ +import { LlamaModel, LlamaContext, LlamaChatSession } from "node-llama-cpp"; + +/** + * Note that the modelPath is the only required parameter. For testing you + * can set this in the environment variable `LLAMA_PATH`. + */ +export interface LlamaBaseCppInputs { + /** Prompt processing batch size. */ + batchSize?: number; + /** Text context size. */ + contextSize?: number; + /** Embedding mode only. */ + embedding?: boolean; + /** Use fp16 for KV cache. */ + f16Kv?: boolean; + /** Number of layers to store in VRAM. */ + gpuLayers?: number; + /** The llama_eval() call computes all logits, not just the last one. */ + logitsAll?: boolean; + /** */ + maxTokens?: number; + /** Path to the model on the filesystem. */ + modelPath: string; + /** Add the begining of sentence token. */ + prependBos?: boolean; + /** If null, a random seed will be used. */ + seed?: null | number; + /** The randomness of the responses, e.g. 0.1 deterministic, 1.5 creative, 0.8 balanced, 0 disables. 
*/ + temperature?: number; + /** Number of threads to use to evaluate tokens. */ + threads?: number; + /** Trim whitespace from the end of the generated text Disabled by default. */ + trimWhitespaceSuffix?: boolean; + /** Consider the n most likely tokens, where n is 1 to vocabulary size, 0 disables (uses full vocabulary). Note: only applies when `temperature` > 0. */ + topK?: number; + /** Selects the smallest token set whose probability exceeds P, where P is between 0 - 1, 1 disables. Note: only applies when `temperature` > 0. */ + topP?: number; + /** Force system to keep model in RAM. */ + useMlock?: boolean; + /** Use mmap if possible. */ + useMmap?: boolean; + /** Only load the vocabulary, no weights. */ + vocabOnly?: boolean; +} + +export function createLlamaModel(inputs: LlamaBaseCppInputs): LlamaModel { + const options = { + gpuLayers: inputs?.gpuLayers, + modelPath: inputs.modelPath, + useMlock: inputs?.useMlock, + useMmap: inputs?.useMmap, + vocabOnly: inputs?.vocabOnly, + }; + + return new LlamaModel(options); +} + +export function createLlamaContext( + model: LlamaModel, + inputs: LlamaBaseCppInputs +): LlamaContext { + const options = { + batchSize: inputs?.batchSize, + contextSize: inputs?.contextSize, + embedding: inputs?.embedding, + f16Kv: inputs?.f16Kv, + logitsAll: inputs?.logitsAll, + model, + prependBos: inputs?.prependBos, + seed: inputs?.seed, + threads: inputs?.threads, + }; + + return new LlamaContext(options); +} + +export function createLlamaSession(context: LlamaContext): LlamaChatSession { + return new LlamaChatSession({ context }); +} diff --git a/libs/langchain-community/src/util/ollama.ts b/libs/langchain-community/src/util/ollama.ts new file mode 100644 index 000000000000..30f675f0d9b3 --- /dev/null +++ b/libs/langchain-community/src/util/ollama.ts @@ -0,0 +1,146 @@ +import type { BaseLanguageModelCallOptions } from "@langchain/core/language_models/base"; +import { IterableReadableStream } from "@langchain/core/utils/stream"; 
+import type { StringWithAutocomplete } from "@langchain/core/utils/types"; + +export interface OllamaInput { + embeddingOnly?: boolean; + f16KV?: boolean; + frequencyPenalty?: number; + logitsAll?: boolean; + lowVram?: boolean; + mainGpu?: number; + model?: string; + baseUrl?: string; + mirostat?: number; + mirostatEta?: number; + mirostatTau?: number; + numBatch?: number; + numCtx?: number; + numGpu?: number; + numGqa?: number; + numKeep?: number; + numThread?: number; + penalizeNewline?: boolean; + presencePenalty?: number; + repeatLastN?: number; + repeatPenalty?: number; + ropeFrequencyBase?: number; + ropeFrequencyScale?: number; + temperature?: number; + stop?: string[]; + tfsZ?: number; + topK?: number; + topP?: number; + typicalP?: number; + useMLock?: boolean; + useMMap?: boolean; + vocabOnly?: boolean; + format?: StringWithAutocomplete<"json">; +} + +export interface OllamaRequestParams { + model: string; + prompt: string; + format?: StringWithAutocomplete<"json">; + options: { + embedding_only?: boolean; + f16_kv?: boolean; + frequency_penalty?: number; + logits_all?: boolean; + low_vram?: boolean; + main_gpu?: number; + mirostat?: number; + mirostat_eta?: number; + mirostat_tau?: number; + num_batch?: number; + num_ctx?: number; + num_gpu?: number; + num_gqa?: number; + num_keep?: number; + num_thread?: number; + penalize_newline?: boolean; + presence_penalty?: number; + repeat_last_n?: number; + repeat_penalty?: number; + rope_frequency_base?: number; + rope_frequency_scale?: number; + temperature?: number; + stop?: string[]; + tfs_z?: number; + top_k?: number; + top_p?: number; + typical_p?: number; + use_mlock?: boolean; + use_mmap?: boolean; + vocab_only?: boolean; + }; +} + +export interface OllamaCallOptions extends BaseLanguageModelCallOptions {} + +export type OllamaGenerationChunk = { + response: string; + model: string; + created_at: string; + done: boolean; + total_duration?: number; + load_duration?: number; + prompt_eval_count?: number; + 
prompt_eval_duration?: number; + eval_count?: number; + eval_duration?: number; +}; + +export async function* createOllamaStream( + baseUrl: string, + params: OllamaRequestParams, + options: OllamaCallOptions +): AsyncGenerator { + let formattedBaseUrl = baseUrl; + if (formattedBaseUrl.startsWith("http://localhost:")) { + // Node 18 has issues with resolving "localhost" + // See https://github.com/node-fetch/node-fetch/issues/1624 + formattedBaseUrl = formattedBaseUrl.replace( + "http://localhost:", + "http://127.0.0.1:" + ); + } + const response = await fetch(`${formattedBaseUrl}/api/generate`, { + method: "POST", + body: JSON.stringify(params), + headers: { + "Content-Type": "application/json", + }, + signal: options.signal, + }); + if (!response.ok) { + const json = await response.json(); + const error = new Error( + `Ollama call failed with status code ${response.status}: ${json.error}` + ); + // eslint-disable-next-line @typescript-eslint/no-explicit-any + (error as any).response = response; + throw error; + } + if (!response.body) { + throw new Error( + "Could not begin Ollama stream. Please check the given URL and try again." 
+ ); + } + + const stream = IterableReadableStream.fromReadableStream(response.body); + const decoder = new TextDecoder(); + let extra = ""; + for await (const chunk of stream) { + const decoded = extra + decoder.decode(chunk); + const lines = decoded.split("\n"); + extra = lines.pop() || ""; + for (const line of lines) { + try { + yield JSON.parse(line); + } catch (e) { + console.warn(`Received a non-JSON parseable chunk: ${line}`); + } + } + } +} diff --git a/libs/langchain-community/src/vectorstores.ts b/libs/langchain-community/src/vectorstores.ts new file mode 100644 index 000000000000..27c83543801b --- /dev/null +++ b/libs/langchain-community/src/vectorstores.ts @@ -0,0 +1,80 @@ +import { Embeddings } from "@langchain/core/embeddings"; +import { VectorStore } from "@langchain/core/vectorstores"; +import { Document } from "@langchain/core/documents"; + +/** + * Database config for your vectorstore. + */ +export interface VectorstoreIntegrationParams {} + +/** + * Class for managing and operating vector search applications with + * Tigris, an open-source Serverless NoSQL Database and Search Platform. + */ +export class VectorstoreIntegration extends VectorStore { + // Replace + _vectorstoreType(): string { + return "vectorstore_integration"; + } + + constructor(embeddings: Embeddings, params: VectorstoreIntegrationParams) { + super(embeddings, params); + this.embeddings = embeddings; + } + + /** + * Method to add an array of documents to the vectorstore. + * + * Useful to override in case your vectorstore doesn't work directly with embeddings. + */ + async addDocuments( + documents: Document[], + options?: { ids?: string[] } | string[] + ): Promise { + const texts = documents.map(({ pageContent }) => pageContent); + await this.addVectors( + await this.embeddings.embedDocuments(texts), + documents, + options + ); + } + + /** + * Method to add raw vectors to the vectorstore. 
+ */ + async addVectors( + _vectors: number[][], + _documents: Document[], + _options?: { ids?: string[] } | string[] + ) { + throw new Error("Not implemented."); + } + + /** + * Method to perform a similarity search over the vectorstore and return + * the k most similar vectors along with their similarity scores. + */ + async similaritySearchVectorWithScore( + _query: number[], + _k: number, + _filter?: object + ): Promise<[Document, number][]> { + throw new Error("Not implemented."); + } + + /** + * Static method to create a new instance of the vectorstore from an + * array of Document instances. + * + * Other common static initializer names are fromExistingIndex, initialize, and fromTexts. + */ + static async fromDocuments( + docs: Document[], + embeddings: Embeddings, + dbConfig: VectorstoreIntegrationParams + ): Promise { + const instance = new this(embeddings, dbConfig); + await instance.addDocuments(docs); + return instance; + } +} diff --git a/libs/langchain-community/tsconfig.cjs.json b/libs/langchain-community/tsconfig.cjs.json new file mode 100644 index 000000000000..3b7026ea406c --- /dev/null +++ b/libs/langchain-community/tsconfig.cjs.json @@ -0,0 +1,8 @@ +{ + "extends": "./tsconfig.json", + "compilerOptions": { + "module": "commonjs", + "declaration": false + }, + "exclude": ["node_modules", "dist", "docs", "**/tests"] +} diff --git a/libs/langchain-community/tsconfig.json b/libs/langchain-community/tsconfig.json new file mode 100644 index 000000000000..bc85d83b6229 --- /dev/null +++ b/libs/langchain-community/tsconfig.json @@ -0,0 +1,23 @@ +{ + "extends": "@tsconfig/recommended", + "compilerOptions": { + "outDir": "../dist", + "rootDir": "./src", + "target": "ES2021", + "lib": ["ES2021", "ES2022.Object", "DOM"], + "module": "ES2020", + "moduleResolution": "nodenext", + "esModuleInterop": true, + "declaration": true, + "noImplicitReturns": true, + "noFallthroughCasesInSwitch": true, + "noUnusedLocals": true, + "noUnusedParameters": true, + 
"useDefineForClassFields": true, + "strictPropertyInitialization": false, + "allowJs": true, + "strict": true + }, + "include": ["src/**/*"], + "exclude": ["node_modules", "dist", "docs"] +} diff --git a/yarn.lock b/yarn.lock index eeb8c0ec538e..091751a8be8f 100644 --- a/yarn.lock +++ b/yarn.lock @@ -7997,6 +7997,462 @@ __metadata: languageName: unknown linkType: soft +"@langchain/community@workspace:*, @langchain/community@workspace:libs/langchain-community": + version: 0.0.0-use.local + resolution: "@langchain/community@workspace:libs/langchain-community" + dependencies: + "@aws-crypto/sha256-js": ^5.0.0 + "@aws-sdk/client-bedrock-runtime": ^3.422.0 + "@aws-sdk/client-dynamodb": ^3.310.0 + "@aws-sdk/client-kendra": ^3.352.0 + "@aws-sdk/client-lambda": ^3.310.0 + "@aws-sdk/client-s3": ^3.310.0 + "@aws-sdk/client-sagemaker-runtime": ^3.414.0 + "@aws-sdk/client-sfn": ^3.362.0 + "@aws-sdk/credential-provider-node": ^3.388.0 + "@aws-sdk/types": ^3.357.0 + "@azure/storage-blob": ^12.15.0 + "@clickhouse/client": ^0.2.5 + "@cloudflare/ai": ^1.0.12 + "@cloudflare/workers-types": ^4.20230922.0 + "@elastic/elasticsearch": ^8.4.0 + "@faker-js/faker": ^7.6.0 + "@getmetal/metal-sdk": ^4.0.0 + "@getzep/zep-js": ^0.9.0 + "@gomomento/sdk": ^1.51.1 + "@gomomento/sdk-core": ^1.51.1 + "@google-ai/generativelanguage": ^0.2.1 + "@google-cloud/storage": ^6.10.1 + "@gradientai/nodejs-sdk": ^1.2.0 + "@huggingface/inference": ^2.6.4 + "@jest/globals": ^29.5.0 + "@langchain/core": ~0.0.9 + "@langchain/openai": "workspace:^" + "@mozilla/readability": ^0.4.4 + "@notionhq/client": ^2.2.10 + "@opensearch-project/opensearch": ^2.2.0 + "@pinecone-database/pinecone": ^1.1.0 + "@planetscale/database": ^1.8.0 + "@qdrant/js-client-rest": ^1.2.0 + "@raycast/api": ^1.55.2 + "@rockset/client": ^0.9.1 + "@smithy/eventstream-codec": ^2.0.5 + "@smithy/protocol-http": ^3.0.6 + "@smithy/signature-v4": ^2.0.10 + "@smithy/util-utf8": ^2.0.0 + "@supabase/postgrest-js": ^1.1.1 + "@supabase/supabase-js": ^2.10.0 
+ "@swc/core": ^1.3.90 + "@swc/jest": ^0.2.29 + "@tensorflow-models/universal-sentence-encoder": ^1.3.3 + "@tensorflow/tfjs-backend-cpu": ^3 + "@tensorflow/tfjs-converter": ^3.6.0 + "@tensorflow/tfjs-core": ^3.6.0 + "@tsconfig/recommended": ^1.0.2 + "@types/d3-dsv": ^2 + "@types/decamelize": ^1.2.0 + "@types/flat": ^5.0.2 + "@types/html-to-text": ^9 + "@types/js-yaml": ^4 + "@types/jsdom": ^21.1.1 + "@types/lodash": ^4 + "@types/mozilla-readability": ^0.2.1 + "@types/pdf-parse": ^1.1.1 + "@types/pg": ^8 + "@types/pg-copy-streams": ^1.2.2 + "@types/uuid": ^9 + "@types/ws": ^8 + "@typescript-eslint/eslint-plugin": ^5.58.0 + "@typescript-eslint/parser": ^5.58.0 + "@upstash/redis": ^1.20.6 + "@vercel/kv": ^0.2.3 + "@vercel/postgres": ^0.5.0 + "@writerai/writer-sdk": ^0.40.2 + "@xata.io/client": ^0.28.0 + "@xenova/transformers": ^2.5.4 + "@zilliz/milvus2-sdk-node": ">=2.2.11" + apify-client: ^2.7.1 + assemblyai: ^2.0.2 + axios: ^0.26.0 + cassandra-driver: ^4.7.2 + cheerio: ^1.0.0-rc.12 + chromadb: ^1.5.3 + closevector-common: 0.1.0-alpha.1 + closevector-node: 0.1.0-alpha.10 + closevector-web: 0.1.0-alpha.15 + cohere-ai: ">=6.0.0" + convex: ^1.3.1 + d3-dsv: ^2.0.0 + dotenv: ^16.0.3 + dpdm: ^3.12.0 + epub2: ^3.0.1 + eslint: ^8.33.0 + eslint-config-airbnb-base: ^15.0.0 + eslint-config-prettier: ^8.6.0 + eslint-plugin-import: ^2.27.5 + eslint-plugin-jest: ^27.6.0 + eslint-plugin-no-instanceof: ^1.0.1 + eslint-plugin-prettier: ^4.2.1 + faiss-node: ^0.5.1 + fast-xml-parser: ^4.2.7 + firebase-admin: ^11.9.0 + google-auth-library: ^8.9.0 + googleapis: ^126.0.1 + graphql: ^16.6.0 + hnswlib-node: ^1.4.2 + html-to-text: ^9.0.5 + ignore: ^5.2.0 + ioredis: ^5.3.2 + jest: ^29.5.0 + jest-environment-node: ^29.6.4 + jsdom: ^22.1.0 + llmonitor: ^0.5.9 + lodash: ^4.17.21 + mammoth: ^1.5.1 + ml-matrix: ^6.10.4 + mongodb: ^5.2.0 + mysql2: ^3.3.3 + neo4j-driver: ^5.12.0 + node-llama-cpp: 2.7.3 + notion-to-md: ^3.1.0 + officeparser: ^4.0.4 + pdf-parse: 1.1.1 + peggy: ^3.0.2 + pg: ^8.11.0 + 
pg-copy-streams: ^6.0.5 + pickleparser: ^0.2.1 + playwright: ^1.32.1 + portkey-ai: ^0.1.11 + prettier: ^2.8.3 + puppeteer: ^19.7.2 + pyodide: ^0.24.1 + redis: ^4.6.6 + release-it: ^15.10.1 + replicate: ^0.18.0 + rimraf: ^5.0.1 + rollup: ^3.19.1 + sonix-speech-recognition: ^2.1.1 + sqlite3: ^5.1.4 + srt-parser-2: ^1.2.2 + ts-jest: ^29.1.0 + typeorm: ^0.3.12 + typescript: ~5.1.6 + typesense: ^1.5.3 + usearch: ^1.1.1 + vectordb: ^0.1.4 + voy-search: 0.6.2 + weaviate-ts-client: ^1.4.0 + web-auth-library: ^1.0.3 + youtube-transcript: ^1.0.6 + youtubei.js: ^5.8.0 + peerDependencies: + "@aws-crypto/sha256-js": ^5.0.0 + "@aws-sdk/client-bedrock-runtime": ^3.422.0 + "@aws-sdk/client-dynamodb": ^3.310.0 + "@aws-sdk/client-kendra": ^3.352.0 + "@aws-sdk/client-lambda": ^3.310.0 + "@aws-sdk/client-s3": ^3.310.0 + "@aws-sdk/client-sagemaker-runtime": ^3.310.0 + "@aws-sdk/client-sfn": ^3.310.0 + "@aws-sdk/credential-provider-node": ^3.388.0 + "@azure/storage-blob": ^12.15.0 + "@clickhouse/client": ^0.2.5 + "@cloudflare/ai": ^1.0.12 + "@elastic/elasticsearch": ^8.4.0 + "@getmetal/metal-sdk": "*" + "@getzep/zep-js": ^0.9.0 + "@gomomento/sdk": ^1.51.1 + "@gomomento/sdk-core": ^1.51.1 + "@gomomento/sdk-web": ^1.51.1 + "@google-ai/generativelanguage": ^0.2.1 + "@google-cloud/storage": ^6.10.1 + "@gradientai/nodejs-sdk": ^1.2.0 + "@huggingface/inference": ^2.6.4 + "@mozilla/readability": "*" + "@notionhq/client": ^2.2.10 + "@opensearch-project/opensearch": "*" + "@pinecone-database/pinecone": ^1.1.0 + "@planetscale/database": ^1.8.0 + "@qdrant/js-client-rest": ^1.2.0 + "@raycast/api": ^1.55.2 + "@rockset/client": ^0.9.1 + "@smithy/eventstream-codec": ^2.0.5 + "@smithy/protocol-http": ^3.0.6 + "@smithy/signature-v4": ^2.0.10 + "@smithy/util-utf8": ^2.0.0 + "@supabase/postgrest-js": ^1.1.1 + "@supabase/supabase-js": ^2.10.0 + "@tensorflow-models/universal-sentence-encoder": "*" + "@tensorflow/tfjs-converter": "*" + "@tensorflow/tfjs-core": "*" + "@upstash/redis": ^1.20.6 + "@vercel/kv": 
^0.2.3 + "@vercel/postgres": ^0.5.0 + "@writerai/writer-sdk": ^0.40.2 + "@xata.io/client": ^0.28.0 + "@xenova/transformers": ^2.5.4 + "@zilliz/milvus2-sdk-node": ">=2.2.7" + apify-client: ^2.7.1 + assemblyai: ^2.0.2 + axios: "*" + cassandra-driver: ^4.7.2 + cheerio: ^1.0.0-rc.12 + chromadb: "*" + closevector-common: 0.1.0-alpha.1 + closevector-node: 0.1.0-alpha.10 + closevector-web: 0.1.0-alpha.16 + cohere-ai: ">=6.0.0" + convex: ^1.3.1 + d3-dsv: ^2.0.0 + epub2: ^3.0.1 + faiss-node: ^0.5.1 + fast-xml-parser: ^4.2.7 + firebase-admin: ^11.9.0 + google-auth-library: ^8.9.0 + googleapis: ^126.0.1 + hnswlib-node: ^1.4.2 + html-to-text: ^9.0.5 + ignore: ^5.2.0 + ioredis: ^5.3.2 + jsdom: "*" + llmonitor: ^0.5.9 + lodash: ^4.17.21 + mammoth: "*" + mongodb: ^5.2.0 + mysql2: ^3.3.3 + neo4j-driver: "*" + node-llama-cpp: "*" + notion-to-md: ^3.1.0 + officeparser: ^4.0.4 + pdf-parse: 1.1.1 + peggy: ^3.0.2 + pg: ^8.11.0 + pg-copy-streams: ^6.0.5 + pickleparser: ^0.2.1 + playwright: ^1.32.1 + portkey-ai: ^0.1.11 + puppeteer: ^19.7.2 + pyodide: ^0.24.1 + redis: ^4.6.4 + replicate: ^0.18.0 + sonix-speech-recognition: ^2.1.1 + srt-parser-2: ^1.2.2 + typeorm: ^0.3.12 + typesense: ^1.5.3 + usearch: ^1.1.1 + vectordb: ^0.1.4 + voy-search: 0.6.2 + weaviate-ts-client: ^1.4.0 + web-auth-library: ^1.0.3 + ws: ^8.14.2 + youtube-transcript: ^1.0.6 + youtubei.js: ^5.8.0 + peerDependenciesMeta: + "@aws-crypto/sha256-js": + optional: true + "@aws-sdk/client-bedrock-runtime": + optional: true + "@aws-sdk/client-dynamodb": + optional: true + "@aws-sdk/client-kendra": + optional: true + "@aws-sdk/client-lambda": + optional: true + "@aws-sdk/client-s3": + optional: true + "@aws-sdk/client-sagemaker-runtime": + optional: true + "@aws-sdk/client-sfn": + optional: true + "@aws-sdk/credential-provider-node": + optional: true + "@azure/storage-blob": + optional: true + "@clickhouse/client": + optional: true + "@cloudflare/ai": + optional: true + "@elastic/elasticsearch": + optional: true + 
"@getmetal/metal-sdk": + optional: true + "@getzep/zep-js": + optional: true + "@gomomento/sdk": + optional: true + "@gomomento/sdk-core": + optional: true + "@gomomento/sdk-web": + optional: true + "@google-ai/generativelanguage": + optional: true + "@google-cloud/storage": + optional: true + "@gradientai/nodejs-sdk": + optional: true + "@huggingface/inference": + optional: true + "@mozilla/readability": + optional: true + "@notionhq/client": + optional: true + "@opensearch-project/opensearch": + optional: true + "@pinecone-database/pinecone": + optional: true + "@planetscale/database": + optional: true + "@qdrant/js-client-rest": + optional: true + "@raycast/api": + optional: true + "@rockset/client": + optional: true + "@smithy/eventstream-codec": + optional: true + "@smithy/protocol-http": + optional: true + "@smithy/signature-v4": + optional: true + "@smithy/util-utf8": + optional: true + "@supabase/postgrest-js": + optional: true + "@supabase/supabase-js": + optional: true + "@tensorflow-models/universal-sentence-encoder": + optional: true + "@tensorflow/tfjs-converter": + optional: true + "@tensorflow/tfjs-core": + optional: true + "@upstash/redis": + optional: true + "@vercel/kv": + optional: true + "@vercel/postgres": + optional: true + "@writerai/writer-sdk": + optional: true + "@xata.io/client": + optional: true + "@xenova/transformers": + optional: true + "@zilliz/milvus2-sdk-node": + optional: true + apify-client: + optional: true + assemblyai: + optional: true + axios: + optional: true + cassandra-driver: + optional: true + cheerio: + optional: true + chromadb: + optional: true + closevector-common: + optional: true + closevector-node: + optional: true + closevector-web: + optional: true + cohere-ai: + optional: true + convex: + optional: true + d3-dsv: + optional: true + epub2: + optional: true + faiss-node: + optional: true + fast-xml-parser: + optional: true + firebase-admin: + optional: true + google-auth-library: + optional: true + googleapis: + 
optional: true + hnswlib-node: + optional: true + html-to-text: + optional: true + ignore: + optional: true + ioredis: + optional: true + jsdom: + optional: true + llmonitor: + optional: true + lodash: + optional: true + mammoth: + optional: true + mongodb: + optional: true + mysql2: + optional: true + neo4j-driver: + optional: true + node-llama-cpp: + optional: true + notion-to-md: + optional: true + officeparser: + optional: true + pdf-parse: + optional: true + peggy: + optional: true + pg: + optional: true + pg-copy-streams: + optional: true + pickleparser: + optional: true + playwright: + optional: true + portkey-ai: + optional: true + puppeteer: + optional: true + pyodide: + optional: true + redis: + optional: true + replicate: + optional: true + sonix-speech-recognition: + optional: true + srt-parser-2: + optional: true + typeorm: + optional: true + typesense: + optional: true + usearch: + optional: true + vectordb: + optional: true + voy-search: + optional: true + weaviate-ts-client: + optional: true + web-auth-library: + optional: true + ws: + optional: true + youtube-transcript: + optional: true + youtubei.js: + optional: true + languageName: unknown + linkType: soft + "@langchain/core@workspace:*, @langchain/core@workspace:langchain-core, @langchain/core@~0.0.9": version: 0.0.0-use.local resolution: "@langchain/core@workspace:langchain-core" @@ -8031,7 +8487,7 @@ __metadata: languageName: unknown linkType: soft -"@langchain/openai@workspace:libs/langchain-openai": +"@langchain/openai@workspace:^, @langchain/openai@workspace:libs/langchain-openai": version: 0.0.0-use.local resolution: "@langchain/openai@workspace:libs/langchain-openai" dependencies: @@ -22649,6 +23105,7 @@ __metadata: "@gradientai/nodejs-sdk": ^1.2.0 "@huggingface/inference": ^2.6.4 "@jest/globals": ^29.5.0 + "@langchain/community": "workspace:*" "@langchain/core": ~0.0.9 "@mozilla/readability": ^0.4.4 "@notionhq/client": ^2.2.10 From 59f927761076c20ad1595744f76422dd8e5012e4 Mon Sep 17 
00:00:00 2001 From: jacoblee93 Date: Wed, 6 Dec 2023 16:30:28 -0800 Subject: [PATCH 02/22] Move LLMs --- libs/langchain-community/src/llms/ai21.ts | 203 ++++++++++ .../src/llms/aleph_alpha.ts | 302 +++++++++++++++ .../src/llms/bedrock/index.ts | 17 + .../src/llms/bedrock/web.ts | 359 ++++++++++++++++++ .../src/llms/cloudflare_workersai.ts | 192 ++++++++++ libs/langchain-community/src/llms/cohere.ts | 131 +++++++ .../langchain-community/src/llms/fireworks.ts | 138 +++++++ .../src/llms/googlepalm.ts | 207 ++++++++++ .../src/llms/googlevertexai/common.ts | 234 ++++++++++++ .../src/llms/googlevertexai/index.ts | 66 ++++ .../src/llms/googlevertexai/web.ts | 66 ++++ .../src/llms/gradient_ai.ts | 140 +++++++ libs/langchain-community/src/llms/hf.ts | 159 ++++++++ .../langchain-community/src/llms/llama_cpp.ts | 121 ++++++ libs/langchain-community/src/llms/ollama.ts | 248 ++++++++++++ libs/langchain-community/src/llms/portkey.ts | 181 +++++++++ libs/langchain-community/src/llms/raycast.ts | 103 +++++ .../langchain-community/src/llms/replicate.ts | 160 ++++++++ .../src/llms/sagemaker_endpoint.ts | 287 ++++++++++++++ .../src/llms/tests/ai21.int.test.ts | 51 +++ .../src/llms/tests/aleph_alpha.int.test.ts | 54 +++ .../src/llms/tests/bedrock.int.test.ts | 182 +++++++++ .../tests/cloudflare_workersai.int.test.ts | 50 +++ .../src/llms/tests/cohere.int.test.ts | 8 + .../src/llms/tests/fake.test.ts | 98 +++++ .../src/llms/tests/fireworks.int.test.ts | 20 + .../src/llms/tests/googlepalm.int.test.ts | 32 ++ .../src/llms/tests/googlepalm.test.ts | 75 ++++ .../src/llms/tests/googlevertexai.int.test.ts | 78 ++++ .../llms/tests/googlevertexai_web.int.test.ts | 77 ++++ .../llms/tests/huggingface_hub.int.test.ts | 8 + .../src/llms/tests/llama_cpp.int.test.ts | 47 +++ .../src/llms/tests/ollama.int.test.ts | 113 ++++++ .../src/llms/tests/openai-chat.int.test.ts | 142 +++++++ .../src/llms/tests/openai.int.test.ts | 310 +++++++++++++++ .../src/llms/tests/prompt_layer.int.test.ts | 95 +++++ 
.../src/llms/tests/replicate.int.test.ts | 57 +++ .../llms/tests/sagemaker_endpoint.int.test.ts | 133 +++++++ .../src/llms/tests/writer.int.test.ts | 8 + .../src/llms/watsonx_ai.ts | 198 ++++++++++ libs/langchain-community/src/llms/writer.ts | 174 +++++++++ libs/langchain-community/src/llms/yandex.ts | 127 +++++++ 42 files changed, 5451 insertions(+) create mode 100644 libs/langchain-community/src/llms/ai21.ts create mode 100644 libs/langchain-community/src/llms/aleph_alpha.ts create mode 100644 libs/langchain-community/src/llms/bedrock/index.ts create mode 100644 libs/langchain-community/src/llms/bedrock/web.ts create mode 100644 libs/langchain-community/src/llms/cloudflare_workersai.ts create mode 100644 libs/langchain-community/src/llms/cohere.ts create mode 100644 libs/langchain-community/src/llms/fireworks.ts create mode 100644 libs/langchain-community/src/llms/googlepalm.ts create mode 100644 libs/langchain-community/src/llms/googlevertexai/common.ts create mode 100644 libs/langchain-community/src/llms/googlevertexai/index.ts create mode 100644 libs/langchain-community/src/llms/googlevertexai/web.ts create mode 100644 libs/langchain-community/src/llms/gradient_ai.ts create mode 100644 libs/langchain-community/src/llms/hf.ts create mode 100644 libs/langchain-community/src/llms/llama_cpp.ts create mode 100644 libs/langchain-community/src/llms/ollama.ts create mode 100644 libs/langchain-community/src/llms/portkey.ts create mode 100644 libs/langchain-community/src/llms/raycast.ts create mode 100644 libs/langchain-community/src/llms/replicate.ts create mode 100644 libs/langchain-community/src/llms/sagemaker_endpoint.ts create mode 100644 libs/langchain-community/src/llms/tests/ai21.int.test.ts create mode 100644 libs/langchain-community/src/llms/tests/aleph_alpha.int.test.ts create mode 100644 libs/langchain-community/src/llms/tests/bedrock.int.test.ts create mode 100644 libs/langchain-community/src/llms/tests/cloudflare_workersai.int.test.ts create mode 100644 
libs/langchain-community/src/llms/tests/cohere.int.test.ts create mode 100644 libs/langchain-community/src/llms/tests/fake.test.ts create mode 100644 libs/langchain-community/src/llms/tests/fireworks.int.test.ts create mode 100644 libs/langchain-community/src/llms/tests/googlepalm.int.test.ts create mode 100644 libs/langchain-community/src/llms/tests/googlepalm.test.ts create mode 100644 libs/langchain-community/src/llms/tests/googlevertexai.int.test.ts create mode 100644 libs/langchain-community/src/llms/tests/googlevertexai_web.int.test.ts create mode 100644 libs/langchain-community/src/llms/tests/huggingface_hub.int.test.ts create mode 100644 libs/langchain-community/src/llms/tests/llama_cpp.int.test.ts create mode 100644 libs/langchain-community/src/llms/tests/ollama.int.test.ts create mode 100644 libs/langchain-community/src/llms/tests/openai-chat.int.test.ts create mode 100644 libs/langchain-community/src/llms/tests/openai.int.test.ts create mode 100644 libs/langchain-community/src/llms/tests/prompt_layer.int.test.ts create mode 100644 libs/langchain-community/src/llms/tests/replicate.int.test.ts create mode 100644 libs/langchain-community/src/llms/tests/sagemaker_endpoint.int.test.ts create mode 100644 libs/langchain-community/src/llms/tests/writer.int.test.ts create mode 100644 libs/langchain-community/src/llms/watsonx_ai.ts create mode 100644 libs/langchain-community/src/llms/writer.ts create mode 100644 libs/langchain-community/src/llms/yandex.ts diff --git a/libs/langchain-community/src/llms/ai21.ts b/libs/langchain-community/src/llms/ai21.ts new file mode 100644 index 000000000000..6f14495c0140 --- /dev/null +++ b/libs/langchain-community/src/llms/ai21.ts @@ -0,0 +1,203 @@ +import { LLM, type BaseLLMParams } from "@langchain/core/language_models/llms"; +import { getEnvironmentVariable } from "@langchain/core/utils/env"; + +/** + * Type definition for AI21 penalty data. 
+ */ +export type AI21PenaltyData = { + scale: number; + applyToWhitespaces: boolean; + applyToPunctuations: boolean; + applyToNumbers: boolean; + applyToStopwords: boolean; + applyToEmojis: boolean; +}; + +/** + * Interface for AI21 input parameters. + */ +export interface AI21Input extends BaseLLMParams { + ai21ApiKey?: string; + model?: string; + temperature?: number; + minTokens?: number; + maxTokens?: number; + topP?: number; + presencePenalty?: AI21PenaltyData; + countPenalty?: AI21PenaltyData; + frequencyPenalty?: AI21PenaltyData; + numResults?: number; + logitBias?: Record; + stop?: string[]; + baseUrl?: string; +} + +/** + * Class representing the AI21 language model. It extends the LLM (Large + * Language Model) class, providing a standard interface for interacting + * with the AI21 language model. + */ +export class AI21 extends LLM implements AI21Input { + lc_serializable = true; + + lc_namespace = ["langchain-community", "llms", this._llmType()]; + + model = "j2-jumbo-instruct"; + + temperature = 0.7; + + maxTokens = 1024; + + minTokens = 0; + + topP = 1; + + presencePenalty = AI21.getDefaultAI21PenaltyData(); + + countPenalty = AI21.getDefaultAI21PenaltyData(); + + frequencyPenalty = AI21.getDefaultAI21PenaltyData(); + + numResults = 1; + + logitBias?: Record; + + ai21ApiKey?: string; + + stop?: string[]; + + baseUrl?: string; + + constructor(fields?: AI21Input) { + super(fields ?? {}); + + this.model = fields?.model ?? this.model; + this.temperature = fields?.temperature ?? this.temperature; + this.maxTokens = fields?.maxTokens ?? this.maxTokens; + this.minTokens = fields?.minTokens ?? this.minTokens; + this.topP = fields?.topP ?? this.topP; + this.presencePenalty = fields?.presencePenalty ?? this.presencePenalty; + this.countPenalty = fields?.countPenalty ?? this.countPenalty; + this.frequencyPenalty = fields?.frequencyPenalty ?? this.frequencyPenalty; + this.numResults = fields?.numResults ?? 
this.numResults; + this.logitBias = fields?.logitBias; + this.ai21ApiKey = + fields?.ai21ApiKey ?? getEnvironmentVariable("AI21_API_KEY"); + this.stop = fields?.stop; + this.baseUrl = fields?.baseUrl; + } + + /** + * Method to validate the environment. It checks if the AI21 API key is + * set. If not, it throws an error. + */ + validateEnvironment() { + if (!this.ai21ApiKey) { + throw new Error( + `No AI21 API key found. Please set it as "AI21_API_KEY" in your environment variables.` + ); + } + } + + /** + * Static method to get the default penalty data for AI21. + * @returns AI21PenaltyData + */ + static getDefaultAI21PenaltyData(): AI21PenaltyData { + return { + scale: 0, + applyToWhitespaces: true, + applyToPunctuations: true, + applyToNumbers: true, + applyToStopwords: true, + applyToEmojis: true, + }; + } + + /** Get the type of LLM. */ + _llmType() { + return "ai21"; + } + + /** Get the default parameters for calling AI21 API. */ + get defaultParams() { + return { + temperature: this.temperature, + maxTokens: this.maxTokens, + minTokens: this.minTokens, + topP: this.topP, + presencePenalty: this.presencePenalty, + countPenalty: this.countPenalty, + frequencyPenalty: this.frequencyPenalty, + numResults: this.numResults, + logitBias: this.logitBias, + }; + } + + /** Get the identifying parameters for this LLM. */ + get identifyingParams() { + return { ...this.defaultParams, model: this.model }; + } + + /** Call out to AI21's complete endpoint. + Args: + prompt: The prompt to pass into the model. + stop: Optional list of stop words to use when generating. + + Returns: + The string generated by the model. 
+ + Example: + let response = ai21._call("Tell me a joke."); + */ + async _call( + prompt: string, + options: this["ParsedCallOptions"] + ): Promise { + let stop = options?.stop; + this.validateEnvironment(); + if (this.stop && stop && this.stop.length > 0 && stop.length > 0) { + throw new Error("`stop` found in both the input and default params."); + } + stop = this.stop ?? stop ?? []; + + const baseUrl = + this.baseUrl ?? this.model === "j1-grande-instruct" + ? "https://api.ai21.com/studio/v1/experimental" + : "https://api.ai21.com/studio/v1"; + + const url = `${baseUrl}/${this.model}/complete`; + const headers = { + Authorization: `Bearer ${this.ai21ApiKey}`, + "Content-Type": "application/json", + }; + const data = { prompt, stopSequences: stop, ...this.defaultParams }; + const responseData = await this.caller.callWithOptions({}, async () => { + const response = await fetch(url, { + method: "POST", + headers, + body: JSON.stringify(data), + signal: options.signal, + }); + if (!response.ok) { + const error = new Error( + `AI21 call failed with status code ${response.status}` + ); + // eslint-disable-next-line @typescript-eslint/no-explicit-any + (error as any).response = response; + throw error; + } + return response.json(); + }); + + if ( + !responseData.completions || + responseData.completions.length === 0 || + !responseData.completions[0].data + ) { + throw new Error("No completions found in response"); + } + + return responseData.completions[0].data.text ?? ""; + } +} diff --git a/libs/langchain-community/src/llms/aleph_alpha.ts b/libs/langchain-community/src/llms/aleph_alpha.ts new file mode 100644 index 000000000000..c3731ecd1dd8 --- /dev/null +++ b/libs/langchain-community/src/llms/aleph_alpha.ts @@ -0,0 +1,302 @@ +import { LLM, type BaseLLMParams } from "@langchain/core/language_models/llms"; +import { getEnvironmentVariable } from "@langchain/core/utils/env"; + +/** + * Interface for the input parameters specific to the Aleph Alpha LLM. 
+ */ +export interface AlephAlphaInput extends BaseLLMParams { + model: string; + maximum_tokens: number; + minimum_tokens?: number; + echo?: boolean; + temperature?: number; + top_k?: number; + top_p?: number; + presence_penalty?: number; + frequency_penalty?: number; + sequence_penalty?: number; + sequence_penalty_min_length?: number; + repetition_penalties_include_prompt?: boolean; + repetition_penalties_include_completion?: boolean; + use_multiplicative_presence_penalty?: boolean; + use_multiplicative_frequency_penalty?: boolean; + use_multiplicative_sequence_penalty?: boolean; + penalty_bias?: string; + penalty_exceptions?: string[]; + penalty_exceptions_include_stop_sequences?: boolean; + best_of?: number; + n?: number; + logit_bias?: object; + log_probs?: number; + tokens?: boolean; + raw_completion: boolean; + disable_optimizations?: boolean; + completion_bias_inclusion?: string[]; + completion_bias_inclusion_first_token_only: boolean; + completion_bias_exclusion?: string[]; + completion_bias_exclusion_first_token_only: boolean; + contextual_control_threshold?: number; + control_log_additive: boolean; + stop?: string[]; + aleph_alpha_api_key?: string; + base_url: string; +} + +/** + * Specific implementation of a Large Language Model (LLM) designed to + * interact with the Aleph Alpha API. It extends the base LLM class and + * includes a variety of parameters for customizing the behavior of the + * Aleph Alpha model. 
+ */ +export class AlephAlpha extends LLM implements AlephAlphaInput { + lc_serializable = true; + + lc_namespace = ["langchain-community", "llms", this._llmType()]; + + model = "luminous-base"; + + maximum_tokens = 64; + + minimum_tokens = 0; + + echo: boolean; + + temperature = 0.0; + + top_k: number; + + top_p = 0.0; + + presence_penalty?: number; + + frequency_penalty?: number; + + sequence_penalty?: number; + + sequence_penalty_min_length?: number; + + repetition_penalties_include_prompt?: boolean; + + repetition_penalties_include_completion?: boolean; + + use_multiplicative_presence_penalty?: boolean; + + use_multiplicative_frequency_penalty?: boolean; + + use_multiplicative_sequence_penalty?: boolean; + + penalty_bias?: string; + + penalty_exceptions?: string[]; + + penalty_exceptions_include_stop_sequences?: boolean; + + best_of?: number; + + n?: number; + + logit_bias?: object; + + log_probs?: number; + + tokens?: boolean; + + raw_completion: boolean; + + disable_optimizations?: boolean; + + completion_bias_inclusion?: string[]; + + completion_bias_inclusion_first_token_only: boolean; + + completion_bias_exclusion?: string[]; + + completion_bias_exclusion_first_token_only: boolean; + + contextual_control_threshold?: number; + + control_log_additive: boolean; + + aleph_alpha_api_key? = getEnvironmentVariable("ALEPH_ALPHA_API_KEY"); + + stop?: string[]; + + base_url = "https://api.aleph-alpha.com/complete"; + + constructor(fields: Partial) { + super(fields ?? {}); + this.model = fields?.model ?? this.model; + this.temperature = fields?.temperature ?? this.temperature; + this.maximum_tokens = fields?.maximum_tokens ?? this.maximum_tokens; + this.minimum_tokens = fields?.minimum_tokens ?? this.minimum_tokens; + this.top_k = fields?.top_k ?? this.top_k; + this.top_p = fields?.top_p ?? this.top_p; + this.presence_penalty = fields?.presence_penalty ?? this.presence_penalty; + this.frequency_penalty = + fields?.frequency_penalty ?? 
this.frequency_penalty; + this.sequence_penalty = fields?.sequence_penalty ?? this.sequence_penalty; + this.sequence_penalty_min_length = + fields?.sequence_penalty_min_length ?? this.sequence_penalty_min_length; + this.repetition_penalties_include_prompt = + fields?.repetition_penalties_include_prompt ?? + this.repetition_penalties_include_prompt; + this.repetition_penalties_include_completion = + fields?.repetition_penalties_include_completion ?? + this.repetition_penalties_include_completion; + this.use_multiplicative_presence_penalty = + fields?.use_multiplicative_presence_penalty ?? + this.use_multiplicative_presence_penalty; + this.use_multiplicative_frequency_penalty = + fields?.use_multiplicative_frequency_penalty ?? + this.use_multiplicative_frequency_penalty; + this.use_multiplicative_sequence_penalty = + fields?.use_multiplicative_sequence_penalty ?? + this.use_multiplicative_sequence_penalty; + this.penalty_bias = fields?.penalty_bias ?? this.penalty_bias; + this.penalty_exceptions = + fields?.penalty_exceptions ?? this.penalty_exceptions; + this.penalty_exceptions_include_stop_sequences = + fields?.penalty_exceptions_include_stop_sequences ?? + this.penalty_exceptions_include_stop_sequences; + this.best_of = fields?.best_of ?? this.best_of; + this.n = fields?.n ?? this.n; + this.logit_bias = fields?.logit_bias ?? this.logit_bias; + this.log_probs = fields?.log_probs ?? this.log_probs; + this.tokens = fields?.tokens ?? this.tokens; + this.raw_completion = fields?.raw_completion ?? this.raw_completion; + this.disable_optimizations = + fields?.disable_optimizations ?? this.disable_optimizations; + this.completion_bias_inclusion = + fields?.completion_bias_inclusion ?? this.completion_bias_inclusion; + this.completion_bias_inclusion_first_token_only = + fields?.completion_bias_inclusion_first_token_only ?? + this.completion_bias_inclusion_first_token_only; + this.completion_bias_exclusion = + fields?.completion_bias_exclusion ?? 
this.completion_bias_exclusion; + this.completion_bias_exclusion_first_token_only = + fields?.completion_bias_exclusion_first_token_only ?? + this.completion_bias_exclusion_first_token_only; + this.contextual_control_threshold = + fields?.contextual_control_threshold ?? this.contextual_control_threshold; + this.control_log_additive = + fields?.control_log_additive ?? this.control_log_additive; + this.aleph_alpha_api_key = + fields?.aleph_alpha_api_key ?? this.aleph_alpha_api_key; + this.stop = fields?.stop ?? this.stop; + } + + /** + * Validates the environment by ensuring the necessary Aleph Alpha API key + * is available. Throws an error if the API key is missing. + */ + validateEnvironment() { + if (!this.aleph_alpha_api_key) { + throw new Error( + "Aleph Alpha API Key is missing in environment variables." + ); + } + } + + /** Get the default parameters for calling Aleph Alpha API. */ + get defaultParams() { + return { + model: this.model, + temperature: this.temperature, + maximum_tokens: this.maximum_tokens, + minimum_tokens: this.minimum_tokens, + top_k: this.top_k, + top_p: this.top_p, + presence_penalty: this.presence_penalty, + frequency_penalty: this.frequency_penalty, + sequence_penalty: this.sequence_penalty, + sequence_penalty_min_length: this.sequence_penalty_min_length, + repetition_penalties_include_prompt: + this.repetition_penalties_include_prompt, + repetition_penalties_include_completion: + this.repetition_penalties_include_completion, + use_multiplicative_presence_penalty: + this.use_multiplicative_presence_penalty, + use_multiplicative_frequency_penalty: + this.use_multiplicative_frequency_penalty, + use_multiplicative_sequence_penalty: + this.use_multiplicative_sequence_penalty, + penalty_bias: this.penalty_bias, + penalty_exceptions: this.penalty_exceptions, + penalty_exceptions_include_stop_sequences: + this.penalty_exceptions_include_stop_sequences, + best_of: this.best_of, + n: this.n, + logit_bias: this.logit_bias, + log_probs: 
this.log_probs, + tokens: this.tokens, + raw_completion: this.raw_completion, + disable_optimizations: this.disable_optimizations, + completion_bias_inclusion: this.completion_bias_inclusion, + completion_bias_inclusion_first_token_only: + this.completion_bias_inclusion_first_token_only, + completion_bias_exclusion: this.completion_bias_exclusion, + completion_bias_exclusion_first_token_only: + this.completion_bias_exclusion_first_token_only, + contextual_control_threshold: this.contextual_control_threshold, + control_log_additive: this.control_log_additive, + }; + } + + /** Get the identifying parameters for this LLM. */ + get identifyingParams() { + return { ...this.defaultParams }; + } + + /** Get the type of LLM. */ + _llmType(): string { + return "aleph_alpha"; + } + + async _call( + prompt: string, + options: this["ParsedCallOptions"] + ): Promise { + let stop = options?.stop; + this.validateEnvironment(); + if (this.stop && stop && this.stop.length > 0 && stop.length > 0) { + throw new Error("`stop` found in both the input and default params."); + } + stop = this.stop ?? stop ?? 
[]; + const headers = { + Authorization: `Bearer ${this.aleph_alpha_api_key}`, + "Content-Type": "application/json", + Accept: "application/json", + }; + const data = { prompt, stop_sequences: stop, ...this.defaultParams }; + const responseData = await this.caller.call(async () => { + const response = await fetch(this.base_url, { + method: "POST", + headers, + body: JSON.stringify(data), + signal: options.signal, + }); + if (!response.ok) { + // consume the response body to release the connection + // https://undici.nodejs.org/#/?id=garbage-collection + const text = await response.text(); + const error = new Error( + `Aleph Alpha call failed with status ${response.status} and body ${text}` + ); + // eslint-disable-next-line @typescript-eslint/no-explicit-any + (error as any).response = response; + throw error; + } + return response.json(); + }); + + if ( + !responseData.completions || + responseData.completions.length === 0 || + !responseData.completions[0].completion + ) { + throw new Error("No completions found in response"); + } + + return responseData.completions[0].completion ?? ""; + } +} diff --git a/libs/langchain-community/src/llms/bedrock/index.ts b/libs/langchain-community/src/llms/bedrock/index.ts new file mode 100644 index 000000000000..d87440d0641f --- /dev/null +++ b/libs/langchain-community/src/llms/bedrock/index.ts @@ -0,0 +1,17 @@ +import { defaultProvider } from "@aws-sdk/credential-provider-node"; +import type { BaseLLMParams } from "@langchain/core/language_models/llms"; +import { BaseBedrockInput } from "../../util/bedrock.js"; +import { Bedrock as BaseBedrock } from "./web.js"; + +export class Bedrock extends BaseBedrock { + static lc_name() { + return "Bedrock"; + } + + constructor(fields?: Partial & BaseLLMParams) { + super({ + ...fields, + credentials: fields?.credentials ?? 
defaultProvider(), + }); + } +} diff --git a/libs/langchain-community/src/llms/bedrock/web.ts b/libs/langchain-community/src/llms/bedrock/web.ts new file mode 100644 index 000000000000..9f9d280e6967 --- /dev/null +++ b/libs/langchain-community/src/llms/bedrock/web.ts @@ -0,0 +1,359 @@ +import { SignatureV4 } from "@smithy/signature-v4"; + +import { HttpRequest } from "@smithy/protocol-http"; +import { EventStreamCodec } from "@smithy/eventstream-codec"; +import { fromUtf8, toUtf8 } from "@smithy/util-utf8"; +import { Sha256 } from "@aws-crypto/sha256-js"; + +import { getEnvironmentVariable } from "@langchain/core/utils/env"; +import { CallbackManagerForLLMRun } from "@langchain/core/callbacks/manager"; +import { GenerationChunk } from "@langchain/core/outputs"; +import { LLM, type BaseLLMParams } from "@langchain/core/language_models/llms"; + +import { + BaseBedrockInput, + BedrockLLMInputOutputAdapter, + type CredentialType, +} from "../../util/bedrock.js"; +import type { SerializedFields } from "../../load/map_keys.js"; + +/** + * A type of Large Language Model (LLM) that interacts with the Bedrock + * service. It extends the base `LLM` class and implements the + * `BaseBedrockInput` interface. The class is designed to authenticate and + * interact with the Bedrock service, which is a part of Amazon Web + * Services (AWS). It uses AWS credentials for authentication and can be + * configured with various parameters such as the model to use, the AWS + * region, and the maximum number of tokens to generate. 
/**
 * LLM that calls the AWS Bedrock runtime over plain `fetch`, signing each
 * request with AWS Signature V4 so it also works in web/edge environments.
 * Credentials must be supplied explicitly (the Node entry point layers the
 * default provider chain on top of this class).
 */
export class Bedrock extends LLM implements BaseBedrockInput {
  // Bedrock model id, e.g. "anthropic.claude-v2". The prefix before the
  // first "." identifies the provider and drives request/response shaping.
  model = "amazon.titan-tg1-large";

  // AWS region hosting the Bedrock runtime endpoint.
  region: string;

  // AWS credentials (or credential provider) used to sign requests.
  credentials: CredentialType;

  temperature?: number | undefined = undefined;

  maxTokens?: number | undefined = undefined;

  // fetch implementation used for HTTP; overridable for custom runtimes.
  fetchFn: typeof fetch;

  // Host override; defaults to `bedrock-runtime.<region>.amazonaws.com`.
  endpointHost?: string;

  /** @deprecated Prefer passing `stop` via call options. */
  stopSequences?: string[];

  // Extra provider-specific parameters merged into each request body.
  modelKwargs?: Record<string, unknown>;

  // Decoder for the AWS event-stream wire format used by streaming replies.
  codec: EventStreamCodec = new EventStreamCodec(toUtf8, fromUtf8);

  streaming = false;

  lc_serializable = true;

  lc_namespace = ["langchain-community", "llms", this._llmType()];

  get lc_aliases(): Record<string, string> {
    return {
      model: "model_id",
      region: "region_name",
    };
  }

  get lc_secrets(): { [key: string]: string } | undefined {
    return {
      "credentials.accessKeyId": "BEDROCK_AWS_ACCESS_KEY_ID",
      "credentials.secretAccessKey": "BEDROCK_AWS_SECRET_ACCESS_KEY",
    };
  }

  get lc_attributes(): SerializedFields | undefined {
    return { region: this.region };
  }

  _llmType() {
    return "bedrock";
  }

  static lc_name() {
    return "Bedrock";
  }

  constructor(fields?: Partial<BaseBedrockInput> & BaseLLMParams) {
    super(fields ?? {});

    this.model = fields?.model ?? this.model;
    // Only these provider prefixes have input/output adapters.
    const allowedModels = ["ai21", "anthropic", "amazon", "cohere", "meta"];
    if (!allowedModels.includes(this.model.split(".")[0])) {
      throw new Error(
        `Unknown model: '${this.model}', only these are supported: ${allowedModels}`
      );
    }
    const region =
      fields?.region ?? getEnvironmentVariable("AWS_DEFAULT_REGION");
    if (!region) {
      throw new Error(
        "Please set the AWS_DEFAULT_REGION environment variable or pass it to the constructor as the region field."
      );
    }
    this.region = region;

    const credentials = fields?.credentials;
    if (!credentials) {
      throw new Error(
        "Please set the AWS credentials in the 'credentials' field."
      );
    }
    this.credentials = credentials;

    this.temperature = fields?.temperature ?? this.temperature;
    this.maxTokens = fields?.maxTokens ?? this.maxTokens;
    this.fetchFn = fields?.fetchFn ?? fetch.bind(globalThis);
    // `endpointUrl` is accepted as a legacy alias for `endpointHost`.
    this.endpointHost = fields?.endpointHost ?? fields?.endpointUrl;
    this.stopSequences = fields?.stopSequences;
    this.modelKwargs = fields?.modelKwargs;
    this.streaming = fields?.streaming ?? this.streaming;
  }

  /** Call out to Bedrock service model.
    Arguments:
      prompt: The prompt to pass into the model.

    Returns:
      The string generated by the model.

    Example:
      response = model.call("Tell me a joke.")
  */
  async _call(
    prompt: string,
    options: this["ParsedCallOptions"],
    runManager?: CallbackManagerForLLMRun
  ): Promise<string> {
    const service = "bedrock-runtime";
    const endpointHost =
      this.endpointHost ?? `${service}.${this.region}.amazonaws.com`;
    const provider = this.model.split(".")[0];
    if (this.streaming) {
      // Streaming mode: drain the chunk generator and concatenate pieces.
      const stream = this._streamResponseChunks(prompt, options, runManager);
      let finalResult: GenerationChunk | undefined;
      for await (const chunk of stream) {
        if (finalResult === undefined) {
          finalResult = chunk;
        } else {
          finalResult = finalResult.concat(chunk);
        }
      }
      return finalResult?.text ?? "";
    }
    const response = await this._signedFetch(prompt, options, {
      bedrockMethod: "invoke",
      endpointHost,
      provider,
    });
    const json = await response.json();
    if (!response.ok) {
      throw new Error(
        `Error ${response.status}: ${json.message ?? JSON.stringify(json)}`
      );
    }
    // Translate the provider-specific response shape into plain text.
    const text = BedrockLLMInputOutputAdapter.prepareOutput(provider, json);
    return text;
  }

  /**
   * Builds the provider-specific request body, signs the HTTP request with
   * AWS SigV4, and sends it with the configured fetch implementation.
   */
  async _signedFetch(
    prompt: string,
    options: this["ParsedCallOptions"],
    fields: {
      bedrockMethod: "invoke" | "invoke-with-response-stream";
      endpointHost: string;
      provider: string;
    }
  ) {
    const { bedrockMethod, endpointHost, provider } = fields;
    const inputBody = BedrockLLMInputOutputAdapter.prepareInput(
      provider,
      prompt,
      this.maxTokens,
      this.temperature,
      options.stop ?? this.stopSequences,
      this.modelKwargs,
      fields.bedrockMethod
    );

    const url = new URL(
      `https://${endpointHost}/model/${this.model}/${bedrockMethod}`
    );

    const request = new HttpRequest({
      hostname: url.hostname,
      path: url.pathname,
      protocol: url.protocol,
      method: "POST", // method must be uppercase
      body: JSON.stringify(inputBody),
      query: Object.fromEntries(url.searchParams.entries()),
      headers: {
        // host is required by AWS Signature V4: https://docs.aws.amazon.com/general/latest/gr/sigv4-create-canonical-request.html
        host: url.host,
        accept: "application/json",
        "content-type": "application/json",
      },
    });

    const signer = new SignatureV4({
      credentials: this.credentials,
      service: "bedrock",
      region: this.region,
      sha256: Sha256,
    });

    const signedRequest = await signer.sign(request);

    // Send request to AWS using the low-level fetch API
    const response = await this.caller.callWithOptions(
      { signal: options.signal },
      async () =>
        this.fetchFn(url, {
          headers: signedRequest.headers,
          body: signedRequest.body,
          method: signedRequest.method,
        })
    );
    return response;
  }

  /** Parameters describing a single invocation, for tracing/serialization. */
  invocationParams(options?: this["ParsedCallOptions"]) {
    return {
      model: this.model,
      region: this.region,
      temperature: this.temperature,
      maxTokens: this.maxTokens,
      stop: options?.stop ?? this.stopSequences,
      modelKwargs: this.modelKwargs,
    };
  }

  /**
   * Streams generation chunks. Only anthropic/cohere/meta models support
   * true server-side streaming; other providers are invoked once and the
   * full result is yielded as a single chunk.
   */
  async *_streamResponseChunks(
    prompt: string,
    options: this["ParsedCallOptions"],
    runManager?: CallbackManagerForLLMRun
  ): AsyncGenerator<GenerationChunk> {
    const provider = this.model.split(".")[0];
    const bedrockMethod =
      provider === "anthropic" || provider === "cohere" || provider === "meta"
        ? "invoke-with-response-stream"
        : "invoke";

    const service = "bedrock-runtime";
    const endpointHost =
      this.endpointHost ?? `${service}.${this.region}.amazonaws.com`;

    // Send request to AWS using the low-level fetch API
    const response = await this._signedFetch(prompt, options, {
      bedrockMethod,
      endpointHost,
      provider,
    });

    if (response.status < 200 || response.status >= 300) {
      throw Error(
        `Failed to access underlying url '${endpointHost}': got ${
          response.status
        } ${response.statusText}: ${await response.text()}`
      );
    }

    if (
      provider === "anthropic" ||
      provider === "cohere" ||
      provider === "meta"
    ) {
      const reader = response.body?.getReader();
      const decoder = new TextDecoder();
      // Each framed message from the AWS event stream is decoded, then its
      // JSON body carries a base64 `bytes` payload with the actual chunk.
      for await (const chunk of this._readChunks(reader)) {
        const event = this.codec.decode(chunk);
        if (
          (event.headers[":event-type"] !== undefined &&
            event.headers[":event-type"].value !== "chunk") ||
          event.headers[":content-type"].value !== "application/json"
        ) {
          throw Error(`Failed to get event chunk: got ${chunk}`);
        }
        const body = JSON.parse(decoder.decode(event.body));
        if (body.message) {
          throw new Error(body.message);
        }
        if (body.bytes !== undefined) {
          // base64-decode the payload, then parse the provider JSON inside.
          const chunkResult = JSON.parse(
            decoder.decode(
              Uint8Array.from(atob(body.bytes), (m) => m.codePointAt(0) ?? 0)
            )
          );
          const text = BedrockLLMInputOutputAdapter.prepareOutput(
            provider,
            chunkResult
          );
          yield new GenerationChunk({
            text,
            generationInfo: {},
          });
          // eslint-disable-next-line no-void
          void runManager?.handleLLMNewToken(text);
        }
      }
    } else {
      // Provider without streaming support: single request, single chunk.
      const json = await response.json();
      const text = BedrockLLMInputOutputAdapter.prepareOutput(provider, json);
      yield new GenerationChunk({
        text,
        generationInfo: {},
      });
      // eslint-disable-next-line no-void
      void runManager?.handleLLMNewToken(text);
    }
  }

  /**
   * Wraps a ReadableStream reader so that iteration yields one complete
   * event-stream message per step. Messages are length-prefixed: the first
   * big-endian uint32 of each frame is its total byte length, so partial
   * network chunks are buffered until a whole frame is available.
   */
  // eslint-disable-next-line @typescript-eslint/no-explicit-any
  _readChunks(reader: any) {
    function _concatChunks(a: Uint8Array, b: Uint8Array) {
      const newBuffer = new Uint8Array(a.length + b.length);
      newBuffer.set(a);
      newBuffer.set(b, a.length);
      return newBuffer;
    }

    function getMessageLength(buffer: Uint8Array) {
      if (buffer.byteLength === 0) return 0;
      const view = new DataView(
        buffer.buffer,
        buffer.byteOffset,
        buffer.byteLength
      );
      // Frame length is the leading big-endian uint32.
      return view.getUint32(0, false);
    }

    return {
      async *[Symbol.asyncIterator]() {
        let readResult = await reader.read();

        let buffer: Uint8Array = new Uint8Array(0);
        while (!readResult.done) {
          const chunk: Uint8Array = readResult.value;

          buffer = _concatChunks(buffer, chunk);
          let messageLength = getMessageLength(buffer);

          // Emit every complete frame currently buffered.
          while (buffer.byteLength > 0 && buffer.byteLength >= messageLength) {
            yield buffer.slice(0, messageLength);
            buffer = buffer.slice(messageLength);
            messageLength = getMessageLength(buffer);
          }

          readResult = await reader.read();
        }
      },
    };
  }
}
getEnvironmentVariable } from "@langchain/core/utils/env"; +import { CallbackManagerForLLMRun } from "@langchain/core/callbacks/manager"; +import { GenerationChunk } from "@langchain/core/outputs"; + +import { convertEventStreamToIterableReadableDataStream } from "../util/event-source-parse.js"; + +/** + * Interface for CloudflareWorkersAI input parameters. + */ +export interface CloudflareWorkersAIInput { + cloudflareAccountId?: string; + cloudflareApiToken?: string; + model?: string; + baseUrl?: string; + streaming?: boolean; +} + +/** + * Class representing the CloudflareWorkersAI language model. It extends the LLM (Large + * Language Model) class, providing a standard interface for interacting + * with the CloudflareWorkersAI language model. + */ +export class CloudflareWorkersAI + extends LLM + implements CloudflareWorkersAIInput +{ + model = "@cf/meta/llama-2-7b-chat-int8"; + + cloudflareAccountId?: string; + + cloudflareApiToken?: string; + + baseUrl: string; + + streaming = false; + + static lc_name() { + return "CloudflareWorkersAI"; + } + + lc_serializable = true; + + lc_namespace = ["langchain-community", "llms", this._llmType()]; + + constructor(fields?: CloudflareWorkersAIInput & BaseLLMParams) { + super(fields ?? {}); + + this.model = fields?.model ?? this.model; + this.streaming = fields?.streaming ?? this.streaming; + this.cloudflareAccountId = + fields?.cloudflareAccountId ?? + getEnvironmentVariable("CLOUDFLARE_ACCOUNT_ID"); + this.cloudflareApiToken = + fields?.cloudflareApiToken ?? + getEnvironmentVariable("CLOUDFLARE_API_TOKEN"); + this.baseUrl = + fields?.baseUrl ?? + `https://api.cloudflare.com/client/v4/accounts/${this.cloudflareAccountId}/ai/run`; + if (this.baseUrl.endsWith("/")) { + this.baseUrl = this.baseUrl.slice(0, -1); + } + } + + /** + * Method to validate the environment. + */ + validateEnvironment() { + if (this.baseUrl === undefined) { + if (!this.cloudflareAccountId) { + throw new Error( + `No Cloudflare account ID found. 
Please provide it when instantiating the CloudflareWorkersAI class, or set it as "CLOUDFLARE_ACCOUNT_ID" in your environment variables.` + ); + } + if (!this.cloudflareApiToken) { + throw new Error( + `No Cloudflare API key found. Please provide it when instantiating the CloudflareWorkersAI class, or set it as "CLOUDFLARE_API_KEY" in your environment variables.` + ); + } + } + } + + /** Get the identifying parameters for this LLM. */ + get identifyingParams() { + return { model: this.model }; + } + + /** + * Get the parameters used to invoke the model + */ + invocationParams() { + return { + model: this.model, + }; + } + + /** Get the type of LLM. */ + _llmType() { + return "cloudflare"; + } + + async _request( + prompt: string, + options: this["ParsedCallOptions"], + stream?: boolean + ) { + this.validateEnvironment(); + + const url = `${this.baseUrl}/${this.model}`; + const headers = { + Authorization: `Bearer ${this.cloudflareApiToken}`, + "Content-Type": "application/json", + }; + + const data = { prompt, stream }; + return this.caller.call(async () => { + const response = await fetch(url, { + method: "POST", + headers, + body: JSON.stringify(data), + signal: options.signal, + }); + if (!response.ok) { + const error = new Error( + `Cloudflare LLM call failed with status code ${response.status}` + ); + // eslint-disable-next-line @typescript-eslint/no-explicit-any + (error as any).response = response; + throw error; + } + return response; + }); + } + + async *_streamResponseChunks( + prompt: string, + options: this["ParsedCallOptions"], + runManager?: CallbackManagerForLLMRun + ): AsyncGenerator { + const response = await this._request(prompt, options, true); + if (!response.body) { + throw new Error("Empty response from Cloudflare. 
Please try again."); + } + const stream = convertEventStreamToIterableReadableDataStream( + response.body + ); + for await (const chunk of stream) { + if (chunk !== "[DONE]") { + const parsedChunk = JSON.parse(chunk); + const generationChunk = new GenerationChunk({ + text: parsedChunk.response, + }); + yield generationChunk; + // eslint-disable-next-line no-void + void runManager?.handleLLMNewToken(generationChunk.text ?? ""); + } + } + } + + /** Call out to CloudflareWorkersAI's complete endpoint. + Args: + prompt: The prompt to pass into the model. + Returns: + The string generated by the model. + Example: + let response = CloudflareWorkersAI.call("Tell me a joke."); + */ + async _call( + prompt: string, + options: this["ParsedCallOptions"], + runManager?: CallbackManagerForLLMRun + ): Promise { + if (!this.streaming) { + const response = await this._request(prompt, options); + + const responseData = await response.json(); + + return responseData.result.response; + } else { + const stream = this._streamResponseChunks(prompt, options, runManager); + let finalResult: GenerationChunk | undefined; + for await (const chunk of stream) { + if (finalResult === undefined) { + finalResult = chunk; + } else { + finalResult = finalResult.concat(chunk); + } + } + return finalResult?.text ?? ""; + } + } +} diff --git a/libs/langchain-community/src/llms/cohere.ts b/libs/langchain-community/src/llms/cohere.ts new file mode 100644 index 000000000000..234b98fb8663 --- /dev/null +++ b/libs/langchain-community/src/llms/cohere.ts @@ -0,0 +1,131 @@ +import { getEnvironmentVariable } from "@langchain/core/utils/env"; +import { LLM, type BaseLLMParams } from "@langchain/core/language_models/llms"; + +/** + * Interface for the input parameters specific to the Cohere model. + */ +export interface CohereInput extends BaseLLMParams { + /** Sampling temperature to use */ + temperature?: number; + + /** + * Maximum number of tokens to generate in the completion. 
/**
 * Class representing a Cohere Large Language Model (LLM). It interacts
 * with the Cohere API to generate text completions.
 * @example
 * ```typescript
 * const model = new Cohere({
 *   temperature: 0.7,
 *   maxTokens: 20,
 *   maxRetries: 5,
 * });
 *
 * const res = await model.call(
 *   "Question: What would be a good company name for a company that makes colorful socks?\nAnswer:"
 * );
 * console.log({ res });
 * ```
 */
export class Cohere extends LLM implements CohereInput {
  static lc_name() {
    return "Cohere";
  }

  get lc_secrets(): { [key: string]: string } | undefined {
    return {
      apiKey: "COHERE_API_KEY",
    };
  }

  get lc_aliases(): { [key: string]: string } | undefined {
    return {
      apiKey: "cohere_api_key",
    };
  }

  lc_serializable = true;

  lc_namespace = ["langchain-community", "llms", this._llmType()];

  temperature = 0;

  maxTokens = 250;

  // NOTE(review): declared as `string` but no default is assigned, and the
  // constructor's `fields?.model ?? this.model` falls back to undefined when
  // no model is supplied — presumably the Cohere API then picks its own
  // default model. Confirm intended behavior.
  model: string;

  apiKey: string;

  constructor(fields?: CohereInput) {
    super(fields ?? {});

    const apiKey = fields?.apiKey ?? getEnvironmentVariable("COHERE_API_KEY");

    if (!apiKey) {
      throw new Error(
        "Please set the COHERE_API_KEY environment variable or pass it to the constructor as the apiKey field."
      );
    }

    this.apiKey = apiKey;
    this.maxTokens = fields?.maxTokens ?? this.maxTokens;
    this.temperature = fields?.temperature ?? this.temperature;
    this.model = fields?.model ?? this.model;
  }

  _llmType() {
    return "cohere";
  }

  /** @ignore */
  async _call(
    prompt: string,
    options: this["ParsedCallOptions"]
  ): Promise<string> {
    // Dynamically import the cohere-ai SDK so it stays an optional dependency.
    const { cohere } = await Cohere.imports();

    cohere.init(this.apiKey);

    // Hit the `generate` endpoint on the `large` model
    const generateResponse = await this.caller.callWithOptions(
      { signal: options.signal },
      cohere.generate.bind(cohere),
      {
        prompt,
        model: this.model,
        max_tokens: this.maxTokens,
        temperature: this.temperature,
        end_sequences: options.stop,
      }
    );
    try {
      return generateResponse.body.generations[0].text;
    } catch {
      // Response did not have the expected shape; surface it for debugging.
      console.log(generateResponse);
      throw new Error("Could not parse response.");
    }
  }

  /** @ignore */
  static async imports(): Promise<{
    cohere: typeof import("cohere-ai");
  }> {
    try {
      const { default: cohere } = await import("cohere-ai");
      return { cohere };
    } catch (e) {
      throw new Error(
        "Please install cohere-ai as a dependency with, e.g. `yarn add cohere-ai`"
      );
    }
  }
}
/**
 * Wrapper around Fireworks API for large language models
 *
 * Fireworks API is compatible to the OpenAI API with some limitations described in
 * https://readme.fireworks.ai/docs/openai-compatibility.
 *
 * To use, you should have the `openai` package installed and
 * the `FIREWORKS_API_KEY` environment variable set.
 */
export class Fireworks extends OpenAI<FireworksCallOptions> {
  static lc_name() {
    return "Fireworks";
  }

  _llmType() {
    return "fireworks";
  }

  get lc_secrets(): { [key: string]: string } | undefined {
    return {
      fireworksApiKey: "FIREWORKS_API_KEY",
    };
  }

  lc_serializable = true;

  lc_namespace = ["langchain-community", "llms", this._llmType()];

  fireworksApiKey?: string;

  constructor(
    fields?: Partial<
      Omit<OpenAIInput, "openAIApiKey" | FireworksUnsupportedArgs>
    > &
      BaseLLMParams & { fireworksApiKey?: string }
  ) {
    const fireworksApiKey =
      fields?.fireworksApiKey || getEnvironmentVariable("FIREWORKS_API_KEY");

    if (!fireworksApiKey) {
      throw new Error(
        `Fireworks API key not found. Please set the FIREWORKS_API_KEY environment variable or provide the key into "fireworksApiKey"`
      );
    }

    // Reuse the OpenAI client but point it at the Fireworks inference API.
    super({
      ...fields,
      openAIApiKey: fireworksApiKey,
      modelName: fields?.modelName || "accounts/fireworks/models/llama-v2-13b",
      configuration: {
        baseURL: "https://api.fireworks.ai/inference/v1",
      },
    });

    this.fireworksApiKey = fireworksApiKey;
  }

  toJSON() {
    const result = super.toJSON();

    // Strip the inherited OpenAI key/config so the Fireworks secret is not
    // serialized under OpenAI's field names.
    if (
      "kwargs" in result &&
      typeof result.kwargs === "object" &&
      result.kwargs != null
    ) {
      delete result.kwargs.openai_api_key;
      delete result.kwargs.configuration;
    }

    return result;
  }

  async completionWithRetry(
    request: OpenAIClient.CompletionCreateParamsStreaming,
    options?: OpenAICoreRequestOptions
  ): Promise<AsyncIterable<OpenAIClient.Completions.Completion>>;

  async completionWithRetry(
    request: OpenAIClient.CompletionCreateParamsNonStreaming,
    options?: OpenAICoreRequestOptions
  ): Promise<OpenAIClient.Completions.Completion>;

  /**
   * Calls the Fireworks API with retry logic in case of failures.
   * @param request The request to send to the Fireworks API.
   * @param options Optional configuration for the API call.
   * @returns The response from the Fireworks API.
   */
  async completionWithRetry(
    request:
      | OpenAIClient.CompletionCreateParamsStreaming
      | OpenAIClient.CompletionCreateParamsNonStreaming,
    options?: OpenAICoreRequestOptions
  ): Promise<
    AsyncIterable<OpenAIClient.Completions.Completion> | OpenAIClient.Completions.Completion
  > {
    // https://readme.fireworks.ai/docs/openai-compatibility#api-compatibility
    // Fireworks only accepts a single string prompt, not an array.
    if (Array.isArray(request.prompt)) {
      if (request.prompt.length > 1) {
        throw new Error("Multiple prompts are not supported by Fireworks");
      }

      const prompt = request.prompt[0];
      if (typeof prompt !== "string") {
        throw new Error("Only string prompts are supported by Fireworks");
      }

      request.prompt = prompt;
    }

    // Strip parameters the Fireworks API does not support.
    delete request.frequency_penalty;
    delete request.presence_penalty;
    delete request.best_of;
    delete request.logit_bias;

    // Both branches make the same call; the split exists so TypeScript
    // resolves the correct streaming/non-streaming overload of the parent.
    if (request.stream === true) {
      return super.completionWithRetry(request, options);
    }

    return super.completionWithRetry(request, options);
  }
}
A value closer to 1.0 + * will produce responses that are more varied and creative, while + * a value closer to 0.0 will typically result in more straightforward + * responses from the model. + * + * Note: The default value varies by model + */ + temperature?: number; + + /** + * Maximum number of tokens to generate in the completion. + */ + maxOutputTokens?: number; + + /** + * Top-p changes how the model selects tokens for output. + * + * Tokens are selected from most probable to least until the sum + * of their probabilities equals the top-p value. + * + * For example, if tokens A, B, and C have a probability of + * .3, .2, and .1 and the top-p value is .5, then the model will + * select either A or B as the next token (using temperature). + * + * Note: The default value varies by model + */ + topP?: number; + + /** + * Top-k changes how the model selects tokens for output. + * + * A top-k of 1 means the selected token is the most probable among + * all tokens in the model’s vocabulary (also called greedy decoding), + * while a top-k of 3 means that the next token is selected from + * among the 3 most probable tokens (using temperature). + * + * Note: The default value varies by model + */ + topK?: number; + + /** + * The set of character sequences (up to 5) that will stop output generation. + * If specified, the API will stop at the first appearance of a stop + * sequence. + * + * Note: The stop sequence will not be included as part of the response. + */ + stopSequences?: string[]; + + /** + * A list of unique `SafetySetting` instances for blocking unsafe content. The API will block + * any prompts and responses that fail to meet the thresholds set by these settings. If there + * is no `SafetySetting` for a given `SafetyCategory` provided in the list, the API will use + * the default safety setting for that category. 
/**
 * Google Palm 2 Language Model Wrapper to generate texts
 */
export class GooglePaLM extends LLM implements GooglePaLMTextInput {
  lc_serializable = true;

  lc_namespace = ["langchain-community", "llms", this._llmType()];

  get lc_secrets(): { [key: string]: string } | undefined {
    return {
      apiKey: "GOOGLE_PALM_API_KEY",
    };
  }

  modelName = "models/text-bison-001";

  temperature?: number; // default value chosen based on model

  maxOutputTokens?: number; // defaults to 64

  topP?: number; // default value chosen based on model

  topK?: number; // default value chosen based on model

  stopSequences: string[] = [];

  safetySettings?: protos.google.ai.generativelanguage.v1beta2.ISafetySetting[]; // default safety setting for that category

  apiKey?: string;

  private client: TextServiceClient;

  constructor(fields?: GooglePaLMTextInput) {
    super(fields ?? {});

    this.modelName = fields?.modelName ?? this.modelName;

    // NOTE(review): these validations are guarded by truthiness, so a value
    // of exactly 0 skips the range check (harmless for temperature/topP/topK
    // since 0 is within range, but 0 also bypasses the maxOutputTokens check).
    this.temperature = fields?.temperature ?? this.temperature;
    if (this.temperature && (this.temperature < 0 || this.temperature > 1)) {
      throw new Error("`temperature` must be in the range of [0.0,1.0]");
    }

    this.maxOutputTokens = fields?.maxOutputTokens ?? this.maxOutputTokens;
    if (this.maxOutputTokens && this.maxOutputTokens < 0) {
      throw new Error("`maxOutputTokens` must be a positive integer");
    }

    this.topP = fields?.topP ?? this.topP;
    if (this.topP && this.topP < 0) {
      throw new Error("`topP` must be a positive integer");
    }

    if (this.topP && this.topP > 1) {
      throw new Error("Google PaLM `topP` must in the range of [0,1]");
    }

    this.topK = fields?.topK ?? this.topK;
    if (this.topK && this.topK < 0) {
      throw new Error("`topK` must be a positive integer");
    }

    this.stopSequences = fields?.stopSequences ?? this.stopSequences;

    this.safetySettings = fields?.safetySettings ?? this.safetySettings;
    if (this.safetySettings && this.safetySettings.length > 0) {
      // The API expects at most one setting per safety category.
      const safetySettingsSet = new Set(
        this.safetySettings.map((s) => s.category)
      );
      if (safetySettingsSet.size !== this.safetySettings.length) {
        throw new Error(
          "The categories in `safetySettings` array must be unique"
        );
      }
    }

    this.apiKey =
      fields?.apiKey ?? getEnvironmentVariable("GOOGLE_PALM_API_KEY");
    if (!this.apiKey) {
      throw new Error(
        "Please set an API key for Google Palm 2 in the environment variable GOOGLE_PALM_API_KEY or in the `apiKey` field of the GooglePalm constructor"
      );
    }

    this.client = new TextServiceClient({
      authClient: new GoogleAuth().fromAPIKey(this.apiKey),
    });
  }

  _llmType(): string {
    return "googlepalm";
  }

  /**
   * Generates a completion for the prompt; retries are delegated to the
   * caller utility and cancellation is honored via `options.signal`.
   */
  async _call(
    prompt: string,
    options: this["ParsedCallOptions"]
  ): Promise<string> {
    const res = await this.caller.callWithOptions(
      { signal: options.signal },
      this._generateText.bind(this),
      prompt
    );
    return res ?? "";
  }

  /**
   * Low-level call to the Generative Language `generateText` endpoint.
   * Returns the first candidate's output, or undefined when the response
   * contains no candidates.
   */
  protected async _generateText(
    prompt: string
  ): Promise<string | null | undefined> {
    const res = await this.client.generateText({
      model: this.modelName,
      temperature: this.temperature,
      candidateCount: 1,
      topK: this.topK,
      topP: this.topP,
      maxOutputTokens: this.maxOutputTokens,
      stopSequences: this.stopSequences,
      safetySettings: this.safetySettings,
      prompt: {
        text: prompt,
      },
    });
    return res[0].candidates && res[0].candidates.length > 0
      ? res[0].candidates[0].output
      : undefined;
  }
}
/**
 * Base class for Google Vertex AI LLMs.
 * Implemented subclasses must provide a GoogleVertexAILLMConnection
 * with an appropriate auth client.
 */
export class BaseGoogleVertexAI<AuthOptions>
  extends BaseLLM
  implements GoogleVertexAIBaseLLMInput<AuthOptions>
{
  lc_serializable = true;

  lc_namespace = ["langchain-community", "llms", this._llmType()];

  model = "text-bison";

  temperature = 0.7;

  maxOutputTokens = 1024;

  topP = 0.8;

  topK = 40;

  // Connection used for single-shot (non-streaming) predictions.
  protected connection: GoogleVertexAILLMConnection<
    BaseLanguageModelCallOptions,
    GoogleVertexAILLMInstance,
    TextPrediction,
    AuthOptions
  >;

  // Connection used for streaming predictions.
  protected streamedConnection: GoogleVertexAILLMConnection<
    BaseLanguageModelCallOptions,
    GoogleVertexAILLMInstance,
    TextPrediction,
    AuthOptions
  >;

  get lc_aliases(): Record<string, string> {
    return {
      model: "model_name",
    };
  }

  constructor(fields?: GoogleVertexAIBaseLLMInput<AuthOptions>) {
    super(fields ?? {});

    this.model = fields?.model ?? this.model;

    // Change the defaults for code models
    if (this.model.startsWith("code-gecko")) {
      this.maxOutputTokens = 64;
    }
    if (this.model.startsWith("code-")) {
      this.temperature = 0.2;
    }

    // Explicit fields still override the code-model defaults above.
    this.temperature = fields?.temperature ?? this.temperature;
    this.maxOutputTokens = fields?.maxOutputTokens ?? this.maxOutputTokens;
    this.topP = fields?.topP ?? this.topP;
    this.topK = fields?.topK ?? this.topK;
  }

  _llmType(): string {
    return "vertexai";
  }

  /**
   * Streams generation chunks by polling the streaming connection's parser
   * until it reports the stream is done. A null chunk from the parser is
   * translated into an empty chunk carrying a "stop" finish reason.
   */
  async *_streamResponseChunks(
    _input: string,
    _options: this["ParsedCallOptions"],
    _runManager?: CallbackManagerForLLMRun
  ): AsyncGenerator<GenerationChunk> {
    // Make the call as a streaming request
    const instance = this.formatInstance(_input);
    const parameters = this.formatParameters();
    const result = await this.streamedConnection.request(
      [instance],
      parameters,
      _options
    );

    // Get the streaming parser of the response
    const stream = result.data as GoogleVertexAIStream;

    // Loop until the end of the stream
    // During the loop, yield each time we get a chunk from the streaming parser
    // that is either available or added to the queue
    while (!stream.streamDone) {
      const output = await stream.nextChunk();
      const chunk =
        output !== null
          ? new GenerationChunk(
              this.extractGenerationFromPrediction(output.outputs[0])
            )
          : new GenerationChunk({
              text: "",
              generationInfo: { finishReason: "stop" },
            });
      yield chunk;
    }
  }

  /**
   * Generates completions for all prompts in parallel.
   */
  async _generate(
    prompts: string[],
    options: this["ParsedCallOptions"]
  ): Promise<LLMResult> {
    const generations: Generation[][] = await Promise.all(
      prompts.map((prompt) => this._generatePrompt(prompt, options))
    );
    return { generations };
  }

  /**
   * Sends one formatted prompt through the non-streaming connection and
   * extracts a single Generation from the prediction.
   */
  async _generatePrompt(
    prompt: string,
    options: this["ParsedCallOptions"]
  ): Promise<Generation[]> {
    const instance = this.formatInstance(prompt);
    const parameters = this.formatParameters();
    const result = await this.connection.request(
      [instance],
      parameters,
      options
    );
    const prediction = this.extractPredictionFromResponse(result);
    return [this.extractGenerationFromPrediction(prediction)];
  }

  /**
   * Formats the input instance as a text instance for the Google Vertex AI
   * model.
   * @param prompt Prompt to be formatted as a text instance.
   * @returns A GoogleVertexAILLMInstance object representing the formatted text instance.
   */
  formatInstanceText(prompt: string): GoogleVertexAILLMInstance {
    return { content: prompt };
  }

  /**
   * Formats the input instance as a code instance for the Google Vertex AI
   * model.
   * @param prompt Prompt to be formatted as a code instance.
   * @returns A GoogleVertexAILLMInstance object representing the formatted code instance.
   */
  formatInstanceCode(prompt: string): GoogleVertexAILLMInstance {
    return { prefix: prompt };
  }

  /**
   * Formats the input instance for the Google Vertex AI model based on the
   * model type (text or code).
   * @param prompt Prompt to be formatted as an instance.
   * @returns A GoogleVertexAILLMInstance object representing the formatted instance.
   */
  formatInstance(prompt: string): GoogleVertexAILLMInstance {
    return this.model.startsWith("code-")
      ? this.formatInstanceCode(prompt)
      : this.formatInstanceText(prompt);
  }

  /** Sampling parameters sent with every request. */
  formatParameters(): GoogleVertexAIModelParams {
    return {
      temperature: this.temperature,
      topK: this.topK,
      topP: this.topP,
      maxOutputTokens: this.maxOutputTokens,
    };
  }

  /**
   * Extracts the prediction from the API response.
   * @param result The API response from which to extract the prediction.
   * @returns A TextPrediction object representing the extracted prediction.
   */
  extractPredictionFromResponse(
    result: GoogleVertexAILLMResponse<TextPrediction>
  ): TextPrediction {
    return (result?.data as GoogleVertexAILLMPredictions<TextPrediction>)
      ?.predictions[0];
  }

  /** Wraps a prediction's text (and the full prediction as metadata) in a Generation. */
  extractGenerationFromPrediction(prediction: TextPrediction): Generation {
    return {
      text: prediction.content,
      generationInfo: prediction,
    };
  }
}
+ * @example + * ```typescript + * const model = new GoogleVertexAI({ + * temperature: 0.7, + * }); + * const stream = await model.stream( + * "What would be a good company name for a company that makes colorful socks?", + * ); + * for await (const chunk of stream) { + * console.log(chunk); + * } + * ``` + */ +export class GoogleVertexAI extends BaseGoogleVertexAI { + static lc_name() { + return "VertexAI"; + } + + constructor(fields?: GoogleVertexAITextInput) { + super(fields); + + const client = new GAuthClient({ + scopes: "https://www.googleapis.com/auth/cloud-platform", + ...fields?.authOptions, + }); + + this.connection = new GoogleVertexAILLMConnection( + { ...fields, ...this }, + this.caller, + client, + false + ); + + this.streamedConnection = new GoogleVertexAILLMConnection( + { ...fields, ...this }, + this.caller, + client, + true + ); + } +} diff --git a/libs/langchain-community/src/llms/googlevertexai/web.ts b/libs/langchain-community/src/llms/googlevertexai/web.ts new file mode 100644 index 000000000000..0b656308d53b --- /dev/null +++ b/libs/langchain-community/src/llms/googlevertexai/web.ts @@ -0,0 +1,66 @@ +import { + WebGoogleAuth, + WebGoogleAuthOptions, +} from "../../util/googlevertexai-webauth.js"; +import { GoogleVertexAILLMConnection } from "../../util/googlevertexai-connection.js"; +import { GoogleVertexAIBaseLLMInput } from "../../types/googlevertexai-types.js"; +import { BaseGoogleVertexAI } from "./common.js"; + +/** + * Interface representing the input to the Google Vertex AI model. + */ +export interface GoogleVertexAITextInput + extends GoogleVertexAIBaseLLMInput {} + +/** + * Enables calls to the Google Cloud's Vertex AI API to access + * Large Language Models. + * + * This entrypoint and class are intended to be used in web environments like Edge + * functions where you do not have access to the file system. 
It supports passing + * service account credentials directly as a "GOOGLE_VERTEX_AI_WEB_CREDENTIALS" + * environment variable or directly as "authOptions.credentials". + * @example + * ```typescript + * const model = new GoogleVertexAI({ + * temperature: 0.7, + * }); + * const stream = await model.stream( + * "What would be a good company name for a company that makes colorful socks?", + * ); + * for await (const chunk of stream) { + * console.log(chunk); + * } + * ``` + */ +export class GoogleVertexAI extends BaseGoogleVertexAI { + static lc_name() { + return "VertexAI"; + } + + get lc_secrets(): { [key: string]: string } { + return { + "authOptions.credentials": "GOOGLE_VERTEX_AI_WEB_CREDENTIALS", + }; + } + + constructor(fields?: GoogleVertexAITextInput) { + super(fields); + + const client = new WebGoogleAuth(fields?.authOptions); + + this.connection = new GoogleVertexAILLMConnection( + { ...fields, ...this }, + this.caller, + client, + false + ); + + this.streamedConnection = new GoogleVertexAILLMConnection( + { ...fields, ...this }, + this.caller, + client, + true + ); + } +} diff --git a/libs/langchain-community/src/llms/gradient_ai.ts b/libs/langchain-community/src/llms/gradient_ai.ts new file mode 100644 index 000000000000..a315ae21a223 --- /dev/null +++ b/libs/langchain-community/src/llms/gradient_ai.ts @@ -0,0 +1,140 @@ +import { Gradient } from "@gradientai/nodejs-sdk"; +import { type BaseLLMCallOptions, type BaseLLMParams, LLM } from "@langchain/core/language_models/llms"; +import { getEnvironmentVariable } from "@langchain/core/utils/env"; + +/** + * The GradientLLMParams interface defines the input parameters for + * the GradientLLM class. + */ +export interface GradientLLMParams extends BaseLLMParams { + /** + * Gradient AI Access Token. + * Provide Access Token if you do not wish to automatically pull from env. + */ + gradientAccessKey?: string; + /** + * Gradient Workspace Id. 
+ * Provide workspace id if you do not wish to automatically pull from env. + */ + workspaceId?: string; + /** + * Parameters accepted by the Gradient npm package. + */ + inferenceParameters?: Record; + /** + * Gradient AI Model Slug. + */ + modelSlug?: string; + /** + * Gradient Adapter ID for custom fine tuned models. + */ + adapterId?: string; +} + +/** + * The GradientLLM class is used to interact with Gradient AI inference Endpoint models. + * This requires your Gradient AI Access Token which is autoloaded if not specified. + */ +export class GradientLLM extends LLM { + static lc_name() { + return "GradientLLM"; + } + + get lc_secrets(): { [key: string]: string } | undefined { + return { + gradientAccessKey: "GRADIENT_ACCESS_TOKEN", + workspaceId: "GRADIENT_WORKSPACE_ID", + }; + } + + modelSlug = "llama2-7b-chat"; + + adapterId?: string; + + gradientAccessKey?: string; + + workspaceId?: string; + + inferenceParameters?: Record; + + lc_serializable = true; + + lc_namespace = ["langchain-community", "llms", this._llmType()]; + + // Gradient AI does not export the BaseModel type. Once it does, we can use it here. + // eslint-disable-next-line @typescript-eslint/no-explicit-any + model: any; + + constructor(fields: GradientLLMParams) { + super(fields); + + this.modelSlug = fields?.modelSlug ?? this.modelSlug; + this.adapterId = fields?.adapterId; + this.gradientAccessKey = + fields?.gradientAccessKey ?? + getEnvironmentVariable("GRADIENT_ACCESS_TOKEN"); + this.workspaceId = + fields?.workspaceId ?? getEnvironmentVariable("GRADIENT_WORKSPACE_ID"); + + this.inferenceParameters = fields.inferenceParameters; + + if (!this.gradientAccessKey) { + throw new Error("Missing Gradient AI Access Token"); + } + + if (!this.workspaceId) { + throw new Error("Missing Gradient AI Workspace ID"); + } + } + + _llmType() { + return "gradient_ai"; + } + + /** + * Calls the Gradient AI endpoint and retrieves the result. + * @param {string} prompt The input prompt. 
+ * @returns {Promise} A promise that resolves to the generated string. + */ + /** @ignore */ + async _call( + prompt: string, + _options: this["ParsedCallOptions"] + ): Promise { + await this.setModel(); + + // GradientLLM does not export the CompleteResponse type. Once it does, we can use it here. + interface CompleteResponse { + finishReason: string; + generatedOutput: string; + } + + const response = (await this.caller.call(async () => + this.model.complete({ + query: prompt, + ...this.inferenceParameters, + }) + )) as CompleteResponse; + + return response.generatedOutput; + } + + async setModel() { + if (this.model) return; + + const gradient = new Gradient({ + accessToken: this.gradientAccessKey, + workspaceId: this.workspaceId, + }); + + if (this.adapterId) { + this.model = await gradient.getModelAdapter({ + modelAdapterId: this.adapterId, + }); + } else { + this.model = await gradient.getBaseModel({ + baseModelSlug: this.modelSlug, + }); + } + } +} diff --git a/libs/langchain-community/src/llms/hf.ts b/libs/langchain-community/src/llms/hf.ts new file mode 100644 index 000000000000..7f5020ed06af --- /dev/null +++ b/libs/langchain-community/src/llms/hf.ts @@ -0,0 +1,159 @@ +import { LLM, type BaseLLMParams } from "@langchain/core/language_models/llms"; +import { getEnvironmentVariable } from "@langchain/core/utils/env"; + +/** + * Interface defining the parameters for configuring the Hugging Face + * model for text generation. + */ +export interface HFInput { + /** Model to use */ + model: string; + + /** Custom inference endpoint URL to use */ + endpointUrl?: string; + + /** Sampling temperature to use */ + temperature?: number; + + /** + * Maximum number of tokens to generate in the completion. + */ + maxTokens?: number; + + /** Total probability mass of tokens to consider at each step */ + topP?: number; + + /** Integer to define the top tokens considered within the sample operation to create new text. 
*/ + topK?: number; + + /** Penalizes repeated tokens according to frequency */ + frequencyPenalty?: number; + + /** API key to use. */ + apiKey?: string; + + /** + * Credentials to use for the request. If this is a string, it will be passed straight on. If it's a boolean, true will be "include" and false will not send credentials at all. + */ + includeCredentials?: string | boolean; +} + +/** + * Class implementing the Large Language Model (LLM) interface using the + * Hugging Face Inference API for text generation. + * @example + * ```typescript + * const model = new HuggingFaceInference({ + * model: "gpt2", + * temperature: 0.7, + * maxTokens: 50, + * }); + * + * const res = await model.call( + * "Question: What would be a good company name for a company that makes colorful socks?\nAnswer:" + * ); + * console.log({ res }); + * ``` + */ +export class HuggingFaceInference extends LLM implements HFInput { + lc_serializable = true; + + lc_namespace = ["langchain-community", "llms", this._llmType()]; + + get lc_secrets(): { [key: string]: string } | undefined { + return { + apiKey: "HUGGINGFACEHUB_API_KEY", + }; + } + + model = "gpt2"; + + temperature: number | undefined = undefined; + + maxTokens: number | undefined = undefined; + + topP: number | undefined = undefined; + + topK: number | undefined = undefined; + + frequencyPenalty: number | undefined = undefined; + + apiKey: string | undefined = undefined; + + endpointUrl: string | undefined = undefined; + + includeCredentials: string | boolean | undefined = undefined; + + constructor(fields?: Partial & BaseLLMParams) { + super(fields ?? {}); + + this.model = fields?.model ?? this.model; + this.temperature = fields?.temperature ?? this.temperature; + this.maxTokens = fields?.maxTokens ?? this.maxTokens; + this.topP = fields?.topP ?? this.topP; + this.topK = fields?.topK ?? this.topK; + this.frequencyPenalty = fields?.frequencyPenalty ?? this.frequencyPenalty; + this.apiKey = + fields?.apiKey ?? 
getEnvironmentVariable("HUGGINGFACEHUB_API_KEY"); + this.endpointUrl = fields?.endpointUrl; + this.includeCredentials = fields?.includeCredentials; + + if (!this.apiKey) { + throw new Error( + "Please set an API key for HuggingFace Hub in the environment variable HUGGINGFACEHUB_API_KEY or in the apiKey field of the HuggingFaceInference constructor." + ); + } + } + + _llmType() { + return "hf"; + } + + /** @ignore */ + async _call( + prompt: string, + options: this["ParsedCallOptions"] + ): Promise { + const { HfInference } = await HuggingFaceInference.imports(); + const hf = this.endpointUrl + ? new HfInference(this.apiKey, { + includeCredentials: this.includeCredentials, + }).endpoint(this.endpointUrl) + : new HfInference(this.apiKey, { + includeCredentials: this.includeCredentials, + }); + + const res = await this.caller.callWithOptions( + { signal: options.signal }, + hf.textGeneration.bind(hf), + { + model: this.model, + parameters: { + // make it behave similar to openai, returning only the generated text + return_full_text: false, + temperature: this.temperature, + max_new_tokens: this.maxTokens, + top_p: this.topP, + top_k: this.topK, + repetition_penalty: this.frequencyPenalty, + }, + inputs: prompt, + } + ); + return res.generated_text; + } + + /** @ignore */ + static async imports(): Promise<{ + HfInference: typeof import("@huggingface/inference").HfInference; + }> { + try { + const { HfInference } = await import("@huggingface/inference"); + return { HfInference }; + } catch (e) { + throw new Error( + "Please install huggingface as a dependency with, e.g. 
`yarn add @huggingface/inference`" + ); + } + } +} diff --git a/libs/langchain-community/src/llms/llama_cpp.ts b/libs/langchain-community/src/llms/llama_cpp.ts new file mode 100644 index 000000000000..55085ddeec6f --- /dev/null +++ b/libs/langchain-community/src/llms/llama_cpp.ts @@ -0,0 +1,121 @@ +import { LlamaModel, LlamaContext, LlamaChatSession } from "node-llama-cpp"; +import { LLM, type BaseLLMCallOptions, type BaseLLMParams } from "@langchain/core/language_models/llms"; +import { CallbackManagerForLLMRun } from "@langchain/core/callbacks/manager"; +import { GenerationChunk } from "@langchain/core/outputs"; + +import { + LlamaBaseCppInputs, + createLlamaModel, + createLlamaContext, + createLlamaSession, +} from "../util/llama_cpp.js"; + +/** + * Note that the modelPath is the only required parameter. For testing you + * can set this in the environment variable `LLAMA_PATH`. + */ +export interface LlamaCppInputs extends LlamaBaseCppInputs, BaseLLMParams {} + +export interface LlamaCppCallOptions extends BaseLLMCallOptions { + /** The maximum number of tokens the response should contain. */ + maxTokens?: number; + /** A function called when matching the provided token array */ + onToken?: (tokens: number[]) => void; +} + +/** + * To use this model you need to have the `node-llama-cpp` module installed. + * This can be installed using `npm install -S node-llama-cpp` and the minimum + * version supported in version 2.0.0. + * This also requires that have a locally built version of Llama2 installed. 
+ */ +export class LlamaCpp extends LLM { + lc_serializable = true; + + lc_namespace = ["langchain-community", "llms", this._llmType()]; + + declare CallOptions: LlamaCppCallOptions; + + static inputs: LlamaCppInputs; + + maxTokens?: number; + + temperature?: number; + + topK?: number; + + topP?: number; + + trimWhitespaceSuffix?: boolean; + + _model: LlamaModel; + + _context: LlamaContext; + + _session: LlamaChatSession; + + static lc_name() { + return "LlamaCpp"; + } + + constructor(inputs: LlamaCppInputs) { + super(inputs); + this.maxTokens = inputs?.maxTokens; + this.temperature = inputs?.temperature; + this.topK = inputs?.topK; + this.topP = inputs?.topP; + this.trimWhitespaceSuffix = inputs?.trimWhitespaceSuffix; + this._model = createLlamaModel(inputs); + this._context = createLlamaContext(this._model, inputs); + this._session = createLlamaSession(this._context); + } + + _llmType() { + return "llama2_cpp"; + } + + /** @ignore */ + async _call( + prompt: string, + options?: this["ParsedCallOptions"] + ): Promise { + try { + const promptOptions = { + onToken: options?.onToken, + maxTokens: this?.maxTokens, + temperature: this?.temperature, + topK: this?.topK, + topP: this?.topP, + trimWhitespaceSuffix: this?.trimWhitespaceSuffix, + }; + const completion = await this._session.prompt(prompt, promptOptions); + return completion; + } catch (e) { + throw new Error("Error getting prompt completion."); + } + } + + async *_streamResponseChunks( + prompt: string, + _options: this["ParsedCallOptions"], + runManager?: CallbackManagerForLLMRun + ): AsyncGenerator { + const promptOptions = { + temperature: this?.temperature, + topK: this?.topK, + topP: this?.topP, + }; + + const stream = await this.caller.call(async () => + this._context.evaluate(this._context.encode(prompt), promptOptions) + ); + + for await (const chunk of stream) { + yield new GenerationChunk({ + text: this._context.decode([chunk]), + generationInfo: {}, + }); + await 
runManager?.handleLLMNewToken(this._context.decode([chunk]) ?? ""); + } + } +} diff --git a/libs/langchain-community/src/llms/ollama.ts b/libs/langchain-community/src/llms/ollama.ts new file mode 100644 index 000000000000..6e857bbec0d1 --- /dev/null +++ b/libs/langchain-community/src/llms/ollama.ts @@ -0,0 +1,248 @@ +import { CallbackManagerForLLMRun } from "@langchain/core/callbacks/manager"; +import { GenerationChunk } from "@langchain/core/outputs"; +import type { StringWithAutocomplete } from "@langchain/core/utils/types"; +import { LLM, type BaseLLMParams } from "@langchain/core/language_models/llms"; + +import { + createOllamaStream, + OllamaInput, + OllamaCallOptions, +} from "../util/ollama.js"; + +/** + * Class that represents the Ollama language model. It extends the base + * LLM class and implements the OllamaInput interface. + * @example + * ```typescript + * const ollama = new Ollama({ + * baseUrl: "http://api.example.com", + * model: "llama2", + * }); + * + * // Streaming translation from English to German + * const stream = await ollama.stream( + * `Translate "I love programming" into German.` + * ); + * + * const chunks = []; + * for await (const chunk of stream) { + * chunks.push(chunk); + * } + * + * console.log(chunks.join("")); + * ``` + */ +export class Ollama extends LLM implements OllamaInput { + static lc_name() { + return "Ollama"; + } + + lc_serializable = true; + + lc_namespace = ["langchain-community", "llms", this._llmType()]; + + model = "llama2"; + + baseUrl = "http://localhost:11434"; + + embeddingOnly?: boolean; + + f16KV?: boolean; + + frequencyPenalty?: number; + + logitsAll?: boolean; + + lowVram?: boolean; + + mainGpu?: number; + + mirostat?: number; + + mirostatEta?: number; + + mirostatTau?: number; + + numBatch?: number; + + numCtx?: number; + + numGpu?: number; + + numGqa?: number; + + numKeep?: number; + + numThread?: number; + + penalizeNewline?: boolean; + + presencePenalty?: number; + + repeatLastN?: number; + + 
repeatPenalty?: number; + + ropeFrequencyBase?: number; + + ropeFrequencyScale?: number; + + temperature?: number; + + stop?: string[]; + + tfsZ?: number; + + topK?: number; + + topP?: number; + + typicalP?: number; + + useMLock?: boolean; + + useMMap?: boolean; + + vocabOnly?: boolean; + + format?: StringWithAutocomplete<"json">; + + constructor(fields: OllamaInput & BaseLLMParams) { + super(fields); + this.model = fields.model ?? this.model; + this.baseUrl = fields.baseUrl?.endsWith("/") + ? fields.baseUrl.slice(0, -1) + : fields.baseUrl ?? this.baseUrl; + + this.embeddingOnly = fields.embeddingOnly; + this.f16KV = fields.f16KV; + this.frequencyPenalty = fields.frequencyPenalty; + this.logitsAll = fields.logitsAll; + this.lowVram = fields.lowVram; + this.mainGpu = fields.mainGpu; + this.mirostat = fields.mirostat; + this.mirostatEta = fields.mirostatEta; + this.mirostatTau = fields.mirostatTau; + this.numBatch = fields.numBatch; + this.numCtx = fields.numCtx; + this.numGpu = fields.numGpu; + this.numGqa = fields.numGqa; + this.numKeep = fields.numKeep; + this.numThread = fields.numThread; + this.penalizeNewline = fields.penalizeNewline; + this.presencePenalty = fields.presencePenalty; + this.repeatLastN = fields.repeatLastN; + this.repeatPenalty = fields.repeatPenalty; + this.ropeFrequencyBase = fields.ropeFrequencyBase; + this.ropeFrequencyScale = fields.ropeFrequencyScale; + this.temperature = fields.temperature; + this.stop = fields.stop; + this.tfsZ = fields.tfsZ; + this.topK = fields.topK; + this.topP = fields.topP; + this.typicalP = fields.typicalP; + this.useMLock = fields.useMLock; + this.useMMap = fields.useMMap; + this.vocabOnly = fields.vocabOnly; + this.format = fields.format; + } + + _llmType() { + return "ollama"; + } + + invocationParams(options?: this["ParsedCallOptions"]) { + return { + model: this.model, + format: this.format, + options: { + embedding_only: this.embeddingOnly, + f16_kv: this.f16KV, + frequency_penalty: this.frequencyPenalty, + 
logits_all: this.logitsAll, + low_vram: this.lowVram, + main_gpu: this.mainGpu, + mirostat: this.mirostat, + mirostat_eta: this.mirostatEta, + mirostat_tau: this.mirostatTau, + num_batch: this.numBatch, + num_ctx: this.numCtx, + num_gpu: this.numGpu, + num_gqa: this.numGqa, + num_keep: this.numKeep, + num_thread: this.numThread, + penalize_newline: this.penalizeNewline, + presence_penalty: this.presencePenalty, + repeat_last_n: this.repeatLastN, + repeat_penalty: this.repeatPenalty, + rope_frequency_base: this.ropeFrequencyBase, + rope_frequency_scale: this.ropeFrequencyScale, + temperature: this.temperature, + stop: options?.stop ?? this.stop, + tfs_z: this.tfsZ, + top_k: this.topK, + top_p: this.topP, + typical_p: this.typicalP, + use_mlock: this.useMLock, + use_mmap: this.useMMap, + vocab_only: this.vocabOnly, + }, + }; + } + + async *_streamResponseChunks( + prompt: string, + options: this["ParsedCallOptions"], + runManager?: CallbackManagerForLLMRun + ): AsyncGenerator { + const stream = await this.caller.call(async () => + createOllamaStream( + this.baseUrl, + { ...this.invocationParams(options), prompt }, + options + ) + ); + for await (const chunk of stream) { + if (!chunk.done) { + yield new GenerationChunk({ + text: chunk.response, + generationInfo: { + ...chunk, + response: undefined, + }, + }); + await runManager?.handleLLMNewToken(chunk.response ?? 
""); + } else { + yield new GenerationChunk({ + text: "", + generationInfo: { + model: chunk.model, + total_duration: chunk.total_duration, + load_duration: chunk.load_duration, + prompt_eval_count: chunk.prompt_eval_count, + prompt_eval_duration: chunk.prompt_eval_duration, + eval_count: chunk.eval_count, + eval_duration: chunk.eval_duration, + }, + }); + } + } + } + + /** @ignore */ + async _call( + prompt: string, + options: this["ParsedCallOptions"], + runManager?: CallbackManagerForLLMRun + ): Promise { + const chunks = []; + for await (const chunk of this._streamResponseChunks( + prompt, + options, + runManager + )) { + chunks.push(chunk.text); + } + return chunks.join(""); + } +} diff --git a/libs/langchain-community/src/llms/portkey.ts b/libs/langchain-community/src/llms/portkey.ts new file mode 100644 index 000000000000..3eb6be719fc9 --- /dev/null +++ b/libs/langchain-community/src/llms/portkey.ts @@ -0,0 +1,181 @@ +import _ from "lodash"; +import { LLMOptions, Portkey as _Portkey } from "portkey-ai"; +import { CallbackManagerForLLMRun } from "@langchain/core/callbacks/manager"; +import { GenerationChunk, LLMResult } from "@langchain/core/outputs"; +import { getEnvironmentVariable } from "@langchain/core/utils/env"; +import { BaseLLM } from "@langchain/core/language_models/llms"; + +interface PortkeyOptions { + apiKey?: string; + baseURL?: string; + mode?: string; + llms?: [LLMOptions] | null; +} + +const readEnv = (env: string, default_val?: string): string | undefined => + getEnvironmentVariable(env) ?? 
default_val; + +export class PortkeySession { + portkey: _Portkey; + + constructor(options: PortkeyOptions = {}) { + if (!options.apiKey) { + /* eslint-disable no-param-reassign */ + options.apiKey = readEnv("PORTKEY_API_KEY"); + } + + if (!options.baseURL) { + /* eslint-disable no-param-reassign */ + options.baseURL = readEnv("PORTKEY_BASE_URL", "https://api.portkey.ai"); + } + + this.portkey = new _Portkey({}); + this.portkey.llms = [{}]; + if (!options.apiKey) { + throw new Error("Set Portkey ApiKey in PORTKEY_API_KEY env variable"); + } + + this.portkey = new _Portkey(options); + } +} + +const defaultPortkeySession: { + session: PortkeySession; + options: PortkeyOptions; +}[] = []; + +/** + * Get a session for the Portkey API. If one already exists with the same options, + * it will be returned. Otherwise, a new session will be created. + * @param options + * @returns + */ +export function getPortkeySession(options: PortkeyOptions = {}) { + let session = defaultPortkeySession.find((session) => + _.isEqual(session.options, options) + )?.session; + + if (!session) { + session = new PortkeySession(options); + defaultPortkeySession.push({ session, options }); + } + return session; +} + +/** + * @example + * ```typescript + * const model = new Portkey({ + * mode: "single", + * llms: [ + * { + * provider: "openai", + * virtual_key: "open-ai-key-1234", + * model: "text-davinci-003", + * max_tokens: 2000, + * }, + * ], + * }); + * + * // Stream the output of the model and process it + * const res = await model.stream( + * "Question: Write a story about a king\nAnswer:" + * ); + * for await (const i of res) { + * process.stdout.write(i); + * } + * ``` + */ +export class Portkey extends BaseLLM { + apiKey?: string = undefined; + + baseURL?: string = undefined; + + mode?: string = undefined; + + llms?: [LLMOptions] | null = undefined; + + session: PortkeySession; + + lc_namespace = ["langchain-community", "llms", this._llmType()]; + + constructor(init?: Partial) { + 
super(init ?? {}); + this.apiKey = init?.apiKey; + + this.baseURL = init?.baseURL; + + this.mode = init?.mode; + + this.llms = init?.llms; + + this.session = getPortkeySession({ + apiKey: this.apiKey, + baseURL: this.baseURL, + llms: this.llms, + mode: this.mode, + }); + } + + _llmType() { + return "portkey"; + } + + async _generate( + prompts: string[], + options: this["ParsedCallOptions"], + _?: CallbackManagerForLLMRun + ): Promise { + const choices = []; + for (let i = 0; i < prompts.length; i += 1) { + const response = await this.session.portkey.completions.create({ + prompt: prompts[i], + ...options, + stream: false, + }); + choices.push(response.choices); + } + const generations = choices.map((promptChoices) => + promptChoices.map((choice) => ({ + text: choice.text ?? "", + generationInfo: { + finishReason: choice.finish_reason, + logprobs: choice.logprobs, + }, + })) + ); + + return { + generations, + }; + } + + async *_streamResponseChunks( + input: string, + options: this["ParsedCallOptions"], + runManager?: CallbackManagerForLLMRun + ): AsyncGenerator { + const response = await this.session.portkey.completions.create({ + prompt: input, + ...options, + stream: true, + }); + for await (const data of response) { + const choice = data?.choices[0]; + if (!choice) { + continue; + } + const chunk = new GenerationChunk({ + text: choice.text ?? "", + generationInfo: { + finishReason: choice.finish_reason, + }, + }); + yield chunk; + void runManager?.handleLLMNewToken(chunk.text ?? 
""); + } + if (options.signal?.aborted) { + throw new Error("AbortError"); + } + } +} diff --git a/libs/langchain-community/src/llms/raycast.ts b/libs/langchain-community/src/llms/raycast.ts new file mode 100644 index 000000000000..257298079b5b --- /dev/null +++ b/libs/langchain-community/src/llms/raycast.ts @@ -0,0 +1,103 @@ +import { AI, environment } from "@raycast/api"; +import { LLM, type BaseLLMParams } from "@langchain/core/language_models/llms"; + +/** + * The input parameters for the RaycastAI class, which extends the BaseLLMParams interface. + */ +export interface RaycastAIInput extends BaseLLMParams { + model?: AI.Model; + creativity?: number; + rateLimitPerMinute?: number; +} + +const wait = (ms: number) => + new Promise((resolve) => { + setTimeout(resolve, ms); + }); + +/** + * The RaycastAI class, which extends the LLM class and implements the RaycastAIInput interface. + */ +export class RaycastAI extends LLM implements RaycastAIInput { + lc_serializable = true; + + lc_namespace = ["langchain-community", "llms", this._llmType()]; + + /** + * The model to use for generating text. + */ + model: AI.Model; + + /** + * The creativity parameter, also known as the "temperature". + */ + creativity: number; + + /** + * The rate limit for API calls, in requests per minute. + */ + rateLimitPerMinute: number; + + /** + * The timestamp of the last API call, used to enforce the rate limit. + */ + private lastCallTimestamp = 0; + + /** + * Creates a new instance of the RaycastAI class. + * @param {RaycastAIInput} fields The input parameters for the RaycastAI class. + * @throws {Error} If the Raycast AI environment is not accessible. + */ + constructor(fields: RaycastAIInput) { + super(fields ?? {}); + + if (!environment.canAccess(AI)) { + throw new Error("Raycast AI environment is not accessible."); + } + + this.model = fields.model ?? "text-davinci-003"; + this.creativity = fields.creativity ?? 0.5; + this.rateLimitPerMinute = fields.rateLimitPerMinute ?? 
10; + } + + /** + * Returns the type of the LLM, which is "raycast_ai". + * @return {string} The type of the LLM. + * @ignore + */ + _llmType() { + return "raycast_ai"; + } + + /** + * Calls AI.ask with the given prompt and returns the generated text. + * @param {string} prompt The prompt to generate text from. + * @return {Promise} A Promise that resolves to the generated text. + * @ignore + */ + async _call( + prompt: string, + options: this["ParsedCallOptions"] + ): Promise { + const response = await this.caller.call(async () => { + // Rate limit calls to Raycast AI + const now = Date.now(); + const timeSinceLastCall = now - this.lastCallTimestamp; + const timeToWait = + (60 / this.rateLimitPerMinute) * 1000 - timeSinceLastCall; + + if (timeToWait > 0) { + await wait(timeToWait); + } + + return await AI.ask(prompt, { + model: this.model, + creativity: this.creativity, + signal: options.signal, + }); + }); + + // Since Raycast AI returns the response directly, no need for output transformation + return response; + } +} diff --git a/libs/langchain-community/src/llms/replicate.ts b/libs/langchain-community/src/llms/replicate.ts new file mode 100644 index 000000000000..80df0d2bc211 --- /dev/null +++ b/libs/langchain-community/src/llms/replicate.ts @@ -0,0 +1,160 @@ +import { LLM, type BaseLLMParams } from "@langchain/core/language_models/llms"; +import { getEnvironmentVariable } from "@langchain/core/utils/env"; + +/** + * Interface defining the structure of the input data for the Replicate + * class. It includes details about the model to be used, any additional + * input parameters, and the API key for the Replicate service. + */ +export interface ReplicateInput { + // owner/model_name:version + model: `${string}/${string}:${string}`; + + input?: { + // different models accept different inputs + [key: string]: string | number | boolean; + }; + + apiKey?: string; + + /** The key used to pass prompts to the model. 
*/ + promptKey?: string; +} + +/** + * Class responsible for managing the interaction with the Replicate API. + * It handles the API key and model details, makes the actual API calls, + * and converts the API response into a format usable by the rest of the + * LangChain framework. + * @example + * ```typescript + * const model = new Replicate({ + * model: "replicate/flan-t5-xl:3ae0799123a1fe11f8c89fd99632f843fc5f7a761630160521c4253149754523", + * }); + * + * const res = await model.call( + * "Question: What would be a good company name for a company that makes colorful socks?\nAnswer:" + * ); + * console.log({ res }); + * ``` + */ +export class Replicate extends LLM implements ReplicateInput { + static lc_name() { + return "Replicate"; + } + + get lc_secrets(): { [key: string]: string } | undefined { + return { + apiKey: "REPLICATE_API_TOKEN", + }; + } + + lc_serializable = true; + + lc_namespace = ["langchain-community", "llms", this._llmType()]; + + model: ReplicateInput["model"]; + + input: ReplicateInput["input"]; + + apiKey: string; + + promptKey?: string; + + constructor(fields: ReplicateInput & BaseLLMParams) { + super(fields); + + const apiKey = + fields?.apiKey ?? + getEnvironmentVariable("REPLICATE_API_KEY") ?? // previous environment variable for backwards compatibility + getEnvironmentVariable("REPLICATE_API_TOKEN"); // current environment variable, matching the Python library + + if (!apiKey) { + throw new Error( + "Please set the REPLICATE_API_TOKEN environment variable" + ); + } + + this.apiKey = apiKey; + this.model = fields.model; + this.input = fields.input ?? 
{}; + this.promptKey = fields.promptKey; + } + + _llmType() { + return "replicate"; + } + + /** @ignore */ + async _call( + prompt: string, + options: this["ParsedCallOptions"] + ): Promise { + const imports = await Replicate.imports(); + + const replicate = new imports.Replicate({ + userAgent: "langchain", + auth: this.apiKey, + }); + + if (this.promptKey === undefined) { + const [modelString, versionString] = this.model.split(":"); + const version = await replicate.models.versions.get( + modelString.split("/")[0], + modelString.split("/")[1], + versionString + ); + const openapiSchema = version.openapi_schema; + const inputProperties: { "x-order": number | undefined }[] = + // eslint-disable-next-line @typescript-eslint/no-explicit-any + (openapiSchema as any)?.components?.schemas?.Input?.properties; + if (inputProperties === undefined) { + this.promptKey = "prompt"; + } else { + const sortedInputProperties = Object.entries(inputProperties).sort( + ([_keyA, valueA], [_keyB, valueB]) => { + const orderA = valueA["x-order"] || 0; + const orderB = valueB["x-order"] || 0; + return orderA - orderB; + } + ); + this.promptKey = sortedInputProperties[0][0] ?? "prompt"; + } + } + const output = await this.caller.callWithOptions( + { signal: options.signal }, + () => + replicate.run(this.model, { + input: { + // eslint-disable-next-line @typescript-eslint/no-non-null-assertion + [this.promptKey!]: prompt, + ...this.input, + }, + }) + ); + + if (typeof output === "string") { + return output; + } else if (Array.isArray(output)) { + return output.join(""); + } else { + // Note this is a little odd, but the output format is not consistent + // across models, so it makes some amount of sense. 
+ return String(output); + } + } + + /** @ignore */ + static async imports(): Promise<{ + Replicate: typeof import("replicate").default; + }> { + try { + const { default: Replicate } = await import("replicate"); + return { Replicate }; + } catch (e) { + throw new Error( + "Please install replicate as a dependency with, e.g. `yarn add replicate`" + ); + } + } +} diff --git a/libs/langchain-community/src/llms/sagemaker_endpoint.ts b/libs/langchain-community/src/llms/sagemaker_endpoint.ts new file mode 100644 index 000000000000..a8f63140d6f5 --- /dev/null +++ b/libs/langchain-community/src/llms/sagemaker_endpoint.ts @@ -0,0 +1,287 @@ +import { + InvokeEndpointCommand, + InvokeEndpointWithResponseStreamCommand, + SageMakerRuntimeClient, + SageMakerRuntimeClientConfig, +} from "@aws-sdk/client-sagemaker-runtime"; +import { CallbackManagerForLLMRun } from "@langchain/core/callbacks/manager"; +import { GenerationChunk } from "@langchain/core/outputs"; +import { type BaseLLMCallOptions, type BaseLLMParams, LLM } from "@langchain/core/language_models/llms"; + +/** + * A handler class to transform input from LLM to a format that SageMaker + * endpoint expects. Similarily, the class also handles transforming output from + * the SageMaker endpoint to a format that LLM class expects. 
+ * + * Example: + * ``` + * class ContentHandler implements ContentHandlerBase { + * contentType = "application/json" + * accepts = "application/json" + * + * transformInput(prompt: string, modelKwargs: Record) { + * const inputString = JSON.stringify({ + * prompt, + * ...modelKwargs + * }) + * return Buffer.from(inputString) + * } + * + * transformOutput(output: Uint8Array) { + * const responseJson = JSON.parse(Buffer.from(output).toString("utf-8")) + * return responseJson[0].generated_text + * } + * + * } + * ``` + */ +export abstract class BaseSageMakerContentHandler { + contentType = "text/plain"; + + accepts = "text/plain"; + + /** + * Transforms the prompt and model arguments into a specific format for sending to SageMaker. + * @param {InputType} prompt The prompt to be transformed. + * @param {Record} modelKwargs Additional arguments. + * @returns {Promise} A promise that resolves to the formatted data for sending. + */ + abstract transformInput( + prompt: InputType, + modelKwargs: Record + ): Promise; + + /** + * Transforms SageMaker output into a desired format. + * @param {Uint8Array} output The raw output from SageMaker. + * @returns {Promise} A promise that resolves to the transformed data. + */ + abstract transformOutput(output: Uint8Array): Promise; +} + +export type SageMakerLLMContentHandler = BaseSageMakerContentHandler< + string, + string +>; + +/** + * The SageMakerEndpointInput interface defines the input parameters for + * the SageMakerEndpoint class, which includes the endpoint name, client + * options for the SageMaker client, the content handler, and optional + * keyword arguments for the model and the endpoint. + */ +export interface SageMakerEndpointInput extends BaseLLMParams { + /** + * The name of the endpoint from the deployed SageMaker model. Must be unique + * within an AWS Region. + */ + endpointName: string; + /** + * Options passed to the SageMaker client. 
+ */ + clientOptions: SageMakerRuntimeClientConfig; + /** + * Key word arguments to pass to the model. + */ + modelKwargs?: Record; + /** + * Optional attributes passed to the InvokeEndpointCommand + */ + endpointKwargs?: Record; + /** + * The content handler class that provides an input and output transform + * functions to handle formats between LLM and the endpoint. + */ + contentHandler: SageMakerLLMContentHandler; + streaming?: boolean; +} + +/** + * The SageMakerEndpoint class is used to interact with SageMaker + * Inference Endpoint models. It uses the AWS client for authentication, + * which automatically loads credentials. + * If a specific credential profile is to be used, the name of the profile + * from the ~/.aws/credentials file must be passed. The credentials or + * roles used should have the required policies to access the SageMaker + * endpoint. + */ +export class SageMakerEndpoint extends LLM { + lc_serializable = true; + + lc_namespace = ["langchain-community", "llms", this._llmType()]; + + static lc_name() { + return "SageMakerEndpoint"; + } + + get lc_secrets(): { [key: string]: string } | undefined { + return { + "clientOptions.credentials.accessKeyId": "AWS_ACCESS_KEY_ID", + "clientOptions.credentials.secretAccessKey": "AWS_SECRET_ACCESS_KEY", + "clientOptions.credentials.sessionToken": "AWS_SESSION_TOKEN", + }; + } + + endpointName: string; + + modelKwargs?: Record; + + endpointKwargs?: Record; + + client: SageMakerRuntimeClient; + + contentHandler: SageMakerLLMContentHandler; + + streaming: boolean; + + constructor(fields: SageMakerEndpointInput) { + super(fields); + + if (!fields.clientOptions.region) { + throw new Error( + `Please pass a "clientOptions" object with a "region" field to the constructor` + ); + } + + const endpointName = fields?.endpointName; + if (!endpointName) { + throw new Error(`Please pass an "endpointName" field to the constructor`); + } + + const contentHandler = fields?.contentHandler; + if (!contentHandler) { + 
throw new Error( + `Please pass a "contentHandler" field to the constructor` + ); + } + + this.endpointName = fields.endpointName; + this.contentHandler = fields.contentHandler; + this.endpointKwargs = fields.endpointKwargs; + this.modelKwargs = fields.modelKwargs; + this.streaming = fields.streaming ?? false; + this.client = new SageMakerRuntimeClient(fields.clientOptions); + } + + _llmType() { + return "sagemaker_endpoint"; + } + + /** + * Calls the SageMaker endpoint and retrieves the result. + * @param {string} prompt The input prompt. + * @param {this["ParsedCallOptions"]} options Parsed call options. + * @param {CallbackManagerForLLMRun} runManager Optional run manager. + * @returns {Promise} A promise that resolves to the generated string. + */ + /** @ignore */ + async _call( + prompt: string, + options: this["ParsedCallOptions"], + runManager?: CallbackManagerForLLMRun + ): Promise { + return this.streaming + ? await this.streamingCall(prompt, options, runManager) + : await this.noStreamingCall(prompt, options); + } + + private async streamingCall( + prompt: string, + options: this["ParsedCallOptions"], + runManager?: CallbackManagerForLLMRun + ): Promise { + const chunks = []; + for await (const chunk of this._streamResponseChunks( + prompt, + options, + runManager + )) { + chunks.push(chunk.text); + } + return chunks.join(""); + } + + private async noStreamingCall( + prompt: string, + options: this["ParsedCallOptions"] + ): Promise { + const body = await this.contentHandler.transformInput( + prompt, + this.modelKwargs ?? 
{} + ); + const { contentType, accepts } = this.contentHandler; + + const response = await this.caller.call(() => + this.client.send( + new InvokeEndpointCommand({ + EndpointName: this.endpointName, + Body: body, + ContentType: contentType, + Accept: accepts, + ...this.endpointKwargs, + }), + { abortSignal: options.signal } + ) + ); + + if (response.Body === undefined) { + throw new Error("Inference result missing Body"); + } + return this.contentHandler.transformOutput(response.Body); + } + + /** + * Streams response chunks from the SageMaker endpoint. + * @param {string} prompt The input prompt. + * @param {this["ParsedCallOptions"]} options Parsed call options. + * @returns {AsyncGenerator} An asynchronous generator yielding generation chunks. + */ + async *_streamResponseChunks( + prompt: string, + options: this["ParsedCallOptions"], + runManager?: CallbackManagerForLLMRun + ): AsyncGenerator { + const body = await this.contentHandler.transformInput( + prompt, + this.modelKwargs ?? {} + ); + const { contentType, accepts } = this.contentHandler; + + const stream = await this.caller.call(() => + this.client.send( + new InvokeEndpointWithResponseStreamCommand({ + EndpointName: this.endpointName, + Body: body, + ContentType: contentType, + Accept: accepts, + ...this.endpointKwargs, + }), + { abortSignal: options.signal } + ) + ); + + if (!stream.Body) { + throw new Error("Inference result missing Body"); + } + + for await (const chunk of stream.Body) { + if (chunk.PayloadPart && chunk.PayloadPart.Bytes) { + const text = await this.contentHandler.transformOutput( + chunk.PayloadPart.Bytes + ); + yield new GenerationChunk({ + text, + generationInfo: { + ...chunk, + response: undefined, + }, + }); + await runManager?.handleLLMNewToken(text); + } else if (chunk.InternalStreamFailure) { + throw new Error(chunk.InternalStreamFailure.message); + } else if (chunk.ModelStreamError) { + throw new Error(chunk.ModelStreamError.message); + } + } + } +} diff --git 
a/libs/langchain-community/src/llms/tests/ai21.int.test.ts b/libs/langchain-community/src/llms/tests/ai21.int.test.ts new file mode 100644 index 000000000000..a922bb68a33b --- /dev/null +++ b/libs/langchain-community/src/llms/tests/ai21.int.test.ts @@ -0,0 +1,51 @@ +import { test, describe, expect } from "@jest/globals"; +import { AI21 } from "../ai21.js"; + +describe.skip("AI21", () => { + test("test call", async () => { + const ai21 = new AI21({}); + const result = await ai21.call( + "What is a good name for a company that makes colorful socks?" + ); + console.log({ result }); + }); + + test("test translation call", async () => { + const ai21 = new AI21({}); + const result = await ai21.call( + `Translate "I love programming" into German.` + ); + console.log({ result }); + }); + + test("test JSON output call", async () => { + const ai21 = new AI21({}); + const result = await ai21.call( + `Output a JSON object with three string fields: "name", "birthplace", "bio".` + ); + console.log({ result }); + }); + + test("should abort the request", async () => { + const ai21 = new AI21({}); + const controller = new AbortController(); + + await expect(() => { + const ret = ai21.call("Respond with an extremely verbose response", { + signal: controller.signal, + }); + controller.abort(); + return ret; + }).rejects.toThrow("AbortError: This operation was aborted"); + }); + + test("throws an error when response status is not ok", async () => { + const ai21 = new AI21({ + ai21ApiKey: "BAD_KEY", + }); + + await expect(ai21.call("Test prompt")).rejects.toThrow( + "AI21 call failed with status code 401" + ); + }); +}); diff --git a/libs/langchain-community/src/llms/tests/aleph_alpha.int.test.ts b/libs/langchain-community/src/llms/tests/aleph_alpha.int.test.ts new file mode 100644 index 000000000000..203e5ca3a12f --- /dev/null +++ b/libs/langchain-community/src/llms/tests/aleph_alpha.int.test.ts @@ -0,0 +1,54 @@ +import { test, describe, expect } from "@jest/globals"; +import { 
AlephAlpha } from "../aleph_alpha.js"; + +describe("Aleph Alpha", () => { + test("test call", async () => { + const aleph_alpha = new AlephAlpha({}); + const result = await aleph_alpha.call( + "What is a good name for a company that makes colorful socks?" + ); + console.log({ result }); + }); + + test("test translation call", async () => { + const aleph_alpha = new AlephAlpha({}); + const result = await aleph_alpha.call( + `Translate "I love programming" into German.` + ); + console.log({ result }); + }); + + test("test JSON output call", async () => { + const aleph_alpha = new AlephAlpha({}); + const result = await aleph_alpha.call( + `Output a JSON object with three string fields: "name", "birthplace", "bio".` + ); + console.log({ result }); + }); + + test("should abort the request", async () => { + const aleph_alpha = new AlephAlpha({}); + const controller = new AbortController(); + + await expect(() => { + const ret = aleph_alpha.call( + "Respond with an extremely verbose response", + { + signal: controller.signal, + } + ); + controller.abort(); + return ret; + }).rejects.toThrow("AbortError: This operation was aborted"); + }); + + test("throws an error when response status is not ok", async () => { + const aleph_alpha = new AlephAlpha({ + aleph_alpha_api_key: "BAD_KEY", + }); + + await expect(aleph_alpha.call("Test prompt")).rejects.toThrow( + 'Aleph Alpha call failed with status 401 and body {"error":"InvalidToken","code":"UNAUTHENTICATED"}' + ); + }); +}); diff --git a/libs/langchain-community/src/llms/tests/bedrock.int.test.ts b/libs/langchain-community/src/llms/tests/bedrock.int.test.ts new file mode 100644 index 000000000000..feb2e133028a --- /dev/null +++ b/libs/langchain-community/src/llms/tests/bedrock.int.test.ts @@ -0,0 +1,182 @@ +/* eslint-disable no-process-env */ +/* eslint-disable @typescript-eslint/no-non-null-assertion */ + +import { test, expect } from "@jest/globals"; +import { Bedrock } from "../bedrock/index.js"; + +test("Test Bedrock LLM: 
AI21", async () => { + const region = process.env.BEDROCK_AWS_REGION!; + const model = "ai21.j2-grande-instruct"; + const prompt = "Human: What is your name?"; + + const bedrock = new Bedrock({ + maxTokens: 20, + region, + model, + maxRetries: 0, + credentials: { + accessKeyId: process.env.BEDROCK_AWS_ACCESS_KEY_ID!, + secretAccessKey: process.env.BEDROCK_AWS_SECRET_ACCESS_KEY!, + sessionToken: process.env.BEDROCK_AWS_SESSION_TOKEN, + }, + }); + + const res = await bedrock.call(prompt); + expect(typeof res).toBe("string"); + + console.log(res); +}); + +test.skip("Test Bedrock LLM: Meta Llama2", async () => { + const region = process.env.BEDROCK_AWS_REGION!; + const model = "meta.llama2-13b-chat-v1"; + const prompt = "Human: What is your name?"; + + const bedrock = new Bedrock({ + maxTokens: 20, + region, + model, + maxRetries: 0, + credentials: { + accessKeyId: process.env.BEDROCK_AWS_ACCESS_KEY_ID!, + secretAccessKey: process.env.BEDROCK_AWS_SECRET_ACCESS_KEY!, + sessionToken: process.env.BEDROCK_AWS_SESSION_TOKEN, + }, + }); + + const res = await bedrock.call(prompt); + expect(typeof res).toBe("string"); + + console.log(res); +}); + +test.skip("Test Bedrock LLM streaming: Meta Llama2", async () => { + const region = process.env.BEDROCK_AWS_REGION!; + const model = "meta.llama2-13b-chat-v1"; + const prompt = "What is your name?"; + + const bedrock = new Bedrock({ + maxTokens: 20, + region, + model, + maxRetries: 0, + credentials: { + accessKeyId: process.env.BEDROCK_AWS_ACCESS_KEY_ID!, + secretAccessKey: process.env.BEDROCK_AWS_SECRET_ACCESS_KEY!, + sessionToken: process.env.BEDROCK_AWS_SESSION_TOKEN, + }, + }); + + const stream = await bedrock.stream(prompt); + const chunks = []; + for await (const chunk of stream) { + console.log(chunk); + chunks.push(chunk); + } + expect(chunks.length).toBeGreaterThan(1); +}); + +test("Test Bedrock LLM: Claude-v2", async () => { + const region = process.env.BEDROCK_AWS_REGION!; + const model = "anthropic.claude-v2"; + const 
prompt = "Human: What is your name?\n\nAssistant:"; + + const bedrock = new Bedrock({ + maxTokens: 20, + region, + model, + maxRetries: 0, + credentials: { + accessKeyId: process.env.BEDROCK_AWS_ACCESS_KEY_ID!, + secretAccessKey: process.env.BEDROCK_AWS_SECRET_ACCESS_KEY!, + sessionToken: process.env.BEDROCK_AWS_SESSION_TOKEN, + }, + }); + + const res = await bedrock.call(prompt); + expect(typeof res).toBe("string"); + console.log(res); +}); + +test("Test Bedrock LLM streaming: AI21", async () => { + const region = process.env.BEDROCK_AWS_REGION!; + const model = "ai21.j2-grande-instruct"; + const prompt = "Human: What is your name?"; + + const bedrock = new Bedrock({ + maxTokens: 20, + region, + model, + maxRetries: 0, + credentials: { + accessKeyId: process.env.BEDROCK_AWS_ACCESS_KEY_ID!, + secretAccessKey: process.env.BEDROCK_AWS_SECRET_ACCESS_KEY!, + sessionToken: process.env.BEDROCK_AWS_SESSION_TOKEN, + }, + }); + + const stream = await bedrock.stream(prompt); + const chunks = []; + for await (const chunk of stream) { + console.log(chunk); + chunks.push(chunk); + } + expect(chunks.length).toEqual(1); +}); + +test("Test Bedrock LLM handleLLMNewToken: Claude-v2", async () => { + const region = process.env.BEDROCK_AWS_REGION!; + const model = "anthropic.claude-v2"; + const prompt = "Human: What is your name?\n\nAssistant:"; + const tokens: string[] = []; + + const bedrock = new Bedrock({ + maxTokens: 20, + region, + model, + maxRetries: 0, + credentials: { + accessKeyId: process.env.BEDROCK_AWS_ACCESS_KEY_ID!, + secretAccessKey: process.env.BEDROCK_AWS_SECRET_ACCESS_KEY!, + sessionToken: process.env.BEDROCK_AWS_SESSION_TOKEN, + }, + streaming: true, + callbacks: [ + { + handleLLMNewToken(token) { + tokens.push(token); + }, + }, + ], + }); + + const stream = await bedrock.call(prompt); + expect(tokens.length).toBeGreaterThan(1); + expect(stream).toEqual(tokens.join("")); +}); + +test("Test Bedrock LLM streaming: Claude-v2", async () => { + const region = 
process.env.BEDROCK_AWS_REGION!; + const model = "anthropic.claude-v2"; + const prompt = "Human: What is your name?\n\nAssistant:"; + + const bedrock = new Bedrock({ + maxTokens: 20, + region, + model, + maxRetries: 0, + credentials: { + accessKeyId: process.env.BEDROCK_AWS_ACCESS_KEY_ID!, + secretAccessKey: process.env.BEDROCK_AWS_SECRET_ACCESS_KEY!, + sessionToken: process.env.BEDROCK_AWS_SESSION_TOKEN, + }, + }); + + const stream = await bedrock.stream(prompt); + const chunks = []; + for await (const chunk of stream) { + console.log(chunk); + chunks.push(chunk); + } + expect(chunks.length).toBeGreaterThan(1); +}); diff --git a/libs/langchain-community/src/llms/tests/cloudflare_workersai.int.test.ts b/libs/langchain-community/src/llms/tests/cloudflare_workersai.int.test.ts new file mode 100644 index 000000000000..6f64c199888c --- /dev/null +++ b/libs/langchain-community/src/llms/tests/cloudflare_workersai.int.test.ts @@ -0,0 +1,50 @@ +import { test } from "@jest/globals"; +import { CloudflareWorkersAI } from "../cloudflare_workersai.js"; +import { getEnvironmentVariable } from "../../util/env.js"; + +test("Test CloudflareWorkersAI", async () => { + const model = new CloudflareWorkersAI({}); + const res = await model.call("1 + 1 ="); + console.log(res); +}, 50000); + +test("generate with streaming true", async () => { + const model = new CloudflareWorkersAI({ + streaming: true, + }); + const tokens: string[] = []; + const res = await model.call("What is 2 + 2?", { + callbacks: [ + { + handleLLMNewToken: (token) => { + console.log(token); + tokens.push(token); + }, + }, + ], + }); + expect(tokens.length).toBeGreaterThan(1); + expect(tokens.join("")).toEqual(res); +}); + +test("Test CloudflareWorkersAI streaming", async () => { + const model = new CloudflareWorkersAI({}); + const stream = await model.stream("What is 2 + 2?"); + const chunks = []; + for await (const chunk of stream) { + chunks.push(chunk); + console.log(chunk); + } + 
expect(chunks.length).toBeGreaterThan(1); + console.log(chunks.join("")); +}, 50000); + +test.skip("Test custom base url", async () => { + const model = new CloudflareWorkersAI({ + baseUrl: `https://gateway.ai.cloudflare.com/v1/${getEnvironmentVariable( + "CLOUDFLARE_ACCOUNT_ID" + )}/lang-chainjs/workers-ai/`, + }); + const res = await model.call("1 + 1 ="); + console.log(res); +}); diff --git a/libs/langchain-community/src/llms/tests/cohere.int.test.ts b/libs/langchain-community/src/llms/tests/cohere.int.test.ts new file mode 100644 index 000000000000..2964d63804e2 --- /dev/null +++ b/libs/langchain-community/src/llms/tests/cohere.int.test.ts @@ -0,0 +1,8 @@ +import { test } from "@jest/globals"; +import { Cohere } from "../cohere.js"; + +test("Test Cohere", async () => { + const model = new Cohere({ maxTokens: 20 }); + const res = await model.call("1 + 1 ="); + console.log(res); +}, 50000); diff --git a/libs/langchain-community/src/llms/tests/fake.test.ts b/libs/langchain-community/src/llms/tests/fake.test.ts new file mode 100644 index 000000000000..3fc53c0cf1b8 --- /dev/null +++ b/libs/langchain-community/src/llms/tests/fake.test.ts @@ -0,0 +1,98 @@ +import { describe, test, expect, jest } from "@jest/globals"; +import { FakeListLLM } from "../fake.js"; + +describe("Test FakeListLLM", () => { + test("Should exist", async () => { + const llm = new FakeListLLM({ responses: ["test response"] }); + const response = await llm.call("test prompt"); + + expect(typeof response).toBe("string"); + }); + + test("Should return responses in order", async () => { + const llm = new FakeListLLM({ + responses: ["test response 1", "test response 2"], + }); + const response1 = await llm.call("test prompt"); + const response2 = await llm.call("test prompt"); + + expect(response1).toBe("test response 1"); + expect(response2).toBe("test response 2"); + }); + + test("Should reset index when all responses have been returned", async () => { + const llm = new FakeListLLM({ + responses: 
["test response 1", "test response 2"], + }); + const response1 = await llm.call("test prompt"); + const response2 = await llm.call("test prompt"); + const response3 = await llm.call("test prompt"); + + expect(response1).toBe("test response 1"); + expect(response2).toBe("test response 2"); + expect(response3).toBe("test response 1"); + }); + + test("Should return responses after sleep if requested", async () => { + const llm = new FakeListLLM({ + responses: ["test response 1", "test response 2"], + sleep: 10, + }); + const sleepSpy = jest.spyOn(llm, "_sleep"); + + await llm.call("test prompt"); + + expect(sleepSpy).toHaveBeenCalledTimes(1); + }, 3000); + + test("Should stream responses if requested", async () => { + const llm = new FakeListLLM({ + responses: ["test response 1", "test response 2"], + }); + const chunks = []; + + const response = await llm.stream("test prompt"); + for await (const chunk of response) { + chunks.push(chunk); + } + + expect(chunks.length).toBeGreaterThan(1); + expect(chunks.join("")).toBe("test response 1"); + }); + + test("Should return responses in order when streaming", async () => { + const llm = new FakeListLLM({ + responses: ["test response 1", "test response 2"], + }); + const chunks1 = []; + const chunks2 = []; + + const response1 = await llm.stream("test prompt"); + for await (const chunk of response1) { + chunks1.push(chunk); + } + const response2 = await llm.stream("test prompt"); + for await (const chunk of response2) { + chunks2.push(chunk); + } + + expect(chunks1.join("")).toBe("test response 1"); + expect(chunks2.join("")).toBe("test response 2"); + }); + + test("Should stream responses after sleep if requested", async () => { + const llm = new FakeListLLM({ + responses: ["test response 1", "test response 2"], + sleep: 10, + }); + const sleepSpy = jest.spyOn(llm, "_sleep"); + const chunks = []; + + const response = await llm.stream("test prompt"); + for await (const chunk of response) { + chunks.push(chunk); + } + + 
expect(sleepSpy).toHaveBeenCalledTimes(chunks.length); + }, 3000); +}); diff --git a/libs/langchain-community/src/llms/tests/fireworks.int.test.ts b/libs/langchain-community/src/llms/tests/fireworks.int.test.ts new file mode 100644 index 000000000000..9eb2f604288f --- /dev/null +++ b/libs/langchain-community/src/llms/tests/fireworks.int.test.ts @@ -0,0 +1,20 @@ +import { test, expect } from "@jest/globals"; +import { Fireworks } from "../fireworks.js"; + +describe("Fireworks", () => { + test("call", async () => { + const model = new Fireworks({ maxTokens: 50 }); + const res = await model.call("1 + 1 = "); + console.log({ res }); + }); + + test("generate", async () => { + const model = new Fireworks({ maxTokens: 50 }); + const res = await model.generate(["1 + 1 = "]); + console.log(JSON.stringify(res, null, 2)); + + await expect( + async () => await model.generate(["1 + 1 = ", "2 + 2 = "]) + ).rejects.toThrow(); + }); +}); diff --git a/libs/langchain-community/src/llms/tests/googlepalm.int.test.ts b/libs/langchain-community/src/llms/tests/googlepalm.int.test.ts new file mode 100644 index 000000000000..653d955db68a --- /dev/null +++ b/libs/langchain-community/src/llms/tests/googlepalm.int.test.ts @@ -0,0 +1,32 @@ +import { test } from "@jest/globals"; +import { GooglePaLM } from "../googlepalm.js"; + +test.skip("Test Google Palm", async () => { + const model = new GooglePaLM(); + const res = await model.call("what is 1 + 1?"); + console.log({ res }); + expect(res).toBeTruthy(); +}); + +test.skip("Test Google Palm generation", async () => { + const model = new GooglePaLM(); + const res = await model.generate(["what is 1 + 1?"]); + console.log(JSON.stringify(res, null, 2)); + expect(res).toBeTruthy(); +}); + +test.skip("Test Google Palm generation", async () => { + const model = new GooglePaLM(); + const res = await model.generate(["Print hello world."]); + console.log(JSON.stringify(res, null, 2)); + expect(res).toBeTruthy(); +}); + +test.skip("Test Google Palm 
generation", async () => { + const model = new GooglePaLM(); + const res = await model.generate([ + `Translate "I love programming" into Korean.`, + ]); + console.log(JSON.stringify(res, null, 2)); + expect(res).toBeTruthy(); +}); diff --git a/libs/langchain-community/src/llms/tests/googlepalm.test.ts b/libs/langchain-community/src/llms/tests/googlepalm.test.ts new file mode 100644 index 000000000000..ac28f763b40a --- /dev/null +++ b/libs/langchain-community/src/llms/tests/googlepalm.test.ts @@ -0,0 +1,75 @@ +import { test } from "@jest/globals"; +import { GooglePaLM } from "../googlepalm.js"; + +test("Google Palm - `temperature` must be in range [0.0,1.0]", async () => { + expect( + () => + new GooglePaLM({ + temperature: -1.0, + }) + ).toThrow(); + expect( + () => + new GooglePaLM({ + temperature: 1.1, + }) + ).toThrow(); +}); + +test("Google Palm - `maxOutputTokens` must be positive", async () => { + expect( + () => + new GooglePaLM({ + maxOutputTokens: -1, + }) + ).toThrow(); +}); + +test("Google Palm - `topP` must be positive", async () => { + expect( + () => + new GooglePaLM({ + topP: -1, + }) + ).toThrow(); +}); + +test("Google Palm - `topP` must be in the range [0,1]", async () => { + expect( + () => + new GooglePaLM({ + topP: 3, + }) + ).toThrow(); +}); + +test("Google Palm - `topK` must be positive", async () => { + expect( + () => + new GooglePaLM({ + topK: -1, + }) + ).toThrow(); +}); + +test("Google Palm - `safetySettings` category array must be unique", async () => { + expect( + () => + new GooglePaLM({ + safetySettings: [ + { + category: "HARM_CATEGORY_DANGEROUS", + threshold: 1, + }, + { + category: "HARM_CATEGORY_DANGEROUS", + threshold: 2, + }, + { + category: "HARM_CATEGORY_DEROGATORY", + threshold: 1, + }, + ], + }) + ).toThrow(); +}); diff --git a/libs/langchain-community/src/llms/tests/googlevertexai.int.test.ts b/libs/langchain-community/src/llms/tests/googlevertexai.int.test.ts new file mode 100644 index 000000000000..8d601101d290 --- 
/dev/null +++ b/libs/langchain-community/src/llms/tests/googlevertexai.int.test.ts @@ -0,0 +1,78 @@ +import { expect, test } from "@jest/globals"; +import { GoogleVertexAI } from "../googlevertexai/index.js"; + +describe("Vertex AI", () => { + test("Test Google Vertex", async () => { + const model = new GoogleVertexAI({ maxOutputTokens: 50 }); + const res = await model.call("1 + 1 = "); + console.log({ res }); + }); + + test("Test Google Vertex generation", async () => { + const model = new GoogleVertexAI({ maxOutputTokens: 50 }); + const res = await model.generate(["1 + 1 = "]); + console.log(JSON.stringify(res, null, 2)); + }); + + test("Test Google Vertex generation", async () => { + const model = new GoogleVertexAI({ maxOutputTokens: 50 }); + const res = await model.generate(["Print hello world."]); + console.log(JSON.stringify(res, null, 2)); + }); + + test("Test Google Vertex generation", async () => { + const model = new GoogleVertexAI({ maxOutputTokens: 50 }); + const res = await model.generate([ + `Translate "I love programming" into Korean.`, + ]); + console.log(JSON.stringify(res, null, 2)); + }); + + test("Test Google Vertex Codey gecko model", async () => { + const model = new GoogleVertexAI({ model: "code-gecko" }); + expect(model.model).toEqual("code-gecko"); + expect(model.temperature).toEqual(0.2); + expect(model.maxOutputTokens).toEqual(64); + + const res = await model.call("for( let co = 0"); + console.log(res); + }); + + test("Test Google Vertex Codey bison model", async () => { + const model = new GoogleVertexAI({ + model: "code-bison", + maxOutputTokens: 2048, + }); + expect(model.model).toEqual("code-bison"); + + const res = await model.call("Count to 10 in JavaScript."); + console.log(res); + }); + + test("Test Google Vertex bison-32k model", async () => { + const model = new GoogleVertexAI({ + model: "text-bison-32k", + maxOutputTokens: 50, + }); + const res = await model.call("1 + 1 = "); + console.log({ res }); + }); + + test("streaming 
text", async () => { + const model = new GoogleVertexAI({ + model: "text-bison", + maxOutputTokens: 2048, + }); + + const stream = await model.stream( + "What is the answer to life, the universe, and everything. Be Verbose." + ); + const chunks = []; + for await (const chunk of stream) { + chunks.push(chunk); + console.log("chunk", chunk); + } + expect(chunks.length).toBeGreaterThan(1); + expect(chunks[chunks.length - 1]).toEqual(""); + }); +}); diff --git a/libs/langchain-community/src/llms/tests/googlevertexai_web.int.test.ts b/libs/langchain-community/src/llms/tests/googlevertexai_web.int.test.ts new file mode 100644 index 000000000000..553c47d75106 --- /dev/null +++ b/libs/langchain-community/src/llms/tests/googlevertexai_web.int.test.ts @@ -0,0 +1,77 @@ +import { expect, test } from "@jest/globals"; +import { GoogleVertexAI } from "../googlevertexai/web.js"; + +describe("Web Vertex AI", () => { + test("Test Google Vertex", async () => { + const model = new GoogleVertexAI({ maxOutputTokens: 50 }); + const res = await model.call("1 + 1 = "); + console.log({ res }); + }); + + test("Test Google Vertex generation", async () => { + const model = new GoogleVertexAI({ maxOutputTokens: 50 }); + const res = await model.generate(["1 + 1 = "]); + console.log(JSON.stringify(res, null, 2)); + }); + + test("Test Google Vertex generation", async () => { + const model = new GoogleVertexAI({ maxOutputTokens: 50 }); + const res = await model.generate(["Print hello world."]); + console.log(JSON.stringify(res, null, 2)); + }); + + test("Test Google Vertex generation", async () => { + const model = new GoogleVertexAI({ maxOutputTokens: 50 }); + const res = await model.generate([ + `Translate "I love programming" into Korean.`, + ]); + console.log(JSON.stringify(res, null, 2)); + }); + + test("Test Google Vertex Codey gecko model", async () => { + const model = new GoogleVertexAI({ model: "code-gecko" }); + expect(model.model).toEqual("code-gecko"); + 
expect(model.temperature).toEqual(0.2); + expect(model.maxOutputTokens).toEqual(64); + + const res = await model.call("for( let co = 0"); + console.log(res); + }); + + test("Test Google Vertex Codey bison model", async () => { + const model = new GoogleVertexAI({ + model: "code-bison", + maxOutputTokens: 2048, + }); + expect(model.model).toEqual("code-bison"); + + const res = await model.call("Count to 10 in JavaScript."); + console.log(res); + }); + + test("Test Google Vertex bison-32k model", async () => { + const model = new GoogleVertexAI({ + model: "text-bison-32k", + maxOutputTokens: 50, + }); + const res = await model.call("1 + 1 = "); + console.log({ res }); + }); + + test("Test Google Vertex stream returns one chunk", async () => { + const model = new GoogleVertexAI({ + model: "text-bison", + maxOutputTokens: 2048, + }); + + const stream = await model.stream( + "What is the answer to life, the universe, and everything?" + ); + const chunks = []; + for await (const chunk of stream) { + chunks.push(chunk); + console.log(chunk); + } + expect(chunks.length).toBeGreaterThan(1); + }); +}); diff --git a/libs/langchain-community/src/llms/tests/huggingface_hub.int.test.ts b/libs/langchain-community/src/llms/tests/huggingface_hub.int.test.ts new file mode 100644 index 000000000000..f2d061bb339b --- /dev/null +++ b/libs/langchain-community/src/llms/tests/huggingface_hub.int.test.ts @@ -0,0 +1,8 @@ +import { test } from "@jest/globals"; +import { HuggingFaceInference } from "../hf.js"; + +test("Test HuggingFace", async () => { + const model = new HuggingFaceInference({ temperature: 0.1, topP: 0.5 }); + const res = await model.call("1 + 1 ="); + console.log(res); +}, 50000); diff --git a/libs/langchain-community/src/llms/tests/llama_cpp.int.test.ts b/libs/langchain-community/src/llms/tests/llama_cpp.int.test.ts new file mode 100644 index 000000000000..d0fe6cc4268e --- /dev/null +++ b/libs/langchain-community/src/llms/tests/llama_cpp.int.test.ts @@ -0,0 +1,47 @@ +/* 
eslint-disable @typescript-eslint/no-non-null-assertion */ +import { test } from "@jest/globals"; +import { getEnvironmentVariable } from "../../util/env.js"; +import { LlamaCpp } from "../llama_cpp.js"; + +const llamaPath = getEnvironmentVariable("LLAMA_PATH")!; + +test.skip("Test Llama_CPP", async () => { + const model = new LlamaCpp({ modelPath: llamaPath }); + const res = await model.call("Where do Llamas live?"); + console.log(res); +}, 100000); + +test.skip("Test Llama_CPP", async () => { + const model = new LlamaCpp({ modelPath: llamaPath }); + const res = await model.call("Where do Pandas live?"); + console.log(res); +}, 100000); + +test.skip("Test Llama_CPP", async () => { + const model = new LlamaCpp({ modelPath: llamaPath }); + + // Attempt to make several queries and make sure that the system prompt + // is not returned as part of any follow-on query. + for (let i = 0; i < 5; i += 1) { + const res = await model.call("Where do Pandas live?"); + expect(res).not.toContain( + "You are a helpful, respectful and honest assistant." + ); + } +}, 100000); + +test.skip("Test Llama_CPP", async () => { + const model = new LlamaCpp({ modelPath: llamaPath, temperature: 0.7 }); + + const stream = await model.stream( + "Tell me a short story about a happy Llama." 
+ ); + + const chunks = []; + for await (const chunk of stream) { + chunks.push(chunk); + process.stdout.write(chunks.join("")); + } + + expect(chunks.length).toBeGreaterThan(1); +}); diff --git a/libs/langchain-community/src/llms/tests/ollama.int.test.ts b/libs/langchain-community/src/llms/tests/ollama.int.test.ts new file mode 100644 index 000000000000..d63d3cc0dbd4 --- /dev/null +++ b/libs/langchain-community/src/llms/tests/ollama.int.test.ts @@ -0,0 +1,113 @@ +import { test } from "@jest/globals"; +import { Ollama } from "../ollama.js"; +import { PromptTemplate } from "../../prompts/prompt.js"; +import { + BytesOutputParser, + StringOutputParser, +} from "../../schema/output_parser.js"; + +test.skip("test call", async () => { + const ollama = new Ollama({}); + const result = await ollama.call( + "What is a good name for a company that makes colorful socks?" + ); + console.log({ result }); +}); + +test.skip("test call with callback", async () => { + const ollama = new Ollama({ + baseUrl: "http://localhost:11434", + }); + const tokens: string[] = []; + const result = await ollama.predict( + "What is a good name for a company that makes colorful socks?", + { + callbacks: [ + { + handleLLMNewToken(token) { + tokens.push(token); + }, + }, + ], + } + ); + expect(tokens.length).toBeGreaterThan(1); + expect(result).toEqual(tokens.join("")); +}); + +test.skip("test streaming call", async () => { + const ollama = new Ollama({ + baseUrl: "http://localhost:11434", + }); + const stream = await ollama.stream( + `Translate "I love programming" into German.` + ); + const chunks = []; + for await (const chunk of stream) { + chunks.push(chunk); + } + console.log(chunks.join("")); + expect(chunks.length).toBeGreaterThan(1); +}); + +test.skip("should abort the request", async () => { + const ollama = new Ollama({ + baseUrl: "http://localhost:11434", + }); + const controller = new AbortController(); + + await expect(() => { + const ret = ollama.call("Respond with an extremely 
verbose response", { + signal: controller.signal, + }); + controller.abort(); + return ret; + }).rejects.toThrow("This operation was aborted"); +}); + +test.skip("should stream through with a bytes output parser", async () => { + const TEMPLATE = `You are a pirate named Patchy. All responses must be extremely verbose and in pirate dialect. + + User: {input} + AI:`; + + const prompt = PromptTemplate.fromTemplate(TEMPLATE); + + const ollama = new Ollama({ + model: "llama2", + baseUrl: "http://127.0.0.1:11434", + }); + const outputParser = new BytesOutputParser(); + const chain = prompt.pipe(ollama).pipe(outputParser); + const stream = await chain.stream({ + input: `Translate "I love programming" into German.`, + }); + const chunks = []; + for await (const chunk of stream) { + chunks.push(chunk); + } + console.log(chunks.join("")); + expect(chunks.length).toBeGreaterThan(1); +}); + +test.skip("JSON mode", async () => { + const TEMPLATE = `You are a pirate named Patchy. All responses must be in pirate dialect and in JSON format, with a property named "response" followed by the value. 
+ + User: {input} + AI:`; + + // Infer the input variables from the template + const prompt = PromptTemplate.fromTemplate(TEMPLATE); + + const ollama = new Ollama({ + model: "llama2", + baseUrl: "http://127.0.0.1:11434", + format: "json", + }); + const outputParser = new StringOutputParser(); + const chain = prompt.pipe(ollama).pipe(outputParser); + const res = await chain.invoke({ + input: `Translate "I love programming" into German.`, + }); + expect(JSON.parse(res).response).toBeDefined(); +}); diff --git a/libs/langchain-community/src/llms/tests/openai-chat.int.test.ts b/libs/langchain-community/src/llms/tests/openai-chat.int.test.ts new file mode 100644 index 000000000000..43f737c552b8 --- /dev/null +++ b/libs/langchain-community/src/llms/tests/openai-chat.int.test.ts @@ -0,0 +1,142 @@ +import { expect, test } from "@jest/globals"; +import { OpenAIChat } from "../openai-chat.js"; +import { CallbackManager } from "../../callbacks/index.js"; + +test("Test OpenAI", async () => { + const model = new OpenAIChat({ modelName: "gpt-3.5-turbo", maxTokens: 10 }); + const res = await model.call("Print hello world"); + console.log({ res }); +}); + +test("Test OpenAI with prefix messages", async () => { + const model = new OpenAIChat({ + prefixMessages: [ + { role: "user", content: "My name is John" }, + { role: "assistant", content: "Hi there" }, + ], + maxTokens: 10, + }); + const res = await model.call("What is my name"); + console.log({ res }); +}); + +test("Test OpenAI in streaming mode", async () => { + let nrNewTokens = 0; + let streamedCompletion = ""; + + const model = new OpenAIChat({ + maxTokens: 10, + modelName: "gpt-3.5-turbo", + streaming: true, + callbackManager: CallbackManager.fromHandlers({ + async handleLLMNewToken(token: string) { + nrNewTokens += 1; + streamedCompletion += token; + }, + }), + }); + const res = await model.call("Print hello world"); + console.log({ res }); + + expect(nrNewTokens > 0).toBe(true); + expect(res).toBe(streamedCompletion); 
+}, 30000); + +test("Test OpenAI with stop", async () => { + const model = new OpenAIChat({ maxTokens: 5 }); + const res = await model.call("Print hello world", ["world"]); + console.log({ res }); +}); + +test("Test OpenAI with stop in object", async () => { + const model = new OpenAIChat({ maxTokens: 5 }); + const res = await model.call("Print hello world", { stop: ["world"] }); + console.log({ res }); +}); + +test("Test OpenAI with timeout in call options", async () => { + const model = new OpenAIChat({ maxTokens: 5 }); + await expect(() => + model.call("Print hello world", { + timeout: 10, + }) + ).rejects.toThrow(); +}, 5000); + +test("Test OpenAI with timeout in call options and node adapter", async () => { + const model = new OpenAIChat({ maxTokens: 5 }); + await expect(() => + model.call("Print hello world", { + timeout: 10, + }) + ).rejects.toThrow(); +}, 5000); + +test("Test OpenAI with signal in call options", async () => { + const model = new OpenAIChat({ maxTokens: 5 }); + const controller = new AbortController(); + await expect(() => { + const ret = model.call("Print hello world", { + signal: controller.signal, + }); + + controller.abort(); + + return ret; + }).rejects.toThrow(); +}, 5000); + +test("Test OpenAI with signal in call options and node adapter", async () => { + const model = new OpenAIChat({ maxTokens: 5 }); + const controller = new AbortController(); + await expect(() => { + const ret = model.call("Print hello world", { + signal: controller.signal, + }); + + controller.abort(); + + return ret; + }).rejects.toThrow(); +}, 5000); + +test("Test OpenAIChat stream method", async () => { + const model = new OpenAIChat({ maxTokens: 50, modelName: "gpt-3.5-turbo" }); + const stream = await model.stream("Print hello world."); + const chunks = []; + for await (const chunk of stream) { + chunks.push(chunk); + console.log(chunks); + } + expect(chunks.length).toBeGreaterThan(1); +}); + +test("Test OpenAIChat stream method with abort", async () => { + 
await expect(async () => { + const model = new OpenAIChat({ maxTokens: 50, modelName: "gpt-3.5-turbo" }); + const stream = await model.stream( + "How is your day going? Be extremely verbose.", + { + signal: AbortSignal.timeout(1000), + } + ); + for await (const chunk of stream) { + console.log(chunk); + } + }).rejects.toThrow(); +}); + +test("Test OpenAIChat stream method with early break", async () => { + const model = new OpenAIChat({ maxTokens: 50, modelName: "gpt-3.5-turbo" }); + const stream = await model.stream( + "How is your day going? Be extremely verbose." + ); + let i = 0; + for await (const chunk of stream) { + console.log(chunk); + i += 1; + if (i > 5) { + break; + } + } +}); diff --git a/libs/langchain-community/src/llms/tests/openai.int.test.ts b/libs/langchain-community/src/llms/tests/openai.int.test.ts new file mode 100644 index 000000000000..53421b1ff326 --- /dev/null +++ b/libs/langchain-community/src/llms/tests/openai.int.test.ts @@ -0,0 +1,310 @@ +import { test, expect } from "@jest/globals"; +import { LLMResult } from "../../schema/index.js"; +import { OpenAIChat } from "../openai-chat.js"; +import { OpenAI } from "../openai.js"; +import { StringPromptValue } from "../../prompts/index.js"; +import { CallbackManager } from "../../callbacks/index.js"; +import { NewTokenIndices } from "../../callbacks/base.js"; + +test("Test OpenAI", async () => { + const model = new OpenAI({ + maxTokens: 5, + modelName: "gpt-3.5-turbo-instruct", + }); + const res = await model.call("Print hello world"); + console.log({ res }); +}); + +test("Test OpenAI with stop", async () => { + const model = new OpenAI({ + maxTokens: 5, + modelName: "gpt-3.5-turbo-instruct", + }); + const res = await model.call("Print hello world", ["world"]); + console.log({ res }); +}); + +test("Test OpenAI with stop in object", async () => { + const model = new OpenAI({ + maxTokens: 5, + modelName: "gpt-3.5-turbo-instruct", + }); + const res = await model.call("Print hello world", { stop: 
["world"] }); + console.log({ res }); +}); + +test("Test OpenAI with timeout in call options", async () => { + const model = new OpenAI({ + maxTokens: 5, + modelName: "gpt-3.5-turbo-instruct", + }); + await expect(() => + model.call("Print hello world", { + timeout: 10, + }) + ).rejects.toThrow(); +}, 5000); + +test("Test OpenAI with timeout in call options and node adapter", async () => { + const model = new OpenAI({ + maxTokens: 5, + modelName: "gpt-3.5-turbo-instruct", + }); + await expect(() => + model.call("Print hello world", { + timeout: 10, + }) + ).rejects.toThrow(); +}, 5000); + +test("Test OpenAI with signal in call options", async () => { + const model = new OpenAI({ + maxTokens: 5, + modelName: "gpt-3.5-turbo-instruct", + }); + const controller = new AbortController(); + await expect(() => { + const ret = model.call("Print hello world", { + signal: controller.signal, + }); + + controller.abort(); + + return ret; + }).rejects.toThrow(); +}, 5000); + +test("Test OpenAI with signal in call options and node adapter", async () => { + const model = new OpenAI({ + maxTokens: 5, + modelName: "gpt-3.5-turbo-instruct", + }); + const controller = new AbortController(); + await expect(() => { + const ret = model.call("Print hello world", { + signal: controller.signal, + }); + + controller.abort(); + + return ret; + }).rejects.toThrow(); +}, 5000); + +test("Test OpenAI with concurrency == 1", async () => { + const model = new OpenAI({ + maxTokens: 5, + modelName: "gpt-3.5-turbo-instruct", + maxConcurrency: 1, + }); + const res = await Promise.all([ + model.call("Print hello world"), + model.call("Print hello world"), + ]); + console.log({ res }); +}); + +test("Test OpenAI with maxTokens -1", async () => { + const model = new OpenAI({ + maxTokens: -1, + modelName: "gpt-3.5-turbo-instruct", + }); + const res = await model.call("Print hello world", ["world"]); + console.log({ res }); +}); + +test("Test OpenAI with chat model returns OpenAIChat", async () => { + const 
model = new OpenAI({ modelName: "gpt-3.5-turbo" }); + expect(model).toBeInstanceOf(OpenAIChat); + const res = await model.call("Print hello world"); + console.log({ res }); + expect(typeof res).toBe("string"); +}); + +test("Test OpenAI with instruct model returns OpenAI", async () => { + const model = new OpenAI({ modelName: "gpt-3.5-turbo-instruct" }); + expect(model).toBeInstanceOf(OpenAI); + const res = await model.call("Print hello world"); + console.log({ res }); + expect(typeof res).toBe("string"); +}); + +test("Test OpenAI with versioned instruct model returns OpenAI", async () => { + const model = new OpenAI({ modelName: "gpt-3.5-turbo-instruct-0914" }); + expect(model).toBeInstanceOf(OpenAI); + const res = await model.call("Print hello world"); + console.log({ res }); + expect(typeof res).toBe("string"); +}); + +test("Test ChatOpenAI tokenUsage", async () => { + let tokenUsage = { + completionTokens: 0, + promptTokens: 0, + totalTokens: 0, + }; + + const model = new OpenAI({ + maxTokens: 5, + modelName: "gpt-3.5-turbo-instruct", + callbackManager: CallbackManager.fromHandlers({ + async handleLLMEnd(output: LLMResult) { + tokenUsage = output.llmOutput?.tokenUsage; + }, + }), + }); + const res = await model.call("Hello"); + console.log({ res }); + + expect(tokenUsage.promptTokens).toBe(1); +}); + +test("Test OpenAI in streaming mode", async () => { + let nrNewTokens = 0; + let streamedCompletion = ""; + + const model = new OpenAI({ + maxTokens: 5, + modelName: "gpt-3.5-turbo-instruct", + streaming: true, + callbacks: CallbackManager.fromHandlers({ + async handleLLMNewToken(token: string) { + nrNewTokens += 1; + streamedCompletion += token; + }, + }), + }); + const res = await model.call("Print hello world"); + console.log({ res }); + + expect(nrNewTokens > 0).toBe(true); + expect(res).toBe(streamedCompletion); +}); + +test("Test OpenAI in streaming mode with multiple prompts", async () => { + let nrNewTokens = 0; + const completions = [ + ["", ""], + ["", 
""], + ]; + + const model = new OpenAI({ + maxTokens: 5, + modelName: "gpt-3.5-turbo-instruct", + streaming: true, + n: 2, + callbacks: CallbackManager.fromHandlers({ + async handleLLMNewToken(token: string, idx: NewTokenIndices) { + nrNewTokens += 1; + completions[idx.prompt][idx.completion] += token; + }, + }), + }); + const res = await model.generate(["Print hello world", "print hello sea"]); + console.log( + res.generations, + res.generations.map((g) => g[0].generationInfo) + ); + + expect(nrNewTokens > 0).toBe(true); + expect(res.generations.length).toBe(2); + expect(res.generations.map((g) => g.map((gg) => gg.text))).toEqual( + completions + ); +}); + +test("Test OpenAIChat in streaming mode with multiple prompts", async () => { + let nrNewTokens = 0; + const completions = [[""], [""]]; + + const model = new OpenAI({ + maxTokens: 5, + modelName: "gpt-3.5-turbo", + streaming: true, + n: 1, + callbacks: CallbackManager.fromHandlers({ + async handleLLMNewToken(token: string, idx: NewTokenIndices) { + nrNewTokens += 1; + completions[idx.prompt][idx.completion] += token; + }, + }), + }); + const res = await model.generate(["Print hello world", "print hello sea"]); + console.log( + res.generations, + res.generations.map((g) => g[0].generationInfo) + ); + + expect(nrNewTokens > 0).toBe(true); + expect(res.generations.length).toBe(2); + expect(res.generations.map((g) => g.map((gg) => gg.text))).toEqual( + completions + ); +}); + +test("Test OpenAI prompt value", async () => { + const model = new OpenAI({ + maxTokens: 5, + modelName: "gpt-3.5-turbo-instruct", + }); + const res = await model.generatePrompt([ + new StringPromptValue("Print hello world"), + ]); + expect(res.generations.length).toBe(1); + for (const generation of res.generations) { + expect(generation.length).toBe(1); + for (const g of generation) { + console.log(g.text); + } + } + console.log({ res }); +}); + +test("Test OpenAI stream method", async () => { + const model = new OpenAI({ + maxTokens: 50, + 
modelName: "gpt-3.5-turbo-instruct", + }); + const stream = await model.stream("Print hello world."); + const chunks = []; + for await (const chunk of stream) { + chunks.push(chunk); + } + expect(chunks.length).toBeGreaterThan(1); +}); + +test("Test OpenAI stream method with abort", async () => { + await expect(async () => { + const model = new OpenAI({ + maxTokens: 250, + modelName: "gpt-3.5-turbo-instruct", + }); + const stream = await model.stream( + "How is your day going? Be extremely verbose.", + { + signal: AbortSignal.timeout(1000), + } + ); + for await (const chunk of stream) { + console.log(chunk); + } + }).rejects.toThrow(); +}); + +test("Test OpenAI stream method with early break", async () => { + const model = new OpenAI({ + maxTokens: 50, + modelName: "gpt-3.5-turbo-instruct", + }); + const stream = await model.stream( + "How is your day going? Be extremely verbose." + ); + let i = 0; + for await (const chunk of stream) { + console.log(chunk); + i += 1; + if (i > 5) { + break; + } + } +}); diff --git a/libs/langchain-community/src/llms/tests/prompt_layer.int.test.ts b/libs/langchain-community/src/llms/tests/prompt_layer.int.test.ts new file mode 100644 index 000000000000..f86b90ce76f6 --- /dev/null +++ b/libs/langchain-community/src/llms/tests/prompt_layer.int.test.ts @@ -0,0 +1,95 @@ +import { test, expect } from "@jest/globals"; +import { PromptLayerOpenAI } from "../openai.js"; +import { PromptLayerOpenAIChat } from "../openai-chat.js"; + +import { PromptLayerChatOpenAI } from "../../chat_models/openai.js"; +import { SystemMessage } from "../../schema/index.js"; + +test("Test PromptLayerOpenAI returns promptLayerID if returnPromptLayerId=true", async () => { + const model = new PromptLayerOpenAI({ + maxTokens: 5, + modelName: "text-ada-001", + returnPromptLayerId: true, + }); + const res = await model.generate(["Print hello world"]); + console.log(JSON.stringify({ res }, null, 2)); + + expect( + typeof 
res.generations[0][0].generationInfo?.promptLayerRequestId + ).toBe("number"); + + const modelB = new PromptLayerOpenAI({ + maxTokens: 5, + modelName: "text-ada-001", + }); + const resB = await modelB.generate(["Print hello world"]); + + expect( + resB.generations[0][0].generationInfo?.promptLayerRequestId + ).toBeUndefined(); +}); + +test("Test PromptLayerOpenAIChat returns promptLayerID if returnPromptLayerId=true", async () => { + const model = new PromptLayerOpenAIChat({ + prefixMessages: [ + { + role: "system", + content: "You are a helpful assistant that answers in pirate language", + }, + ], + maxTokens: 5, + returnPromptLayerId: true, + }); + const res = await model.generate(["Print hello world"]); + + expect( + typeof res.generations[0][0].generationInfo?.promptLayerRequestId + ).toBe("number"); + + const modelB = new PromptLayerOpenAIChat({ + prefixMessages: [ + { + role: "system", + content: "You are a helpful assistant that answers in pirate language", + }, + ], + maxTokens: 5, + }); + const resB = await modelB.generate(["Print hello world"]); + + expect( + resB.generations[0][0].generationInfo?.promptLayerRequestId + ).toBeUndefined(); +}); + +test("Test PromptLayerChatOpenAI returns promptLayerID if returnPromptLayerId=true", async () => { + const chat = new PromptLayerChatOpenAI({ + returnPromptLayerId: true, + }); + + const respA = await chat.generate([ + [ + new SystemMessage( + "You are a helpful assistant that translates English to French." + ), + ], + ]); + + expect( + typeof respA.generations[0][0].generationInfo?.promptLayerRequestId + ).toBe("number"); + + const chatB = new PromptLayerChatOpenAI(); + + const respB = await chatB.generate([ + [ + new SystemMessage( + "You are a helpful assistant that translates English to French." 
+ ), + ], + ]); + + expect( + respB.generations[0][0].generationInfo?.promptLayerRequestId + ).toBeUndefined(); +}); diff --git a/libs/langchain-community/src/llms/tests/replicate.int.test.ts b/libs/langchain-community/src/llms/tests/replicate.int.test.ts new file mode 100644 index 000000000000..c4c389277a4e --- /dev/null +++ b/libs/langchain-community/src/llms/tests/replicate.int.test.ts @@ -0,0 +1,57 @@ +import { test, expect } from "@jest/globals"; +import { Replicate } from "../replicate.js"; + +// Test skipped because Replicate appears to be timing out often when called +test("Test Replicate", async () => { + const model = new Replicate({ + model: + "a16z-infra/llama13b-v2-chat:df7690f1994d94e96ad9d568eac121aecf50684a0b0963b25a41cc40061269e5", + input: { + max_length: 10, + }, + }); + + const res = await model.call("Hello, my name is "); + + console.log({ res }); + + expect(typeof res).toBe("string"); +}); + +test("Serialise Replicate", () => { + const model = new Replicate({ + model: + "a16z-infra/llama13b-v2-chat:df7690f1994d94e96ad9d568eac121aecf50684a0b0963b25a41cc40061269e5", + input: { + max_length: 10, + }, + }); + + const serialised = JSON.stringify(model.toJSON()); + + expect(JSON.parse(serialised)).toMatchInlineSnapshot(` + { + "id": [ + "langchain", + "llms", + "replicate", + "Replicate", + ], + "kwargs": { + "api_key": { + "id": [ + "REPLICATE_API_TOKEN", + ], + "lc": 1, + "type": "secret", + }, + "input": { + "max_length": 10, + }, + "model": "a16z-infra/llama13b-v2-chat:df7690f1994d94e96ad9d568eac121aecf50684a0b0963b25a41cc40061269e5", + }, + "lc": 1, + "type": "constructor", + } + `); +}); diff --git a/libs/langchain-community/src/llms/tests/sagemaker_endpoint.int.test.ts b/libs/langchain-community/src/llms/tests/sagemaker_endpoint.int.test.ts new file mode 100644 index 000000000000..ff7d4dd8a96c --- /dev/null +++ b/libs/langchain-community/src/llms/tests/sagemaker_endpoint.int.test.ts @@ -0,0 +1,133 @@ +/* eslint-disable no-process-env */ +/* 
eslint-disable @typescript-eslint/no-non-null-assertion */ +import { expect, test } from "@jest/globals"; +import { + SageMakerEndpoint, + SageMakerLLMContentHandler, +} from "../sagemaker_endpoint.js"; + +// yarn test:single /{path_to}/langchain/src/llms/tests/sagemaker.int.test.ts +describe.skip("Test SageMaker LLM", () => { + test("without streaming", async () => { + interface ResponseJsonInterface { + generation: { + content: string; + }; + } + + class LLama213BHandler implements SageMakerLLMContentHandler { + contentType = "application/json"; + + accepts = "application/json"; + + async transformInput( + prompt: string, + modelKwargs: Record + ): Promise { + const payload = { + inputs: [[{ role: "user", content: prompt }]], + parameters: modelKwargs, + }; + + const input_str = JSON.stringify(payload); + + return new TextEncoder().encode(input_str); + } + + async transformOutput(output: Uint8Array): Promise { + const response_json = JSON.parse( + new TextDecoder("utf-8").decode(output) + ) as ResponseJsonInterface[]; + const content = response_json[0]?.generation.content ?? ""; + return content; + } + } + + const contentHandler = new LLama213BHandler(); + const model = new SageMakerEndpoint({ + endpointName: "aws-productbot-ai-dev-llama-2-13b-chat", + streaming: false, + modelKwargs: { + temperature: 0.5, + max_new_tokens: 700, + top_p: 0.9, + }, + endpointKwargs: { + CustomAttributes: "accept_eula=true", + }, + contentHandler, + clientOptions: { + region: "us-east-1", + credentials: { + accessKeyId: process.env.AWS_ACCESS_KEY_ID!, + secretAccessKey: process.env.AWS_SECRET_ACCESS_KEY!, + }, + }, + }); + + const response = await model.call( + "hello, my name is John Doe, tell me a fun story about llamas." 
+ ); + + expect(response.length).toBeGreaterThan(0); + }); + + test("with streaming", async () => { + class LLama213BHandler implements SageMakerLLMContentHandler { + contentType = "application/json"; + + accepts = "application/json"; + + async transformInput( + prompt: string, + modelKwargs: Record + ): Promise { + const payload = { + inputs: [[{ role: "user", content: prompt }]], + parameters: modelKwargs, + }; + + const input_str = JSON.stringify(payload); + + return new TextEncoder().encode(input_str); + } + + async transformOutput(output: Uint8Array): Promise { + return new TextDecoder("utf-8").decode(output); + } + } + + const contentHandler = new LLama213BHandler(); + const model = new SageMakerEndpoint({ + endpointName: "aws-productbot-ai-dev-llama-2-13b-chat", + streaming: true, // specify streaming + modelKwargs: { + temperature: 0.5, + max_new_tokens: 700, + top_p: 0.9, + }, + endpointKwargs: { + CustomAttributes: "accept_eula=true", + }, + contentHandler, + clientOptions: { + region: "us-east-1", + credentials: { + accessKeyId: process.env.AWS_ACCESS_KEY_ID!, + secretAccessKey: process.env.AWS_SECRET_ACCESS_KEY!, + }, + }, + }); + + const response = await model.call( + "hello, my name is John Doe, tell me a fun story about llamas in 3 paragraphs" + ); + + const chunks = []; + for await (const chunk of response) { + chunks.push(chunk); + } + + expect(response.length).toBeGreaterThan(0); + }); +}); diff --git a/libs/langchain-community/src/llms/tests/writer.int.test.ts b/libs/langchain-community/src/llms/tests/writer.int.test.ts new file mode 100644 index 000000000000..9c45c789aafe --- /dev/null +++ b/libs/langchain-community/src/llms/tests/writer.int.test.ts @@ -0,0 +1,8 @@ +import { test } from "@jest/globals"; +import { Writer } from "../writer.js"; + +test.skip("Test Writer", async () => { + const model = new Writer({ maxTokens: 20 }); + const res = await model.invoke("1 + 1 ="); + console.log(res); +}, 50000); diff --git 
a/libs/langchain-community/src/llms/watsonx_ai.ts b/libs/langchain-community/src/llms/watsonx_ai.ts new file mode 100644 index 000000000000..7abf9585f820 --- /dev/null +++ b/libs/langchain-community/src/llms/watsonx_ai.ts @@ -0,0 +1,198 @@ +import { type BaseLLMCallOptions, type BaseLLMParams, LLM } from "@langchain/core/language_models/llms"; +import { getEnvironmentVariable } from "@langchain/core/utils/env"; + +/** + * The WatsonxAIParams interface defines the input parameters for + * the WatsonxAI class. + */ +export interface WatsonxAIParams extends BaseLLMParams { + /** + * WatsonX AI Complete Endpoint. + * Can be used if you want a fully custom endpoint. + */ + endpoint?: string; + /** + * IBM Cloud Compute Region. + * eg. us-south, us-east, etc. + */ + region?: string; + /** + * WatsonX AI Version. + * Date representing the WatsonX AI Version. + * eg. 2023-05-29 + */ + version?: string; + /** + * WatsonX AI Key. + * Provide API Key if you do not wish to automatically pull from env. + */ + ibmCloudApiKey?: string; + /** + * WatsonX AI Key. + * Provide API Key if you do not wish to automatically pull from env. + */ + projectId?: string; + /** + * Parameters accepted by the WatsonX AI Endpoint. + */ + modelParameters?: Record; + /** + * WatsonX AI Model ID. + */ + modelId?: string; +} + +const endpointConstructor = (region: string, version: string) => + `https://${region}.ml.cloud.ibm.com/ml/v1-beta/generation/text?version=${version}`; + +/** + * The WatsonxAI class is used to interact with Watsonx AI + * Inference Endpoint models. It uses IBM Cloud for authentication. + * This requires your IBM Cloud API Key which is autoloaded if not specified. 
+ */ + +export class WatsonxAI extends LLM { + lc_serializable = true; + + lc_namespace = ["langchain-community", "llms", this._llmType()]; + + static lc_name() { + return "WatsonxAI"; + } + + get lc_secrets(): { [key: string]: string } | undefined { + return { + ibmCloudApiKey: "IBM_CLOUD_API_KEY", + projectId: "WATSONX_PROJECT_ID", + }; + } + + endpoint: string; + + region = "us-south"; + + version = "2023-05-29"; + + modelId = "meta-llama/llama-2-70b-chat"; + + modelKwargs?: Record; + + ibmCloudApiKey?: string; + + ibmCloudToken?: string; + + ibmCloudTokenExpiresAt?: number; + + projectId?: string; + + modelParameters?: Record; + + constructor(fields: WatsonxAIParams) { + super(fields); + + this.region = fields?.region ?? this.region; + this.version = fields?.version ?? this.version; + this.modelId = fields?.modelId ?? this.modelId; + this.ibmCloudApiKey = + fields?.ibmCloudApiKey ?? getEnvironmentVariable("IBM_CLOUD_API_KEY"); + this.projectId = + fields?.projectId ?? getEnvironmentVariable("WATSONX_PROJECT_ID"); + + this.endpoint = + fields?.endpoint ?? endpointConstructor(this.region, this.version); + this.modelParameters = fields.modelParameters; + + if (!this.ibmCloudApiKey) { + throw new Error("Missing IBM Cloud API Key"); + } + + if (!this.projectId) { + throw new Error("Missing WatsonX AI Project ID"); + } + } + + _llmType() { + return "watsonx_ai"; + } + + /** + * Calls the WatsonX AI endpoint and retrieves the result. + * @param {string} prompt The input prompt. + * @returns {Promise} A promise that resolves to the generated string. 
+ */ + /** @ignore */ + async _call( + prompt: string, + _options: this["ParsedCallOptions"] + ): Promise { + interface WatsonxAIResponse { + results: { + generated_text: string; + generated_token_count: number; + input_token_count: number; + }[]; + errors: { + code: string; + message: string; + }[]; + } + const response = (await this.caller.call(async () => + fetch(this.endpoint, { + method: "POST", + headers: { + "Content-Type": "application/json", + Accept: "application/json", + Authorization: `Bearer ${await this.generateToken()}`, + }, + body: JSON.stringify({ + project_id: this.projectId, + model_id: this.modelId, + input: prompt, + parameters: this.modelParameters, + }), + }).then((res) => res.json()) + )) as WatsonxAIResponse; + + /** + * Handle Errors for invalid requests. + */ + if (response.errors) { + throw new Error(response.errors[0].message); + } + + return response.results[0].generated_text; + } + + async generateToken(): Promise { + if (this.ibmCloudToken && this.ibmCloudTokenExpiresAt) { + if (this.ibmCloudTokenExpiresAt > Date.now()) { + return this.ibmCloudToken; + } + } + + interface TokenResponse { + access_token: string; + expiration: number; + } + + const urlTokenParams = new URLSearchParams(); + urlTokenParams.append( + "grant_type", + "urn:ibm:params:oauth:grant-type:apikey" + ); + urlTokenParams.append("apikey", this.ibmCloudApiKey as string); + + const data = (await fetch("https://iam.cloud.ibm.com/identity/token", { + method: "POST", + headers: { + "Content-Type": "application/x-www-form-urlencoded", + }, + body: urlTokenParams, + }).then((res) => res.json())) as TokenResponse; + + this.ibmCloudTokenExpiresAt = data.expiration * 1000; + this.ibmCloudToken = data.access_token; + + return this.ibmCloudToken; + } +} diff --git a/libs/langchain-community/src/llms/writer.ts b/libs/langchain-community/src/llms/writer.ts new file mode 100644 index 000000000000..8404cfc2088e --- /dev/null +++ b/libs/langchain-community/src/llms/writer.ts @@ 
-0,0 +1,174 @@ +import { Writer as WriterClient } from "@writerai/writer-sdk"; + +import { type BaseLLMParams, LLM } from "@langchain/core/language_models/llms"; +import { getEnvironmentVariable } from "@langchain/core/utils/env"; + +/** + * Interface for the input parameters specific to the Writer model. + */ +export interface WriterInput extends BaseLLMParams { + /** Writer API key */ + apiKey?: string; + + /** Writer organization ID */ + orgId?: string | number; + + /** Model to use */ + model?: string; + + /** Sampling temperature to use */ + temperature?: number; + + /** Minimum number of tokens to generate. */ + minTokens?: number; + + /** Maximum number of tokens to generate in the completion. */ + maxTokens?: number; + + /** Generates this many completions server-side and returns the "best"." */ + bestOf?: number; + + /** Penalizes repeated tokens according to frequency. */ + frequencyPenalty?: number; + + /** Whether to return log probabilities. */ + logprobs?: number; + + /** Number of completions to generate. */ + n?: number; + + /** Penalizes repeated tokens regardless of frequency. */ + presencePenalty?: number; + + /** Total probability mass of tokens to consider at each step. */ + topP?: number; +} + +/** + * Class representing a Writer Large Language Model (LLM). It interacts + * with the Writer API to generate text completions. 
+ */ +export class Writer extends LLM implements WriterInput { + static lc_name() { + return "Writer"; + } + + get lc_secrets(): { [key: string]: string } | undefined { + return { + apiKey: "WRITER_API_KEY", + orgId: "WRITER_ORG_ID", + }; + } + + get lc_aliases(): { [key: string]: string } | undefined { + return { + apiKey: "writer_api_key", + orgId: "writer_org_id", + }; + } + + lc_serializable = true; + + lc_namespace = ["langchain-community", "llms", this._llmType()]; + + apiKey: string; + + orgId: number; + + model = "palmyra-instruct"; + + temperature?: number; + + minTokens?: number; + + maxTokens?: number; + + bestOf?: number; + + frequencyPenalty?: number; + + logprobs?: number; + + n?: number; + + presencePenalty?: number; + + topP?: number; + + constructor(fields?: WriterInput) { + super(fields ?? {}); + + const apiKey = fields?.apiKey ?? getEnvironmentVariable("WRITER_API_KEY"); + const orgId = fields?.orgId ?? getEnvironmentVariable("WRITER_ORG_ID"); + + if (!apiKey) { + throw new Error( + "Please set the WRITER_API_KEY environment variable or pass it to the constructor as the apiKey field." + ); + } + + if (!orgId) { + throw new Error( + "Please set the WRITER_ORG_ID environment variable or pass it to the constructor as the orgId field." + ); + } + + this.apiKey = apiKey; + this.orgId = typeof orgId === "string" ? parseInt(orgId, 10) : orgId; + this.model = fields?.model ?? this.model; + this.temperature = fields?.temperature ?? this.temperature; + this.minTokens = fields?.minTokens ?? this.minTokens; + this.maxTokens = fields?.maxTokens ?? this.maxTokens; + this.bestOf = fields?.bestOf ?? this.bestOf; + this.frequencyPenalty = fields?.frequencyPenalty ?? this.frequencyPenalty; + this.logprobs = fields?.logprobs ?? this.logprobs; + this.n = fields?.n ?? this.n; + this.presencePenalty = fields?.presencePenalty ?? this.presencePenalty; + this.topP = fields?.topP ?? 
this.topP; + } + + _llmType() { + return "writer"; + } + + /** @ignore */ + async _call( + prompt: string, + options: this["ParsedCallOptions"] + ): Promise { + const sdk = new WriterClient({ + security: { + apiKey: this.apiKey, + }, + organizationId: this.orgId, + }); + + return this.caller.callWithOptions({ signal: options.signal }, async () => { + try { + const res = await sdk.completions.create({ + completionRequest: { + prompt, + stop: options.stop, + temperature: this.temperature, + minTokens: this.minTokens, + maxTokens: this.maxTokens, + bestOf: this.bestOf, + n: this.n, + frequencyPenalty: this.frequencyPenalty, + logprobs: this.logprobs, + presencePenalty: this.presencePenalty, + topP: this.topP, + }, + modelId: this.model, + }); + return ( + res.completionResponse?.choices?.[0].text ?? "No completion found." + ); + } catch (e) { + // eslint-disable-next-line @typescript-eslint/no-explicit-any + (e as any).response = (e as any).rawResponse; + throw e; + } + }); + } +} diff --git a/libs/langchain-community/src/llms/yandex.ts b/libs/langchain-community/src/llms/yandex.ts new file mode 100644 index 000000000000..a98157f70848 --- /dev/null +++ b/libs/langchain-community/src/llms/yandex.ts @@ -0,0 +1,127 @@ +import { getEnvironmentVariable } from "@langchain/core/utils/env"; +import { LLM, type BaseLLMParams } from "@langchain/core/language_models/llms"; + +const apiUrl = "https://llm.api.cloud.yandex.net/llm/v1alpha/instruct"; + +export interface YandexGPTInputs extends BaseLLMParams { + /** + * What sampling temperature to use. + * Should be a double number between 0 (inclusive) and 1 (inclusive). + */ + temperature?: number; + + /** + * Maximum limit on the total number of tokens + * used for both the input prompt and the generated response. + */ + maxTokens?: number; + + /** Model name to use. */ + model?: string; + + /** + * Yandex Cloud Api Key for service account + * with the `ai.languageModels.user` role. 
+ */ + apiKey?: string; + + /** + * Yandex Cloud IAM token for service account + * with the `ai.languageModels.user` role. + */ + iamToken?: string; +} + +export class YandexGPT extends LLM implements YandexGPTInputs { + lc_serializable = true; + + lc_namespace = ["langchain-community", "llms", this._llmType()]; + + static lc_name() { + return "Yandex GPT"; + } + + get lc_secrets(): { [key: string]: string } | undefined { + return { + apiKey: "YC_API_KEY", + iamToken: "YC_IAM_TOKEN", + }; + } + + temperature = 0.6; + + maxTokens = 1700; + + model = "general"; + + apiKey?: string; + + iamToken?: string; + + constructor(fields?: YandexGPTInputs) { + super(fields ?? {}); + + const apiKey = fields?.apiKey ?? getEnvironmentVariable("YC_API_KEY"); + + const iamToken = fields?.iamToken ?? getEnvironmentVariable("YC_IAM_TOKEN"); + + if (apiKey === undefined && iamToken === undefined) { + throw new Error( + "Please set the YC_API_KEY or YC_IAM_TOKEN environment variable or pass it to the constructor as the apiKey or iamToken field." + ); + } + + this.apiKey = apiKey; + this.iamToken = iamToken; + this.maxTokens = fields?.maxTokens ?? this.maxTokens; + this.temperature = fields?.temperature ?? this.temperature; + this.model = fields?.model ?? 
this.model; + } + + _llmType() { + return "yandexgpt"; + } + + /** @ignore */ + async _call( + prompt: string, + options: this["ParsedCallOptions"] + ): Promise { + // Hit the `generate` endpoint on the `large` model + return this.caller.callWithOptions({ signal: options.signal }, async () => { + const headers = { "Content-Type": "application/json", Authorization: "" }; + if (this.apiKey !== undefined) { + headers.Authorization = `Api-Key ${this.apiKey}`; + } else { + headers.Authorization = `Bearer ${this.iamToken}`; + } + const bodyData = { + model: this.model, + generationOptions: { + temperature: this.temperature, + maxTokens: this.maxTokens, + }, + + requestText: prompt, + }; + + try { + const response = await fetch(apiUrl, { + method: "POST", + headers, + body: JSON.stringify(bodyData), + }); + if (!response.ok) { + throw new Error( + `Failed to fetch ${apiUrl} from YandexGPT: ${response.status}` + ); + } + + const responseData = await response.json(); + return responseData.result.alternatives[0].text; + } catch (error) { + throw new Error(`Failed to fetch ${apiUrl} from YandexGPT ${error}`); + } + }); + } +} From 92b30249f6ed2b97fc669d59366c510255caabf8 Mon Sep 17 00:00:00 2001 From: jacoblee93 Date: Wed, 6 Dec 2023 17:05:22 -0800 Subject: [PATCH 03/22] Add tools --- .../src/tools/IFTTTWebhook.ts | 83 + .../langchain-community/src/tools/aiplugin.ts | 85 + libs/langchain-community/src/tools/aws_sfn.ts | 229 + .../src/tools/bingserpapi.ts | 82 + .../src/tools/brave_search.ts | 81 + libs/langchain-community/src/tools/connery.ts | 357 ++ .../src/tools/dadjokeapi.ts | 48 + .../src/tools/dataforseo_api_search.ts | 382 ++ .../src/tools/fixtures/wordoftheday.html | 3892 +++++++++++++++++ .../src/tools/gmail/base.ts | 79 + .../src/tools/gmail/create_draft.ts | 74 + .../src/tools/gmail/descriptions.ts | 119 + .../src/tools/gmail/get_message.ts | 95 + .../src/tools/gmail/get_thread.ts | 105 + .../src/tools/gmail/index.ts | 12 + .../src/tools/gmail/search.ts | 135 + 
.../src/tools/gmail/send_message.ts | 84 + .../src/tools/google_calendar/base.ts | 102 + .../commands/run-create-events.ts | 129 + .../commands/run-view-events.ts | 85 + .../src/tools/google_calendar/create.ts | 51 + .../src/tools/google_calendar/descriptions.ts | 24 + .../src/tools/google_calendar/index.ts | 3 + .../prompts/create-event-prompt.ts | 56 + .../tools/google_calendar/prompts/index.ts | 2 + .../prompts/view-events-prompt.ts | 34 + .../utils/get-timezone-offset-in-hours.ts | 7 + .../src/tools/google_calendar/view.ts | 51 + .../src/tools/google_custom_search.ts | 87 + .../src/tools/google_places.ts | 100 + libs/langchain-community/src/tools/json.ts | 154 + .../src/tools/searchapi.ts | 208 + .../src/tools/searxng_search.ts | 262 ++ libs/langchain-community/src/tools/serpapi.ts | 509 +++ libs/langchain-community/src/tools/serper.ts | 111 + .../src/tools/tests/aiplugin.int.test.ts | 19 + .../src/tools/tests/brave_search.int.test.ts | 10 + .../src/tools/tests/chain.test.ts | 142 + .../src/tools/tests/gmail.test.ts | 63 + .../src/tools/tests/google_calendar.test.ts | 110 + .../tests/google_custom_search.int.test.ts | 10 + .../src/tools/tests/google_places.int.test.ts | 26 + .../src/tools/tests/searchapi.test.ts | 20 + .../src/tools/tests/serpapi.test.ts | 37 + .../src/tools/tests/webbrowser.int.test.ts | 124 + .../src/tools/tests/webbrowser.test.ts | 36 + .../src/tools/tests/wikipedia.int.test.ts | 22 + .../src/tools/tests/wolframalpha.test.ts | 47 + .../src/tools/wikipedia_query_run.ts | 185 + .../src/tools/wolframalpha.ts | 41 + 50 files changed, 8809 insertions(+) create mode 100644 libs/langchain-community/src/tools/IFTTTWebhook.ts create mode 100644 libs/langchain-community/src/tools/aiplugin.ts create mode 100644 libs/langchain-community/src/tools/aws_sfn.ts create mode 100644 libs/langchain-community/src/tools/bingserpapi.ts create mode 100644 libs/langchain-community/src/tools/brave_search.ts create mode 100644 
libs/langchain-community/src/tools/connery.ts create mode 100644 libs/langchain-community/src/tools/dadjokeapi.ts create mode 100644 libs/langchain-community/src/tools/dataforseo_api_search.ts create mode 100644 libs/langchain-community/src/tools/fixtures/wordoftheday.html create mode 100644 libs/langchain-community/src/tools/gmail/base.ts create mode 100644 libs/langchain-community/src/tools/gmail/create_draft.ts create mode 100644 libs/langchain-community/src/tools/gmail/descriptions.ts create mode 100644 libs/langchain-community/src/tools/gmail/get_message.ts create mode 100644 libs/langchain-community/src/tools/gmail/get_thread.ts create mode 100644 libs/langchain-community/src/tools/gmail/index.ts create mode 100644 libs/langchain-community/src/tools/gmail/search.ts create mode 100644 libs/langchain-community/src/tools/gmail/send_message.ts create mode 100644 libs/langchain-community/src/tools/google_calendar/base.ts create mode 100644 libs/langchain-community/src/tools/google_calendar/commands/run-create-events.ts create mode 100644 libs/langchain-community/src/tools/google_calendar/commands/run-view-events.ts create mode 100644 libs/langchain-community/src/tools/google_calendar/create.ts create mode 100644 libs/langchain-community/src/tools/google_calendar/descriptions.ts create mode 100644 libs/langchain-community/src/tools/google_calendar/index.ts create mode 100644 libs/langchain-community/src/tools/google_calendar/prompts/create-event-prompt.ts create mode 100644 libs/langchain-community/src/tools/google_calendar/prompts/index.ts create mode 100644 libs/langchain-community/src/tools/google_calendar/prompts/view-events-prompt.ts create mode 100644 libs/langchain-community/src/tools/google_calendar/utils/get-timezone-offset-in-hours.ts create mode 100644 libs/langchain-community/src/tools/google_calendar/view.ts create mode 100644 libs/langchain-community/src/tools/google_custom_search.ts create mode 100644 
libs/langchain-community/src/tools/google_places.ts create mode 100644 libs/langchain-community/src/tools/json.ts create mode 100644 libs/langchain-community/src/tools/searchapi.ts create mode 100644 libs/langchain-community/src/tools/searxng_search.ts create mode 100644 libs/langchain-community/src/tools/serpapi.ts create mode 100644 libs/langchain-community/src/tools/serper.ts create mode 100644 libs/langchain-community/src/tools/tests/aiplugin.int.test.ts create mode 100644 libs/langchain-community/src/tools/tests/brave_search.int.test.ts create mode 100644 libs/langchain-community/src/tools/tests/chain.test.ts create mode 100644 libs/langchain-community/src/tools/tests/gmail.test.ts create mode 100644 libs/langchain-community/src/tools/tests/google_calendar.test.ts create mode 100644 libs/langchain-community/src/tools/tests/google_custom_search.int.test.ts create mode 100644 libs/langchain-community/src/tools/tests/google_places.int.test.ts create mode 100644 libs/langchain-community/src/tools/tests/searchapi.test.ts create mode 100644 libs/langchain-community/src/tools/tests/serpapi.test.ts create mode 100644 libs/langchain-community/src/tools/tests/webbrowser.int.test.ts create mode 100644 libs/langchain-community/src/tools/tests/webbrowser.test.ts create mode 100644 libs/langchain-community/src/tools/tests/wikipedia.int.test.ts create mode 100644 libs/langchain-community/src/tools/tests/wolframalpha.test.ts create mode 100644 libs/langchain-community/src/tools/wikipedia_query_run.ts create mode 100644 libs/langchain-community/src/tools/wolframalpha.ts diff --git a/libs/langchain-community/src/tools/IFTTTWebhook.ts b/libs/langchain-community/src/tools/IFTTTWebhook.ts new file mode 100644 index 000000000000..72ea904f048b --- /dev/null +++ b/libs/langchain-community/src/tools/IFTTTWebhook.ts @@ -0,0 +1,83 @@ +/** From https://github.com/SidU/teams-langchain-js/wiki/Connecting-IFTTT-Services. 
+ +# Creating a webhook +- Go to https://ifttt.com/create + +# Configuring the "If This" +- Click on the "If This" button in the IFTTT interface. +- Search for "Webhooks" in the search bar. +- Choose the first option for "Receive a web request with a JSON payload." +- Choose an Event Name that is specific to the service you plan to connect to. +This will make it easier for you to manage the webhook URL. +For example, if you're connecting to Spotify, you could use "Spotify" as your +Event Name. +- Click the "Create Trigger" button to save your settings and create your webhook. + +# Configuring the "Then That" +- Tap on the "Then That" button in the IFTTT interface. +- Search for the service you want to connect, such as Spotify. +- Choose an action from the service, such as "Add track to a playlist". +- Configure the action by specifying the necessary details, such as the playlist name, +e.g., "Songs from AI". +- Reference the JSON Payload received by the Webhook in your action. For the Spotify +scenario, choose "{{JsonPayload}}" as your search query. +- Tap the "Create Action" button to save your action settings. +- Once you have finished configuring your action, click the "Finish" button to +complete the setup. +- Congratulations! You have successfully connected the Webhook to the desired +service, and you're ready to start receiving data and triggering actions 🎉 + +# Finishing up +- To get your webhook URL go to https://ifttt.com/maker_webhooks/settings +- Copy the IFTTT key value from there. The URL is of the form +https://maker.ifttt.com/use/YOUR_IFTTT_KEY. Grab the YOUR_IFTTT_KEY value. + */ +import { Tool } from "@langchain/core/tools"; + +/** + * Represents a tool for creating and managing webhooks with the IFTTT (If + * This Then That) service. The IFTTT service allows users to create + * chains of simple conditional statements, called applets, which are + * triggered based on changes to other web services. 
+ */ +export class IFTTTWebhook extends Tool { + static lc_name() { + return "IFTTTWebhook"; + } + + get lc_namespace() { + return ["langchain-community", "tools"]; + } + + private url: string; + + name: string; + + description: string; + + constructor(url: string, name: string, description: string) { + super(...arguments); + this.url = url; + this.name = name; + this.description = description; + } + + /** @ignore */ + async _call(input: string): Promise { + const headers = { "Content-Type": "application/json" }; + const body = JSON.stringify({ this: input }); + + const response = await fetch(this.url, { + method: "POST", + headers, + body, + }); + + if (!response.ok) { + throw new Error(`HTTP error ${response.status}`); + } + + const result = await response.text(); + return result; + } +} diff --git a/libs/langchain-community/src/tools/aiplugin.ts b/libs/langchain-community/src/tools/aiplugin.ts new file mode 100644 index 000000000000..5e50f4309c0e --- /dev/null +++ b/libs/langchain-community/src/tools/aiplugin.ts @@ -0,0 +1,85 @@ +import { Tool, type ToolParams } from "@langchain/core/tools"; + +/** + * Interface for parameters required to create an instance of + * AIPluginTool. + */ +export interface AIPluginToolParams extends ToolParams { + name: string; + description: string; + apiSpec: string; +} + +/** + * Class for creating instances of AI tools from plugins. It extends the + * Tool class and implements the AIPluginToolParams interface. 
+ */ +export class AIPluginTool extends Tool implements AIPluginToolParams { + static lc_name() { + return "AIPluginTool"; + } + + get lc_namespace() { + return ["langchain-community", "tools"]; + } + + private _name: string; + + private _description: string; + + apiSpec: string; + + get name() { + return this._name; + } + + get description() { + return this._description; + } + + constructor(params: AIPluginToolParams) { + super(params); + this._name = params.name; + this._description = params.description; + this.apiSpec = params.apiSpec; + } + + /** @ignore */ + async _call(_input: string) { + return this.apiSpec; + } + + /** + * Static method that creates an instance of AIPluginTool from a given + * plugin URL. It fetches the plugin and its API specification from the + * provided URL and returns a new instance of AIPluginTool with the + * fetched data. + * @param url The URL of the AI plugin. + * @returns A new instance of AIPluginTool. + */ + static async fromPluginUrl(url: string) { + const aiPluginRes = await fetch(url); + if (!aiPluginRes.ok) { + throw new Error( + `Failed to fetch plugin from ${url} with status ${aiPluginRes.status}` + ); + } + const aiPluginJson = await aiPluginRes.json(); + + const apiUrlRes = await fetch(aiPluginJson.api.url); + if (!apiUrlRes.ok) { + throw new Error( + `Failed to fetch API spec from ${aiPluginJson.api.url} with status ${apiUrlRes.status}` + ); + } + const apiUrlJson = await apiUrlRes.text(); + + return new AIPluginTool({ + name: aiPluginJson.name_for_model, + description: `Call this tool to get the OpenAPI spec (and usage guide) for interacting with the ${aiPluginJson.name_for_human} API. You should only call this ONCE! What is the ${aiPluginJson.name_for_human} API useful for? 
${aiPluginJson.description_for_human}`, + apiSpec: `Usage Guide: ${aiPluginJson.description_for_model} + +OpenAPI Spec in JSON or YAML format:\n${apiUrlJson}`, + }); + } +} diff --git a/libs/langchain-community/src/tools/aws_sfn.ts b/libs/langchain-community/src/tools/aws_sfn.ts new file mode 100644 index 000000000000..3c11430cf28d --- /dev/null +++ b/libs/langchain-community/src/tools/aws_sfn.ts @@ -0,0 +1,229 @@ +import { + SFNClient as Client, + StartExecutionCommand as Invoker, + DescribeExecutionCommand as Describer, + SendTaskSuccessCommand as TaskSuccessSender, +} from "@aws-sdk/client-sfn"; + +import { Tool, ToolParams } from "@langchain/core/tools"; + +/** + * Interface for AWS Step Functions configuration. + */ +export interface SfnConfig { + stateMachineArn: string; + region?: string; + accessKeyId?: string; + secretAccessKey?: string; +} + +/** + * Interface for AWS Step Functions client constructor arguments. + */ +interface SfnClientConstructorArgs { + region?: string; + credentials?: { + accessKeyId: string; + secretAccessKey: string; + }; +} + +/** + * Class for starting the execution of an AWS Step Function. + */ +export class StartExecutionAWSSfnTool extends Tool { + static lc_name() { + return "StartExecutionAWSSfnTool"; + } + + private sfnConfig: SfnConfig; + + public name: string; + + public description: string; + + get lc_namespace() { + return ["langchain-community", "tools"]; + } + + constructor({ + name, + description, + ...rest + }: SfnConfig & { name: string; description: string }) { + super(); + this.name = name; + this.description = description; + this.sfnConfig = rest; + } + + /** + * Generates a formatted description for the StartExecutionAWSSfnTool. + * @param name Name of the state machine. + * @param description Description of the state machine. + * @returns A formatted description string. + */ + static formatDescription(name: string, description: string): string { + return `Use to start executing the ${name} state machine. 
Use to run ${name} workflows. Whenever you need to start (or execute) an asynchronous workflow (or state machine) about ${description} you should ALWAYS use this. Input should be a valid JSON string.`; + } + + /** @ignore */ + async _call(input: string): Promise { + const clientConstructorArgs: SfnClientConstructorArgs = + getClientConstructorArgs(this.sfnConfig); + const sfnClient = new Client(clientConstructorArgs); + + return new Promise((resolve) => { + let payload; + try { + payload = JSON.parse(input); + } catch (e) { + console.error("Error starting state machine execution:", e); + resolve("failed to complete request"); + } + + const command = new Invoker({ + stateMachineArn: this.sfnConfig.stateMachineArn, + input: JSON.stringify(payload), + }); + + sfnClient + .send(command) + .then((response) => + resolve( + response.executionArn ? response.executionArn : "request completed." + ) + ) + .catch((error: Error) => { + console.error("Error starting state machine execution:", error); + resolve("failed to complete request"); + }); + }); + } +} + +/** + * Class for checking the status of an AWS Step Function execution. + */ +export class DescribeExecutionAWSSfnTool extends Tool { + static lc_name() { + return "DescribeExecutionAWSSfnTool"; + } + + name = "describe-execution-aws-sfn"; + + description = + "This tool should ALWAYS be used for checking the status of any AWS Step Function execution (aka. state machine execution). Input to this tool is a properly formatted AWS Step Function Execution ARN (executionArn). 
The output is a stringified JSON object containing the executionArn, name, status, startDate, stopDate, input, output, error, and cause of the execution."; + + sfnConfig: Omit; + + constructor(config: Omit & ToolParams) { + super(config); + this.sfnConfig = config; + } + + /** @ignore */ + async _call(input: string) { + const clientConstructorArgs: SfnClientConstructorArgs = + getClientConstructorArgs(this.sfnConfig); + const sfnClient = new Client(clientConstructorArgs); + + const command = new Describer({ + executionArn: input, + }); + return await sfnClient + .send(command) + .then((response) => + response.executionArn + ? JSON.stringify({ + executionArn: response.executionArn, + name: response.name, + status: response.status, + startDate: response.startDate, + stopDate: response.stopDate, + input: response.input, + output: response.output, + error: response.error, + cause: response.cause, + }) + : "{}" + ) + .catch((error: Error) => { + console.error("Error describing state machine execution:", error); + return "failed to complete request"; + }); + } +} + +/** + * Class for sending a task success signal to an AWS Step Function + * execution. + */ +export class SendTaskSuccessAWSSfnTool extends Tool { + static lc_name() { + return "SendTaskSuccessAWSSfnTool"; + } + + name = "send-task-success-aws-sfn"; + + description = + "This tool should ALWAYS be used for sending task success to an AWS Step Function execution (aka. statemachine exeuction). 
Input to this tool is a stringify JSON object containing the taskToken and output."; + + sfnConfig: Omit; + + constructor(config: Omit & ToolParams) { + super(config); + this.sfnConfig = config; + } + + /** @ignore */ + async _call(input: string) { + const clientConstructorArgs: SfnClientConstructorArgs = + getClientConstructorArgs(this.sfnConfig); + const sfnClient = new Client(clientConstructorArgs); + + let payload; + try { + payload = JSON.parse(input); + } catch (e) { + console.error("Error starting state machine execution:", e); + return "failed to complete request"; + } + + const command = new TaskSuccessSender({ + taskToken: payload.taskToken, + output: JSON.stringify(payload.output), + }); + + return await sfnClient + .send(command) + .then(() => "request completed.") + .catch((error: Error) => { + console.error( + "Error sending task success to state machine execution:", + error + ); + return "failed to complete request"; + }); + } +} + +/** + * Helper function to construct the AWS SFN client. + */ +function getClientConstructorArgs(config: Partial) { + const clientConstructorArgs: SfnClientConstructorArgs = {}; + + if (config.region) { + clientConstructorArgs.region = config.region; + } + + if (config.accessKeyId && config.secretAccessKey) { + clientConstructorArgs.credentials = { + accessKeyId: config.accessKeyId, + secretAccessKey: config.secretAccessKey, + }; + } + + return clientConstructorArgs; +} diff --git a/libs/langchain-community/src/tools/bingserpapi.ts b/libs/langchain-community/src/tools/bingserpapi.ts new file mode 100644 index 000000000000..5c0bec62f6a1 --- /dev/null +++ b/libs/langchain-community/src/tools/bingserpapi.ts @@ -0,0 +1,82 @@ +import { getEnvironmentVariable } from "@langchain/core/utils/env"; +import { Tool } from "@langchain/core/tools"; + +/** + * A tool for web search functionality using Bing's search engine. It + * extends the base `Tool` class and implements the `_call` method to + * perform the search operation. 
Requires an API key for Bing's search + * engine, which can be set in the environment variables. Also accepts + * additional parameters for the search query. + */ +class BingSerpAPI extends Tool { + static lc_name() { + return "BingSerpAPI"; + } + + get lc_namespace() { + return ["langchain-community", "tools"]; + } + + /** + * Not implemented. Will throw an error if called. + */ + toJSON() { + return this.toJSONNotImplemented(); + } + + name = "bing-search"; + + description = + "a search engine. useful for when you need to answer questions about current events. input should be a search query."; + + key: string; + + params: Record; + + constructor( + apiKey: string | undefined = getEnvironmentVariable("BingApiKey"), + params: Record = {} + ) { + super(...arguments); + + if (!apiKey) { + throw new Error( + "BingSerpAPI API key not set. You can set it as BingApiKey in your .env file." + ); + } + + this.key = apiKey; + this.params = params; + } + + /** @ignore */ + async _call(input: string): Promise { + const headers = { "Ocp-Apim-Subscription-Key": this.key }; + const params = { q: input, textDecorations: "true", textFormat: "HTML" }; + const searchUrl = new URL("https://api.bing.microsoft.com/v7.0/search"); + + Object.entries(params).forEach(([key, value]) => { + searchUrl.searchParams.append(key, value); + }); + + const response = await fetch(searchUrl, { headers }); + + if (!response.ok) { + throw new Error(`HTTP error ${response.status}`); + } + + const res = await response.json(); + const results: [] = res.webPages.value; + + if (results.length === 0) { + return "No good results found."; + } + const snippets = results + .map((result: { snippet: string }) => result.snippet) + .join(" "); + + return snippets; + } +} + +export { BingSerpAPI }; diff --git a/libs/langchain-community/src/tools/brave_search.ts b/libs/langchain-community/src/tools/brave_search.ts new file mode 100644 index 000000000000..10e1380ff365 --- /dev/null +++ 
b/libs/langchain-community/src/tools/brave_search.ts @@ -0,0 +1,81 @@ +import { getEnvironmentVariable } from "@langchain/core/utils/env"; +import { Tool } from "@langchain/core/tools"; + +/** + * Interface for the parameters required to instantiate a BraveSearch + * instance. + */ +export interface BraveSearchParams { + apiKey?: string; +} + +/** + * Class for interacting with the Brave Search engine. It extends the Tool + * class and requires an API key to function. The API key can be passed in + * during instantiation or set as an environment variable named + * 'BRAVE_SEARCH_API_KEY'. + */ +export class BraveSearch extends Tool { + static lc_name() { + return "BraveSearch"; + } + + get lc_namespace() { + return ["langchain-community", "tools"]; + } + + name = "brave-search"; + + description = + "a search engine. useful for when you need to answer questions about current events. input should be a search query."; + + apiKey: string; + + constructor( + fields: BraveSearchParams = { + apiKey: getEnvironmentVariable("BRAVE_SEARCH_API_KEY"), + } + ) { + super(); + + if (!fields.apiKey) { + throw new Error( + `Brave API key not set. Please pass it in or set it as an environment variable named "BRAVE_SEARCH_API_KEY".` + ); + } + + this.apiKey = fields.apiKey; + } + + /** @ignore */ + async _call(input: string): Promise { + const headers = { + "X-Subscription-Token": this.apiKey, + Accept: "application/json", + }; + const searchUrl = new URL( + `https://api.search.brave.com/res/v1/web/search?q=${encodeURIComponent( + input + )}` + ); + + const response = await fetch(searchUrl, { headers }); + + if (!response.ok) { + throw new Error(`HTTP error ${response.status}`); + } + + const parsedResponse = await response.json(); + const webSearchResults = parsedResponse.web?.results; + const finalResults = Array.isArray(webSearchResults) + ? 
webSearchResults.map( + (item: { title?: string; url?: string; description?: string }) => ({ + title: item.title, + link: item.url, + snippet: item.description, + }) + ) + : []; + return JSON.stringify(finalResults); + } +} diff --git a/libs/langchain-community/src/tools/connery.ts b/libs/langchain-community/src/tools/connery.ts new file mode 100644 index 000000000000..86f28093414c --- /dev/null +++ b/libs/langchain-community/src/tools/connery.ts @@ -0,0 +1,357 @@ +import { AsyncCaller, AsyncCallerParams } from "@langchain/core/utils/async_caller"; +import { getEnvironmentVariable } from "@langchain/core/utils/env"; +import { Tool } from "@langchain/core/tools"; + +/** + * An object containing configuration parameters for the ConneryService class. + * @extends AsyncCallerParams + */ +export interface ConneryServiceParams extends AsyncCallerParams { + runnerUrl: string; + apiKey: string; +} + +type ApiResponse = { + status: "success"; + data: T; +}; + +type ApiErrorResponse = { + status: "error"; + error: { + message: string; + }; +}; + +type Parameter = { + key: string; + title: string; + description: string; + type: string; + validation?: { + required?: boolean; + }; +}; + +type Action = { + id: string; + key: string; + title: string; + description: string; + type: string; + inputParameters: Parameter[]; + outputParameters: Parameter[]; + pluginId: string; +}; + +type Input = { + [key: string]: string; +}; + +type Output = { + [key: string]: string; +}; + +type RunActionResult = { + output: Output; + used: { + actionId: string; + input: Input; + }; +}; + +/** + * A LangChain Tool object wrapping a Connery action. + * @extends Tool + */ +export class ConneryAction extends Tool { + get lc_namespace() { + return ["langchain-community", "tools"]; + } + + name: string; + + description: string; + + /** + * Creates a ConneryAction instance based on the provided Connery action. + * @param _action The Connery action. + * @param _service The ConneryService instance. 
+ * @returns A ConneryAction instance. + */ + constructor(protected _action: Action, protected _service: ConneryService) { + super(); + + this.name = this._action.title; + this.description = this.getDescription(); + } + + /** + * Runs the Connery action. + * @param prompt This is a plain English prompt with all the information needed to run the action. + * @returns A promise that resolves to a JSON string containing the output of the action. + */ + protected _call(prompt: string): Promise { + return this._service.runAction(this._action.id, prompt); + } + + /** + * Returns the description of the Connery action. + * @returns A string containing the description of the Connery action together with the instructions on how to use it. + */ + protected getDescription(): string { + const { title, description } = this._action; + const inputParameters = this.prepareJsonForTemplate( + this._action.inputParameters + ); + const example1InputParametersSchema = this.prepareJsonForTemplate([ + { + key: "recipient", + title: "Email Recipient", + description: "Email address of the email recipient.", + type: "string", + validation: { + required: true, + }, + }, + { + key: "subject", + title: "Email Subject", + description: "Subject of the email.", + type: "string", + validation: { + required: true, + }, + }, + { + key: "body", + title: "Email Body", + description: "Body of the email.", + type: "string", + validation: { + required: true, + }, + }, + ]); + + const descriptionTemplate = + "# Instructions about tool input:\n" + + "The input to this tool is a plain English prompt with all the input parameters needed to call it. " + + "The input parameters schema of this tool is provided below. " + + "Use the input parameters schema to construct the prompt for the tool. " + + "If the input parameter is required in the schema, it must be provided in the prompt. " + + "Do not come up with the values for the input parameters yourself. 
" + + "If you do not have enough information to fill in the input parameter, ask the user to provide it. " + + "See examples below on how to construct the prompt based on the provided tool information. " + + "\n\n" + + "# Instructions about tool output:\n" + + "The output of this tool is a JSON string. " + + "Retrieve the output parameters from the JSON string and use them in the next tool. " + + "Do not return the JSON string as the output of the tool. " + + "\n\n" + + "# Example:\n" + + "Tool information:\n" + + "- Title: Send email\n" + + "- Description: Send an email to a recipient.\n" + + `- Input parameters schema in JSON fromat: ${example1InputParametersSchema}\n` + + "The tool input prompt:\n" + + "recipient: test@example.com, subject: 'Test email', body: 'This is a test email sent from Langchain Connery tool.'\n" + + "\n\n" + + "# The tool information\n" + + `- Title: ${title}\n` + + `- Description: ${description}\n` + + `- Input parameters schema in JSON fromat: ${inputParameters}\n`; + + return descriptionTemplate; + } + + /** + * Converts the provided object to a JSON string and escapes '{' and '}' characters. + * @param obj The object to convert to a JSON string. + * @returns A string containing the JSON representation of the provided object with '{' and '}' characters escaped. + */ + // eslint-disable-next-line @typescript-eslint/no-explicit-any + protected prepareJsonForTemplate(obj: any): string { + // Convert the object to a JSON string + const jsonString = JSON.stringify(obj); + + // Replace '{' with '{{' and '}' with '}}' + const escapedJSON = jsonString.replace(/{/g, "{{").replace(/}/g, "}}"); + + return escapedJSON; + } +} + +/** + * A service for working with Connery actions. + * + * Connery is an open-source plugin infrastructure for AI. 
+ * Source code: https://github.com/connery-io/connery-platform + */ +export class ConneryService { + protected runnerUrl: string; + + protected apiKey: string; + + protected asyncCaller: AsyncCaller; + + /** + * Creates a ConneryService instance. + * @param params A ConneryServiceParams object. + * If not provided, the values are retrieved from the CONNERY_RUNNER_URL + * and CONNERY_RUNNER_API_KEY environment variables. + * @returns A ConneryService instance. + */ + constructor(params?: ConneryServiceParams) { + const runnerUrl = + params?.runnerUrl ?? getEnvironmentVariable("CONNERY_RUNNER_URL"); + const apiKey = + params?.apiKey ?? getEnvironmentVariable("CONNERY_RUNNER_API_KEY"); + + if (!runnerUrl || !apiKey) { + throw new Error( + "CONNERY_RUNNER_URL and CONNERY_RUNNER_API_KEY environment variables must be set." + ); + } + + this.runnerUrl = runnerUrl; + this.apiKey = apiKey; + + this.asyncCaller = new AsyncCaller(params ?? {}); + } + + /** + * Returns the list of Connery actions wrapped as a LangChain Tool objects. + * @returns A promise that resolves to an array of ConneryAction objects. + */ + async listActions(): Promise { + const actions = await this._listActions(); + return actions.map((action) => new ConneryAction(action, this)); + } + + /** + * Returns the specified Connery action wrapped as a LangChain Tool object. + * @param actionId The ID of the action to return. + * @returns A promise that resolves to a ConneryAction object. + */ + async getAction(actionId: string): Promise { + const action = await this._getAction(actionId); + return new ConneryAction(action, this); + } + + /** + * Runs the specified Connery action with the provided input. + * @param actionId The ID of the action to run. + * @param prompt This is a plain English prompt with all the information needed to run the action. + * @param input The input expected by the action. + * If provided together with the prompt, the input takes precedence over the input specified in the prompt. 
+ * @returns A promise that resolves to a JSON string containing the output of the action. + */ + async runAction( + actionId: string, + prompt?: string, + input?: Input + ): Promise { + const result = await this._runAction(actionId, prompt, input); + return JSON.stringify(result); + } + + /** + * Returns the list of actions available in the Connery runner. + * @returns A promise that resolves to an array of Action objects. + */ + protected async _listActions(): Promise { + const response = await this.asyncCaller.call( + fetch, + `${this.runnerUrl}/v1/actions`, + { + method: "GET", + headers: this._getHeaders(), + } + ); + await this._handleError(response, "Failed to list actions"); + + const apiResponse: ApiResponse = await response.json(); + return apiResponse.data; + } + + /** + * Returns the specified action available in the Connery runner. + * @param actionId The ID of the action to return. + * @returns A promise that resolves to an Action object. + * @throws An error if the action with the specified ID is not found. + */ + protected async _getAction(actionId: string): Promise { + const actions = await this._listActions(); + const action = actions.find((a) => a.id === actionId); + if (!action) { + throw new Error( + `The action with ID "${actionId}" was not found in the list of available actions in the Connery runner.` + ); + } + return action; + } + + /** + * Runs the specified Connery action with the provided input. + * @param actionId The ID of the action to run. + * @param prompt This is a plain English prompt with all the information needed to run the action. + * @param input The input object expected by the action. + * If provided together with the prompt, the input takes precedence over the input specified in the prompt. + * @returns A promise that resolves to a RunActionResult object. 
+ */ + protected async _runAction( + actionId: string, + prompt?: string, + input?: Input + ): Promise { + const response = await this.asyncCaller.call( + fetch, + `${this.runnerUrl}/v1/actions/${actionId}/run`, + { + method: "POST", + headers: this._getHeaders(), + body: JSON.stringify({ + prompt, + input, + }), + } + ); + await this._handleError(response, "Failed to run action"); + + const apiResponse: ApiResponse = await response.json(); + return apiResponse.data.output; + } + + /** + * Returns a standard set of HTTP headers to be used in API calls to the Connery runner. + * @returns An object containing the standard set of HTTP headers. + */ + protected _getHeaders(): Record { + return { + "Content-Type": "application/json", + "x-api-key": this.apiKey, + }; + } + + /** + * Shared error handler for API calls to the Connery runner. + * If the response is not ok, an error is thrown containing the error message returned by the Connery runner. + * Otherwise, the promise resolves to void. + * @param response The response object returned by the Connery runner. + * @param errorMessage The error message to be used in the error thrown if the response is not ok. + * @returns A promise that resolves to void. + * @throws An error containing the error message returned by the Connery runner. + */ + protected async _handleError( + response: Response, + errorMessage: string + ): Promise { + if (response.ok) return; + + const apiErrorResponse: ApiErrorResponse = await response.json(); + throw new Error( + `${errorMessage}. Status code: ${response.status}. 
Error message: ${apiErrorResponse.error.message}` + ); + } +} diff --git a/libs/langchain-community/src/tools/dadjokeapi.ts b/libs/langchain-community/src/tools/dadjokeapi.ts new file mode 100644 index 000000000000..c367ad8ac481 --- /dev/null +++ b/libs/langchain-community/src/tools/dadjokeapi.ts @@ -0,0 +1,48 @@ +import { Tool } from "@langchain/core/tools"; + +/** + * The DadJokeAPI class is a tool for generating dad jokes based on a + * specific topic. It fetches jokes from an external API and returns a + * random joke from the results. If no jokes are found for the given + * search term, it returns a message indicating that no jokes were found. + */ +class DadJokeAPI extends Tool { + static lc_name() { + return "DadJokeAPI"; + } + + get lc_namespace() { + return ["langchain-community", "tools"]; + } + + name = "dadjoke"; + + description = + "a dad joke generator. get a dad joke about a specific topic. input should be a search term."; + + /** @ignore */ + async _call(input: string): Promise { + const headers = { Accept: "application/json" }; + const searchUrl = `https://icanhazdadjoke.com/search?term=${input}`; + + const response = await fetch(searchUrl, { headers }); + + if (!response.ok) { + throw new Error(`HTTP error ${response.status}`); + } + + const data = await response.json(); + const jokes = data.results; + + if (jokes.length === 0) { + return `No dad jokes found about ${input}`; + } + + const randomIndex = Math.floor(Math.random() * jokes.length); + const randomJoke = jokes[randomIndex].joke; + + return randomJoke; + } +} + +export { DadJokeAPI }; diff --git a/libs/langchain-community/src/tools/dataforseo_api_search.ts b/libs/langchain-community/src/tools/dataforseo_api_search.ts new file mode 100644 index 000000000000..450da1eecc4b --- /dev/null +++ b/libs/langchain-community/src/tools/dataforseo_api_search.ts @@ -0,0 +1,382 @@ +import { getEnvironmentVariable } from "@langchain/core/utils/env"; +import { Tool } from "@langchain/core/tools"; + +/** + 
* @interface DataForSeoApiConfig + * @description Represents the configuration object used to set up a DataForSeoAPISearch instance. + */ +export interface DataForSeoApiConfig { + /** + * @property apiLogin + * @type {string} + * @description The API login credential for DataForSEO. If not provided, it will be fetched from environment variables. + */ + apiLogin?: string; + + /** + * @property apiPassword + * @type {string} + * @description The API password credential for DataForSEO. If not provided, it will be fetched from environment variables. + */ + apiPassword?: string; + + /** + * @property params + * @type {Record} + * @description Additional parameters to customize the API request. + */ + params?: Record; + + /** + * @property useJsonOutput + * @type {boolean} + * @description Determines if the output should be in JSON format. + */ + useJsonOutput?: boolean; + + /** + * @property jsonResultTypes + * @type {Array} + * @description Specifies the types of results to include in the output. + */ + jsonResultTypes?: Array; + + /** + * @property jsonResultFields + * @type {Array} + * @description Specifies the fields to include in each result object. + */ + jsonResultFields?: Array; + + /** + * @property topCount + * @type {number} + * @description Specifies the maximum number of results to return. + */ + topCount?: number; +} + +/** + * Represents a task in the API response. + */ +type Task = { + id: string; + status_code: number; + status_message: string; + time: string; + result: Result[]; +}; + +/** + * Represents a result in the API response. + */ +type Result = { + keyword: string; + check_url: string; + datetime: string; + spell?: string; + item_types: string[]; + se_results_count: number; + items_count: number; + // eslint-disable-next-line @typescript-eslint/no-explicit-any + items: any[]; +}; + +/** + * Represents the API response. 
+ */ +type ApiResponse = { + status_code: number; + status_message: string; + tasks: Task[]; +}; + +/** + * @class DataForSeoAPISearch + * @extends {Tool} + * @description Represents a wrapper class to work with DataForSEO SERP API. + */ +export class DataForSeoAPISearch extends Tool { + static lc_name() { + return "DataForSeoAPISearch"; + } + + get lc_namespace() { + return ["langchain-community", "tools"]; + } + + name = "dataforseo-api-wrapper"; + + description = + "A robust Google Search API provided by DataForSeo. This tool is handy when you need information about trending topics or current events."; + + protected apiLogin: string; + + protected apiPassword: string; + + /** + * @property defaultParams + * @type {Record} + * @description These are the default parameters to be used when making an API request. + */ + protected defaultParams: Record = { + location_name: "United States", + language_code: "en", + depth: 10, + se_name: "google", + se_type: "organic", + }; + + protected params: Record = {}; + + protected jsonResultTypes: Array | undefined; + + protected jsonResultFields: Array | undefined; + + protected topCount: number | undefined; + + protected useJsonOutput = false; + + /** + * @constructor + * @param {DataForSeoApiConfig} config + * @description Sets up the class, throws an error if the API login/password isn't provided. + */ + constructor(config: DataForSeoApiConfig = {}) { + super(); + const apiLogin = + config.apiLogin ?? getEnvironmentVariable("DATAFORSEO_LOGIN"); + const apiPassword = + config.apiPassword ?? getEnvironmentVariable("DATAFORSEO_PASSWORD"); + const params = config.params ?? {}; + if (!apiLogin || !apiPassword) { + throw new Error( + "DataForSEO login or password not set. You can set it as DATAFORSEO_LOGIN and DATAFORSEO_PASSWORD in your .env file, or pass it to DataForSeoAPISearch." 
+ ); + } + this.params = { ...this.defaultParams, ...params }; + this.apiLogin = apiLogin; + this.apiPassword = apiPassword; + this.jsonResultTypes = config.jsonResultTypes; + this.jsonResultFields = config.jsonResultFields; + this.useJsonOutput = config.useJsonOutput ?? false; + this.topCount = config.topCount; + } + + /** + * @method _call + * @param {string} keyword + * @returns {Promise} + * @description Initiates a call to the API and processes the response. + */ + async _call(keyword: string): Promise { + return this.useJsonOutput + ? JSON.stringify(await this.results(keyword)) + : this.processResponse(await this.getResponseJson(keyword)); + } + + /** + * @method results + * @param {string} keyword + * @returns {Promise>} + * @description Fetches the results from the API for the given keyword. + */ + // eslint-disable-next-line @typescript-eslint/no-explicit-any + async results(keyword: string): Promise> { + const res = await this.getResponseJson(keyword); + return this.filterResults(res, this.jsonResultTypes); + } + + /** + * @method prepareRequest + * @param {string} keyword + * @returns {{url: string; headers: HeadersInit; data: BodyInit}} + * @description Prepares the request details for the API call. 
+ */ + protected prepareRequest(keyword: string): { + url: string; + headers: HeadersInit; + data: BodyInit; + } { + if (this.apiLogin === undefined || this.apiPassword === undefined) { + throw new Error("api_login or api_password is not provided"); + } + + const credentials = Buffer.from( + `${this.apiLogin}:${this.apiPassword}`, + "utf-8" + ).toString("base64"); + const headers = { + Authorization: `Basic ${credentials}`, + "Content-Type": "application/json", + }; + + const params = { ...this.params }; + params.keyword ??= keyword; + const data = [params]; + + return { + url: `https://api.dataforseo.com/v3/serp/${params.se_name}/${params.se_type}/live/advanced`, + headers, + data: JSON.stringify(data), + }; + } + + /** + * @method getResponseJson + * @param {string} keyword + * @returns {Promise} + * @description Executes a POST request to the provided URL and returns a parsed JSON response. + */ + protected async getResponseJson(keyword: string): Promise { + const requestDetails = this.prepareRequest(keyword); + const response = await fetch(requestDetails.url, { + method: "POST", + headers: requestDetails.headers, + body: requestDetails.data, + }); + + if (!response.ok) { + throw new Error( + `Got ${response.status} error from DataForSEO: ${response.statusText}` + ); + } + + const result: ApiResponse = await response.json(); + return this.checkResponse(result); + } + + /** + * @method checkResponse + * @param {ApiResponse} response + * @returns {ApiResponse} + * @description Checks the response status code. 
+ */ + private checkResponse(response: ApiResponse): ApiResponse { + if (response.status_code !== 20000) { + throw new Error( + `Got error from DataForSEO SERP API: ${response.status_message}` + ); + } + for (const task of response.tasks) { + if (task.status_code !== 20000) { + throw new Error( + `Got error from DataForSEO SERP API: ${task.status_message}` + ); + } + } + return response; + } + + /* eslint-disable @typescript-eslint/no-explicit-any */ + /** + * @method filterResults + * @param {ApiResponse} res + * @param {Array | undefined} types + * @returns {Array} + * @description Filters the results based on the specified result types. + */ + private filterResults( + res: ApiResponse, + types: Array | undefined + ): Array { + const output: Array = []; + for (const task of res.tasks || []) { + for (const result of task.result || []) { + for (const item of result.items || []) { + if ( + types === undefined || + types.length === 0 || + types.includes(item.type) + ) { + const newItem = this.cleanupUnnecessaryItems(item); + if (Object.keys(newItem).length !== 0) { + output.push(newItem); + } + } + if (this.topCount !== undefined && output.length >= this.topCount) { + break; + } + } + } + } + return output; + } + + /* eslint-disable @typescript-eslint/no-explicit-any */ + /* eslint-disable no-param-reassign */ + /** + * @method cleanupUnnecessaryItems + * @param {any} d + * @description Removes unnecessary items from the response. 
+ */ + private cleanupUnnecessaryItems(d: any): any { + if (Array.isArray(d)) { + return d.map((item) => this.cleanupUnnecessaryItems(item)); + } + + const toRemove = ["xpath", "position", "rectangle"]; + if (typeof d === "object" && d !== null) { + return Object.keys(d).reduce((newObj: any, key: string) => { + if ( + (this.jsonResultFields === undefined || + this.jsonResultFields.includes(key)) && + !toRemove.includes(key) + ) { + if (typeof d[key] === "object" && d[key] !== null) { + newObj[key] = this.cleanupUnnecessaryItems(d[key]); + } else { + newObj[key] = d[key]; + } + } + return newObj; + }, {}); + } + + return d; + } + + /** + * @method processResponse + * @param {ApiResponse} res + * @returns {string} + * @description Processes the response to extract meaningful data. + */ + protected processResponse(res: ApiResponse): string { + let returnValue = "No good search result found"; + for (const task of res.tasks || []) { + for (const result of task.result || []) { + const { item_types } = result; + const items = result.items || []; + if (item_types.includes("answer_box")) { + returnValue = items.find( + (item: { type: string; text: string }) => item.type === "answer_box" + ).text; + } else if (item_types.includes("knowledge_graph")) { + returnValue = items.find( + (item: { type: string; description: string }) => + item.type === "knowledge_graph" + ).description; + } else if (item_types.includes("featured_snippet")) { + returnValue = items.find( + (item: { type: string; description: string }) => + item.type === "featured_snippet" + ).description; + } else if (item_types.includes("shopping")) { + returnValue = items.find( + (item: { type: string; price: string }) => item.type === "shopping" + ).price; + } else if (item_types.includes("organic")) { + returnValue = items.find( + (item: { type: string; description: string }) => + item.type === "organic" + ).description; + } + if (returnValue) { + break; + } + } + } + return returnValue; + } +} diff --git 
a/libs/langchain-community/src/tools/fixtures/wordoftheday.html b/libs/langchain-community/src/tools/fixtures/wordoftheday.html new file mode 100644 index 000000000000..09baddceca01 --- /dev/null +++ b/libs/langchain-community/src/tools/fixtures/wordoftheday.html @@ -0,0 +1,3892 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + Word of the Day: Foible | Merriam-Webster + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+
+
+ +
+
+
+ +
+
+
+ +
+
+ +
+
+
+
+
+

Word of the Day

+ : April 10, 2023 +
+ +
+ +
+
+
+

foible

+ play +
+
+ + + +
+ noun + + FOY-bul + +
+ + + + +
+
+ +
+ +
+
+
+ +
+

What It Means

+

+ Foibles are minor flaws or shortcomings in + character or behavior. In fencing, + foible refers to the part of a sword's blade + between the middle and point, which is considered the + weakest part. +

+ +

+ // He was amused daily by the foibles of his + eccentric neighbor. +

+ +

+ See the entry > +

+ + +

+ foible in + Context +

+
+
+

+ "Films about important historical moments are often + marked by a heavy solemnity, a sometimes suffocating + respectfulness that can make one forget that these + events involved real people, human beings with + passions and foibles." — Michael Ordoña, + The Los Angeles Times, 20 Jan. 2023 +

+ +
+
+
+ +
+

+ Build your vocabulary! Get Word of the Day in + your inbox every day. +

+
+ + +
+
+ +
+ +
+
+
+
+ + +
+

+ +

+ What Did You Just Call Me? +

+
+

+
+
+ +
+
+
    +
  • +
    + brown chihuahua sitting on the floor with squinting eyes looking at the camera +
    +
  • +
  • + Before we went to her house, Hannah told + us her aunt was a + flibbertigibbet. +
  • +
+
+ +
+
+ +
+
+ Name That Thing +
+
+

+ You know what it looks like… but what is + it called? +

+ TAKE THE QUIZ +
+
+ +
+
+ Solve today's spelling word game by finding as many words as you can with using just 7 letters. Longer words score more points. +
+
+

+ Can you make 12 words with 7 letters? +

+ PLAY +
+
+
+
+
+
+
+
+ +
+ + +
+

Did You Know?

+

+ Many word lovers agree that the pen is mightier than the + sword. But be they + honed + in wit or form, even the sharpest tools in the shed have + their flaws. That’s where foible comes in + handy. Borrowed from French in the 1600s, the word + originally referred to the weakest part of a fencing + sword, that part being the portion between the middle + and the pointed tip. The English foible soon + came to be applied not only to weaknesses in blades but + also to minor failings in character. The French source + of foible is also at a remove from the fencing + arena; the French foible means "weak," and it + comes from the same Old French term, feble, + that gave us + feeble. +

+ +
+
+ + +
+ +
+

Test Your Vocabulary

+

+ Unscramble the letters to create a word that refers to a + particular kind of fencing sword: BRASE. +

+ VIEW THE ANSWER +
+ +
+ + +
+
+

Podcast

+
+ + + +
+
+
+
+ +
+ + +
+
+

More Words of the Day

+
+ + + + +
+ +
+
+ + + + +
+ + + +
+
+ Love words? Need even more definitions? +
+

+ Subscribe to America's largest dictionary and get thousands + more definitions and advanced search—ad free! +

+ +
+
+
+
+ + + + + + + + + + +
+
+ + + + + + + + + + + + +
+
+
+
+ Do Not Sell Or Share My Personal Information +
+ You have chosen to opt-out of the sale or sharing of your information + from this site and any of its affiliates. To opt back in please click + the "Customize my ad experience" link.
+
This site collects information through the use of cookies and + other tracking tools. Cookies and these tools do not contain any + information that personally identifies a user, but personal + information that would be stored about you may be linked to the + information stored in and obtained from them. This information would + be used and shared for Analytics, Ad Serving, Interest Based + Advertising, among other purposes.
+
For more information please visit this site's Privacy + Policy.
+
+
+ CANCEL +
+
+ CONTINUE +
+
+
+
+ + + + + + + + + + + diff --git a/libs/langchain-community/src/tools/gmail/base.ts b/libs/langchain-community/src/tools/gmail/base.ts new file mode 100644 index 000000000000..eedf3776bc2f --- /dev/null +++ b/libs/langchain-community/src/tools/gmail/base.ts @@ -0,0 +1,79 @@ +import { gmail_v1, google } from "googleapis"; +import { z } from "zod"; +import { StructuredTool } from "@langchain/core/tools"; +import { getEnvironmentVariable } from "@langchain/core/utils/env"; + +export interface GmailBaseToolParams { + credentials?: { + clientEmail?: string; + privateKey?: string; + keyfile?: string; + }; + scopes?: string[]; +} + +export abstract class GmailBaseTool extends StructuredTool { + private CredentialsSchema = z + .object({ + clientEmail: z + .string() + .min(1) + .default(getEnvironmentVariable("GMAIL_CLIENT_EMAIL") ?? ""), + privateKey: z + .string() + .default(getEnvironmentVariable("GMAIL_PRIVATE_KEY") ?? ""), + keyfile: z + .string() + .default(getEnvironmentVariable("GMAIL_KEYFILE") ?? 
""), + }) + .refine( + (credentials) => + credentials.privateKey !== "" || credentials.keyfile !== "", + { + message: + "Missing GMAIL_PRIVATE_KEY or GMAIL_KEYFILE to interact with Gmail", + } + ); + + private GmailBaseToolParamsSchema = z + .object({ + credentials: this.CredentialsSchema.default({}), + scopes: z.array(z.string()).default(["https://mail.google.com/"]), + }) + .default({}); + + name = "Gmail"; + + description = "A tool to send and view emails through Gmail"; + + get lc_namespace() { + return ["langchain-community", "tools"]; + } + + protected gmail: gmail_v1.Gmail; + + constructor(fields?: Partial) { + super(...arguments); + + const { credentials, scopes } = + this.GmailBaseToolParamsSchema.parse(fields); + + this.gmail = this.getGmail( + scopes, + credentials.clientEmail, + credentials.privateKey, + credentials.keyfile + ); + } + + private getGmail( + scopes: string[], + email: string, + key?: string, + keyfile?: string + ) { + const auth = new google.auth.JWT(email, keyfile, key, scopes); + + return google.gmail({ version: "v1", auth }); + } +} diff --git a/libs/langchain-community/src/tools/gmail/create_draft.ts b/libs/langchain-community/src/tools/gmail/create_draft.ts new file mode 100644 index 000000000000..b2d4b56e89c3 --- /dev/null +++ b/libs/langchain-community/src/tools/gmail/create_draft.ts @@ -0,0 +1,74 @@ +import { z } from "zod"; +import { GmailBaseTool, GmailBaseToolParams } from "./base.js"; +import { CREATE_DRAFT_DESCRIPTION } from "./descriptions.js"; + +export class GmailCreateDraft extends GmailBaseTool { + name = "create_gmail_draft"; + + schema = z.object({ + message: z.string(), + to: z.array(z.string()), + subject: z.string(), + cc: z.array(z.string()).optional(), + bcc: z.array(z.string()).optional(), + }); + + description = CREATE_DRAFT_DESCRIPTION; + + constructor(fields?: GmailBaseToolParams) { + super(fields); + } + + private prepareDraftMessage( + message: string, + to: string[], + subject: string, + cc?: string[], + 
bcc?: string[] + ) { + const draftMessage = { + message: { + raw: "", + }, + }; + + const email = [ + `To: ${to.join(", ")}`, + `Subject: ${subject}`, + cc ? `Cc: ${cc.join(", ")}` : "", + bcc ? `Bcc: ${bcc.join(", ")}` : "", + "", + message, + ].join("\n"); + + draftMessage.message.raw = Buffer.from(email).toString("base64url"); + + return draftMessage; + } + + async _call(arg: z.output) { + const { message, to, subject, cc, bcc } = arg; + const create_message = this.prepareDraftMessage( + message, + to, + subject, + cc, + bcc + ); + + const response = await this.gmail.users.drafts.create({ + userId: "me", + requestBody: create_message, + }); + + return `Draft created. Draft Id: ${response.data.id}`; + } +} + +export type CreateDraftSchema = { + message: string; + to: string[]; + subject: string; + cc?: string[]; + bcc?: string[]; +}; diff --git a/libs/langchain-community/src/tools/gmail/descriptions.ts b/libs/langchain-community/src/tools/gmail/descriptions.ts new file mode 100644 index 000000000000..15193966b232 --- /dev/null +++ b/libs/langchain-community/src/tools/gmail/descriptions.ts @@ -0,0 +1,119 @@ +export const CREATE_DRAFT_DESCRIPTION = `A tool for creating draft emails in Gmail. + +INPUT example: +{ + "message": "Hello, this is a test draft", + "to": ["example1@email.com", "example2@email.com"], + "subject": "Test Draft", + "cc": ["cc1@email.com"], + "bcc": ["bcc1@email.com"] +} + +OUTPUT: +The output is a confirmation message with the draft ID. +`; + +export const GET_MESSAGE_DESCRIPTION = `A tool for retrieving a specific email message from Gmail using its message ID. + +INPUT example: +{ + "messageId": "unique_message_id_string" +} + +OUTPUT: +The output includes detailed information about the retrieved email message. This includes the subject, body, sender (from), recipients (to), date of the email, and the message ID. If any of these details are not available in the email, the tool will throw an error indicating the missing information. 
+ +Example Output: +"Result for the prompt unique_message_id_string +{ + 'subject': 'Email Subject', + 'body': 'Email Body Content', + 'from': 'sender@email.com', + 'to': 'recipient@email.com', + 'date': 'Email Date', + 'messageId': 'unique_message_id_string' +}" +`; + +export const GET_THREAD_DESCRIPTION = `A tool for retrieving an entire email thread from Gmail using the thread ID. + +INPUT example: +{ + "threadId": "unique_thread_id_string" +} + +OUTPUT: +The output includes an array of all the messages in the specified thread. Each message in the array contains detailed information including the subject, body, sender (from), recipients (to), date of the email, and the message ID. If any of these details are not available in a message, the tool will throw an error indicating the missing information. + +Example Output: +"Result for the prompt unique_thread_id_string +[ + { + 'subject': 'Email Subject', + 'body': 'Email Body Content', + 'from': 'sender@email.com', + 'to': 'recipient@email.com', + 'date': 'Email Date', + 'messageId': 'unique_message_id_string' + }, + ... (other messages in the thread) +]" +`; + +export const SEND_MESSAGE_DESCRIPTION = `A tool for sending an email message using Gmail. It allows users to specify recipients, subject, and the content of the message, along with optional cc and bcc fields. + +INPUT example: +{ + "message": "Hello, this is a test email", + "to": ["recipient1@email.com", "recipient2@email.com"], + "subject": "Test Email", + "cc": ["cc1@email.com"], + "bcc": ["bcc1@email.com"] +} + +OUTPUT: +The output is a confirmation message with the ID of the sent email. If there is an error during the sending process, the tool will throw an error with a description of the problem. + +Example Output: +"Message sent. Message Id: unique_message_id_string" +`; + +export const SEARCH_DESCRIPTION = `A tool for searching email messages or threads in Gmail using a specific query. 
It offers the flexibility to choose between messages and threads as the search resource. + +INPUT example: +{ + "query": "specific search query", + "maxResults": 10, // Optional: number of results to return + "resource": "messages" // Optional: can be "messages" or "threads" +} + +OUTPUT: +The output is a JSON list of either email messages or threads, depending on the specified resource, that matches the search query. For 'messages', the output includes details like the message ID, thread ID, snippet, body, subject, and sender of each message. For 'threads', it includes the thread ID, snippet, body, subject, and sender of the first message in each thread. If no data is returned, or if the specified resource is invalid, the tool throws an error with a relevant message. + +Example Output for 'messages': +"Result for the query 'specific search query': +[ + { + 'id': 'message_id', + 'threadId': 'thread_id', + 'snippet': 'message snippet', + 'body': 'message body', + 'subject': 'message subject', + 'sender': 'sender's email' + }, + ... (other messages matching the query) +]" + +Example Output for 'threads': +"Result for the query 'specific search query': +[ + { + 'id': 'thread_id', + 'snippet': 'thread snippet', + 'body': 'first message body', + 'subject': 'first message subject', + 'sender': 'first message sender' + }, + ... 
(other threads matching the query) +]" +`; diff --git a/libs/langchain-community/src/tools/gmail/get_message.ts b/libs/langchain-community/src/tools/gmail/get_message.ts new file mode 100644 index 000000000000..5864c427d7ce --- /dev/null +++ b/libs/langchain-community/src/tools/gmail/get_message.ts @@ -0,0 +1,95 @@ +import { z } from "zod"; +import { GmailBaseToolParams, GmailBaseTool } from "./base.js"; +import { GET_MESSAGE_DESCRIPTION } from "./descriptions.js"; + +export class GmailGetMessage extends GmailBaseTool { + name = "gmail_get_message"; + + schema = z.object({ + messageId: z.string(), + }); + + description = GET_MESSAGE_DESCRIPTION; + + constructor(fields?: GmailBaseToolParams) { + super(fields); + } + + async _call(arg: z.output) { + const { messageId } = arg; + + const message = await this.gmail.users.messages.get({ + userId: "me", + id: messageId, + }); + + const { data } = message; + + if (!data) { + throw new Error("No data returned from Gmail"); + } + + const { payload } = data; + + if (!payload) { + throw new Error("No payload returned from Gmail"); + } + + const { headers } = payload; + + if (!headers) { + throw new Error("No headers returned from Gmail"); + } + + const subject = headers.find((header) => header.name === "Subject"); + + if (!subject) { + throw new Error("No subject returned from Gmail"); + } + + const body = headers.find((header) => header.name === "Body"); + + if (!body) { + throw new Error("No body returned from Gmail"); + } + + const from = headers.find((header) => header.name === "From"); + + if (!from) { + throw new Error("No from returned from Gmail"); + } + + const to = headers.find((header) => header.name === "To"); + + if (!to) { + throw new Error("No to returned from Gmail"); + } + + const date = headers.find((header) => header.name === "Date"); + + if (!date) { + throw new Error("No date returned from Gmail"); + } + + const messageIdHeader = headers.find( + (header) => header.name === "Message-ID" + ); + + if 
(!messageIdHeader) { + throw new Error("No message id returned from Gmail"); + } + + return `Result for the prompt ${messageId} \n${JSON.stringify({ + subject: subject.value, + body: body.value, + from: from.value, + to: to.value, + date: date.value, + messageId: messageIdHeader.value, + })}`; + } +} + +export type GetMessageSchema = { + messageId: string; +}; diff --git a/libs/langchain-community/src/tools/gmail/get_thread.ts b/libs/langchain-community/src/tools/gmail/get_thread.ts new file mode 100644 index 000000000000..0310bf053d0c --- /dev/null +++ b/libs/langchain-community/src/tools/gmail/get_thread.ts @@ -0,0 +1,105 @@ +import { z } from "zod"; +import { GmailBaseTool, GmailBaseToolParams } from "./base.js"; +import { GET_THREAD_DESCRIPTION } from "./descriptions.js"; + +export class GmailGetThread extends GmailBaseTool { + name = "gmail_get_thread"; + + schema = z.object({ + threadId: z.string(), + }); + + description = GET_THREAD_DESCRIPTION; + + constructor(fields?: GmailBaseToolParams) { + super(fields); + } + + async _call(arg: z.output) { + const { threadId } = arg; + + const thread = await this.gmail.users.threads.get({ + userId: "me", + id: threadId, + }); + + const { data } = thread; + + if (!data) { + throw new Error("No data returned from Gmail"); + } + + const { messages } = data; + + if (!messages) { + throw new Error("No messages returned from Gmail"); + } + + return `Result for the prompt ${threadId} \n${JSON.stringify( + messages.map((message) => { + const { payload } = message; + + if (!payload) { + throw new Error("No payload returned from Gmail"); + } + + const { headers } = payload; + + if (!headers) { + throw new Error("No headers returned from Gmail"); + } + + const subject = headers.find((header) => header.name === "Subject"); + + if (!subject) { + throw new Error("No subject returned from Gmail"); + } + + const body = headers.find((header) => header.name === "Body"); + + if (!body) { + throw new Error("No body returned from Gmail"); 
+ } + + const from = headers.find((header) => header.name === "From"); + + if (!from) { + throw new Error("No from returned from Gmail"); + } + + const to = headers.find((header) => header.name === "To"); + + if (!to) { + throw new Error("No to returned from Gmail"); + } + + const date = headers.find((header) => header.name === "Date"); + + if (!date) { + throw new Error("No date returned from Gmail"); + } + + const messageIdHeader = headers.find( + (header) => header.name === "Message-ID" + ); + + if (!messageIdHeader) { + throw new Error("No message id returned from Gmail"); + } + + return { + subject: subject.value, + body: body.value, + from: from.value, + to: to.value, + date: date.value, + messageId: messageIdHeader.value, + }; + }) + )}`; + } +} + +export type GetThreadSchema = { + threadId: string; +}; diff --git a/libs/langchain-community/src/tools/gmail/index.ts b/libs/langchain-community/src/tools/gmail/index.ts new file mode 100644 index 000000000000..d2f854da54a4 --- /dev/null +++ b/libs/langchain-community/src/tools/gmail/index.ts @@ -0,0 +1,12 @@ +export { GmailCreateDraft } from "./create_draft.js"; +export { GmailGetMessage } from "./get_message.js"; +export { GmailGetThread } from "./get_thread.js"; +export { GmailSearch } from "./search.js"; +export { GmailSendMessage } from "./send_message.js"; + +export type { GmailBaseToolParams } from "./base.js"; +export type { CreateDraftSchema } from "./create_draft.js"; +export type { GetMessageSchema } from "./get_message.js"; +export type { GetThreadSchema } from "./get_thread.js"; +export type { SearchSchema } from "./search.js"; +export type { SendMessageSchema } from "./send_message.js"; diff --git a/libs/langchain-community/src/tools/gmail/search.ts b/libs/langchain-community/src/tools/gmail/search.ts new file mode 100644 index 000000000000..9957a11c8c3b --- /dev/null +++ b/libs/langchain-community/src/tools/gmail/search.ts @@ -0,0 +1,135 @@ +import { gmail_v1 } from "googleapis"; +import { z } from 
"zod"; +import { GmailBaseTool, GmailBaseToolParams } from "./base.js"; +import { SEARCH_DESCRIPTION } from "./descriptions.js"; + +export class GmailSearch extends GmailBaseTool { + name = "search_gmail"; + + schema = z.object({ + query: z.string(), + maxResults: z.number().optional(), + resource: z.enum(["messages", "threads"]).optional(), + }); + + description = SEARCH_DESCRIPTION; + + constructor(fields?: GmailBaseToolParams) { + super(fields); + } + + async _call(arg: z.output) { + const { query, maxResults = 10, resource = "messages" } = arg; + + const response = await this.gmail.users.messages.list({ + userId: "me", + q: query, + maxResults, + }); + + const { data } = response; + + if (!data) { + throw new Error("No data returned from Gmail"); + } + + const { messages } = data; + + if (!messages) { + throw new Error("No messages returned from Gmail"); + } + + if (resource === "messages") { + const parsedMessages = await this.parseMessages(messages); + return `Result for the query ${query}:\n${JSON.stringify( + parsedMessages + )}`; + } else if (resource === "threads") { + const parsedThreads = await this.parseThreads(messages); + return `Result for the query ${query}:\n${JSON.stringify(parsedThreads)}`; + } + + throw new Error(`Invalid resource: ${resource}`); + } + + async parseMessages( + messages: gmail_v1.Schema$Message[] + ): Promise { + const parsedMessages = await Promise.all( + messages.map(async (message) => { + const messageData = await this.gmail.users.messages.get({ + userId: "me", + format: "raw", + id: message.id ?? "", + }); + + const headers = messageData.data.payload?.headers || []; + + const subject = headers.find((header) => header.name === "Subject"); + const sender = headers.find((header) => header.name === "From"); + + let body = ""; + if (messageData.data.payload?.parts) { + body = messageData.data.payload.parts + .map((part) => part.body?.data ?? 
"") + .join(""); + } else if (messageData.data.payload?.body?.data) { + body = messageData.data.payload.body.data; + } + + return { + id: message.id, + threadId: message.threadId, + snippet: message.snippet, + body, + subject, + sender, + }; + }) + ); + return parsedMessages; + } + + async parseThreads( + threads: gmail_v1.Schema$Thread[] + ): Promise { + const parsedThreads = await Promise.all( + threads.map(async (thread) => { + const threadData = await this.gmail.users.threads.get({ + userId: "me", + format: "raw", + id: thread.id ?? "", + }); + + const headers = threadData.data.messages?.[0]?.payload?.headers || []; + + const subject = headers.find((header) => header.name === "Subject"); + const sender = headers.find((header) => header.name === "From"); + + let body = ""; + if (threadData.data.messages?.[0]?.payload?.parts) { + body = threadData.data.messages[0].payload.parts + .map((part) => part.body?.data ?? "") + .join(""); + } else if (threadData.data.messages?.[0]?.payload?.body?.data) { + body = threadData.data.messages[0].payload.body.data; + } + + return { + id: thread.id, + snippet: thread.snippet, + body, + subject, + sender, + }; + }) + ); + return parsedThreads; + } +} + +export type SearchSchema = { + query: string; + maxResults?: number; + resource?: "messages" | "threads"; +}; diff --git a/libs/langchain-community/src/tools/gmail/send_message.ts b/libs/langchain-community/src/tools/gmail/send_message.ts new file mode 100644 index 000000000000..b995dfbff0b2 --- /dev/null +++ b/libs/langchain-community/src/tools/gmail/send_message.ts @@ -0,0 +1,84 @@ +import { z } from "zod"; +import { GmailBaseTool, GmailBaseToolParams } from "./base.js"; +import { GET_MESSAGE_DESCRIPTION } from "./descriptions.js"; + +export class GmailSendMessage extends GmailBaseTool { + name = "gmail_send_message"; + + schema = z.object({ + message: z.string(), + to: z.array(z.string()), + subject: z.string(), + cc: z.array(z.string()).optional(), + bcc: 
z.array(z.string()).optional(), + }); + + description = GET_MESSAGE_DESCRIPTION; + + constructor(fields?: GmailBaseToolParams) { + super(fields); + } + + private createEmailMessage({ + message, + to, + subject, + cc, + bcc, + }: z.infer): string { + const emailLines: string[] = []; + + // Format the recipient(s) + const formatEmailList = (emails: string | string[]): string => + Array.isArray(emails) ? emails.join(",") : emails; + + emailLines.push(`To: ${formatEmailList(to)}`); + if (cc) emailLines.push(`Cc: ${formatEmailList(cc)}`); + if (bcc) emailLines.push(`Bcc: ${formatEmailList(bcc)}`); + emailLines.push(`Subject: ${subject}`); + emailLines.push(""); + emailLines.push(message); + + // Convert the email message to base64url string + const email = emailLines.join("\r\n").trim(); + // this encode may be an issue + return Buffer.from(email).toString("base64url"); + } + + async _call({ + message, + to, + subject, + cc, + bcc, + }: z.output): Promise { + const rawMessage = this.createEmailMessage({ + message, + to, + subject, + cc, + bcc, + }); + + try { + const response = await this.gmail.users.messages.send({ + userId: "me", + requestBody: { + raw: rawMessage, + }, + }); + + return `Message sent. 
Message Id: ${response.data.id}`; + } catch (error) { + throw new Error(`An error occurred while sending the message: ${error}`); + } + } +} + +export type SendMessageSchema = { + message: string; + to: string[]; + subject: string; + cc?: string[]; + bcc?: string[]; +}; diff --git a/libs/langchain-community/src/tools/google_calendar/base.ts b/libs/langchain-community/src/tools/google_calendar/base.ts new file mode 100644 index 000000000000..6d4baa7a8957 --- /dev/null +++ b/libs/langchain-community/src/tools/google_calendar/base.ts @@ -0,0 +1,102 @@ +import { google } from "googleapis"; +import { Tool } from "@langchain/core/tools"; +import { getEnvironmentVariable } from "@langchain/core/utils/env"; +import { BaseLLM } from "@langchain/core/language_models/llms"; + +export interface GoogleCalendarAgentParams { + credentials?: { + clientEmail?: string; + privateKey?: string; + calendarId?: string; + }; + scopes?: string[]; + model?: BaseLLM; +} + +export class GoogleCalendarBase extends Tool { + name = "Google Calendar"; + + description = + "A tool to lookup Google Calendar events and create events in Google Calendar"; + + protected clientEmail: string; + + protected privateKey: string; + + protected calendarId: string; + + protected scopes: string[]; + + protected llm: BaseLLM; + + get lc_namespace() { + return ["langchain-community", "tools"]; + } + + constructor( + fields: GoogleCalendarAgentParams = { + credentials: { + clientEmail: getEnvironmentVariable("GOOGLE_CALENDAR_CLIENT_EMAIL"), + privateKey: getEnvironmentVariable("GOOGLE_CALENDAR_PRIVATE_KEY"), + calendarId: getEnvironmentVariable("GOOGLE_CALENDAR_CALENDAR_ID"), + }, + scopes: [ + "https://www.googleapis.com/auth/calendar", + "https://www.googleapis.com/auth/calendar.events", + ], + } + ) { + super(...arguments); + + if (!fields.model) { + throw new Error("Missing llm instance to interact with Google Calendar"); + } + + if (!fields.credentials) { + throw new Error("Missing credentials to authenticate 
to Google Calendar"); + } + + if (!fields.credentials.clientEmail) { + throw new Error( + "Missing GOOGLE_CALENDAR_CLIENT_EMAIL to interact with Google Calendar" + ); + } + + if (!fields.credentials.privateKey) { + throw new Error( + "Missing GOOGLE_CALENDAR_PRIVATE_KEY to interact with Google Calendar" + ); + } + + if (!fields.credentials.calendarId) { + throw new Error( + "Missing GOOGLE_CALENDAR_CALENDAR_ID to interact with Google Calendar" + ); + } + + this.clientEmail = fields.credentials.clientEmail; + this.privateKey = fields.credentials.privateKey; + this.calendarId = fields.credentials.calendarId; + this.scopes = fields.scopes || []; + this.llm = fields.model; + } + + getModel() { + return this.llm; + } + + async getAuth() { + const auth = new google.auth.JWT( + this.clientEmail, + undefined, + this.privateKey, + this.scopes + ); + + return auth; + } + + async _call(input: string) { + return input; + } +} diff --git a/libs/langchain-community/src/tools/google_calendar/commands/run-create-events.ts b/libs/langchain-community/src/tools/google_calendar/commands/run-create-events.ts new file mode 100644 index 000000000000..4ec331768179 --- /dev/null +++ b/libs/langchain-community/src/tools/google_calendar/commands/run-create-events.ts @@ -0,0 +1,129 @@ +import { google, calendar_v3 } from "googleapis"; +import type { JWT, GaxiosResponse } from "googleapis-common"; +import { PromptTemplate } from "../../../prompts/index.js"; +import { LLMChain } from "../../../chains/index.js"; +import { CREATE_EVENT_PROMPT } from "../prompts/index.js"; +import { getTimezoneOffsetInHours } from "../utils/get-timezone-offset-in-hours.js"; +import { BaseLLM } from "../../../llms/base.js"; +import { CallbackManagerForToolRun } from "../../../callbacks/manager.js"; + +type CreateEventParams = { + eventSummary: string; + eventStartTime: string; + eventEndTime: string; + userTimezone: string; + eventLocation?: string; + eventDescription?: string; +}; + +const createEvent = async ( + 
{ + eventSummary, + eventStartTime, + eventEndTime, + userTimezone, + eventLocation = "", + eventDescription = "", + }: CreateEventParams, + calendarId: string, + auth: JWT +) => { + const calendar = google.calendar("v3"); + const event = { + summary: eventSummary, + location: eventLocation, + description: eventDescription, + start: { + dateTime: eventStartTime, + timeZone: userTimezone, + }, + end: { + dateTime: eventEndTime, + timeZone: userTimezone, + }, + }; + + try { + const createdEvent = await calendar.events.insert({ + auth, + calendarId, + requestBody: event, + }); + + return createdEvent; + } catch (error) { + return { + error: `An error occurred: ${error}`, + }; + } +}; + +type RunCreateEventParams = { + calendarId: string; + auth: JWT; + model: BaseLLM; +}; + +const runCreateEvent = async ( + query: string, + { calendarId, auth, model }: RunCreateEventParams, + runManager?: CallbackManagerForToolRun +) => { + const prompt = new PromptTemplate({ + template: CREATE_EVENT_PROMPT, + inputVariables: ["date", "query", "u_timezone", "dayName"], + }); + const createEventChain = new LLMChain({ + llm: model, + prompt, + }); + + const date = new Date().toISOString(); + const u_timezone = getTimezoneOffsetInHours(); + const dayName = new Date().toLocaleString("en-us", { weekday: "long" }); + + const output = await createEventChain.call( + { + query, + date, + u_timezone, + dayName, + }, + runManager?.getChild() + ); + const loaded = JSON.parse(output.text); + + const [ + eventSummary, + eventStartTime, + eventEndTime, + eventLocation, + eventDescription, + userTimezone, + ] = Object.values(loaded); + + const event = await createEvent( + { + eventSummary, + eventStartTime, + eventEndTime, + userTimezone, + eventLocation, + eventDescription, + } as CreateEventParams, + calendarId, + auth + ); + + if (!(event as { error: string }).error) { + return `Event created successfully, details: event ${ + (event as GaxiosResponse).data.htmlLink + }`; + } + + return `An error 
occurred creating the event: ${ + (event as { error: string }).error + }`; +}; + +export { runCreateEvent }; diff --git a/libs/langchain-community/src/tools/google_calendar/commands/run-view-events.ts b/libs/langchain-community/src/tools/google_calendar/commands/run-view-events.ts new file mode 100644 index 000000000000..c757931e9ca7 --- /dev/null +++ b/libs/langchain-community/src/tools/google_calendar/commands/run-view-events.ts @@ -0,0 +1,85 @@ +import { calendar_v3 } from "googleapis"; +import type { JWT } from "googleapis-common"; +import { PromptTemplate } from "../../../prompts/index.js"; +import { LLMChain } from "../../../chains/index.js"; +import { VIEW_EVENTS_PROMPT } from "../prompts/index.js"; +import { getTimezoneOffsetInHours } from "../utils/get-timezone-offset-in-hours.js"; +import { BaseLLM } from "../../../llms/base.js"; +import { CallbackManagerForToolRun } from "../../../callbacks/manager.js"; + +type RunViewEventParams = { + calendarId: string; + auth: JWT; + model: BaseLLM; +}; + +const runViewEvents = async ( + query: string, + { model, auth, calendarId }: RunViewEventParams, + runManager?: CallbackManagerForToolRun +) => { + const calendar = new calendar_v3.Calendar({}); + + const prompt = new PromptTemplate({ + template: VIEW_EVENTS_PROMPT, + inputVariables: ["date", "query", "u_timezone", "dayName"], + }); + + const viewEventsChain = new LLMChain({ + llm: model, + prompt, + }); + + const date = new Date().toISOString(); + const u_timezone = getTimezoneOffsetInHours(); + const dayName = new Date().toLocaleString("en-us", { weekday: "long" }); + + const output = await viewEventsChain.call( + { + query, + date, + u_timezone, + dayName, + }, + runManager?.getChild() + ); + const loaded = JSON.parse(output.text); + + try { + const response = await calendar.events.list({ + auth, + calendarId, + ...loaded, + }); + + const curatedItems = + response.data && response.data.items + ? 
response.data.items.map( + ({ + status, + summary, + description, + start, + end, + }: // eslint-disable-next-line @typescript-eslint/no-explicit-any + any) => ({ + status, + summary, + description, + start, + end, + }) + ) + : []; + + return `Result for the prompt "${query}": \n${JSON.stringify( + curatedItems, + null, + 2 + )}`; + } catch (error) { + return `An error occurred: ${error}`; + } +}; + +export { runViewEvents }; diff --git a/libs/langchain-community/src/tools/google_calendar/create.ts b/libs/langchain-community/src/tools/google_calendar/create.ts new file mode 100644 index 000000000000..fd54ab8b14e7 --- /dev/null +++ b/libs/langchain-community/src/tools/google_calendar/create.ts @@ -0,0 +1,51 @@ +import { CallbackManagerForToolRun } from "@langchain/core/callbacks/manager"; +import { GoogleCalendarBase, GoogleCalendarAgentParams } from "./base.js"; +import { runCreateEvent } from "./commands/run-create-events.js"; +import { CREATE_TOOL_DESCRIPTION } from "./descriptions.js"; + +/** + * @example + * ```typescript + * const googleCalendarCreateTool = new GoogleCalendarCreateTool({ + * credentials: { + * clientEmail: process.env.GOOGLE_CALENDAR_CLIENT_EMAIL, + * privateKey: process.env.GOOGLE_CALENDAR_PRIVATE_KEY, + * calendarId: process.env.GOOGLE_CALENDAR_CALENDAR_ID, + * }, + * scopes: [ + * "https: + * "https: + * ], + * model: new ChatOpenAI({}), + * }); + * const createInput = `Create a meeting with John Doe next Friday at 4pm - adding to the agenda of it the result of 99 + 99`; + * const createResult = await googleCalendarCreateTool.invoke({ + * input: createInput, + * }); + * console.log("Create Result", createResult); + * ``` + */ +export class GoogleCalendarCreateTool extends GoogleCalendarBase { + name = "google_calendar_create"; + + description = CREATE_TOOL_DESCRIPTION; + + constructor(fields: GoogleCalendarAgentParams) { + super(fields); + } + + async _call(query: string, runManager?: CallbackManagerForToolRun) { + const auth = await 
this.getAuth(); + const model = this.getModel(); + + return runCreateEvent( + query, + { + auth, + model, + calendarId: this.calendarId, + }, + runManager + ); + } +} diff --git a/libs/langchain-community/src/tools/google_calendar/descriptions.ts b/libs/langchain-community/src/tools/google_calendar/descriptions.ts new file mode 100644 index 000000000000..dc0a736c53c2 --- /dev/null +++ b/libs/langchain-community/src/tools/google_calendar/descriptions.ts @@ -0,0 +1,24 @@ +export const CREATE_TOOL_DESCRIPTION = `A tool for creating Google Calendar events and meetings. + +INPUT example: +"action": "google_calendar_create", +"action_input": "create a new meeting with John Doe tomorrow at 4pm" + +OUTPUT: +Output is a confirmation of a created event. +`; + +export const VIEW_TOOL_DESCRIPTION = `A tool for retrieving Google Calendar events and meetings. +INPUT examples: +"action": "google_calendar_view", +"action_input": "display meetings for today" + +"action": "google_calendar_view", +"action_input": "show events for tomorrow" + +"action": "google_calendar_view", +"action_input": "display meetings for tomorrow between 4pm and 8pm" + +OUTPUT: +- title, start time, end time, attendees, description (if available) +`; diff --git a/libs/langchain-community/src/tools/google_calendar/index.ts b/libs/langchain-community/src/tools/google_calendar/index.ts new file mode 100644 index 000000000000..c7c8b3a10699 --- /dev/null +++ b/libs/langchain-community/src/tools/google_calendar/index.ts @@ -0,0 +1,3 @@ +export { GoogleCalendarCreateTool } from "./create.js"; +export { GoogleCalendarViewTool } from "./view.js"; +export type { GoogleCalendarAgentParams } from "./base.js"; diff --git a/libs/langchain-community/src/tools/google_calendar/prompts/create-event-prompt.ts b/libs/langchain-community/src/tools/google_calendar/prompts/create-event-prompt.ts new file mode 100644 index 000000000000..b3ac77762ed8 --- /dev/null +++ 
b/libs/langchain-community/src/tools/google_calendar/prompts/create-event-prompt.ts @@ -0,0 +1,56 @@ +export const CREATE_EVENT_PROMPT = ` +Date format: YYYY-MM-DDThh:mm:ss+00:00 +Based on this event description: "Joey birthday tomorrow at 7 pm", +output a json of the following parameters: +Today's datetime on UTC time 2023-05-02T10:00:00+00:00, it's Tuesday and timezone +of the user is -5, take into account the timezone of the user and today's date. +1. event_summary +2. event_start_time +3. event_end_time +4. event_location +5. event_description +6. user_timezone +event_summary: +{{ + "event_summary": "Joey birthday", + "event_start_time": "2023-05-03T19:00:00-05:00", + "event_end_time": "2023-05-03T20:00:00-05:00", + "event_location": "", + "event_description": "", + "user_timezone": "America/New_York" +}} + +Date format: YYYY-MM-DDThh:mm:ss+00:00 +Based on this event description: "Create a meeting for 5 pm on Saturday with Joey", +output a json of the following parameters: +Today's datetime on UTC time 2023-05-04T10:00:00+00:00, it's Thursday and timezone +of the user is -5, take into account the timezone of the user and today's date. +1. event_summary +2. event_start_time +3. event_end_time +4. event_location +5. event_description +6. user_timezone +event_summary: +{{ + "event_summary": "Meeting with Joey", + "event_start_time": "2023-05-06T17:00:00-05:00", + "event_end_time": "2023-05-06T18:00:00-05:00", + "event_location": "", + "event_description": "", + "user_timezone": "America/New_York" +}} + +Date format: YYYY-MM-DDThh:mm:ss+00:00 +Based on this event description: "{query}", output a json of the +following parameters: +Today's datetime on UTC time {date}, it's {dayName} and timezone of the user {u_timezone}, +take into account the timezone of the user and today's date. +1. event_summary +2. event_start_time +3. event_end_time +4. event_location +5. event_description +6. 
user_timezone +event_summary: +`; diff --git a/libs/langchain-community/src/tools/google_calendar/prompts/index.ts b/libs/langchain-community/src/tools/google_calendar/prompts/index.ts new file mode 100644 index 000000000000..a8a7ca38343c --- /dev/null +++ b/libs/langchain-community/src/tools/google_calendar/prompts/index.ts @@ -0,0 +1,2 @@ +export { CREATE_EVENT_PROMPT } from "./create-event-prompt.js"; +export { VIEW_EVENTS_PROMPT } from "./view-events-prompt.js"; diff --git a/libs/langchain-community/src/tools/google_calendar/prompts/view-events-prompt.ts b/libs/langchain-community/src/tools/google_calendar/prompts/view-events-prompt.ts new file mode 100644 index 000000000000..0053e9a1b391 --- /dev/null +++ b/libs/langchain-community/src/tools/google_calendar/prompts/view-events-prompt.ts @@ -0,0 +1,34 @@ +export const VIEW_EVENTS_PROMPT = ` +Date format: YYYY-MM-DDThh:mm:ss+00:00 +Based on this event description: 'View my events on Thursday', +output a json of the following parameters: +Today's datetime on UTC time 2023-05-02T10:00:00+00:00, it's Tuesday and timezone +of the user is -5, take into account the timezone of the user and today's date. +If the user is searching for events with a specific title, person or location, put it into the search_query parameter. +1. time_min +2. time_max +3. user_timezone +4. max_results +5. search_query +event_summary: +{{ + "time_min": "2023-05-04T00:00:00-05:00", + "time_max": "2023-05-04T23:59:59-05:00", + "user_timezone": "America/New_York", + "max_results": 10, + "search_query": "" +}} + +Date format: YYYY-MM-DDThh:mm:ss+00:00 +Based on this event description: '{query}', output a json of the +following parameters: +Today's datetime on UTC time {date}, today it's {dayName} and timezone of the user {u_timezone}, +take into account the timezone of the user and today's date. +If the user is searching for events with a specific title, person or location, put it into the search_query parameter. +1. time_min +2. time_max +3. 
user_timezone +4. max_results +5. search_query +event_summary: +`; diff --git a/libs/langchain-community/src/tools/google_calendar/utils/get-timezone-offset-in-hours.ts b/libs/langchain-community/src/tools/google_calendar/utils/get-timezone-offset-in-hours.ts new file mode 100644 index 000000000000..17bed380c094 --- /dev/null +++ b/libs/langchain-community/src/tools/google_calendar/utils/get-timezone-offset-in-hours.ts @@ -0,0 +1,7 @@ +const getTimezoneOffsetInHours = () => { + const offsetInMinutes = new Date().getTimezoneOffset(); + const offsetInHours = -offsetInMinutes / 60; + return offsetInHours; +}; + +export { getTimezoneOffsetInHours }; diff --git a/libs/langchain-community/src/tools/google_calendar/view.ts b/libs/langchain-community/src/tools/google_calendar/view.ts new file mode 100644 index 000000000000..94168bf88cbc --- /dev/null +++ b/libs/langchain-community/src/tools/google_calendar/view.ts @@ -0,0 +1,51 @@ +import { CallbackManagerForToolRun } from "@langchain/core/callbacks/manager"; + +import { GoogleCalendarBase, GoogleCalendarAgentParams } from "./base.js"; +import { VIEW_TOOL_DESCRIPTION } from "./descriptions.js"; + +import { runViewEvents } from "./commands/run-view-events.js"; + +/** + * @example + * ```typescript + * const googleCalendarViewTool = new GoogleCalendarViewTool({ + * credentials: { + * clientEmail: process.env.GOOGLE_CALENDAR_CLIENT_EMAIL, + * privateKey: process.env.GOOGLE_CALENDAR_PRIVATE_KEY, + * calendarId: process.env.GOOGLE_CALENDAR_CALENDAR_ID, + * }, + * scopes: [ + * "https: + * "https: + * ], + * model: new ChatOpenAI({}), + * }); + * const viewInput = `What meetings do I have this week?`; + * const viewResult = await googleCalendarViewTool.invoke({ input: viewInput }); + * console.log("View Result", viewResult); + * ``` + */ +export class GoogleCalendarViewTool extends GoogleCalendarBase { + name = "google_calendar_view"; + + description = VIEW_TOOL_DESCRIPTION; + + constructor(fields: GoogleCalendarAgentParams) { + 
super(fields); + } + + async _call(query: string, runManager?: CallbackManagerForToolRun) { + const auth = await this.getAuth(); + const model = this.getModel(); + + return runViewEvents( + query, + { + auth, + model, + calendarId: this.calendarId, + }, + runManager + ); + } +} diff --git a/libs/langchain-community/src/tools/google_custom_search.ts b/libs/langchain-community/src/tools/google_custom_search.ts new file mode 100644 index 000000000000..47b5ab6c024c --- /dev/null +++ b/libs/langchain-community/src/tools/google_custom_search.ts @@ -0,0 +1,87 @@ +import { getEnvironmentVariable } from "@langchain/core/utils/env"; +import { Tool } from "@langchain/core/tools"; + +/** + * Interface for parameters required by GoogleCustomSearch class. + */ +export interface GoogleCustomSearchParams { + apiKey?: string; + googleCSEId?: string; +} + +/** + * Class that uses the Google Search API to perform custom searches. + * Requires environment variables `GOOGLE_API_KEY` and `GOOGLE_CSE_ID` to + * be set. + */ +export class GoogleCustomSearch extends Tool { + static lc_name() { + return "GoogleCustomSearch"; + } + + get lc_secrets(): { [key: string]: string } | undefined { + return { + apiKey: "GOOGLE_API_KEY", + }; + } + + get lc_namespace() { + return ["langchain-community", "tools"]; + } + + name = "google-custom-search"; + + protected apiKey: string; + + protected googleCSEId: string; + + description = + "a custom search engine. useful for when you need to answer questions about current events. input should be a search query. outputs a JSON array of results."; + + constructor( + fields: GoogleCustomSearchParams = { + apiKey: getEnvironmentVariable("GOOGLE_API_KEY"), + googleCSEId: getEnvironmentVariable("GOOGLE_CSE_ID"), + } + ) { + super(...arguments); + if (!fields.apiKey) { + throw new Error( + `Google API key not set. 
You can set it as "GOOGLE_API_KEY" in your environment variables.` + ); + } + if (!fields.googleCSEId) { + throw new Error( + `Google custom search engine id not set. You can set it as "GOOGLE_CSE_ID" in your environment variables.` + ); + } + this.apiKey = fields.apiKey; + this.googleCSEId = fields.googleCSEId; + } + + async _call(input: string) { + const res = await fetch( + `https://www.googleapis.com/customsearch/v1?key=${this.apiKey}&cx=${ + this.googleCSEId + }&q=${encodeURIComponent(input)}` + ); + + if (!res.ok) { + throw new Error( + `Got ${res.status} error from Google custom search: ${res.statusText}` + ); + } + + const json = await res.json(); + + const results = + json?.items?.map( + (item: { title?: string; link?: string; snippet?: string }) => ({ + title: item.title, + link: item.link, + snippet: item.snippet, + }) + ) ?? []; + return JSON.stringify(results); + } +} diff --git a/libs/langchain-community/src/tools/google_places.ts b/libs/langchain-community/src/tools/google_places.ts new file mode 100644 index 000000000000..849e812dde95 --- /dev/null +++ b/libs/langchain-community/src/tools/google_places.ts @@ -0,0 +1,100 @@ +import { getEnvironmentVariable } from "@langchain/core/utils/env"; +import { Tool } from "@langchain/core/tools"; + +/** + * Interface for parameters required by GooglePlacesAPI class. + */ +export interface GooglePlacesAPIParams { + apiKey?: string; +} + +/** + * Tool that queries the Google Places API + */ +export class GooglePlacesAPI extends Tool { + static lc_name() { + return "GooglePlacesAPI"; + } + + get lc_namespace() { + return ["langchain-community", "tools"]; + } + + get lc_secrets(): { [key: string]: string } | undefined { + return { + apiKey: "GOOGLE_PLACES_API_KEY", + }; + } + + name = "google_places"; + + protected apiKey: string; + + description = `A wrapper around Google Places API. Useful for when you need to validate or + discover addresses from ambiguous text. 
Input should be a search query.`; + + constructor(fields?: GooglePlacesAPIParams) { + super(...arguments); + const apiKey = + fields?.apiKey ?? getEnvironmentVariable("GOOGLE_PLACES_API_KEY"); + if (apiKey === undefined) { + throw new Error( + `Google Places API key not set. You can set it as "GOOGLE_PLACES_API_KEY" in your environment variables.` + ); + } + this.apiKey = apiKey; + } + + async _call(input: string) { + const res = await fetch( + `https://places.googleapis.com/v1/places:searchText`, + { + method: "POST", + body: JSON.stringify({ + textQuery: input, + languageCode: "en", + }), + headers: { + "X-Goog-Api-Key": this.apiKey, + "X-Goog-FieldMask": + "places.displayName,places.formattedAddress,places.id,places.internationalPhoneNumber,places.websiteUri", + "Content-Type": "application/json", + }, + } + ); + + if (!res.ok) { + let message; + try { + const json = await res.json(); + message = json.error.message; + } catch (e) { + message = + "Unable to parse error message: Google did not return a JSON response."; + } + throw new Error( + `Got ${res.status}: ${res.statusText} error from Google Places API: ${message}` + ); + } + + const json = await res.json(); + + const results = + json?.places?.map( + (place: { + id?: string; + internationalPhoneNumber?: string; + formattedAddress?: string; + websiteUri?: string; + displayName?: { text?: string }; + }) => ({ + name: place.displayName?.text, + id: place.id, + address: place.formattedAddress, + phoneNumber: place.internationalPhoneNumber, + website: place.websiteUri, + }) + ) ?? 
[]; + return JSON.stringify(results); + } +} diff --git a/libs/langchain-community/src/tools/json.ts b/libs/langchain-community/src/tools/json.ts new file mode 100644 index 000000000000..d921d632874f --- /dev/null +++ b/libs/langchain-community/src/tools/json.ts @@ -0,0 +1,154 @@ +import jsonpointer from "jsonpointer"; +import { Tool, type ToolParams } from "@langchain/core/tools"; +import { Serializable } from "../load/serializable.js"; + +export type Json = + | string + | number + | boolean + | null + | { [key: string]: Json } + | Json[]; + +export type JsonObject = { [key: string]: Json }; + +/** + * Represents a JSON object in the LangChain framework. Provides methods + * to get keys and values from the JSON object. + */ +export class JsonSpec extends Serializable { + get lc_namespace() { + return ["langchain-community", "tools"]; + } + + obj: JsonObject; + + maxValueLength = 4000; + + constructor(obj: JsonObject, max_value_length = 4000) { + super(...arguments); + this.obj = obj; + this.maxValueLength = max_value_length; + } + + /** + * Retrieves all keys at a given path in the JSON object. + * @param input The path to the keys in the JSON object, provided as a string in JSON pointer syntax. + * @returns A string containing all keys at the given path, separated by commas. + */ + public getKeys(input: string): string { + const pointer = jsonpointer.compile(input); + const res = pointer.get(this.obj) as Json; + if (typeof res === "object" && !Array.isArray(res) && res !== null) { + return Object.keys(res) + .map((i) => i.replaceAll("~", "~0").replaceAll("/", "~1")) + .join(", "); + } + + throw new Error( + `Value at ${input} is not a dictionary, get the value directly instead.` + ); + } + + /** + * Retrieves the value at a given path in the JSON object. + * @param input The path to the value in the JSON object, provided as a string in JSON pointer syntax. + * @returns The value at the given path in the JSON object, as a string. 
If the value is a large dictionary or exceeds the maximum length, a message is returned instead. + */ + public getValue(input: string): string { + const pointer = jsonpointer.compile(input); + const res = pointer.get(this.obj) as Json; + + if (res === null || res === undefined) { + throw new Error(`Value at ${input} is null or undefined.`); + } + + const str = typeof res === "object" ? JSON.stringify(res) : res.toString(); + if ( + typeof res === "object" && + !Array.isArray(res) && + str.length > this.maxValueLength + ) { + return `Value is a large dictionary, should explore its keys directly.`; + } + + if (str.length > this.maxValueLength) { + return `${str.slice(0, this.maxValueLength)}...`; + } + return str; + } +} + +export interface JsonToolFields extends ToolParams { + jsonSpec: JsonSpec; +} + +/** + * A tool in the LangChain framework that lists all keys at a given path + * in a JSON object. + */ +export class JsonListKeysTool extends Tool { + static lc_name() { + return "JsonListKeysTool"; + } + + name = "json_list_keys"; + + jsonSpec: JsonSpec; + + constructor(jsonSpec: JsonSpec); + + constructor(fields: JsonToolFields); + + constructor(fields: JsonSpec | JsonToolFields) { + if (!("jsonSpec" in fields)) { + // eslint-disable-next-line no-param-reassign + fields = { jsonSpec: fields }; + } + super(fields); + + this.jsonSpec = fields.jsonSpec; + } + + /** @ignore */ + async _call(input: string) { + try { + return this.jsonSpec.getKeys(input); + } catch (error) { + return `${error}`; + } + } + + description = `Can be used to list all keys at a given path. + Before calling this you should be SURE that the path to this exists. + The input is a text representation of the path to the json as json pointer syntax (e.g. /key1/0/key2).`; +} + +/** + * A tool in the LangChain framework that retrieves the value at a given + * path in a JSON object. 
+
+ */
+export class JsonGetValueTool extends Tool {
+  static lc_name() {
+    return "JsonGetValueTool";
+  }
+
+  name = "json_get_value";
+
+  constructor(public jsonSpec: JsonSpec) {
+    super();
+  }
+
+  /** @ignore */
+  async _call(input: string) {
+    try {
+      return this.jsonSpec.getValue(input);
+    } catch (error) {
+      return `${error}`;
+    }
+  }
+
+  description = `Can be used to see value in string format at a given path.
+  Before calling this you should be SURE that the path to this exists.
+  The input is a text representation of the path to the json as json pointer syntax (e.g. /key1/0/key2).`;
+}
diff --git a/libs/langchain-community/src/tools/searchapi.ts b/libs/langchain-community/src/tools/searchapi.ts
new file mode 100644
index 000000000000..201354730299
--- /dev/null
+++ b/libs/langchain-community/src/tools/searchapi.ts
@@ -0,0 +1,208 @@
+import { getEnvironmentVariable } from "@langchain/core/utils/env";
+import { Tool } from "@langchain/core/tools";
+
+type JSONPrimitive = string | number | boolean | null;
+type JSONValue = JSONPrimitive | JSONObject | JSONArray;
+interface JSONObject {
+  [key: string]: JSONValue;
+}
+interface JSONArray extends Array<JSONValue> {}
+
+function isJSONObject(value: JSONValue): value is JSONObject {
+  return value !== null && typeof value === "object" && !Array.isArray(value);
+}
+
+/**
+ * SearchApiParameters Type Definition.
+ *
+ * For more parameters and supported search engines, refer to specific engine documentation:
+ * Google - https://www.searchapi.io/docs/google
+ * Google News - https://www.searchapi.io/docs/google-news
+ * Google Scholar - https://www.searchapi.io/docs/google-scholar
+ * YouTube Transcripts - https://www.searchapi.io/docs/youtube-transcripts
+ * and others.
+ *
+ */
+export type SearchApiParameters = {
+  [key: string]: JSONValue;
+};
+
+/**
+ * SearchApi Class Definition.
+ *
+ * Provides a wrapper around the SearchApi.
+ *
+ * Ensure you've set the SEARCHAPI_API_KEY environment variable for authentication.
+ * You can obtain a free API key from https://www.searchapi.io/.
+ * @example
+ * ```typescript
+ * const searchApi = new SearchApi("your-api-key", {
+ *   engine: "google_news",
+ * });
+ * const agent = RunnableSequence.from([
+ *   ChatPromptTemplate.fromMessages([
+ *     ["ai", "Answer the following questions using a bulleted list markdown format."],
+ *     ["human", "{input}"],
+ *   ]),
+ *   new ChatOpenAI({ temperature: 0 }),
+ *   (input: BaseMessageChunk) => ({
+ *     log: "test",
+ *     returnValues: {
+ *       output: input,
+ *     },
+ *   }),
+ * ]);
+ * const executor = AgentExecutor.fromAgentAndTools({
+ *   agent,
+ *   tools: [searchApi],
+ * });
+ * const res = await executor.invoke({
+ *   input: "What's happening in Ukraine today?",
+ * });
+ * console.log(res);
+ * ```
+ */
+export class SearchApi extends Tool {
+  static lc_name() {
+    return "SearchApi";
+  }
+
+  get lc_namespace() {
+    return ["langchain-community", "tools"];
+  }
+
+  /**
+   * Converts the SearchApi instance to JSON. This method is not implemented
+   * and will throw an error if called.
+   * @returns Throws an error.
+   */
+  toJSON() {
+    return this.toJSONNotImplemented();
+  }
+
+  protected apiKey: string;
+
+  protected params: Partial<SearchApiParameters>;
+
+  constructor(
+    apiKey: string | undefined = getEnvironmentVariable("SEARCHAPI_API_KEY"),
+    params: Partial<SearchApiParameters> = {}
+  ) {
+    super(...arguments);
+
+    if (!apiKey) {
+      throw new Error(
+        "SearchApi requires an API key. Please set it as SEARCHAPI_API_KEY in your .env file, or pass it as a parameter to the SearchApi constructor."
+      );
+    }
+
+    this.apiKey = apiKey;
+    this.params = params;
+  }
+
+  name = "search";
+
+  /**
+   * Builds a URL for the SearchApi request.
+   * @param parameters The parameters for the request.
+   * @returns A string representing the built URL.
+ */ + protected buildUrl(searchQuery: string): string { + const preparedParams: [string, string][] = Object.entries({ + engine: "google", + api_key: this.apiKey, + ...this.params, + q: searchQuery, + }) + .filter( + ([key, value]) => + value !== undefined && value !== null && key !== "apiKey" + ) + .map(([key, value]) => [key, `${value}`]); + + const searchParams = new URLSearchParams(preparedParams); + return `https://www.searchapi.io/api/v1/search?${searchParams}`; + } + + /** @ignore */ + /** + * Calls the SearchAPI. + * + * Accepts an input query and fetches the result from SearchApi. + * + * @param {string} input - Search query. + * @returns {string} - Formatted search results or an error message. + * + * NOTE: This method is the core search handler and processes various types + * of search results including Google organic results, videos, jobs, and images. + */ + async _call(input: string) { + const resp = await fetch(this.buildUrl(input)); + + const json = await resp.json(); + + if (json.error) { + throw new Error( + `Failed to load search results from SearchApi due to: ${json.error}` + ); + } + + // Google Search results + if (json.answer_box?.answer) { + return json.answer_box.answer; + } + + if (json.answer_box?.snippet) { + return json.answer_box.snippet; + } + + if (json.knowledge_graph?.description) { + return json.knowledge_graph.description; + } + + // Organic results (Google, Google News) + if (json.organic_results) { + const snippets = json.organic_results + .filter((r: JSONObject) => r.snippet) + .map((r: JSONObject) => r.snippet); + return snippets.join("\n"); + } + + // Google Jobs results + if (json.jobs) { + const jobDescriptions = json.jobs + .slice(0, 1) + .filter((r: JSONObject) => r.description) + .map((r: JSONObject) => r.description); + return jobDescriptions.join("\n"); + } + + // Google Videos results + if (json.videos) { + const videoInfo = json.videos + .filter((r: JSONObject) => r.title && r.link) + .map((r: JSONObject) => `Title: 
"${r.title}" Link: ${r.link}`); + return videoInfo.join("\n"); + } + + // Google Images results + if (json.images) { + const image_results = json.images.slice(0, 15); + const imageInfo = image_results + .filter( + (r: JSONObject) => + r.title && r.original && isJSONObject(r.original) && r.original.link + ) + .map( + (r: JSONObject) => + `Title: "${r.title}" Link: ${(r.original as JSONObject).link}` + ); + return imageInfo.join("\n"); + } + + return "No good search result found"; + } + + description = + "a search engine. useful for when you need to answer questions about current events. input should be a search query."; +} diff --git a/libs/langchain-community/src/tools/searxng_search.ts b/libs/langchain-community/src/tools/searxng_search.ts new file mode 100644 index 000000000000..7f2a404bfee6 --- /dev/null +++ b/libs/langchain-community/src/tools/searxng_search.ts @@ -0,0 +1,262 @@ +import { getEnvironmentVariable } from "@langchain/core/utils/env"; +import { Tool } from "@langchain/core/tools"; + +/** + * Interface for the results returned by the Searxng search. + */ +interface SearxngResults { + query: string; + number_of_results: number; + results: Array<{ + url: string; + title: string; + content: string; + img_src: string; + engine: string; + parsed_url: Array; + template: string; + engines: Array; + positions: Array; + score: number; + category: string; + pretty_url: string; + open_group?: boolean; + close_group?: boolean; + }>; + answers: Array; + corrections: Array; + infoboxes: Array<{ + infobox: string; + content: string; + engine: string; + engines: Array; + }>; + suggestions: Array; + unresponsive_engines: Array; +} + +/** + * Interface for custom headers used in the Searxng search. 
+ */ +interface SearxngCustomHeaders { + [key: string]: string; +} + +interface SearxngSearchParams { + /** + * @default 10 + * Number of results included in results + */ + numResults?: number; + /** Comma separated list, specifies the active search categories + * https://docs.searxng.org/user/configured_engines.html#configured-engines + */ + categories?: string; + + /** Comma separated list, specifies the active search engines + * https://docs.searxng.org/user/configured_engines.html#configured-engines + */ + engines?: string; + + /** Code of the language. */ + language?: string; + /** Search page number. */ + pageNumber?: number; + /** + * day / month / year + * + * Time range of search for engines which support it. See if an engine supports time range search in the preferences page of an instance. + */ + timeRange?: number; + + /** + * Throws Error if format is set anything other than "json" + * Output format of results. Format needs to be activated in search: + */ + format?: "json"; + /** Open search results on new tab. */ + resultsOnNewTab?: 0 | 1; + /** Proxy image results through SearXNG. */ + imageProxy?: boolean; + autocomplete?: string; + /** + * Filter search results of engines which support safe search. See if an engine supports safe search in the preferences page of an instance. + */ + safesearch?: 0 | 1 | 2; +} + +/** + * SearxngSearch class represents a meta search engine tool. + * Use this class when you need to answer questions about current events. + * The input should be a search query, and the output is a JSON array of the query results. 
+ * + * note: works best with *agentType*: `structured-chat-zero-shot-react-description` + * https://github.com/searxng/searxng + * @example + * ```typescript + * const executor = AgentExecutor.fromAgentAndTools({ + * agent, + * tools: [ + * new SearxngSearch({ + * params: { + * format: "json", + * engines: "google", + * }, + * headers: {}, + * }), + * ], + * }); + * const result = await executor.invoke({ + * input: `What is Langchain? Describe in 50 words`, + * }); + * ``` + */ +export class SearxngSearch extends Tool { + static lc_name() { + return "SearxngSearch"; + } + + get lc_namespace() { + return ["langchain-community", "tools"]; + } + + name = "searxng-search"; + + description = + "A meta search engine. Useful for when you need to answer questions about current events. Input should be a search query. Output is a JSON array of the query results"; + + protected apiBase?: string; + + protected params?: SearxngSearchParams = { + numResults: 10, + pageNumber: 1, + format: "json", + imageProxy: true, + safesearch: 0, + }; + + protected headers?: SearxngCustomHeaders; + + get lc_secrets(): { [key: string]: string } | undefined { + return { + apiBase: "SEARXNG_API_BASE", + }; + } + + /** + * Constructor for the SearxngSearch class + * @param apiBase Base URL of the Searxng instance + * @param params SearxNG parameters + * @param headers Custom headers + */ + constructor({ + apiBase, + params, + headers, + }: { + /** Base URL of Searxng instance */ + apiBase?: string; + + /** SearxNG Paramerters + * + * https://docs.searxng.org/dev/search_api.html check here for more details + */ + params?: SearxngSearchParams; + + /** + * Custom headers + * Set custom headers if you're using a api from RapidAPI (https://rapidapi.com/iamrony777/api/searxng) + * No headers needed for a locally self-hosted instance + */ + headers?: SearxngCustomHeaders; + }) { + super(...arguments); + + this.apiBase = getEnvironmentVariable("SEARXNG_API_BASE") || apiBase; + this.headers = { 
"content-type": "application/json", ...headers }; + + if (!this.apiBase) { + throw new Error( + `SEARXNG_API_BASE not set. You can set it as "SEARXNG_API_BASE" in your environment variables.` + ); + } + + if (params) { + this.params = { ...this.params, ...params }; + } + } + + /** + * Builds the URL for the Searxng search. + * @param path The path for the URL. + * @param parameters The parameters for the URL. + * @param baseUrl The base URL. + * @returns The complete URL as a string. + */ + protected buildUrl
<P extends SearxngSearchParams>
( + path: string, + parameters: P, + baseUrl: string + ): string { + const nonUndefinedParams: [string, string][] = Object.entries(parameters) + .filter(([_, value]) => value !== undefined) + .map(([key, value]) => [key, value.toString()]); // Avoid string conversion + const searchParams = new URLSearchParams(nonUndefinedParams); + return `${baseUrl}/${path}?${searchParams}`; + } + + async _call(input: string): Promise { + const queryParams = { + q: input, + ...this.params, + }; + const url = this.buildUrl("search", queryParams, this.apiBase as string); + + const resp = await fetch(url, { + method: "POST", + headers: this.headers, + signal: AbortSignal.timeout(5 * 1000), // 5 seconds + }); + + if (!resp.ok) { + throw new Error(resp.statusText); + } + + const res: SearxngResults = await resp.json(); + + if ( + !res.results.length && + !res.answers.length && + !res.infoboxes.length && + !res.suggestions.length + ) { + return "No good results found."; + } else if (res.results.length) { + const response: string[] = []; + + res.results.forEach((r) => { + response.push( + JSON.stringify({ + title: r.title || "", + link: r.url || "", + snippet: r.content || "", + }) + ); + }); + + return response.slice(0, this.params?.numResults).toString(); + } else if (res.answers.length) { + return res.answers[0]; + } else if (res.infoboxes.length) { + return res.infoboxes[0]?.content.replaceAll(/<[^>]+>/gi, ""); + } else if (res.suggestions.length) { + let suggestions = "Suggestions: "; + res.suggestions.forEach((s) => { + suggestions += `${s}, `; + }); + return suggestions; + } else { + return "No good results found."; + } + } +} diff --git a/libs/langchain-community/src/tools/serpapi.ts b/libs/langchain-community/src/tools/serpapi.ts new file mode 100644 index 000000000000..635afa8bb4a1 --- /dev/null +++ b/libs/langchain-community/src/tools/serpapi.ts @@ -0,0 +1,509 @@ +import { getEnvironmentVariable } from "@langchain/core/utils/env"; +import { Tool } from "@langchain/core/tools"; 
+ +/** + * This does not use the `serpapi` package because it appears to cause issues + * when used in `jest` tests. Part of the issue seems to be that the `serpapi` + * package imports a wasm module to use instead of native `fetch`, which we + * don't want anyway. + * + * NOTE: you must provide location, gl and hl or your region and language will + * may not match your location, and will not be deterministic. + */ + +// Copied over from `serpapi` package +interface BaseParameters { + /** + * Parameter defines the device to use to get the results. It can be set to + * `desktop` (default) to use a regular browser, `tablet` to use a tablet browser + * (currently using iPads), or `mobile` to use a mobile browser (currently + * using iPhones). + */ + device?: "desktop" | "tablet" | "mobile"; + /** + * Parameter will force SerpApi to fetch the Google results even if a cached + * version is already present. A cache is served only if the query and all + * parameters are exactly the same. Cache expires after 1h. Cached searches + * are free, and are not counted towards your searches per month. It can be set + * to `false` (default) to allow results from the cache, or `true` to disallow + * results from the cache. `no_cache` and `async` parameters should not be used together. + */ + no_cache?: boolean; + /** + * Specify the client-side timeout of the request. In milliseconds. + */ + timeout?: number; +} + +export interface SerpAPIParameters extends BaseParameters { + /** + * Search Query + * Parameter defines the query you want to search. You can use anything that you + * would use in a regular Google search. e.g. `inurl:`, `site:`, `intitle:`. We + * also support advanced search query parameters such as as_dt and as_eq. See the + * [full list](https://serpapi.com/advanced-google-query-parameters) of supported + * advanced search query parameters. + */ + q: string; + /** + * Location + * Parameter defines from where you want the search to originate. 
If several + * locations match the location requested, we'll pick the most popular one. Head to + * [/locations.json API](https://serpapi.com/locations-api) if you need more + * precise control. location and uule parameters can't be used together. Avoid + * utilizing location when setting the location outside the U.S. when using Google + * Shopping and/or Google Product API. + */ + location?: string; + /** + * Encoded Location + * Parameter is the Google encoded location you want to use for the search. uule + * and location parameters can't be used together. + */ + uule?: string; + /** + * Google Place ID + * Parameter defines the id (`CID`) of the Google My Business listing you want to + * scrape. Also known as Google Place ID. + */ + ludocid?: string; + /** + * Additional Google Place ID + * Parameter that you might have to use to force the knowledge graph map view to + * show up. You can find the lsig ID by using our [Local Pack + * API](https://serpapi.com/local-pack) or [Local Places Results + * API](https://serpapi.com/local-results). + * lsig ID is also available via a redirect Google uses within [Google My + * Business](https://www.google.com/business/). + */ + lsig?: string; + /** + * Google Knowledge Graph ID + * Parameter defines the id (`KGMID`) of the Google Knowledge Graph listing you + * want to scrape. Also known as Google Knowledge Graph ID. Searches with kgmid + * parameter will return results for the originally encrypted search parameters. + * For some searches, kgmid may override all other parameters except start, and num + * parameters. + */ + kgmid?: string; + /** + * Google Cached Search Parameters ID + * Parameter defines the cached search parameters of the Google Search you want to + * scrape. Searches with si parameter will return results for the originally + * encrypted search parameters. For some searches, si may override all other + * parameters except start, and num parameters. si can be used to scrape Google + * Knowledge Graph Tabs. 
+ */ + si?: string; + /** + * Domain + * Parameter defines the Google domain to use. It defaults to `google.com`. Head to + * the [Google domains page](https://serpapi.com/google-domains) for a full list of + * supported Google domains. + */ + google_domain?: string; + /** + * Country + * Parameter defines the country to use for the Google search. It's a two-letter + * country code. (e.g., `us` for the United States, `uk` for United Kingdom, or + * `fr` for France). Head to the [Google countries + * page](https://serpapi.com/google-countries) for a full list of supported Google + * countries. + */ + gl?: string; + /** + * Language + * Parameter defines the language to use for the Google search. It's a two-letter + * language code. (e.g., `en` for English, `es` for Spanish, or `fr` for French). + * Head to the [Google languages page](https://serpapi.com/google-languages) for a + * full list of supported Google languages. + */ + hl?: string; + /** + * Set Multiple Languages + * Parameter defines one or multiple languages to limit the search to. It uses + * `lang_{two-letter language code}` to specify languages and `|` as a delimiter. + * (e.g., `lang_fr|lang_de` will only search French and German pages). Head to the + * [Google lr languages page](https://serpapi.com/google-lr-languages) for a full + * list of supported languages. + */ + lr?: string; + /** + * as_dt + * Parameter controls whether to include or exclude results from the site named in + * the as_sitesearch parameter. + */ + as_dt?: string; + /** + * as_epq + * Parameter identifies a phrase that all documents in the search results must + * contain. You can also use the [phrase + * search](https://developers.google.com/custom-search/docs/xml_results#PhraseSearchqt) + * query term to search for a phrase. + */ + as_epq?: string; + /** + * as_eq + * Parameter identifies a word or phrase that should not appear in any documents in + * the search results. 
You can also use the [exclude + * query](https://developers.google.com/custom-search/docs/xml_results#Excludeqt) + * term to ensure that a particular word or phrase will not appear in the documents + * in a set of search results. + */ + as_eq?: string; + /** + * as_lq + * Parameter specifies that all search results should contain a link to a + * particular URL. You can also use the + * [link:](https://developers.google.com/custom-search/docs/xml_results#BackLinksqt) + * query term for this type of query. + */ + as_lq?: string; + /** + * as_nlo + * Parameter specifies the starting value for a search range. Use as_nlo and as_nhi + * to append an inclusive search range. + */ + as_nlo?: string; + /** + * as_nhi + * Parameter specifies the ending value for a search range. Use as_nlo and as_nhi + * to append an inclusive search range. + */ + as_nhi?: string; + /** + * as_oq + * Parameter provides additional search terms to check for in a document, where + * each document in the search results must contain at least one of the additional + * search terms. You can also use the [Boolean + * OR](https://developers.google.com/custom-search/docs/xml_results#BooleanOrqt) + * query term for this type of query. + */ + as_oq?: string; + /** + * as_q + * Parameter provides search terms to check for in a document. This parameter is + * also commonly used to allow users to specify additional terms to search for + * within a set of search results. + */ + as_q?: string; + /** + * as_qdr + * Parameter requests search results from a specified time period (quick date + * range). The following values are supported: + * `d[number]`: requests results from the specified number of past days. Example + * for the past 10 days: `as_qdr=d10` + * `w[number]`: requests results from the specified number of past weeks. + * `m[number]`: requests results from the specified number of past months. + * `y[number]`: requests results from the specified number of past years. 
Example + * for the past year: `as_qdr=y` + */ + as_qdr?: string; + /** + * as_rq + * Parameter specifies that all search results should be pages that are related to + * the specified URL. The parameter value should be a URL. You can also use the + * [related:](https://developers.google.com/custom-search/docs/xml_results#RelatedLinksqt) + * query term for this type of query. + */ + as_rq?: string; + /** + * as_sitesearch + * Parameter allows you to specify that all search results should be pages from a + * given site. By setting the as_dt parameter, you can also use it to exclude pages + * from a given site from your search resutls. + */ + as_sitesearch?: string; + /** + * Advanced Search Parameters + * (to be searched) parameter defines advanced search parameters that aren't + * possible in the regular query field. (e.g., advanced search for patents, dates, + * news, videos, images, apps, or text contents). + */ + tbs?: string; + /** + * Adult Content Filtering + * Parameter defines the level of filtering for adult content. It can be set to + * `active`, or `off` (default). + */ + safe?: string; + /** + * Exclude Auto-corrected Results + * Parameter defines the exclusion of results from an auto-corrected query that is + * spelled wrong. It can be set to `1` to exclude these results, or `0` to include + * them (default). + */ + nfpr?: string; + /** + * Results Filtering + * Parameter defines if the filters for 'Similar Results' and 'Omitted Results' are + * on or off. It can be set to `1` (default) to enable these filters, or `0` to + * disable these filters. + */ + filter?: string; + /** + * Search Type + * (to be matched) parameter defines the type of search you want to do. 
+ * It can be set to: + * `(no tbm parameter)`: regular Google Search, + * `isch`: [Google Images API](https://serpapi.com/images-results), + * `lcl` - [Google Local API](https://serpapi.com/local-results) + * `vid`: [Google Videos API](https://serpapi.com/videos-results), + * `nws`: [Google News API](https://serpapi.com/news-results), + * `shop`: [Google Shopping API](https://serpapi.com/shopping-results), + * or any other Google service. + */ + tbm?: string; + /** + * Result Offset + * Parameter defines the result offset. It skips the given number of results. It's + * used for pagination. (e.g., `0` (default) is the first page of results, `10` is + * the 2nd page of results, `20` is the 3rd page of results, etc.). + * Google Local Results only accepts multiples of `20`(e.g. `20` for the second + * page results, `40` for the third page results, etc.) as the start value. + */ + start?: number; + /** + * Number of Results + * Parameter defines the maximum number of results to return. (e.g., `10` (default) + * returns 10 results, `40` returns 40 results, and `100` returns 100 results). + */ + num?: string; + /** + * Page Number (images) + * Parameter defines the page number for [Google + * Images](https://serpapi.com/images-results). There are 100 images per page. This + * parameter is equivalent to start (offset) = ijn * 100. This parameter works only + * for [Google Images](https://serpapi.com/images-results) (set tbm to `isch`). + */ + ijn?: string; +} + +type UrlParameters = Record< + string, + string | number | boolean | undefined | null +>; + +/** + * Wrapper around SerpAPI. + * + * To use, you should have the `serpapi` package installed and the SERPAPI_API_KEY environment variable set. 
+ */ +export class SerpAPI extends Tool { + static lc_name() { + return "SerpAPI"; + } + + get lc_namespace() { + return ["langchain-community", "tools"]; + } + + toJSON() { + return this.toJSONNotImplemented(); + } + + protected key: string; + + protected params: Partial; + + protected baseUrl: string; + + constructor( + apiKey: string | undefined = getEnvironmentVariable("SERPAPI_API_KEY"), + params: Partial = {}, + baseUrl = "https://serpapi.com" + ) { + super(...arguments); + + if (!apiKey) { + throw new Error( + "SerpAPI API key not set. You can set it as SERPAPI_API_KEY in your .env file, or pass it to SerpAPI." + ); + } + + this.key = apiKey; + this.params = params; + this.baseUrl = baseUrl; + } + + name = "search"; + + /** + * Builds a URL for the SerpAPI request. + * @param path The path for the request. + * @param parameters The parameters for the request. + * @param baseUrl The base URL for the request. + * @returns A string representing the built URL. + */ + protected buildUrl

( + path: string, + parameters: P, + baseUrl: string + ): string { + const nonUndefinedParams: [string, string][] = Object.entries(parameters) + .filter(([_, value]) => value !== undefined) + .map(([key, value]) => [key, `${value}`]); + const searchParams = new URLSearchParams(nonUndefinedParams); + return `${baseUrl}/${path}?${searchParams}`; + } + + /** @ignore */ + async _call(input: string) { + const { timeout, ...params } = this.params; + const resp = await fetch( + this.buildUrl( + "search", + { + ...params, + api_key: this.key, + q: input, + }, + this.baseUrl + ), + { + signal: timeout ? AbortSignal.timeout(timeout) : undefined, + } + ); + + const res = await resp.json(); + + if (res.error) { + throw new Error(`Got error from serpAPI: ${res.error}`); + } + + const answer_box = res.answer_box_list + ? res.answer_box_list[0] + : res.answer_box; + if (answer_box) { + if (answer_box.result) { + return answer_box.result; + } else if (answer_box.answer) { + return answer_box.answer; + } else if (answer_box.snippet) { + return answer_box.snippet; + } else if (answer_box.snippet_highlighted_words) { + return answer_box.snippet_highlighted_words.toString(); + } else { + const answer: { [key: string]: string } = {}; + Object.keys(answer_box) + .filter( + (k) => + !Array.isArray(answer_box[k]) && + typeof answer_box[k] !== "object" && + !( + typeof answer_box[k] === "string" && + answer_box[k].startsWith("http") + ) + ) + .forEach((k) => { + answer[k] = answer_box[k]; + }); + return JSON.stringify(answer); + } + } + + if (res.events_results) { + return JSON.stringify(res.events_results); + } + + if (res.sports_results) { + return JSON.stringify(res.sports_results); + } + + if (res.top_stories) { + return JSON.stringify(res.top_stories); + } + + if (res.news_results) { + return JSON.stringify(res.news_results); + } + + if (res.jobs_results?.jobs) { + return JSON.stringify(res.jobs_results.jobs); + } + + if (res.questions_and_answers) { + return 
JSON.stringify(res.questions_and_answers); + } + + if (res.popular_destinations?.destinations) { + return JSON.stringify(res.popular_destinations.destinations); + } + + if (res.top_sights?.sights) { + const sights: Array<{ [key: string]: string }> = res.top_sights.sights + .map((s: { [key: string]: string }) => ({ + title: s.title, + description: s.description, + price: s.price, + })) + .slice(0, 8); + return JSON.stringify(sights); + } + + if (res.shopping_results && res.shopping_results[0]?.title) { + return JSON.stringify(res.shopping_results.slice(0, 3)); + } + + if (res.images_results && res.images_results[0]?.thumbnail) { + return res.images_results + .map((ir: { thumbnail: string }) => ir.thumbnail) + .slice(0, 10) + .toString(); + } + + const snippets = []; + if (res.knowledge_graph) { + if (res.knowledge_graph.description) { + snippets.push(res.knowledge_graph.description); + } + + const title = res.knowledge_graph.title || ""; + Object.keys(res.knowledge_graph) + .filter( + (k) => + typeof res.knowledge_graph[k] === "string" && + k !== "title" && + k !== "description" && + !k.endsWith("_stick") && + !k.endsWith("_link") && + !k.startsWith("http") + ) + .forEach((k) => + snippets.push(`${title} ${k}: ${res.knowledge_graph[k]}`) + ); + } + + const first_organic_result = res.organic_results?.[0]; + if (first_organic_result) { + if (first_organic_result.snippet) { + snippets.push(first_organic_result.snippet); + } else if (first_organic_result.snippet_highlighted_words) { + snippets.push(first_organic_result.snippet_highlighted_words); + } else if (first_organic_result.rich_snippet) { + snippets.push(first_organic_result.rich_snippet); + } else if (first_organic_result.rich_snippet_table) { + snippets.push(first_organic_result.rich_snippet_table); + } else if (first_organic_result.link) { + snippets.push(first_organic_result.link); + } + } + + if (res.buying_guide) { + snippets.push(res.buying_guide); + } + + if (res.local_results?.places) { + 
snippets.push(res.local_results.places); + } + + if (snippets.length > 0) { + return JSON.stringify(snippets); + } else { + return "No good search result found"; + } + } + + description = + "a search engine. useful for when you need to answer questions about current events. input should be a search query."; +} diff --git a/libs/langchain-community/src/tools/serper.ts b/libs/langchain-community/src/tools/serper.ts new file mode 100644 index 000000000000..ad9d38b4184a --- /dev/null +++ b/libs/langchain-community/src/tools/serper.ts @@ -0,0 +1,111 @@ +import { getEnvironmentVariable } from "@langchain/core/utils/env"; +import { Tool } from "@langchain/core/tools"; + +/** + * Defines the parameters that can be passed to the Serper class during + * instantiation. It includes `gl` and `hl` which are optional. + */ +export type SerperParameters = { + gl?: string; + hl?: string; +}; + +/** + * Wrapper around serper. + * + * You can create a free API key at https://serper.dev. + * + * To use, you should have the SERPER_API_KEY environment variable set. + */ +export class Serper extends Tool { + static lc_name() { + return "Serper"; + } + + get lc_namespace() { + return ["langchain-community", "tools"]; + } + + /** + * Converts the Serper instance to JSON. This method is not implemented + * and will throw an error if called. + * @returns Throws an error. + */ + toJSON() { + return this.toJSONNotImplemented(); + } + + protected key: string; + + protected params: Partial; + + constructor( + apiKey: string | undefined = getEnvironmentVariable("SERPER_API_KEY"), + params: Partial = {} + ) { + super(); + + if (!apiKey) { + throw new Error( + "Serper API key not set. You can set it as SERPER_API_KEY in your .env file, or pass it to Serper." 
+ ); + } + + this.key = apiKey; + this.params = params; + } + + name = "search"; + + /** @ignore */ + async _call(input: string) { + const options = { + method: "POST", + headers: { + "X-API-KEY": this.key, + "Content-Type": "application/json", + }, + body: JSON.stringify({ + q: input, + ...this.params, + }), + }; + + const res = await fetch("https://google.serper.dev/search", options); + + if (!res.ok) { + throw new Error(`Got ${res.status} error from serper: ${res.statusText}`); + } + + const json = await res.json(); + + if (json.answerBox?.answer) { + return json.answerBox.answer; + } + + if (json.answerBox?.snippet) { + return json.answerBox.snippet; + } + + if (json.answerBox?.snippet_highlighted_words) { + return json.answerBox.snippet_highlighted_words[0]; + } + + if (json.sportsResults?.game_spotlight) { + return json.sportsResults.game_spotlight; + } + + if (json.knowledgeGraph?.description) { + return json.knowledgeGraph.description; + } + + if (json.organic?.[0]?.snippet) { + return json.organic[0].snippet; + } + + return "No good search result found"; + } + + description = + "a search engine. useful for when you need to answer questions about current events. input should be a search query."; +} diff --git a/libs/langchain-community/src/tools/tests/aiplugin.int.test.ts b/libs/langchain-community/src/tools/tests/aiplugin.int.test.ts new file mode 100644 index 000000000000..1af4604bd992 --- /dev/null +++ b/libs/langchain-community/src/tools/tests/aiplugin.int.test.ts @@ -0,0 +1,19 @@ +import { test, expect } from "@jest/globals"; +import { AIPluginTool } from "../aiplugin.js"; + +test("AIPluginTool", async () => { + const tool = await AIPluginTool.fromPluginUrl( + "https://www.klarna.com/.well-known/ai-plugin.json" + ); + + expect(await tool.call(undefined)).toMatchInlineSnapshot(` + "Usage Guide: Assistant uses the Klarna plugin to get relevant product suggestions for any shopping or product discovery purpose. 
Assistant will reply with the following 3 paragraphs 1) Search Results 2) Product Comparison of the Search Results 3) Followup Questions. The first paragraph contains a list of the products with their attributes listed clearly and concisely as bullet points under the product, together with a link to the product and an explanation. Links will always be returned and should be shown to the user. The second paragraph compares the results returned in a summary sentence starting with "In summary". Assistant comparisons consider only the most important features of the products that will help them fit the users request, and each product mention is brief, short and concise. In the third paragraph assistant always asks helpful follow-up questions and end with a question mark. When assistant is asking a follow-up question, it uses it's product expertise to provide information pertaining to the subject of the user's request that may guide them in their search for the right product. + + OpenAPI Spec in JSON or YAML format: + {"openapi":"3.0.1","info":{"version":"v0","title":"Open AI Klarna product Api"},"servers":[{"url":"https://www.klarna.com/us/shopping"}],"tags":[{"name":"open-ai-product-endpoint","description":"Open AI Product Endpoint. Query for products."}],"paths":{"/public/openai/v0/products":{"get":{"tags":["open-ai-product-endpoint"],"summary":"API for fetching Klarna product information","operationId":"productsUsingGET","parameters":[{"name":"countryCode","in":"query","description":"ISO 3166 country code with 2 characters based on the user location. Currently, only US, GB, DE, SE and DK are supported.","required":true,"schema":{"type":"string"}},{"name":"q","in":"query","description":"A precise query that matches one very small category or product that needs to be searched for to find the products the user is looking for. If the user explicitly stated what they want, use that as a query. 
The query is as specific as possible to the product name or category mentioned by the user in its singular form, and don't contain any clarifiers like latest, newest, cheapest, budget, premium, expensive or similar. The query is always taken from the latest topic, if there is a new topic a new query is started. If the user speaks another language than English, translate their request into English (example: translate fia med knuff to ludo board game)!","required":true,"schema":{"type":"string"}},{"name":"size","in":"query","description":"number of products returned","required":false,"schema":{"type":"integer"}},{"name":"min_price","in":"query","description":"(Optional) Minimum price in local currency for the product searched for. Either explicitly stated by the user or implicitly inferred from a combination of the user's request and the kind of product searched for.","required":false,"schema":{"type":"integer"}},{"name":"max_price","in":"query","description":"(Optional) Maximum price in local currency for the product searched for. 
Either explicitly stated by the user or implicitly inferred from a combination of the user's request and the kind of product searched for.","required":false,"schema":{"type":"integer"}}],"responses":{"200":{"description":"Products found","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ProductResponse"}}}},"503":{"description":"one or more services are unavailable"}},"deprecated":false}}},"components":{"schemas":{"Product":{"type":"object","properties":{"attributes":{"type":"array","items":{"type":"string"}},"name":{"type":"string"},"price":{"type":"string"},"url":{"type":"string"}},"title":"Product"},"ProductResponse":{"type":"object","properties":{"products":{"type":"array","items":{"$ref":"#/components/schemas/Product"}}},"title":"ProductResponse"}}}}" + `); + + expect(await tool.call({})).toMatch(/Usage Guide/); + + expect(await tool.call("")).toMatch(/OpenAPI Spec/); +}); diff --git a/libs/langchain-community/src/tools/tests/brave_search.int.test.ts b/libs/langchain-community/src/tools/tests/brave_search.int.test.ts new file mode 100644 index 000000000000..8894e15bfb40 --- /dev/null +++ b/libs/langchain-community/src/tools/tests/brave_search.int.test.ts @@ -0,0 +1,10 @@ +import { test } from "@jest/globals"; +import { BraveSearch } from "../brave_search.js"; + +test.skip("BraveSearchTool", async () => { + const tool = new BraveSearch(); + + const result = await tool.call("What is Langchain?"); + + console.log({ result }); +}); diff --git a/libs/langchain-community/src/tools/tests/chain.test.ts b/libs/langchain-community/src/tools/tests/chain.test.ts new file mode 100644 index 000000000000..8fd8571e83b6 --- /dev/null +++ b/libs/langchain-community/src/tools/tests/chain.test.ts @@ -0,0 +1,142 @@ +import { test, expect, jest } from "@jest/globals"; + +import { ChainTool } from "../chain.js"; +import { LLMChain } from "../../chains/llm_chain.js"; +import { PromptTemplate } from "../../prompts/prompt.js"; +import { LLM } from 
"../../llms/base.js"; +import { VectorDBQAChain } from "../../chains/vector_db_qa.js"; +import { MemoryVectorStore } from "../../vectorstores/memory.js"; +import { FakeEmbeddings } from "../../embeddings/fake.js"; + +class FakeLLM extends LLM { + _llmType() { + return "fake"; + } + + async _call(prompt: string): Promise { + return prompt; + } +} + +test("chain tool with llm chain and local callback", async () => { + const calls: string[] = []; + const handleToolStart = jest.fn(() => { + calls.push("tool start"); + }); + const handleToolEnd = jest.fn(() => { + calls.push("tool end"); + }); + const handleLLMStart = jest.fn(() => { + calls.push("llm start"); + }); + const handleLLMEnd = jest.fn(() => { + calls.push("llm end"); + }); + const handleChainStart = jest.fn(() => { + calls.push("chain start"); + }); + const handleChainEnd = jest.fn(() => { + calls.push("chain end"); + }); + + const chain = new LLMChain({ + llm: new FakeLLM({}), + prompt: PromptTemplate.fromTemplate("hello world"), + }); + const tool = new ChainTool({ chain, name: "fake", description: "fake" }); + const result = await tool.call("hi", [ + { + awaitHandlers: true, + handleToolStart, + handleToolEnd, + handleLLMStart, + handleLLMEnd, + handleChainStart, + handleChainEnd, + }, + ]); + expect(result).toMatchInlineSnapshot(`"hello world"`); + expect(handleToolStart).toBeCalledTimes(1); + expect(handleToolEnd).toBeCalledTimes(1); + expect(handleLLMStart).toBeCalledTimes(1); + expect(handleLLMEnd).toBeCalledTimes(1); + expect(handleChainStart).toBeCalledTimes(1); + expect(handleChainEnd).toBeCalledTimes(1); + expect(calls).toMatchInlineSnapshot(` + [ + "tool start", + "chain start", + "llm start", + "llm end", + "chain end", + "tool end", + ] + `); +}); + +test("chain tool with vectordbqa chain", async () => { + const calls: string[] = []; + const handleToolStart = jest.fn(() => { + calls.push("tool start"); + }); + const handleToolEnd = jest.fn(() => { + calls.push("tool end"); + }); + const 
handleLLMStart = jest.fn(() => { + calls.push("llm start"); + }); + const handleLLMEnd = jest.fn(() => { + calls.push("llm end"); + }); + const handleChainStart = jest.fn(() => { + calls.push("chain start"); + }); + const handleChainEnd = jest.fn(() => { + calls.push("chain end"); + }); + + const chain = VectorDBQAChain.fromLLM( + new FakeLLM({}), + await MemoryVectorStore.fromExistingIndex(new FakeEmbeddings()) + ); + const tool = new ChainTool({ chain, name: "fake", description: "fake" }); + const result = await tool.call("hi", [ + { + awaitHandlers: true, + handleToolStart, + handleToolEnd, + handleLLMStart, + handleLLMEnd, + handleChainStart, + handleChainEnd, + }, + ]); + expect(result).toMatchInlineSnapshot(` + "Use the following pieces of context to answer the question at the end. If you don't know the answer, just say that you don't know, don't try to make up an answer. + + + + Question: hi + Helpful Answer:" + `); + expect(handleToolStart).toBeCalledTimes(1); + expect(handleToolEnd).toBeCalledTimes(1); + expect(handleLLMStart).toBeCalledTimes(1); + expect(handleLLMEnd).toBeCalledTimes(1); + expect(handleChainStart).toBeCalledTimes(3); + expect(handleChainEnd).toBeCalledTimes(3); + expect(calls).toMatchInlineSnapshot(` + [ + "tool start", + "chain start", + "chain start", + "chain start", + "llm start", + "llm end", + "chain end", + "chain end", + "chain end", + "tool end", + ] + `); +}); diff --git a/libs/langchain-community/src/tools/tests/gmail.test.ts b/libs/langchain-community/src/tools/tests/gmail.test.ts new file mode 100644 index 000000000000..e44b6f7fef36 --- /dev/null +++ b/libs/langchain-community/src/tools/tests/gmail.test.ts @@ -0,0 +1,63 @@ +import { jest, expect, describe } from "@jest/globals"; +import { GmailGetMessage } from "../gmail/get_message.js"; + +jest.mock("googleapis", () => ({ + google: { + auth: { + JWT: jest.fn().mockImplementation(() => ({})), + }, + }, +})); + +describe("GmailBaseTool using GmailGetMessage", () => { + 
it("should be setup with correct parameters", async () => { + const params = { + credentials: { + clientEmail: "test@email.com", + privateKey: "privateKey", + }, + scopes: ["gmail_scope1"], + }; + const instance = new GmailGetMessage(params); + expect(instance.name).toBe("gmail_get_message"); + }); + + it("should throw an error if both privateKey and keyfile are missing", async () => { + const params = { + credentials: {}, + scopes: ["gmail_scope1"], + }; + + expect(() => new GmailGetMessage(params)).toThrow(); + }); + + it("should throw error with only client_email", async () => { + const params = { + credentials: { + clientEmail: "client_email", + }, + }; + + expect(() => new GmailGetMessage(params)).toThrow(); + }); + + it("should throw error with only private_key", async () => { + const params = { + credentials: { + privateKey: "privateKey", + }, + }; + + expect(() => new GmailGetMessage(params)).toThrow(); + }); + + it("should throw error with only keyfile", async () => { + const params = { + credentials: { + keyfile: "keyfile", + }, + }; + + expect(() => new GmailGetMessage(params)).toThrow(); + }); +}); diff --git a/libs/langchain-community/src/tools/tests/google_calendar.test.ts b/libs/langchain-community/src/tools/tests/google_calendar.test.ts new file mode 100644 index 000000000000..78816006f9ef --- /dev/null +++ b/libs/langchain-community/src/tools/tests/google_calendar.test.ts @@ -0,0 +1,110 @@ +import { jest, expect, describe } from "@jest/globals"; +import { LLM } from "../../llms/base.js"; +import { + GoogleCalendarCreateTool, + GoogleCalendarViewTool, +} from "../google_calendar/index.js"; + +jest.mock("googleapis", () => ({ + google: { + auth: { + JWT: jest.fn().mockImplementation(() => ({})), + }, + }, +})); + +jest.mock("../../util/env.js", () => ({ + getEnvironmentVariable: () => "key", +})); + +// jest.mock("../google_calendar/commands/run-create-events.js", () => ({ +// runCreateEvent: jest.fn(), +// })); + +// 
jest.mock("../google_calendar/commands/run-view-events.js", () => ({ +// runViewEvents: jest.fn(), +// })); + +class FakeLLM extends LLM { + _llmType() { + return "fake"; + } + + async _call(prompt: string): Promise { + return prompt; + } +} + +describe("GoogleCalendarCreateTool", () => { + it("should be setup with correct parameters", async () => { + const params = { + credentials: { + clientEmail: "test@email.com", + privateKey: "privateKey", + calendarId: "calendarId", + }, + model: new FakeLLM({}), + }; + + const instance = new GoogleCalendarCreateTool(params); + expect(instance.name).toBe("google_calendar_create"); + }); + + it("should throw an error if missing credentials", async () => { + const params = { + credentials: {}, + model: new FakeLLM({}), + }; + expect(() => new GoogleCalendarCreateTool(params)).toThrow( + "Missing GOOGLE_CALENDAR_CLIENT_EMAIL to interact with Google Calendar" + ); + }); + + it("should throw an error if missing model", async () => { + const params = { + credentials: { + clientEmail: "test", + }, + }; + expect(() => new GoogleCalendarCreateTool(params)).toThrow( + "Missing llm instance to interact with Google Calendar" + ); + }); +}); + +describe("GoogleCalendarViewTool", () => { + it("should be setup with correct parameters", async () => { + const params = { + credentials: { + clientEmail: "test@email.com", + privateKey: "privateKey", + calendarId: "calendarId", + }, + model: new FakeLLM({}), + }; + + const instance = new GoogleCalendarViewTool(params); + expect(instance.name).toBe("google_calendar_view"); + }); + + it("should throw an error if missing credentials", async () => { + const params = { + credentials: {}, + model: new FakeLLM({}), + }; + expect(() => new GoogleCalendarViewTool(params)).toThrow( + "Missing GOOGLE_CALENDAR_CLIENT_EMAIL to interact with Google Calendar" + ); + }); + + it("should throw an error if missing model", async () => { + const params = { + credentials: { + clientEmail: "test", + }, + }; + expect(() 
=> new GoogleCalendarViewTool(params)).toThrow( + "Missing llm instance to interact with Google Calendar" + ); + }); +}); diff --git a/libs/langchain-community/src/tools/tests/google_custom_search.int.test.ts b/libs/langchain-community/src/tools/tests/google_custom_search.int.test.ts new file mode 100644 index 000000000000..5ba1f005c7c9 --- /dev/null +++ b/libs/langchain-community/src/tools/tests/google_custom_search.int.test.ts @@ -0,0 +1,10 @@ +import { test } from "@jest/globals"; +import { GoogleCustomSearch } from "../google_custom_search.js"; + +test.skip("GoogleCustomSearchTool", async () => { + const tool = new GoogleCustomSearch(); + + const result = await tool.call("What is Langchain?"); + + console.log({ result }); +}); diff --git a/libs/langchain-community/src/tools/tests/google_places.int.test.ts b/libs/langchain-community/src/tools/tests/google_places.int.test.ts new file mode 100644 index 000000000000..a00bfaefc339 --- /dev/null +++ b/libs/langchain-community/src/tools/tests/google_places.int.test.ts @@ -0,0 +1,26 @@ +import { expect, describe } from "@jest/globals"; +import { GooglePlacesAPI } from "../google_places.js"; + +describe("GooglePlacesAPI", () => { + test("should be setup with correct parameters", async () => { + const instance = new GooglePlacesAPI(); + expect(instance.name).toBe("google_places"); + }); + + test("GooglePlacesAPI returns expected result for valid query", async () => { + const tool = new GooglePlacesAPI(); + + const result = await tool.call("EatonCenter"); + + expect(result).toContain("220 Yonge St"); + expect(result).toContain("CF Toronto Eaton Centre"); + }); + + test("GooglePlacesAPI returns '' for query on an non-existent place", async () => { + const tool = new GooglePlacesAPI(); + + const result = await tool.call("ihfwehnwfi"); + + expect(result).toContain(""); + }); +}); diff --git a/libs/langchain-community/src/tools/tests/searchapi.test.ts b/libs/langchain-community/src/tools/tests/searchapi.test.ts new file mode 
100644 index 000000000000..af90291b0cec --- /dev/null +++ b/libs/langchain-community/src/tools/tests/searchapi.test.ts @@ -0,0 +1,20 @@ +import { test, expect } from "@jest/globals"; +import { SearchApi } from "../../tools/searchapi.js"; + +describe("SearchApi test suite", () => { + class SearchApiUrlTester extends SearchApi { + testThisUrl(): string { + return this.buildUrl("Query"); + } + } + + test("Test default url", async () => { + const searchApi = new SearchApiUrlTester("ApiKey", { + hl: "en", + gl: "us", + }); + expect(searchApi.testThisUrl()).toEqual( + "https://www.searchapi.io/api/v1/search?engine=google&api_key=ApiKey&hl=en&gl=us&q=Query" + ); + }); +}); diff --git a/libs/langchain-community/src/tools/tests/serpapi.test.ts b/libs/langchain-community/src/tools/tests/serpapi.test.ts new file mode 100644 index 000000000000..f40f0ab2846f --- /dev/null +++ b/libs/langchain-community/src/tools/tests/serpapi.test.ts @@ -0,0 +1,37 @@ +import { test, expect } from "@jest/globals"; +import { SerpAPI } from "../../tools/serpapi.js"; + +describe("serp api test suite", () => { + class SerpApiUrlTester extends SerpAPI { + testThisUrl(): string { + return this.buildUrl("search", this.params, this.baseUrl); + } + } + + test("Test default url", async () => { + const serpApi = new SerpApiUrlTester( + "Not a real key but constructor error if not set", + { + hl: "en", + gl: "us", + } + ); + expect(serpApi.testThisUrl()).toEqual( + "https://serpapi.com/search?hl=en&gl=us" + ); + }); + + test("Test override url", async () => { + const serpApiProxied = new SerpApiUrlTester( + "Not a real key but constructor error if not set", + { + gl: "us", + }, + "https://totallyProxied.com" + ); + + expect( + serpApiProxied.testThisUrl() === "https://totallyProxied.com/search?gl=us" + ); + }); +}); diff --git a/libs/langchain-community/src/tools/tests/webbrowser.int.test.ts b/libs/langchain-community/src/tools/tests/webbrowser.int.test.ts new file mode 100644 index 
000000000000..13c736de8ec9 --- /dev/null +++ b/libs/langchain-community/src/tools/tests/webbrowser.int.test.ts @@ -0,0 +1,124 @@ +import { test, expect, describe } from "@jest/globals"; +import { WebBrowser } from "../webbrowser.js"; +import { ChatOpenAI } from "../../chat_models/openai.js"; +import { OpenAIEmbeddings } from "../../embeddings/openai.js"; +import fetchAdapter from "../../util/axios-fetch-adapter.js"; + +describe("webbrowser Test suite", () => { + test("get word of the day", async () => { + const model = new ChatOpenAI({ temperature: 0 }); + const embeddings = new OpenAIEmbeddings(); + + const browser = new WebBrowser({ model, embeddings }); + const result = await browser.call( + `"https://www.merriam-webster.com/word-of-the-day","word of the day"` + ); + + expect(result).toContain("Word of the Day:"); + }); + + test("get a summary of the page when empty request with fetch adapter", async () => { + const model = new ChatOpenAI({ temperature: 0 }); + const embeddings = new OpenAIEmbeddings(); + + const browser = new WebBrowser({ + model, + embeddings, + axiosConfig: { + adapter: fetchAdapter, + }, + }); + const result = await browser.call( + `"https://www.merriam-webster.com/word-of-the-day",""` + ); + + // fuzzy, sometimes its capped and others not + expect(result).toMatch(/word of the day/i); + }); + + test("error no url", async () => { + const model = new ChatOpenAI({ temperature: 0 }); + const embeddings = new OpenAIEmbeddings(); + + const browser = new WebBrowser({ model, embeddings }); + const result = await browser.call(`"",""`); + + expect(result).toEqual("TypeError [ERR_INVALID_URL]: Invalid URL"); + }); + + test("error no protocol or malformed", async () => { + const model = new ChatOpenAI({ temperature: 0 }); + const embeddings = new OpenAIEmbeddings(); + + const browser = new WebBrowser({ model, embeddings }); + const result = await browser.call( + `"www.merriam-webster.com/word-of-the-day","word of the day"` + ); + + 
expect(result).toEqual("TypeError [ERR_INVALID_URL]: Invalid URL"); + }); + + test("error bad site", async () => { + const model = new ChatOpenAI({ temperature: 0 }); + const embeddings = new OpenAIEmbeddings(); + + const browser = new WebBrowser({ model, embeddings }); + const result = await browser.call( + `"https://www.hDjRBKoAD0EIbF29TWM4rbXDGGM5Nhy4uzNEAdDS.com","word of the day"` + ); + + expect(result).toEqual( + "Error: getaddrinfo ENOTFOUND www.hdjrbkoad0eibf29twm4rbxdggm5nhy4uzneadds.com" + ); + }); + + test.skip("get a summary of a page that detects scraping", async () => { + const model = new ChatOpenAI({ temperature: 0 }); + const embeddings = new OpenAIEmbeddings(); + + const browser = new WebBrowser({ model, embeddings }); + const result = await browser.call( + `"https://www.musicgateway.com/spotify-pre-save",""` + ); + + expect(result).not.toEqual("Error: http response 403"); + }); + + // cant we figure the headers to fix this? + test.skip("get a summary of a page that detects scraping 2", async () => { + const model = new ChatOpenAI({ temperature: 0 }); + const embeddings = new OpenAIEmbeddings(); + + const browser = new WebBrowser({ model, embeddings }); + const result = await browser.call( + `"https://parade.com/991228/marynliles/couples-goals",""` + ); + expect(result).not.toEqual("Error: http response 403"); + }); + + test("get a summary of a page that rejects unauthorized", async () => { + const model = new ChatOpenAI({ temperature: 0 }); + const embeddings = new OpenAIEmbeddings(); + + const browser = new WebBrowser({ model, embeddings }); + const result = await browser.call( + `"https://firstround.com/review/how-to-fix-the-co-founder-fights-youre-sick-of-having-lessons-from-couples-therapist-esther-perel",""` + ); + + expect(result).toContain("Esther Perel"); + }); + + // other urls that have done this too + // "https://wsimag.com/economy-and-politics/15473-power-and-money", + // 
"https://thriveglobal.com/stories/sleep-what-to-do-what-not-to-do", + test.skip("get a summary of a page that redirects too many times", async () => { + const model = new ChatOpenAI({ temperature: 0 }); + const embeddings = new OpenAIEmbeddings(); + + const browser = new WebBrowser({ model, embeddings }); + const result = await browser.call( + `"https://www.healtheuropa.eu/why-mdma-must-be-reclassified-as-a-schedule-2-drug/95780",""` + ); + expect(result).toContain("Beckley Foundation"); + }); +}); diff --git a/libs/langchain-community/src/tools/tests/webbrowser.test.ts b/libs/langchain-community/src/tools/tests/webbrowser.test.ts new file mode 100644 index 000000000000..c950e5fac592 --- /dev/null +++ b/libs/langchain-community/src/tools/tests/webbrowser.test.ts @@ -0,0 +1,36 @@ +import { test, expect, describe } from "@jest/globals"; +import { readFileSync } from "fs"; +import { getText, parseInputs } from "../webbrowser.js"; + +describe("webbrowser Test suite", () => { + const html = readFileSync("./src/tools/fixtures/wordoftheday.html", "utf8"); + + test("parse html to text and links", async () => { + const baseUrl = "https://www.merriam-webster.com/word-of-the-day"; + const text = getText(html, baseUrl, false); + expect(text).toContain("Word of the Day: Foible"); + }); + + test("parseInputs", () => { + expect( + parseInputs(`"https://www.merriam-webster.com/word-of-the-day",""`) + ).toEqual(["https://www.merriam-webster.com/word-of-the-day", ""]); + expect( + parseInputs( + `"https://www.merriam-webster.com/word-of-the-day","word of the day"` + ) + ).toEqual([ + "https://www.merriam-webster.com/word-of-the-day", + "word of the day", + ]); + expect( + parseInputs(`"https://www.merriam-webster.com/word-of-the-day","`) + ).toEqual(["https://www.merriam-webster.com/word-of-the-day", ""]); + expect( + parseInputs(`"https://www.merriam-webster.com/word-of-the-day",`) + ).toEqual(["https://www.merriam-webster.com/word-of-the-day", ""]); + expect( + 
parseInputs(`"https://www.merriam-webster.com/word-of-the-day"`) + ).toEqual(["https://www.merriam-webster.com/word-of-the-day", undefined]); + }); +}); diff --git a/libs/langchain-community/src/tools/tests/wikipedia.int.test.ts b/libs/langchain-community/src/tools/tests/wikipedia.int.test.ts new file mode 100644 index 000000000000..73c7bbdc8bef --- /dev/null +++ b/libs/langchain-community/src/tools/tests/wikipedia.int.test.ts @@ -0,0 +1,22 @@ +import { test, expect } from "@jest/globals"; +import { WikipediaQueryRun } from "../wikipedia_query_run.js"; + +test.skip("WikipediaQueryRunTool returns a string for valid query", async () => { + const tool = new WikipediaQueryRun(); + const result = await tool.call("Langchain"); + expect(typeof result).toBe("string"); +}); + +test.skip("WikipediaQueryRunTool returns non-empty string for valid query", async () => { + const tool = new WikipediaQueryRun(); + const result = await tool.call("Langchain"); + console.log(result); + expect(result).not.toBe(""); +}); + +test.skip("WikipediaQueryRunTool returns 'No good Wikipedia Search Result was found' for bad query", async () => { + const tool = new WikipediaQueryRun(); + const result = await tool.call("kjdsfklfjskladjflkdsajflkadsjf"); + console.log(result); + expect(result).toBe("No good Wikipedia Search Result was found"); +}); diff --git a/libs/langchain-community/src/tools/tests/wolframalpha.test.ts b/libs/langchain-community/src/tools/tests/wolframalpha.test.ts new file mode 100644 index 000000000000..2c2b7f7fd297 --- /dev/null +++ b/libs/langchain-community/src/tools/tests/wolframalpha.test.ts @@ -0,0 +1,47 @@ +import { jest, afterEach, beforeEach, describe, expect } from "@jest/globals"; +import { WolframAlphaTool } from "../wolframalpha.js"; + +const MOCK_APP_ID = "[MOCK_APP_ID]"; +const QUERY_1 = "What is 2 + 2?"; +const MOCK_ANSWER = "[MOCK_ANSWER]"; + +describe("wolfram alpha test suite", () => { + // eslint-disable-next-line @typescript-eslint/no-explicit-any + let 
fetchMock: any; + + beforeEach(() => { + fetchMock = jest.spyOn(global, "fetch").mockImplementation( + async () => + ({ + text: () => Promise.resolve(MOCK_ANSWER), + } as Response) + ); + }); + + afterEach(() => { + fetchMock.mockRestore(); + }); + + test("test query parameters passed correctly", async () => { + const wolframAlpha = new WolframAlphaTool({ + appid: MOCK_APP_ID, + }); + await wolframAlpha._call(QUERY_1); + const [url] = fetchMock.mock.calls[0]; + const parsedUrl = new URL(url); + const params = new URLSearchParams(parsedUrl.search); + + expect(fetchMock).toBeCalledTimes(1); + expect(params.get("appid")).toBe(MOCK_APP_ID); + expect(params.get("input")).toBe(QUERY_1); + }); + + test("test answer retrieved", async () => { + const wolframAlpha = new WolframAlphaTool({ + appid: MOCK_APP_ID, + }); + + const answer = await wolframAlpha._call(QUERY_1); + expect(answer).toBe(MOCK_ANSWER); + }); +}); diff --git a/libs/langchain-community/src/tools/wikipedia_query_run.ts b/libs/langchain-community/src/tools/wikipedia_query_run.ts new file mode 100644 index 000000000000..0d8f145fafe3 --- /dev/null +++ b/libs/langchain-community/src/tools/wikipedia_query_run.ts @@ -0,0 +1,185 @@ +import { Tool } from "@langchain/core/tools"; + +/** + * Interface for the parameters that can be passed to the + * WikipediaQueryRun constructor. + */ +export interface WikipediaQueryRunParams { + topKResults?: number; + maxDocContentLength?: number; + baseUrl?: string; +} + +/** + * Type alias for URL parameters. Represents a record where keys are + * strings and values can be string, number, boolean, undefined, or null. + */ +type UrlParameters = Record< + string, + string | number | boolean | undefined | null +>; + +/** + * Interface for the structure of search results returned by the Wikipedia + * API. + */ +interface SearchResults { + query: { + search: Array<{ + title: string; + }>; + }; +} + +/** + * Interface for the structure of a page returned by the Wikipedia API. 
+ */ +interface Page { + pageid: number; + ns: number; + title: string; + extract: string; +} + +/** + * Interface for the structure of a page result returned by the Wikipedia + * API. + */ +interface PageResult { + batchcomplete: string; + query: { + pages: Record; + }; +} + +/** + * Class for interacting with and fetching data from the Wikipedia API. It + * extends the Tool class. + * @example + * ```typescript + * const wikipediaQuery = new WikipediaQueryRun({ + * topKResults: 3, + * maxDocContentLength: 4000, + * }); + * const result = await wikipediaQuery.call("Langchain"); + * ``` + */ +export class WikipediaQueryRun extends Tool { + static lc_name() { + return "WikipediaQueryRun"; + } + + get lc_namespace() { + return ["langchain-community", "tools"]; + } + + name = "wikipedia-api"; + + description = + "A tool for interacting with and fetching data from the Wikipedia API."; + + protected topKResults = 3; + + protected maxDocContentLength = 4000; + + protected baseUrl = "https://en.wikipedia.org/w/api.php"; + + constructor(params: WikipediaQueryRunParams = {}) { + super(); + + this.topKResults = params.topKResults ?? this.topKResults; + this.maxDocContentLength = + params.maxDocContentLength ?? this.maxDocContentLength; + this.baseUrl = params.baseUrl ?? 
this.baseUrl; + } + + async _call(query: string): Promise { + const searchResults = await this._fetchSearchResults(query); + const summaries: string[] = []; + + for ( + let i = 0; + i < Math.min(this.topKResults, searchResults.query.search.length); + i += 1 + ) { + const page = searchResults.query.search[i].title; + const pageDetails = await this._fetchPage(page, true); + + if (pageDetails) { + const summary = `Page: ${page}\nSummary: ${pageDetails.extract}`; + summaries.push(summary); + } + } + + if (summaries.length === 0) { + return "No good Wikipedia Search Result was found"; + } else { + return summaries.join("\n\n").slice(0, this.maxDocContentLength); + } + } + + /** + * Fetches the content of a specific Wikipedia page. It returns the + * extracted content as a string. + * @param page The specific Wikipedia page to fetch its content. + * @param redirect A boolean value to indicate whether to redirect or not. + * @returns The extracted content of the specific Wikipedia page as a string. + */ + public async content(page: string, redirect = true): Promise { + try { + const result = await this._fetchPage(page, redirect); + return result.extract; + } catch (error) { + throw new Error(`Failed to fetch content for page "${page}": ${error}`); + } + } + + /** + * Builds a URL for the Wikipedia API using the provided parameters. + * @param parameters The parameters to be used in building the URL. + * @returns A string representing the built URL. + */ + protected buildUrl

(parameters: P): string { + const nonUndefinedParams: [string, string][] = Object.entries(parameters) + .filter(([_, value]) => value !== undefined) + .map(([key, value]) => [key, `${value}`]); + const searchParams = new URLSearchParams(nonUndefinedParams); + return `${this.baseUrl}?${searchParams}`; + } + + private async _fetchSearchResults(query: string): Promise { + const searchParams = new URLSearchParams({ + action: "query", + list: "search", + srsearch: query, + format: "json", + }); + + const response = await fetch(`${this.baseUrl}?${searchParams.toString()}`); + if (!response.ok) throw new Error("Network response was not ok"); + + const data: SearchResults = await response.json(); + + return data; + } + + private async _fetchPage(page: string, redirect: boolean): Promise { + const params = new URLSearchParams({ + action: "query", + prop: "extracts", + explaintext: "true", + redirects: redirect ? "1" : "0", + format: "json", + titles: page, + }); + + const response = await fetch(`${this.baseUrl}?${params.toString()}`); + if (!response.ok) throw new Error("Network response was not ok"); + + const data: PageResult = await response.json(); + const { pages } = data.query; + const pageId = Object.keys(pages)[0]; + + return pages[pageId]; + } +} diff --git a/libs/langchain-community/src/tools/wolframalpha.ts b/libs/langchain-community/src/tools/wolframalpha.ts new file mode 100644 index 000000000000..730087e22c59 --- /dev/null +++ b/libs/langchain-community/src/tools/wolframalpha.ts @@ -0,0 +1,41 @@ +import { Tool, type ToolParams } from "@langchain/core/tools"; + +/** + * @example + * ```typescript + * const tool = new WolframAlphaTool({ + * appid: "YOUR_APP_ID", + * }); + * const res = await tool.invoke("What is 2 * 2?"); + * ``` + */ +export class WolframAlphaTool extends Tool { + appid: string; + + name = "wolfram_alpha"; + + description = `A wrapper around Wolfram Alpha. 
Useful for when you need to answer questions about Math, Science, Technology, Culture, Society and Everyday Life. Input should be a search query.`; + + constructor(fields: ToolParams & { appid: string }) { + super(fields); + + this.appid = fields.appid; + } + + get lc_namespace() { + return ["langchain-community", "tools"]; + } + + static lc_name() { + return "WolframAlphaTool"; + } + + async _call(query: string): Promise { + const url = `https://www.wolframalpha.com/api/v1/llm-api?appid=${ + this.appid + }&input=${encodeURIComponent(query)}`; + const res = await fetch(url); + + return res.text(); + } +} From 480104e3af09470c14ee337872e358313f1d3575 Mon Sep 17 00:00:00 2001 From: jacoblee93 Date: Thu, 7 Dec 2023 00:23:59 -0800 Subject: [PATCH 04/22] Add more integrations --- libs/langchain-community/.gitignore | 318 ++++++- libs/langchain-community/package.json | 839 +++++++++++++++++- .../scripts/check-tree-shaking.js | 6 + .../scripts/create-entrypoints.js | 297 ++++++- .../src/cache/cloudflare_kv.ts | 76 ++ libs/langchain-community/src/cache/momento.ts | 173 ++++ .../src/cache/tests/momento.test.ts | 329 +++++++ .../src/cache/tests/upstash_redis.int.test.ts | 38 + .../src/cache/tests/upstash_redis.test.ts | 21 + .../src/cache/upstash_redis.ts | 93 ++ .../src/callbacks/handlers/llmonitor.ts | 338 +++++++ libs/langchain-community/src/chat_models.ts | 88 -- .../src/chat_models/anthropic.ts | 451 ---------- .../tests/chatanthropic.int.test.ts | 313 ------- .../tests/chatbaiduwenxin.int.test.ts | 2 +- .../chat_models/tests/chatbedrock.int.test.ts | 2 +- .../chatcloudflare_workersai.int.test.ts | 6 +- .../tests/chatfireworks.int.test.ts | 4 +- .../tests/chatgooglepalm.int.test.ts | 138 --- .../chat_models/tests/chatgooglepalm.test.ts | 2 +- .../tests/chatgooglevertexai.int.test.ts | 145 --- .../tests/chatgooglevertexai.test.ts | 2 +- .../tests/chatgooglevertexai_web.int.test.ts | 146 --- .../tests/chatgooglevertexai_web.test.ts | 2 +- 
.../tests/chatiflytekxinghuo.int.test.ts | 2 +- .../tests/chatllama_cpp.int.test.ts | 118 --- .../chat_models/tests/chatollama.int.test.ts | 156 ---- .../tests/chatopenai-extended.int.test.ts | 176 ---- .../tests/chatopenai-vision.int.test.ts | 54 -- .../chat_models/tests/chatopenai.int.test.ts | 777 ---------------- .../src/chat_models/tests/data/hotdog.jpg | Bin 28191 -> 0 bytes .../src/chat_models/tests/minimax.int.test.ts | 10 +- .../src/chat_models/yandex.ts | 1 + .../src/embeddings/bedrock.ts | 142 +++ .../src/embeddings/cloudflare_workersai.ts | 94 ++ .../src/embeddings/cohere.ts | 155 ++++ .../src/embeddings/googlepalm.ts | 107 +++ .../src/embeddings/googlevertexai.ts | 145 +++ .../src/embeddings/gradient_ai.ts | 118 +++ libs/langchain-community/src/embeddings/hf.ts | 77 ++ .../src/embeddings/hf_transformers.ts | 105 +++ .../src/embeddings/llama_cpp.ts | 103 +++ .../src/embeddings/minimax.ts | 222 +++++ .../src/embeddings/ollama.ts | 148 +++ .../src/embeddings/tensorflow.ts | 91 ++ .../src/embeddings/voyage.ts | 152 ++++ .../src/graphs/neo4j_graph.ts | 286 ++++++ .../src/graphs/tests/neo4j_graph.int.test.ts | 56 ++ libs/langchain-community/src/index.ts | 3 - libs/langchain-community/src/llms.ts | 73 -- .../tests/cloudflare_workersai.int.test.ts | 2 +- .../src/llms/tests/fake.test.ts | 98 -- .../src/llms/tests/llama_cpp.int.test.ts | 2 +- .../src/llms/tests/ollama.int.test.ts | 4 +- .../src/llms/tests/openai-chat.int.test.ts | 142 --- .../src/llms/tests/openai.int.test.ts | 310 ------- .../src/llms/tests/prompt_layer.int.test.ts | 95 -- .../src/load/import_constants.ts | 238 ++--- .../src/load/import_map.ts | 103 +-- .../src/load/import_type.d.ts | 555 +++--------- .../src/retrievers/amazon_kendra.ts | 317 +++++++ .../src/retrievers/chaindesk.ts | 97 ++ .../src/retrievers/databerry.ts | 94 ++ .../src/retrievers/metal.ts | 70 ++ .../src/retrievers/supabase.ts | 238 +++++ .../src/retrievers/tavily_search_api.ts | 140 +++ .../tests/amazon_kendra.int.test.ts | 
22 + .../src/retrievers/tests/metal.int.test.ts | 22 + .../src/retrievers/tests/supabase.int.test.ts | 27 + .../tests/tavily_search_api.int.test.ts | 17 + .../langchain-community/src/retrievers/zep.ts | 169 ++++ .../src/tests/chat_models.test.ts | 5 - .../src/tests/integration.int.test.ts | 5 - .../src/tests/llms.test.ts | 5 - .../src/tests/vectorstores.test.ts | 5 - .../src/tools/google_calendar/base.ts | 102 --- .../commands/run-create-events.ts | 129 --- .../commands/run-view-events.ts | 85 -- .../src/tools/google_calendar/create.ts | 51 -- .../src/tools/google_calendar/descriptions.ts | 24 - .../src/tools/google_calendar/index.ts | 3 - .../prompts/create-event-prompt.ts | 56 -- .../tools/google_calendar/prompts/index.ts | 2 - .../prompts/view-events-prompt.ts | 34 - .../utils/get-timezone-offset-in-hours.ts | 7 - .../src/tools/google_calendar/view.ts | 51 -- .../src/tools/{IFTTTWebhook.ts => ifttt.ts} | 0 libs/langchain-community/src/tools/json.ts | 154 ---- .../src/tools/tests/aiplugin.int.test.ts | 19 - .../src/tools/tests/brave_search.int.test.ts | 10 - .../src/tools/tests/chain.test.ts | 142 --- .../src/tools/tests/gmail.test.ts | 63 -- .../src/tools/tests/google_calendar.test.ts | 110 --- .../tests/google_custom_search.int.test.ts | 10 - .../src/tools/tests/google_places.int.test.ts | 26 - .../src/tools/tests/searchapi.test.ts | 20 - .../src/tools/tests/serpapi.test.ts | 37 - .../src/tools/tests/webbrowser.int.test.ts | 124 --- .../src/tools/tests/webbrowser.test.ts | 36 - .../src/tools/tests/wikipedia.int.test.ts | 22 - .../src/tools/tests/wolframalpha.test.ts | 47 - .../src/types/openai-types.ts | 4 +- libs/langchain-community/src/util/chunk.ts | 8 + libs/langchain-community/src/util/convex.ts | 82 ++ .../src/util/googlevertexai-connection.ts | 6 +- .../src/util/googlevertexai-webauth.ts | 2 +- libs/langchain-community/src/util/momento.ts | 26 + libs/langchain-community/src/util/testing.ts | 107 +++ libs/langchain-community/src/util/time.ts | 10 + 
libs/langchain-community/src/vectorstores.ts | 80 -- .../src/vectorstores/analyticdb.ts | 390 ++++++++ .../src/vectorstores/cassandra.ts | 581 ++++++++++++ .../src/vectorstores/chroma.ts | 364 ++++++++ .../src/vectorstores/clickhouse.ts | 338 +++++++ .../src/vectorstores/cloudflare_vectorize.ts | 227 +++++ .../src/vectorstores/convex.ts | 376 ++++++++ .../src/vectorstores/elasticsearch.ts | 341 +++++++ .../src/vectorstores/lancedb.ts | 152 ++++ .../src/vectorstores/milvus.ts | 674 ++++++++++++++ .../src/vectorstores/myscale.ts | 314 +++++++ .../src/vectorstores/neo4j_vector.ts | 731 +++++++++++++++ .../src/vectorstores/opensearch.ts | 326 +++++++ .../src/vectorstores/prisma.ts | 511 +++++++++++ .../src/vectorstores/qdrant.ts | 260 ++++++ .../src/vectorstores/redis.ts | 458 ++++++++++ .../src/vectorstores/rockset.ts | 452 ++++++++++ .../src/vectorstores/singlestore.ts | 294 ++++++ .../vectorstores/tests/analyticdb.int.test.ts | 169 ++++ .../vectorstores/tests/cassandra.int.test.ts | 363 ++++++++ .../src/vectorstores/tests/chroma.int.test.ts | 163 ++++ .../src/vectorstores/tests/chroma.test.ts | 135 +++ .../vectorstores/tests/clickhouse.int.test.ts | 99 +++ .../src/vectorstores/tests/convex.int.test.ts | 43 + .../tests/convex/convex/_generated/api.d.ts | 39 + .../tests/convex/convex/_generated/api.js | 23 + .../convex/convex/_generated/dataModel.d.ts | 56 ++ .../convex/convex/_generated/server.d.ts | 143 +++ .../tests/convex/convex/_generated/server.js | 90 ++ .../tests/convex/convex/langchain/db.ts | 1 + .../vectorstores/tests/convex/convex/lib.ts | 45 + .../tests/convex/convex/schema.ts | 15 + .../vectorstores/tests/convex/package.json | 8 + .../tests/elasticsearch.int.test.ts | 111 +++ .../vectorstores/tests/lancedb.int.test.ts | 45 + .../src/vectorstores/tests/milvus.int.test.ts | 169 ++++ .../vectorstores/tests/myscale.int.test.ts | 90 ++ .../tests/neo4j_vector.int.test.ts | 471 ++++++++++ .../vectorstores/tests/opensearch.int.test.ts | 44 + 
.../src/vectorstores/tests/qdrant.int.test.ts | 50 ++ .../src/vectorstores/tests/qdrant.test.ts | 33 + .../src/vectorstores/tests/redis.int.test.ts | 71 ++ .../src/vectorstores/tests/redis.test.ts | 222 +++++ .../tests/singlestore.int.test.ts | 175 ++++ .../src/vectorstores/tests/tigris.test.ts | 76 ++ .../vectorstores/tests/typeorm.int.test.ts | 51 ++ .../src/vectorstores/tests/typesense.test.ts | 127 +++ .../vectorstores/tests/vectara.int.test.ts | 225 +++++ .../tests/vercel_postgres.int.test.ts | 129 +++ .../src/vectorstores/tests/voy.int.test.ts | 49 + .../src/vectorstores/tests/voy.test.ts | 56 ++ .../src/vectorstores/tests/xata.int.test.ts | 166 ++++ .../src/vectorstores/tigris.ts | 177 ++++ .../src/vectorstores/typeorm.ts | 298 +++++++ .../src/vectorstores/typesense.ts | 320 +++++++ .../src/vectorstores/vectara.ts | 532 +++++++++++ .../src/vectorstores/vercel_postgres.ts | 393 ++++++++ .../src/vectorstores/voy.ts | 191 ++++ .../src/vectorstores/xata.ts | 149 ++++ yarn.lock | 2 + 169 files changed, 18780 insertions(+), 5494 deletions(-) create mode 100644 libs/langchain-community/src/cache/cloudflare_kv.ts create mode 100644 libs/langchain-community/src/cache/momento.ts create mode 100644 libs/langchain-community/src/cache/tests/momento.test.ts create mode 100644 libs/langchain-community/src/cache/tests/upstash_redis.int.test.ts create mode 100644 libs/langchain-community/src/cache/tests/upstash_redis.test.ts create mode 100644 libs/langchain-community/src/cache/upstash_redis.ts create mode 100644 libs/langchain-community/src/callbacks/handlers/llmonitor.ts delete mode 100644 libs/langchain-community/src/chat_models.ts delete mode 100644 libs/langchain-community/src/chat_models/anthropic.ts delete mode 100644 libs/langchain-community/src/chat_models/tests/chatanthropic.int.test.ts delete mode 100644 libs/langchain-community/src/chat_models/tests/chatgooglepalm.int.test.ts delete mode 100644 
libs/langchain-community/src/chat_models/tests/chatgooglevertexai.int.test.ts delete mode 100644 libs/langchain-community/src/chat_models/tests/chatgooglevertexai_web.int.test.ts delete mode 100644 libs/langchain-community/src/chat_models/tests/chatllama_cpp.int.test.ts delete mode 100644 libs/langchain-community/src/chat_models/tests/chatollama.int.test.ts delete mode 100644 libs/langchain-community/src/chat_models/tests/chatopenai-extended.int.test.ts delete mode 100644 libs/langchain-community/src/chat_models/tests/chatopenai-vision.int.test.ts delete mode 100644 libs/langchain-community/src/chat_models/tests/chatopenai.int.test.ts delete mode 100644 libs/langchain-community/src/chat_models/tests/data/hotdog.jpg create mode 100644 libs/langchain-community/src/embeddings/bedrock.ts create mode 100644 libs/langchain-community/src/embeddings/cloudflare_workersai.ts create mode 100644 libs/langchain-community/src/embeddings/cohere.ts create mode 100644 libs/langchain-community/src/embeddings/googlepalm.ts create mode 100644 libs/langchain-community/src/embeddings/googlevertexai.ts create mode 100644 libs/langchain-community/src/embeddings/gradient_ai.ts create mode 100644 libs/langchain-community/src/embeddings/hf.ts create mode 100644 libs/langchain-community/src/embeddings/hf_transformers.ts create mode 100644 libs/langchain-community/src/embeddings/llama_cpp.ts create mode 100644 libs/langchain-community/src/embeddings/minimax.ts create mode 100644 libs/langchain-community/src/embeddings/ollama.ts create mode 100644 libs/langchain-community/src/embeddings/tensorflow.ts create mode 100644 libs/langchain-community/src/embeddings/voyage.ts create mode 100644 libs/langchain-community/src/graphs/neo4j_graph.ts create mode 100644 libs/langchain-community/src/graphs/tests/neo4j_graph.int.test.ts delete mode 100644 libs/langchain-community/src/index.ts delete mode 100644 libs/langchain-community/src/llms.ts delete mode 100644 
libs/langchain-community/src/llms/tests/fake.test.ts delete mode 100644 libs/langchain-community/src/llms/tests/openai-chat.int.test.ts delete mode 100644 libs/langchain-community/src/llms/tests/openai.int.test.ts delete mode 100644 libs/langchain-community/src/llms/tests/prompt_layer.int.test.ts create mode 100644 libs/langchain-community/src/retrievers/amazon_kendra.ts create mode 100644 libs/langchain-community/src/retrievers/chaindesk.ts create mode 100644 libs/langchain-community/src/retrievers/databerry.ts create mode 100644 libs/langchain-community/src/retrievers/metal.ts create mode 100644 libs/langchain-community/src/retrievers/supabase.ts create mode 100644 libs/langchain-community/src/retrievers/tavily_search_api.ts create mode 100644 libs/langchain-community/src/retrievers/tests/amazon_kendra.int.test.ts create mode 100644 libs/langchain-community/src/retrievers/tests/metal.int.test.ts create mode 100644 libs/langchain-community/src/retrievers/tests/supabase.int.test.ts create mode 100644 libs/langchain-community/src/retrievers/tests/tavily_search_api.int.test.ts create mode 100644 libs/langchain-community/src/retrievers/zep.ts delete mode 100644 libs/langchain-community/src/tests/chat_models.test.ts delete mode 100644 libs/langchain-community/src/tests/integration.int.test.ts delete mode 100644 libs/langchain-community/src/tests/llms.test.ts delete mode 100644 libs/langchain-community/src/tests/vectorstores.test.ts delete mode 100644 libs/langchain-community/src/tools/google_calendar/base.ts delete mode 100644 libs/langchain-community/src/tools/google_calendar/commands/run-create-events.ts delete mode 100644 libs/langchain-community/src/tools/google_calendar/commands/run-view-events.ts delete mode 100644 libs/langchain-community/src/tools/google_calendar/create.ts delete mode 100644 libs/langchain-community/src/tools/google_calendar/descriptions.ts delete mode 100644 libs/langchain-community/src/tools/google_calendar/index.ts delete mode 100644 
libs/langchain-community/src/tools/google_calendar/prompts/create-event-prompt.ts delete mode 100644 libs/langchain-community/src/tools/google_calendar/prompts/index.ts delete mode 100644 libs/langchain-community/src/tools/google_calendar/prompts/view-events-prompt.ts delete mode 100644 libs/langchain-community/src/tools/google_calendar/utils/get-timezone-offset-in-hours.ts delete mode 100644 libs/langchain-community/src/tools/google_calendar/view.ts rename libs/langchain-community/src/tools/{IFTTTWebhook.ts => ifttt.ts} (100%) delete mode 100644 libs/langchain-community/src/tools/json.ts delete mode 100644 libs/langchain-community/src/tools/tests/aiplugin.int.test.ts delete mode 100644 libs/langchain-community/src/tools/tests/brave_search.int.test.ts delete mode 100644 libs/langchain-community/src/tools/tests/chain.test.ts delete mode 100644 libs/langchain-community/src/tools/tests/gmail.test.ts delete mode 100644 libs/langchain-community/src/tools/tests/google_calendar.test.ts delete mode 100644 libs/langchain-community/src/tools/tests/google_custom_search.int.test.ts delete mode 100644 libs/langchain-community/src/tools/tests/google_places.int.test.ts delete mode 100644 libs/langchain-community/src/tools/tests/searchapi.test.ts delete mode 100644 libs/langchain-community/src/tools/tests/serpapi.test.ts delete mode 100644 libs/langchain-community/src/tools/tests/webbrowser.int.test.ts delete mode 100644 libs/langchain-community/src/tools/tests/webbrowser.test.ts delete mode 100644 libs/langchain-community/src/tools/tests/wikipedia.int.test.ts delete mode 100644 libs/langchain-community/src/tools/tests/wolframalpha.test.ts create mode 100644 libs/langchain-community/src/util/chunk.ts create mode 100644 libs/langchain-community/src/util/convex.ts create mode 100644 libs/langchain-community/src/util/momento.ts create mode 100644 libs/langchain-community/src/util/testing.ts create mode 100644 libs/langchain-community/src/util/time.ts delete mode 100644 
libs/langchain-community/src/vectorstores.ts create mode 100644 libs/langchain-community/src/vectorstores/analyticdb.ts create mode 100644 libs/langchain-community/src/vectorstores/cassandra.ts create mode 100644 libs/langchain-community/src/vectorstores/chroma.ts create mode 100644 libs/langchain-community/src/vectorstores/clickhouse.ts create mode 100644 libs/langchain-community/src/vectorstores/cloudflare_vectorize.ts create mode 100644 libs/langchain-community/src/vectorstores/convex.ts create mode 100644 libs/langchain-community/src/vectorstores/elasticsearch.ts create mode 100644 libs/langchain-community/src/vectorstores/lancedb.ts create mode 100644 libs/langchain-community/src/vectorstores/milvus.ts create mode 100644 libs/langchain-community/src/vectorstores/myscale.ts create mode 100644 libs/langchain-community/src/vectorstores/neo4j_vector.ts create mode 100644 libs/langchain-community/src/vectorstores/opensearch.ts create mode 100644 libs/langchain-community/src/vectorstores/prisma.ts create mode 100644 libs/langchain-community/src/vectorstores/qdrant.ts create mode 100644 libs/langchain-community/src/vectorstores/redis.ts create mode 100644 libs/langchain-community/src/vectorstores/rockset.ts create mode 100644 libs/langchain-community/src/vectorstores/singlestore.ts create mode 100644 libs/langchain-community/src/vectorstores/tests/analyticdb.int.test.ts create mode 100644 libs/langchain-community/src/vectorstores/tests/cassandra.int.test.ts create mode 100644 libs/langchain-community/src/vectorstores/tests/chroma.int.test.ts create mode 100644 libs/langchain-community/src/vectorstores/tests/chroma.test.ts create mode 100644 libs/langchain-community/src/vectorstores/tests/clickhouse.int.test.ts create mode 100644 libs/langchain-community/src/vectorstores/tests/convex.int.test.ts create mode 100644 libs/langchain-community/src/vectorstores/tests/convex/convex/_generated/api.d.ts create mode 100644 
libs/langchain-community/src/vectorstores/tests/convex/convex/_generated/api.js create mode 100644 libs/langchain-community/src/vectorstores/tests/convex/convex/_generated/dataModel.d.ts create mode 100644 libs/langchain-community/src/vectorstores/tests/convex/convex/_generated/server.d.ts create mode 100644 libs/langchain-community/src/vectorstores/tests/convex/convex/_generated/server.js create mode 100644 libs/langchain-community/src/vectorstores/tests/convex/convex/langchain/db.ts create mode 100644 libs/langchain-community/src/vectorstores/tests/convex/convex/lib.ts create mode 100644 libs/langchain-community/src/vectorstores/tests/convex/convex/schema.ts create mode 100644 libs/langchain-community/src/vectorstores/tests/convex/package.json create mode 100644 libs/langchain-community/src/vectorstores/tests/elasticsearch.int.test.ts create mode 100644 libs/langchain-community/src/vectorstores/tests/lancedb.int.test.ts create mode 100644 libs/langchain-community/src/vectorstores/tests/milvus.int.test.ts create mode 100644 libs/langchain-community/src/vectorstores/tests/myscale.int.test.ts create mode 100644 libs/langchain-community/src/vectorstores/tests/neo4j_vector.int.test.ts create mode 100644 libs/langchain-community/src/vectorstores/tests/opensearch.int.test.ts create mode 100644 libs/langchain-community/src/vectorstores/tests/qdrant.int.test.ts create mode 100644 libs/langchain-community/src/vectorstores/tests/qdrant.test.ts create mode 100644 libs/langchain-community/src/vectorstores/tests/redis.int.test.ts create mode 100644 libs/langchain-community/src/vectorstores/tests/redis.test.ts create mode 100644 libs/langchain-community/src/vectorstores/tests/singlestore.int.test.ts create mode 100644 libs/langchain-community/src/vectorstores/tests/tigris.test.ts create mode 100644 libs/langchain-community/src/vectorstores/tests/typeorm.int.test.ts create mode 100644 libs/langchain-community/src/vectorstores/tests/typesense.test.ts create mode 100644 
libs/langchain-community/src/vectorstores/tests/vectara.int.test.ts create mode 100644 libs/langchain-community/src/vectorstores/tests/vercel_postgres.int.test.ts create mode 100644 libs/langchain-community/src/vectorstores/tests/voy.int.test.ts create mode 100644 libs/langchain-community/src/vectorstores/tests/voy.test.ts create mode 100644 libs/langchain-community/src/vectorstores/tests/xata.int.test.ts create mode 100644 libs/langchain-community/src/vectorstores/tigris.ts create mode 100644 libs/langchain-community/src/vectorstores/typeorm.ts create mode 100644 libs/langchain-community/src/vectorstores/typesense.ts create mode 100644 libs/langchain-community/src/vectorstores/vectara.ts create mode 100644 libs/langchain-community/src/vectorstores/vercel_postgres.ts create mode 100644 libs/langchain-community/src/vectorstores/voy.ts create mode 100644 libs/langchain-community/src/vectorstores/xata.ts diff --git a/libs/langchain-community/.gitignore b/libs/langchain-community/.gitignore index ae701c7fe705..34d8c555299c 100644 --- a/libs/langchain-community/.gitignore +++ b/libs/langchain-community/.gitignore @@ -1,3 +1,315 @@ -node_modules -dist -.yarn +load.cjs +load.js +load.d.ts +load/serializable.cjs +load/serializable.js +load/serializable.d.ts +tools/aiplugin.cjs +tools/aiplugin.js +tools/aiplugin.d.ts +tools/aws_sfn.cjs +tools/aws_sfn.js +tools/aws_sfn.d.ts +tools/bingserpapi.cjs +tools/bingserpapi.js +tools/bingserpapi.d.ts +tools/brave_search.cjs +tools/brave_search.js +tools/brave_search.d.ts +tools/connery.cjs +tools/connery.js +tools/connery.d.ts +tools/dadjokeapi.cjs +tools/dadjokeapi.js +tools/dadjokeapi.d.ts +tools/dataforseo_api_search.cjs +tools/dataforseo_api_search.js +tools/dataforseo_api_search.d.ts +tools/gmail.cjs +tools/gmail.js +tools/gmail.d.ts +tools/google_custom_search.cjs +tools/google_custom_search.js +tools/google_custom_search.d.ts +tools/google_places.cjs +tools/google_places.js +tools/google_places.d.ts +tools/ifttt.cjs 
+tools/ifttt.js +tools/ifttt.d.ts +tools/searchapi.cjs +tools/searchapi.js +tools/searchapi.d.ts +tools/searxng_search.cjs +tools/searxng_search.js +tools/searxng_search.d.ts +tools/serpapi.cjs +tools/serpapi.js +tools/serpapi.d.ts +tools/serper.cjs +tools/serper.js +tools/serper.d.ts +tools/wikipedia_query_run.cjs +tools/wikipedia_query_run.js +tools/wikipedia_query_run.d.ts +tools/wolframalpha.cjs +tools/wolframalpha.js +tools/wolframalpha.d.ts +embeddings/bedrock.cjs +embeddings/bedrock.js +embeddings/bedrock.d.ts +embeddings/cloudflare_workersai.cjs +embeddings/cloudflare_workersai.js +embeddings/cloudflare_workersai.d.ts +embeddings/cohere.cjs +embeddings/cohere.js +embeddings/cohere.d.ts +embeddings/googlepalm.cjs +embeddings/googlepalm.js +embeddings/googlepalm.d.ts +embeddings/googlevertexai.cjs +embeddings/googlevertexai.js +embeddings/googlevertexai.d.ts +embeddings/gradient_ai.cjs +embeddings/gradient_ai.js +embeddings/gradient_ai.d.ts +embeddings/hf.cjs +embeddings/hf.js +embeddings/hf.d.ts +embeddings/hf_transformers.cjs +embeddings/hf_transformers.js +embeddings/hf_transformers.d.ts +embeddings/llama_cpp.cjs +embeddings/llama_cpp.js +embeddings/llama_cpp.d.ts +embeddings/minimax.cjs +embeddings/minimax.js +embeddings/minimax.d.ts +embeddings/ollama.cjs +embeddings/ollama.js +embeddings/ollama.d.ts +embeddings/tensorflow.cjs +embeddings/tensorflow.js +embeddings/tensorflow.d.ts +embeddings/voyage.cjs +embeddings/voyage.js +embeddings/voyage.d.ts +llms/ai21.cjs +llms/ai21.js +llms/ai21.d.ts +llms/aleph_alpha.cjs +llms/aleph_alpha.js +llms/aleph_alpha.d.ts +llms/bedrock.cjs +llms/bedrock.js +llms/bedrock.d.ts +llms/bedrock/web.cjs +llms/bedrock/web.js +llms/bedrock/web.d.ts +llms/cloudflare_workersai.cjs +llms/cloudflare_workersai.js +llms/cloudflare_workersai.d.ts +llms/cohere.cjs +llms/cohere.js +llms/cohere.d.ts +llms/fireworks.cjs +llms/fireworks.js +llms/fireworks.d.ts +llms/googlepalm.cjs +llms/googlepalm.js +llms/googlepalm.d.ts 
+llms/googlevertexai.cjs +llms/googlevertexai.js +llms/googlevertexai.d.ts +llms/googlevertexai/web.cjs +llms/googlevertexai/web.js +llms/googlevertexai/web.d.ts +llms/gradient_ai.cjs +llms/gradient_ai.js +llms/gradient_ai.d.ts +llms/hf.cjs +llms/hf.js +llms/hf.d.ts +llms/llama_cpp.cjs +llms/llama_cpp.js +llms/llama_cpp.d.ts +llms/ollama.cjs +llms/ollama.js +llms/ollama.d.ts +llms/portkey.cjs +llms/portkey.js +llms/portkey.d.ts +llms/raycast.cjs +llms/raycast.js +llms/raycast.d.ts +llms/replicate.cjs +llms/replicate.js +llms/replicate.d.ts +llms/sagemaker_endpoint.cjs +llms/sagemaker_endpoint.js +llms/sagemaker_endpoint.d.ts +llms/watsonx_ai.cjs +llms/watsonx_ai.js +llms/watsonx_ai.d.ts +llms/writer.cjs +llms/writer.js +llms/writer.d.ts +llms/yandex.cjs +llms/yandex.js +llms/yandex.d.ts +vectorstores/analyticdb.cjs +vectorstores/analyticdb.js +vectorstores/analyticdb.d.ts +vectorstores/cassandra.cjs +vectorstores/cassandra.js +vectorstores/cassandra.d.ts +vectorstores/chroma.cjs +vectorstores/chroma.js +vectorstores/chroma.d.ts +vectorstores/clickhouse.cjs +vectorstores/clickhouse.js +vectorstores/clickhouse.d.ts +vectorstores/cloudflare_vectorize.cjs +vectorstores/cloudflare_vectorize.js +vectorstores/cloudflare_vectorize.d.ts +vectorstores/convex.cjs +vectorstores/convex.js +vectorstores/convex.d.ts +vectorstores/elasticsearch.cjs +vectorstores/elasticsearch.js +vectorstores/elasticsearch.d.ts +vectorstores/lancedb.cjs +vectorstores/lancedb.js +vectorstores/lancedb.d.ts +vectorstores/milvus.cjs +vectorstores/milvus.js +vectorstores/milvus.d.ts +vectorstores/myscale.cjs +vectorstores/myscale.js +vectorstores/myscale.d.ts +vectorstores/neo4j_vector.cjs +vectorstores/neo4j_vector.js +vectorstores/neo4j_vector.d.ts +vectorstores/opensearch.cjs +vectorstores/opensearch.js +vectorstores/opensearch.d.ts +vectorstores/prisma.cjs +vectorstores/prisma.js +vectorstores/prisma.d.ts +vectorstores/qdrant.cjs +vectorstores/qdrant.js +vectorstores/qdrant.d.ts 
+vectorstores/redis.cjs +vectorstores/redis.js +vectorstores/redis.d.ts +vectorstores/rockset.cjs +vectorstores/rockset.js +vectorstores/rockset.d.ts +vectorstores/singlestore.cjs +vectorstores/singlestore.js +vectorstores/singlestore.d.ts +vectorstores/tigris.cjs +vectorstores/tigris.js +vectorstores/tigris.d.ts +vectorstores/typeorm.cjs +vectorstores/typeorm.js +vectorstores/typeorm.d.ts +vectorstores/typesense.cjs +vectorstores/typesense.js +vectorstores/typesense.d.ts +vectorstores/vectara.cjs +vectorstores/vectara.js +vectorstores/vectara.d.ts +vectorstores/vercel_postgres.cjs +vectorstores/vercel_postgres.js +vectorstores/vercel_postgres.d.ts +vectorstores/voy.cjs +vectorstores/voy.js +vectorstores/voy.d.ts +vectorstores/xata.cjs +vectorstores/xata.js +vectorstores/xata.d.ts +chat_models/baiduwenxin.cjs +chat_models/baiduwenxin.js +chat_models/baiduwenxin.d.ts +chat_models/bedrock.cjs +chat_models/bedrock.js +chat_models/bedrock.d.ts +chat_models/bedrock/web.cjs +chat_models/bedrock/web.js +chat_models/bedrock/web.d.ts +chat_models/cloudflare_workersai.cjs +chat_models/cloudflare_workersai.js +chat_models/cloudflare_workersai.d.ts +chat_models/fireworks.cjs +chat_models/fireworks.js +chat_models/fireworks.d.ts +chat_models/googlevertexai.cjs +chat_models/googlevertexai.js +chat_models/googlevertexai.d.ts +chat_models/googlevertexai/web.cjs +chat_models/googlevertexai/web.js +chat_models/googlevertexai/web.d.ts +chat_models/googlepalm.cjs +chat_models/googlepalm.js +chat_models/googlepalm.d.ts +chat_models/iflytek_xinghuo.cjs +chat_models/iflytek_xinghuo.js +chat_models/iflytek_xinghuo.d.ts +chat_models/iflytek_xinghuo/web.cjs +chat_models/iflytek_xinghuo/web.js +chat_models/iflytek_xinghuo/web.d.ts +chat_models/llama_cpp.cjs +chat_models/llama_cpp.js +chat_models/llama_cpp.d.ts +chat_models/minimax.cjs +chat_models/minimax.js +chat_models/minimax.d.ts +chat_models/ollama.cjs +chat_models/ollama.js +chat_models/ollama.d.ts +chat_models/portkey.cjs 
+chat_models/portkey.js +chat_models/portkey.d.ts +chat_models/yandex.cjs +chat_models/yandex.js +chat_models/yandex.d.ts +callbacks/handlers/llmonitor.cjs +callbacks/handlers/llmonitor.js +callbacks/handlers/llmonitor.d.ts +retrievers/amazon_kendra.cjs +retrievers/amazon_kendra.js +retrievers/amazon_kendra.d.ts +retrievers/chaindesk.cjs +retrievers/chaindesk.js +retrievers/chaindesk.d.ts +retrievers/databerry.cjs +retrievers/databerry.js +retrievers/databerry.d.ts +retrievers/metal.cjs +retrievers/metal.js +retrievers/metal.d.ts +retrievers/supabase.cjs +retrievers/supabase.js +retrievers/supabase.d.ts +retrievers/tavily_search_api.cjs +retrievers/tavily_search_api.js +retrievers/tavily_search_api.d.ts +retrievers/zep.cjs +retrievers/zep.js +retrievers/zep.d.ts +cache/cloudflare_kv.cjs +cache/cloudflare_kv.js +cache/cloudflare_kv.d.ts +cache/momento.cjs +cache/momento.js +cache/momento.d.ts +cache/upstash_redis.cjs +cache/upstash_redis.js +cache/upstash_redis.d.ts +graphs/neo4j_graph.cjs +graphs/neo4j_graph.js +graphs/neo4j_graph.d.ts +index.cjs +index.js +index.d.ts diff --git a/libs/langchain-community/package.json b/libs/langchain-community/package.json index 09ee8ae750cd..ec3fcd3362fe 100644 --- a/libs/langchain-community/package.json +++ b/libs/langchain-community/package.json @@ -34,7 +34,9 @@ "license": "MIT", "dependencies": { "@langchain/core": "~0.0.9", - "@langchain/openai": "~0.0.1" + "@langchain/openai": "~0.0.1", + "uuid": "^9.0.0", + "zod": "^3.22.3" }, "devDependencies": { "@aws-crypto/sha256-js": "^5.0.0", @@ -592,15 +594,842 @@ "access": "public" }, "exports": { - ".": { - "types": "./index.d.ts", - "import": "./index.js", - "require": "./index.cjs" + "./load": { + "types": "./load.d.ts", + "import": "./load.js", + "require": "./load.cjs" + }, + "./load/serializable": { + "types": "./load/serializable.d.ts", + "import": "./load/serializable.js", + "require": "./load/serializable.cjs" + }, + "./tools/aiplugin": { + "types": 
"./tools/aiplugin.d.ts", + "import": "./tools/aiplugin.js", + "require": "./tools/aiplugin.cjs" + }, + "./tools/aws_sfn": { + "types": "./tools/aws_sfn.d.ts", + "import": "./tools/aws_sfn.js", + "require": "./tools/aws_sfn.cjs" + }, + "./tools/bingserpapi": { + "types": "./tools/bingserpapi.d.ts", + "import": "./tools/bingserpapi.js", + "require": "./tools/bingserpapi.cjs" + }, + "./tools/brave_search": { + "types": "./tools/brave_search.d.ts", + "import": "./tools/brave_search.js", + "require": "./tools/brave_search.cjs" + }, + "./tools/connery": { + "types": "./tools/connery.d.ts", + "import": "./tools/connery.js", + "require": "./tools/connery.cjs" + }, + "./tools/dadjokeapi": { + "types": "./tools/dadjokeapi.d.ts", + "import": "./tools/dadjokeapi.js", + "require": "./tools/dadjokeapi.cjs" + }, + "./tools/dataforseo_api_search": { + "types": "./tools/dataforseo_api_search.d.ts", + "import": "./tools/dataforseo_api_search.js", + "require": "./tools/dataforseo_api_search.cjs" + }, + "./tools/gmail": { + "types": "./tools/gmail.d.ts", + "import": "./tools/gmail.js", + "require": "./tools/gmail.cjs" + }, + "./tools/google_custom_search": { + "types": "./tools/google_custom_search.d.ts", + "import": "./tools/google_custom_search.js", + "require": "./tools/google_custom_search.cjs" + }, + "./tools/google_places": { + "types": "./tools/google_places.d.ts", + "import": "./tools/google_places.js", + "require": "./tools/google_places.cjs" + }, + "./tools/ifttt": { + "types": "./tools/ifttt.d.ts", + "import": "./tools/ifttt.js", + "require": "./tools/ifttt.cjs" + }, + "./tools/searchapi": { + "types": "./tools/searchapi.d.ts", + "import": "./tools/searchapi.js", + "require": "./tools/searchapi.cjs" + }, + "./tools/searxng_search": { + "types": "./tools/searxng_search.d.ts", + "import": "./tools/searxng_search.js", + "require": "./tools/searxng_search.cjs" + }, + "./tools/serpapi": { + "types": "./tools/serpapi.d.ts", + "import": "./tools/serpapi.js", + "require": 
"./tools/serpapi.cjs" + }, + "./tools/serper": { + "types": "./tools/serper.d.ts", + "import": "./tools/serper.js", + "require": "./tools/serper.cjs" + }, + "./tools/wikipedia_query_run": { + "types": "./tools/wikipedia_query_run.d.ts", + "import": "./tools/wikipedia_query_run.js", + "require": "./tools/wikipedia_query_run.cjs" + }, + "./tools/wolframalpha": { + "types": "./tools/wolframalpha.d.ts", + "import": "./tools/wolframalpha.js", + "require": "./tools/wolframalpha.cjs" + }, + "./embeddings/bedrock": { + "types": "./embeddings/bedrock.d.ts", + "import": "./embeddings/bedrock.js", + "require": "./embeddings/bedrock.cjs" + }, + "./embeddings/cloudflare_workersai": { + "types": "./embeddings/cloudflare_workersai.d.ts", + "import": "./embeddings/cloudflare_workersai.js", + "require": "./embeddings/cloudflare_workersai.cjs" + }, + "./embeddings/cohere": { + "types": "./embeddings/cohere.d.ts", + "import": "./embeddings/cohere.js", + "require": "./embeddings/cohere.cjs" + }, + "./embeddings/googlepalm": { + "types": "./embeddings/googlepalm.d.ts", + "import": "./embeddings/googlepalm.js", + "require": "./embeddings/googlepalm.cjs" + }, + "./embeddings/googlevertexai": { + "types": "./embeddings/googlevertexai.d.ts", + "import": "./embeddings/googlevertexai.js", + "require": "./embeddings/googlevertexai.cjs" + }, + "./embeddings/gradient_ai": { + "types": "./embeddings/gradient_ai.d.ts", + "import": "./embeddings/gradient_ai.js", + "require": "./embeddings/gradient_ai.cjs" + }, + "./embeddings/hf": { + "types": "./embeddings/hf.d.ts", + "import": "./embeddings/hf.js", + "require": "./embeddings/hf.cjs" + }, + "./embeddings/hf_transformers": { + "types": "./embeddings/hf_transformers.d.ts", + "import": "./embeddings/hf_transformers.js", + "require": "./embeddings/hf_transformers.cjs" + }, + "./embeddings/llama_cpp": { + "types": "./embeddings/llama_cpp.d.ts", + "import": "./embeddings/llama_cpp.js", + "require": "./embeddings/llama_cpp.cjs" + }, + 
"./embeddings/minimax": { + "types": "./embeddings/minimax.d.ts", + "import": "./embeddings/minimax.js", + "require": "./embeddings/minimax.cjs" + }, + "./embeddings/ollama": { + "types": "./embeddings/ollama.d.ts", + "import": "./embeddings/ollama.js", + "require": "./embeddings/ollama.cjs" + }, + "./embeddings/tensorflow": { + "types": "./embeddings/tensorflow.d.ts", + "import": "./embeddings/tensorflow.js", + "require": "./embeddings/tensorflow.cjs" + }, + "./embeddings/voyage": { + "types": "./embeddings/voyage.d.ts", + "import": "./embeddings/voyage.js", + "require": "./embeddings/voyage.cjs" + }, + "./llms/ai21": { + "types": "./llms/ai21.d.ts", + "import": "./llms/ai21.js", + "require": "./llms/ai21.cjs" + }, + "./llms/aleph_alpha": { + "types": "./llms/aleph_alpha.d.ts", + "import": "./llms/aleph_alpha.js", + "require": "./llms/aleph_alpha.cjs" + }, + "./llms/bedrock": { + "types": "./llms/bedrock.d.ts", + "import": "./llms/bedrock.js", + "require": "./llms/bedrock.cjs" + }, + "./llms/bedrock/web": { + "types": "./llms/bedrock/web.d.ts", + "import": "./llms/bedrock/web.js", + "require": "./llms/bedrock/web.cjs" + }, + "./llms/cloudflare_workersai": { + "types": "./llms/cloudflare_workersai.d.ts", + "import": "./llms/cloudflare_workersai.js", + "require": "./llms/cloudflare_workersai.cjs" + }, + "./llms/cohere": { + "types": "./llms/cohere.d.ts", + "import": "./llms/cohere.js", + "require": "./llms/cohere.cjs" + }, + "./llms/fireworks": { + "types": "./llms/fireworks.d.ts", + "import": "./llms/fireworks.js", + "require": "./llms/fireworks.cjs" + }, + "./llms/googlepalm": { + "types": "./llms/googlepalm.d.ts", + "import": "./llms/googlepalm.js", + "require": "./llms/googlepalm.cjs" + }, + "./llms/googlevertexai": { + "types": "./llms/googlevertexai.d.ts", + "import": "./llms/googlevertexai.js", + "require": "./llms/googlevertexai.cjs" + }, + "./llms/googlevertexai/web": { + "types": "./llms/googlevertexai/web.d.ts", + "import": "./llms/googlevertexai/web.js", 
+ "require": "./llms/googlevertexai/web.cjs" + }, + "./llms/gradient_ai": { + "types": "./llms/gradient_ai.d.ts", + "import": "./llms/gradient_ai.js", + "require": "./llms/gradient_ai.cjs" + }, + "./llms/hf": { + "types": "./llms/hf.d.ts", + "import": "./llms/hf.js", + "require": "./llms/hf.cjs" + }, + "./llms/llama_cpp": { + "types": "./llms/llama_cpp.d.ts", + "import": "./llms/llama_cpp.js", + "require": "./llms/llama_cpp.cjs" + }, + "./llms/ollama": { + "types": "./llms/ollama.d.ts", + "import": "./llms/ollama.js", + "require": "./llms/ollama.cjs" + }, + "./llms/portkey": { + "types": "./llms/portkey.d.ts", + "import": "./llms/portkey.js", + "require": "./llms/portkey.cjs" + }, + "./llms/raycast": { + "types": "./llms/raycast.d.ts", + "import": "./llms/raycast.js", + "require": "./llms/raycast.cjs" + }, + "./llms/replicate": { + "types": "./llms/replicate.d.ts", + "import": "./llms/replicate.js", + "require": "./llms/replicate.cjs" + }, + "./llms/sagemaker_endpoint": { + "types": "./llms/sagemaker_endpoint.d.ts", + "import": "./llms/sagemaker_endpoint.js", + "require": "./llms/sagemaker_endpoint.cjs" + }, + "./llms/watsonx_ai": { + "types": "./llms/watsonx_ai.d.ts", + "import": "./llms/watsonx_ai.js", + "require": "./llms/watsonx_ai.cjs" + }, + "./llms/writer": { + "types": "./llms/writer.d.ts", + "import": "./llms/writer.js", + "require": "./llms/writer.cjs" + }, + "./llms/yandex": { + "types": "./llms/yandex.d.ts", + "import": "./llms/yandex.js", + "require": "./llms/yandex.cjs" + }, + "./vectorstores/analyticdb": { + "types": "./vectorstores/analyticdb.d.ts", + "import": "./vectorstores/analyticdb.js", + "require": "./vectorstores/analyticdb.cjs" + }, + "./vectorstores/cassandra": { + "types": "./vectorstores/cassandra.d.ts", + "import": "./vectorstores/cassandra.js", + "require": "./vectorstores/cassandra.cjs" + }, + "./vectorstores/chroma": { + "types": "./vectorstores/chroma.d.ts", + "import": "./vectorstores/chroma.js", + "require": 
"./vectorstores/chroma.cjs" + }, + "./vectorstores/clickhouse": { + "types": "./vectorstores/clickhouse.d.ts", + "import": "./vectorstores/clickhouse.js", + "require": "./vectorstores/clickhouse.cjs" + }, + "./vectorstores/cloudflare_vectorize": { + "types": "./vectorstores/cloudflare_vectorize.d.ts", + "import": "./vectorstores/cloudflare_vectorize.js", + "require": "./vectorstores/cloudflare_vectorize.cjs" + }, + "./vectorstores/convex": { + "types": "./vectorstores/convex.d.ts", + "import": "./vectorstores/convex.js", + "require": "./vectorstores/convex.cjs" + }, + "./vectorstores/elasticsearch": { + "types": "./vectorstores/elasticsearch.d.ts", + "import": "./vectorstores/elasticsearch.js", + "require": "./vectorstores/elasticsearch.cjs" + }, + "./vectorstores/lancedb": { + "types": "./vectorstores/lancedb.d.ts", + "import": "./vectorstores/lancedb.js", + "require": "./vectorstores/lancedb.cjs" + }, + "./vectorstores/milvus": { + "types": "./vectorstores/milvus.d.ts", + "import": "./vectorstores/milvus.js", + "require": "./vectorstores/milvus.cjs" + }, + "./vectorstores/myscale": { + "types": "./vectorstores/myscale.d.ts", + "import": "./vectorstores/myscale.js", + "require": "./vectorstores/myscale.cjs" + }, + "./vectorstores/neo4j_vector": { + "types": "./vectorstores/neo4j_vector.d.ts", + "import": "./vectorstores/neo4j_vector.js", + "require": "./vectorstores/neo4j_vector.cjs" + }, + "./vectorstores/opensearch": { + "types": "./vectorstores/opensearch.d.ts", + "import": "./vectorstores/opensearch.js", + "require": "./vectorstores/opensearch.cjs" + }, + "./vectorstores/prisma": { + "types": "./vectorstores/prisma.d.ts", + "import": "./vectorstores/prisma.js", + "require": "./vectorstores/prisma.cjs" + }, + "./vectorstores/qdrant": { + "types": "./vectorstores/qdrant.d.ts", + "import": "./vectorstores/qdrant.js", + "require": "./vectorstores/qdrant.cjs" + }, + "./vectorstores/redis": { + "types": "./vectorstores/redis.d.ts", + "import": 
"./vectorstores/redis.js", + "require": "./vectorstores/redis.cjs" + }, + "./vectorstores/rockset": { + "types": "./vectorstores/rockset.d.ts", + "import": "./vectorstores/rockset.js", + "require": "./vectorstores/rockset.cjs" + }, + "./vectorstores/singlestore": { + "types": "./vectorstores/singlestore.d.ts", + "import": "./vectorstores/singlestore.js", + "require": "./vectorstores/singlestore.cjs" + }, + "./vectorstores/tigris": { + "types": "./vectorstores/tigris.d.ts", + "import": "./vectorstores/tigris.js", + "require": "./vectorstores/tigris.cjs" + }, + "./vectorstores/typeorm": { + "types": "./vectorstores/typeorm.d.ts", + "import": "./vectorstores/typeorm.js", + "require": "./vectorstores/typeorm.cjs" + }, + "./vectorstores/typesense": { + "types": "./vectorstores/typesense.d.ts", + "import": "./vectorstores/typesense.js", + "require": "./vectorstores/typesense.cjs" + }, + "./vectorstores/vectara": { + "types": "./vectorstores/vectara.d.ts", + "import": "./vectorstores/vectara.js", + "require": "./vectorstores/vectara.cjs" + }, + "./vectorstores/vercel_postgres": { + "types": "./vectorstores/vercel_postgres.d.ts", + "import": "./vectorstores/vercel_postgres.js", + "require": "./vectorstores/vercel_postgres.cjs" + }, + "./vectorstores/voy": { + "types": "./vectorstores/voy.d.ts", + "import": "./vectorstores/voy.js", + "require": "./vectorstores/voy.cjs" + }, + "./vectorstores/xata": { + "types": "./vectorstores/xata.d.ts", + "import": "./vectorstores/xata.js", + "require": "./vectorstores/xata.cjs" + }, + "./chat_models/baiduwenxin": { + "types": "./chat_models/baiduwenxin.d.ts", + "import": "./chat_models/baiduwenxin.js", + "require": "./chat_models/baiduwenxin.cjs" + }, + "./chat_models/bedrock": { + "types": "./chat_models/bedrock.d.ts", + "import": "./chat_models/bedrock.js", + "require": "./chat_models/bedrock.cjs" + }, + "./chat_models/bedrock/web": { + "types": "./chat_models/bedrock/web.d.ts", + "import": "./chat_models/bedrock/web.js", + "require": 
"./chat_models/bedrock/web.cjs" + }, + "./chat_models/cloudflare_workersai": { + "types": "./chat_models/cloudflare_workersai.d.ts", + "import": "./chat_models/cloudflare_workersai.js", + "require": "./chat_models/cloudflare_workersai.cjs" + }, + "./chat_models/fireworks": { + "types": "./chat_models/fireworks.d.ts", + "import": "./chat_models/fireworks.js", + "require": "./chat_models/fireworks.cjs" + }, + "./chat_models/googlevertexai": { + "types": "./chat_models/googlevertexai.d.ts", + "import": "./chat_models/googlevertexai.js", + "require": "./chat_models/googlevertexai.cjs" + }, + "./chat_models/googlevertexai/web": { + "types": "./chat_models/googlevertexai/web.d.ts", + "import": "./chat_models/googlevertexai/web.js", + "require": "./chat_models/googlevertexai/web.cjs" + }, + "./chat_models/googlepalm": { + "types": "./chat_models/googlepalm.d.ts", + "import": "./chat_models/googlepalm.js", + "require": "./chat_models/googlepalm.cjs" + }, + "./chat_models/iflytek_xinghuo": { + "types": "./chat_models/iflytek_xinghuo.d.ts", + "import": "./chat_models/iflytek_xinghuo.js", + "require": "./chat_models/iflytek_xinghuo.cjs" + }, + "./chat_models/iflytek_xinghuo/web": { + "types": "./chat_models/iflytek_xinghuo/web.d.ts", + "import": "./chat_models/iflytek_xinghuo/web.js", + "require": "./chat_models/iflytek_xinghuo/web.cjs" + }, + "./chat_models/llama_cpp": { + "types": "./chat_models/llama_cpp.d.ts", + "import": "./chat_models/llama_cpp.js", + "require": "./chat_models/llama_cpp.cjs" + }, + "./chat_models/minimax": { + "types": "./chat_models/minimax.d.ts", + "import": "./chat_models/minimax.js", + "require": "./chat_models/minimax.cjs" + }, + "./chat_models/ollama": { + "types": "./chat_models/ollama.d.ts", + "import": "./chat_models/ollama.js", + "require": "./chat_models/ollama.cjs" + }, + "./chat_models/portkey": { + "types": "./chat_models/portkey.d.ts", + "import": "./chat_models/portkey.js", + "require": "./chat_models/portkey.cjs" + }, + 
"./chat_models/yandex": { + "types": "./chat_models/yandex.d.ts", + "import": "./chat_models/yandex.js", + "require": "./chat_models/yandex.cjs" + }, + "./callbacks/handlers/llmonitor": { + "types": "./callbacks/handlers/llmonitor.d.ts", + "import": "./callbacks/handlers/llmonitor.js", + "require": "./callbacks/handlers/llmonitor.cjs" + }, + "./retrievers/amazon_kendra": { + "types": "./retrievers/amazon_kendra.d.ts", + "import": "./retrievers/amazon_kendra.js", + "require": "./retrievers/amazon_kendra.cjs" + }, + "./retrievers/chaindesk": { + "types": "./retrievers/chaindesk.d.ts", + "import": "./retrievers/chaindesk.js", + "require": "./retrievers/chaindesk.cjs" + }, + "./retrievers/databerry": { + "types": "./retrievers/databerry.d.ts", + "import": "./retrievers/databerry.js", + "require": "./retrievers/databerry.cjs" + }, + "./retrievers/metal": { + "types": "./retrievers/metal.d.ts", + "import": "./retrievers/metal.js", + "require": "./retrievers/metal.cjs" + }, + "./retrievers/supabase": { + "types": "./retrievers/supabase.d.ts", + "import": "./retrievers/supabase.js", + "require": "./retrievers/supabase.cjs" + }, + "./retrievers/tavily_search_api": { + "types": "./retrievers/tavily_search_api.d.ts", + "import": "./retrievers/tavily_search_api.js", + "require": "./retrievers/tavily_search_api.cjs" + }, + "./retrievers/zep": { + "types": "./retrievers/zep.d.ts", + "import": "./retrievers/zep.js", + "require": "./retrievers/zep.cjs" + }, + "./cache/cloudflare_kv": { + "types": "./cache/cloudflare_kv.d.ts", + "import": "./cache/cloudflare_kv.js", + "require": "./cache/cloudflare_kv.cjs" + }, + "./cache/momento": { + "types": "./cache/momento.d.ts", + "import": "./cache/momento.js", + "require": "./cache/momento.cjs" + }, + "./cache/upstash_redis": { + "types": "./cache/upstash_redis.d.ts", + "import": "./cache/upstash_redis.js", + "require": "./cache/upstash_redis.cjs" + }, + "./graphs/neo4j_graph": { + "types": "./graphs/neo4j_graph.d.ts", + "import": 
"./graphs/neo4j_graph.js", + "require": "./graphs/neo4j_graph.cjs" }, "./package.json": "./package.json" }, "files": [ "dist/", + "load.cjs", + "load.js", + "load.d.ts", + "load/serializable.cjs", + "load/serializable.js", + "load/serializable.d.ts", + "tools/aiplugin.cjs", + "tools/aiplugin.js", + "tools/aiplugin.d.ts", + "tools/aws_sfn.cjs", + "tools/aws_sfn.js", + "tools/aws_sfn.d.ts", + "tools/bingserpapi.cjs", + "tools/bingserpapi.js", + "tools/bingserpapi.d.ts", + "tools/brave_search.cjs", + "tools/brave_search.js", + "tools/brave_search.d.ts", + "tools/connery.cjs", + "tools/connery.js", + "tools/connery.d.ts", + "tools/dadjokeapi.cjs", + "tools/dadjokeapi.js", + "tools/dadjokeapi.d.ts", + "tools/dataforseo_api_search.cjs", + "tools/dataforseo_api_search.js", + "tools/dataforseo_api_search.d.ts", + "tools/gmail.cjs", + "tools/gmail.js", + "tools/gmail.d.ts", + "tools/google_custom_search.cjs", + "tools/google_custom_search.js", + "tools/google_custom_search.d.ts", + "tools/google_places.cjs", + "tools/google_places.js", + "tools/google_places.d.ts", + "tools/ifttt.cjs", + "tools/ifttt.js", + "tools/ifttt.d.ts", + "tools/searchapi.cjs", + "tools/searchapi.js", + "tools/searchapi.d.ts", + "tools/searxng_search.cjs", + "tools/searxng_search.js", + "tools/searxng_search.d.ts", + "tools/serpapi.cjs", + "tools/serpapi.js", + "tools/serpapi.d.ts", + "tools/serper.cjs", + "tools/serper.js", + "tools/serper.d.ts", + "tools/wikipedia_query_run.cjs", + "tools/wikipedia_query_run.js", + "tools/wikipedia_query_run.d.ts", + "tools/wolframalpha.cjs", + "tools/wolframalpha.js", + "tools/wolframalpha.d.ts", + "embeddings/bedrock.cjs", + "embeddings/bedrock.js", + "embeddings/bedrock.d.ts", + "embeddings/cloudflare_workersai.cjs", + "embeddings/cloudflare_workersai.js", + "embeddings/cloudflare_workersai.d.ts", + "embeddings/cohere.cjs", + "embeddings/cohere.js", + "embeddings/cohere.d.ts", + "embeddings/googlepalm.cjs", + "embeddings/googlepalm.js", + 
"embeddings/googlepalm.d.ts", + "embeddings/googlevertexai.cjs", + "embeddings/googlevertexai.js", + "embeddings/googlevertexai.d.ts", + "embeddings/gradient_ai.cjs", + "embeddings/gradient_ai.js", + "embeddings/gradient_ai.d.ts", + "embeddings/hf.cjs", + "embeddings/hf.js", + "embeddings/hf.d.ts", + "embeddings/hf_transformers.cjs", + "embeddings/hf_transformers.js", + "embeddings/hf_transformers.d.ts", + "embeddings/llama_cpp.cjs", + "embeddings/llama_cpp.js", + "embeddings/llama_cpp.d.ts", + "embeddings/minimax.cjs", + "embeddings/minimax.js", + "embeddings/minimax.d.ts", + "embeddings/ollama.cjs", + "embeddings/ollama.js", + "embeddings/ollama.d.ts", + "embeddings/tensorflow.cjs", + "embeddings/tensorflow.js", + "embeddings/tensorflow.d.ts", + "embeddings/voyage.cjs", + "embeddings/voyage.js", + "embeddings/voyage.d.ts", + "llms/ai21.cjs", + "llms/ai21.js", + "llms/ai21.d.ts", + "llms/aleph_alpha.cjs", + "llms/aleph_alpha.js", + "llms/aleph_alpha.d.ts", + "llms/bedrock.cjs", + "llms/bedrock.js", + "llms/bedrock.d.ts", + "llms/bedrock/web.cjs", + "llms/bedrock/web.js", + "llms/bedrock/web.d.ts", + "llms/cloudflare_workersai.cjs", + "llms/cloudflare_workersai.js", + "llms/cloudflare_workersai.d.ts", + "llms/cohere.cjs", + "llms/cohere.js", + "llms/cohere.d.ts", + "llms/fireworks.cjs", + "llms/fireworks.js", + "llms/fireworks.d.ts", + "llms/googlepalm.cjs", + "llms/googlepalm.js", + "llms/googlepalm.d.ts", + "llms/googlevertexai.cjs", + "llms/googlevertexai.js", + "llms/googlevertexai.d.ts", + "llms/googlevertexai/web.cjs", + "llms/googlevertexai/web.js", + "llms/googlevertexai/web.d.ts", + "llms/gradient_ai.cjs", + "llms/gradient_ai.js", + "llms/gradient_ai.d.ts", + "llms/hf.cjs", + "llms/hf.js", + "llms/hf.d.ts", + "llms/llama_cpp.cjs", + "llms/llama_cpp.js", + "llms/llama_cpp.d.ts", + "llms/ollama.cjs", + "llms/ollama.js", + "llms/ollama.d.ts", + "llms/portkey.cjs", + "llms/portkey.js", + "llms/portkey.d.ts", + "llms/raycast.cjs", + "llms/raycast.js", + 
"llms/raycast.d.ts", + "llms/replicate.cjs", + "llms/replicate.js", + "llms/replicate.d.ts", + "llms/sagemaker_endpoint.cjs", + "llms/sagemaker_endpoint.js", + "llms/sagemaker_endpoint.d.ts", + "llms/watsonx_ai.cjs", + "llms/watsonx_ai.js", + "llms/watsonx_ai.d.ts", + "llms/writer.cjs", + "llms/writer.js", + "llms/writer.d.ts", + "llms/yandex.cjs", + "llms/yandex.js", + "llms/yandex.d.ts", + "vectorstores/analyticdb.cjs", + "vectorstores/analyticdb.js", + "vectorstores/analyticdb.d.ts", + "vectorstores/cassandra.cjs", + "vectorstores/cassandra.js", + "vectorstores/cassandra.d.ts", + "vectorstores/chroma.cjs", + "vectorstores/chroma.js", + "vectorstores/chroma.d.ts", + "vectorstores/clickhouse.cjs", + "vectorstores/clickhouse.js", + "vectorstores/clickhouse.d.ts", + "vectorstores/cloudflare_vectorize.cjs", + "vectorstores/cloudflare_vectorize.js", + "vectorstores/cloudflare_vectorize.d.ts", + "vectorstores/convex.cjs", + "vectorstores/convex.js", + "vectorstores/convex.d.ts", + "vectorstores/elasticsearch.cjs", + "vectorstores/elasticsearch.js", + "vectorstores/elasticsearch.d.ts", + "vectorstores/lancedb.cjs", + "vectorstores/lancedb.js", + "vectorstores/lancedb.d.ts", + "vectorstores/milvus.cjs", + "vectorstores/milvus.js", + "vectorstores/milvus.d.ts", + "vectorstores/myscale.cjs", + "vectorstores/myscale.js", + "vectorstores/myscale.d.ts", + "vectorstores/neo4j_vector.cjs", + "vectorstores/neo4j_vector.js", + "vectorstores/neo4j_vector.d.ts", + "vectorstores/opensearch.cjs", + "vectorstores/opensearch.js", + "vectorstores/opensearch.d.ts", + "vectorstores/prisma.cjs", + "vectorstores/prisma.js", + "vectorstores/prisma.d.ts", + "vectorstores/qdrant.cjs", + "vectorstores/qdrant.js", + "vectorstores/qdrant.d.ts", + "vectorstores/redis.cjs", + "vectorstores/redis.js", + "vectorstores/redis.d.ts", + "vectorstores/rockset.cjs", + "vectorstores/rockset.js", + "vectorstores/rockset.d.ts", + "vectorstores/singlestore.cjs", + "vectorstores/singlestore.js", + 
"vectorstores/singlestore.d.ts", + "vectorstores/tigris.cjs", + "vectorstores/tigris.js", + "vectorstores/tigris.d.ts", + "vectorstores/typeorm.cjs", + "vectorstores/typeorm.js", + "vectorstores/typeorm.d.ts", + "vectorstores/typesense.cjs", + "vectorstores/typesense.js", + "vectorstores/typesense.d.ts", + "vectorstores/vectara.cjs", + "vectorstores/vectara.js", + "vectorstores/vectara.d.ts", + "vectorstores/vercel_postgres.cjs", + "vectorstores/vercel_postgres.js", + "vectorstores/vercel_postgres.d.ts", + "vectorstores/voy.cjs", + "vectorstores/voy.js", + "vectorstores/voy.d.ts", + "vectorstores/xata.cjs", + "vectorstores/xata.js", + "vectorstores/xata.d.ts", + "chat_models/baiduwenxin.cjs", + "chat_models/baiduwenxin.js", + "chat_models/baiduwenxin.d.ts", + "chat_models/bedrock.cjs", + "chat_models/bedrock.js", + "chat_models/bedrock.d.ts", + "chat_models/bedrock/web.cjs", + "chat_models/bedrock/web.js", + "chat_models/bedrock/web.d.ts", + "chat_models/cloudflare_workersai.cjs", + "chat_models/cloudflare_workersai.js", + "chat_models/cloudflare_workersai.d.ts", + "chat_models/fireworks.cjs", + "chat_models/fireworks.js", + "chat_models/fireworks.d.ts", + "chat_models/googlevertexai.cjs", + "chat_models/googlevertexai.js", + "chat_models/googlevertexai.d.ts", + "chat_models/googlevertexai/web.cjs", + "chat_models/googlevertexai/web.js", + "chat_models/googlevertexai/web.d.ts", + "chat_models/googlepalm.cjs", + "chat_models/googlepalm.js", + "chat_models/googlepalm.d.ts", + "chat_models/iflytek_xinghuo.cjs", + "chat_models/iflytek_xinghuo.js", + "chat_models/iflytek_xinghuo.d.ts", + "chat_models/iflytek_xinghuo/web.cjs", + "chat_models/iflytek_xinghuo/web.js", + "chat_models/iflytek_xinghuo/web.d.ts", + "chat_models/llama_cpp.cjs", + "chat_models/llama_cpp.js", + "chat_models/llama_cpp.d.ts", + "chat_models/minimax.cjs", + "chat_models/minimax.js", + "chat_models/minimax.d.ts", + "chat_models/ollama.cjs", + "chat_models/ollama.js", + "chat_models/ollama.d.ts", + 
"chat_models/portkey.cjs", + "chat_models/portkey.js", + "chat_models/portkey.d.ts", + "chat_models/yandex.cjs", + "chat_models/yandex.js", + "chat_models/yandex.d.ts", + "callbacks/handlers/llmonitor.cjs", + "callbacks/handlers/llmonitor.js", + "callbacks/handlers/llmonitor.d.ts", + "retrievers/amazon_kendra.cjs", + "retrievers/amazon_kendra.js", + "retrievers/amazon_kendra.d.ts", + "retrievers/chaindesk.cjs", + "retrievers/chaindesk.js", + "retrievers/chaindesk.d.ts", + "retrievers/databerry.cjs", + "retrievers/databerry.js", + "retrievers/databerry.d.ts", + "retrievers/metal.cjs", + "retrievers/metal.js", + "retrievers/metal.d.ts", + "retrievers/supabase.cjs", + "retrievers/supabase.js", + "retrievers/supabase.d.ts", + "retrievers/tavily_search_api.cjs", + "retrievers/tavily_search_api.js", + "retrievers/tavily_search_api.d.ts", + "retrievers/zep.cjs", + "retrievers/zep.js", + "retrievers/zep.d.ts", + "cache/cloudflare_kv.cjs", + "cache/cloudflare_kv.js", + "cache/cloudflare_kv.d.ts", + "cache/momento.cjs", + "cache/momento.js", + "cache/momento.d.ts", + "cache/upstash_redis.cjs", + "cache/upstash_redis.js", + "cache/upstash_redis.d.ts", + "graphs/neo4j_graph.cjs", + "graphs/neo4j_graph.js", + "graphs/neo4j_graph.d.ts", "index.cjs", "index.js", "index.d.ts" diff --git a/libs/langchain-community/scripts/check-tree-shaking.js b/libs/langchain-community/scripts/check-tree-shaking.js index 8073e3d5507b..851d0bbba8a4 100644 --- a/libs/langchain-community/scripts/check-tree-shaking.js +++ b/libs/langchain-community/scripts/check-tree-shaking.js @@ -27,6 +27,12 @@ export function listExternals() { ...Object.keys(packageJson.peerDependencies ?? 
{}), /node\:/, /@langchain\/core\//, + "convex", + "convex/server", + "convex/values", + "@rockset/client/dist/codegen/api.js", + "mysql2/promise", + "web-auth-library/google", ]; } diff --git a/libs/langchain-community/scripts/create-entrypoints.js b/libs/langchain-community/scripts/create-entrypoints.js index 01a4daeb25ce..9d1440a726f7 100644 --- a/libs/langchain-community/scripts/create-entrypoints.js +++ b/libs/langchain-community/scripts/create-entrypoints.js @@ -1,8 +1,6 @@ import * as fs from "fs"; import * as path from "path"; - -// .gitignore -const DEFAULT_GITIGNORE_PATHS = ["node_modules", "dist", ".yarn"]; +import { identifySecrets } from "./identify-secrets.js"; // This lists all the entrypoints for the library. Each key corresponds to an // importable path, eg. `import { AgentExecutor } from "langchain/agents"`. @@ -10,12 +8,196 @@ const DEFAULT_GITIGNORE_PATHS = ["node_modules", "dist", ".yarn"]; // This is used to generate the `exports` field in package.json. // Order is not important. 
const entrypoints = { - index: "index", + load: "load/index", + "load/serializable": "load/serializable", + "tools/aiplugin": "tools/aiplugin", + "tools/aws_sfn": "tools/aws_sfn", + "tools/bingserpapi": "tools/bingserpapi", + "tools/brave_search": "tools/brave_search", + "tools/connery": "tools/connery", + "tools/dadjokeapi": "tools/dadjokeapi", + "tools/dataforseo_api_search": "tools/dataforseo_api_search", + "tools/gmail": "tools/gmail/index", + "tools/google_custom_search": "tools/google_custom_search", + "tools/google_places": "tools/google_places", + "tools/ifttt": "tools/ifttt", + "tools/searchapi": "tools/searchapi", + "tools/searxng_search": "tools/searxng_search", + "tools/serpapi": "tools/serpapi", + "tools/serper": "tools/serper", + "tools/wikipedia_query_run": "tools/wikipedia_query_run", + "tools/wolframalpha": "tools/wolframalpha", + // embeddings + "embeddings/bedrock": "embeddings/bedrock", + "embeddings/cloudflare_workersai": "embeddings/cloudflare_workersai", + "embeddings/cohere": "embeddings/cohere", + "embeddings/googlepalm": "embeddings/googlepalm", + "embeddings/googlevertexai": "embeddings/googlevertexai", + "embeddings/gradient_ai": "embeddings/gradient_ai", + "embeddings/hf": "embeddings/hf", + "embeddings/hf_transformers": "embeddings/hf_transformers", + "embeddings/llama_cpp": "embeddings/llama_cpp", + "embeddings/minimax": "embeddings/minimax", + "embeddings/ollama": "embeddings/ollama", + "embeddings/tensorflow": "embeddings/tensorflow", + "embeddings/voyage": "embeddings/voyage", + // llms + "llms/ai21": "llms/ai21", + "llms/aleph_alpha": "llms/aleph_alpha", + "llms/bedrock": "llms/bedrock/index", + "llms/bedrock/web": "llms/bedrock/web", + "llms/cloudflare_workersai": "llms/cloudflare_workersai", + "llms/cohere": "llms/cohere", + "llms/fireworks": "llms/fireworks", + "llms/googlepalm": "llms/googlepalm", + "llms/googlevertexai": "llms/googlevertexai/index", + "llms/googlevertexai/web": "llms/googlevertexai/web", + "llms/gradient_ai": 
"llms/gradient_ai", + "llms/hf": "llms/hf", + "llms/llama_cpp": "llms/llama_cpp", + "llms/ollama": "llms/ollama", + "llms/portkey": "llms/portkey", + "llms/raycast": "llms/raycast", + "llms/replicate": "llms/replicate", + "llms/sagemaker_endpoint": "llms/sagemaker_endpoint", + "llms/watsonx_ai": "llms/watsonx_ai", + "llms/writer": "llms/writer", + "llms/yandex": "llms/yandex", + // vectorstores + "vectorstores/analyticdb": "vectorstores/analyticdb", + "vectorstores/cassandra": "vectorstores/cassandra", + "vectorstores/chroma": "vectorstores/chroma", + "vectorstores/clickhouse": "vectorstores/clickhouse", + "vectorstores/cloudflare_vectorize": "vectorstores/cloudflare_vectorize", + "vectorstores/convex": "vectorstores/convex", + "vectorstores/elasticsearch": "vectorstores/elasticsearch", + "vectorstores/lancedb": "vectorstores/lancedb", + "vectorstores/milvus": "vectorstores/milvus", + "vectorstores/myscale": "vectorstores/myscale", + "vectorstores/neo4j_vector": "vectorstores/neo4j_vector", + "vectorstores/opensearch": "vectorstores/opensearch", + "vectorstores/prisma": "vectorstores/prisma", + "vectorstores/qdrant": "vectorstores/qdrant", + "vectorstores/redis": "vectorstores/redis", + "vectorstores/rockset": "vectorstores/rockset", + "vectorstores/singlestore": "vectorstores/singlestore", + "vectorstores/tigris": "vectorstores/tigris", + "vectorstores/typeorm": "vectorstores/typeorm", + "vectorstores/typesense": "vectorstores/typesense", + "vectorstores/vectara": "vectorstores/vectara", + "vectorstores/vercel_postgres": "vectorstores/vercel_postgres", + "vectorstores/voy": "vectorstores/voy", + "vectorstores/xata": "vectorstores/xata", + // chat_models + "chat_models/baiduwenxin": "chat_models/baiduwenxin", + "chat_models/bedrock": "chat_models/bedrock/index", + "chat_models/bedrock/web": "chat_models/bedrock/web", + "chat_models/cloudflare_workersai": "chat_models/cloudflare_workersai", + "chat_models/fireworks": "chat_models/fireworks", + 
"chat_models/googlevertexai": "chat_models/googlevertexai/index", + "chat_models/googlevertexai/web": "chat_models/googlevertexai/web", + "chat_models/googlepalm": "chat_models/googlepalm", + "chat_models/iflytek_xinghuo": "chat_models/iflytek_xinghuo/index", + "chat_models/iflytek_xinghuo/web": "chat_models/iflytek_xinghuo/web", + "chat_models/llama_cpp": "chat_models/llama_cpp", + "chat_models/minimax": "chat_models/minimax", + "chat_models/ollama": "chat_models/ollama", + "chat_models/portkey": "chat_models/portkey", + "chat_models/yandex": "chat_models/yandex", + // callbacks + "callbacks/handlers/llmonitor": "callbacks/handlers/llmonitor", + // retrievers + "retrievers/amazon_kendra": "retrievers/amazon_kendra", + "retrievers/chaindesk": "retrievers/chaindesk", + "retrievers/databerry": "retrievers/databerry", + "retrievers/metal": "retrievers/metal", + "retrievers/supabase": "retrievers/supabase", + "retrievers/tavily_search_api": "retrievers/tavily_search_api", + "retrievers/zep": "retrievers/zep", + // cache + "cache/cloudflare_kv": "cache/cloudflare_kv", + "cache/momento": "cache/momento", + "cache/upstash_redis": "cache/upstash_redis", + // graphs + "graphs/neo4j_graph": "graphs/neo4j_graph", }; +// Entrypoints in this list will +// 1. Be excluded from the documentation +// 2. Be only available in Node.js environments (for backwards compatibility) +const deprecatedNodeOnly = []; + // Entrypoints in this list require an optional dependency to be installed. // Therefore they are not tested in the generated test-exports-* packages. 
-const requiresOptionalDependency = []; +const requiresOptionalDependency = [ + "tools/aws_sfn", + "tools/gmail", + "callbacks/handlers/llmonitor", + "embeddings/bedrock", + "embeddings/cloudflare_workersai", + "embeddings/cohere", + "embeddings/googlevertexai", + "embeddings/googlepalm", + "embeddings/tensorflow", + "embeddings/hf", + "embeddings/hf_transformers", + "embeddings/llama_cpp", + "embeddings/gradient_ai", + "llms/load", + "llms/cohere", + "llms/googlevertexai", + "llms/googlevertexai/web", + "llms/googlepalm", + "llms/gradient_ai", + "llms/hf", + "llms/raycast", + "llms/replicate", + "llms/sagemaker_endpoint", + "llms/watsonx_ai", + "llms/bedrock", + "llms/bedrock/web", + "llms/llama_cpp", + "llms/writer", + "llms/portkey", + "vectorstores/analyticdb", + "vectorstores/cassandra", + "vectorstores/chroma", + "vectorstores/clickhouse", + "vectorstores/cloudflare_vectorize", + "vectorstores/convex", + "vectorstores/elasticsearch", + "vectorstores/lancedb", + "vectorstores/milvus", + "vectorstores/myscale", + "vectorstores/neo4j_vector", + "vectorstores/opensearch", + "vectorstores/qdrant", + "vectorstores/redis", + "vectorstores/rockset", + "vectorstores/singlestore", + "vectorstores/tigris", + "vectorstores/typeorm", + "vectorstores/typesense", + "vectorstores/vercel_postgres", + "vectorstores/voy", + "chat_models/bedrock", + "chat_models/bedrock/web", + "chat_models/googlevertexai", + "chat_models/googlevertexai/web", + "chat_models/googlepalm", + "chat_models/llama_cpp", + "chat_models/portkey", + "chat_models/iflytek_xinghuo", + "chat_models/iflytek_xinghuo/web", + "retrievers/amazon_kendra", + "retrievers/supabase", + "retrievers/zep", + "retrievers/metal", + "cache/cloudflare_kv", + "cache/momento", + "cache/upstash_redis", + "graphs/neo4j_graph", +]; const updateJsonFile = (relativePath, updateFunction) => { const contents = fs.readFileSync(relativePath).toString(); @@ -59,7 +241,13 @@ const updateConfig = () => { require: `./${key}.cjs`, }; - 
return [key === "index" ? "." : `./${key}`, entryPoint]; + if (deprecatedNodeOnly.includes(key)) { + entryPoint = { + node: entryPoint, + }; + } + + return [`./${key}`, entryPoint]; }) ), { "./package.json": "./package.json" } @@ -74,10 +262,12 @@ const updateConfig = () => { }); // Update .gitignore - fs.writeFileSync( - "./.gitignore", - filenames.join("\n") + "\n" + DEFAULT_GITIGNORE_PATHS.join("\n") + "\n" - ); + fs.writeFileSync("./.gitignore", filenames.join("\n") + "\n"); + + // Update test-exports-*/entrypoints.js + const entrypointsToTest = Object.keys(entrypoints) + .filter((key) => !deprecatedNodeOnly.includes(key)) + .filter((key) => !requiresOptionalDependency.includes(key)); }; const cleanGenerated = () => { @@ -91,10 +281,97 @@ const cleanGenerated = () => { }); }; +// Tuple describing the auto-generated import map (used by langchain/load) +// [package name, import statement, import map path] +// This will not include entrypoints deprecated or requiring optional deps. +const importMap = [ + "langchain-community", + (k, p) => `export * as ${k.replace(/\//g, "__")} from "../${p}.js";`, + "src/load/import_map.ts", +]; + +const generateImportMap = () => { + // Generate import map + const entrypointsToInclude = Object.keys(entrypoints) + .filter((key) => key !== "load") + .filter((key) => !deprecatedNodeOnly.includes(key)) + .filter((key) => !requiresOptionalDependency.includes(key)); + const [pkg, importStatement, importMapPath] = importMap; + const contents = + entrypointsToInclude + .map((key) => importStatement(key, entrypoints[key])) + .join("\n") + "\n"; + fs.writeFileSync( + `../${pkg}/${importMapPath}`, + "// Auto-generated by `scripts/create-entrypoints.js`. 
Do not edit manually.\n\n" + + contents + ); +}; + +const importTypes = [ + "langchain-community", + (k, p) => + ` "@langchain/community/${k}"?: + | typeof import("../${p}.js") + | Promise;`, + "src/load/import_type.d.ts", +]; + +const generateImportTypes = () => { + // Generate import types + const [pkg, importStatement, importTypesPath] = importTypes; + fs.writeFileSync( + `../${pkg}/${importTypesPath}`, + `// Auto-generated by \`scripts/create-entrypoints.js\`. Do not edit manually. + +export interface OptionalImportMap { +${Object.keys(entrypoints) + .filter((key) => !deprecatedNodeOnly.includes(key)) + .filter((key) => requiresOptionalDependency.includes(key)) + .map((key) => importStatement(key, entrypoints[key])) + .join("\n")} +} + +export interface SecretMap { +${[...identifySecrets()] + .sort() + .map((secret) => ` ${secret}?: string;`) + .join("\n")} +} +` + ); +}; + +const importConstants = [ + "langchain-community", + (k) => ` "@langchain/community/${k}"`, + "src/load/import_constants.ts", +]; + +const generateImportConstants = () => { + // Generate import constants + const entrypointsToInclude = Object.keys(entrypoints) + .filter((key) => !deprecatedNodeOnly.includes(key)) + .filter((key) => requiresOptionalDependency.includes(key)); + const [pkg, importStatement, importConstantsPath] = importConstants; + const contents = + entrypointsToInclude + .map((key) => importStatement(key, entrypoints[key])) + .join(",\n") + ",\n];\n"; + fs.writeFileSync( + `../${pkg}/${importConstantsPath}`, + "// Auto-generated by `scripts/create-entrypoints.js`. 
Do not edit manually.\n\nexport const optionalImportEntrypoints = [\n" + + contents + ); +}; + const command = process.argv[2]; if (command === "pre") { cleanGenerated(); + generateImportMap(); + generateImportTypes(); + generateImportConstants(); } else { updateConfig(); } diff --git a/libs/langchain-community/src/cache/cloudflare_kv.ts b/libs/langchain-community/src/cache/cloudflare_kv.ts new file mode 100644 index 000000000000..e03013b0b992 --- /dev/null +++ b/libs/langchain-community/src/cache/cloudflare_kv.ts @@ -0,0 +1,76 @@ +import type { KVNamespace } from "@cloudflare/workers-types"; + +import { BaseCache, + getCacheKey, + serializeGeneration, + deserializeStoredGeneration, } from "@langchain/core/caches"; +import { Generation } from "@langchain/core/outputs"; + +/** + * Represents a specific implementation of a caching mechanism using Cloudflare KV + * as the underlying storage system. It extends the `BaseCache` class and + * overrides its methods to provide the Cloudflare KV-specific logic. + * @example + * ```typescript + * // Example of using OpenAI with Cloudflare KV as cache in a Cloudflare Worker + * const cache = new CloudflareKVCache(env.KV_NAMESPACE); + * const model = new ChatAnthropic({ + * cache, + * }); + * const response = await model.invoke("How are you today?"); + * return new Response(JSON.stringify(response), { + * headers: { "content-type": "application/json" }, + * }); + * + * ``` + */ +export class CloudflareKVCache extends BaseCache { + private binding: KVNamespace; + + constructor(binding: KVNamespace) { + super(); + this.binding = binding; + } + + /** + * Retrieves data from the cache. It constructs a cache key from the given + * `prompt` and `llmKey`, and retrieves the corresponding value from the + * Cloudflare KV namespace. + * @param prompt The prompt used to construct the cache key. + * @param llmKey The LLM key used to construct the cache key. + * @returns An array of Generations if found, null otherwise. 
+ */ + public async lookup(prompt: string, llmKey: string) { + let idx = 0; + let key = getCacheKey(prompt, llmKey, String(idx)); + let value = await this.binding.get(key); + const generations: Generation[] = []; + + while (value) { + generations.push(deserializeStoredGeneration(JSON.parse(value))); + idx += 1; + key = getCacheKey(prompt, llmKey, String(idx)); + value = await this.binding.get(key); + } + + return generations.length > 0 ? generations : null; + } + + /** + * Updates the cache with new data. It constructs a cache key from the + * given `prompt` and `llmKey`, and stores the `value` in the Cloudflare KV + * namespace. + * @param prompt The prompt used to construct the cache key. + * @param llmKey The LLM key used to construct the cache key. + * @param value The value to be stored in the cache. + */ + public async update(prompt: string, llmKey: string, value: Generation[]) { + for (let i = 0; i < value.length; i += 1) { + const key = getCacheKey(prompt, llmKey, String(i)); + await this.binding.put( + key, + JSON.stringify(serializeGeneration(value[i])) + ); + } + } +} diff --git a/libs/langchain-community/src/cache/momento.ts b/libs/langchain-community/src/cache/momento.ts new file mode 100644 index 000000000000..0877a022ef4d --- /dev/null +++ b/libs/langchain-community/src/cache/momento.ts @@ -0,0 +1,173 @@ +/* eslint-disable no-instanceof/no-instanceof */ +import { + ICacheClient, + CacheGet, + CacheSet, + InvalidArgumentError, +} from "@gomomento/sdk-core"; + +import { BaseCache, + deserializeStoredGeneration, + getCacheKey, + serializeGeneration, } from "@langchain/core/caches"; +import { Generation } from "@langchain/core/outputs"; + +import { ensureCacheExists } from "../util/momento.js"; + +/** + * The settings to instantiate the Momento standard cache. + */ +export interface MomentoCacheProps { + /** + * The Momento cache client. + */ + client: ICacheClient; + /** + * The name of the cache to use to store the data. 
+ */ + cacheName: string; + /** + * The time to live for the cache items. If not specified, + * the cache client default is used. + */ + ttlSeconds?: number; + /** + * If true, ensure that the cache exists before returning. + * If false, the cache is not checked for existence. + * Defaults to true. + */ + ensureCacheExists?: boolean; +} + +/** + * A cache that uses Momento as the backing store. + * See https://gomomento.com. + * @example + * ```typescript + * const cache = new MomentoCache({ + * client: new CacheClient({ + * configuration: Configurations.Laptop.v1(), + * credentialProvider: CredentialProvider.fromEnvironmentVariable({ + * environmentVariableName: "MOMENTO_API_KEY", + * }), + * defaultTtlSeconds: 60 * 60 * 24, // Cache TTL set to 24 hours. + * }), + * cacheName: "langchain", + * }); + * // Initialize the OpenAI model with Momento cache for caching responses + * const model = new ChatOpenAI({ + * cache, + * }); + * await model.invoke("How are you today?"); + * const cachedValues = await cache.lookup("How are you today?", "llmKey"); + * ``` + */ +export class MomentoCache extends BaseCache { + private client: ICacheClient; + + private readonly cacheName: string; + + private readonly ttlSeconds?: number; + + private constructor(props: MomentoCacheProps) { + super(); + this.client = props.client; + this.cacheName = props.cacheName; + + this.validateTtlSeconds(props.ttlSeconds); + this.ttlSeconds = props.ttlSeconds; + } + + /** + * Create a new standard cache backed by Momento. + * + * @param {MomentoCacheProps} props The settings to instantiate the cache. + * @param {ICacheClient} props.client The Momento cache client. + * @param {string} props.cacheName The name of the cache to use to store the data. + * @param {number} props.ttlSeconds The time to live for the cache items. If not specified, + * the cache client default is used. + * @param {boolean} props.ensureCacheExists If true, ensure that the cache exists before returning.
+ * If false, the cache is not checked for existence. Defaults to true. + * @throws {@link InvalidArgumentError} if {@link props.ttlSeconds} is not strictly positive. + * @returns The Momento-backed cache. + */ + public static async fromProps( + props: MomentoCacheProps + ): Promise { + const instance = new MomentoCache(props); + if (props.ensureCacheExists || props.ensureCacheExists === undefined) { + await ensureCacheExists(props.client, props.cacheName); + } + return instance; + } + + /** + * Validate the user-specified TTL, if provided, is strictly positive. + * @param ttlSeconds The TTL to validate. + */ + private validateTtlSeconds(ttlSeconds?: number): void { + if (ttlSeconds !== undefined && ttlSeconds <= 0) { + throw new InvalidArgumentError("ttlSeconds must be positive."); + } + } + + /** + * Lookup LLM generations in cache by prompt and associated LLM key. + * @param prompt The prompt to lookup. + * @param llmKey The LLM key to lookup. + * @returns The generations associated with the prompt and LLM key, or null if not found. + */ + public async lookup( + prompt: string, + llmKey: string + ): Promise { + const key = getCacheKey(prompt, llmKey); + const getResponse = await this.client.get(this.cacheName, key); + + if (getResponse instanceof CacheGet.Hit) { + const value = getResponse.valueString(); + const parsedValue = JSON.parse(value); + if (!Array.isArray(parsedValue)) { + return null; + } + return parsedValue.map(deserializeStoredGeneration); + } else if (getResponse instanceof CacheGet.Miss) { + return null; + } else if (getResponse instanceof CacheGet.Error) { + throw getResponse.innerException(); + } else { + throw new Error(`Unknown response type: ${getResponse.toString()}`); + } + } + + /** + * Update the cache with the given generations. + * + * Note this overwrites any existing generations for the given prompt and LLM key. + * + * @param prompt The prompt to update. + * @param llmKey The LLM key to update.
+ * @param value The generations to store. + */ + public async update( + prompt: string, + llmKey: string, + value: Generation[] + ): Promise { + const key = getCacheKey(prompt, llmKey); + const setResponse = await this.client.set( + this.cacheName, + key, + JSON.stringify(value.map(serializeGeneration)), + { ttl: this.ttlSeconds } + ); + + if (setResponse instanceof CacheSet.Success) { + // pass + } else if (setResponse instanceof CacheSet.Error) { + throw setResponse.innerException(); + } else { + throw new Error(`Unknown response type: ${setResponse.toString()}`); + } + } +} diff --git a/libs/langchain-community/src/cache/tests/momento.test.ts b/libs/langchain-community/src/cache/tests/momento.test.ts new file mode 100644 index 000000000000..9ba02464cc8b --- /dev/null +++ b/libs/langchain-community/src/cache/tests/momento.test.ts @@ -0,0 +1,329 @@ +import { expect } from "@jest/globals"; + +import { + ICacheClient, + IMomentoCache, + CacheDelete, + CacheGet, + CacheIncrement, + CacheKeyExists, + CacheKeysExist, + CacheSet, + CacheSetIfNotExists, + CacheSetFetch, + CacheSetAddElements, + CacheSetAddElement, + CacheSetRemoveElements, + CacheSetRemoveElement, + CacheListFetch, + CacheListLength, + CacheListPushFront, + CacheListPushBack, + CacheListConcatenateBack, + CacheListConcatenateFront, + CacheListPopBack, + CacheListPopFront, + CacheListRemoveValue, + CacheListRetain, + CacheDictionarySetField, + CacheDictionarySetFields, + CacheDictionaryGetField, + CacheDictionaryGetFields, + CacheDictionaryFetch, + CacheDictionaryLength, + CacheDictionaryIncrement, + CacheDictionaryRemoveField, + CacheDictionaryRemoveFields, + CacheSortedSetFetch, + CacheSortedSetPutElement, + CacheSortedSetPutElements, + CacheSortedSetGetRank, + CacheSortedSetGetScore, + CacheSortedSetGetScores, + CacheSortedSetLength, + CacheSortedSetLengthByScore, + CacheSortedSetIncrementScore, + CacheSortedSetRemoveElement, + CacheItemGetType, + CacheItemGetTtl, + CreateCache, + ListCaches, + 
DeleteCache, + CacheFlush, + CacheUpdateTtl, + CacheIncreaseTtl, + CacheDecreaseTtl, +} from "@gomomento/sdk-core"; +import { Generation } from "@langchain/core/outputs"; + +import { MomentoCache } from "../momento.js"; + +class MockClient implements ICacheClient { + private _cache: Map; + + constructor() { + this._cache = new Map(); + } + + cache(): IMomentoCache { + throw new Error("Method not implemented."); + } + + public async get(_: string, key: string): Promise { + if (this._cache.has(key)) { + return new CacheGet.Hit(new TextEncoder().encode(this._cache.get(key))); + } else { + return new CacheGet.Miss(); + } + } + + public async set( + _: string, + key: string, + value: string + ): Promise { + this._cache.set(key, value); + return new CacheSet.Success(); + } + + public async createCache(): Promise { + return new CreateCache.Success(); + } + + deleteCache(): Promise { + throw new Error("Method not implemented."); + } + + listCaches(): Promise { + throw new Error("Method not implemented."); + } + + flushCache(): Promise { + throw new Error("Method not implemented."); + } + + ping(): Promise { + throw new Error("Method not implemented."); + } + + delete(): Promise { + throw new Error("Method not implemented."); + } + + increment(): Promise { + throw new Error("Method not implemented."); + } + + keyExists(): Promise { + throw new Error("Method not implemented."); + } + + keysExist(): Promise { + throw new Error("Method not implemented."); + } + + setIfNotExists(): Promise { + throw new Error("Method not implemented."); + } + + setFetch(): Promise { + throw new Error("Method not implemented."); + } + + setAddElement(): Promise { + throw new Error("Method not implemented."); + } + + setAddElements(): Promise { + throw new Error("Method not implemented."); + } + + setRemoveElement(): Promise { + throw new Error("Method not implemented."); + } + + setRemoveElements(): Promise { + throw new Error("Method not implemented."); + } + + listFetch(): Promise { + throw 
new Error("Method not implemented."); + } + + listLength(): Promise { + throw new Error("Method not implemented."); + } + + listPushFront(): Promise { + throw new Error("Method not implemented."); + } + + listPushBack(): Promise { + throw new Error("Method not implemented."); + } + + listConcatenateBack(): Promise { + throw new Error("Method not implemented."); + } + + listConcatenateFront(): Promise { + throw new Error("Method not implemented."); + } + + listPopBack(): Promise { + throw new Error("Method not implemented."); + } + + listPopFront(): Promise { + throw new Error("Method not implemented."); + } + + listRemoveValue(): Promise { + throw new Error("Method not implemented."); + } + + listRetain(): Promise { + throw new Error("Method not implemented."); + } + + dictionarySetField(): Promise { + throw new Error("Method not implemented."); + } + + dictionarySetFields(): Promise { + throw new Error("Method not implemented."); + } + + dictionaryGetField(): Promise { + throw new Error("Method not implemented."); + } + + dictionaryGetFields(): Promise { + throw new Error("Method not implemented."); + } + + dictionaryFetch(): Promise { + throw new Error("Method not implemented."); + } + + dictionaryIncrement(): Promise { + throw new Error("Method not implemented."); + } + + dictionaryLength(): Promise { + throw new Error("Method not implemented."); + } + + dictionaryRemoveField(): Promise { + throw new Error("Method not implemented."); + } + + dictionaryRemoveFields(): Promise { + throw new Error("Method not implemented."); + } + + sortedSetFetchByRank(): Promise { + throw new Error("Method not implemented."); + } + + sortedSetFetchByScore(): Promise { + throw new Error("Method not implemented."); + } + + sortedSetPutElement(): Promise { + throw new Error("Method not implemented."); + } + + sortedSetPutElements(): Promise { + throw new Error("Method not implemented."); + } + + sortedSetGetRank(): Promise { + throw new Error("Method not implemented."); + } + + 
sortedSetGetScore(): Promise { + throw new Error("Method not implemented."); + } + + sortedSetGetScores(): Promise { + throw new Error("Method not implemented."); + } + + sortedSetIncrementScore(): Promise { + throw new Error("Method not implemented."); + } + + sortedSetLength(): Promise { + throw new Error("Method not implemented."); + } + + sortedSetLengthByScore(): Promise { + throw new Error("Method not implemented."); + } + + sortedSetRemoveElement(): Promise { + throw new Error("Method not implemented."); + } + + sortedSetRemoveElements(): Promise { + throw new Error("Method not implemented."); + } + + itemGetType(): Promise { + throw new Error("Method not implemented."); + } + + itemGetTtl(): Promise { + throw new Error("Method not implemented."); + } + + updateTtl(): Promise { + throw new Error("Method not implemented."); + } + + increaseTtl(): Promise { + throw new Error("Method not implemented."); + } + + decreaseTtl(): Promise { + throw new Error("Method not implemented."); + } +} + +describe("MomentoCache", () => { + it("should return null on a cache miss", async () => { + const client = new MockClient(); + const cache = await MomentoCache.fromProps({ + client, + cacheName: "test-cache", + }); + expect(await cache.lookup("prompt", "llm-key")).toBeNull(); + }); + + it("should get a stored value", async () => { + const client = new MockClient(); + const cache = await MomentoCache.fromProps({ + client, + cacheName: "test-cache", + }); + const generations: Generation[] = [{ text: "foo" }]; + await cache.update("prompt", "llm-key", generations); + expect(await cache.lookup("prompt", "llm-key")).toStrictEqual(generations); + }); + + it("should work with multiple generations", async () => { + const client = new MockClient(); + const cache = await MomentoCache.fromProps({ + client, + cacheName: "test-cache", + }); + const generations: Generation[] = [ + { text: "foo" }, + { text: "bar" }, + { text: "baz" }, + ]; + await cache.update("prompt", "llm-key", 
generations); + expect(await cache.lookup("prompt", "llm-key")).toStrictEqual(generations); + }); +}); diff --git a/libs/langchain-community/src/cache/tests/upstash_redis.int.test.ts b/libs/langchain-community/src/cache/tests/upstash_redis.int.test.ts new file mode 100644 index 000000000000..b6ba8628cc21 --- /dev/null +++ b/libs/langchain-community/src/cache/tests/upstash_redis.int.test.ts @@ -0,0 +1,38 @@ +/* eslint-disable no-process-env */ +import { ChatOpenAI } from "@langchain/openai"; +import { UpstashRedisCache } from "../upstash_redis.js"; + +/** + * This test is a result of the `lookup` method trying to parse an + * incorrectly typed value Before it was being typed as a string, + * whereas in reality it was a JSON object. + */ +test.skip("UpstashRedisCache does not parse non string cached values", async () => { + if ( + !process.env.UPSTASH_REDIS_REST_URL || + !process.env.UPSTASH_REDIS_REST_TOKEN || + !process.env.OPENAI_API_KEY + ) { + throw new Error( + "Missing Upstash Redis REST URL // REST TOKEN or OpenAI API key" + ); + } + const upstashRedisCache = new UpstashRedisCache({ + config: { + url: process.env.UPSTASH_REDIS_REST_URL, + token: process.env.UPSTASH_REDIS_REST_TOKEN, + }, + }); + + const chat = new ChatOpenAI({ + temperature: 0, + cache: upstashRedisCache, + maxTokens: 10, + }); + + const prompt = "is the sky blue"; + const result1 = await chat.predict(prompt); + const result2 = await chat.predict(prompt); + + expect(result1).toEqual(result2); +}); diff --git a/libs/langchain-community/src/cache/tests/upstash_redis.test.ts b/libs/langchain-community/src/cache/tests/upstash_redis.test.ts new file mode 100644 index 000000000000..fc8cc5cc0f92 --- /dev/null +++ b/libs/langchain-community/src/cache/tests/upstash_redis.test.ts @@ -0,0 +1,21 @@ +import { test, expect, jest } from "@jest/globals"; +import { insecureHash } from "@langchain/core/utils/hash"; +import { StoredGeneration } from "@langchain/core/messages"; + +import { UpstashRedisCache } 
from "../upstash_redis.js"; + +const sha1 = (str: string) => insecureHash(str); + +test("UpstashRedisCache", async () => { + const redis = { + get: jest.fn(async (key: string): Promise => { + if (key === sha1("foo_bar_0")) { + return { text: "baz" }; + } + return null; + }), + }; + // eslint-disable-next-line @typescript-eslint/no-explicit-any + const cache = new UpstashRedisCache({ client: redis as any }); + expect(await cache.lookup("foo", "bar")).toEqual([{ text: "baz" }]); +}); diff --git a/libs/langchain-community/src/cache/upstash_redis.ts b/libs/langchain-community/src/cache/upstash_redis.ts new file mode 100644 index 000000000000..1cf89e82c826 --- /dev/null +++ b/libs/langchain-community/src/cache/upstash_redis.ts @@ -0,0 +1,93 @@ +import { Redis, type RedisConfigNodejs } from "@upstash/redis"; + +import { Generation } from "@langchain/core/outputs"; +import { + BaseCache, + deserializeStoredGeneration, + getCacheKey, + serializeGeneration, +} from "@langchain/core/caches"; +import { StoredGeneration } from "@langchain/core/messages"; + +export type UpstashRedisCacheProps = { + /** + * The config to use to instantiate an Upstash Redis client. + */ + config?: RedisConfigNodejs; + /** + * An existing Upstash Redis client. + */ + client?: Redis; +}; + +/** + * A cache that uses Upstash as the backing store. + * See https://docs.upstash.com/redis. 
+ * @example + * ```typescript + * const cache = new UpstashRedisCache({ + * config: { + * url: "UPSTASH_REDIS_REST_URL", + * token: "UPSTASH_REDIS_REST_TOKEN", + * }, + * }); + * // Initialize the OpenAI model with Upstash Redis cache for caching responses + * const model = new ChatOpenAI({ + * cache, + * }); + * await model.invoke("How are you today?"); + * const cachedValues = await cache.lookup("How are you today?", "llmKey"); + * ``` + */ +export class UpstashRedisCache extends BaseCache { + private redisClient: Redis; + + constructor(props: UpstashRedisCacheProps) { + super(); + const { config, client } = props; + + if (client) { + this.redisClient = client; + } else if (config) { + this.redisClient = new Redis(config); + } else { + throw new Error( + `Upstash Redis caches require either a config object or a pre-configured client.` + ); + } + } + + /** + * Lookup LLM generations in cache by prompt and associated LLM key. + */ + public async lookup(prompt: string, llmKey: string) { + let idx = 0; + let key = getCacheKey(prompt, llmKey, String(idx)); + let value = await this.redisClient.get(key); + const generations: Generation[] = []; + + while (value) { + generations.push(deserializeStoredGeneration(value)); + idx += 1; + key = getCacheKey(prompt, llmKey, String(idx)); + value = await this.redisClient.get(key); + } + + return generations.length > 0 ? generations : null; + } + + /** + * Update the cache with the given generations. + * + * Note this overwrites any existing generations for the given prompt and LLM key. 
+ */ + public async update(prompt: string, llmKey: string, value: Generation[]) { + for (let i = 0; i < value.length; i += 1) { + const key = getCacheKey(prompt, llmKey, String(i)); + await this.redisClient.set( + key, + JSON.stringify(serializeGeneration(value[i])) + ); + } + } +} diff --git a/libs/langchain-community/src/callbacks/handlers/llmonitor.ts b/libs/langchain-community/src/callbacks/handlers/llmonitor.ts new file mode 100644 index 000000000000..f87ecea05599 --- /dev/null +++ b/libs/langchain-community/src/callbacks/handlers/llmonitor.ts @@ -0,0 +1,338 @@ +import monitor from "llmonitor"; +import { LLMonitorOptions, ChatMessage, cJSON } from "llmonitor/types"; +import { BaseRun, RunUpdate as BaseRunUpdate, KVMap } from "langsmith/schemas"; + +import { getEnvironmentVariable } from "@langchain/core/utils/env"; +import { + BaseMessage, +} from "@langchain/core/messages"; +import { ChainValues } from "@langchain/core/utils/types"; +import { LLMResult, Generation } from "@langchain/core/outputs"; +import { BaseCallbackHandler, BaseCallbackHandlerInput } from "@langchain/core/callbacks/base"; + +import { Serialized } from "../../load/serializable.js"; + + +type Role = "user" | "ai" | "system" | "function" | "tool"; + +// Langchain Helpers +// Input can be either a single message, an array of message, or an array of array of messages (batch requests) + +const parseRole = (id: string[]): Role => { + const roleHint = id[id.length - 1]; + + if (roleHint.includes("Human")) return "user"; + if (roleHint.includes("System")) return "system"; + if (roleHint.includes("AI")) return "ai"; + if (roleHint.includes("Function")) return "function"; + if (roleHint.includes("Tool")) return "tool"; + + return "ai"; +}; + +type Message = BaseMessage | Generation | string; + +type OutputMessage = ChatMessage | string; + +const PARAMS_TO_CAPTURE = [ + "stop", + "stop_sequences", + "function_call", + "functions", + "tools", + "tool_choice", + "response_format", +]; + +export const 
convertToLLMonitorMessages = ( + input: Message | Message[] | Message[][] +): OutputMessage | OutputMessage[] | OutputMessage[][] => { + const parseMessage = (raw: Message): OutputMessage => { + if (typeof raw === "string") return raw; + // sometimes the message is nested in a "message" property + if ("message" in raw) return parseMessage(raw.message as Message); + + // Serialize + const message = JSON.parse(JSON.stringify(raw)); + + try { + // "id" contains an array describing the constructor, with last item actual schema type + const role = parseRole(message.id); + + const obj = message.kwargs; + const text = message.text ?? obj.content; + + return { + role, + text, + ...(obj.additional_kwargs ?? {}), + }; + } catch (e) { + // if parsing fails, return the original message + return message.text ?? message; + } + }; + + if (Array.isArray(input)) { + // eslint-disable-next-line @typescript-eslint/ban-ts-comment + // @ts-ignore Confuses the compiler + return input.length === 1 + ? convertToLLMonitorMessages(input[0]) + : input.map(convertToLLMonitorMessages); + } + return parseMessage(input); +}; + +const parseInput = (rawInput: Record) => { + if (!rawInput) return null; + + const { input, inputs, question } = rawInput; + + if (input) return input; + if (inputs) return inputs; + if (question) return question; + + return rawInput; +}; + +const parseOutput = (rawOutput: Record) => { + if (!rawOutput) return null; + + const { text, output, answer, result } = rawOutput; + + if (text) return text; + if (answer) return answer; + if (output) return output; + if (result) return result; + + return rawOutput; +}; + +const parseExtraAndName = ( + llm: Serialized, + extraParams?: KVMap, + metadata?: KVMap +) => { + const params = { + ...(extraParams?.invocation_params ?? {}), + // eslint-disable-next-line @typescript-eslint/ban-ts-comment + // @ts-ignore this is a valid property + ...(llm?.kwargs ?? 
{}), + ...(metadata || {}), + }; + + const { model, model_name, modelName, model_id, userId, userProps, ...rest } = + params; + + const name = model || modelName || model_name || model_id || llm.id.at(-1); + + // Filter rest to only include params we want to capture + const extra = Object.fromEntries( + Object.entries(rest).filter( + ([key]) => + PARAMS_TO_CAPTURE.includes(key) || + ["string", "number", "boolean"].includes(typeof rest[key]) + ) + ) as cJSON; + + return { name, extra, userId, userProps }; +}; + +export interface Run extends BaseRun { + id: string; + child_runs: this[]; + child_execution_order: number; +} + +export interface RunUpdate extends BaseRunUpdate { + events: BaseRun["events"]; +} + +export interface LLMonitorHandlerFields + extends BaseCallbackHandlerInput, + LLMonitorOptions {} + +export class LLMonitorHandler + extends BaseCallbackHandler + implements LLMonitorHandlerFields +{ + name = "llmonitor_handler"; + + monitor: typeof monitor; + + constructor(fields: LLMonitorHandlerFields = {}) { + super(fields); + + this.monitor = monitor; + + if (fields) { + const { appId, apiUrl, verbose } = fields; + + this.monitor.init({ + verbose, + appId: appId ?? getEnvironmentVariable("LLMONITOR_APP_ID"), + apiUrl: apiUrl ?? 
getEnvironmentVariable("LLMONITOR_API_URL"), + }); + } + } + + async handleLLMStart( + llm: Serialized, + prompts: string[], + runId: string, + parentRunId?: string, + extraParams?: KVMap, + tags?: string[], + metadata?: KVMap + ): Promise { + const { name, extra, userId, userProps } = parseExtraAndName( + llm, + extraParams, + metadata + ); + + await this.monitor.trackEvent("llm", "start", { + runId, + parentRunId, + name, + input: convertToLLMonitorMessages(prompts), + extra, + userId, + userProps, + tags, + runtime: "langchain-js", + }); + } + + async handleChatModelStart( + llm: Serialized, + messages: BaseMessage[][], + runId: string, + parentRunId?: string, + extraParams?: KVMap, + tags?: string[], + metadata?: KVMap + ): Promise { + const { name, extra, userId, userProps } = parseExtraAndName( + llm, + extraParams, + metadata + ); + + await this.monitor.trackEvent("llm", "start", { + runId, + parentRunId, + name, + input: convertToLLMonitorMessages(messages), + extra, + userId, + userProps, + tags, + runtime: "langchain-js", + }); + } + + async handleLLMEnd(output: LLMResult, runId: string): Promise { + const { generations, llmOutput } = output; + + await this.monitor.trackEvent("llm", "end", { + runId, + output: convertToLLMonitorMessages(generations), + tokensUsage: { + completion: llmOutput?.tokenUsage?.completionTokens, + prompt: llmOutput?.tokenUsage?.promptTokens, + }, + }); + } + + async handleLLMError(error: Error, runId: string): Promise { + await this.monitor.trackEvent("llm", "error", { + runId, + error, + }); + } + + async handleChainStart( + chain: Serialized, + inputs: ChainValues, + runId: string, + parentRunId?: string, + tags?: string[], + metadata?: KVMap + ): Promise { + const { agentName, userId, userProps, ...rest } = metadata || {}; + + // allow the user to specify an agent name + const name = agentName || chain.id.at(-1); + + // Attempt to automatically detect if this is an agent or chain + const runType = + agentName || 
["AgentExecutor", "PlanAndExecute"].includes(name) + ? "agent" + : "chain"; + + await this.monitor.trackEvent(runType, "start", { + runId, + parentRunId, + name, + userId, + userProps, + input: parseInput(inputs) as cJSON, + extra: rest, + tags, + runtime: "langchain-js", + }); + } + + async handleChainEnd(outputs: ChainValues, runId: string): Promise { + await this.monitor.trackEvent("chain", "end", { + runId, + output: parseOutput(outputs) as cJSON, + }); + } + + async handleChainError(error: Error, runId: string): Promise { + await this.monitor.trackEvent("chain", "error", { + runId, + error, + }); + } + + async handleToolStart( + tool: Serialized, + input: string, + runId: string, + parentRunId?: string, + tags?: string[], + metadata?: KVMap + ): Promise { + const { toolName, userId, userProps, ...rest } = metadata || {}; + const name = toolName || tool.id.at(-1); + + await this.monitor.trackEvent("tool", "start", { + runId, + parentRunId, + name, + userId, + userProps, + input, + extra: rest, + tags, + runtime: "langchain-js", + }); + } + + async handleToolEnd(output: string, runId: string): Promise { + await this.monitor.trackEvent("tool", "end", { + runId, + output, + }); + } + + async handleToolError(error: Error, runId: string): Promise { + await this.monitor.trackEvent("tool", "error", { + runId, + error, + }); + } +} diff --git a/libs/langchain-community/src/chat_models.ts b/libs/langchain-community/src/chat_models.ts deleted file mode 100644 index 81683647a0bd..000000000000 --- a/libs/langchain-community/src/chat_models.ts +++ /dev/null @@ -1,88 +0,0 @@ -import { type BaseMessage } from "@langchain/core/messages"; -import { type BaseLanguageModelCallOptions } from "@langchain/core/language_models/base"; - -import { CallbackManagerForLLMRun } from "@langchain/core/callbacks/manager"; -import { - type BaseChatModelParams, - SimpleChatModel, -} from "@langchain/core/language_models/chat_models"; - -// Uncomment if implementing streaming - -// import { -// 
ChatGenerationChunk, -// } from "@langchain/core/outputs"; -// import { -// AIMessageChunk, -// } from "@langchain/core/messages"; - -/** - * Input to chat model class. - */ -export interface ChatIntegrationInput extends BaseChatModelParams {} - -/** - * Integration with a chat model. - */ -export class ChatIntegration< - CallOptions extends BaseLanguageModelCallOptions = BaseLanguageModelCallOptions - > - extends SimpleChatModel - implements ChatIntegrationInput -{ - // Used for tracing, replace with the same name as your class - static lc_name() { - return "ChatIntegration"; - } - - lc_serializable = true; - - constructor(fields?: ChatIntegrationInput) { - super(fields ?? {}); - } - - // Replace - _llmType() { - return "chat_integration"; - } - - /** - * For some given input messages and options, return a string output. - */ - _call( - _messages: BaseMessage[], - _options: this["ParsedCallOptions"], - _runManager?: CallbackManagerForLLMRun - ): Promise { - throw new Error("Not implemented."); - } - - /** - * Implement to support streaming. - * Should yield chunks iteratively. - */ - // async *_streamResponseChunks( - // messages: BaseMessage[], - // options: this["ParsedCallOptions"], - // runManager?: CallbackManagerForLLMRun - // ): AsyncGenerator { - // // All models have a built in `this.caller` property for retries - // const stream = await this.caller.call(async () => - // createStreamMethod() - // ); - // for await (const chunk of stream) { - // if (!chunk.done) { - // yield new ChatGenerationChunk({ - // text: chunk.response, - // message: new AIMessageChunk({ content: chunk.response }), - // }); - // await runManager?.handleLLMNewToken(chunk.response ?? 
""); - // } - // } - // } - - /** @ignore */ - _combineLLMOutput() { - return []; - } -} diff --git a/libs/langchain-community/src/chat_models/anthropic.ts b/libs/langchain-community/src/chat_models/anthropic.ts deleted file mode 100644 index a06de018ee42..000000000000 --- a/libs/langchain-community/src/chat_models/anthropic.ts +++ /dev/null @@ -1,451 +0,0 @@ -import { - Anthropic, - AI_PROMPT, - HUMAN_PROMPT, - ClientOptions, -} from "@anthropic-ai/sdk"; -import type { CompletionCreateParams } from "@anthropic-ai/sdk/resources/completions"; -import type { Stream } from "@anthropic-ai/sdk/streaming"; - -import { CallbackManagerForLLMRun } from "@langchain/core/callbacks/manager"; -import { - AIMessage, - AIMessageChunk, - type BaseMessage, - ChatMessage, -} from "@langchain/core/messages"; -import { - type ChatGeneration, - ChatGenerationChunk, - type ChatResult, -} from "@langchain/core/outputs"; -import { getEnvironmentVariable } from "@langchain/core/utils/env"; -import { - BaseChatModel, - type BaseChatModelParams, -} from "@langchain/core/language_models/chat_models"; -import { type BaseLanguageModelCallOptions } from "@langchain/core/language_models/base"; - -export { AI_PROMPT, HUMAN_PROMPT }; - -/** - * Extracts the custom role of a generic chat message. - * @param message The chat message from which to extract the custom role. - * @returns The custom role of the chat message. - */ -function extractGenericMessageCustomRole(message: ChatMessage) { - if ( - message.role !== AI_PROMPT && - message.role !== HUMAN_PROMPT && - message.role !== "" - ) { - console.warn(`Unknown message role: ${message.role}`); - } - - return message.role; -} - -/** - * Gets the Anthropic prompt from a base message. - * @param message The base message from which to get the Anthropic prompt. - * @returns The Anthropic prompt from the base message. 
- */ -function getAnthropicPromptFromMessage(message: BaseMessage): string { - const type = message._getType(); - switch (type) { - case "ai": - return AI_PROMPT; - case "human": - return HUMAN_PROMPT; - case "system": - return ""; - case "generic": { - if (!ChatMessage.isInstance(message)) - throw new Error("Invalid generic chat message"); - return extractGenericMessageCustomRole(message); - } - default: - throw new Error(`Unknown message type: ${type}`); - } -} - -export const DEFAULT_STOP_SEQUENCES = [HUMAN_PROMPT]; - -/** - * Input to AnthropicChat class. - */ -export interface AnthropicInput { - /** Amount of randomness injected into the response. Ranges - * from 0 to 1. Use temp closer to 0 for analytical / - * multiple choice, and temp closer to 1 for creative - * and generative tasks. - */ - temperature?: number; - - /** Only sample from the top K options for each subsequent - * token. Used to remove "long tail" low probability - * responses. Defaults to -1, which disables it. - */ - topK?: number; - - /** Does nucleus sampling, in which we compute the - * cumulative distribution over all the options for each - * subsequent token in decreasing probability order and - * cut it off once it reaches a particular probability - * specified by top_p. Defaults to -1, which disables it. - * Note that you should either alter temperature or top_p, - * but not both. - */ - topP?: number; - - /** A maximum number of tokens to generate before stopping. */ - maxTokensToSample: number; - - /** A list of strings upon which to stop generating. - * You probably want `["\n\nHuman:"]`, as that's the cue for - * the next turn in the dialog agent. 
- */ - stopSequences?: string[]; - - /** Whether to stream the results or not */ - streaming?: boolean; - - /** Anthropic API key */ - anthropicApiKey?: string; - - /** Anthropic API URL */ - anthropicApiUrl?: string; - - /** Model name to use */ - modelName: string; - - /** Overridable Anthropic ClientOptions */ - clientOptions: ClientOptions; - - /** Holds any additional parameters that are valid to pass to {@link - * https://console.anthropic.com/docs/api/reference | - * `anthropic.complete`} that are not explicitly specified on this class. - */ - invocationKwargs?: Kwargs; -} - -/** - * A type representing additional parameters that can be passed to the - * Anthropic API. - */ -// eslint-disable-next-line @typescript-eslint/no-explicit-any -type Kwargs = Record; - -/** - * Wrapper around Anthropic large language models. - * - * To use you should have the `@anthropic-ai/sdk` package installed, with the - * `ANTHROPIC_API_KEY` environment variable set. - * - * @remarks - * Any parameters that are valid to be passed to {@link - * https://console.anthropic.com/docs/api/reference | - * `anthropic.complete`} can be passed through {@link invocationKwargs}, - * even if not explicitly available on this class. - * @example - * ```typescript - * const model = new ChatAnthropic({ - * temperature: 0.9, - * anthropicApiKey: 'YOUR-API-KEY', - * }); - * const res = await model.invoke({ input: 'Hello!' 
}); - * console.log(res); - * ``` - */ -export class ChatAnthropic< - CallOptions extends BaseLanguageModelCallOptions = BaseLanguageModelCallOptions - > - extends BaseChatModel - implements AnthropicInput -{ - static lc_name() { - return "ChatAnthropic"; - } - - get lc_secrets(): { [key: string]: string } | undefined { - return { - anthropicApiKey: "ANTHROPIC_API_KEY", - }; - } - - get lc_aliases(): Record { - return { - modelName: "model", - }; - } - - lc_namespace = ["langchain-community", "chat_models", this._llmType()]; - - lc_serializable = true; - - anthropicApiKey?: string; - - apiUrl?: string; - - temperature = 1; - - topK = -1; - - topP = -1; - - maxTokensToSample = 2048; - - modelName = "claude-2"; - - invocationKwargs?: Kwargs; - - stopSequences?: string[]; - - streaming = false; - - clientOptions: ClientOptions; - - // Used for non-streaming requests - protected batchClient: Anthropic; - - // Used for streaming requests - protected streamingClient: Anthropic; - - constructor(fields?: Partial & BaseChatModelParams) { - super(fields ?? {}); - - this.anthropicApiKey = - fields?.anthropicApiKey ?? getEnvironmentVariable("ANTHROPIC_API_KEY"); - if (!this.anthropicApiKey) { - throw new Error("Anthropic API key not found"); - } - - // Support overriding the default API URL (i.e., https://api.anthropic.com) - this.apiUrl = fields?.anthropicApiUrl; - - this.modelName = fields?.modelName ?? this.modelName; - this.invocationKwargs = fields?.invocationKwargs ?? {}; - - this.temperature = fields?.temperature ?? this.temperature; - this.topK = fields?.topK ?? this.topK; - this.topP = fields?.topP ?? this.topP; - this.maxTokensToSample = - fields?.maxTokensToSample ?? this.maxTokensToSample; - this.stopSequences = fields?.stopSequences ?? this.stopSequences; - - this.streaming = fields?.streaming ?? false; - this.clientOptions = fields?.clientOptions ?? 
{}; - } - - /** - * Get the parameters used to invoke the model - */ - invocationParams( - options?: this["ParsedCallOptions"] - ): Omit & Kwargs { - return { - model: this.modelName, - temperature: this.temperature, - top_k: this.topK, - top_p: this.topP, - stop_sequences: - options?.stop?.concat(DEFAULT_STOP_SEQUENCES) ?? - this.stopSequences ?? - DEFAULT_STOP_SEQUENCES, - max_tokens_to_sample: this.maxTokensToSample, - stream: this.streaming, - ...this.invocationKwargs, - }; - } - - /** @ignore */ - _identifyingParams() { - return { - model_name: this.modelName, - ...this.invocationParams(), - }; - } - - /** - * Get the identifying parameters for the model - */ - identifyingParams() { - return { - model_name: this.modelName, - ...this.invocationParams(), - }; - } - - async *_streamResponseChunks( - messages: BaseMessage[], - options: this["ParsedCallOptions"], - runManager?: CallbackManagerForLLMRun - ): AsyncGenerator { - const params = this.invocationParams(options); - const stream = await this.createStreamWithRetry({ - ...params, - prompt: this.formatMessagesAsPrompt(messages), - }); - let modelSent = false; - let stopReasonSent = false; - for await (const data of stream) { - if (options.signal?.aborted) { - stream.controller.abort(); - throw new Error("AbortError: User aborted the request."); - } - const additional_kwargs: Record = {}; - if (data.model && !modelSent) { - additional_kwargs.model = data.model; - modelSent = true; - } else if (data.stop_reason && !stopReasonSent) { - additional_kwargs.stop_reason = data.stop_reason; - stopReasonSent = true; - } - const delta = data.completion ?? ""; - yield new ChatGenerationChunk({ - message: new AIMessageChunk({ - content: delta, - additional_kwargs, - }), - text: delta, - }); - await runManager?.handleLLMNewToken(delta); - if (data.stop_reason) { - break; - } - } - } - - /** - * Formats messages as a prompt for the model. - * @param messages The base messages to format as a prompt. 
- * @returns The formatted prompt. - */ - protected formatMessagesAsPrompt(messages: BaseMessage[]): string { - return ( - messages - .map((message) => { - const messagePrompt = getAnthropicPromptFromMessage(message); - return `${messagePrompt} ${message.content}`; - }) - .join("") + AI_PROMPT - ); - } - - /** @ignore */ - async _generate( - messages: BaseMessage[], - options: this["ParsedCallOptions"], - runManager?: CallbackManagerForLLMRun - ): Promise { - if (this.stopSequences && options.stop) { - throw new Error( - `"stopSequence" parameter found in input and default params` - ); - } - - const params = this.invocationParams(options); - let response; - if (params.stream) { - response = { - completion: "", - model: "", - stop_reason: "", - }; - const stream = await this._streamResponseChunks( - messages, - options, - runManager - ); - for await (const chunk of stream) { - response.completion += chunk.message.content; - response.model = - (chunk.message.additional_kwargs.model as string) ?? response.model; - response.stop_reason = - (chunk.message.additional_kwargs.stop_reason as string) ?? - response.stop_reason; - } - } else { - response = await this.completionWithRetry( - { - ...params, - prompt: this.formatMessagesAsPrompt(messages), - }, - { signal: options.signal } - ); - } - - const generations: ChatGeneration[] = (response.completion ?? "") - .split(AI_PROMPT) - .map((message) => ({ - text: message, - message: new AIMessage(message), - })); - - return { - generations, - }; - } - - /** - * Creates a streaming request with retry. - * @param request The parameters for creating a completion. - * @returns A streaming request. - */ - protected async createStreamWithRetry( - request: CompletionCreateParams & Kwargs - ): Promise> { - if (!this.streamingClient) { - const options = this.apiUrl ? 
{ baseURL: this.apiUrl } : undefined; - this.streamingClient = new Anthropic({ - ...this.clientOptions, - ...options, - apiKey: this.anthropicApiKey, - maxRetries: 0, - }); - } - const makeCompletionRequest = async () => - this.streamingClient.completions.create( - { ...request, stream: true }, - { headers: request.headers } - ); - return this.caller.call(makeCompletionRequest); - } - - /** @ignore */ - protected async completionWithRetry( - request: CompletionCreateParams & Kwargs, - options: { signal?: AbortSignal } - ): Promise { - if (!this.anthropicApiKey) { - throw new Error("Missing Anthropic API key."); - } - if (!this.batchClient) { - const options = this.apiUrl ? { baseURL: this.apiUrl } : undefined; - this.batchClient = new Anthropic({ - ...this.clientOptions, - ...options, - apiKey: this.anthropicApiKey, - maxRetries: 0, - }); - } - const makeCompletionRequest = async () => - this.batchClient.completions.create( - { ...request, stream: false }, - { headers: request.headers } - ); - return this.caller.callWithOptions( - { signal: options.signal }, - makeCompletionRequest - ); - } - - _llmType() { - return "anthropic"; - } - - /** @ignore */ - _combineLLMOutput() { - return []; - } -} diff --git a/libs/langchain-community/src/chat_models/tests/chatanthropic.int.test.ts b/libs/langchain-community/src/chat_models/tests/chatanthropic.int.test.ts deleted file mode 100644 index cfc572c9584d..000000000000 --- a/libs/langchain-community/src/chat_models/tests/chatanthropic.int.test.ts +++ /dev/null @@ -1,313 +0,0 @@ -/* eslint-disable no-process-env */ - -import { expect, test } from "@jest/globals"; -import { HUMAN_PROMPT } from "@anthropic-ai/sdk"; -import { ChatMessage, HumanMessage } from "../../schema/index.js"; -import { ChatPromptValue } from "../../prompts/chat.js"; -import { - PromptTemplate, - ChatPromptTemplate, - AIMessagePromptTemplate, - HumanMessagePromptTemplate, - SystemMessagePromptTemplate, -} from "../../prompts/index.js"; -import { 
ChatAnthropic } from "../anthropic.js"; -import { CallbackManager } from "../../callbacks/index.js"; - -test("Test ChatAnthropic", async () => { - const chat = new ChatAnthropic({ modelName: "claude-instant-v1" }); - const message = new HumanMessage("Hello!"); - const res = await chat.call([message]); - console.log({ res }); -}); - -test("Test ChatAnthropic Generate", async () => { - const chat = new ChatAnthropic({ - modelName: "claude-instant-v1", - }); - const message = new HumanMessage("Hello!"); - const res = await chat.generate([[message], [message]]); - expect(res.generations.length).toBe(2); - for (const generation of res.generations) { - expect(generation.length).toBe(1); - for (const message of generation) { - console.log(message.text); - } - } - console.log({ res }); -}); - -test("Test ChatAnthropic Generate w/ ClientOptions", async () => { - const chat = new ChatAnthropic({ - modelName: "claude-instant-v1", - clientOptions: { - defaultHeaders: { - "Helicone-Auth": "HELICONE_API_KEY", - }, - }, - }); - const message = new HumanMessage("Hello!"); - const res = await chat.generate([[message], [message]]); - expect(res.generations.length).toBe(2); - for (const generation of res.generations) { - expect(generation.length).toBe(1); - for (const message of generation) { - console.log(message.text); - } - } - console.log({ res }); -}); - -test("Test ChatAnthropic Generate with a signal in call options", async () => { - const chat = new ChatAnthropic({ - modelName: "claude-instant-v1", - }); - const controller = new AbortController(); - const message = new HumanMessage( - "How is your day going? Be extremely verbose!" 
- ); - await expect(() => { - const res = chat.generate([[message], [message]], { - signal: controller.signal, - }); - setTimeout(() => { - controller.abort(); - }, 1000); - return res; - }).rejects.toThrow(); -}, 10000); - -test("Test ChatAnthropic tokenUsage with a batch", async () => { - const model = new ChatAnthropic({ - temperature: 0, - modelName: "claude-instant-v1", - }); - const res = await model.generate([ - [new HumanMessage(`Hello!`)], - [new HumanMessage(`Hi!`)], - ]); - console.log({ res }); -}); - -test("Test ChatAnthropic in streaming mode", async () => { - let nrNewTokens = 0; - let streamedCompletion = ""; - - const model = new ChatAnthropic({ - modelName: "claude-instant-v1", - streaming: true, - callbacks: CallbackManager.fromHandlers({ - async handleLLMNewToken(token: string) { - nrNewTokens += 1; - streamedCompletion += token; - }, - }), - }); - const message = new HumanMessage("Hello!"); - const res = await model.call([message]); - console.log({ res }); - - expect(nrNewTokens > 0).toBe(true); - expect(res.content).toBe(streamedCompletion); -}); - -test("Test ChatAnthropic in streaming mode with a signal", async () => { - let nrNewTokens = 0; - let streamedCompletion = ""; - - const model = new ChatAnthropic({ - modelName: "claude-instant-v1", - streaming: true, - callbacks: CallbackManager.fromHandlers({ - async handleLLMNewToken(token: string) { - nrNewTokens += 1; - streamedCompletion += token; - }, - }), - }); - const controller = new AbortController(); - const message = new HumanMessage( - "Hello! 
Give me an extremely verbose response" - ); - await expect(() => { - const res = model.call([message], { - signal: controller.signal, - }); - setTimeout(() => { - controller.abort(); - }, 500); - return res; - }).rejects.toThrow(); - - console.log({ nrNewTokens, streamedCompletion }); -}, 5000); - -test("Test ChatAnthropic prompt value", async () => { - const chat = new ChatAnthropic({ - modelName: "claude-instant-v1", - }); - const message = new HumanMessage("Hello!"); - const res = await chat.generatePrompt([new ChatPromptValue([message])]); - expect(res.generations.length).toBe(1); - for (const generation of res.generations) { - for (const g of generation) { - console.log(g.text); - } - } - console.log({ res }); -}); - -test("ChatAnthropic, docs, prompt templates", async () => { - const chat = new ChatAnthropic({ - modelName: "claude-instant-v1", - temperature: 0, - }); - - const systemPrompt = PromptTemplate.fromTemplate( - "You are a helpful assistant that translates {input_language} to {output_language}." 
- ); - - const chatPrompt = ChatPromptTemplate.fromMessages([ - new SystemMessagePromptTemplate(systemPrompt), - HumanMessagePromptTemplate.fromTemplate("{text}"), - ]); - - const responseA = await chat.generatePrompt([ - await chatPrompt.formatPromptValue({ - input_language: "English", - output_language: "French", - text: "I love programming.", - }), - ]); - - console.log(responseA.generations); -}); - -test("ChatAnthropic, longer chain of messages", async () => { - const chat = new ChatAnthropic({ - modelName: "claude-v1", - temperature: 0, - }); - - const chatPrompt = ChatPromptTemplate.fromMessages([ - HumanMessagePromptTemplate.fromTemplate(`Hi, my name is Joe!`), - AIMessagePromptTemplate.fromTemplate(`Nice to meet you, Joe!`), - HumanMessagePromptTemplate.fromTemplate("{text}"), - ]); - - const responseA = await chat.generatePrompt([ - await chatPrompt.formatPromptValue({ - text: "What did I just say my name was?", - }), - ]); - - console.log(responseA.generations); -}); - -test("ChatAnthropic, Anthropic apiUrl set manually via constructor", async () => { - // Pass the default URL through (should use this, and work as normal) - const anthropicApiUrl = "https://api.anthropic.com"; - const chat = new ChatAnthropic({ - modelName: "claude-instant-v1", - anthropicApiUrl, - }); - const message = new HumanMessage("Hello!"); - const res = await chat.call([message]); - console.log({ res }); -}); - -test("ChatAnthropic, Claude V2", async () => { - const chat = new ChatAnthropic({ - modelName: "claude-2", - temperature: 0, - }); - - const chatPrompt = ChatPromptTemplate.fromMessages([ - HumanMessagePromptTemplate.fromTemplate(`Hi, my name is Joe!`), - AIMessagePromptTemplate.fromTemplate(`Nice to meet you, Joe!`), - HumanMessagePromptTemplate.fromTemplate("{text}"), - ]); - - const responseA = await chat.generatePrompt([ - await chatPrompt.formatPromptValue({ - text: "What did I just say my name was?", - }), - ]); - - console.log(responseA.generations); -}); - 
-test("ChatAnthropic with specific roles in ChatMessage", async () => { - const chat = new ChatAnthropic({ - modelName: "claude-instant-v1", - maxTokensToSample: 10, - }); - const user_message = new ChatMessage("Hello!", HUMAN_PROMPT); - const res = await chat.call([user_message]); - console.log({ res }); -}); - -test("Test ChatAnthropic stream method", async () => { - const model = new ChatAnthropic({ - maxTokensToSample: 50, - modelName: "claude-instant-v1", - }); - const stream = await model.stream("Print hello world."); - const chunks = []; - for await (const chunk of stream) { - console.log(chunk); - chunks.push(chunk); - } - expect(chunks.length).toBeGreaterThan(1); -}); - -test("Test ChatAnthropic stream method with abort", async () => { - await expect(async () => { - const model = new ChatAnthropic({ - maxTokensToSample: 500, - modelName: "claude-instant-v1", - }); - const stream = await model.stream( - "How is your day going? Be extremely verbose.", - { - signal: AbortSignal.timeout(1000), - } - ); - for await (const chunk of stream) { - console.log(chunk); - } - }).rejects.toThrow(); -}); - -test("Test ChatAnthropic stream method with early break", async () => { - const model = new ChatAnthropic({ - maxTokensToSample: 50, - modelName: "claude-instant-v1", - }); - const stream = await model.stream( - "How is your day going? Be extremely verbose." 
- ); - let i = 0; - for await (const chunk of stream) { - console.log(chunk); - i += 1; - if (i > 10) { - break; - } - } -}); - -test("Test ChatAnthropic headers passed through", async () => { - const chat = new ChatAnthropic({ - modelName: "claude-instant-v1", - anthropicApiKey: "NOT_REAL", - invocationKwargs: { - headers: { - "X-Api-Key": process.env.ANTHROPIC_API_KEY, - }, - }, - }); - const message = new HumanMessage("Hello!"); - const res = await chat.call([message]); - console.log({ res }); -}); diff --git a/libs/langchain-community/src/chat_models/tests/chatbaiduwenxin.int.test.ts b/libs/langchain-community/src/chat_models/tests/chatbaiduwenxin.int.test.ts index 3608fd273c15..8c15dabc7393 100644 --- a/libs/langchain-community/src/chat_models/tests/chatbaiduwenxin.int.test.ts +++ b/libs/langchain-community/src/chat_models/tests/chatbaiduwenxin.int.test.ts @@ -1,6 +1,6 @@ import { test, expect } from "@jest/globals"; import { ChatBaiduWenxin } from "../baiduwenxin.js"; -import { SystemMessage, HumanMessage } from "../../schema/index.js"; +import { SystemMessage, HumanMessage } from "@langchain/core/messages"; interface TestConfig { modelName: string | undefined; diff --git a/libs/langchain-community/src/chat_models/tests/chatbedrock.int.test.ts b/libs/langchain-community/src/chat_models/tests/chatbedrock.int.test.ts index 0c5db6bacb15..2b218cd746e2 100644 --- a/libs/langchain-community/src/chat_models/tests/chatbedrock.int.test.ts +++ b/libs/langchain-community/src/chat_models/tests/chatbedrock.int.test.ts @@ -3,7 +3,7 @@ import { test, expect } from "@jest/globals"; import { BedrockChat } from "../bedrock/web.js"; -import { HumanMessage } from "../../schema/index.js"; +import { HumanMessage } from "@langchain/core/messages"; // void testChatModel( // "Test Bedrock chat model: Llama2 13B v1", diff --git a/libs/langchain-community/src/chat_models/tests/chatcloudflare_workersai.int.test.ts 
b/libs/langchain-community/src/chat_models/tests/chatcloudflare_workersai.int.test.ts index 7d0b3357add5..09ecd3ad17f0 100644 --- a/libs/langchain-community/src/chat_models/tests/chatcloudflare_workersai.int.test.ts +++ b/libs/langchain-community/src/chat_models/tests/chatcloudflare_workersai.int.test.ts @@ -1,14 +1,14 @@ import { describe, test } from "@jest/globals"; -import { ChatMessage, HumanMessage } from "../../schema/index.js"; +import { ChatMessage, HumanMessage } from "@langchain/core/messages"; import { PromptTemplate, ChatPromptTemplate, AIMessagePromptTemplate, HumanMessagePromptTemplate, SystemMessagePromptTemplate, -} from "../../prompts/index.js"; +} from "@langchain/core/prompts"; +import { getEnvironmentVariable } from "@langchain/core/utils/env"; import { ChatCloudflareWorkersAI } from "../cloudflare_workersai.js"; -import { getEnvironmentVariable } from "../../util/env.js"; describe("ChatCloudflareWorkersAI", () => { test("call", async () => { diff --git a/libs/langchain-community/src/chat_models/tests/chatfireworks.int.test.ts b/libs/langchain-community/src/chat_models/tests/chatfireworks.int.test.ts index 7a0e268d90dd..fe934e8d856a 100644 --- a/libs/langchain-community/src/chat_models/tests/chatfireworks.int.test.ts +++ b/libs/langchain-community/src/chat_models/tests/chatfireworks.int.test.ts @@ -1,12 +1,12 @@ import { describe, test } from "@jest/globals"; -import { ChatMessage, HumanMessage } from "../../schema/index.js"; +import { ChatMessage, HumanMessage } from "@langchain/core/messages"; import { PromptTemplate, ChatPromptTemplate, AIMessagePromptTemplate, HumanMessagePromptTemplate, SystemMessagePromptTemplate, -} from "../../prompts/index.js"; +} from "@langchain/core/prompts"; import { ChatFireworks } from "../fireworks.js"; describe("ChatFireworks", () => { diff --git a/libs/langchain-community/src/chat_models/tests/chatgooglepalm.int.test.ts b/libs/langchain-community/src/chat_models/tests/chatgooglepalm.int.test.ts deleted file 
mode 100644 index 967335eb1072..000000000000 --- a/libs/langchain-community/src/chat_models/tests/chatgooglepalm.int.test.ts +++ /dev/null @@ -1,138 +0,0 @@ -import { test } from "@jest/globals"; -import { HumanMessage, AIMessage } from "../../schema/index.js"; -import { - PromptTemplate, - ChatPromptTemplate, - MessagesPlaceholder, - AIMessagePromptTemplate, - HumanMessagePromptTemplate, - SystemMessagePromptTemplate, -} from "../../prompts/index.js"; -import { ConversationChain } from "../../chains/conversation.js"; -import { BufferMemory } from "../../memory/buffer_memory.js"; -import { ChatGooglePaLM } from "../googlepalm.js"; - -test.skip("Test ChatGooglePalm", async () => { - const chat = new ChatGooglePaLM({ - maxRetries: 1, - }); - const message = new HumanMessage("Hello!"); - const res = await chat.call([message]); - console.log({ res }); -}); - -test.skip("Test ChatGooglePalm generate", async () => { - const chat = new ChatGooglePaLM({ - maxRetries: 1, - }); - const message = new HumanMessage("Hello!"); - const res = await chat.generate([[message]]); - console.log(JSON.stringify(res, null, 2)); -}); - -test.skip("ChatGooglePalm, prompt templates", async () => { - const chat = new ChatGooglePaLM({ - maxRetries: 1, - examples: [ - { - input: new HumanMessage("What is your favorite sock color?"), - output: new AIMessage("My favorite sock color be arrrr-ange!"), - }, - ], - }); - - // PaLM doesn't support translation yet - const systemPrompt = PromptTemplate.fromTemplate( - "You are a helpful assistant who must always respond like a {job}." 
- ); - - const chatPrompt = ChatPromptTemplate.fromMessages([ - new SystemMessagePromptTemplate(systemPrompt), - HumanMessagePromptTemplate.fromTemplate("{text}"), - ]); - - const responseA = await chat.generatePrompt([ - await chatPrompt.formatPromptValue({ - job: "pirate", - text: "What would be a good company name a company that makes colorful socks?", - }), - ]); - - console.log(responseA.generations); -}); - -test.skip("ChatGooglePalm, longer chain of messages", async () => { - const chat = new ChatGooglePaLM({ - maxRetries: 1, - }); - - const chatPrompt = ChatPromptTemplate.fromMessages([ - AIMessagePromptTemplate.fromTemplate( - `Hello there! I'm Droid, your personal assistant.` - ), - HumanMessagePromptTemplate.fromTemplate(`Hi, my name is Joe!`), - AIMessagePromptTemplate.fromTemplate( - `Nice to meet you, Joe! How can I help you today?` - ), - HumanMessagePromptTemplate.fromTemplate("{text}"), - ]); - - const responseA = await chat.generatePrompt([ - await chatPrompt.formatPromptValue({ - text: "What did I just say my name was?", - }), - ]); - - console.log(responseA.generations); -}); - -test.skip("ChatGooglePalm, with a memory in a chain", async () => { - const chatPrompt = ChatPromptTemplate.fromMessages([ - SystemMessagePromptTemplate.fromTemplate( - "You are a helpful assistant who must always respond like a pirate" - ), - new MessagesPlaceholder("history"), - HumanMessagePromptTemplate.fromTemplate("{input}"), - ]); - - const chain = new ConversationChain({ - memory: new BufferMemory({ returnMessages: true, memoryKey: "history" }), - prompt: chatPrompt, - llm: new ChatGooglePaLM({ - maxRetries: 1, - }), - }); - - const response = await chain.call({ - input: "Hi, my name is afirstenberg!", - }); - - console.log(response); - - const response2 = await chain.call({ - input: "What did I say my name was?", - }); - - console.log(response2); -}); - -test.skip("ChatGooglePalm, chain of messages on code", async () => { - const chat = new ChatGooglePaLM({ - 
maxRetries: 1, - }); - - const chatPrompt = ChatPromptTemplate.fromMessages([ - SystemMessagePromptTemplate.fromTemplate( - `Answer all questions using Python and just show the code without an explanation.` - ), - HumanMessagePromptTemplate.fromTemplate("{text}"), - ]); - - const responseA = await chat.generatePrompt([ - await chatPrompt.formatPromptValue({ - text: "How can I write a for loop counting to 10?", - }), - ]); - - console.log(JSON.stringify(responseA.generations, null, 1)); -}); diff --git a/libs/langchain-community/src/chat_models/tests/chatgooglepalm.test.ts b/libs/langchain-community/src/chat_models/tests/chatgooglepalm.test.ts index 11d99dd77f01..72e9927aa503 100644 --- a/libs/langchain-community/src/chat_models/tests/chatgooglepalm.test.ts +++ b/libs/langchain-community/src/chat_models/tests/chatgooglepalm.test.ts @@ -5,7 +5,7 @@ import { BaseMessage, HumanMessage, SystemMessage, -} from "../../schema/index.js"; +} from "@langchain/core/messages"; import { ChatGooglePaLM } from "../googlepalm.js"; // Test class extending actual class to test private & protected methods diff --git a/libs/langchain-community/src/chat_models/tests/chatgooglevertexai.int.test.ts b/libs/langchain-community/src/chat_models/tests/chatgooglevertexai.int.test.ts deleted file mode 100644 index f691ae72ce55..000000000000 --- a/libs/langchain-community/src/chat_models/tests/chatgooglevertexai.int.test.ts +++ /dev/null @@ -1,145 +0,0 @@ -import { describe, expect, test } from "@jest/globals"; -import { ChatMessage, HumanMessage } from "../../schema/index.js"; -import { - PromptTemplate, - ChatPromptTemplate, - MessagesPlaceholder, - AIMessagePromptTemplate, - HumanMessagePromptTemplate, - SystemMessagePromptTemplate, -} from "../../prompts/index.js"; -import { ConversationChain } from "../../chains/conversation.js"; -import { BufferMemory } from "../../memory/buffer_memory.js"; -import { ChatGoogleVertexAI } from "../googlevertexai/index.js"; - -describe("ChatGoogleVertexAI", 
() => { - test("call", async () => { - const chat = new ChatGoogleVertexAI(); - const message = new HumanMessage("Hello!"); - const res = await chat.call([message]); - console.log({ res }); - }); - - test("32k", async () => { - const chat = new ChatGoogleVertexAI({ - model: "chat-bison-32k", - }); - const message = new HumanMessage("Hello!"); - const res = await chat.call([message]); - console.log({ res }); - }); - - test("generate", async () => { - const chat = new ChatGoogleVertexAI(); - const message = new HumanMessage("Hello!"); - const res = await chat.generate([[message]]); - console.log(JSON.stringify(res, null, 2)); - }); - - test("custom messages", async () => { - const chat = new ChatGoogleVertexAI(); - const res = await chat.call([new ChatMessage("Hello!", "user")]); - console.log(JSON.stringify(res, null, 2)); - }); - - test("prompt templates", async () => { - const chat = new ChatGoogleVertexAI(); - - // PaLM doesn't support translation yet - const systemPrompt = PromptTemplate.fromTemplate( - "You are a helpful assistant who must always respond like a {job}." 
- ); - - const chatPrompt = ChatPromptTemplate.fromMessages([ - new SystemMessagePromptTemplate(systemPrompt), - HumanMessagePromptTemplate.fromTemplate("{text}"), - ]); - - const responseA = await chat.generatePrompt([ - await chatPrompt.formatPromptValue({ - job: "pirate", - text: "What would be a good company name a company that makes colorful socks?", - }), - ]); - - console.log(responseA.generations); - }); - - test("longer chain of messages", async () => { - const chat = new ChatGoogleVertexAI(); - - const chatPrompt = ChatPromptTemplate.fromMessages([ - HumanMessagePromptTemplate.fromTemplate(`Hi, my name is Joe!`), - AIMessagePromptTemplate.fromTemplate(`Nice to meet you, Joe!`), - HumanMessagePromptTemplate.fromTemplate("{text}"), - ]); - - const responseA = await chat.generatePrompt([ - await chatPrompt.formatPromptValue({ - text: "What did I just say my name was?", - }), - ]); - - console.log(responseA.generations); - }); - - test("with a memory in a chain", async () => { - const chatPrompt = ChatPromptTemplate.fromMessages([ - SystemMessagePromptTemplate.fromTemplate( - "You are a helpful assistant who must always respond like a pirate" - ), - new MessagesPlaceholder("history"), - HumanMessagePromptTemplate.fromTemplate("{input}"), - ]); - - const chain = new ConversationChain({ - memory: new BufferMemory({ returnMessages: true, memoryKey: "history" }), - prompt: chatPrompt, - llm: new ChatGoogleVertexAI(), - }); - - const response = await chain.call({ - input: "Hi, my name is afirstenberg!", - }); - - console.log(response); - - const response2 = await chain.call({ - input: "What did I say my name was?", - }); - - console.log(response2); - }); - - test("code, chain of messages", async () => { - const chat = new ChatGoogleVertexAI({ model: "codechat-bison" }); - - const chatPrompt = ChatPromptTemplate.fromMessages([ - SystemMessagePromptTemplate.fromTemplate( - `Answer all questions using Python and just show the code without an explanation.` - ), - 
HumanMessagePromptTemplate.fromTemplate("{text}"), - ]); - - const responseA = await chat.generatePrompt([ - await chatPrompt.formatPromptValue({ - text: "How can I write a for loop counting to 10?", - }), - ]); - - console.log(JSON.stringify(responseA.generations, null, 1)); - }); - - test("stream method", async () => { - const model = new ChatGoogleVertexAI(); - const stream = await model.stream( - "What is the answer to life, the universe, and everything? Be verbose." - ); - const chunks = []; - for await (const chunk of stream) { - console.log("chunk", chunk); - chunks.push(chunk); - } - expect(chunks.length).toBeGreaterThan(1); - }); -}); diff --git a/libs/langchain-community/src/chat_models/tests/chatgooglevertexai.test.ts b/libs/langchain-community/src/chat_models/tests/chatgooglevertexai.test.ts index fe05d8092f19..9c3ee527d430 100644 --- a/libs/langchain-community/src/chat_models/tests/chatgooglevertexai.test.ts +++ b/libs/langchain-community/src/chat_models/tests/chatgooglevertexai.test.ts @@ -4,7 +4,7 @@ import { SystemMessage, HumanMessage, AIMessage, -} from "../../schema/index.js"; +} from "@langchain/core/messages"; import { ChatExample, ChatGoogleVertexAI } from "../googlevertexai/index.js"; test("Google messages", async () => { diff --git a/libs/langchain-community/src/chat_models/tests/chatgooglevertexai_web.int.test.ts b/libs/langchain-community/src/chat_models/tests/chatgooglevertexai_web.int.test.ts deleted file mode 100644 index 4ed02cd8554c..000000000000 --- a/libs/langchain-community/src/chat_models/tests/chatgooglevertexai_web.int.test.ts +++ /dev/null @@ -1,146 +0,0 @@ -// Requires corepack: https://nodejs.org/api/corepack.html -// Enable with: -// $ corepack enable -import { describe, expect, test } from "@jest/globals"; -import { ChatMessage, HumanMessage } from "../../schema/index.js"; -import { - PromptTemplate, - ChatPromptTemplate, - MessagesPlaceholder, - AIMessagePromptTemplate, - HumanMessagePromptTemplate, - 
SystemMessagePromptTemplate, -} from "../../prompts/index.js"; -import { ConversationChain } from "../../chains/conversation.js"; -import { BufferMemory } from "../../memory/buffer_memory.js"; -import { ChatGoogleVertexAI } from "../googlevertexai/web.js"; - -describe("ChatGoogleVertexAIWeb", () => { - test("call", async () => { - const chat = new ChatGoogleVertexAI(); - const message = new HumanMessage("Hello!"); - const res = await chat.call([message]); - console.log({ res }); - }); - - test("32k", async () => { - const chat = new ChatGoogleVertexAI({ - model: "chat-bison-32k", - }); - const message = new HumanMessage("Hello!"); - const res = await chat.call([message]); - console.log({ res }); - }); - - test("generate", async () => { - const chat = new ChatGoogleVertexAI(); - const message = new HumanMessage("Hello!"); - const res = await chat.generate([[message]]); - console.log(JSON.stringify(res, null, 2)); - }); - - test("custom messages", async () => { - const chat = new ChatGoogleVertexAI(); - const res = await chat.call([new ChatMessage("Hello!", "user")]); - console.log(JSON.stringify(res, null, 2)); - }); - - test("prompt templates", async () => { - const chat = new ChatGoogleVertexAI(); - - // PaLM doesn't support translation yet - const systemPrompt = PromptTemplate.fromTemplate( - "You are a helpful assistant who must always respond like a {job}." 
- ); - - const chatPrompt = ChatPromptTemplate.fromMessages([ - new SystemMessagePromptTemplate(systemPrompt), - HumanMessagePromptTemplate.fromTemplate("{text}"), - ]); - - const responseA = await chat.generatePrompt([ - await chatPrompt.formatPromptValue({ - job: "pirate", - text: "What would be a good company name a company that makes colorful socks?", - }), - ]); - - console.log(responseA.generations); - }); - - test("longer chain of messages", async () => { - const chat = new ChatGoogleVertexAI(); - - const chatPrompt = ChatPromptTemplate.fromMessages([ - HumanMessagePromptTemplate.fromTemplate(`Hi, my name is Joe!`), - AIMessagePromptTemplate.fromTemplate(`Nice to meet you, Joe!`), - HumanMessagePromptTemplate.fromTemplate("{text}"), - ]); - - const responseA = await chat.generatePrompt([ - await chatPrompt.formatPromptValue({ - text: "What did I just say my name was?", - }), - ]); - - console.log(responseA.generations); - }); - - test("with a memory in a chain", async () => { - const chatPrompt = ChatPromptTemplate.fromMessages([ - SystemMessagePromptTemplate.fromTemplate( - "You are a helpful assistant who must always respond like a pirate" - ), - new MessagesPlaceholder("history"), - HumanMessagePromptTemplate.fromTemplate("{input}"), - ]); - - const chain = new ConversationChain({ - memory: new BufferMemory({ returnMessages: true, memoryKey: "history" }), - prompt: chatPrompt, - llm: new ChatGoogleVertexAI(), - }); - - const response = await chain.call({ - input: "Hi, my name is afirstenberg!", - }); - - console.log(response); - - const response2 = await chain.call({ - input: "What did I say my name was?", - }); - - console.log(response2); - }); - - test("code, chain of messages", async () => { - const chat = new ChatGoogleVertexAI({ model: "codechat-bison" }); - - const chatPrompt = ChatPromptTemplate.fromMessages([ - SystemMessagePromptTemplate.fromTemplate( - `Answer all questions using Python and just show the code without an explanation.` - ), - 
HumanMessagePromptTemplate.fromTemplate("{text}"), - ]); - - const responseA = await chat.generatePrompt([ - await chatPrompt.formatPromptValue({ - text: "How can I write a for loop counting to 10?", - }), - ]); - - console.log(JSON.stringify(responseA.generations, null, 1)); - }); - - test("stream method", async () => { - const model = new ChatGoogleVertexAI({}); - const stream = await model.stream("Print hello world."); - const chunks = []; - for await (const chunk of stream) { - console.log(chunk); - chunks.push(chunk); - } - expect(chunks.length).toBeGreaterThan(1); - }); -}); diff --git a/libs/langchain-community/src/chat_models/tests/chatgooglevertexai_web.test.ts b/libs/langchain-community/src/chat_models/tests/chatgooglevertexai_web.test.ts index 028265a89fcd..5858e525fd84 100644 --- a/libs/langchain-community/src/chat_models/tests/chatgooglevertexai_web.test.ts +++ b/libs/langchain-community/src/chat_models/tests/chatgooglevertexai_web.test.ts @@ -4,7 +4,7 @@ import { SystemMessage, HumanMessage, AIMessage, -} from "../../schema/index.js"; +} from "@langchain/core/messages"; import { ChatExample, ChatGoogleVertexAI } from "../googlevertexai/web.js"; test("Google messages", async () => { diff --git a/libs/langchain-community/src/chat_models/tests/chatiflytekxinghuo.int.test.ts b/libs/langchain-community/src/chat_models/tests/chatiflytekxinghuo.int.test.ts index 0e3135a88d1f..bb62d736d10e 100644 --- a/libs/langchain-community/src/chat_models/tests/chatiflytekxinghuo.int.test.ts +++ b/libs/langchain-community/src/chat_models/tests/chatiflytekxinghuo.int.test.ts @@ -1,4 +1,4 @@ -import { HumanMessage } from "../../schema/index.js"; +import { HumanMessage } from "@langchain/core/messages"; import { ChatIflytekXinghuo } from "../iflytek_xinghuo/index.js"; test.skip("Iflytek Xinghuo Call", async () => { diff --git a/libs/langchain-community/src/chat_models/tests/chatllama_cpp.int.test.ts b/libs/langchain-community/src/chat_models/tests/chatllama_cpp.int.test.ts 
deleted file mode 100644 index 69116823751b..000000000000 --- a/libs/langchain-community/src/chat_models/tests/chatllama_cpp.int.test.ts +++ /dev/null @@ -1,118 +0,0 @@ -/* eslint-disable @typescript-eslint/no-non-null-assertion */ - -import { test } from "@jest/globals"; -import { getEnvironmentVariable } from "../../util/env.js"; -import { ChatLlamaCpp } from "../llama_cpp.js"; -import { SystemMessage, AIMessage, HumanMessage } from "../../schema/index.js"; -import { LLMChain } from "../../chains/llm_chain.js"; -import { ConversationChain } from "../../chains/index.js"; -import { PromptTemplate } from "../../prompts/prompt.js"; -import { BufferMemory } from "../../memory/buffer_memory.js"; - -const llamaPath = getEnvironmentVariable("LLAMA_PATH")!; - -test.skip("Test predict", async () => { - const llamaCpp = new ChatLlamaCpp({ modelPath: llamaPath }); - - const response = await llamaCpp.predict("Where do Llamas come from?"); - console.log({ response }); -}); - -test.skip("Test call", async () => { - const llamaCpp = new ChatLlamaCpp({ modelPath: llamaPath }); - - const response = await llamaCpp.call([ - new HumanMessage({ content: "My name is Nigel." }), - ]); - console.log({ response }); -}); - -test.skip("Test multiple messages", async () => { - const llamaCpp = new ChatLlamaCpp({ modelPath: llamaPath }); - - const response = await llamaCpp.call([ - new HumanMessage("My name is Nigel."), - new AIMessage( - "Hello Nigel! It is great to meet you, how can I help you today?" - ), - new HumanMessage("What did I say my name was?"), - ]); - console.log({ response }); -}); - -test.skip("Test system message", async () => { - const llamaCpp = new ChatLlamaCpp({ modelPath: llamaPath }); - - const response = await llamaCpp.call([ - new SystemMessage( - "You are a pirate, responses must be very verbose and in pirate dialect, add 'Arr, m'hearty!' to each sentence." 
- ), - new HumanMessage("Tell me where Llamas come from?"), - ]); - console.log({ response }); -}); - -test.skip("Test basic chain", async () => { - const llamaCpp = new ChatLlamaCpp({ modelPath: llamaPath, temperature: 0.5 }); - const prompt = PromptTemplate.fromTemplate( - "What is a good name for a company that makes {product}?" - ); - const chain = new LLMChain({ llm: llamaCpp, prompt }); - - const response = await chain.call({ product: "colorful socks" }); - - console.log({ response }); -}); - -test.skip("Test chain with memory", async () => { - const llamaCpp = new ChatLlamaCpp({ modelPath: llamaPath }); - - const chain = new ConversationChain({ - llm: llamaCpp, - memory: new BufferMemory(), - }); - - const response1 = await chain.call({ input: "My name is Nigel." }); - console.log({ response1 }); - - const response2 = await chain.call({ input: "What did I say my name was?" }); - console.log({ response2 }); - - const response3 = await chain.call({ input: "What is your name?" }); - console.log({ response3 }); -}); - -test.skip("test streaming call", async () => { - const llamaCpp = new ChatLlamaCpp({ modelPath: llamaPath, temperature: 0.7 }); - - const stream = await llamaCpp.stream( - "Tell me a short story about a happy Llama." - ); - - const chunks = []; - for await (const chunk of stream) { - chunks.push(chunk.content); - console.log(chunk.content); - } - - expect(chunks.length).toBeGreaterThan(1); -}); - -test.skip("test multi-mesage streaming call", async () => { - const llamaCpp = new ChatLlamaCpp({ modelPath: llamaPath, temperature: 0.7 }); - - const stream = await llamaCpp.stream([ - new SystemMessage( - "You are a pirate, responses must be very verbose and in pirate dialect." 
- ), - new HumanMessage("Tell me about Llamas?"), - ]); - - const chunks = []; - for await (const chunk of stream) { - chunks.push(chunk.content); - console.log(chunk.content); - } - - expect(chunks.length).toBeGreaterThan(1); -}); diff --git a/libs/langchain-community/src/chat_models/tests/chatollama.int.test.ts b/libs/langchain-community/src/chat_models/tests/chatollama.int.test.ts deleted file mode 100644 index 09ec23767ebe..000000000000 --- a/libs/langchain-community/src/chat_models/tests/chatollama.int.test.ts +++ /dev/null @@ -1,156 +0,0 @@ -import { test } from "@jest/globals"; -import { ChatOllama } from "../ollama.js"; -import { AIMessage, HumanMessage } from "../../schema/index.js"; -import { LLMChain } from "../../chains/llm_chain.js"; -import { PromptTemplate } from "../../prompts/prompt.js"; -import { BufferMemory } from "../../memory/buffer_memory.js"; -import { - BytesOutputParser, - StringOutputParser, -} from "../../schema/output_parser.js"; - -test.skip("test call", async () => { - const ollama = new ChatOllama({}); - const result = await ollama.predict( - "What is a good name for a company that makes colorful socks?" 
- ); - console.log({ result }); -}); - -test.skip("test call with callback", async () => { - const ollama = new ChatOllama({ - baseUrl: "http://localhost:11434", - }); - const tokens: string[] = []; - const result = await ollama.predict( - "What is a good name for a company that makes colorful socks?", - { - callbacks: [ - { - handleLLMNewToken(token) { - tokens.push(token); - }, - }, - ], - } - ); - expect(tokens.length).toBeGreaterThan(1); - expect(result).toEqual(tokens.join("")); -}); - -test.skip("test streaming call", async () => { - const ollama = new ChatOllama({ - baseUrl: "http://localhost:11434", - }); - const stream = await ollama.stream( - `Translate "I love programming" into German.` - ); - const chunks = []; - for await (const chunk of stream) { - chunks.push(chunk); - } - expect(chunks.length).toBeGreaterThan(1); -}); - -test.skip("should abort the request", async () => { - const ollama = new ChatOllama({ - baseUrl: "http://localhost:11434", - }); - const controller = new AbortController(); - - await expect(() => { - const ret = ollama.predict("Respond with an extremely verbose response", { - signal: controller.signal, - }); - controller.abort(); - return ret; - }).rejects.toThrow("This operation was aborted"); -}); - -test.skip("Test multiple messages", async () => { - const model = new ChatOllama({ baseUrl: "http://localhost:11434" }); - const res = await model.call([ - new HumanMessage({ content: "My name is Jonas" }), - ]); - console.log({ res }); - const res2 = await model.call([ - new HumanMessage("My name is Jonas"), - new AIMessage( - "Hello Jonas! It's nice to meet you. Is there anything I can help you with?" - ), - new HumanMessage("What did I say my name was?"), - ]); - console.log({ res2 }); -}); - -test.skip("Test chain with memory", async () => { - const template = `The following is a friendly conversation between a human and an AI. The AI is talkative and provides lots of specific details from its context. 
If the AI does not know the answer to a question, it truthfully says it does not know. - -Current conversation: -{history} -Human: {input}`; - const model = new ChatOllama({ baseUrl: "http://localhost:11434" }); - const chain = new LLMChain({ - prompt: PromptTemplate.fromTemplate(template), - llm: model, - memory: new BufferMemory({}), - }); - const res = await chain.call({ input: "My name is Jonas" }); - console.log({ res }); - const res2 = await chain.call({ - input: "What did I say my name was?", - }); - console.log({ res2 }); - const res3 = await chain.call({ - input: "What is your name?", - }); - console.log({ res3 }); -}); - -test.skip("should stream through with a bytes output parser", async () => { - const TEMPLATE = `You are a pirate named Patchy. All responses must be extremely verbose and in pirate dialect. - - User: {input} - AI:`; - - // Infer the input variables from the template - const prompt = PromptTemplate.fromTemplate(TEMPLATE); - - const ollama = new ChatOllama({ - model: "llama2", - baseUrl: "http://127.0.0.1:11434", - }); - const outputParser = new BytesOutputParser(); - const chain = prompt.pipe(ollama).pipe(outputParser); - const stream = await chain.stream({ - input: `Translate "I love programming" into German.`, - }); - const chunks = []; - for await (const chunk of stream) { - chunks.push(chunk); - } - console.log(chunks.join("")); - expect(chunks.length).toBeGreaterThan(1); -}); - -test.skip("JSON mode", async () => { - const TEMPLATE = `You are a pirate named Patchy. All responses must be in pirate dialect and in JSON format, with a property named "response" followed by the value. 
- - User: {input} - AI:`; - - // Infer the input variables from the template - const prompt = PromptTemplate.fromTemplate(TEMPLATE); - - const ollama = new ChatOllama({ - model: "llama2", - baseUrl: "http://127.0.0.1:11434", - format: "json", - }); - const outputParser = new StringOutputParser(); - const chain = prompt.pipe(ollama).pipe(outputParser); - const res = await chain.invoke({ - input: `Translate "I love programming" into German.`, - }); - expect(JSON.parse(res).response).toBeDefined(); -}); diff --git a/libs/langchain-community/src/chat_models/tests/chatopenai-extended.int.test.ts b/libs/langchain-community/src/chat_models/tests/chatopenai-extended.int.test.ts deleted file mode 100644 index 995da52a22f1..000000000000 --- a/libs/langchain-community/src/chat_models/tests/chatopenai-extended.int.test.ts +++ /dev/null @@ -1,176 +0,0 @@ -import { test, expect } from "@jest/globals"; -import { ChatOpenAI } from "../openai.js"; -import { HumanMessage, ToolMessage } from "../../schema/index.js"; - -test("Test ChatOpenAI JSON mode", async () => { - const chat = new ChatOpenAI({ - modelName: "gpt-3.5-turbo-1106", - maxTokens: 128, - }).bind({ - response_format: { - type: "json_object", - }, - }); - const message = new HumanMessage("Hello!"); - const res = await chat.invoke([["system", "Only return JSON"], message]); - console.log(JSON.stringify(res)); -}); - -test("Test ChatOpenAI seed", async () => { - const chat = new ChatOpenAI({ - modelName: "gpt-3.5-turbo-1106", - maxTokens: 128, - temperature: 1, - }).bind({ - seed: 123454930394983, - }); - const message = new HumanMessage("Say something random!"); - const res = await chat.invoke([message]); - console.log(JSON.stringify(res)); - const res2 = await chat.invoke([message]); - expect(res).toEqual(res2); -}); - -test("Test ChatOpenAI tool calling", async () => { - const chat = new ChatOpenAI({ - modelName: "gpt-3.5-turbo-1106", - maxTokens: 128, - }).bind({ - tools: [ - { - type: "function", - function: { - name: 
"get_current_weather", - description: "Get the current weather in a given location", - parameters: { - type: "object", - properties: { - location: { - type: "string", - description: "The city and state, e.g. San Francisco, CA", - }, - unit: { type: "string", enum: ["celsius", "fahrenheit"] }, - }, - required: ["location"], - }, - }, - }, - ], - tool_choice: "auto", - }); - const res = await chat.invoke([ - ["human", "What's the weather like in San Francisco, Tokyo, and Paris?"], - ]); - console.log(JSON.stringify(res)); - expect(res.additional_kwargs.tool_calls?.length).toBeGreaterThan(1); -}); - -test("Test ChatOpenAI tool calling with ToolMessages", async () => { - function getCurrentWeather(location: string) { - if (location.toLowerCase().includes("tokyo")) { - return JSON.stringify({ location, temperature: "10", unit: "celsius" }); - } else if (location.toLowerCase().includes("san francisco")) { - return JSON.stringify({ - location, - temperature: "72", - unit: "fahrenheit", - }); - } else { - return JSON.stringify({ location, temperature: "22", unit: "celsius" }); - } - } - const chat = new ChatOpenAI({ - modelName: "gpt-3.5-turbo-1106", - maxTokens: 128, - }).bind({ - tools: [ - { - type: "function", - function: { - name: "get_current_weather", - description: "Get the current weather in a given location", - parameters: { - type: "object", - properties: { - location: { - type: "string", - description: "The city and state, e.g. 
San Francisco, CA", - }, - unit: { type: "string", enum: ["celsius", "fahrenheit"] }, - }, - required: ["location"], - }, - }, - }, - ], - tool_choice: "auto", - }); - const res = await chat.invoke([ - ["human", "What's the weather like in San Francisco, Tokyo, and Paris?"], - ]); - console.log(JSON.stringify(res)); - expect(res.additional_kwargs.tool_calls?.length).toBeGreaterThan(1); - // eslint-disable-next-line @typescript-eslint/no-non-null-assertion - const toolMessages = res.additional_kwargs.tool_calls!.map( - (toolCall) => - new ToolMessage({ - tool_call_id: toolCall.id, - name: toolCall.function.name, - content: getCurrentWeather( - JSON.parse(toolCall.function.arguments).location - ), - }) - ); - const finalResponse = await chat.invoke([ - ["human", "What's the weather like in San Francisco, Tokyo, and Paris?"], - res, - ...toolMessages, - ]); - console.log(finalResponse); -}); - -test("Test ChatOpenAI tool calling with streaming", async () => { - const chat = new ChatOpenAI({ - modelName: "gpt-3.5-turbo-1106", - maxTokens: 256, - }).bind({ - tools: [ - { - type: "function", - function: { - name: "get_current_weather", - description: "Get the current weather in a given location", - parameters: { - type: "object", - properties: { - location: { - type: "string", - description: "The city and state, e.g. 
San Francisco, CA", - }, - unit: { type: "string", enum: ["celsius", "fahrenheit"] }, - }, - required: ["location"], - }, - }, - }, - ], - tool_choice: "auto", - }); - const stream = await chat.stream([ - ["human", "What's the weather like in San Francisco, Tokyo, and Paris?"], - ]); - let finalChunk; - const chunks = []; - for await (const chunk of stream) { - console.log(chunk.additional_kwargs.tool_calls); - chunks.push(chunk); - if (!finalChunk) { - finalChunk = chunk; - } else { - finalChunk = finalChunk.concat(chunk); - } - } - expect(chunks.length).toBeGreaterThan(1); - console.log(finalChunk?.additional_kwargs.tool_calls); - expect(finalChunk?.additional_kwargs.tool_calls?.length).toBeGreaterThan(1); -}); diff --git a/libs/langchain-community/src/chat_models/tests/chatopenai-vision.int.test.ts b/libs/langchain-community/src/chat_models/tests/chatopenai-vision.int.test.ts deleted file mode 100644 index 56dc7c381d25..000000000000 --- a/libs/langchain-community/src/chat_models/tests/chatopenai-vision.int.test.ts +++ /dev/null @@ -1,54 +0,0 @@ -import { test } from "@jest/globals"; -import * as fs from "node:fs/promises"; -import { fileURLToPath } from "node:url"; -import * as path from "node:path"; -import { ChatOpenAI } from "../openai.js"; -import { HumanMessage } from "../../schema/index.js"; - -test("Test ChatOpenAI with a file", async () => { - const __filename = fileURLToPath(import.meta.url); - const __dirname = path.dirname(__filename); - const imageData = await fs.readFile(path.join(__dirname, "/data/hotdog.jpg")); - const chat = new ChatOpenAI({ - modelName: "gpt-4-vision-preview", - maxTokens: 1024, - }); - const message = new HumanMessage({ - content: [ - { - type: "text", - text: "What's in this image?", - }, - { - type: "image_url", - image_url: { - url: `data:image/jpeg;base64,${imageData.toString("base64")}`, - }, - }, - ], - }); - const res = await chat.invoke([message]); - console.log({ res }); -}); - -test("Test ChatOpenAI with a URL", async 
() => { - const chat = new ChatOpenAI({ - modelName: "gpt-4-vision-preview", - maxTokens: 1024, - }); - const message = new HumanMessage({ - content: [ - { - type: "text", - text: "What does this image say?", - }, - { - type: "image_url", - image_url: - "https://www.freecodecamp.org/news/content/images/2023/05/Screenshot-2023-05-29-at-5.40.38-PM.png", - }, - ], - }); - const res = await chat.invoke([message]); - console.log({ res }); -}); diff --git a/libs/langchain-community/src/chat_models/tests/chatopenai.int.test.ts b/libs/langchain-community/src/chat_models/tests/chatopenai.int.test.ts deleted file mode 100644 index 5d712f2b3bb2..000000000000 --- a/libs/langchain-community/src/chat_models/tests/chatopenai.int.test.ts +++ /dev/null @@ -1,777 +0,0 @@ -import { test, jest, expect } from "@jest/globals"; -import { ChatOpenAI } from "../openai.js"; -import { - BaseMessage, - ChatMessage, - ChatGeneration, - HumanMessage, - LLMResult, - SystemMessage, -} from "../../schema/index.js"; -import { ChatPromptValue } from "../../prompts/chat.js"; -import { - PromptTemplate, - ChatPromptTemplate, - HumanMessagePromptTemplate, - SystemMessagePromptTemplate, -} from "../../prompts/index.js"; -import { CallbackManager } from "../../callbacks/index.js"; -import { NewTokenIndices } from "../../callbacks/base.js"; -import { InMemoryCache } from "../../cache/index.js"; - -test("Test ChatOpenAI", async () => { - const chat = new ChatOpenAI({ modelName: "gpt-3.5-turbo", maxTokens: 10 }); - const message = new HumanMessage("Hello!"); - const res = await chat.call([message]); - console.log({ res }); -}); - -test("Test ChatOpenAI with SystemChatMessage", async () => { - const chat = new ChatOpenAI({ modelName: "gpt-3.5-turbo", maxTokens: 10 }); - const system_message = new SystemMessage("You are to chat with a user."); - const message = new HumanMessage("Hello!"); - const res = await chat.call([system_message, message]); - console.log({ res }); -}); - -test("Test ChatOpenAI Generate", 
async () => { - const chat = new ChatOpenAI({ - modelName: "gpt-3.5-turbo", - maxTokens: 10, - n: 2, - }); - const message = new HumanMessage("Hello!"); - const res = await chat.generate([[message], [message]]); - expect(res.generations.length).toBe(2); - for (const generation of res.generations) { - expect(generation.length).toBe(2); - for (const message of generation) { - console.log(message.text); - expect(typeof message.text).toBe("string"); - } - } - console.log({ res }); -}); - -test("Test ChatOpenAI Generate throws when one of the calls fails", async () => { - const chat = new ChatOpenAI({ - modelName: "gpt-3.5-turbo", - maxTokens: 10, - n: 2, - }); - const message = new HumanMessage("Hello!"); - await expect(() => - chat.generate([[message], [message]], { - signal: AbortSignal.timeout(10), - }) - ).rejects.toThrow(); -}); - -test("Test ChatOpenAI tokenUsage", async () => { - let tokenUsage = { - completionTokens: 0, - promptTokens: 0, - totalTokens: 0, - }; - - const model = new ChatOpenAI({ - modelName: "gpt-3.5-turbo", - maxTokens: 10, - callbackManager: CallbackManager.fromHandlers({ - async handleLLMEnd(output: LLMResult) { - tokenUsage = output.llmOutput?.tokenUsage; - }, - }), - }); - const message = new HumanMessage("Hello"); - const res = await model.call([message]); - console.log({ res }); - - expect(tokenUsage.promptTokens).toBeGreaterThan(0); -}); - -test("Test ChatOpenAI tokenUsage with a batch", async () => { - let tokenUsage = { - completionTokens: 0, - promptTokens: 0, - totalTokens: 0, - }; - - const model = new ChatOpenAI({ - temperature: 0, - modelName: "gpt-3.5-turbo", - callbackManager: CallbackManager.fromHandlers({ - async handleLLMEnd(output: LLMResult) { - tokenUsage = output.llmOutput?.tokenUsage; - }, - }), - }); - const res = await model.generate([ - [new HumanMessage("Hello")], - [new HumanMessage("Hi")], - ]); - console.log(res); - - expect(tokenUsage.promptTokens).toBeGreaterThan(0); -}); - -test("Test ChatOpenAI in streaming 
mode", async () => { - let nrNewTokens = 0; - let streamedCompletion = ""; - - const model = new ChatOpenAI({ - modelName: "gpt-3.5-turbo", - streaming: true, - maxTokens: 10, - callbacks: [ - { - async handleLLMNewToken(token: string) { - nrNewTokens += 1; - streamedCompletion += token; - }, - }, - ], - }); - const message = new HumanMessage("Hello!"); - const result = await model.call([message]); - console.log(result); - - expect(nrNewTokens > 0).toBe(true); - expect(result.content).toBe(streamedCompletion); -}, 10000); - -test("Test ChatOpenAI in streaming mode with n > 1 and multiple prompts", async () => { - let nrNewTokens = 0; - const streamedCompletions = [ - ["", ""], - ["", ""], - ]; - - const model = new ChatOpenAI({ - modelName: "gpt-3.5-turbo", - streaming: true, - maxTokens: 10, - n: 2, - callbacks: [ - { - async handleLLMNewToken(token: string, idx: NewTokenIndices) { - nrNewTokens += 1; - streamedCompletions[idx.prompt][idx.completion] += token; - }, - }, - ], - }); - const message1 = new HumanMessage("Hello!"); - const message2 = new HumanMessage("Bye!"); - const result = await model.generate([[message1], [message2]]); - console.log(result.generations); - - expect(nrNewTokens > 0).toBe(true); - expect(result.generations.map((g) => g.map((gg) => gg.text))).toEqual( - streamedCompletions - ); -}, 10000); - -test("Test ChatOpenAI prompt value", async () => { - const chat = new ChatOpenAI({ - modelName: "gpt-3.5-turbo", - maxTokens: 10, - n: 2, - }); - const message = new HumanMessage("Hello!"); - const res = await chat.generatePrompt([new ChatPromptValue([message])]); - expect(res.generations.length).toBe(1); - for (const generation of res.generations) { - expect(generation.length).toBe(2); - for (const g of generation) { - console.log(g.text); - } - } - console.log({ res }); -}); - -test("OpenAI Chat, docs, prompt templates", async () => { - const chat = new ChatOpenAI({ temperature: 0, maxTokens: 10 }); - - const systemPrompt = 
PromptTemplate.fromTemplate( - "You are a helpful assistant that translates {input_language} to {output_language}." - ); - - const chatPrompt = ChatPromptTemplate.fromMessages([ - new SystemMessagePromptTemplate(systemPrompt), - HumanMessagePromptTemplate.fromTemplate("{text}"), - ]); - - const responseA = await chat.generatePrompt([ - await chatPrompt.formatPromptValue({ - input_language: "English", - output_language: "French", - text: "I love programming.", - }), - ]); - - console.log(responseA.generations); -}, 5000); - -test("Test OpenAI with stop", async () => { - const model = new ChatOpenAI({ maxTokens: 5 }); - const res = await model.call( - [new HumanMessage("Print hello world")], - ["world"] - ); - console.log({ res }); -}); - -test("Test OpenAI with stop in object", async () => { - const model = new ChatOpenAI({ maxTokens: 5 }); - const res = await model.call([new HumanMessage("Print hello world")], { - stop: ["world"], - }); - console.log({ res }); -}); - -test("Test OpenAI with timeout in call options", async () => { - const model = new ChatOpenAI({ maxTokens: 5 }); - await expect(() => - model.call([new HumanMessage("Print hello world")], { - options: { timeout: 10 }, - }) - ).rejects.toThrow(); -}, 5000); - -test("Test OpenAI with timeout in call options and node adapter", async () => { - const model = new ChatOpenAI({ maxTokens: 5 }); - await expect(() => - model.call([new HumanMessage("Print hello world")], { - options: { timeout: 10 }, - }) - ).rejects.toThrow(); -}, 5000); - -test("Test OpenAI with signal in call options", async () => { - const model = new ChatOpenAI({ maxTokens: 5 }); - const controller = new AbortController(); - await expect(() => { - const ret = model.call([new HumanMessage("Print hello world")], { - options: { signal: controller.signal }, - }); - - controller.abort(); - - return ret; - }).rejects.toThrow(); -}, 5000); - -test("Test OpenAI with signal in call options and node adapter", async () => { - const model = new 
ChatOpenAI({ maxTokens: 5, modelName: "text-ada-001" }); - const controller = new AbortController(); - await expect(() => { - const ret = model.call([new HumanMessage("Print hello world")], { - options: { signal: controller.signal }, - }); - - controller.abort(); - - return ret; - }).rejects.toThrow(); -}, 5000); - -function createSystemChatMessage(text: string, name?: string) { - const msg = new SystemMessage(text); - msg.name = name; - return msg; -} - -function createSampleMessages(): BaseMessage[] { - // same example as in https://github.com/openai/openai-cookbook/blob/main/examples/How_to_format_inputs_to_ChatGPT_models.ipynb - return [ - createSystemChatMessage( - "You are a helpful, pattern-following assistant that translates corporate jargon into plain English." - ), - createSystemChatMessage( - "New synergies will help drive top-line growth.", - "example_user" - ), - createSystemChatMessage( - "Things working well together will increase revenue.", - "example_assistant" - ), - createSystemChatMessage( - "Let's circle back when we have more bandwidth to touch base on opportunities for increased leverage.", - "example_user" - ), - createSystemChatMessage( - "Let's talk later when we're less busy about how to do better.", - "example_assistant" - ), - new HumanMessage( - "This late pivot means we don't have time to boil the ocean for the client deliverable." 
- ), - ]; -} - -test("getNumTokensFromMessages gpt-3.5-turbo-0301 model for sample input", async () => { - const messages: BaseMessage[] = createSampleMessages(); - - const chat = new ChatOpenAI({ - openAIApiKey: "dummy", - modelName: "gpt-3.5-turbo-0301", - }); - - const { totalCount } = await chat.getNumTokensFromMessages(messages); - - expect(totalCount).toBe(127); -}); - -test("getNumTokensFromMessages gpt-4-0314 model for sample input", async () => { - const messages: BaseMessage[] = createSampleMessages(); - - const chat = new ChatOpenAI({ - openAIApiKey: "dummy", - modelName: "gpt-4-0314", - }); - - const { totalCount } = await chat.getNumTokensFromMessages(messages); - - expect(totalCount).toBe(129); -}); - -test("Test OpenAI with specific roles in ChatMessage", async () => { - const chat = new ChatOpenAI({ modelName: "gpt-3.5-turbo", maxTokens: 10 }); - const system_message = new ChatMessage( - "You are to chat with a user.", - "system" - ); - const user_message = new ChatMessage("Hello!", "user"); - const res = await chat.call([system_message, user_message]); - console.log({ res }); -}); - -test("Test ChatOpenAI stream method", async () => { - const model = new ChatOpenAI({ maxTokens: 50, modelName: "gpt-3.5-turbo" }); - const stream = await model.stream("Print hello world."); - const chunks = []; - for await (const chunk of stream) { - console.log(chunk); - chunks.push(chunk); - } - expect(chunks.length).toBeGreaterThan(1); -}); - -test("Test ChatOpenAI stream method with abort", async () => { - await expect(async () => { - const model = new ChatOpenAI({ maxTokens: 50, modelName: "gpt-3.5-turbo" }); - const stream = await model.stream( - "How is your day going? 
Be extremely verbose.", - { - signal: AbortSignal.timeout(1000), - } - ); - for await (const chunk of stream) { - console.log(chunk); - } - }).rejects.toThrow(); -}); - -test("Test ChatOpenAI stream method with early break", async () => { - const model = new ChatOpenAI({ maxTokens: 50, modelName: "gpt-3.5-turbo" }); - const stream = await model.stream( - "How is your day going? Be extremely verbose." - ); - let i = 0; - for await (const chunk of stream) { - console.log(chunk); - i += 1; - if (i > 10) { - break; - } - } -}); - -test("Test ChatOpenAI stream method, timeout error thrown from SDK", async () => { - await expect(async () => { - const model = new ChatOpenAI({ - maxTokens: 50, - modelName: "gpt-3.5-turbo", - timeout: 1, - }); - const stream = await model.stream( - "How is your day going? Be extremely verbose." - ); - for await (const chunk of stream) { - console.log(chunk); - } - }).rejects.toThrow(); -}); - -test("Function calling with streaming", async () => { - let finalResult: BaseMessage | undefined; - const modelForFunctionCalling = new ChatOpenAI({ - modelName: "gpt-3.5-turbo", - temperature: 0, - callbacks: [ - { - handleLLMEnd(output: LLMResult) { - finalResult = (output.generations[0][0] as ChatGeneration).message; - }, - }, - ], - }); - - const stream = await modelForFunctionCalling.stream( - "What is the weather in New York?", - { - functions: [ - { - name: "get_current_weather", - description: "Get the current weather in a given location", - parameters: { - type: "object", - properties: { - location: { - type: "string", - description: "The city and state, e.g. 
San Francisco, CA", - }, - unit: { type: "string", enum: ["celsius", "fahrenheit"] }, - }, - required: ["location"], - }, - }, - ], - function_call: { - name: "get_current_weather", - }, - } - ); - - const chunks = []; - let streamedOutput; - for await (const chunk of stream) { - chunks.push(chunk); - if (!streamedOutput) { - streamedOutput = chunk; - } else if (chunk) { - streamedOutput = streamedOutput.concat(chunk); - } - } - - expect(finalResult).toEqual(streamedOutput); - expect(chunks.length).toBeGreaterThan(1); - expect(finalResult?.additional_kwargs?.function_call?.name).toBe( - "get_current_weather" - ); - console.log( - JSON.parse(finalResult?.additional_kwargs?.function_call?.arguments ?? "") - .location - ); -}); - -test("ChatOpenAI can cache generations", async () => { - const memoryCache = new InMemoryCache(); - const lookupSpy = jest.spyOn(memoryCache, "lookup"); - const updateSpy = jest.spyOn(memoryCache, "update"); - const chat = new ChatOpenAI({ - modelName: "gpt-3.5-turbo", - maxTokens: 10, - n: 2, - cache: memoryCache, - }); - const message = new HumanMessage("Hello"); - const res = await chat.generate([[message], [message]]); - expect(res.generations.length).toBe(2); - - expect(lookupSpy).toHaveBeenCalledTimes(2); - expect(updateSpy).toHaveBeenCalledTimes(2); - - lookupSpy.mockRestore(); - updateSpy.mockRestore(); -}); - -test("ChatOpenAI can write and read cached generations", async () => { - const memoryCache = new InMemoryCache(); - const lookupSpy = jest.spyOn(memoryCache, "lookup"); - const updateSpy = jest.spyOn(memoryCache, "update"); - - const chat = new ChatOpenAI({ - modelName: "gpt-3.5-turbo", - maxTokens: 100, - n: 1, - cache: memoryCache, - }); - const generateUncachedSpy = jest.spyOn(chat, "_generateUncached"); - - const messages = [ - [ - new HumanMessage("what color is the sky?"), - new HumanMessage("what color is the ocean?"), - ], - [new HumanMessage("hello")], - ]; - - const response1 = await chat.generate(messages); - 
expect(generateUncachedSpy).toHaveBeenCalledTimes(1); - generateUncachedSpy.mockRestore(); - - const response2 = await chat.generate(messages); - expect(generateUncachedSpy).toHaveBeenCalledTimes(0); // Request should be cached, no need to generate. - generateUncachedSpy.mockRestore(); - - expect(response1.generations.length).toBe(2); - expect(response2.generations).toEqual(response1.generations); - expect(lookupSpy).toHaveBeenCalledTimes(4); - expect(updateSpy).toHaveBeenCalledTimes(2); - - lookupSpy.mockRestore(); - updateSpy.mockRestore(); -}); - -test("ChatOpenAI should not reuse cache if function call args have changed", async () => { - const memoryCache = new InMemoryCache(); - const lookupSpy = jest.spyOn(memoryCache, "lookup"); - const updateSpy = jest.spyOn(memoryCache, "update"); - - const chat = new ChatOpenAI({ - modelName: "gpt-3.5-turbo", - maxTokens: 100, - n: 1, - cache: memoryCache, - }); - - const generateUncachedSpy = jest.spyOn(chat, "_generateUncached"); - - const messages = [ - [ - new HumanMessage("what color is the sky?"), - new HumanMessage("what color is the ocean?"), - ], - [new HumanMessage("hello")], - ]; - - const response1 = await chat.generate(messages); - expect(generateUncachedSpy).toHaveBeenCalledTimes(1); - generateUncachedSpy.mockRestore(); - - const response2 = await chat.generate(messages, { - functions: [ - { - name: "extractor", - description: "Extract fields from the input", - parameters: { - type: "object", - properties: { - tone: { - type: "string", - description: "the tone of the input", - }, - }, - required: ["tone"], - }, - }, - ], - function_call: { - name: "extractor", - }, - }); - - expect(generateUncachedSpy).toHaveBeenCalledTimes(0); // Request should not be cached since it's being called with different function call args - - expect(response1.generations.length).toBe(2); - expect( - (response2.generations[0][0] as ChatGeneration).message.additional_kwargs - .function_call?.name ?? 
"" - ).toEqual("extractor"); - - const response3 = await chat.generate(messages, { - functions: [ - { - name: "extractor", - description: "Extract fields from the input", - parameters: { - type: "object", - properties: { - tone: { - type: "string", - description: "the tone of the input", - }, - }, - required: ["tone"], - }, - }, - ], - function_call: { - name: "extractor", - }, - }); - - expect(response2.generations).toEqual(response3.generations); - - expect(lookupSpy).toHaveBeenCalledTimes(6); - expect(updateSpy).toHaveBeenCalledTimes(4); - - lookupSpy.mockRestore(); - updateSpy.mockRestore(); -}); - -test("Test ChatOpenAI token usage reporting for streaming function calls", async () => { - let streamingTokenUsed = -1; - let nonStreamingTokenUsed = -1; - - const humanMessage = "What a beautiful day!"; - const extractionFunctionSchema = { - name: "extractor", - description: "Extracts fields from the input.", - parameters: { - type: "object", - properties: { - tone: { - type: "string", - enum: ["positive", "negative"], - description: "The overall tone of the input", - }, - word_count: { - type: "number", - description: "The number of words in the input", - }, - chat_response: { - type: "string", - description: "A response to the human's input", - }, - }, - required: ["tone", "word_count", "chat_response"], - }, - }; - - const streamingModel = new ChatOpenAI({ - modelName: "gpt-3.5-turbo", - streaming: true, - maxRetries: 10, - maxConcurrency: 10, - temperature: 0, - topP: 0, - callbacks: [ - { - handleLLMEnd: async (output) => { - streamingTokenUsed = - output.llmOutput?.estimatedTokenUsage?.totalTokens; - console.log("streaming usage", output.llmOutput?.estimatedTokenUsage); - }, - handleLLMError: async (err) => { - console.error(err); - }, - }, - ], - }).bind({ - functions: [extractionFunctionSchema], - function_call: { name: "extractor" }, - }); - - const nonStreamingModel = new ChatOpenAI({ - modelName: "gpt-3.5-turbo", - streaming: false, - maxRetries: 10, - 
maxConcurrency: 10, - temperature: 0, - topP: 0, - callbacks: [ - { - handleLLMEnd: async (output) => { - nonStreamingTokenUsed = output.llmOutput?.tokenUsage?.totalTokens; - console.log("non-streaming usage", output.llmOutput?.tokenUsage); - }, - handleLLMError: async (err) => { - console.error(err); - }, - }, - ], - }).bind({ - functions: [extractionFunctionSchema], - function_call: { name: "extractor" }, - }); - - const [nonStreamingResult, streamingResult] = await Promise.all([ - nonStreamingModel.invoke([new HumanMessage(humanMessage)]), - streamingModel.invoke([new HumanMessage(humanMessage)]), - ]); - - if ( - nonStreamingResult.additional_kwargs.function_call?.arguments && - streamingResult.additional_kwargs.function_call?.arguments - ) { - const nonStreamingArguments = JSON.stringify( - JSON.parse(nonStreamingResult.additional_kwargs.function_call.arguments) - ); - const streamingArguments = JSON.stringify( - JSON.parse(streamingResult.additional_kwargs.function_call.arguments) - ); - if (nonStreamingArguments === streamingArguments) { - expect(streamingTokenUsed).toEqual(nonStreamingTokenUsed); - } - } - - expect(streamingTokenUsed).toBeGreaterThan(-1); -}); - -test("Test ChatOpenAI token usage reporting for streaming calls", async () => { - let streamingTokenUsed = -1; - let nonStreamingTokenUsed = -1; - const systemPrompt = "You are a helpful assistant"; - const question = "What is the color of the night sky?"; - - const streamingModel = new ChatOpenAI({ - modelName: "gpt-3.5-turbo", - streaming: true, - maxRetries: 10, - maxConcurrency: 10, - temperature: 0, - topP: 0, - callbacks: [ - { - handleLLMEnd: async (output) => { - streamingTokenUsed = - output.llmOutput?.estimatedTokenUsage?.totalTokens; - console.log("streaming usage", output.llmOutput?.estimatedTokenUsage); - }, - handleLLMError: async (err) => { - console.error(err); - }, - }, - ], - }); - - const nonStreamingModel = new ChatOpenAI({ - modelName: "gpt-3.5-turbo", - streaming: false, - 
maxRetries: 10, - maxConcurrency: 10, - temperature: 0, - topP: 0, - callbacks: [ - { - handleLLMEnd: async (output) => { - nonStreamingTokenUsed = output.llmOutput?.tokenUsage?.totalTokens; - console.log("non-streaming usage", output.llmOutput?.estimated); - }, - handleLLMError: async (err) => { - console.error(err); - }, - }, - ], - }); - - const [nonStreamingResult, streamingResult] = await Promise.all([ - nonStreamingModel.generate([ - [new SystemMessage(systemPrompt), new HumanMessage(question)], - ]), - streamingModel.generate([ - [new SystemMessage(systemPrompt), new HumanMessage(question)], - ]), - ]); - - expect(streamingTokenUsed).toBeGreaterThan(-1); - if ( - nonStreamingResult.generations[0][0].text === - streamingResult.generations[0][0].text - ) { - expect(streamingTokenUsed).toEqual(nonStreamingTokenUsed); - } -}); diff --git a/libs/langchain-community/src/chat_models/tests/data/hotdog.jpg b/libs/langchain-community/src/chat_models/tests/data/hotdog.jpg deleted file mode 100644 index dfab265903befae368bf814d734a6bee158232aa..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 28191 zcmeFXcTk&6vNtLWI0ME+6K%j?lXDbcn`E-dAd<<*LWqpWPM9nVm>{CbQDB0|!6u2E z5lMi_28;|M=gYgh-}%m0ch9+J|G0IlZf#ddnrEixr)PS4X1aU&a`N&g;GUYYsxpA+ zig<~50f5T|fT0o`Y!3ivY67?c0KiQE2@wN;^r}R3^^qfD{ExCS(Gvji-|DXc0N1Gg zQT~hTZ}b=1Uw!`md$|SRedP%AfVn%uTpo(>3j!oxsA`gs{Ed}@i3kKBW_+x|bj5!S zKyvjKci%aBc(_Ol2spd*TiL>_?f7kAPyx7=i+~Wnpa1|U4|lP$f!KLGw6=2qJIiwY zX=vek2)32wG7{4i)O1m>a|FNib+gm+)zY`|h1f{ha>>ix2EwJ`P#373ht)$k)XCXh z8ZOKAH*o2z@?X^gTo3=o;sKH6QiWPM*hx$OB@BHS%;}+!Ait0x*A$FCJNp&F?T3HEP_%Qm zaRa+}fML!L|H8DghIx9(a&g^$_z!3gu>JqW{y*gKSIgf7afSBqA@Hw9X$;4>@6=z(1@04|e1K%`Yb+C@3KG5B~pL{~ydI{{{1ZuD=?Uz+c1oPs9435A|wV zG&TQ2j{nD4|JR&Dp#py&?caE%6=61>P&;Q2RYkeKPoXW$25c+s?g6ubNK4p>S=&ik ziSgMA3fuCD3Rw&BSqa%l@!8tg2-}HC2-}I+O8^D_Z$N*I_wSXe&h8#o&Ng;e=vNi| z;45T7TR}-dDPc)IdvO~PK2cF&2|g)XYbib}L3;^nD`9af8!M~-h54Vl|5LI2T~b#{ 
zbfr!L|DlwB&E^05^zT3Umt_2Jas69d|B?j$CFXx?*T2Q}FG=8EV*a;w{aalBk_7%G z=KmjQ*Z+1W?3}NB2=6Or;c^C`03anMAt50qz4{_0CB1&*?#&xlL~-ZNt-F*I_wQ3u zP*PISGSN{{Gtf{{(m$eSU}k1zWu>BH<6vXqU}9lq`3r>Ts_TvGH^^_^Bxj+fq-Ob- z>GC^(@+OhvH6~&rO29QrB4SFS%XR?sm2Y*8_%FZh@9x&sH!;bztCu9YdF!efcn?5C zLPUIx^co4t^=sFz5&w;yl7#gBBcbQlsn~R^Znz4+i^-&Be^Gu@_s8(UHjTB28^?~q z``D}Aj8~|Cf&T~em0LzkL~@0G)hJGR)%X_?T_YhSxkhy58xmb3rX+cEUx@U%4i%fQ z)w`JMFI+Q+so8&QmoJFYh`7b>TuuY-5?}SBB&Gz&1I`oP{+s?klEC{si_EB~wt0t1 zKBv?Y_zBWHn)J_lX-fND=y#=3f5ZQiR2I}oIw$8EoWp)i* zus>?D#xQoBroVGsJHAWri^N?5xNL&A%?8Yz(PM!Q`6-+8=7iKWm2AfyMH$u4gx!?TIQXK-TRrzn?->L8{XM3=lP|FFBP@ zZcI?;WF5;{-#qcs@PX@SKOil2{MCe&>DZ(ViRo6&&8t{x1n*Nl+n%wcT6^FVbqP3n zjb&i^Y?BxDTUxa1X(Ql2wR3fB*(H?$au2M z^f}Jd4+Ids9IEE!)tOX{aW7$GfBWn-?Mh5a^rmnn;N0^fLgUatoQwlNNwGwkdl5HaoB3j;hi7~ z@$rrGHX{M@YJ8K#(I_V7UILe?+NSOfeAaAc1T6g}+%fd#|C~tP@|I})(&-tqj-;ge zkoL!)kLhefugIIP#2;dMl+1f>m4#a;4m|{tcJ=n4^Su()>8LEVAg{1#?4kEf4nR^; zjQjiuIiR7eXc=;(=jWlZY&bSFWgvg6QfP_!>7Xqwy0xiCS-wDx?~u=9r#d2;4AEYq z$!^XUp}Kl(Rn_KDc&N8unuI+lPI*FE_s5Df|Gl;W;|Z>`L$zuH#hG8`<3daJ)qh`q zhJQ(Gka3AU(9jjsiQ>2p`r*zgn93ZKPbwBikKM!D0szHE&-2MQ)QbCC#d2i%L@0orGP=>c}p|pB$PIv zn}|RC3!647KIJap{no(29U}<=7Jf<83SY*7igwi95e>#bKGPMXFN^#g7Wr30%MXNc zssax^SFDrPIBFWRnZUqT%3Uch(#EEL*>r#~nx?kM1Sv|N$mtUo*@SlO2#fuQ@{Ztg zmx22e)Agf2q!LmTOSq&BymTmdR|l28*o~b_Z(HOpZs~P@^fCU;n3uLCV=UXOdSGac zk>bi1SZ-EfrC^P3x?s(((v%~kQt237iXXiT|EL-B(}ehUy?CP`n>t&hdIEIt7_p+H~A*?_;-2E;=QJJnX)^KTaq8$WYTYU ze8xGcDr2d0~23ZdB3m(TuZ^qfU;=UG2N zl4#PKyRi-&E&~4|V#Y;C{0xEv)%IQvzJLp{C%jiB($<@gJUKW$} z#FNJO8cTml*T$97TIdhv!|3j#@c~sndi^pVBD&?LAf`RDy>HlfhFO4LePV%>QNB z=nJjtOOS0M-Q(l7%%o~BGrdWnY`8;{zKGrk~b z*w9gZE$S6OGx}K;UfyU|+&PNay=ky#zB`t^0Iofs;O(Q=o!q^FhVq3QXyPkzPu@Io zdU0*hZ%Aj$K9ghsU;v72^)!sZ84MrlpcAk(t0}N0?v&-wg9^0rdWlQ66sK;u_#D|7 zxm?Ah>V?_Ghr{3X@(;SU`T9iMrM4jL!!Ibd|1h4= z#jRfg;*~A*Rmz-es1{Mq1Yg&B#JAr_vt6MW{6&^w@UCWlP0rFzVR(JX8wR9}f5o+* zYsTp0B|`d)W#PR6&I+OROf$aU@WtjY7P}mHn@OelT2q>Jl<%>huDyJmK+@pGmOl@t 
z*F>bNiG^*>H?nc_7sckNx%ky2QrY#WzGko{!>6*Sw{C22HF=Ef;%AJ%YwP-v-mFQ> z`j#(t3D8PtuF3zkb>mn5$FCpydSU}q;9v@20AO={24An!im?o6lwdzo>8{!+U)-b5 zZXez06sM-NX`PFVdY7Hw+Fw?hi8-I*>e;Q+?e3QM)Sn7)EVW%cAG06MjbU7;M$EP% z4pNol^WtEnFR&v$o#T-WWm5)p(|?$G`k#mkfDJ%9vb-pd{1|oxFS=`fG^p4e)*8sV z$wA4T=@GM^DziqEj-{)5NnvSe38zJ&CMFJ?Vu~cTJ%RfNnw#n z8QvmiO|pEJ+gMC{)~K|(1deW1;!yk_s1toJJDZgsj$s&AVEGu#VwfI_vL7vQ_nm8D zWcc+HE)O6CUi1#S2kP1bUF*-Z5lH;@qGy1YPY`jnMBb3EfJ|aMB#0~M#{Dmj$?LhF z>jpeoJVb$) zNNxLpHWriN1DAl<2lLKWF|);pUJ)7WR0+fvjFvT}BORcU>U`d-@Di#Dw}sdc=+s=WJ2jkA5^Ye^QNM^-OefRac>+6YgqLu~&p3A{gi1w5 zxSp+6P~|h-QI=P&s(ej>-8ZK!G`^oaxkLpt0 zFw-_DTv-_%TEb8xGEtM3k6byU_3)*bPC`1?R6IL3X{IulXp%ozsKe&fc+**oz2el= zMzImrb2KC6uDP~WxMSMGWX*#`kngJly~g&;PjOm_xFy4YcweXDroEfMlJ-{dG;TUo z-c}|rX;l^l5H^D2ln7st(l?OF2x5~Ea6Hl5)AjEX7V(FE`VHs&7({k6(W%etr}sT+ z7w+g5rKS%bYUdn%!X1bA(X8pct0VCFtcH+O)565!wc(HFzM8?rP$BCk3IFm*l5zksJ?HG8~AO| z8HT3AXKhK#{-9%yp7X3%FP4LM5ThmDBUP2+y{b#Khx#)ditA9Y$aG0?c3Qh`o_+H{ zJ)40W%SbQzX^}99Q?@imEp^ZpRtMuQ42~Z)PyD>79;6ds;9iZDZdacE)wLT*iiN$W zAo1fVpKwdpDqcQh&AWca9H#Jg?cD*+sQxJ95W@K4BAX}I05;t<1!hzRrKciuiIeL7XNum5J|_uBWryVLU0yn-kxl3hzV<+mrp2*1BOK!x#DGTs5pha%*XfNAC4x zdrVbY)A@LO=$J-;Na0*==GcC<3VYGA&z3)_g~l*>KHZW)ESp1t8>ku0f#-{LQTG*O zBm>p-iXRFnVSbL@4V-j6p8iyLraHwAEI;SizXKs>^3M00Zaj;yswqv_J4N4P(?O7fJ2{SsEfaZ++ zW@LcykUv!vi$2QUtGs3+VW*9v>h%TX?${ZBCySgM5Qe+3ls~>Y;0LR)O?o?*0DcVn zL#m@OR9ujAfDpJeZDzI`R1;NQJR;F7nKpysq

aAS<{m zcRc&%tTVnS0&0&YUdbEgVFA78snbcehL)C6!0eO=u<4Tt^VXc$dlS&5DTt2-Og)Rn z{0Z?G)L@0deCbF(!JevC-tl_fuS}8q)jWb_{MxTtpX}#GoxkYHUnxSK+VT^R_@0m0 zK*J9itS6^UpkH?f7VxDx9>ITPF0|cGUQbYvfLHeG`d#I(UZ`oXQ@OSZZW*7HZoYpB zXp%MAJVKWtKUpRiIgW$HtxLb8YgeWnB7@fT_^z2o?XlSxscj2%`@PgdAMeQP!f-EZ5ba zGKTRj7cOAT5_iJ(IyK@ze+ch+KP}n4TzW7K*I%l#?uGPm`D(6H-j5&g*-5noLY}$6Tf;HILvi}#Czo}(la7<;xUCSZYyYJWuy09gNYY5WATP->r;8s zZ5EIGk{kgeyy7RtJ3o<8y>z`SnQ@aph(0|t8<9;^xqFeJ<-oO#*yuHphDr)v*$91NArZ@f1uGFlmdGu6xvuKpUcZb0YWZb@p*4iZ? zKyGC@1cYs>u4ERS=zanuKIJy0ylc+IdRuQj^rJu=BGa%^ZL~{uDL%qdoyP!ly*GLl z9TZXLq@r;*GipN(lWH_0jWp=#%8PYkwKLR+qeOqnA4wf~Kh0WJpLIZ3he8VVQXZwe z=R=W<~JtjXe0O6{6>M<^bF^@&7+oBItH)ydE(;ufuEFZT#%(X!e z8{Un{c0QJ1jSsM9)2Y%p`RHmJBpbO*+|L6(`U2C+v&`}*V@+J(6RINp&Wst9-)Ne@ z-3;_qA^0C=$SlvFo90%H{kMV2Tcw3fw@s2i3W{Oi-*|T~0Z{j2rW2d6323AlWX>!j z6sW}RWWz|)8zmqyaLgMw>*oy*Vz%6^W9lnscZLqQAM?bON#id8&P_I*o|d`SaeGn_ zP&vq_T`_=b@ZrT>Oi^@ekfV;~wf7#~m zqV5cHgXu+ol|+31dAfJO(Kl@*&trb~kR_k>lL8DYU-ls3w-FcXjo)F|&zK}!kx&=Y zt)GaE!Zye1T(sGEmGc91&TItB#in%<`pJOiJN&mh@GW)cn9^L$k^z-_BaY|sDF)69 zYL1H4>VWJYw`cc9LDu$uwiAHo%`eJkghrR+Zm;ynfz%wfBgmO%)un-CGPF)5$h2z7 zEU)WRBl|V_NNpqXxMVzZD*wH}b+q0unZa32{DEEZjfXvPo0bOD1 zq#MU)XlO!Oa{X#2E0d^sg~LQ_QSZ{VZ#>S~g$UIlkm2p>mb|$+VYeRY)EkF7DZ)nT z+$|r6Ic6Af2R?=ApxAba6%HBM%kil2ZrOD$6MBMk2Z8905G(kP>Nn;!t*)8g-VXql zwP193! 
zb;o|`X{k)cCE)HiCR5sQTY=&Y6@y&p;9TxdE`(K`epP`9_L(X$!xPHso8bszcfS|E zt3o_?3+^h;^SLavR4T1_yzCyr|MRi&hIf}9I%^`#x+*CwSR=pFfrRG7*+Y65H&62?8{X0;?R^J0xnR{^{I^~dq$y^maq?=~~g7&gw#4DGJUn{FSalS}3WH8rbUc908Q{6&7X|^WFrNK-xgFeh>J_aKqBAA_} zh4W#gbZ63($}9y^l$!!0MH~qb7avdtS$zUfPPQLPJmvVtJbCW%gD>UnP8>XtyPBDM zgAFwsm8kOO&&}%Pb!v|_OA3ykebc@z0{h`n7-dQ5{s{OSOWrh41e{dvsg z1KuyWnKYhPQrID5W<7iGn2CwO$2R>U8*f>3kp^c~+P;o(*(3apEY$iPSqCCsmcs?o zBEAQ1maFMSSuzB94_^6Zk5ZQ1d@Kv1uS}8_=$xSu6KPo-cPhMULi6!tWlxp^v>r2E z?y}k+cWl9k6|9tj=A zW}>|F^mtMO^?6L@W*BjAXb2UzAgMhoJ%jF5#pa&e6}C5jC0a7yPRxzbe#|QG@!snxg@U}$4l>pQud`|bz=W{Vizvt!to88;r{eucOU8AnVmHmPCEiGo@dCBkS?HWMG zl{mPwA?>_#6c_8Y=ozKogbxyyZA8E=V2NEQhhALU&`#}ily8VcTtP2S$0XMth-OeC zgU4h3oWAT;E<39r7-;fK>ebjh$h0*>p4gU^RApQIZ? zOtfJ4P9vy5KbMr|Hoa+|ltImq!1|A|ofvYbxFNoxfn+K03PU1a(N9||`@5mjYgHMl zSGy=Mcm$ImVEMp}uOr!Zw>ay2ikFP)x0QizhRS?|uLEzXIODVQaOwCsM+J^-Fvsq8 zWlJyBQ_)CZl!H;n$hVw#$OpPNmqfbQ?J~Ss!bKx2hK+SH_ym_y6ZTmVD;vplNAMkT z!>Z2B`WZKRTBT-S9>2;5%JBf3DWP#YA9j)6-mjJX$O5oLojtmSL z7Htw}JJA!}V!h1-^tvA7T>f&T^1z`|6S+i<`$S|g3IjfemrgyoE^u} zSM9nK8hoS6%qh7nUSDgi8WXAb$o&T^kL=^ZXKVK)i}Bf2Ri9KQxJ9H*?!6;i^DSCq zbJZ8FFL|%Tje?q%ssOQL z6ZSsBhv1Cg4$1QZ0V0F!2uC zFY=6<)9+k7nalX{Z+GCKELl0Vg$uGI+`@z`0U36f>!BF>cBut`$1B1dFN4gqf*8GS*mvE z{&8j$Aw;y{Y~UBQtFF6cC$rDFoSf`tf!j7b+^8-KETMMMzXqFiS~=QOUX^{?K}aHK zBdVsYx`gef*8u9O;6on6q|>n)2Z=452;o=S*PV&@8Z|?9H9nsNUWeksjgs}ys?(Pn z4+*Nb>XiKqyS!Et8pbV|c&r1*Kr+v1st@XeJ#-0S2Z*Xp)#TCcW@=7r@(EGPyYP&5 z_^$0Q5T~=py%wif#a@WHkSGjptF{RKkzj=t&WI0 zERUG(eG?Y{a0DVH>Y1xyDjD$@+}( zL5ulI_YOoW*rt1Dd-Z05{1|?AbKx zEF$!btbPN=*k1w`Oem|Z#imPQ&4QaV15{cEtCk9;>3V?u?@x8mPli#M$rjbqD{5TI z@k!)|l5ySVG-yN&(t#egOYQDy5JimrU4v$>vtXf z?mV!suU;=)Xycwx#~_4db%Lu*-qRCO7p^KYHvQm%S1WkwN}r0Y(HYFJktXN4|yK$E*M;o`pplwEcXGxysA|`G+jWX2#J0@~C{9*ETQ#q1OJdwfkJS(lKn3Z|LaSgSrQG zM!Vf$q(fSQ*9o)1Hd@31x&L&bZ62zntm7?n2@uP6ii5e$=E?G?%Z9h~+XMy281RO? 
zSFJZ##(UML$LG4&5Z4zFAT<@Gz6TX-TqDA#qGViOH1_&bUe0ZkUfr~1-4E4Ic9ECc zG@N=A0OlT9jtMNCZ79$(IV%Z&s+zz$MmwPn+Kub>w;3s3!#PP3P2kI^>P)F8s`DkD z)U66G)Oz1;YVUnhyU(cDExtrsuhpb9bz8qQTVuu@ruIUc^ZpP%?_IM&JR)Jx9sQc{ zz%-rz529FlRD?19Y3`%%&K!r8`Xyx=Sx(gqw;nYzKDw1w-sPv!9%6aBD^w*w5OQ?@ zZ;0RI?ooL;?fK+vewFgi%#jfz5ucXob4f4%;0l5{iuwnIS0e{qrgoK|t?d;g#dORuFoTT-@L zn$Yxaf%_aAcIx^QzMw)Jl=+#NoVSer+^ifIIW%zL``U(k^9!kmyW>6@ji>!**3!OC z{-6$TKKEr|;0ODYqAk`!7;E7{&5@0xy}LPMW4@qdkmfil`sUS*%ZzoUkbIr88PfO% zRj-*-Oo!2G`N|tZw%O81&ni4k?^d2sZ`~|$0M@$l%v5_~YlNw<#GTlrY3H}>OT#%H zu?ZAc1kLKX^1;Z)Nu_2Dy;KP`bneQt?@3ucDXwfknhpHjjKeznHEHv2j?Eh=&bw`m zGD8?Xg@`=qlAyBnYA&QfnyxI44LnYGMr?NLCut=JlH(wt>gvj!>pnN(%=rBLD)++E z=2%R4K0xkG(%mkMUz4W0W>hJUH#s;r6`{>i$F93}#yz+|SbbNX@O}#y=df>zD~!PM zjxSs-y9uqPGI%HeGV7KVQ==vD#Qt#==-Hu>$}7a-6LWT(=t-IgOWWtU)^DU&^Jx>a z7kiN@QfyCbid_M5V-wHxc@R^4G$npUDdIrUR3w)U#jvGj;5+cNy;cSa9wIeG2=&&? zR}UZLkm}3AI5$kIE*m{$p@UIij9AKi<_Yki&zFGvmb|9444W-}X{VQf-l=a7zmJIi z4v@Kab7!Y~iL#B6QPW1{*|p}W^_SrWYtR#9nM5PrVLHLy3a4})TpDP$D?HQjA+Opg z(`E^4|8aj_NH z`q+S+yEn2$mI_bR*x!sU-w+-&j5pMXd9!5)E*wE9ewU=`y66RAKXveMS7=|w25|2) zo3S0#c1dE(hX)M}1`zedhF}RhAYGy{7utn7%V8_%qjp~9BYeV(t)F7~Uw`R)R`&%? 
zDy|(nEu(*+n&wu_X~w{;ZY5LdQFWc)&l0Q)H8yBglQmI}HLSt5Led9sF99O1U@*CC zQ`48GK@D%p89Kug@cQV??@GDh6U8-s1Q};HwS0XAn_SYood)yw_sal_^>4SYuK za!LCn8w>b+y`fu{D{~>Pn-%b(vJ< zoh9#tWIWCTHeUiVYD^9mD;qmKD%1lI$JYs;ZGTmZ`KAlL~NeV zVt{<=Zn8@3(4wD*b&v`X09kkhWQX<=U`vRCLoDOb9T#>c_V=A*LwQkcU*+v=11~NC zw>mtFE(|K|zO{D7wzjGOiv?Kn&AAZGr&}dnMF+yEP6*Py+w{98!7%wmC>bHZP3BT!I|G5g%WM2`>pNn9Aq73Q~5-`hBSOGixyy3Ln*2`^gUxK-C&VT z3HFc7d#iND>qD0Kp;2Iwm0<(+efxrIM_BRuu`1lg209+Ay)~{)erL71m`n>MLqQ(n zUs(_sf1tt^wV1=&Mi`qsY6es^>o>F{FdEt6{w~w8}b8!p?FBZ$`X{eCUtChFYnD>p3(e&u3 zxF!%G*GmZ>6}kV(VV)zoC6LCj`8?lPKW$tkl6I&kL~wjni*Pu#7jX?^fGnwgN_j8;?jS;U<{Ngh z#jVnK#B1yaa3*<(h$E1T2aPswFP0h*g4SQ(OfS4QsNs*1PvIk5ESq1^Vlz%pQufC- zjTtlQ%h6hkw_l~x*h5{!NiAo!ctobDq@S-j0}ijEXxTNmx6JPpq74Go&Rj=|XKL*B z+}q|m-$afP*c+W3%)BK1P6HzvcKo$v0&|-bJ`m0ZZ(pPZ^zM0mAsF*{|0cvvKN0-r z32z~sUW%<|8$@8YI-!?ICecjOzV1_Xh_sa<#Vyq4(_ixyKW8M;YK$*R^MFDx$Ma7l zXC2sk`1vIo3hK`$xDpfhHbvy8rk9ooV+tyq8gA%*J3sQ?G_&zRDjHZ|>QWLCBE0Nw zJ%8^8bT-(`&^kseS~GkDS{zWIwc3AEd~PbG9Fa7R5)x;Y{b!4h}T;xNFF>^ z=W{5C&KsnoUPkU9R&oc%`-Bp1=mnl-)WbUKkzU*W0c`$x68ta`!(A4TrRUbviwR_u zLavz*b6vj=Y5*f7bs+sdDA&nVK=RM>$f~Y@Nl7hKogDbG`ZzY^dMu0My&IsbJD|j3 zOpx5(Hga8(4|NGJJZ<&K==_eYXm>SSRGK{IiRoQDvUJ-!le4w!pdv>_&cev94XXb} z1081k1|EIviv%{__I~lAhGjp;UdI?LL&4o$GFT1FAVI}^(dMBt_v(nz0FXG-QnVPz z^YQ4q&$Ku}9}jv1+*H?FOYQBVHfOvSTJqCHc9g5Oc(jJyvSXsiuoe5VzDx#aJoOeE zGC4G_OZE4KkDm0dF*Vyeduj6wTIw8`4fcya^K>2Jw{TWo)8P^P`6jQ6zrykqC1V)` z-B_9!iQJv~8Mj@SQih_L7t9{~6$c-&cyc!-bc1O9jgW1gXa~VpTUPpM|L{nl zS0oVEV~DK}7FJ+#1ZZ=uRpd#VZ0)HnKp$QUvL#ID!pntC*nWs+H&vpzm%FRJeK7a+ z=Ao^^(~dNJDwDPvsxPo)Hqfiaa-)47u)8aO7PT!i?9!XW*R>iT4!tg4?p@#fb3T^E zN!wv%$*tK2QTaMmIc0C{nN!S-`to){XJ0Zl;6mQ+2_mURuTTj%{*)H9@k`s&p-^SI za?O3&5wPXBk{z71*Cw!d=LfGV*{mgX?p<|GJF&SB>tH4T|Celw&uN%972l-W>WX zE)DVdAN#ZG;B`@6V;j6-kr!NYn*HE~5AQAkJRx0}D>cRS%UoEVZZ*BfBDXtO$G?v6 zTX8u1-}QNWPaJWBSrYW1P6tlN(T>Re5V0U#b+9Pr|3~EUX)=>}2R{W@%!*G~)Yj~e z5Q8Xn&DaH+YR=C~&ZV0mp-%Z3V<^`Dw9=$tIgCB#y|(eg2;lp&#*Sj4W~?XJUebfjs^Sqf4aC`MNVS7qibFzRs*iU~6*WXJSZQ_) 
zt?|3`;^M>ZwTEVY11odITyo!;i167oBYrQ|42IY6C1EN}#j5JFLbFq6An6Uk4h=O<> zNsoN6)iuuXo;g<70UGWwC1;aNaXrdlg0_KXVIik|(DxGH{b}%mV`aIdL*Drk(5SMo zZX)H>@loL>*2b?c>e1ru>q zS#1l$s_DQPR|L&+^67a3-_s6*l^~ESD?S^nMoAaGT%y<+_0a!&EaQ-Wi4SH}$g)R5 zTe*8}%4QI6vSZ}`_QOPY{q3eL$*8OKwm~MoV6>$so@4?#FFyY9m3XLVwHFvffrz>_lIB7v@aL zzC6+mHK$~S>K~Ks0$X9=WuhNPCSk$pBI_ihQuC294PRN-{ot4#!F|LDMk%E*=vprbx5I+P0qBx&6dF*8oWi*Pcr(S zugek8o=bq<;9$jpKa0e<+>^2y%vm(gKEYX5VZ2d3SQ8RzDxS!1H_|?Y4F>TJ{j>1m zecyW;2jzK2$&O}ciHEUetQJTy)HQ=Uvr&V3hOu8~Zg>)h@ij6GB7+>DBEq{g%Ns0*P{``BBX4w` zBthHGB5to;V{WllT^SjsANRES1kr<8llo30h95xEI11~n6ckLxQBrOEU74_#$J$DCqIXx*XYWs z?7#|E+Z6Obc-n3V^NEexzFVSWLP@g%?5|5H?fS>gUikSUN;K`qI-$5K|6b?-|2v&? ztrOrYIo(_2;Z*~(3iQ|es?7`5dMK~s7$L}Wej@araeJ#|Lr#xe;7-D5s<|0< z1sNKTMr9Txzc%~T76FSm)GblT>rRYVO!ET)BCj0(74LuXiXH@KHrY7rr9qkM5nd9 z^B`WU5%+Q*;Y@Fv^|g%pd8D#oA>%U`x8@TxbCoq%Jn1izm!ZmMj!euzSN{?GN~sLE zT^A2g2mG^G;)h$Q^sU-TT7Dhis$#~3>{l^Ux|j&cg;gB>JNafr>~8w-%TwpYvUM!X zzO18^uv2~(mH+yo`=mc#P;AMOq3gLgoOycA(wWY5^0i0S^Jdgl^r2!GSJaNR=c);c z7FoHsBBn7+&F^(WH(EbZ0-9AocPwd{u46XpQ_G?{O5III{v7Bbw@ZumjwHu03?ame z{@tdL>kJlaen?T3v~xD6K1T>}xXj3)AtTDnEt}k@+r0Npe}h7G zh$O<>jkZb!4C+ex7Sj-&-qX{bEv`<_V)M))zmRLnwDdF5n343}lah9|hL?xe2y;9r zo=s+`QQ&aP$R=#x6D<{$y;3C*h`Go6K3m+u)Gix*T#v7d&RO5GAyk-G3$9&{C|xeV z^7(|fmP~=)$HiV`o*4bA86lHsdt8rgJn|%a6(SZ>b>jy;dyJxQ+L)!b-N@2HsldGR z%HoCPs+(KTmeoc}##KliQ)ssuI?etXLO-ktfdjZj2=;cE4$QpU)v7R!Pyw0Etl-v% zF`wA0t9_12(Rg%OfO3kEfs}puQOgRX|L~xKUF}a=`*^`|kA+~^=n%1Iib+JDGBOKJRjgV(qAuC|xud2qGy{8&2Wg&lfiL~Gc`V`a_Kd-R0c4Jn(QLpky>(d2!8 z4{&+-q>s0RJ>?;h!gO=5$kE45C8=ev>2E_)z}eg^Bg02TX~}u1isPV@`MLP!%JZhIA^0#CjqgX*rD9;ifJjVJ1T($jpN!k24x{p6(q z*z`QRk+9IHD&Gi_nBSr*>PAbm4p6hW>d%XJ!657CM8EWFbLS@AR8-tqY;^3u)P1Q- z9)TTxU4?(r4aReN>Z;m>$UV0MWZzGQ23x3Vq}|x}GT9DO@%_C!ZEAlW%MVCyAIX)TwEFt=uYHE8dO&=)f}!*|77%e{AY?{xXYng@wDvL zP_8F7xz0|`ev=gm@@nyeDthJuBhx}4X{J%eD|!-VB`B|AUfeuuQ4zGzQ4OvFahp-&$dfdAZGPP{kp19^NO8~PJx2l^8}%)lxy9(SC%`_0nLPD?U$@S zCjJ1Cj^7{2?D}dJz%Q$PTCye2nrkwL9fENXb$DL_^uyalsGh7K2Rua_F2>FL3ODS1 
zIfJ;}(!Pb5EWd(iuMD$sY4o+yH7k?FR?)o@53-YecKq0*)Y1L86mER+lufjU@=GbIc2V3+6Q9!~UL_q(<|y)p9~@Kaq!^Jo@sq#h z&O50*KA4t@Oa{G8XY*Y5f;m7aro8+rEDiwtqRTB&Vt+-L64qRE+e>5Bvb_t-Pc}yC zh*Ynj$f&7}1O9oy{P}giJ3DDFdVR_~i~CAus5jT`pN=O-);_nf7~;KsO4?N|nH2VE zR#J1m+5y$4g zexN}i|-`mBAJi7)m$=0|zt#z{cR@x{m!Q&6F*5Xr8U3dH&w-4=9eEvuUS z6$b6&;0-BW;k1)1VNZBws*{=eG!&Y-6D?%kkAQ4tVPkP>nPq!;O(9HmJY5JE=? zz4u;4x)A9dq<4^#gdU3YPAC%Eq4yRk0RsMW-??|@zW+JzoqNCEFMH2g`^%m^zrEIa zp7pGi?ZDB+EA@?{F^suYIFyOKMbV~?SluH%EvX{Tvy5wV0o%0{OtfgpM9?Y&nFP=Q zhM6l?inzVWCjVHLq0F~25pKuzYc-1Q6@t}&N`{zETvrwqQVxA*vl2u#a@=R7Z@t>ej@;;>2y~;+ZAsmTX*zWC$qKs& zQ{4RX2BLBHG5a7Ejn|hUg-)!A3TxM#oIo3zY|px*4;F{}bFG_{d+HL80oUKlImVkb;@(=Qu-qoMpYxmBSixKI23Z zHJCZ%jG6Hbydgf+soJ}0V*I*&T+yQQs>^%XU>wiA(lt!;av*!3C;h;S+*?o%_VvT9 z7R%!o1ynqn#szDQ3pt)aT(x`@dp8V}#kPqSHhP$9*~~hR$4KAE+mv8Y{7@=23on@z z^w44#yDQKuepMKRMXxKv_Y*yy9(;pb=r{D$T?*&pvjSw^zxWXs?eJdCN zrT*fhR5l9*dQ)8I4i9bO%(BIdn`G%h zSA_9NPHd6+U!+!#*oKpmgH@Qa!$mGj%soea<1Z`(hw zEK_3`gsHg9M9sduw^O*;TlB1xg~rNkQVsZDWG?xolxSs-+5n?hw_1nt!fVM)G}D5A zso_&86MbTvy=|w5BraT6?@Zk&nyft%b2>Ay{ZzhmL9zq1QJrjDObiItT^Qy*dBfCS z;9n-|nI!)c%sBtixBb~QV|h)Qpl{(;iIQIAj=4@?-Uo|RCnSS{!FCa_D$94=@Uf~7 zD`EQA_=)W3DUQ8s-jDVTvxnX>tk{bKJ*ZDYisF9vy}PbVzJn@hsGa)3~@d@#eZhZG-NG3&B$ z?pb)p*0jF|I6=-*vEeQIPZO$crh!e^V3KU@M)94G+93?KR-GwR(L;9#TU)(cK5F{3iH{z zIyi=^uJ^I4ROcbpuY;+o472oX1w^|_{F4Jo&XvB=O+`u1z3&gU+o(y_Rx{vh$jmMO zFH4q5IXNvWCZyvwZqx?oD8DvJWpqK`$J4lHmf9)@_11v*x~6CUM3?=`ch7&{MN(VF zd0Pj?M7|5DvEy6rHTF7_CFSjCF1ALAeW6nnl5OM>R_f&-5NA#@$GI0xKX*>?@YNBI zIR>)sut|{-%t41ozHf3*!{ucB`Bn1kUdfBJ(}~f|&J`(su%KY`(+l$3QPykCpBq}u z3ahk9?O1aZK{UNq9IlQJH#qt3qHmH+h!0&4%dLdh_Sq-{JB=cun!<&3y%fe#0!(QJPnOy&v(rtB+~iXlOQ1O5{D~USs@?FCjr`*Uiyia zBepZ<=3r-$x5D7UX>t)xmB@|1^%L!Wzt|XD|rZ`EsRH)$ zlg|GvCTgtx7QQ&Q&NcI0wMcu+y`!}O;JvqehSQ2nhx-0?#3QN-pJKbCP;shvF)ww1 z6=R@X-LtDeTZO}t*}`8LIwq(y@~>Xin{@yOa9Y(ZDJ&l3)!Ye?`@Q=8^N82#rVIEB z-E5|&Fn0Y3o}GtGUhlnYU|}4Y%O&&NFFU9Sqqc1i+oMP1zs{S+gtExaBd)RzUx?pB z_I29LYXIT(TUDG*IeE+8>RpF!n2WjvpPdQcK-Nc%P`8Cvlt`{Uy7csHnFME13ojbJ 
zNx3NDnDz1&)#&KB`?hzQC*+@z`2R%bq3Kt!ycWabl%miF_x19mb)`h&46cKzMb^ES zE21&i?zgPz1i9&8vEmu!?XpwnEpya~SqsDaO>uq$_yQg3!dRI+*?}&gI3;FySco=9 z85I@*94lRk6e%%XbZ2i_KNv>s7tUk1PlZV(?%P*}_#*uO5-a=_1Cy~g`6HO5oz1VR z-Pj3TveVaefiF1Ta5+)4&vC=^?Ol#-A|Ab*{tYPOq^eo3R=u)c z=A?U(H(1fDnfc>wwbmli<(pk{WQ{fEfT|#A8*}UnAZ)~mQC5iWKaL8-{02P0NP9)x zCYs8W*0X|Gg&`ZwG+F2k_^F=uIMqbK7^grA%dh9DNt>!N>d-&@xN+^Lj(4zNK9jJWCuNMv07$&2xk3RlF>|#tM70tgzk z)bQV5og9{ZFSHJIBO$DgBh`1B7F03ADmVOBmoeEvQI%WqIsr9jnv(*Ln%s`8_bjGm zkR198r^D{JE|-1ggqK9lRQ$)mY!Pbt|d zQf*G%IY1aN_Ej*XYtHS!$^aJEp8+6wg#PEo<^S?BMDd}Z*b7|7m1+9@Z;}dmrtSVJ zQD4%zVvClPcL(xrSfX&ktz!c&VEN^;cQ^I7V}(KC(vD4h>L9`031AR`YfTFbATTT& zK3g2#My5Uc(9DkjZ|1@aLBdM&%sLh~caIyY64ipop`mgiH>n~Zk0@6kpuT}hYR*wnRw90C zqHGE@ApB11jlNG2GInT}2Pql>86b&G@se<_RZwM4i1vctBvULP8Ks00fTO~@fW?p= zW=%Q@@h}_yyKT?^%KTm(tFD6EgNqgKY%26|SzAoRy0dnzNdg#gUL1+#~_QKqTvcV^-22E8OmoKdXPQ*F9ghF^; zsuqgb3XgURriZXJYYxkAL?zp}zVk!0SP&_+Rra2%yzC!@hGtEux~xQe*UaR5t`qeQ zopl~QJt0NxA>LfG8&_Tc0%pP5CzSs5cANfj{OY=-%+ zSC5geSq6Drxr*h!dJd!Y;x#(<`Wz|)PL|nknd;M*&)agdgm1a=ei%R1tu1i+78u=I zIPZXWfqx3tl}q^7s=WVvA&&z@OMVLNK>bJ!Zm@!qY7r0p7#{Zdr5nIE052$Vm)dK_ zF7x&DL~!#%${Gl(6;Rj<)Cw$NECtb>*gPYSC$RAm>)d_{8T6^O*nZvld-3o3J8$fh zb3_>HK_i$=mtrfIDWTl1zPrJMzLHF2E%W4M4XEbssewR(173^A-Gx(c_hpPr-7>q| zuxFldbsP5=k)?CgGYfblGX5ug(wV+o=i0t3N%ywE$?|?EwOYc{0mb}P>j_NpyFOs`lS*ZjBQBgU59q` z>g>cj`mx0j74Nt?YnruR*}7V?9Bi7Wu*F5UA|UT<;1BOndtCE4`pIWD79hn+TiY?? 
zN}J2=dEG=M@r0#A$@CXQ^C_jLCh?6$~kxB*l-Z?=nc7r1AV zvwRra6&$tW$EDWiLj><720c>Ar!(xM7EK8?4;lO+Bfzvp=BEdYBb& z<*wAF+4+n3ts(6mDdJeHeM2?YhSG#;$W8w0ncx3F(E;@Y)oAR2Da<3mZ!!sj04LH# zr7{g=txetS2YHywD!@G(#)@>P2Pd)gz!5La=%WU1>$rqf=#ao?mpg}jHz~_>G5+=C z-3DNev^CFEyA}0Eikz5BeM7}V>ipGXn{bgsW53j~cH@FlKl0D5%rp1JCT@rFN?o1e zx37%TsR&{xdd8@1gac9jr`K2i#Qy)AFH~l2jMU7#zFSXUTI1&qo%0|u-#ai##MbPB zK6=Y}XSiFm>IR(D45GnO<8D>mwDuMPp@*QbR;PkpUtxH9k)5b^ax`|1mUw&R@WDF6 ziq9iW9It5AA;_p&#qf*vSags>ThP(et)cy~O^q`TZ`87ga$qW=IVDctYk|*PG`_{( zjo8n(QO6N|P{pU4-Te^z(PqAt@1K!zcf#t z>17BU5{w<4(f5snZbpCm%j00DE0cD|Fg<3mA%WOHw^GT7{|~_ksZ1ZhxX8IC%zEeS zmpplLV}nTSH}VNR+EbYGCq2Uex*LT8b?|AKtrZ&RCpjzh?Es_jM?(GXJq2k^p6)E- zKOU_uN+`K^R))o2*7cKg{GEbTN6xADGD<)PEq&i4V4*gAFV-YO0(|sl@-q5tD+45k z-F$OIH*W`BG_CB-T^40S8W|pLOMhr}uXcK%+}b;HAs#_w5<^-eUg;pOGF?chFJki7 zH&PmeR&qTsYj~3-_5C$$p!y!@!q?=pKdsu>M&M9A3}{0e>5SdVRNFLeT0xQY-K8~p zv9_W+4L{$nR8xa=8j*LX zQL}UmfR>-{pQ`ezqH9s%G3tF}UiZeRwsKwlDrbHJG}kQ${1cm!;|o!DU(RL7%e>4! z$oC|?OjQ<(P}f0^sV8S3%$j#( zxZ+=UTQlY5bs-qFs@n)z|9tWD(;(iul-oqJn~v;Ci&;kp^`<9^5*04aHoJi5iuFoM zHsw-8f9G&+Fd1(osUZYg`zO6LTqI#bHy`lOc?N@Q&cXfS*P^|&v*bNN=)+1DI z%YZ@wYC&CVJGsW|Yt5uCz6a^dp|gGx8j|=)+rX%)(^%|}J~YmhuF~x{;B6K}v-jdr zO`@BK*k`iYDLl-AVe z;EcVGI8#hkJc9gWn-l_L&QE#B=ii4@aFQsx}Mv7_+p zn|w23BH7j>xQ;T~NG&C3*43d6qcIRxvI0!|Iq?@@UG6ymELUbf(I#8lntC{y$MyN^ z47^sMpNt#%w8${g%;x#ULh1T_nLi8KwH6lvI+wOV8RF?-T$foh2C9u0K<&)5*mw%= zqscaxxWWF3l48k)@nyP-6LOTyRY?+VbfV2~LBg=>LTD<$E-KfJf1!_p7BASO$sm&% zWZlkitb9)I&Ft%!)aWF#TOLocjz@opsJCTL)wyYF5Gq`P5X!7 z6+JqGDLs|$|0D=MZUJ#X$64&bH zn)k(9{46QLBFoA?^;c{^Y|MuTsTDQvb}T$#o(>ZhKaYmMi+T=gZSX5TGY}+C?(!|+sPC}o&%K*Pj1DNVwanon?Ou{XKHy)iP{+8zt@IjlT)f z)W+2k1^|_2V*Olwm+H1N4DO5h^(im_48TP}4I*S>m~c2%5mnxOm9JxuKfPNR6>fCD zE(=T=_8d!G@~2zvmsBsL38f>mSG~(Zf@j5#{02;&pKmv|E>QPKT&2vA$fdXeb(ABe zW`#$?OpAxyp`}2c12eci#6d^p3%AhQ;(x;!IqfO zcHHTNRx?uM(P0AA!Q39>E8*dCGh_F7xUUJ4Vl_vnuP>+7RDaE(DS6n znez++?X*uU?`qj97F{fYQY!UlqUnJ0O+~Y7cnBeptz)IN2#L(SnK#rTeZ+0eE$Z;? 
z{#uW|dt^QYfrKw8Lh_-3a{f*7gic#s8>P%~)|2~*FHm)f#;)S6+`=zt+4G*H zW3KgQqKRJohd$YV4`)DgbXM{gkVFXHmphLF5vIK}k@CUAb(_o`|LIDG|zAaua5 zIZ&$R7Vjh8jiT%3Q^S}Wf7Ktig~R68{n^aB4Q6{M8r7OSBqH=8Rw2ru)Uv;}U+rxE z;1RfR(zw)LaSfz}whG(;>FIH2?oBkW%^mpO^$`apuU+#v;F@fvD|m|Oe;(k4!xVKp z7HjN$&!m(-Ywgr-w67?6rzz*?-px=Yo2tAK_P=_c`Mge0lz-7{V7KT>VV83(LJZNT z>fq_c;SwDHs?a_BQ1pY(U|~~9onE{HWWmwH1>{BaK_iEO+cy3u-N~mk^ zOmDaU;Wwn5&x$qTFsp)U(7N%vvY=a(TgLGZdL4LT&UJe9&j%0OYe?2Qh_QX3M@p8z zcp|9P!=C`GWjz~b!sG7zUlh*&UPZ5E0H^QyxG)91rxN5CDeSndd|2#R5Wj-=f^~r9 zU{wA^Wh2K~y!X?P-q* zHFsKGl1?D#fnUIO`9WV1Dw+zC=zEolUZJlhr8Id|hQm|dYM)?f3h=|s$AQW)1#%<_ zUq=sU!;0|33aDRxMRh)H#r8cHKR!_?FOXbFyfy!dBN}~XdXo%ao0NLnw2lcFvI#rI zxwr%lZWkwyIv}sSf>#|JzXV3RXlo@;?kqj+{Y(;z%1?URG{Ak%0)stx)_KMLvdJXw z!B0_Z`KSLc!S8>Miy)FW`=3NQ`aFoBZ;AhwJ=!6mrO4)|ym<-j$!-r?UHJ`QQ>V1L zOS9eBZ2X)MTEX9(Dn{q-wZa=Yj^B+47 O|Np^%`U1qi=l%;4D{gN9 diff --git a/libs/langchain-community/src/chat_models/tests/minimax.int.test.ts b/libs/langchain-community/src/chat_models/tests/minimax.int.test.ts index 1969ff0b9bb5..8ad6f4eee6d8 100644 --- a/libs/langchain-community/src/chat_models/tests/minimax.int.test.ts +++ b/libs/langchain-community/src/chat_models/tests/minimax.int.test.ts @@ -1,18 +1,18 @@ import { expect, test } from "@jest/globals"; -import { ChatMinimax } from "../minimax.js"; import { ChatMessage, HumanMessage, - LLMResult, SystemMessage, -} from "../../schema/index.js"; -import { CallbackManager } from "../../callbacks/index.js"; +} from "@langchain/core/messages"; +import { LLMResult } from "@langchain/core/outputs"; +import { CallbackManager } from "@langchain/core/callbacks/manager"; import { ChatPromptTemplate, HumanMessagePromptTemplate, PromptTemplate, SystemMessagePromptTemplate, -} from "../../prompts/index.js"; +} from "@langchain/core/prompts"; +import { ChatMinimax } from "../minimax.js"; test.skip("Test ChatMinimax", async () => { const chat = new ChatMinimax({ diff --git a/libs/langchain-community/src/chat_models/yandex.ts 
b/libs/langchain-community/src/chat_models/yandex.ts index df1b164ae049..459bd0d6dd4e 100644 --- a/libs/langchain-community/src/chat_models/yandex.ts +++ b/libs/langchain-community/src/chat_models/yandex.ts @@ -3,6 +3,7 @@ import { AIMessage, BaseMessage } from "@langchain/core/messages"; +import { ChatResult, ChatGeneration } from "@langchain/core/outputs"; import { BaseChatModel } from "@langchain/core/language_models/chat_models"; import { getEnvironmentVariable } from "@langchain/core/utils/env"; diff --git a/libs/langchain-community/src/embeddings/bedrock.ts b/libs/langchain-community/src/embeddings/bedrock.ts new file mode 100644 index 000000000000..7fedd2f9a8f4 --- /dev/null +++ b/libs/langchain-community/src/embeddings/bedrock.ts @@ -0,0 +1,142 @@ +import { + BedrockRuntimeClient, + InvokeModelCommand, +} from "@aws-sdk/client-bedrock-runtime"; +import { Embeddings, EmbeddingsParams } from "@langchain/core/embeddings"; +import type { CredentialType } from "../util/bedrock.js"; + +/** + * Interface that extends EmbeddingsParams and defines additional + * parameters specific to the BedrockEmbeddings class. + */ +export interface BedrockEmbeddingsParams extends EmbeddingsParams { + /** + * Model Name to use. Defaults to `amazon.titan-embed-text-v1` if not provided + * + */ + model?: string; + + /** + * A client provided by the user that allows them to customze any + * SDK configuration options. + */ + client?: BedrockRuntimeClient; + + region?: string; + + credentials?: CredentialType; +} + +/** + * Class that extends the Embeddings class and provides methods for + * generating embeddings using the Bedrock API. 
+ * @example + * ```typescript + * const embeddings = new BedrockEmbeddings({ + * region: "your-aws-region", + * credentials: { + * accessKeyId: "your-access-key-id", + * secretAccessKey: "your-secret-access-key", + * }, + * model: "amazon.titan-embed-text-v1", + * }); + * + * // Embed a query and log the result + * const res = await embeddings.embedQuery( + * "What would be a good company name for a company that makes colorful socks?" + * ); + * console.log({ res }); + * ``` + */ +export class BedrockEmbeddings + extends Embeddings + implements BedrockEmbeddingsParams +{ + model: string; + + client: BedrockRuntimeClient; + + batchSize = 512; + + constructor(fields?: BedrockEmbeddingsParams) { + super(fields ?? {}); + + this.model = fields?.model ?? "amazon.titan-embed-text-v1"; + + this.client = + fields?.client ?? + new BedrockRuntimeClient({ + region: fields?.region, + credentials: fields?.credentials, + }); + } + + /** + * Protected method to make a request to the Bedrock API to generate + * embeddings. Handles the retry logic and returns the response from the + * API. + * @param request Request to send to the Bedrock API. + * @returns Promise that resolves to the response from the API. + */ + protected async _embedText(text: string): Promise { + return this.caller.call(async () => { + try { + // replace newlines, which can negatively affect performance. 
+ const cleanedText = text.replace(/\n/g, " "); + + const res = await this.client.send( + new InvokeModelCommand({ + modelId: this.model, + body: JSON.stringify({ + inputText: cleanedText, + }), + contentType: "application/json", + accept: "application/json", + }) + ); + + const body = new TextDecoder().decode(res.body); + return JSON.parse(body).embedding; + } catch (e) { + console.error({ + error: e, + }); + // eslint-disable-next-line no-instanceof/no-instanceof + if (e instanceof Error) { + throw new Error( + `An error occurred while embedding documents with Bedrock: ${e.message}` + ); + } + + throw new Error( + "An error occurred while embedding documents with Bedrock" + ); + } + }); + } + + /** + * Method that takes a document as input and returns a promise that + * resolves to an embedding for the document. It calls the _embedText + * method with the document as the input. + * @param document Document for which to generate an embedding. + * @returns Promise that resolves to an embedding for the input document. + */ + embedQuery(document: string): Promise { + return this.caller.callWithOptions( + {}, + this._embedText.bind(this), + document + ); + } + + /** + * Method to generate embeddings for an array of texts. Calls _embedText + * method which batches and handles retry logic when calling the AWS Bedrock API. + * @param documents Array of texts for which to generate embeddings. + * @returns Promise that resolves to a 2D array of embeddings for each input document. 
+ */ + async embedDocuments(documents: string[]): Promise { + return Promise.all(documents.map((document) => this._embedText(document))); + } +} diff --git a/libs/langchain-community/src/embeddings/cloudflare_workersai.ts b/libs/langchain-community/src/embeddings/cloudflare_workersai.ts new file mode 100644 index 000000000000..a6501e47b5af --- /dev/null +++ b/libs/langchain-community/src/embeddings/cloudflare_workersai.ts @@ -0,0 +1,94 @@ +import { Ai } from "@cloudflare/ai"; +import { Fetcher } from "@cloudflare/workers-types"; +import { Embeddings, EmbeddingsParams } from "@langchain/core/embeddings"; +import { chunkArray } from "../util/chunk.js"; + +type AiTextEmbeddingsInput = { + text: string | string[]; +}; + +type AiTextEmbeddingsOutput = { + shape: number[]; + data: number[][]; +}; + +export interface CloudflareWorkersAIEmbeddingsParams extends EmbeddingsParams { + /** Binding */ + binding: Fetcher; + + /** Model name to use */ + modelName?: string; + + /** + * The maximum number of documents to embed in a single request. + */ + batchSize?: number; + + /** + * Whether to strip new lines from the input text. This is recommended by + * OpenAI, but may not be suitable for all use cases. + */ + stripNewLines?: boolean; +} + +export class CloudflareWorkersAIEmbeddings extends Embeddings { + modelName = "@cf/baai/bge-base-en-v1.5"; + + batchSize = 50; + + stripNewLines = true; + + ai: Ai; + + constructor(fields: CloudflareWorkersAIEmbeddingsParams) { + super(fields); + + if (!fields.binding) { + throw new Error( + "Must supply a Workers AI binding, eg { binding: env.AI }" + ); + } + this.ai = new Ai(fields.binding); + this.modelName = fields.modelName ?? this.modelName; + this.stripNewLines = fields.stripNewLines ?? this.stripNewLines; + } + + async embedDocuments(texts: string[]): Promise { + const batches = chunkArray( + this.stripNewLines ? 
texts.map((t) => t.replace(/\n/g, " ")) : texts, + this.batchSize + ); + + const batchRequests = batches.map((batch) => this.runEmbedding(batch)); + const batchResponses = await Promise.all(batchRequests); + const embeddings: number[][] = []; + + for (let i = 0; i < batchResponses.length; i += 1) { + const batchResponse = batchResponses[i]; + for (let j = 0; j < batchResponse.length; j += 1) { + embeddings.push(batchResponse[j]); + } + } + + return embeddings; + } + + async embedQuery(text: string): Promise { + const data = await this.runEmbedding([ + this.stripNewLines ? text.replace(/\n/g, " ") : text, + ]); + return data[0]; + } + + private async runEmbedding(texts: string[]) { + return this.caller.call(async () => { + const response: AiTextEmbeddingsOutput = await this.ai.run( + this.modelName, + { + text: texts, + } as AiTextEmbeddingsInput + ); + return response.data; + }); + } +} diff --git a/libs/langchain-community/src/embeddings/cohere.ts b/libs/langchain-community/src/embeddings/cohere.ts new file mode 100644 index 000000000000..7d6f102ae7ce --- /dev/null +++ b/libs/langchain-community/src/embeddings/cohere.ts @@ -0,0 +1,155 @@ +import { getEnvironmentVariable } from "@langchain/core/utils/env"; +import { Embeddings, EmbeddingsParams } from "@langchain/core/embeddings"; +import { chunkArray } from "../util/chunk.js"; + +/** + * Interface that extends EmbeddingsParams and defines additional + * parameters specific to the CohereEmbeddings class. + */ +export interface CohereEmbeddingsParams extends EmbeddingsParams { + modelName: string; + + /** + * The maximum number of documents to embed in a single request. This is + * limited by the Cohere API to a maximum of 96. + */ + batchSize?: number; +} + +/** + * A class for generating embeddings using the Cohere API. 
+ * @example + * ```typescript + * // Embed a query using the CohereEmbeddings class + * const model = new ChatOpenAI(); + * const res = await model.embedQuery( + * "What would be a good company name for a company that makes colorful socks?", + * ); + * console.log({ res }); + * + * ``` + */ +export class CohereEmbeddings + extends Embeddings + implements CohereEmbeddingsParams +{ + modelName = "small"; + + batchSize = 48; + + private apiKey: string; + + private client: typeof import("cohere-ai"); + + /** + * Constructor for the CohereEmbeddings class. + * @param fields - An optional object with properties to configure the instance. + */ + constructor( + fields?: Partial & { + verbose?: boolean; + apiKey?: string; + } + ) { + const fieldsWithDefaults = { maxConcurrency: 2, ...fields }; + + super(fieldsWithDefaults); + + const apiKey = + fieldsWithDefaults?.apiKey || getEnvironmentVariable("COHERE_API_KEY"); + + if (!apiKey) { + throw new Error("Cohere API key not found"); + } + + this.modelName = fieldsWithDefaults?.modelName ?? this.modelName; + this.batchSize = fieldsWithDefaults?.batchSize ?? this.batchSize; + this.apiKey = apiKey; + } + + /** + * Generates embeddings for an array of texts. + * @param texts - An array of strings to generate embeddings for. + * @returns A Promise that resolves to an array of embeddings. 
+ */ + async embedDocuments(texts: string[]): Promise { + await this.maybeInitClient(); + + const batches = chunkArray(texts, this.batchSize); + + const batchRequests = batches.map((batch) => + this.embeddingWithRetry({ + model: this.modelName, + texts: batch, + }) + ); + + const batchResponses = await Promise.all(batchRequests); + + const embeddings: number[][] = []; + + for (let i = 0; i < batchResponses.length; i += 1) { + const batch = batches[i]; + const { body: batchResponse } = batchResponses[i]; + for (let j = 0; j < batch.length; j += 1) { + embeddings.push(batchResponse.embeddings[j]); + } + } + + return embeddings; + } + + /** + * Generates an embedding for a single text. + * @param text - A string to generate an embedding for. + * @returns A Promise that resolves to an array of numbers representing the embedding. + */ + async embedQuery(text: string): Promise { + await this.maybeInitClient(); + + const { body } = await this.embeddingWithRetry({ + model: this.modelName, + texts: [text], + }); + return body.embeddings[0]; + } + + /** + * Generates embeddings with retry capabilities. + * @param request - An object containing the request parameters for generating embeddings. + * @returns A Promise that resolves to the API response. + */ + private async embeddingWithRetry( + request: Parameters[0] + ) { + await this.maybeInitClient(); + + return this.caller.call(this.client.embed.bind(this.client), request); + } + + /** + * Initializes the Cohere client if it hasn't been initialized already. + */ + private async maybeInitClient() { + if (!this.client) { + const { cohere } = await CohereEmbeddings.imports(); + + this.client = cohere; + this.client.init(this.apiKey); + } + } + + /** @ignore */ + static async imports(): Promise<{ + cohere: typeof import("cohere-ai"); + }> { + try { + const { default: cohere } = await import("cohere-ai"); + return { cohere }; + } catch (e) { + throw new Error( + "Please install cohere-ai as a dependency with, e.g. 
`yarn add cohere-ai`" + ); + } + } +} diff --git a/libs/langchain-community/src/embeddings/googlepalm.ts b/libs/langchain-community/src/embeddings/googlepalm.ts new file mode 100644 index 000000000000..93ed0743e601 --- /dev/null +++ b/libs/langchain-community/src/embeddings/googlepalm.ts @@ -0,0 +1,107 @@ +import { TextServiceClient } from "@google-ai/generativelanguage"; +import { GoogleAuth } from "google-auth-library"; +import { Embeddings, EmbeddingsParams } from "@langchain/core/embeddings"; +import { getEnvironmentVariable } from "@langchain/core/utils/env"; + +/** + * Interface that extends EmbeddingsParams and defines additional + * parameters specific to the GooglePaLMEmbeddings class. + */ +export interface GooglePaLMEmbeddingsParams extends EmbeddingsParams { + /** + * Model Name to use + * + * Note: The format must follow the pattern - `models/{model}` + */ + modelName?: string; + /** + * Google Palm API key to use + */ + apiKey?: string; +} + +/** + * Class that extends the Embeddings class and provides methods for + * generating embeddings using the Google Palm API. + * @example + * ```typescript + * const model = new GooglePaLMEmbeddings({ + * apiKey: "", + * modelName: "models/embedding-gecko-001", + * }); + * + * // Embed a single query + * const res = await model.embedQuery( + * "What would be a good company name for a company that makes colorful socks?" + * ); + * console.log({ res }); + * + * // Embed multiple documents + * const documentRes = await model.embedDocuments(["Hello world", "Bye bye"]); + * console.log({ documentRes }); + * ``` + */ +export class GooglePaLMEmbeddings + extends Embeddings + implements GooglePaLMEmbeddingsParams +{ + apiKey?: string; + + modelName = "models/embedding-gecko-001"; + + private client: TextServiceClient; + + constructor(fields?: GooglePaLMEmbeddingsParams) { + super(fields ?? {}); + + this.modelName = fields?.modelName ?? this.modelName; + + this.apiKey = + fields?.apiKey ?? 
getEnvironmentVariable("GOOGLE_PALM_API_KEY"); + if (!this.apiKey) { + throw new Error( + "Please set an API key for Google Palm 2 in the environment variable GOOGLE_PALM_API_KEY or in the `apiKey` field of the GooglePalm constructor" + ); + } + + this.client = new TextServiceClient({ + authClient: new GoogleAuth().fromAPIKey(this.apiKey), + }); + } + + protected async _embedText(text: string): Promise { + // replace newlines, which can negatively affect performance. + const cleanedText = text.replace(/\n/g, " "); + const res = await this.client.embedText({ + model: this.modelName, + text: cleanedText, + }); + return res[0].embedding?.value ?? []; + } + + /** + * Method that takes a document as input and returns a promise that + * resolves to an embedding for the document. It calls the _embedText + * method with the document as the input. + * @param document Document for which to generate an embedding. + * @returns Promise that resolves to an embedding for the input document. + */ + embedQuery(document: string): Promise { + return this.caller.callWithOptions( + {}, + this._embedText.bind(this), + document + ); + } + + /** + * Method that takes an array of documents as input and returns a promise + * that resolves to a 2D array of embeddings for each document. It calls + * the _embedText method for each document in the array. + * @param documents Array of documents for which to generate embeddings. + * @returns Promise that resolves to a 2D array of embeddings for each input document. 
+ */ + embedDocuments(documents: string[]): Promise { + return Promise.all(documents.map((document) => this._embedText(document))); + } +} diff --git a/libs/langchain-community/src/embeddings/googlevertexai.ts b/libs/langchain-community/src/embeddings/googlevertexai.ts new file mode 100644 index 000000000000..9e5aa25e741c --- /dev/null +++ b/libs/langchain-community/src/embeddings/googlevertexai.ts @@ -0,0 +1,145 @@ +import { GoogleAuth, GoogleAuthOptions } from "google-auth-library"; +import { Embeddings, EmbeddingsParams } from "@langchain/core/embeddings"; +import { AsyncCallerCallOptions } from "@langchain/core/utils/async_caller"; +import { + GoogleVertexAIBasePrediction, + GoogleVertexAIBaseLLMInput, + GoogleVertexAILLMPredictions, +} from "../types/googlevertexai-types.js"; +import { GoogleVertexAILLMConnection } from "../util/googlevertexai-connection.js"; +import { chunkArray } from "../util/chunk.js"; + +/** + * Defines the parameters required to initialize a + * GoogleVertexAIEmbeddings instance. It extends EmbeddingsParams and + * GoogleVertexAIConnectionParams. + */ +export interface GoogleVertexAIEmbeddingsParams + extends EmbeddingsParams, + GoogleVertexAIBaseLLMInput {} + +/** + * Defines additional options specific to the + * GoogleVertexAILLMEmbeddingsInstance. It extends AsyncCallerCallOptions. + */ +interface GoogleVertexAILLMEmbeddingsOptions extends AsyncCallerCallOptions {} + +/** + * Represents an instance for generating embeddings using the Google + * Vertex AI API. It contains the content to be embedded. + */ +interface GoogleVertexAILLMEmbeddingsInstance { + content: string; +} + +/** + * Defines the structure of the embeddings results returned by the Google + * Vertex AI API. It extends GoogleVertexAIBasePrediction and contains the + * embeddings and their statistics. 
+ */ +interface GoogleVertexEmbeddingsResults extends GoogleVertexAIBasePrediction { + embeddings: { + statistics: { + token_count: number; + truncated: boolean; + }; + values: number[]; + }; +} + +/** + * Enables calls to the Google Cloud's Vertex AI API to access + * the embeddings generated by Large Language Models. + * + * To use, you will need to have one of the following authentication + * methods in place: + * - You are logged into an account permitted to the Google Cloud project + * using Vertex AI. + * - You are running this on a machine using a service account permitted to + * the Google Cloud project using Vertex AI. + * - The `GOOGLE_APPLICATION_CREDENTIALS` environment variable is set to the + * path of a credentials file for a service account permitted to the + * Google Cloud project using Vertex AI. + * @example + * ```typescript + * const model = new GoogleVertexAIEmbeddings(); + * const res = await model.embedQuery( + * "What would be a good company name for a company that makes colorful socks?" + * ); + * console.log({ res }); + * ``` + */ +export class GoogleVertexAIEmbeddings + extends Embeddings + implements GoogleVertexAIEmbeddingsParams +{ + model = "textembedding-gecko"; + + private connection: GoogleVertexAILLMConnection< + GoogleVertexAILLMEmbeddingsOptions, + GoogleVertexAILLMEmbeddingsInstance, + GoogleVertexEmbeddingsResults, + GoogleAuthOptions + >; + + constructor(fields?: GoogleVertexAIEmbeddingsParams) { + super(fields ?? {}); + + this.model = fields?.model ?? this.model; + + this.connection = new GoogleVertexAILLMConnection( + { ...fields, ...this }, + this.caller, + new GoogleAuth({ + scopes: "https://www.googleapis.com/auth/cloud-platform", + ...fields?.authOptions, + }) + ); + } + + /** + * Takes an array of documents as input and returns a promise that + * resolves to a 2D array of embeddings for each document. It splits the + * documents into chunks and makes requests to the Google Vertex AI API to + * generate embeddings. 
+ * @param documents An array of documents to be embedded. + * @returns A promise that resolves to a 2D array of embeddings for each document. + */ + async embedDocuments(documents: string[]): Promise { + const instanceChunks: GoogleVertexAILLMEmbeddingsInstance[][] = chunkArray( + documents.map((document) => ({ + content: document, + })), + 5 + ); // Vertex AI accepts max 5 instances per prediction + const parameters = {}; + const options = {}; + const responses = await Promise.all( + instanceChunks.map((instances) => + this.connection.request(instances, parameters, options) + ) + ); + const result: number[][] = + responses + ?.map( + (response) => + ( + response?.data as GoogleVertexAILLMPredictions + )?.predictions?.map((result) => result.embeddings.values) ?? [] + ) + .flat() ?? []; + return result; + } + + /** + * Takes a document as input and returns a promise that resolves to an + * embedding for the document. It calls the embedDocuments method with the + * document as the input. + * @param document A document to be embedded. + * @returns A promise that resolves to an embedding for the document. + */ + async embedQuery(document: string): Promise { + const data = await this.embedDocuments([document]); + return data[0]; + } +} diff --git a/libs/langchain-community/src/embeddings/gradient_ai.ts b/libs/langchain-community/src/embeddings/gradient_ai.ts new file mode 100644 index 000000000000..0dbc3e932e70 --- /dev/null +++ b/libs/langchain-community/src/embeddings/gradient_ai.ts @@ -0,0 +1,118 @@ +import { Gradient } from "@gradientai/nodejs-sdk"; +import { getEnvironmentVariable } from "@langchain/core/utils/env"; +import { Embeddings, EmbeddingsParams } from "@langchain/core/embeddings"; +import { chunkArray } from "../util/chunk.js"; + +/** + * Interface for GradientEmbeddings parameters. Extends EmbeddingsParams and + * defines additional parameters specific to the GradientEmbeddings class. 
+ */ +export interface GradientEmbeddingsParams extends EmbeddingsParams { + /** + * Gradient AI Access Token. + * Provide Access Token if you do not wish to automatically pull from env. + */ + gradientAccessKey?: string; + /** + * Gradient Workspace Id. + * Provide workspace id if you do not wish to automatically pull from env. + */ + workspaceId?: string; +} + +/** + * Class for generating embeddings using the Gradient AI's API. Extends the + * Embeddings class and implements GradientEmbeddingsParams and + */ +export class GradientEmbeddings + extends Embeddings + implements GradientEmbeddingsParams +{ + gradientAccessKey?: string; + + workspaceId?: string; + + batchSize = 128; + + // eslint-disable-next-line @typescript-eslint/no-explicit-any + model: any; + + constructor(fields: GradientEmbeddingsParams) { + super(fields); + + this.gradientAccessKey = + fields?.gradientAccessKey ?? + getEnvironmentVariable("GRADIENT_ACCESS_TOKEN"); + this.workspaceId = + fields?.workspaceId ?? getEnvironmentVariable("GRADIENT_WORKSPACE_ID"); + + if (!this.gradientAccessKey) { + throw new Error("Missing Gradient AI Access Token"); + } + + if (!this.workspaceId) { + throw new Error("Missing Gradient AI Workspace ID"); + } + } + + /** + * Method to generate embeddings for an array of documents. Splits the + * documents into batches and makes requests to the Gradient API to generate + * embeddings. + * @param texts Array of documents to generate embeddings for. + * @returns Promise that resolves to a 2D array of embeddings for each document. 
+ */ + async embedDocuments(texts: string[]): Promise { + await this.setModel(); + + const mappedTexts = texts.map((text) => ({ input: text })); + + const batches = chunkArray(mappedTexts, this.batchSize); + + const batchRequests = batches.map((batch) => + this.caller.call(async () => + this.model.generateEmbeddings({ + inputs: batch, + }) + ) + ); + const batchResponses = await Promise.all(batchRequests); + + const embeddings: number[][] = []; + for (let i = 0; i < batchResponses.length; i += 1) { + const batch = batches[i]; + const { embeddings: batchResponse } = batchResponses[i]; + for (let j = 0; j < batch.length; j += 1) { + embeddings.push(batchResponse[j].embedding); + } + } + return embeddings; + } + + /** + * Method to generate an embedding for a single document. Calls the + * embedDocuments method with the document as the input. + * @param text Document to generate an embedding for. + * @returns Promise that resolves to an embedding for the document. + */ + async embedQuery(text: string): Promise { + const data = await this.embedDocuments([text]); + return data[0]; + } + + /** + * Method to set the model to use for generating embeddings. + * @sets the class' `model` value to that of the retrieved Embeddings Model. 
+ */ + async setModel() { + if (this.model) return; + + const gradient = new Gradient({ + accessToken: this.gradientAccessKey, + workspaceId: this.workspaceId, + }); + this.model = await gradient.getEmbeddingsModel({ + slug: "bge-large", + }); + } +} diff --git a/libs/langchain-community/src/embeddings/hf.ts b/libs/langchain-community/src/embeddings/hf.ts new file mode 100644 index 000000000000..cbe66a5b3a46 --- /dev/null +++ b/libs/langchain-community/src/embeddings/hf.ts @@ -0,0 +1,77 @@ +import { HfInference, HfInferenceEndpoint } from "@huggingface/inference"; +import { Embeddings, type EmbeddingsParams } from "@langchain/core/embeddings"; +import { getEnvironmentVariable } from "@langchain/core/utils/env"; + +/** + * Interface that extends EmbeddingsParams and defines additional + * parameters specific to the HuggingFaceInferenceEmbeddings class. + */ +export interface HuggingFaceInferenceEmbeddingsParams extends EmbeddingsParams { + apiKey?: string; + model?: string; + endpointUrl?: string; +} + +/** + * Class that extends the Embeddings class and provides methods for + * generating embeddings using Hugging Face models through the + * HuggingFaceInference API. + */ +export class HuggingFaceInferenceEmbeddings + extends Embeddings + implements HuggingFaceInferenceEmbeddingsParams +{ + apiKey?: string; + + model: string; + + endpointUrl?: string; + + client: HfInference | HfInferenceEndpoint; + + constructor(fields?: HuggingFaceInferenceEmbeddingsParams) { + super(fields ?? {}); + + this.model = fields?.model ?? "BAAI/bge-base-en-v1.5"; + this.apiKey = + fields?.apiKey ?? getEnvironmentVariable("HUGGINGFACEHUB_API_KEY"); + this.endpointUrl = fields?.endpointUrl; + this.client = this.endpointUrl + ? new HfInference(this.apiKey).endpoint(this.endpointUrl) + : new HfInference(this.apiKey); + } + + async _embed(texts: string[]): Promise { + // replace newlines, which can negatively affect performance. 
+ const clean = texts.map((text) => text.replace(/\n/g, " ")); + return this.caller.call(() => + this.client.featureExtraction({ + model: this.model, + inputs: clean, + }) + ) as Promise; + } + + /** + * Method that takes a document as input and returns a promise that + * resolves to an embedding for the document. It calls the _embed method + * with the document as the input and returns the first embedding in the + * resulting array. + * @param document Document to generate an embedding for. + * @returns Promise that resolves to an embedding for the document. + */ + embedQuery(document: string): Promise { + return this._embed([document]).then((embeddings) => embeddings[0]); + } + + /** + * Method that takes an array of documents as input and returns a promise + * that resolves to a 2D array of embeddings for each document. It calls + * the _embed method with the documents as the input. + * @param documents Array of documents to generate embeddings for. + * @returns Promise that resolves to a 2D array of embeddings for each document. + */ + embedDocuments(documents: string[]): Promise { + return this._embed(documents); + } +} diff --git a/libs/langchain-community/src/embeddings/hf_transformers.ts b/libs/langchain-community/src/embeddings/hf_transformers.ts new file mode 100644 index 000000000000..92edeae1d4b0 --- /dev/null +++ b/libs/langchain-community/src/embeddings/hf_transformers.ts @@ -0,0 +1,105 @@ +import { Pipeline, pipeline } from "@xenova/transformers"; +import { Embeddings, type EmbeddingsParams } from "@langchain/core/embeddings"; +import { chunkArray } from "../util/chunk.js"; + +export interface HuggingFaceTransformersEmbeddingsParams + extends EmbeddingsParams { + /** Model name to use */ + modelName: string; + + /** + * Timeout to use when making requests to OpenAI. + */ + timeout?: number; + + /** + * The maximum number of documents to embed in a single request. + */ + batchSize?: number; + + /** + * Whether to strip new lines from the input text. 
This is recommended by + * OpenAI, but may not be suitable for all use cases. + */ + stripNewLines?: boolean; +} + +/** + * @example + * ```typescript + * const model = new HuggingFaceTransformersEmbeddings({ + * modelName: "Xenova/all-MiniLM-L6-v2", + * }); + * + * // Embed a single query + * const res = await model.embedQuery( + * "What would be a good company name for a company that makes colorful socks?" + * ); + * console.log({ res }); + * + * // Embed multiple documents + * const documentRes = await model.embedDocuments(["Hello world", "Bye bye"]); + * console.log({ documentRes }); + * ``` + */ +export class HuggingFaceTransformersEmbeddings + extends Embeddings + implements HuggingFaceTransformersEmbeddingsParams +{ + modelName = "Xenova/all-MiniLM-L6-v2"; + + batchSize = 512; + + stripNewLines = true; + + timeout?: number; + + private pipelinePromise: Promise; + + constructor(fields?: Partial) { + super(fields ?? {}); + + this.modelName = fields?.modelName ?? this.modelName; + this.stripNewLines = fields?.stripNewLines ?? this.stripNewLines; + this.timeout = fields?.timeout; + } + + async embedDocuments(texts: string[]): Promise { + const batches = chunkArray( + this.stripNewLines ? texts.map((t) => t.replace(/\n/g, " ")) : texts, + this.batchSize + ); + + const batchRequests = batches.map((batch) => this.runEmbedding(batch)); + const batchResponses = await Promise.all(batchRequests); + const embeddings: number[][] = []; + + for (let i = 0; i < batchResponses.length; i += 1) { + const batchResponse = batchResponses[i]; + for (let j = 0; j < batchResponse.length; j += 1) { + embeddings.push(batchResponse[j]); + } + } + + return embeddings; + } + + async embedQuery(text: string): Promise { + const data = await this.runEmbedding([ + this.stripNewLines ? 
text.replace(/\n/g, " ") : text, + ]); + return data[0]; + } + + private async runEmbedding(texts: string[]) { + const pipe = await (this.pipelinePromise ??= pipeline( + "feature-extraction", + this.modelName + )); + + return this.caller.call(async () => { + const output = await pipe(texts, { pooling: "mean", normalize: true }); + return output.tolist(); + }); + } +} diff --git a/libs/langchain-community/src/embeddings/llama_cpp.ts b/libs/langchain-community/src/embeddings/llama_cpp.ts new file mode 100644 index 000000000000..266f4b610948 --- /dev/null +++ b/libs/langchain-community/src/embeddings/llama_cpp.ts @@ -0,0 +1,103 @@ +import { LlamaModel, LlamaContext } from "node-llama-cpp"; +import { Embeddings, type EmbeddingsParams } from "@langchain/core/embeddings"; +import { + LlamaBaseCppInputs, + createLlamaModel, + createLlamaContext, +} from "../util/llama_cpp.js"; + +/** + * Note that the modelPath is the only required parameter. For testing you + * can set this in the environment variable `LLAMA_PATH`. + */ +export interface LlamaCppEmbeddingsParams + extends LlamaBaseCppInputs, + EmbeddingsParams {} + +/** + * @example + * ```typescript + * // Initialize LlamaCppEmbeddings with the path to the model file + * const embeddings = new LlamaCppEmbeddings({ + * modelPath: "/Replace/with/path/to/your/model/gguf-llama2-q4_0.bin", + * }); + * + * // Embed a query string using the Llama embeddings + * const res = embeddings.embedQuery("Hello Llama!"); + * + * // Output the resulting embeddings + * console.log(res); + * + * ``` + */ +export class LlamaCppEmbeddings extends Embeddings { + _model: LlamaModel; + + _context: LlamaContext; + + constructor(inputs: LlamaCppEmbeddingsParams) { + super(inputs); + const _inputs = inputs; + _inputs.embedding = true; + + this._model = createLlamaModel(_inputs); + this._context = createLlamaContext(this._model, _inputs); + } + + /** + * Generates embeddings for an array of texts. 
+ * @param texts - An array of strings to generate embeddings for. + * @returns A Promise that resolves to an array of embeddings. + */ + async embedDocuments(texts: string[]): Promise { + const tokensArray = []; + + for (const text of texts) { + const encodings = await this.caller.call( + () => + new Promise((resolve) => { + resolve(this._context.encode(text)); + }) + ); + tokensArray.push(encodings); + } + + const embeddings: number[][] = []; + + for (const tokens of tokensArray) { + const embedArray: number[] = []; + + for (let i = 0; i < tokens.length; i += 1) { + const nToken: number = +tokens[i]; + embedArray.push(nToken); + } + + embeddings.push(embedArray); + } + + return embeddings; + } + + /** + * Generates an embedding for a single text. + * @param text - A string to generate an embedding for. + * @returns A Promise that resolves to an array of numbers representing the embedding. + */ + async embedQuery(text: string): Promise { + const tokens: number[] = []; + + const encodings = await this.caller.call( + () => + new Promise((resolve) => { + resolve(this._context.encode(text)); + }) + ); + + for (let i = 0; i < encodings.length; i += 1) { + const token: number = +encodings[i]; + tokens.push(token); + } + + return tokens; + } +} diff --git a/libs/langchain-community/src/embeddings/minimax.ts b/libs/langchain-community/src/embeddings/minimax.ts new file mode 100644 index 000000000000..110116abd7c2 --- /dev/null +++ b/libs/langchain-community/src/embeddings/minimax.ts @@ -0,0 +1,222 @@ +import { getEnvironmentVariable } from "@langchain/core/utils/env"; +import { Embeddings, EmbeddingsParams } from "@langchain/core/embeddings"; +import { chunkArray } from "../util/chunk.js"; +import { ConfigurationParameters } from "../chat_models/minimax.js"; + +/** + * Interface for MinimaxEmbeddings parameters. Extends EmbeddingsParams and + * defines additional parameters specific to the MinimaxEmbeddings class. 
+ */ +export interface MinimaxEmbeddingsParams extends EmbeddingsParams { + /** Model name to use */ + modelName: string; + + /** + * API key to use when making requests. Defaults to the value of + * `MINIMAX_GROUP_ID` environment variable. + */ + minimaxGroupId?: string; + + /** + * Secret key to use when making requests. Defaults to the value of + * `MINIMAX_API_KEY` environment variable. + */ + minimaxApiKey?: string; + + /** + * The maximum number of documents to embed in a single request. This is + * limited by the Minimax API to a maximum of 4096. + */ + batchSize?: number; + + /** + * Whether to strip new lines from the input text. This is recommended by + * Minimax, but may not be suitable for all use cases. + */ + stripNewLines?: boolean; + + /** + * The target use-case after generating the vector. + * When using embeddings, the vector of the target content is first generated through the db and stored in the vector database, + * and then the vector of the retrieval text is generated through the query. + * Note: For the parameters of the partial algorithm, we adopted a separate algorithm plan for query and db. + * Therefore, for a paragraph of text, if it is to be used as a retrieval text, it should use the db, + * and if it is used as a retrieval text, it should use the query. + */ + type?: "db" | "query"; +} + +export interface CreateMinimaxEmbeddingRequest { + /** + * @type {string} + * @memberof CreateMinimaxEmbeddingRequest + */ + model: string; + + /** + * Text to generate vector expectation + * @type {CreateEmbeddingRequestInput} + * @memberof CreateMinimaxEmbeddingRequest + */ + texts: string[]; + + /** + * The target use-case after generating the vector. When using embeddings, + * first generate the vector of the target content through the db and store it in the vector database, + * and then generate the vector of the retrieval text through the query. 
+ * Note: For the parameter of the algorithm, we use the algorithm scheme of query and db separation, + * so a text, if it is to be retrieved as a text, should use the db, + * if it is used as a retrieval text, should use the query. + * @type {string} + * @memberof CreateMinimaxEmbeddingRequest + */ + type: "db" | "query"; +} + +/** + * Class for generating embeddings using the Minimax API. Extends the + * Embeddings class and implements MinimaxEmbeddingsParams + * @example + * ```typescript + * const embeddings = new MinimaxEmbeddings(); + * + * // Embed a single query + * const queryEmbedding = await embeddings.embedQuery("Hello world"); + * console.log(queryEmbedding); + * + * // Embed multiple documents + * const documentsEmbedding = await embeddings.embedDocuments([ + * "Hello world", + * "Bye bye", + * ]); + * console.log(documentsEmbedding); + * ``` + */ +export class MinimaxEmbeddings + extends Embeddings + implements MinimaxEmbeddingsParams +{ + modelName = "embo-01"; + + batchSize = 512; + + stripNewLines = true; + + minimaxGroupId?: string; + + minimaxApiKey?: string; + + type: "db" | "query" = "db"; + + apiUrl: string; + + basePath?: string = "https://api.minimax.chat/v1"; + + headers?: Record; + + constructor( + fields?: Partial & { + configuration?: ConfigurationParameters; + } + ) { + const fieldsWithDefaults = { maxConcurrency: 2, ...fields }; + super(fieldsWithDefaults); + + this.minimaxGroupId = + fields?.minimaxGroupId ?? getEnvironmentVariable("MINIMAX_GROUP_ID"); + if (!this.minimaxGroupId) { + throw new Error("Minimax GroupID not found"); + } + + this.minimaxApiKey = + fields?.minimaxApiKey ?? getEnvironmentVariable("MINIMAX_API_KEY"); + + if (!this.minimaxApiKey) { + throw new Error("Minimax ApiKey not found"); + } + + this.modelName = fieldsWithDefaults?.modelName ?? this.modelName; + this.batchSize = fieldsWithDefaults?.batchSize ?? this.batchSize; + this.type = fieldsWithDefaults?.type ?? 
this.type; + this.stripNewLines = + fieldsWithDefaults?.stripNewLines ?? this.stripNewLines; + this.basePath = fields?.configuration?.basePath ?? this.basePath; + this.apiUrl = `${this.basePath}/embeddings`; + this.headers = fields?.configuration?.headers ?? this.headers; + } + + /** + * Method to generate embeddings for an array of documents. Splits the + * documents into batches and makes requests to the Minimax API to generate + * embeddings. + * @param texts Array of documents to generate embeddings for. + * @returns Promise that resolves to a 2D array of embeddings for each document. + */ + async embedDocuments(texts: string[]): Promise { + const batches = chunkArray( + this.stripNewLines ? texts.map((t) => t.replace(/\n/g, " ")) : texts, + this.batchSize + ); + + const batchRequests = batches.map((batch) => + this.embeddingWithRetry({ + model: this.modelName, + texts: batch, + type: this.type, + }) + ); + const batchResponses = await Promise.all(batchRequests); + + const embeddings: number[][] = []; + for (let i = 0; i < batchResponses.length; i += 1) { + const batch = batches[i]; + const { vectors: batchResponse } = batchResponses[i]; + for (let j = 0; j < batch.length; j += 1) { + embeddings.push(batchResponse[j]); + } + } + return embeddings; + } + + /** + * Method to generate an embedding for a single document. Calls the + * embeddingWithRetry method with the document as the input. + * @param text Document to generate an embedding for. + * @returns Promise that resolves to an embedding for the document. + */ + async embedQuery(text: string): Promise { + const { vectors } = await this.embeddingWithRetry({ + model: this.modelName, + texts: [this.stripNewLines ? text.replace(/\n/g, " ") : text], + type: this.type, + }); + return vectors[0]; + } + + /** + * Private method to make a request to the Minimax API to generate + * embeddings. Handles the retry logic and returns the response from the + * API. + * @param request Request to send to the Minimax API. 
+ * @returns Promise that resolves to the response from the API. + */ + private async embeddingWithRetry(request: CreateMinimaxEmbeddingRequest) { + const makeCompletionRequest = async () => { + const url = `${this.apiUrl}?GroupId=${this.minimaxGroupId}`; + const response = await fetch(url, { + method: "POST", + headers: { + "Content-Type": "application/json", + Authorization: `Bearer ${this.minimaxApiKey}`, + ...this.headers, + }, + body: JSON.stringify(request), + }); + + const json = await response.json(); + return json; + }; + + return this.caller.call(makeCompletionRequest); + } +} diff --git a/libs/langchain-community/src/embeddings/ollama.ts b/libs/langchain-community/src/embeddings/ollama.ts new file mode 100644 index 000000000000..39f8da6b0aa6 --- /dev/null +++ b/libs/langchain-community/src/embeddings/ollama.ts @@ -0,0 +1,148 @@ +import { Embeddings, EmbeddingsParams } from "@langchain/core/embeddings"; +import { OllamaInput, OllamaRequestParams } from "../util/ollama.js"; + +type CamelCasedRequestOptions = Omit< + OllamaInput, + "baseUrl" | "model" | "format" +>; + +/** + * Interface for OllamaEmbeddings parameters. Extends EmbeddingsParams and + * defines additional parameters specific to the OllamaEmbeddings class. + */ +interface OllamaEmbeddingsParams extends EmbeddingsParams { + /** The Ollama model to use, e.g: "llama2:13b" */ + model?: string; + + /** Base URL of the Ollama server, defaults to "http://localhost:11434" */ + baseUrl?: string; + + /** Advanced Ollama API request parameters in camelCase, see + * https://github.com/jmorganca/ollama/blob/main/docs/modelfile.md#valid-parameters-and-values + * for details of the available parameters. 
+ */ + requestOptions?: CamelCasedRequestOptions; +} + +export class OllamaEmbeddings extends Embeddings { + model = "llama2"; + + baseUrl = "http://localhost:11434"; + + requestOptions?: OllamaRequestParams["options"]; + + constructor(params?: OllamaEmbeddingsParams) { + super(params || {}); + + if (params?.model) { + this.model = params.model; + } + + if (params?.baseUrl) { + this.baseUrl = params.baseUrl; + } + + if (params?.requestOptions) { + this.requestOptions = this._convertOptions(params.requestOptions); + } + } + + /** convert camelCased Ollama request options like "useMMap" to + * the snake_cased equivalent which the ollama API actually uses. + * Used only for consistency with the llms/Ollama and chatModels/Ollama classes + */ + _convertOptions(requestOptions: CamelCasedRequestOptions) { + const snakeCasedOptions: Record = {}; + const mapping: Record = { + embeddingOnly: "embedding_only", + f16KV: "f16_kv", + frequencyPenalty: "frequency_penalty", + logitsAll: "logits_all", + lowVram: "low_vram", + mainGpu: "main_gpu", + mirostat: "mirostat", + mirostatEta: "mirostat_eta", + mirostatTau: "mirostat_tau", + numBatch: "num_batch", + numCtx: "num_ctx", + numGpu: "num_gpu", + numGqa: "num_gqa", + numKeep: "num_keep", + numThread: "num_thread", + penalizeNewline: "penalize_newline", + presencePenalty: "presence_penalty", + repeatLastN: "repeat_last_n", + repeatPenalty: "repeat_penalty", + ropeFrequencyBase: "rope_frequency_base", + ropeFrequencyScale: "rope_frequency_scale", + temperature: "temperature", + stop: "stop", + tfsZ: "tfs_z", + topK: "top_k", + topP: "top_p", + typicalP: "typical_p", + useMLock: "use_mlock", + useMMap: "use_mmap", + vocabOnly: "vocab_only", + }; + + for (const [key, value] of Object.entries(requestOptions)) { + const snakeCasedOption = mapping[key as keyof CamelCasedRequestOptions]; + if (snakeCasedOption) { + snakeCasedOptions[snakeCasedOption] = value; + } + } + return snakeCasedOptions; + } + + async _request(prompt: string): 
Promise { + const { model, baseUrl, requestOptions } = this; + + let formattedBaseUrl = baseUrl; + if (formattedBaseUrl.startsWith("http://localhost:")) { + // Node 18 has issues with resolving "localhost" + // See https://github.com/node-fetch/node-fetch/issues/1624 + formattedBaseUrl = formattedBaseUrl.replace( + "http://localhost:", + "http://127.0.0.1:" + ); + } + + const response = await fetch(`${formattedBaseUrl}/api/embeddings`, { + method: "POST", + headers: { "Content-Type": "application/json" }, + body: JSON.stringify({ + prompt, + model, + options: requestOptions, + }), + }); + if (!response.ok) { + throw new Error( + `Request to Ollama server failed: ${response.status} ${response.statusText}` + ); + } + + const json = await response.json(); + return json.embedding; + } + + async _embed(strings: string[]): Promise { + const embeddings: number[][] = []; + + for await (const prompt of strings) { + const embedding = await this.caller.call(() => this._request(prompt)); + embeddings.push(embedding); + } + + return embeddings; + } + + async embedDocuments(documents: string[]) { + return this._embed(documents); + } + + async embedQuery(document: string) { + return (await this.embedDocuments([document]))[0]; + } +} diff --git a/libs/langchain-community/src/embeddings/tensorflow.ts b/libs/langchain-community/src/embeddings/tensorflow.ts new file mode 100644 index 000000000000..ae96f8cc9b30 --- /dev/null +++ b/libs/langchain-community/src/embeddings/tensorflow.ts @@ -0,0 +1,91 @@ +import { load } from "@tensorflow-models/universal-sentence-encoder"; +import * as tf from "@tensorflow/tfjs-core"; + +import { Embeddings, type EmbeddingsParams } from "@langchain/core/embeddings"; + +/** + * Interface that extends EmbeddingsParams and defines additional + * parameters specific to the TensorFlowEmbeddings class. 
+ */ +export interface TensorFlowEmbeddingsParams extends EmbeddingsParams {} + +/** + * Class that extends the Embeddings class and provides methods for + * generating embeddings using the Universal Sentence Encoder model from + * TensorFlow.js. + * @example + * ```typescript + * const embeddings = new TensorFlowEmbeddings(); + * const store = new MemoryVectorStore(embeddings); + * + * const documents = [ + * "A document", + * "Some other piece of text", + * "One more", + * "And another", + * ]; + * + * await store.addDocuments( + * documents.map((pageContent) => new Document({ pageContent })) + * ); + * ``` + */ +export class TensorFlowEmbeddings extends Embeddings { + constructor(fields?: TensorFlowEmbeddingsParams) { + super(fields ?? {}); + + try { + tf.backend(); + } catch (e) { + throw new Error("No TensorFlow backend found, see instructions at ..."); + } + } + + _cached: ReturnType; + + /** + * Private method that loads the Universal Sentence Encoder model if it + * hasn't been loaded already. It returns a promise that resolves to the + * loaded model. + * @returns Promise that resolves to the loaded Universal Sentence Encoder model. + */ + private async load() { + if (this._cached === undefined) { + this._cached = load(); + } + return this._cached; + } + + private _embed(texts: string[]) { + return this.caller.call(async () => { + const model = await this.load(); + return model.embed(texts); + }); + } + + /** + * Method that takes a document as input and returns a promise that + * resolves to an embedding for the document. It calls the _embed method + * with the document as the input and processes the result to return a + * single embedding. + * @param document Document to generate an embedding for. + * @returns Promise that resolves to an embedding for the input document. 
+ */ + embedQuery(document: string): Promise { + return this._embed([document]) + .then((embeddings) => embeddings.array()) + .then((embeddings) => embeddings[0]); + } + + /** + * Method that takes an array of documents as input and returns a promise + * that resolves to a 2D array of embeddings for each document. It calls + * the _embed method with the documents as the input and processes the + * result to return the embeddings. + * @param documents Array of documents to generate embeddings for. + * @returns Promise that resolves to a 2D array of embeddings for each input document. + */ + embedDocuments(documents: string[]): Promise { + return this._embed(documents).then((embeddings) => embeddings.array()); + } +} diff --git a/libs/langchain-community/src/embeddings/voyage.ts b/libs/langchain-community/src/embeddings/voyage.ts new file mode 100644 index 000000000000..30aa556478fe --- /dev/null +++ b/libs/langchain-community/src/embeddings/voyage.ts @@ -0,0 +1,152 @@ +import { getEnvironmentVariable } from "@langchain/core/utils/env"; +import { Embeddings, type EmbeddingsParams } from "@langchain/core/embeddings"; +import { chunkArray } from "../util/chunk.js"; + +/** + * Interface that extends EmbeddingsParams and defines additional + * parameters specific to the VoyageEmbeddings class. + */ +export interface VoyageEmbeddingsParams extends EmbeddingsParams { + modelName: string; + + /** + * The maximum number of documents to embed in a single request. This is + * limited by the Voyage AI API to a maximum of 8. + */ + batchSize?: number; +} + +/** + * Interface for the request body to generate embeddings. 
+ */ +export interface CreateVoyageEmbeddingRequest { + /** + * @type {string} + * @memberof CreateVoyageEmbeddingRequest + */ + model: string; + + /** + * Text to generate vector expectation + * @type {CreateEmbeddingRequestInput} + * @memberof CreateVoyageEmbeddingRequest + */ + input: string | string[]; +} + +/** + * A class for generating embeddings using the Voyage AI API. + */ +export class VoyageEmbeddings + extends Embeddings + implements VoyageEmbeddingsParams +{ + modelName = "voyage-01"; + + batchSize = 8; + + private apiKey: string; + + basePath?: string = "https://api.voyageai.com/v1"; + + apiUrl: string; + + headers?: Record; + + /** + * Constructor for the VoyageEmbeddings class. + * @param fields - An optional object with properties to configure the instance. + */ + constructor( + fields?: Partial & { + verbose?: boolean; + apiKey?: string; + } + ) { + const fieldsWithDefaults = { ...fields }; + + super(fieldsWithDefaults); + + const apiKey = + fieldsWithDefaults?.apiKey || getEnvironmentVariable("VOYAGEAI_API_KEY"); + + if (!apiKey) { + throw new Error("Voyage AI API key not found"); + } + + this.modelName = fieldsWithDefaults?.modelName ?? this.modelName; + this.batchSize = fieldsWithDefaults?.batchSize ?? this.batchSize; + this.apiKey = apiKey; + this.apiUrl = `${this.basePath}/embeddings`; + } + + /** + * Generates embeddings for an array of texts. + * @param texts - An array of strings to generate embeddings for. + * @returns A Promise that resolves to an array of embeddings. 
+ */ + async embedDocuments(texts: string[]): Promise { + const batches = chunkArray(texts, this.batchSize); + + const batchRequests = batches.map((batch) => + this.embeddingWithRetry({ + model: this.modelName, + input: batch, + }) + ); + + const batchResponses = await Promise.all(batchRequests); + + const embeddings: number[][] = []; + + for (let i = 0; i < batchResponses.length; i += 1) { + const batch = batches[i]; + const { data: batchResponse } = batchResponses[i]; + for (let j = 0; j < batch.length; j += 1) { + embeddings.push(batchResponse[j].embedding); + } + } + + return embeddings; + } + + /** + * Generates an embedding for a single text. + * @param text - A string to generate an embedding for. + * @returns A Promise that resolves to an array of numbers representing the embedding. + */ + async embedQuery(text: string): Promise { + const { data } = await this.embeddingWithRetry({ + model: this.modelName, + input: text, + }); + + return data[0].embedding; + } + + /** + * Makes a request to the Voyage AI API to generate embeddings for an array of texts. + * @param request - An object with properties to configure the request. + * @returns A Promise that resolves to the response from the Voyage AI API. 
+ */ + + private async embeddingWithRetry(request: CreateVoyageEmbeddingRequest) { + const makeCompletionRequest = async () => { + const url = `${this.apiUrl}`; + const response = await fetch(url, { + method: "POST", + headers: { + "Content-Type": "application/json", + Authorization: `Bearer ${this.apiKey}`, + ...this.headers, + }, + body: JSON.stringify(request), + }); + + const json = await response.json(); + return json; + }; + + return this.caller.call(makeCompletionRequest); + } +} diff --git a/libs/langchain-community/src/graphs/neo4j_graph.ts b/libs/langchain-community/src/graphs/neo4j_graph.ts new file mode 100644 index 000000000000..c404e7e3b2ad --- /dev/null +++ b/libs/langchain-community/src/graphs/neo4j_graph.ts @@ -0,0 +1,286 @@ +import neo4j, { Neo4jError } from "neo4j-driver"; + +interface Neo4jGraphConfig { + url: string; + username: string; + password: string; + database?: string; +} + +interface StructuredSchema { + nodeProps: { [key: NodeType["labels"]]: NodeType["properties"] }; + relProps: { [key: RelType["type"]]: RelType["properties"] }; + relationships: PathType[]; +} + +type NodeType = { + labels: string; + properties: { property: string; type: string }[]; +}; +type RelType = { + type: string; + properties: { property: string; type: string }[]; +}; +type PathType = { start: string; type: string; end: string }; + +/** + * @security *Security note*: Make sure that the database connection uses credentials + * that are narrowly-scoped to only include necessary permissions. + * Failure to do so may result in data corruption or loss, since the calling + * code may attempt commands that would result in deletion, mutation + * of data if appropriately prompted or reading sensitive data if such + * data is present in the database. + * The best way to guard against such negative outcomes is to (as appropriate) + * limit the permissions granted to the credentials used with this tool. 
+ * For example, creating read only users for the database is a good way to + * ensure that the calling code cannot mutate or delete data. + * + * @link See https://js.langchain.com/docs/security for more information. + */ +export class Neo4jGraph { + private driver: neo4j.Driver; + + private database: string; + + private schema = ""; + + private structuredSchema: StructuredSchema = { + nodeProps: {}, + relProps: {}, + relationships: [], + }; + + constructor({ + url, + username, + password, + database = "neo4j", + }: Neo4jGraphConfig) { + try { + this.driver = neo4j.driver(url, neo4j.auth.basic(username, password)); + this.database = database; + } catch (error) { + throw new Error( + "Could not create a Neo4j driver instance. Please check the connection details." + ); + } + } + + static async initialize(config: Neo4jGraphConfig): Promise { + const graph = new Neo4jGraph(config); + + try { + await graph.verifyConnectivity(); + // eslint-disable-next-line @typescript-eslint/no-explicit-any + } catch (error: any) { + console.log("Failed to verify connection."); + } + + try { + await graph.refreshSchema(); + // eslint-disable-next-line @typescript-eslint/no-explicit-any + } catch (error: any) { + const message = [ + "Could not use APOC procedures.", + "Please ensure the APOC plugin is installed in Neo4j and that", + "'apoc.meta.data()' is allowed in Neo4j configuration", + ].join("\n"); + + throw new Error(message); + } finally { + console.log("Schema refreshed successfully."); + } + + return graph; + } + + getSchema(): string { + return this.schema; + } + + getStructuredSchema() { + return this.structuredSchema; + } + + // eslint-disable-next-line @typescript-eslint/no-explicit-any + async query(query: string, params: any = {}): Promise { + try { + const result = await this.driver.executeQuery(query, params, { + database: this.database, + }); + return toObjects(result.records); + // eslint-disable-next-line @typescript-eslint/no-explicit-any + } catch (error: any) { + 
if ( + // eslint-disable-next-line + error instanceof Neo4jError && + error.code === "Neo.ClientError.Procedure.ProcedureNotFound" + ) { + throw new Error("Procedure not found in Neo4j."); + } + } + return undefined; + } + + async verifyConnectivity() { + await this.driver.verifyAuthentication(); + } + + async refreshSchema() { + const nodePropertiesQuery = ` + CALL apoc.meta.data() + YIELD label, other, elementType, type, property + WHERE NOT type = "RELATIONSHIP" AND elementType = "node" + WITH label AS nodeLabels, collect({property:property, type:type}) AS properties + RETURN {labels: nodeLabels, properties: properties} AS output + `; + + const relPropertiesQuery = ` + CALL apoc.meta.data() + YIELD label, other, elementType, type, property + WHERE NOT type = "RELATIONSHIP" AND elementType = "relationship" + WITH label AS nodeLabels, collect({property:property, type:type}) AS properties + RETURN {type: nodeLabels, properties: properties} AS output + `; + + const relQuery = ` + CALL apoc.meta.data() + YIELD label, other, elementType, type, property + WHERE type = "RELATIONSHIP" AND elementType = "node" + UNWIND other AS other_node + RETURN {start: label, type: property, end: toString(other_node)} AS output + `; + + // Assuming query method is defined and returns a Promise + const nodeProperties: NodeType[] | undefined = ( + await this.query(nodePropertiesQuery) + )?.map((el: { output: NodeType }) => el.output); + + const relationshipsProperties: RelType[] | undefined = ( + await this.query(relPropertiesQuery) + )?.map((el: { output: RelType }) => el.output); + + const relationships: PathType[] | undefined = ( + await this.query(relQuery) + )?.map((el: { output: PathType }) => el.output); + + // Structured schema similar to Python's dictionary comprehension + this.structuredSchema = { + nodeProps: Object.fromEntries( + nodeProperties?.map((el) => [el.labels, el.properties]) || [] + ), + relProps: Object.fromEntries( + relationshipsProperties?.map((el) => [el.type, 
el.properties]) || [] + ), + relationships: relationships || [], + }; + + // Format node properties + const formattedNodeProps = nodeProperties?.map((el) => { + const propsStr = el.properties + .map((prop) => `${prop.property}: ${prop.type}`) + .join(", "); + return `${el.labels} {${propsStr}}`; + }); + + // Format relationship properties + const formattedRelProps = relationshipsProperties?.map((el) => { + const propsStr = el.properties + .map((prop) => `${prop.property}: ${prop.type}`) + .join(", "); + return `${el.type} {${propsStr}}`; + }); + + // Format relationships + const formattedRels = relationships?.map( + (el) => `(:${el.start})-[:${el.type}]->(:${el.end})` + ); + + // Combine all formatted elements into a single string + this.schema = [ + "Node properties are the following:", + formattedNodeProps?.join(", "), + "Relationship properties are the following:", + formattedRelProps?.join(", "), + "The relationships are the following:", + formattedRels?.join(", "), + ].join("\n"); + } + + async close() { + await this.driver.close(); + } +} + +function toObjects(records: neo4j.Record[]) { + // eslint-disable-next-line @typescript-eslint/no-explicit-any + const recordValues: Record[] = records.map((record) => { + const rObj = record.toObject(); + // eslint-disable-next-line @typescript-eslint/no-explicit-any + const out: { [key: string]: any } = {}; + Object.keys(rObj).forEach((key) => { + out[key] = itemIntToString(rObj[key]); + }); + return out; + }); + return recordValues; +} + +// eslint-disable-next-line @typescript-eslint/no-explicit-any +function itemIntToString(item: any): any { + if (neo4j.isInt(item)) return item.toString(); + if (Array.isArray(item)) return item.map((ii) => itemIntToString(ii)); + if (["number", "string", "boolean"].indexOf(typeof item) !== -1) return item; + if (item === null) return item; + if (typeof item === "object") return objIntToString(item); +} + +// eslint-disable-next-line @typescript-eslint/no-explicit-any +function 
objIntToString(obj: any) { + const entry = extractFromNeoObjects(obj); + // eslint-disable-next-line @typescript-eslint/no-explicit-any + let newObj: any = null; + if (Array.isArray(entry)) { + newObj = entry.map((item) => itemIntToString(item)); + } else if (entry !== null && typeof entry === "object") { + newObj = {}; + Object.keys(entry).forEach((key) => { + newObj[key] = itemIntToString(entry[key]); + }); + } + return newObj; +} + +// eslint-disable-next-line @typescript-eslint/no-explicit-any +function extractFromNeoObjects(obj: any) { + if ( + // eslint-disable-next-line + obj instanceof (neo4j.types.Node as any) || + // eslint-disable-next-line + obj instanceof (neo4j.types.Relationship as any) + ) { + return obj.properties; + // eslint-disable-next-line + } else if (obj instanceof (neo4j.types.Path as any)) { + // eslint-disable-next-line + return [].concat.apply([], extractPathForRows(obj)); + } + return obj; +} + +const extractPathForRows = (path: neo4j.Path) => { + let { segments } = path; + // Zero length path. 
No relationship, end === start + if (!Array.isArray(path.segments) || path.segments.length < 1) { + // eslint-disable-next-line @typescript-eslint/no-explicit-any + segments = [{ ...path, end: null } as any]; + } + // eslint-disable-next-line @typescript-eslint/no-explicit-any + return segments.map((segment: any) => + [ + objIntToString(segment.start), + objIntToString(segment.relationship), + objIntToString(segment.end), + ].filter((part) => part !== null) + ); +}; diff --git a/libs/langchain-community/src/graphs/tests/neo4j_graph.int.test.ts b/libs/langchain-community/src/graphs/tests/neo4j_graph.int.test.ts new file mode 100644 index 000000000000..3b47800fc323 --- /dev/null +++ b/libs/langchain-community/src/graphs/tests/neo4j_graph.int.test.ts @@ -0,0 +1,56 @@ +/* eslint-disable no-process-env */ + +import { test } from "@jest/globals"; +import { Neo4jGraph } from "../neo4j_graph.js"; + +describe.skip("Neo4j Graph Tests", () => { + const url = process.env.NEO4J_URI as string; + const username = process.env.NEO4J_USERNAME as string; + const password = process.env.NEO4J_PASSWORD as string; + let graph: Neo4jGraph; + + beforeEach(async () => { + graph = await Neo4jGraph.initialize({ url, username, password }); + }); + afterEach(async () => { + await graph.close(); + }); + + test("Schema generation works correctly", async () => { + expect(url).toBeDefined(); + expect(username).toBeDefined(); + expect(password).toBeDefined(); + + // Clear the database + await graph.query("MATCH (n) DETACH DELETE n"); + + await graph.query( + "CREATE (a:Actor {name:'Bruce Willis'})" + + "-[:ACTED_IN {roles: ['Butch Coolidge']}]->(:Movie {title: 'Pulp Fiction'})" + ); + + await graph.refreshSchema(); + console.log(graph.getSchema()); + + // expect(graph.getSchema()).toMatchInlineSnapshot(` + // "Node properties are the following: + // Actor {name: STRING}, Movie {title: STRING} + // Relationship properties are the following: + // ACTED_IN {roles: LIST} + // The relationships are the 
following: + // (:Actor)-[:ACTED_IN]->(:Movie)" + // `); + }); + + test("Test that Neo4j database is correctly instantiated and connected", async () => { + expect(url).toBeDefined(); + expect(username).toBeDefined(); + expect(password).toBeDefined(); + + // Integers are casted to strings in the output + const expectedOutput = [{ output: { str: "test", int: "1" } }]; + const res = await graph.query('RETURN {str: "test", int: 1} AS output'); + await graph.close(); + expect(res).toEqual(expectedOutput); + }); +}); diff --git a/libs/langchain-community/src/index.ts b/libs/langchain-community/src/index.ts deleted file mode 100644 index 564fb4a3c181..000000000000 --- a/libs/langchain-community/src/index.ts +++ /dev/null @@ -1,3 +0,0 @@ -export * from "./chat_models.js"; -export * from "./llms.js"; -export * from "./vectorstores.js"; diff --git a/libs/langchain-community/src/llms.ts b/libs/langchain-community/src/llms.ts deleted file mode 100644 index c3ede8d1f29b..000000000000 --- a/libs/langchain-community/src/llms.ts +++ /dev/null @@ -1,73 +0,0 @@ -import { CallbackManagerForLLMRun } from "@langchain/core/callbacks/manager"; -import { LLM, type BaseLLMParams } from "@langchain/core/language_models/llms"; -import { type BaseLanguageModelCallOptions } from "@langchain/core/language_models/base"; - -// Uncomment if implementing streaming - -// import { -// GenerationChunk, -// } from "@langchain/core/outputs"; - -/** - * Input to LLM class. - */ -export interface LLMIntegrationInput extends BaseLLMParams {} - -/** - * Integration with an LLM. - */ -export class LLMIntegration - extends LLM - implements LLMIntegrationInput -{ - // Used for tracing, replace with the same name as your class - static lc_name() { - return "LLMIntegration"; - } - - lc_serializable = true; - - constructor(fields: LLMIntegrationInput) { - super(fields); - } - - // Replace - _llmType() { - return "llm_integration"; - } - - /** - * For some given input string and options, return a string output. 
- */ - async _call( - _prompt: string, - _options: this["ParsedCallOptions"], - _runManager?: CallbackManagerForLLMRun - ): Promise { - throw new Error("Not implemented."); - } - - /** - * Implement to support streaming. - * Should yield chunks iteratively. - */ - // async *_streamResponseChunks( - // prompt: string, - // options: this["ParsedCallOptions"], - // runManager?: CallbackManagerForLLMRun - // ): AsyncGenerator { - // const stream = await this.caller.call(async () => - // createStream() - // ); - // for await (const chunk of stream) { - // yield new GenerationChunk({ - // text: chunk.response, - // generationInfo: { - // ...chunk, - // response: undefined, - // }, - // }); - // await runManager?.handleLLMNewToken(chunk.response ?? ""); - // } - // } -} diff --git a/libs/langchain-community/src/llms/tests/cloudflare_workersai.int.test.ts b/libs/langchain-community/src/llms/tests/cloudflare_workersai.int.test.ts index 6f64c199888c..802ccf659970 100644 --- a/libs/langchain-community/src/llms/tests/cloudflare_workersai.int.test.ts +++ b/libs/langchain-community/src/llms/tests/cloudflare_workersai.int.test.ts @@ -1,6 +1,6 @@ import { test } from "@jest/globals"; import { CloudflareWorkersAI } from "../cloudflare_workersai.js"; -import { getEnvironmentVariable } from "../../util/env.js"; +import { getEnvironmentVariable } from "@langchain/core/utils/env"; test("Test CloudflareWorkersAI", async () => { const model = new CloudflareWorkersAI({}); diff --git a/libs/langchain-community/src/llms/tests/fake.test.ts b/libs/langchain-community/src/llms/tests/fake.test.ts deleted file mode 100644 index 3fc53c0cf1b8..000000000000 --- a/libs/langchain-community/src/llms/tests/fake.test.ts +++ /dev/null @@ -1,98 +0,0 @@ -import { describe, test, expect, jest } from "@jest/globals"; -import { FakeListLLM } from "../fake.js"; - -describe("Test FakeListLLM", () => { - test("Should exist", async () => { - const llm = new FakeListLLM({ responses: ["test response"] }); - const 
response = await llm.call("test prompt"); - - expect(typeof response).toBe("string"); - }); - - test("Should return responses in order", async () => { - const llm = new FakeListLLM({ - responses: ["test response 1", "test response 2"], - }); - const response1 = await llm.call("test prompt"); - const response2 = await llm.call("test prompt"); - - expect(response1).toBe("test response 1"); - expect(response2).toBe("test response 2"); - }); - - test("Should reset index when all responses have been returned", async () => { - const llm = new FakeListLLM({ - responses: ["test response 1", "test response 2"], - }); - const response1 = await llm.call("test prompt"); - const response2 = await llm.call("test prompt"); - const response3 = await llm.call("test prompt"); - - expect(response1).toBe("test response 1"); - expect(response2).toBe("test response 2"); - expect(response3).toBe("test response 1"); - }); - - test("Should return responses after sleep if requested", async () => { - const llm = new FakeListLLM({ - responses: ["test response 1", "test response 2"], - sleep: 10, - }); - const sleepSpy = jest.spyOn(llm, "_sleep"); - - await llm.call("test prompt"); - - expect(sleepSpy).toHaveBeenCalledTimes(1); - }, 3000); - - test("Should stream responses if requested", async () => { - const llm = new FakeListLLM({ - responses: ["test response 1", "test response 2"], - }); - const chunks = []; - - const response = await llm.stream("test prompt"); - for await (const chunk of response) { - chunks.push(chunk); - } - - expect(chunks.length).toBeGreaterThan(1); - expect(chunks.join("")).toBe("test response 1"); - }); - - test("Should return responses in order when streaming", async () => { - const llm = new FakeListLLM({ - responses: ["test response 1", "test response 2"], - }); - const chunks1 = []; - const chunks2 = []; - - const response1 = await llm.stream("test prompt"); - for await (const chunk of response1) { - chunks1.push(chunk); - } - const response2 = await 
llm.stream("test prompt"); - for await (const chunk of response2) { - chunks2.push(chunk); - } - - expect(chunks1.join("")).toBe("test response 1"); - expect(chunks2.join("")).toBe("test response 2"); - }); - - test("Should stream responses after sleep if requested", async () => { - const llm = new FakeListLLM({ - responses: ["test response 1", "test response 2"], - sleep: 10, - }); - const sleepSpy = jest.spyOn(llm, "_sleep"); - const chunks = []; - - const response = await llm.stream("test prompt"); - for await (const chunk of response) { - chunks.push(chunk); - } - - expect(sleepSpy).toHaveBeenCalledTimes(chunks.length); - }, 3000); -}); diff --git a/libs/langchain-community/src/llms/tests/llama_cpp.int.test.ts b/libs/langchain-community/src/llms/tests/llama_cpp.int.test.ts index d0fe6cc4268e..a1686ae0814f 100644 --- a/libs/langchain-community/src/llms/tests/llama_cpp.int.test.ts +++ b/libs/langchain-community/src/llms/tests/llama_cpp.int.test.ts @@ -1,6 +1,6 @@ /* eslint-disable @typescript-eslint/no-non-null-assertion */ import { test } from "@jest/globals"; -import { getEnvironmentVariable } from "../../util/env.js"; +import { getEnvironmentVariable } from "@langchain/core/utils/env"; import { LlamaCpp } from "../llama_cpp.js"; const llamaPath = getEnvironmentVariable("LLAMA_PATH")!; diff --git a/libs/langchain-community/src/llms/tests/ollama.int.test.ts b/libs/langchain-community/src/llms/tests/ollama.int.test.ts index d63d3cc0dbd4..13688fe85b22 100644 --- a/libs/langchain-community/src/llms/tests/ollama.int.test.ts +++ b/libs/langchain-community/src/llms/tests/ollama.int.test.ts @@ -1,10 +1,10 @@ import { test } from "@jest/globals"; import { Ollama } from "../ollama.js"; -import { PromptTemplate } from "../../prompts/prompt.js"; +import { PromptTemplate } from "@langchain/core/prompts"; import { BytesOutputParser, StringOutputParser, -} from "../../schema/output_parser.js"; +} from "@langchain/core/output_parsers"; test.skip("test call", async () => { 
const ollama = new Ollama({}); diff --git a/libs/langchain-community/src/llms/tests/openai-chat.int.test.ts b/libs/langchain-community/src/llms/tests/openai-chat.int.test.ts deleted file mode 100644 index 43f737c552b8..000000000000 --- a/libs/langchain-community/src/llms/tests/openai-chat.int.test.ts +++ /dev/null @@ -1,142 +0,0 @@ -import { expect, test } from "@jest/globals"; -import { OpenAIChat } from "../openai-chat.js"; -import { CallbackManager } from "../../callbacks/index.js"; - -test("Test OpenAI", async () => { - const model = new OpenAIChat({ modelName: "gpt-3.5-turbo", maxTokens: 10 }); - const res = await model.call("Print hello world"); - console.log({ res }); -}); - -test("Test OpenAI with prefix messages", async () => { - const model = new OpenAIChat({ - prefixMessages: [ - { role: "user", content: "My name is John" }, - { role: "assistant", content: "Hi there" }, - ], - maxTokens: 10, - }); - const res = await model.call("What is my name"); - console.log({ res }); -}); - -test("Test OpenAI in streaming mode", async () => { - let nrNewTokens = 0; - let streamedCompletion = ""; - - const model = new OpenAIChat({ - maxTokens: 10, - modelName: "gpt-3.5-turbo", - streaming: true, - callbackManager: CallbackManager.fromHandlers({ - async handleLLMNewToken(token: string) { - nrNewTokens += 1; - streamedCompletion += token; - }, - }), - }); - const res = await model.call("Print hello world"); - console.log({ res }); - - expect(nrNewTokens > 0).toBe(true); - expect(res).toBe(streamedCompletion); -}, 30000); - -test("Test OpenAI with stop", async () => { - const model = new OpenAIChat({ maxTokens: 5 }); - const res = await model.call("Print hello world", ["world"]); - console.log({ res }); -}); - -test("Test OpenAI with stop in object", async () => { - const model = new OpenAIChat({ maxTokens: 5 }); - const res = await model.call("Print hello world", { stop: ["world"] }); - console.log({ res }); -}); - -test("Test OpenAI with timeout in call options", async 
() => { - const model = new OpenAIChat({ maxTokens: 5 }); - await expect(() => - model.call("Print hello world", { - timeout: 10, - }) - ).rejects.toThrow(); -}, 5000); - -test("Test OpenAI with timeout in call options and node adapter", async () => { - const model = new OpenAIChat({ maxTokens: 5 }); - await expect(() => - model.call("Print hello world", { - timeout: 10, - }) - ).rejects.toThrow(); -}, 5000); - -test("Test OpenAI with signal in call options", async () => { - const model = new OpenAIChat({ maxTokens: 5 }); - const controller = new AbortController(); - await expect(() => { - const ret = model.call("Print hello world", { - signal: controller.signal, - }); - - controller.abort(); - - return ret; - }).rejects.toThrow(); -}, 5000); - -test("Test OpenAI with signal in call options and node adapter", async () => { - const model = new OpenAIChat({ maxTokens: 5 }); - const controller = new AbortController(); - await expect(() => { - const ret = model.call("Print hello world", { - signal: controller.signal, - }); - - controller.abort(); - - return ret; - }).rejects.toThrow(); -}, 5000); - -test("Test OpenAIChat stream method", async () => { - const model = new OpenAIChat({ maxTokens: 50, modelName: "gpt-3.5-turbo" }); - const stream = await model.stream("Print hello world."); - const chunks = []; - for await (const chunk of stream) { - chunks.push(chunk); - console.log(chunks); - } - expect(chunks.length).toBeGreaterThan(1); -}); - -test("Test OpenAIChat stream method with abort", async () => { - await expect(async () => { - const model = new OpenAIChat({ maxTokens: 50, modelName: "gpt-3.5-turbo" }); - const stream = await model.stream( - "How is your day going? 
Be extremely verbose.", - { - signal: AbortSignal.timeout(1000), - } - ); - for await (const chunk of stream) { - console.log(chunk); - } - }).rejects.toThrow(); -}); - -test("Test OpenAIChat stream method with early break", async () => { - const model = new OpenAIChat({ maxTokens: 50, modelName: "gpt-3.5-turbo" }); - const stream = await model.stream( - "How is your day going? Be extremely verbose." - ); - let i = 0; - for await (const chunk of stream) { - console.log(chunk); - i += 1; - if (i > 5) { - break; - } - } -}); diff --git a/libs/langchain-community/src/llms/tests/openai.int.test.ts b/libs/langchain-community/src/llms/tests/openai.int.test.ts deleted file mode 100644 index 53421b1ff326..000000000000 --- a/libs/langchain-community/src/llms/tests/openai.int.test.ts +++ /dev/null @@ -1,310 +0,0 @@ -import { test, expect } from "@jest/globals"; -import { LLMResult } from "../../schema/index.js"; -import { OpenAIChat } from "../openai-chat.js"; -import { OpenAI } from "../openai.js"; -import { StringPromptValue } from "../../prompts/index.js"; -import { CallbackManager } from "../../callbacks/index.js"; -import { NewTokenIndices } from "../../callbacks/base.js"; - -test("Test OpenAI", async () => { - const model = new OpenAI({ - maxTokens: 5, - modelName: "gpt-3.5-turbo-instruct", - }); - const res = await model.call("Print hello world"); - console.log({ res }); -}); - -test("Test OpenAI with stop", async () => { - const model = new OpenAI({ - maxTokens: 5, - modelName: "gpt-3.5-turbo-instruct", - }); - const res = await model.call("Print hello world", ["world"]); - console.log({ res }); -}); - -test("Test OpenAI with stop in object", async () => { - const model = new OpenAI({ - maxTokens: 5, - modelName: "gpt-3.5-turbo-instruct", - }); - const res = await model.call("Print hello world", { stop: ["world"] }); - console.log({ res }); -}); - -test("Test OpenAI with timeout in call options", async () => { - const model = new OpenAI({ - maxTokens: 5, - modelName: 
"gpt-3.5-turbo-instruct", - }); - await expect(() => - model.call("Print hello world", { - timeout: 10, - }) - ).rejects.toThrow(); -}, 5000); - -test("Test OpenAI with timeout in call options and node adapter", async () => { - const model = new OpenAI({ - maxTokens: 5, - modelName: "gpt-3.5-turbo-instruct", - }); - await expect(() => - model.call("Print hello world", { - timeout: 10, - }) - ).rejects.toThrow(); -}, 5000); - -test("Test OpenAI with signal in call options", async () => { - const model = new OpenAI({ - maxTokens: 5, - modelName: "gpt-3.5-turbo-instruct", - }); - const controller = new AbortController(); - await expect(() => { - const ret = model.call("Print hello world", { - signal: controller.signal, - }); - - controller.abort(); - - return ret; - }).rejects.toThrow(); -}, 5000); - -test("Test OpenAI with signal in call options and node adapter", async () => { - const model = new OpenAI({ - maxTokens: 5, - modelName: "gpt-3.5-turbo-instruct", - }); - const controller = new AbortController(); - await expect(() => { - const ret = model.call("Print hello world", { - signal: controller.signal, - }); - - controller.abort(); - - return ret; - }).rejects.toThrow(); -}, 5000); - -test("Test OpenAI with concurrency == 1", async () => { - const model = new OpenAI({ - maxTokens: 5, - modelName: "gpt-3.5-turbo-instruct", - maxConcurrency: 1, - }); - const res = await Promise.all([ - model.call("Print hello world"), - model.call("Print hello world"), - ]); - console.log({ res }); -}); - -test("Test OpenAI with maxTokens -1", async () => { - const model = new OpenAI({ - maxTokens: -1, - modelName: "gpt-3.5-turbo-instruct", - }); - const res = await model.call("Print hello world", ["world"]); - console.log({ res }); -}); - -test("Test OpenAI with chat model returns OpenAIChat", async () => { - const model = new OpenAI({ modelName: "gpt-3.5-turbo" }); - expect(model).toBeInstanceOf(OpenAIChat); - const res = await model.call("Print hello world"); - console.log({ 
res }); - expect(typeof res).toBe("string"); -}); - -test("Test OpenAI with instruct model returns OpenAI", async () => { - const model = new OpenAI({ modelName: "gpt-3.5-turbo-instruct" }); - expect(model).toBeInstanceOf(OpenAI); - const res = await model.call("Print hello world"); - console.log({ res }); - expect(typeof res).toBe("string"); -}); - -test("Test OpenAI with versioned instruct model returns OpenAI", async () => { - const model = new OpenAI({ modelName: "gpt-3.5-turbo-instruct-0914" }); - expect(model).toBeInstanceOf(OpenAI); - const res = await model.call("Print hello world"); - console.log({ res }); - expect(typeof res).toBe("string"); -}); - -test("Test ChatOpenAI tokenUsage", async () => { - let tokenUsage = { - completionTokens: 0, - promptTokens: 0, - totalTokens: 0, - }; - - const model = new OpenAI({ - maxTokens: 5, - modelName: "gpt-3.5-turbo-instruct", - callbackManager: CallbackManager.fromHandlers({ - async handleLLMEnd(output: LLMResult) { - tokenUsage = output.llmOutput?.tokenUsage; - }, - }), - }); - const res = await model.call("Hello"); - console.log({ res }); - - expect(tokenUsage.promptTokens).toBe(1); -}); - -test("Test OpenAI in streaming mode", async () => { - let nrNewTokens = 0; - let streamedCompletion = ""; - - const model = new OpenAI({ - maxTokens: 5, - modelName: "gpt-3.5-turbo-instruct", - streaming: true, - callbacks: CallbackManager.fromHandlers({ - async handleLLMNewToken(token: string) { - nrNewTokens += 1; - streamedCompletion += token; - }, - }), - }); - const res = await model.call("Print hello world"); - console.log({ res }); - - expect(nrNewTokens > 0).toBe(true); - expect(res).toBe(streamedCompletion); -}); - -test("Test OpenAI in streaming mode with multiple prompts", async () => { - let nrNewTokens = 0; - const completions = [ - ["", ""], - ["", ""], - ]; - - const model = new OpenAI({ - maxTokens: 5, - modelName: "gpt-3.5-turbo-instruct", - streaming: true, - n: 2, - callbacks: CallbackManager.fromHandlers({ 
- async handleLLMNewToken(token: string, idx: NewTokenIndices) { - nrNewTokens += 1; - completions[idx.prompt][idx.completion] += token; - }, - }), - }); - const res = await model.generate(["Print hello world", "print hello sea"]); - console.log( - res.generations, - res.generations.map((g) => g[0].generationInfo) - ); - - expect(nrNewTokens > 0).toBe(true); - expect(res.generations.length).toBe(2); - expect(res.generations.map((g) => g.map((gg) => gg.text))).toEqual( - completions - ); -}); - -test("Test OpenAIChat in streaming mode with multiple prompts", async () => { - let nrNewTokens = 0; - const completions = [[""], [""]]; - - const model = new OpenAI({ - maxTokens: 5, - modelName: "gpt-3.5-turbo", - streaming: true, - n: 1, - callbacks: CallbackManager.fromHandlers({ - async handleLLMNewToken(token: string, idx: NewTokenIndices) { - nrNewTokens += 1; - completions[idx.prompt][idx.completion] += token; - }, - }), - }); - const res = await model.generate(["Print hello world", "print hello sea"]); - console.log( - res.generations, - res.generations.map((g) => g[0].generationInfo) - ); - - expect(nrNewTokens > 0).toBe(true); - expect(res.generations.length).toBe(2); - expect(res.generations.map((g) => g.map((gg) => gg.text))).toEqual( - completions - ); -}); - -test("Test OpenAI prompt value", async () => { - const model = new OpenAI({ - maxTokens: 5, - modelName: "gpt-3.5-turbo-instruct", - }); - const res = await model.generatePrompt([ - new StringPromptValue("Print hello world"), - ]); - expect(res.generations.length).toBe(1); - for (const generation of res.generations) { - expect(generation.length).toBe(1); - for (const g of generation) { - console.log(g.text); - } - } - console.log({ res }); -}); - -test("Test OpenAI stream method", async () => { - const model = new OpenAI({ - maxTokens: 50, - modelName: "gpt-3.5-turbo-instruct", - }); - const stream = await model.stream("Print hello world."); - const chunks = []; - for await (const chunk of stream) { - 
chunks.push(chunk); - } - expect(chunks.length).toBeGreaterThan(1); -}); - -test("Test OpenAI stream method with abort", async () => { - await expect(async () => { - const model = new OpenAI({ - maxTokens: 250, - modelName: "gpt-3.5-turbo-instruct", - }); - const stream = await model.stream( - "How is your day going? Be extremely verbose.", - { - signal: AbortSignal.timeout(1000), - } - ); - for await (const chunk of stream) { - console.log(chunk); - } - }).rejects.toThrow(); -}); - -test("Test OpenAI stream method with early break", async () => { - const model = new OpenAI({ - maxTokens: 50, - modelName: "gpt-3.5-turbo-instruct", - }); - const stream = await model.stream( - "How is your day going? Be extremely verbose." - ); - let i = 0; - for await (const chunk of stream) { - console.log(chunk); - i += 1; - if (i > 5) { - break; - } - } -}); diff --git a/libs/langchain-community/src/llms/tests/prompt_layer.int.test.ts b/libs/langchain-community/src/llms/tests/prompt_layer.int.test.ts deleted file mode 100644 index f86b90ce76f6..000000000000 --- a/libs/langchain-community/src/llms/tests/prompt_layer.int.test.ts +++ /dev/null @@ -1,95 +0,0 @@ -import { test, expect } from "@jest/globals"; -import { PromptLayerOpenAI } from "../openai.js"; -import { PromptLayerOpenAIChat } from "../openai-chat.js"; - -import { PromptLayerChatOpenAI } from "../../chat_models/openai.js"; -import { SystemMessage } from "../../schema/index.js"; - -test("Test PromptLayerOpenAI returns promptLayerID if returnPromptLayerId=true", async () => { - const model = new PromptLayerOpenAI({ - maxTokens: 5, - modelName: "text-ada-001", - returnPromptLayerId: true, - }); - const res = await model.generate(["Print hello world"]); - console.log(JSON.stringify({ res }, null, 2)); - - expect( - typeof res.generations[0][0].generationInfo?.promptLayerRequestId - ).toBe("number"); - - const modelB = new PromptLayerOpenAI({ - maxTokens: 5, - modelName: "text-ada-001", - }); - const resB = await 
modelB.generate(["Print hello world"]); - - expect( - resB.generations[0][0].generationInfo?.promptLayerRequestId - ).toBeUndefined(); -}); - -test("Test PromptLayerOpenAIChat returns promptLayerID if returnPromptLayerId=true", async () => { - const model = new PromptLayerOpenAIChat({ - prefixMessages: [ - { - role: "system", - content: "You are a helpful assistant that answers in pirate language", - }, - ], - maxTokens: 5, - returnPromptLayerId: true, - }); - const res = await model.generate(["Print hello world"]); - - expect( - typeof res.generations[0][0].generationInfo?.promptLayerRequestId - ).toBe("number"); - - const modelB = new PromptLayerOpenAIChat({ - prefixMessages: [ - { - role: "system", - content: "You are a helpful assistant that answers in pirate language", - }, - ], - maxTokens: 5, - }); - const resB = await modelB.generate(["Print hello world"]); - - expect( - resB.generations[0][0].generationInfo?.promptLayerRequestId - ).toBeUndefined(); -}); - -test("Test PromptLayerChatOpenAI returns promptLayerID if returnPromptLayerId=true", async () => { - const chat = new PromptLayerChatOpenAI({ - returnPromptLayerId: true, - }); - - const respA = await chat.generate([ - [ - new SystemMessage( - "You are a helpful assistant that translates English to French." - ), - ], - ]); - - expect( - typeof respA.generations[0][0].generationInfo?.promptLayerRequestId - ).toBe("number"); - - const chatB = new PromptLayerChatOpenAI(); - - const respB = await chatB.generate([ - [ - new SystemMessage( - "You are a helpful assistant that translates English to French." 
- ), - ], - ]); - - expect( - respB.generations[0][0].generationInfo?.promptLayerRequestId - ).toBeUndefined(); -}); diff --git a/libs/langchain-community/src/load/import_constants.ts b/libs/langchain-community/src/load/import_constants.ts index d45a9c6d6227..0b2952c46419 100644 --- a/libs/langchain-community/src/load/import_constants.ts +++ b/libs/langchain-community/src/load/import_constants.ts @@ -1,176 +1,70 @@ // Auto-generated by `scripts/create-entrypoints.js`. Do not edit manually. export const optionalImportEntrypoints = [ - "langchain/agents/load", - "langchain/agents/toolkits/aws_sfn", - "langchain/agents/toolkits/sql", - "langchain/tools/aws_lambda", - "langchain/tools/aws_sfn", - "langchain/tools/calculator", - "langchain/tools/sql", - "langchain/tools/webbrowser", - "langchain/tools/gmail", - "langchain/tools/google_calendar", - "langchain/chains/load", - "langchain/chains/query_constructor", - "langchain/chains/query_constructor/ir", - "langchain/chains/sql_db", - "langchain/chains/graph_qa/cypher", - "langchain/embeddings/bedrock", - "langchain/embeddings/cloudflare_workersai", - "langchain/embeddings/cohere", - "langchain/embeddings/tensorflow", - "langchain/embeddings/hf", - "langchain/embeddings/hf_transformers", - "langchain/embeddings/googlevertexai", - "langchain/embeddings/googlepalm", - "langchain/embeddings/llama_cpp", - "langchain/embeddings/gradient_ai", - "langchain/llms/load", - "langchain/llms/cohere", - "langchain/llms/hf", - "langchain/llms/raycast", - "langchain/llms/replicate", - "langchain/llms/googlevertexai", - "langchain/llms/googlevertexai/web", - "langchain/llms/googlepalm", - "langchain/llms/gradient_ai", - "langchain/llms/sagemaker_endpoint", - "langchain/llms/watsonx_ai", - "langchain/llms/bedrock", - "langchain/llms/bedrock/web", - "langchain/llms/llama_cpp", - "langchain/llms/writer", - "langchain/llms/portkey", - "langchain/prompts/load", - "langchain/vectorstores/clickhouse", - "langchain/vectorstores/analyticdb", - 
"langchain/vectorstores/cassandra", - "langchain/vectorstores/convex", - "langchain/vectorstores/elasticsearch", - "langchain/vectorstores/cloudflare_vectorize", - "langchain/vectorstores/closevector/web", - "langchain/vectorstores/closevector/node", - "langchain/vectorstores/chroma", - "langchain/vectorstores/googlevertexai", - "langchain/vectorstores/hnswlib", - "langchain/vectorstores/faiss", - "langchain/vectorstores/weaviate", - "langchain/vectorstores/lancedb", - "langchain/vectorstores/momento_vector_index", - "langchain/vectorstores/mongo", - "langchain/vectorstores/mongodb_atlas", - "langchain/vectorstores/pinecone", - "langchain/vectorstores/qdrant", - "langchain/vectorstores/supabase", - "langchain/vectorstores/opensearch", - "langchain/vectorstores/pgvector", - "langchain/vectorstores/milvus", - "langchain/vectorstores/neo4j_vector", - "langchain/vectorstores/typeorm", - "langchain/vectorstores/myscale", - "langchain/vectorstores/redis", - "langchain/vectorstores/rockset", - "langchain/vectorstores/typesense", - "langchain/vectorstores/singlestore", - "langchain/vectorstores/tigris", - "langchain/vectorstores/usearch", - "langchain/vectorstores/vercel_postgres", - "langchain/vectorstores/voy", - "langchain/vectorstores/zep", - "langchain/memory/zep", - "langchain/document_loaders/web/apify_dataset", - "langchain/document_loaders/web/assemblyai", - "langchain/document_loaders/web/azure_blob_storage_container", - "langchain/document_loaders/web/azure_blob_storage_file", - "langchain/document_loaders/web/cheerio", - "langchain/document_loaders/web/puppeteer", - "langchain/document_loaders/web/playwright", - "langchain/document_loaders/web/college_confidential", - "langchain/document_loaders/web/gitbook", - "langchain/document_loaders/web/hn", - "langchain/document_loaders/web/imsdb", - "langchain/document_loaders/web/figma", - "langchain/document_loaders/web/github", - "langchain/document_loaders/web/notiondb", - "langchain/document_loaders/web/notionapi", 
- "langchain/document_loaders/web/pdf", - "langchain/document_loaders/web/recursive_url", - "langchain/document_loaders/web/s3", - "langchain/document_loaders/web/sonix_audio", - "langchain/document_loaders/web/confluence", - "langchain/document_loaders/web/youtube", - "langchain/document_loaders/fs/directory", - "langchain/document_loaders/fs/buffer", - "langchain/document_loaders/fs/chatgpt", - "langchain/document_loaders/fs/text", - "langchain/document_loaders/fs/json", - "langchain/document_loaders/fs/srt", - "langchain/document_loaders/fs/pdf", - "langchain/document_loaders/fs/docx", - "langchain/document_loaders/fs/epub", - "langchain/document_loaders/fs/csv", - "langchain/document_loaders/fs/notion", - "langchain/document_loaders/fs/obsidian", - "langchain/document_loaders/fs/unstructured", - "langchain/document_loaders/fs/openai_whisper_audio", - "langchain/document_loaders/fs/pptx", - "langchain/document_transformers/html_to_text", - "langchain/document_transformers/mozilla_readability", - "langchain/chat_models/portkey", - "langchain/chat_models/bedrock", - "langchain/chat_models/bedrock/web", - "langchain/chat_models/googlevertexai", - "langchain/chat_models/googlevertexai/web", - "langchain/chat_models/googlepalm", - "langchain/chat_models/iflytek_xinghuo", - "langchain/chat_models/iflytek_xinghuo/web", - "langchain/chat_models/llama_cpp", - "langchain/sql_db", - "langchain/callbacks/handlers/llmonitor", - "langchain/output_parsers/expression", - "langchain/retrievers/amazon_kendra", - "langchain/retrievers/supabase", - "langchain/retrievers/zep", - "langchain/retrievers/metal", - "langchain/retrievers/self_query", - "langchain/retrievers/self_query/chroma", - "langchain/retrievers/self_query/functional", - "langchain/retrievers/self_query/pinecone", - "langchain/retrievers/self_query/supabase", - "langchain/retrievers/self_query/weaviate", - "langchain/retrievers/self_query/vectara", - "langchain/cache/cloudflare_kv", - "langchain/cache/momento", - 
"langchain/cache/redis", - "langchain/cache/ioredis", - "langchain/cache/file_system", - "langchain/cache/upstash_redis", - "langchain/stores/doc/gcs", - "langchain/stores/file/node", - "langchain/stores/message/cassandra", - "langchain/stores/message/convex", - "langchain/stores/message/cloudflare_d1", - "langchain/stores/message/dynamodb", - "langchain/stores/message/firestore", - "langchain/stores/message/momento", - "langchain/stores/message/mongodb", - "langchain/stores/message/redis", - "langchain/stores/message/ioredis", - "langchain/stores/message/upstash_redis", - "langchain/stores/message/planetscale", - "langchain/stores/message/xata", - "langchain/storage/convex", - "langchain/storage/ioredis", - "langchain/storage/vercel_kv", - "langchain/storage/upstash_redis", - "langchain/storage/file_system", - "langchain/graphs/neo4j_graph", - "langchain/hub", - "langchain/util/convex", - "langchain/experimental/multimodal_embeddings/googlevertexai", - "langchain/experimental/chat_models/anthropic_functions", - "langchain/experimental/llms/bittensor", - "langchain/experimental/hubs/makersuite/googlemakersuitehub", - "langchain/experimental/tools/pyinterpreter", + "@langchain/community/tools/aws_sfn", + "@langchain/community/tools/gmail", + "@langchain/community/embeddings/bedrock", + "@langchain/community/embeddings/cloudflare_workersai", + "@langchain/community/embeddings/cohere", + "@langchain/community/embeddings/googlepalm", + "@langchain/community/embeddings/googlevertexai", + "@langchain/community/embeddings/gradient_ai", + "@langchain/community/embeddings/hf", + "@langchain/community/embeddings/hf_transformers", + "@langchain/community/embeddings/llama_cpp", + "@langchain/community/embeddings/tensorflow", + "@langchain/community/llms/bedrock", + "@langchain/community/llms/bedrock/web", + "@langchain/community/llms/cohere", + "@langchain/community/llms/googlepalm", + "@langchain/community/llms/googlevertexai", + 
"@langchain/community/llms/googlevertexai/web", + "@langchain/community/llms/gradient_ai", + "@langchain/community/llms/hf", + "@langchain/community/llms/llama_cpp", + "@langchain/community/llms/portkey", + "@langchain/community/llms/raycast", + "@langchain/community/llms/replicate", + "@langchain/community/llms/sagemaker_endpoint", + "@langchain/community/llms/watsonx_ai", + "@langchain/community/llms/writer", + "@langchain/community/vectorstores/analyticdb", + "@langchain/community/vectorstores/cassandra", + "@langchain/community/vectorstores/chroma", + "@langchain/community/vectorstores/clickhouse", + "@langchain/community/vectorstores/cloudflare_vectorize", + "@langchain/community/vectorstores/convex", + "@langchain/community/vectorstores/elasticsearch", + "@langchain/community/vectorstores/lancedb", + "@langchain/community/vectorstores/milvus", + "@langchain/community/vectorstores/myscale", + "@langchain/community/vectorstores/neo4j_vector", + "@langchain/community/vectorstores/opensearch", + "@langchain/community/vectorstores/qdrant", + "@langchain/community/vectorstores/redis", + "@langchain/community/vectorstores/rockset", + "@langchain/community/vectorstores/singlestore", + "@langchain/community/vectorstores/tigris", + "@langchain/community/vectorstores/typeorm", + "@langchain/community/vectorstores/typesense", + "@langchain/community/vectorstores/vercel_postgres", + "@langchain/community/vectorstores/voy", + "@langchain/community/chat_models/bedrock", + "@langchain/community/chat_models/bedrock/web", + "@langchain/community/chat_models/googlevertexai", + "@langchain/community/chat_models/googlevertexai/web", + "@langchain/community/chat_models/googlepalm", + "@langchain/community/chat_models/iflytek_xinghuo", + "@langchain/community/chat_models/iflytek_xinghuo/web", + "@langchain/community/chat_models/llama_cpp", + "@langchain/community/chat_models/portkey", + "@langchain/community/callbacks/handlers/llmonitor", + 
"@langchain/community/retrievers/amazon_kendra", + "@langchain/community/retrievers/metal", + "@langchain/community/retrievers/supabase", + "@langchain/community/retrievers/zep", + "@langchain/community/cache/cloudflare_kv", + "@langchain/community/cache/momento", + "@langchain/community/cache/upstash_redis", + "@langchain/community/graphs/neo4j_graph", ]; diff --git a/libs/langchain-community/src/load/import_map.ts b/libs/langchain-community/src/load/import_map.ts index fa6ef233e228..d62b3d21484e 100644 --- a/libs/langchain-community/src/load/import_map.ts +++ b/libs/langchain-community/src/load/import_map.ts @@ -1,108 +1,39 @@ // Auto-generated by `scripts/create-entrypoints.js`. Do not edit manually. export * as load__serializable from "../load/serializable.js"; -export * as agents from "../agents/index.js"; -export * as agents__toolkits from "../agents/toolkits/index.js"; -export * as agents__toolkits__connery from "../agents/toolkits/connery/index.js"; -export * as agents__format_scratchpad from "../agents/format_scratchpad/openai_functions.js"; -export * as agents__format_scratchpad__openai_tools from "../agents/format_scratchpad/openai_tools.js"; -export * as agents__format_scratchpad__log from "../agents/format_scratchpad/log.js"; -export * as agents__format_scratchpad__xml from "../agents/format_scratchpad/xml.js"; -export * as agents__format_scratchpad__log_to_message from "../agents/format_scratchpad/log_to_message.js"; -export * as agents__react__output_parser from "../agents/react/output_parser.js"; -export * as agents__xml__output_parser from "../agents/xml/output_parser.js"; -export * as agents__openai__output_parser from "../agents/openai/output_parser.js"; -export * as base_language from "../base_language/index.js"; -export * as tools from "../tools/index.js"; +export * as tools__aiplugin from "../tools/aiplugin.js"; +export * as tools__bingserpapi from "../tools/bingserpapi.js"; +export * as tools__brave_search from "../tools/brave_search.js"; 
export * as tools__connery from "../tools/connery.js"; -export * as tools__render from "../tools/render.js"; +export * as tools__dadjokeapi from "../tools/dadjokeapi.js"; +export * as tools__dataforseo_api_search from "../tools/dataforseo_api_search.js"; +export * as tools__google_custom_search from "../tools/google_custom_search.js"; export * as tools__google_places from "../tools/google_places.js"; -export * as chains from "../chains/index.js"; -export * as chains__combine_documents__reduce from "../chains/combine_documents/reduce.js"; -export * as chains__openai_functions from "../chains/openai_functions/index.js"; -export * as embeddings__base from "../embeddings/base.js"; -export * as embeddings__cache_backed from "../embeddings/cache_backed.js"; -export * as embeddings__fake from "../embeddings/fake.js"; -export * as embeddings__ollama from "../embeddings/ollama.js"; -export * as embeddings__openai from "../embeddings/openai.js"; +export * as tools__ifttt from "../tools/ifttt.js"; +export * as tools__searchapi from "../tools/searchapi.js"; +export * as tools__searxng_search from "../tools/searxng_search.js"; +export * as tools__serpapi from "../tools/serpapi.js"; +export * as tools__serper from "../tools/serper.js"; +export * as tools__wikipedia_query_run from "../tools/wikipedia_query_run.js"; +export * as tools__wolframalpha from "../tools/wolframalpha.js"; export * as embeddings__minimax from "../embeddings/minimax.js"; +export * as embeddings__ollama from "../embeddings/ollama.js"; export * as embeddings__voyage from "../embeddings/voyage.js"; -export * as llms__base from "../llms/base.js"; -export * as llms__openai from "../llms/openai.js"; export * as llms__ai21 from "../llms/ai21.js"; export * as llms__aleph_alpha from "../llms/aleph_alpha.js"; export * as llms__cloudflare_workersai from "../llms/cloudflare_workersai.js"; -export * as llms__ollama from "../llms/ollama.js"; export * as llms__fireworks from "../llms/fireworks.js"; +export * as 
llms__ollama from "../llms/ollama.js"; export * as llms__yandex from "../llms/yandex.js"; -export * as llms__fake from "../llms/fake.js"; -export * as prompts from "../prompts/index.js"; -export * as vectorstores__base from "../vectorstores/base.js"; -export * as vectorstores__memory from "../vectorstores/memory.js"; export * as vectorstores__prisma from "../vectorstores/prisma.js"; export * as vectorstores__vectara from "../vectorstores/vectara.js"; export * as vectorstores__xata from "../vectorstores/xata.js"; -export * as text_splitter from "../text_splitter.js"; -export * as memory from "../memory/index.js"; -export * as document from "../document.js"; -export * as document_loaders__base from "../document_loaders/base.js"; -export * as document_loaders__web__searchapi from "../document_loaders/web/searchapi.js"; -export * as document_loaders__web__serpapi from "../document_loaders/web/serpapi.js"; -export * as document_loaders__web__sort_xyz_blockchain from "../document_loaders/web/sort_xyz_blockchain.js"; -export * as document_transformers__openai_functions from "../document_transformers/openai_functions.js"; -export * as chat_models__base from "../chat_models/base.js"; -export * as chat_models__openai from "../chat_models/openai.js"; -export * as chat_models__anthropic from "../chat_models/anthropic.js"; +export * as chat_models__baiduwenxin from "../chat_models/baiduwenxin.js"; export * as chat_models__cloudflare_workersai from "../chat_models/cloudflare_workersai.js"; export * as chat_models__fireworks from "../chat_models/fireworks.js"; -export * as chat_models__baiduwenxin from "../chat_models/baiduwenxin.js"; -export * as chat_models__ollama from "../chat_models/ollama.js"; export * as chat_models__minimax from "../chat_models/minimax.js"; +export * as chat_models__ollama from "../chat_models/ollama.js"; export * as chat_models__yandex from "../chat_models/yandex.js"; -export * as chat_models__fake from "../chat_models/fake.js"; -export * as schema from 
"../schema/index.js"; -export * as schema__document from "../schema/document.js"; -export * as schema__output_parser from "../schema/output_parser.js"; -export * as schema__prompt_template from "../schema/prompt_template.js"; -export * as schema__query_constructor from "../schema/query_constructor.js"; -export * as schema__retriever from "../schema/retriever.js"; -export * as schema__runnable from "../schema/runnable/index.js"; -export * as schema__storage from "../schema/storage.js"; -export * as callbacks from "../callbacks/index.js"; -export * as output_parsers from "../output_parsers/index.js"; -export * as retrievers__remote from "../retrievers/remote/index.js"; export * as retrievers__chaindesk from "../retrievers/chaindesk.js"; export * as retrievers__databerry from "../retrievers/databerry.js"; -export * as retrievers__contextual_compression from "../retrievers/contextual_compression.js"; -export * as retrievers__document_compressors from "../retrievers/document_compressors/index.js"; -export * as retrievers__multi_query from "../retrievers/multi_query.js"; -export * as retrievers__multi_vector from "../retrievers/multi_vector.js"; -export * as retrievers__parent_document from "../retrievers/parent_document.js"; export * as retrievers__tavily_search_api from "../retrievers/tavily_search_api.js"; -export * as retrievers__time_weighted from "../retrievers/time_weighted.js"; -export * as retrievers__document_compressors__chain_extract from "../retrievers/document_compressors/chain_extract.js"; -export * as retrievers__document_compressors__embeddings_filter from "../retrievers/document_compressors/embeddings_filter.js"; -export * as retrievers__hyde from "../retrievers/hyde.js"; -export * as retrievers__score_threshold from "../retrievers/score_threshold.js"; -export * as retrievers__vespa from "../retrievers/vespa.js"; -export * as cache from "../cache/index.js"; -export * as stores__doc__in_memory from "../stores/doc/in_memory.js"; -export * as 
stores__file__in_memory from "../stores/file/in_memory.js"; -export * as stores__message__in_memory from "../stores/message/in_memory.js"; -export * as storage__encoder_backed from "../storage/encoder_backed.js"; -export * as storage__in_memory from "../storage/in_memory.js"; -export * as util__document from "../util/document.js"; -export * as util__math from "../util/math.js"; -export * as util__time from "../util/time.js"; -export * as experimental__autogpt from "../experimental/autogpt/index.js"; -export * as experimental__openai_assistant from "../experimental/openai_assistant/index.js"; -export * as experimental__openai_files from "../experimental/openai_files/index.js"; -export * as experimental__babyagi from "../experimental/babyagi/index.js"; -export * as experimental__generative_agents from "../experimental/generative_agents/index.js"; -export * as experimental__plan_and_execute from "../experimental/plan_and_execute/index.js"; -export * as experimental__chat_models__bittensor from "../experimental/chat_models/bittensor.js"; -export * as experimental__chat_models__ollama_functions from "../experimental/chat_models/ollama_functions.js"; -export * as experimental__chains__violation_of_expectations from "../experimental/chains/violation_of_expectations/index.js"; -export * as evaluation from "../evaluation/index.js"; -export * as runnables from "../runnables/index.js"; -export * as runnables__remote from "../runnables/remote.js"; diff --git a/libs/langchain-community/src/load/import_type.d.ts b/libs/langchain-community/src/load/import_type.d.ts index 9b2b3abbde95..81cdc29e7d71 100644 --- a/libs/langchain-community/src/load/import_type.d.ts +++ b/libs/langchain-community/src/load/import_type.d.ts @@ -1,531 +1,210 @@ // Auto-generated by `scripts/create-entrypoints.js`. Do not edit manually. 
export interface OptionalImportMap { - "langchain/agents/load"?: - | typeof import("../agents/load.js") - | Promise; - "langchain/agents/toolkits/aws_sfn"?: - | typeof import("../agents/toolkits/aws_sfn.js") - | Promise; - "langchain/agents/toolkits/sql"?: - | typeof import("../agents/toolkits/sql/index.js") - | Promise; - "langchain/tools/aws_lambda"?: - | typeof import("../tools/aws_lambda.js") - | Promise; - "langchain/tools/aws_sfn"?: + "@langchain/community/tools/aws_sfn"?: | typeof import("../tools/aws_sfn.js") | Promise; - "langchain/tools/calculator"?: - | typeof import("../tools/calculator.js") - | Promise; - "langchain/tools/sql"?: - | typeof import("../tools/sql.js") - | Promise; - "langchain/tools/webbrowser"?: - | typeof import("../tools/webbrowser.js") - | Promise; - "langchain/tools/gmail"?: + "@langchain/community/tools/gmail"?: | typeof import("../tools/gmail/index.js") | Promise; - "langchain/tools/google_calendar"?: - | typeof import("../tools/google_calendar/index.js") - | Promise; - "langchain/chains/load"?: - | typeof import("../chains/load.js") - | Promise; - "langchain/chains/query_constructor"?: - | typeof import("../chains/query_constructor/index.js") - | Promise; - "langchain/chains/query_constructor/ir"?: - | typeof import("../chains/query_constructor/ir.js") - | Promise; - "langchain/chains/sql_db"?: - | typeof import("../chains/sql_db/index.js") - | Promise; - "langchain/chains/graph_qa/cypher"?: - | typeof import("../chains/graph_qa/cypher.js") - | Promise; - "langchain/embeddings/bedrock"?: + "@langchain/community/embeddings/bedrock"?: | typeof import("../embeddings/bedrock.js") | Promise; - "langchain/embeddings/cloudflare_workersai"?: + "@langchain/community/embeddings/cloudflare_workersai"?: | typeof import("../embeddings/cloudflare_workersai.js") | Promise; - "langchain/embeddings/cohere"?: + "@langchain/community/embeddings/cohere"?: | typeof import("../embeddings/cohere.js") | Promise; - "langchain/embeddings/tensorflow"?: - | 
typeof import("../embeddings/tensorflow.js") - | Promise; - "langchain/embeddings/hf"?: + "@langchain/community/embeddings/googlepalm"?: + | typeof import("../embeddings/googlepalm.js") + | Promise; + "@langchain/community/embeddings/googlevertexai"?: + | typeof import("../embeddings/googlevertexai.js") + | Promise; + "@langchain/community/embeddings/gradient_ai"?: + | typeof import("../embeddings/gradient_ai.js") + | Promise; + "@langchain/community/embeddings/hf"?: | typeof import("../embeddings/hf.js") | Promise; - "langchain/embeddings/hf_transformers"?: + "@langchain/community/embeddings/hf_transformers"?: | typeof import("../embeddings/hf_transformers.js") | Promise; - "langchain/embeddings/googlevertexai"?: - | typeof import("../embeddings/googlevertexai.js") - | Promise; - "langchain/embeddings/googlepalm"?: - | typeof import("../embeddings/googlepalm.js") - | Promise; - "langchain/embeddings/llama_cpp"?: + "@langchain/community/embeddings/llama_cpp"?: | typeof import("../embeddings/llama_cpp.js") | Promise; - "langchain/embeddings/gradient_ai"?: - | typeof import("../embeddings/gradient_ai.js") - | Promise; - "langchain/llms/load"?: - | typeof import("../llms/load.js") - | Promise; - "langchain/llms/cohere"?: + "@langchain/community/embeddings/tensorflow"?: + | typeof import("../embeddings/tensorflow.js") + | Promise; + "@langchain/community/llms/bedrock"?: + | typeof import("../llms/bedrock/index.js") + | Promise; + "@langchain/community/llms/bedrock/web"?: + | typeof import("../llms/bedrock/web.js") + | Promise; + "@langchain/community/llms/cohere"?: | typeof import("../llms/cohere.js") | Promise; - "langchain/llms/hf"?: - | typeof import("../llms/hf.js") - | Promise; - "langchain/llms/raycast"?: - | typeof import("../llms/raycast.js") - | Promise; - "langchain/llms/replicate"?: - | typeof import("../llms/replicate.js") - | Promise; - "langchain/llms/googlevertexai"?: + "@langchain/community/llms/googlepalm"?: + | typeof import("../llms/googlepalm.js") + 
| Promise; + "@langchain/community/llms/googlevertexai"?: | typeof import("../llms/googlevertexai/index.js") | Promise; - "langchain/llms/googlevertexai/web"?: + "@langchain/community/llms/googlevertexai/web"?: | typeof import("../llms/googlevertexai/web.js") | Promise; - "langchain/llms/googlepalm"?: - | typeof import("../llms/googlepalm.js") - | Promise; - "langchain/llms/gradient_ai"?: + "@langchain/community/llms/gradient_ai"?: | typeof import("../llms/gradient_ai.js") | Promise; - "langchain/llms/sagemaker_endpoint"?: + "@langchain/community/llms/hf"?: + | typeof import("../llms/hf.js") + | Promise; + "@langchain/community/llms/llama_cpp"?: + | typeof import("../llms/llama_cpp.js") + | Promise; + "@langchain/community/llms/portkey"?: + | typeof import("../llms/portkey.js") + | Promise; + "@langchain/community/llms/raycast"?: + | typeof import("../llms/raycast.js") + | Promise; + "@langchain/community/llms/replicate"?: + | typeof import("../llms/replicate.js") + | Promise; + "@langchain/community/llms/sagemaker_endpoint"?: | typeof import("../llms/sagemaker_endpoint.js") | Promise; - "langchain/llms/watsonx_ai"?: + "@langchain/community/llms/watsonx_ai"?: | typeof import("../llms/watsonx_ai.js") | Promise; - "langchain/llms/bedrock"?: - | typeof import("../llms/bedrock/index.js") - | Promise; - "langchain/llms/bedrock/web"?: - | typeof import("../llms/bedrock/web.js") - | Promise; - "langchain/llms/llama_cpp"?: - | typeof import("../llms/llama_cpp.js") - | Promise; - "langchain/llms/writer"?: + "@langchain/community/llms/writer"?: | typeof import("../llms/writer.js") | Promise; - "langchain/llms/portkey"?: - | typeof import("../llms/portkey.js") - | Promise; - "langchain/prompts/load"?: - | typeof import("../prompts/load.js") - | Promise; - "langchain/vectorstores/clickhouse"?: - | typeof import("../vectorstores/clickhouse.js") - | Promise; - "langchain/vectorstores/analyticdb"?: + "@langchain/community/vectorstores/analyticdb"?: | typeof 
import("../vectorstores/analyticdb.js") | Promise; - "langchain/vectorstores/cassandra"?: + "@langchain/community/vectorstores/cassandra"?: | typeof import("../vectorstores/cassandra.js") | Promise; - "langchain/vectorstores/convex"?: + "@langchain/community/vectorstores/chroma"?: + | typeof import("../vectorstores/chroma.js") + | Promise; + "@langchain/community/vectorstores/clickhouse"?: + | typeof import("../vectorstores/clickhouse.js") + | Promise; + "@langchain/community/vectorstores/cloudflare_vectorize"?: + | typeof import("../vectorstores/cloudflare_vectorize.js") + | Promise; + "@langchain/community/vectorstores/convex"?: | typeof import("../vectorstores/convex.js") | Promise; - "langchain/vectorstores/elasticsearch"?: + "@langchain/community/vectorstores/elasticsearch"?: | typeof import("../vectorstores/elasticsearch.js") | Promise; - "langchain/vectorstores/cloudflare_vectorize"?: - | typeof import("../vectorstores/cloudflare_vectorize.js") - | Promise; - "langchain/vectorstores/closevector/web"?: - | typeof import("../vectorstores/closevector/web.js") - | Promise; - "langchain/vectorstores/closevector/node"?: - | typeof import("../vectorstores/closevector/node.js") - | Promise; - "langchain/vectorstores/chroma"?: - | typeof import("../vectorstores/chroma.js") - | Promise; - "langchain/vectorstores/googlevertexai"?: - | typeof import("../vectorstores/googlevertexai.js") - | Promise; - "langchain/vectorstores/hnswlib"?: - | typeof import("../vectorstores/hnswlib.js") - | Promise; - "langchain/vectorstores/faiss"?: - | typeof import("../vectorstores/faiss.js") - | Promise; - "langchain/vectorstores/weaviate"?: - | typeof import("../vectorstores/weaviate.js") - | Promise; - "langchain/vectorstores/lancedb"?: + "@langchain/community/vectorstores/lancedb"?: | typeof import("../vectorstores/lancedb.js") | Promise; - "langchain/vectorstores/momento_vector_index"?: - | typeof import("../vectorstores/momento_vector_index.js") - | Promise; - 
"langchain/vectorstores/mongo"?: - | typeof import("../vectorstores/mongo.js") - | Promise; - "langchain/vectorstores/mongodb_atlas"?: - | typeof import("../vectorstores/mongodb_atlas.js") - | Promise; - "langchain/vectorstores/pinecone"?: - | typeof import("../vectorstores/pinecone.js") - | Promise; - "langchain/vectorstores/qdrant"?: - | typeof import("../vectorstores/qdrant.js") - | Promise; - "langchain/vectorstores/supabase"?: - | typeof import("../vectorstores/supabase.js") - | Promise; - "langchain/vectorstores/opensearch"?: - | typeof import("../vectorstores/opensearch.js") - | Promise; - "langchain/vectorstores/pgvector"?: - | typeof import("../vectorstores/pgvector.js") - | Promise; - "langchain/vectorstores/milvus"?: + "@langchain/community/vectorstores/milvus"?: | typeof import("../vectorstores/milvus.js") | Promise; - "langchain/vectorstores/neo4j_vector"?: - | typeof import("../vectorstores/neo4j_vector.js") - | Promise; - "langchain/vectorstores/typeorm"?: - | typeof import("../vectorstores/typeorm.js") - | Promise; - "langchain/vectorstores/myscale"?: + "@langchain/community/vectorstores/myscale"?: | typeof import("../vectorstores/myscale.js") | Promise; - "langchain/vectorstores/redis"?: + "@langchain/community/vectorstores/neo4j_vector"?: + | typeof import("../vectorstores/neo4j_vector.js") + | Promise; + "@langchain/community/vectorstores/opensearch"?: + | typeof import("../vectorstores/opensearch.js") + | Promise; + "@langchain/community/vectorstores/qdrant"?: + | typeof import("../vectorstores/qdrant.js") + | Promise; + "@langchain/community/vectorstores/redis"?: | typeof import("../vectorstores/redis.js") | Promise; - "langchain/vectorstores/rockset"?: + "@langchain/community/vectorstores/rockset"?: | typeof import("../vectorstores/rockset.js") | Promise; - "langchain/vectorstores/typesense"?: - | typeof import("../vectorstores/typesense.js") - | Promise; - "langchain/vectorstores/singlestore"?: + 
"@langchain/community/vectorstores/singlestore"?: | typeof import("../vectorstores/singlestore.js") | Promise; - "langchain/vectorstores/tigris"?: + "@langchain/community/vectorstores/tigris"?: | typeof import("../vectorstores/tigris.js") | Promise; - "langchain/vectorstores/usearch"?: - | typeof import("../vectorstores/usearch.js") - | Promise; - "langchain/vectorstores/vercel_postgres"?: + "@langchain/community/vectorstores/typeorm"?: + | typeof import("../vectorstores/typeorm.js") + | Promise; + "@langchain/community/vectorstores/typesense"?: + | typeof import("../vectorstores/typesense.js") + | Promise; + "@langchain/community/vectorstores/vercel_postgres"?: | typeof import("../vectorstores/vercel_postgres.js") | Promise; - "langchain/vectorstores/voy"?: + "@langchain/community/vectorstores/voy"?: | typeof import("../vectorstores/voy.js") | Promise; - "langchain/vectorstores/zep"?: - | typeof import("../vectorstores/zep.js") - | Promise; - "langchain/memory/zep"?: - | typeof import("../memory/zep.js") - | Promise; - "langchain/document_loaders/web/apify_dataset"?: - | typeof import("../document_loaders/web/apify_dataset.js") - | Promise; - "langchain/document_loaders/web/assemblyai"?: - | typeof import("../document_loaders/web/assemblyai.js") - | Promise; - "langchain/document_loaders/web/azure_blob_storage_container"?: - | typeof import("../document_loaders/web/azure_blob_storage_container.js") - | Promise; - "langchain/document_loaders/web/azure_blob_storage_file"?: - | typeof import("../document_loaders/web/azure_blob_storage_file.js") - | Promise; - "langchain/document_loaders/web/cheerio"?: - | typeof import("../document_loaders/web/cheerio.js") - | Promise; - "langchain/document_loaders/web/puppeteer"?: - | typeof import("../document_loaders/web/puppeteer.js") - | Promise; - "langchain/document_loaders/web/playwright"?: - | typeof import("../document_loaders/web/playwright.js") - | Promise; - "langchain/document_loaders/web/college_confidential"?: - | 
typeof import("../document_loaders/web/college_confidential.js") - | Promise; - "langchain/document_loaders/web/gitbook"?: - | typeof import("../document_loaders/web/gitbook.js") - | Promise; - "langchain/document_loaders/web/hn"?: - | typeof import("../document_loaders/web/hn.js") - | Promise; - "langchain/document_loaders/web/imsdb"?: - | typeof import("../document_loaders/web/imsdb.js") - | Promise; - "langchain/document_loaders/web/figma"?: - | typeof import("../document_loaders/web/figma.js") - | Promise; - "langchain/document_loaders/web/github"?: - | typeof import("../document_loaders/web/github.js") - | Promise; - "langchain/document_loaders/web/notiondb"?: - | typeof import("../document_loaders/web/notiondb.js") - | Promise; - "langchain/document_loaders/web/notionapi"?: - | typeof import("../document_loaders/web/notionapi.js") - | Promise; - "langchain/document_loaders/web/pdf"?: - | typeof import("../document_loaders/web/pdf.js") - | Promise; - "langchain/document_loaders/web/recursive_url"?: - | typeof import("../document_loaders/web/recursive_url.js") - | Promise; - "langchain/document_loaders/web/s3"?: - | typeof import("../document_loaders/web/s3.js") - | Promise; - "langchain/document_loaders/web/sonix_audio"?: - | typeof import("../document_loaders/web/sonix_audio.js") - | Promise; - "langchain/document_loaders/web/confluence"?: - | typeof import("../document_loaders/web/confluence.js") - | Promise; - "langchain/document_loaders/web/youtube"?: - | typeof import("../document_loaders/web/youtube.js") - | Promise; - "langchain/document_loaders/fs/directory"?: - | typeof import("../document_loaders/fs/directory.js") - | Promise; - "langchain/document_loaders/fs/buffer"?: - | typeof import("../document_loaders/fs/buffer.js") - | Promise; - "langchain/document_loaders/fs/chatgpt"?: - | typeof import("../document_loaders/fs/chatgpt.js") - | Promise; - "langchain/document_loaders/fs/text"?: - | typeof import("../document_loaders/fs/text.js") - | Promise; - 
"langchain/document_loaders/fs/json"?: - | typeof import("../document_loaders/fs/json.js") - | Promise; - "langchain/document_loaders/fs/srt"?: - | typeof import("../document_loaders/fs/srt.js") - | Promise; - "langchain/document_loaders/fs/pdf"?: - | typeof import("../document_loaders/fs/pdf.js") - | Promise; - "langchain/document_loaders/fs/docx"?: - | typeof import("../document_loaders/fs/docx.js") - | Promise; - "langchain/document_loaders/fs/epub"?: - | typeof import("../document_loaders/fs/epub.js") - | Promise; - "langchain/document_loaders/fs/csv"?: - | typeof import("../document_loaders/fs/csv.js") - | Promise; - "langchain/document_loaders/fs/notion"?: - | typeof import("../document_loaders/fs/notion.js") - | Promise; - "langchain/document_loaders/fs/obsidian"?: - | typeof import("../document_loaders/fs/obsidian.js") - | Promise; - "langchain/document_loaders/fs/unstructured"?: - | typeof import("../document_loaders/fs/unstructured.js") - | Promise; - "langchain/document_loaders/fs/openai_whisper_audio"?: - | typeof import("../document_loaders/fs/openai_whisper_audio.js") - | Promise; - "langchain/document_loaders/fs/pptx"?: - | typeof import("../document_loaders/fs/pptx.js") - | Promise; - "langchain/document_transformers/html_to_text"?: - | typeof import("../document_transformers/html_to_text.js") - | Promise; - "langchain/document_transformers/mozilla_readability"?: - | typeof import("../document_transformers/mozilla_readability.js") - | Promise; - "langchain/chat_models/portkey"?: - | typeof import("../chat_models/portkey.js") - | Promise; - "langchain/chat_models/bedrock"?: + "@langchain/community/chat_models/bedrock"?: | typeof import("../chat_models/bedrock/index.js") | Promise; - "langchain/chat_models/bedrock/web"?: + "@langchain/community/chat_models/bedrock/web"?: | typeof import("../chat_models/bedrock/web.js") | Promise; - "langchain/chat_models/googlevertexai"?: + "@langchain/community/chat_models/googlevertexai"?: | typeof 
import("../chat_models/googlevertexai/index.js") | Promise; - "langchain/chat_models/googlevertexai/web"?: + "@langchain/community/chat_models/googlevertexai/web"?: | typeof import("../chat_models/googlevertexai/web.js") | Promise; - "langchain/chat_models/googlepalm"?: + "@langchain/community/chat_models/googlepalm"?: | typeof import("../chat_models/googlepalm.js") | Promise; - "langchain/chat_models/iflytek_xinghuo"?: + "@langchain/community/chat_models/iflytek_xinghuo"?: | typeof import("../chat_models/iflytek_xinghuo/index.js") | Promise; - "langchain/chat_models/iflytek_xinghuo/web"?: + "@langchain/community/chat_models/iflytek_xinghuo/web"?: | typeof import("../chat_models/iflytek_xinghuo/web.js") | Promise; - "langchain/chat_models/llama_cpp"?: + "@langchain/community/chat_models/llama_cpp"?: | typeof import("../chat_models/llama_cpp.js") | Promise; - "langchain/sql_db"?: - | typeof import("../sql_db.js") - | Promise; - "langchain/callbacks/handlers/llmonitor"?: + "@langchain/community/chat_models/portkey"?: + | typeof import("../chat_models/portkey.js") + | Promise; + "@langchain/community/callbacks/handlers/llmonitor"?: | typeof import("../callbacks/handlers/llmonitor.js") | Promise; - "langchain/output_parsers/expression"?: - | typeof import("../output_parsers/expression.js") - | Promise; - "langchain/retrievers/amazon_kendra"?: + "@langchain/community/retrievers/amazon_kendra"?: | typeof import("../retrievers/amazon_kendra.js") | Promise; - "langchain/retrievers/supabase"?: + "@langchain/community/retrievers/metal"?: + | typeof import("../retrievers/metal.js") + | Promise; + "@langchain/community/retrievers/supabase"?: | typeof import("../retrievers/supabase.js") | Promise; - "langchain/retrievers/zep"?: + "@langchain/community/retrievers/zep"?: | typeof import("../retrievers/zep.js") | Promise; - "langchain/retrievers/metal"?: - | typeof import("../retrievers/metal.js") - | Promise; - "langchain/retrievers/self_query"?: - | typeof 
import("../retrievers/self_query/index.js") - | Promise; - "langchain/retrievers/self_query/chroma"?: - | typeof import("../retrievers/self_query/chroma.js") - | Promise; - "langchain/retrievers/self_query/functional"?: - | typeof import("../retrievers/self_query/functional.js") - | Promise; - "langchain/retrievers/self_query/pinecone"?: - | typeof import("../retrievers/self_query/pinecone.js") - | Promise; - "langchain/retrievers/self_query/supabase"?: - | typeof import("../retrievers/self_query/supabase.js") - | Promise; - "langchain/retrievers/self_query/weaviate"?: - | typeof import("../retrievers/self_query/weaviate.js") - | Promise; - "langchain/retrievers/self_query/vectara"?: - | typeof import("../retrievers/self_query/vectara.js") - | Promise; - "langchain/cache/cloudflare_kv"?: + "@langchain/community/cache/cloudflare_kv"?: | typeof import("../cache/cloudflare_kv.js") | Promise; - "langchain/cache/momento"?: + "@langchain/community/cache/momento"?: | typeof import("../cache/momento.js") | Promise; - "langchain/cache/redis"?: - | typeof import("../cache/redis.js") - | Promise; - "langchain/cache/ioredis"?: - | typeof import("../cache/ioredis.js") - | Promise; - "langchain/cache/file_system"?: - | typeof import("../cache/file_system.js") - | Promise; - "langchain/cache/upstash_redis"?: + "@langchain/community/cache/upstash_redis"?: | typeof import("../cache/upstash_redis.js") | Promise; - "langchain/stores/doc/gcs"?: - | typeof import("../stores/doc/gcs.js") - | Promise; - "langchain/stores/file/node"?: - | typeof import("../stores/file/node.js") - | Promise; - "langchain/stores/message/cassandra"?: - | typeof import("../stores/message/cassandra.js") - | Promise; - "langchain/stores/message/convex"?: - | typeof import("../stores/message/convex.js") - | Promise; - "langchain/stores/message/cloudflare_d1"?: - | typeof import("../stores/message/cloudflare_d1.js") - | Promise; - "langchain/stores/message/dynamodb"?: - | typeof 
import("../stores/message/dynamodb.js") - | Promise; - "langchain/stores/message/firestore"?: - | typeof import("../stores/message/firestore.js") - | Promise; - "langchain/stores/message/momento"?: - | typeof import("../stores/message/momento.js") - | Promise; - "langchain/stores/message/mongodb"?: - | typeof import("../stores/message/mongodb.js") - | Promise; - "langchain/stores/message/redis"?: - | typeof import("../stores/message/redis.js") - | Promise; - "langchain/stores/message/ioredis"?: - | typeof import("../stores/message/ioredis.js") - | Promise; - "langchain/stores/message/upstash_redis"?: - | typeof import("../stores/message/upstash_redis.js") - | Promise; - "langchain/stores/message/planetscale"?: - | typeof import("../stores/message/planetscale.js") - | Promise; - "langchain/stores/message/xata"?: - | typeof import("../stores/message/xata.js") - | Promise; - "langchain/storage/convex"?: - | typeof import("../storage/convex.js") - | Promise; - "langchain/storage/ioredis"?: - | typeof import("../storage/ioredis.js") - | Promise; - "langchain/storage/vercel_kv"?: - | typeof import("../storage/vercel_kv.js") - | Promise; - "langchain/storage/upstash_redis"?: - | typeof import("../storage/upstash_redis.js") - | Promise; - "langchain/storage/file_system"?: - | typeof import("../storage/file_system.js") - | Promise; - "langchain/graphs/neo4j_graph"?: + "@langchain/community/graphs/neo4j_graph"?: | typeof import("../graphs/neo4j_graph.js") | Promise; - "langchain/hub"?: - | typeof import("../hub.js") - | Promise; - "langchain/util/convex"?: - | typeof import("../util/convex.js") - | Promise; - "langchain/experimental/multimodal_embeddings/googlevertexai"?: - | typeof import("../experimental/multimodal_embeddings/googlevertexai.js") - | Promise; - "langchain/experimental/chat_models/anthropic_functions"?: - | typeof import("../experimental/chat_models/anthropic_functions.js") - | Promise; - "langchain/experimental/llms/bittensor"?: - | typeof 
import("../experimental/llms/bittensor.js") - | Promise; - "langchain/experimental/hubs/makersuite/googlemakersuitehub"?: - | typeof import("../experimental/hubs/makersuite/googlemakersuitehub.js") - | Promise; - "langchain/experimental/tools/pyinterpreter"?: - | typeof import("../experimental/tools/pyinterpreter.js") - | Promise; } export interface SecretMap { - ANTHROPIC_API_KEY?: string; AWS_ACCESS_KEY_ID?: string; - AWS_SECRETE_ACCESS_KEY?: string; AWS_SECRET_ACCESS_KEY?: string; AWS_SESSION_TOKEN?: string; - AZURE_OPENAI_API_KEY?: string; BAIDU_API_KEY?: string; BAIDU_SECRET_KEY?: string; BEDROCK_AWS_ACCESS_KEY_ID?: string; @@ -549,23 +228,10 @@ export interface SecretMap { MILVUS_USERNAME?: string; MINIMAX_API_KEY?: string; MINIMAX_GROUP_ID?: string; - OPENAI_API_KEY?: string; - OPENAI_ORGANIZATION?: string; - PLANETSCALE_DATABASE_URL?: string; - PLANETSCALE_HOST?: string; - PLANETSCALE_PASSWORD?: string; - PLANETSCALE_USERNAME?: string; - PROMPTLAYER_API_KEY?: string; QDRANT_API_KEY?: string; QDRANT_URL?: string; - REDIS_PASSWORD?: string; - REDIS_URL?: string; - REDIS_USERNAME?: string; - REMOTE_RETRIEVER_AUTH_BEARER?: string; REPLICATE_API_TOKEN?: string; SEARXNG_API_BASE?: string; - UPSTASH_REDIS_REST_TOKEN?: string; - UPSTASH_REDIS_REST_URL?: string; VECTARA_API_KEY?: string; VECTARA_CORPUS_ID?: string; VECTARA_CUSTOMER_ID?: string; @@ -574,7 +240,6 @@ export interface SecretMap { WRITER_ORG_ID?: string; YC_API_KEY?: string; YC_IAM_TOKEN?: string; - ZAPIER_NLA_API_KEY?: string; ZEP_API_KEY?: string; ZEP_API_URL?: string; } diff --git a/libs/langchain-community/src/retrievers/amazon_kendra.ts b/libs/langchain-community/src/retrievers/amazon_kendra.ts new file mode 100644 index 000000000000..d61600fbe76a --- /dev/null +++ b/libs/langchain-community/src/retrievers/amazon_kendra.ts @@ -0,0 +1,317 @@ +import { + AttributeFilter, + DocumentAttribute, + DocumentAttributeValue, + KendraClient, + KendraClientConfig, + QueryCommand, + QueryCommandOutput, + 
QueryResultItem, + RetrieveCommand, + RetrieveCommandOutput, + RetrieveResultItem, +} from "@aws-sdk/client-kendra"; + +import { BaseRetriever } from "@langchain/core/retrievers"; +import { Document } from "@langchain/core/documents"; + +/** + * Interface for the arguments required to initialize an + * AmazonKendraRetriever instance. + */ +export interface AmazonKendraRetrieverArgs { + indexId: string; + topK: number; + region: string; + attributeFilter?: AttributeFilter; + clientOptions?: KendraClientConfig; +} + +/** + * Class for interacting with Amazon Kendra, an intelligent search service + * provided by AWS. Extends the BaseRetriever class. + * @example + * ```typescript + * const retriever = new AmazonKendraRetriever({ + * topK: 10, + * indexId: "YOUR_INDEX_ID", + * region: "us-east-2", + * clientOptions: { + * credentials: { + * accessKeyId: "YOUR_ACCESS_KEY_ID", + * secretAccessKey: "YOUR_SECRET_ACCESS_KEY", + * }, + * }, + * }); + * + * const docs = await retriever.getRelevantDocuments("How are clouds formed?"); + * ``` + */ +export class AmazonKendraRetriever extends BaseRetriever { + static lc_name() { + return "AmazonKendraRetriever"; + } + + lc_namespace = ["langchain", "retrievers", "amazon_kendra"]; + + indexId: string; + + topK: number; + + kendraClient: KendraClient; + + attributeFilter?: AttributeFilter; + + constructor({ + indexId, + topK = 10, + clientOptions, + attributeFilter, + region, + }: AmazonKendraRetrieverArgs) { + super(); + + if (!region) { + throw new Error("Please pass regionName field to the constructor!"); + } + + if (!indexId) { + throw new Error("Please pass Kendra Index Id to the constructor"); + } + + this.topK = topK; + this.kendraClient = new KendraClient({ + region, + ...clientOptions, + }); + this.attributeFilter = attributeFilter; + this.indexId = indexId; + } + + // A method to combine title and excerpt into a single string. + /** + * Combines title and excerpt into a single string. 
+ * @param title The title of the document. + * @param excerpt An excerpt from the document. + * @returns A single string combining the title and excerpt. + */ + combineText(title?: string, excerpt?: string): string { + let text = ""; + if (title) { + text += `Document Title: ${title}\n`; + } + if (excerpt) { + text += `Document Excerpt: \n${excerpt}\n`; + } + return text; + } + + // A method to clean the result text by replacing sequences of whitespace with a single space and removing ellipses. + /** + * Cleans the result text by replacing sequences of whitespace with a + * single space and removing ellipses. + * @param resText The result text to clean. + * @returns The cleaned result text. + */ + cleanResult(resText: string) { + const res = resText.replace(/\s+/g, " ").replace(/\.\.\./g, ""); + return res; + } + + // A method to extract the attribute value from a DocumentAttributeValue object. + /** + * Extracts the attribute value from a DocumentAttributeValue object. + * @param value The DocumentAttributeValue object to extract the value from. + * @returns The extracted attribute value. + */ + getDocAttributeValue(value: DocumentAttributeValue) { + if (value.DateValue) { + return value.DateValue; + } + if (value.LongValue) { + return value.LongValue; + } + if (value.StringListValue) { + return value.StringListValue; + } + if (value.StringValue) { + return value.StringValue; + } + return ""; + } + + // A method to extract the attribute key-value pairs from an array of DocumentAttribute objects. + /** + * Extracts the attribute key-value pairs from an array of + * DocumentAttribute objects. + * @param documentAttributes The array of DocumentAttribute objects to extract the key-value pairs from. + * @returns An object containing the extracted attribute key-value pairs. 
+ */ + getDocAttributes(documentAttributes?: DocumentAttribute[]): { + [key: string]: unknown; + } { + const attributes: { [key: string]: unknown } = {}; + if (documentAttributes) { + for (const attr of documentAttributes) { + if (attr.Key && attr.Value) { + attributes[attr.Key] = this.getDocAttributeValue(attr.Value); + } + } + } + return attributes; + } + + // A method to convert a RetrieveResultItem object into a Document object. + /** + * Converts a RetrieveResultItem object into a Document object. + * @param item The RetrieveResultItem object to convert. + * @returns A Document object. + */ + convertRetrieverItem(item: RetrieveResultItem) { + const title = item.DocumentTitle || ""; + const excerpt = item.Content ? this.cleanResult(item.Content) : ""; + const pageContent = this.combineText(title, excerpt); + const source = item.DocumentURI; + const attributes = this.getDocAttributes(item.DocumentAttributes); + const metadata = { + source, + title, + excerpt, + document_attributes: attributes, + }; + + return new Document({ pageContent, metadata }); + } + + // A method to extract the top-k documents from a RetrieveCommandOutput object. + /** + * Extracts the top-k documents from a RetrieveCommandOutput object. + * @param response The RetrieveCommandOutput object to extract the documents from. + * @param pageSize The number of documents to extract. + * @returns An array of Document objects. + */ + getRetrieverDocs( + response: RetrieveCommandOutput, + pageSize: number + ): Document[] { + if (!response.ResultItems) return []; + const { length } = response.ResultItems; + const count = length < pageSize ? length : pageSize; + + return response.ResultItems.slice(0, count).map((item) => + this.convertRetrieverItem(item) + ); + } + + // A method to extract the excerpt text from a QueryResultItem object. + /** + * Extracts the excerpt text from a QueryResultItem object. + * @param item The QueryResultItem object to extract the excerpt text from. 
+ * @returns The extracted excerpt text. + */ + getQueryItemExcerpt(item: QueryResultItem) { + if ( + item.AdditionalAttributes && + item.AdditionalAttributes.length && + item.AdditionalAttributes[0].Key === "AnswerText" + ) { + if (!item.AdditionalAttributes) { + return ""; + } + if (!item.AdditionalAttributes[0]) { + return ""; + } + + return this.cleanResult( + item.AdditionalAttributes[0].Value?.TextWithHighlightsValue?.Text || "" + ); + } else if (item.DocumentExcerpt) { + return this.cleanResult(item.DocumentExcerpt.Text || ""); + } else { + return ""; + } + } + + // A method to convert a QueryResultItem object into a Document object. + /** + * Converts a QueryResultItem object into a Document object. + * @param item The QueryResultItem object to convert. + * @returns A Document object. + */ + convertQueryItem(item: QueryResultItem) { + const title = item.DocumentTitle?.Text || ""; + const excerpt = this.getQueryItemExcerpt(item); + const pageContent = this.combineText(title, excerpt); + const source = item.DocumentURI; + const attributes = this.getDocAttributes(item.DocumentAttributes); + const metadata = { + source, + title, + excerpt, + document_attributes: attributes, + }; + + return new Document({ pageContent, metadata }); + } + + // A method to extract the top-k documents from a QueryCommandOutput object. + /** + * Extracts the top-k documents from a QueryCommandOutput object. + * @param response The QueryCommandOutput object to extract the documents from. + * @param pageSize The number of documents to extract. + * @returns An array of Document objects. + */ + getQueryDocs(response: QueryCommandOutput, pageSize: number) { + if (!response.ResultItems) return []; + const { length } = response.ResultItems; + const count = length < pageSize ? length : pageSize; + return response.ResultItems.slice(0, count).map((item) => + this.convertQueryItem(item) + ); + } + + // A method to send a retrieve or query request to Kendra and return the top-k documents. 
+ /** + * Sends a retrieve or query request to Kendra and returns the top-k + * documents. + * @param query The query to send to Kendra. + * @param topK The number of top documents to return. + * @param attributeFilter Optional filter to apply when retrieving documents. + * @returns A Promise that resolves to an array of Document objects. + */ + async queryKendra( + query: string, + topK: number, + attributeFilter?: AttributeFilter + ) { + const retrieveCommand = new RetrieveCommand({ + IndexId: this.indexId, + QueryText: query, + PageSize: topK, + AttributeFilter: attributeFilter, + }); + + const retrieveResponse = await this.kendraClient.send(retrieveCommand); + const retriveLength = retrieveResponse.ResultItems?.length; + + if (retriveLength === 0) { + // Retrieve API returned 0 results, call query API + const queryCommand = new QueryCommand({ + IndexId: this.indexId, + QueryText: query, + PageSize: topK, + AttributeFilter: attributeFilter, + }); + + const queryResponse = await this.kendraClient.send(queryCommand); + return this.getQueryDocs(queryResponse, this.topK); + } else { + return this.getRetrieverDocs(retrieveResponse, this.topK); + } + } + + async _getRelevantDocuments(query: string): Promise { + const docs = await this.queryKendra(query, this.topK, this.attributeFilter); + return docs; + } +} diff --git a/libs/langchain-community/src/retrievers/chaindesk.ts b/libs/langchain-community/src/retrievers/chaindesk.ts new file mode 100644 index 000000000000..317006c8f29d --- /dev/null +++ b/libs/langchain-community/src/retrievers/chaindesk.ts @@ -0,0 +1,97 @@ +import { BaseRetriever, type BaseRetrieverInput } from "@langchain/core/retrievers"; +import { Document } from "@langchain/core/documents"; +import { AsyncCaller, type AsyncCallerParams } from "@langchain/core/utils/async_caller"; + +export interface ChaindeskRetrieverArgs + extends AsyncCallerParams, + BaseRetrieverInput { + datastoreId: string; + topK?: number; + filter?: Record; + apiKey?: string; +} 
+ +interface Berry { + text: string; + score: number; + source?: string; + [key: string]: unknown; +} + +/** + * @example + * ```typescript + * const retriever = new ChaindeskRetriever({ + * datastoreId: "DATASTORE_ID", + * apiKey: "CHAINDESK_API_KEY", + * topK: 8, + * }); + * const docs = await retriever.getRelevantDocuments("hello"); + * ``` + */ +export class ChaindeskRetriever extends BaseRetriever { + static lc_name() { + return "ChaindeskRetriever"; + } + + lc_namespace = ["langchain", "retrievers", "chaindesk"]; + + caller: AsyncCaller; + + datastoreId: string; + + topK?: number; + + filter?: Record; + + apiKey?: string; + + constructor({ + datastoreId, + apiKey, + topK, + filter, + ...rest + }: ChaindeskRetrieverArgs) { + super(); + + this.caller = new AsyncCaller(rest); + this.datastoreId = datastoreId; + this.apiKey = apiKey; + this.topK = topK; + this.filter = filter; + } + + async getRelevantDocuments(query: string): Promise { + const r = await this.caller.call( + fetch, + `https://app.chaindesk.ai/api/datastores/${this.datastoreId}/query`, + { + method: "POST", + body: JSON.stringify({ + query, + ...(this.topK ? { topK: this.topK } : {}), + ...(this.filter ? { filters: this.filter } : {}), + }), + headers: { + "Content-Type": "application/json", + ...(this.apiKey ? 
{ Authorization: `Bearer ${this.apiKey}` } : {}), + }, + } + ); + + const { results } = (await r.json()) as { results: Berry[] }; + + return results.map( + ({ text, score, source, ...rest }) => + new Document({ + pageContent: text, + metadata: { + score, + source, + ...rest, + }, + }) + ); + } +} diff --git a/libs/langchain-community/src/retrievers/databerry.ts b/libs/langchain-community/src/retrievers/databerry.ts new file mode 100644 index 000000000000..c38fbf5816e3 --- /dev/null +++ b/libs/langchain-community/src/retrievers/databerry.ts @@ -0,0 +1,94 @@ +import { BaseRetriever, type BaseRetrieverInput } from "@langchain/core/retrievers"; +import { Document } from "@langchain/core/documents"; +import { AsyncCaller, AsyncCallerParams } from "@langchain/core/utils/async_caller"; + +/** + * Interface for the arguments required to create a new instance of + * DataberryRetriever. + */ +export interface DataberryRetrieverArgs + extends AsyncCallerParams, + BaseRetrieverInput { + datastoreUrl: string; + topK?: number; + apiKey?: string; +} + +/** + * Interface for the structure of a Berry object returned by the Databerry + * API. + */ +interface Berry { + text: string; + score: number; + source?: string; + [key: string]: unknown; +} + +/** + * A specific implementation of a document retriever for the Databerry + * API. It extends the BaseRetriever class, which is an abstract base + * class for a document retrieval system in LangChain. 
+ */ +/** @deprecated Use "langchain/retrievers/chaindesk" instead */ +export class DataberryRetriever extends BaseRetriever { + static lc_name() { + return "DataberryRetriever"; + } + + lc_namespace = ["langchain", "retrievers", "databerry"]; + + get lc_secrets() { + return { apiKey: "DATABERRY_API_KEY" }; + } + + get lc_aliases() { + return { apiKey: "api_key" }; + } + + caller: AsyncCaller; + + datastoreUrl: string; + + topK?: number; + + apiKey?: string; + + constructor(fields: DataberryRetrieverArgs) { + super(fields); + const { datastoreUrl, apiKey, topK, ...rest } = fields; + + this.caller = new AsyncCaller(rest); + this.datastoreUrl = datastoreUrl; + this.apiKey = apiKey; + this.topK = topK; + } + + async _getRelevantDocuments(query: string): Promise { + const r = await this.caller.call(fetch, this.datastoreUrl, { + method: "POST", + body: JSON.stringify({ + query, + ...(this.topK ? { topK: this.topK } : {}), + }), + headers: { + "Content-Type": "application/json", + ...(this.apiKey ? { Authorization: `Bearer ${this.apiKey}` } : {}), + }, + }); + + const { results } = (await r.json()) as { results: Berry[] }; + + return results.map( + ({ text, score, source, ...rest }) => + new Document({ + pageContent: text, + metadata: { + score, + source, + ...rest, + }, + }) + ); + } +} diff --git a/libs/langchain-community/src/retrievers/metal.ts b/libs/langchain-community/src/retrievers/metal.ts new file mode 100644 index 000000000000..da0bb4749912 --- /dev/null +++ b/libs/langchain-community/src/retrievers/metal.ts @@ -0,0 +1,70 @@ +import Metal from "@getmetal/metal-sdk"; + +import { BaseRetriever, BaseRetrieverInput } from "@langchain/core/retrievers"; +import { Document } from "@langchain/core/documents"; + +/** + * Interface for the fields required during the initialization of a + * `MetalRetriever` instance. It extends the `BaseRetrieverInput` + * interface and adds a `client` field of type `Metal`. 
+ */ +export interface MetalRetrieverFields extends BaseRetrieverInput { + client: Metal; +} + +/** + * Interface to represent a response item from the Metal service. It + * contains a `text` field and an index signature to allow for additional + * unknown properties. + */ +interface ResponseItem { + text: string; + [key: string]: unknown; +} + +/** + * Class used to interact with the Metal service, a managed retrieval & + * memory platform. It allows you to index your data into Metal and run + * semantic search and retrieval on it. It extends the `BaseRetriever` + * class and requires a `Metal` instance and a dictionary of parameters to + * pass to the Metal API during its initialization. + * @example + * ```typescript + * const retriever = new MetalRetriever({ + * client: new Metal( + * process.env.METAL_API_KEY, + * process.env.METAL_CLIENT_ID, + * process.env.METAL_INDEX_ID, + * ), + * }); + * const docs = await retriever.getRelevantDocuments("hello"); + * ``` + */ +export class MetalRetriever extends BaseRetriever { + static lc_name() { + return "MetalRetriever"; + } + + lc_namespace = ["langchain", "retrievers", "metal"]; + + private client: Metal; + + constructor(fields: MetalRetrieverFields) { + super(fields); + + this.client = fields.client; + } + + async _getRelevantDocuments(query: string): Promise { + const res = await this.client.search({ text: query }); + + const items = ("data" in res ? 
res.data : res) as ResponseItem[]; + return items.map( + ({ text, metadata }) => + new Document({ + pageContent: text, + metadata: metadata as Record, + }) + ); + } +} diff --git a/libs/langchain-community/src/retrievers/supabase.ts b/libs/langchain-community/src/retrievers/supabase.ts new file mode 100644 index 000000000000..ba4f66bff283 --- /dev/null +++ b/libs/langchain-community/src/retrievers/supabase.ts @@ -0,0 +1,238 @@ +import type { SupabaseClient } from "@supabase/supabase-js"; +import { Embeddings } from "@langchain/core/embeddings"; +import { Document } from "@langchain/core/documents"; +import { BaseRetriever, type BaseRetrieverInput } from "@langchain/core/retrievers"; +import { + CallbackManagerForRetrieverRun, + Callbacks, +} from "@langchain/core/callbacks/manager"; + +interface SearchEmbeddingsParams { + query_embedding: number[]; + match_count: number; // int + filter?: Record; // jsonb +} + +interface SearchKeywordParams { + query_text: string; + match_count: number; // int +} + +interface SearchResponseRow { + id: number; + content: string; + metadata: object; + similarity: number; +} + +type SearchResult = [Document, number, number]; + +export interface SupabaseLibArgs extends BaseRetrieverInput { + client: SupabaseClient; + /** + * The table name on Supabase. Defaults to "documents". + */ + tableName?: string; + /** + * The name of the Similarity search function on Supabase. Defaults to "match_documents". + */ + similarityQueryName?: string; + /** + * The name of the Keyword search function on Supabase. Defaults to "kw_match_documents". + */ + keywordQueryName?: string; + /** + * The number of documents to return from the similarity search. Defaults to 2. + */ + similarityK?: number; + /** + * The number of documents to return from the keyword search. Defaults to 2. 
+ */ + keywordK?: number; +} + +export interface SupabaseHybridSearchParams { + query: string; + similarityK: number; + keywordK: number; +} + +/** + * Class for performing hybrid search operations on a Supabase database. + * It extends the `BaseRetriever` class and implements methods for + * similarity search, keyword search, and hybrid search. + */ +export class SupabaseHybridSearch extends BaseRetriever { + static lc_name() { + return "SupabaseHybridSearch"; + } + + lc_namespace = ["langchain", "retrievers", "supabase"]; + + similarityK: number; + + query: string; + + keywordK: number; + + similarityQueryName: string; + + client: SupabaseClient; + + tableName: string; + + keywordQueryName: string; + + embeddings: Embeddings; + + constructor(embeddings: Embeddings, args: SupabaseLibArgs) { + super(args); + this.embeddings = embeddings; + this.client = args.client; + this.tableName = args.tableName || "documents"; + this.similarityQueryName = args.similarityQueryName || "match_documents"; + this.keywordQueryName = args.keywordQueryName || "kw_match_documents"; + this.similarityK = args.similarityK || 2; + this.keywordK = args.keywordK || 2; + } + + /** + * Performs a similarity search on the Supabase database using the + * provided query and returns the top 'k' similar documents. + * @param query The query to use for the similarity search. + * @param k The number of top similar documents to return. + * @param _callbacks Optional callbacks to pass to the embedQuery method. + * @returns A promise that resolves to an array of search results. Each result is a tuple containing a Document, its similarity score, and its ID. 
+ */ + protected async similaritySearch( + query: string, + k: number, + _callbacks?: Callbacks // implement passing to embedQuery later + ): Promise { + const embeddedQuery = await this.embeddings.embedQuery(query); + + const matchDocumentsParams: SearchEmbeddingsParams = { + query_embedding: embeddedQuery, + match_count: k, + }; + + if (Object.keys(this.metadata ?? {}).length > 0) { + matchDocumentsParams.filter = this.metadata; + } + + const { data: searches, error } = await this.client.rpc( + this.similarityQueryName, + matchDocumentsParams + ); + + if (error) { + throw new Error( + `Error searching for documents: ${error.code} ${error.message} ${error.details}` + ); + } + + return (searches as SearchResponseRow[]).map((resp) => [ + new Document({ + metadata: resp.metadata, + pageContent: resp.content, + }), + resp.similarity, + resp.id, + ]); + } + + /** + * Performs a keyword search on the Supabase database using the provided + * query and returns the top 'k' documents that match the keywords. + * @param query The query to use for the keyword search. + * @param k The number of top documents to return that match the keywords. + * @returns A promise that resolves to an array of search results. Each result is a tuple containing a Document, its similarity score multiplied by 10, and its ID. 
+ */ + protected async keywordSearch( + query: string, + k: number + ): Promise { + const kwMatchDocumentsParams: SearchKeywordParams = { + query_text: query, + match_count: k, + }; + + const { data: searches, error } = await this.client.rpc( + this.keywordQueryName, + kwMatchDocumentsParams + ); + + if (error) { + throw new Error( + `Error searching for documents: ${error.code} ${error.message} ${error.details}` + ); + } + + return (searches as SearchResponseRow[]).map((resp) => [ + new Document({ + metadata: resp.metadata, + pageContent: resp.content, + }), + resp.similarity * 10, + resp.id, + ]); + } + + /** + * Combines the results of the `similaritySearch` and `keywordSearch` + * methods and returns the top 'k' documents based on a combination of + * similarity and keyword matching. + * @param query The query to use for the hybrid search. + * @param similarityK The number of top similar documents to return. + * @param keywordK The number of top documents to return that match the keywords. + * @param callbacks Optional callbacks to pass to the similaritySearch method. + * @returns A promise that resolves to an array of search results. Each result is a tuple containing a Document, its combined score, and its ID. 
+ */ + protected async hybridSearch( + query: string, + similarityK: number, + keywordK: number, + callbacks?: Callbacks + ): Promise { + const similarity_search = this.similaritySearch( + query, + similarityK, + callbacks + ); + + const keyword_search = this.keywordSearch(query, keywordK); + + return Promise.all([similarity_search, keyword_search]) + .then((results) => results.flat()) + .then((results) => { + const picks = new Map(); + + results.forEach((result) => { + const id = result[2]; + const nextScore = result[1]; + const prevScore = picks.get(id)?.[1]; + + if (prevScore === undefined || nextScore > prevScore) { + picks.set(id, result); + } + }); + + return Array.from(picks.values()); + }) + .then((results) => results.sort((a, b) => b[1] - a[1])); + } + + async _getRelevantDocuments( + query: string, + runManager?: CallbackManagerForRetrieverRun + ): Promise { + const searchResults = await this.hybridSearch( + query, + this.similarityK, + this.keywordK, + runManager?.getChild("hybrid_search") + ); + + return searchResults.map(([doc]) => doc); + } +} diff --git a/libs/langchain-community/src/retrievers/tavily_search_api.ts b/libs/langchain-community/src/retrievers/tavily_search_api.ts new file mode 100644 index 000000000000..d8ba3e112cc7 --- /dev/null +++ b/libs/langchain-community/src/retrievers/tavily_search_api.ts @@ -0,0 +1,140 @@ +import { Document } from "@langchain/core/documents"; +import { CallbackManagerForRetrieverRun } from "@langchain/core/callbacks/manager"; +import { BaseRetriever, type BaseRetrieverInput } from "@langchain/core/retrievers"; +import { getEnvironmentVariable } from "@langchain/core/utils/env"; + +/** + * Options for the HydeRetriever class, which includes a BaseLanguageModel + * instance, a VectorStore instance, and an optional promptTemplate which + * can either be a BasePromptTemplate instance or a PromptKey. 
+ */ +export type TavilySearchAPIRetrieverFields = BaseRetrieverInput & { + k?: number; + includeGeneratedAnswer?: boolean; + includeRawContent?: boolean; + includeImages?: boolean; + searchDepth?: "basic" | "advanced"; + includeDomains?: string[]; + excludeDomains?: string[]; + kwargs?: Record; + apiKey?: string; +}; + +/** + * A class for retrieving documents related to a given search term + * using the Tavily Search API. + */ +export class TavilySearchAPIRetriever extends BaseRetriever { + static lc_name() { + return "TavilySearchAPIRetriever"; + } + + get lc_namespace(): string[] { + return ["langchain", "retrievers", "tavily_search_api"]; + } + + k = 10; + + includeGeneratedAnswer = false; + + includeRawContent = false; + + includeImages = false; + + searchDepth = "basic"; + + includeDomains?: string[]; + + excludeDomains?: string[]; + + kwargs: Record = {}; + + apiKey?: string; + + constructor(fields?: TavilySearchAPIRetrieverFields) { + super(fields); + this.k = fields?.k ?? this.k; + this.includeGeneratedAnswer = + fields?.includeGeneratedAnswer ?? this.includeGeneratedAnswer; + this.includeRawContent = + fields?.includeRawContent ?? this.includeRawContent; + this.includeImages = fields?.includeImages ?? this.includeImages; + this.searchDepth = fields?.searchDepth ?? this.searchDepth; + this.includeDomains = fields?.includeDomains ?? this.includeDomains; + this.excludeDomains = fields?.excludeDomains ?? this.excludeDomains; + this.kwargs = fields?.kwargs ?? this.kwargs; + this.apiKey = fields?.apiKey ?? getEnvironmentVariable("TAVILY_API_KEY"); + if (this.apiKey === undefined) { + throw new Error( + `No Tavily API key found. 
Either set an environment variable named "TAVILY_API_KEY" or pass an API key as "apiKey".` + ); + } + } + + async _getRelevantDocuments( + query: string, + _runManager?: CallbackManagerForRetrieverRun + ): Promise { + const body: Record = { + query, + include_answer: this.includeGeneratedAnswer, + include_raw_content: this.includeRawContent, + include_images: this.includeImages, + max_results: this.k, + search_depth: this.searchDepth, + api_key: this.apiKey, + }; + if (this.includeDomains) { + body.include_domains = this.includeDomains; + } + if (this.excludeDomains) { + body.exclude_domains = this.excludeDomains; + } + + const response = await fetch("https://api.tavily.com/search", { + method: "POST", + headers: { + "content-type": "application/json", + }, + body: JSON.stringify({ ...body, ...this.kwargs }), + }); + const json = await response.json(); + if (!response.ok) { + throw new Error( + `Request failed with status code ${response.status}: ${json.error}` + ); + } + if (!Array.isArray(json.results)) { + throw new Error(`Could not parse Tavily results. Please try again.`); + } + // eslint-disable-next-line @typescript-eslint/no-explicit-any + const docs: Document[] = json.results.map((result: any) => { + const pageContent = this.includeRawContent + ? 
result.raw_content + : result.content; + const metadata = { + title: result.title, + source: result.url, + ...Object.fromEntries( + Object.entries(result).filter( + ([k]) => !["content", "title", "url", "raw_content"].includes(k) + ) + ), + images: json.images, + }; + return new Document({ pageContent, metadata }); + }); + if (this.includeGeneratedAnswer) { + docs.push( + new Document({ + pageContent: json.answer, + metadata: { + title: "Suggested Answer", + source: "https://tavily.com/", + }, + }) + ); + } + return docs; + } +} diff --git a/libs/langchain-community/src/retrievers/tests/amazon_kendra.int.test.ts b/libs/langchain-community/src/retrievers/tests/amazon_kendra.int.test.ts new file mode 100644 index 000000000000..be3f1b3d0701 --- /dev/null +++ b/libs/langchain-community/src/retrievers/tests/amazon_kendra.int.test.ts @@ -0,0 +1,22 @@ +/* eslint-disable no-process-env */ +/* eslint-disable @typescript-eslint/no-non-null-assertion */ +import { test } from "@jest/globals"; +import { AmazonKendraRetriever } from "../amazon_kendra.js"; + +test.skip("AmazonKendraRetriever", async () => { + const retriever = new AmazonKendraRetriever({ + topK: 10, + indexId: "5c0fcb10-9573-42df-8846-e30d69004ec5", + region: "us-east-2", + clientOptions: { + credentials: { + accessKeyId: process.env.AWS_ACCESS_KEY_ID!, + secretAccessKey: process.env.AWS_SECRET_ACCESS_KEY!, + }, + }, + }); + + const docs = await retriever.getRelevantDocuments("How are clouds formed?"); + + console.log(docs); +}); diff --git a/libs/langchain-community/src/retrievers/tests/metal.int.test.ts b/libs/langchain-community/src/retrievers/tests/metal.int.test.ts new file mode 100644 index 000000000000..f462984ba66f --- /dev/null +++ b/libs/langchain-community/src/retrievers/tests/metal.int.test.ts @@ -0,0 +1,22 @@ +/* eslint-disable no-process-env */ +/* eslint-disable @typescript-eslint/no-non-null-assertion */ +import { test, expect } from "@jest/globals"; +import Metal from "@getmetal/metal-sdk"; + 
+import { MetalRetriever } from "../metal.js"; + +test("MetalRetriever", async () => { + const MetalSDK = Metal; + const client = new MetalSDK( + process.env.METAL_API_KEY!, + process.env.METAL_CLIENT_ID!, + process.env.METAL_INDEX_ID + ); + const retriever = new MetalRetriever({ client }); + + const docs = await retriever.getRelevantDocuments("hello"); + + expect(docs.length).toBeGreaterThan(0); + + console.log(docs); +}); diff --git a/libs/langchain-community/src/retrievers/tests/supabase.int.test.ts b/libs/langchain-community/src/retrievers/tests/supabase.int.test.ts new file mode 100644 index 000000000000..a6ff76833074 --- /dev/null +++ b/libs/langchain-community/src/retrievers/tests/supabase.int.test.ts @@ -0,0 +1,27 @@ +/* eslint-disable no-process-env */ +/* eslint-disable @typescript-eslint/no-non-null-assertion */ +import { test, expect } from "@jest/globals"; +import { createClient } from "@supabase/supabase-js"; +import { OpenAIEmbeddings } from "@langchain/openai"; +import { SupabaseHybridSearch } from "../supabase.js"; + +test("Supabase hybrid keyword search", async () => { + const client = createClient( + process.env.SUPABASE_URL!, + process.env.SUPABASE_PRIVATE_KEY! 
+ ); + + const embeddings = new OpenAIEmbeddings(); + + const retriever = new SupabaseHybridSearch(embeddings, { + client, + similarityK: 2, + keywordK: 2, + }); + + expect(retriever).toBeDefined(); + + const results = await retriever.getRelevantDocuments("hello bye"); + + expect(results.length).toBeGreaterThan(0); +}); diff --git a/libs/langchain-community/src/retrievers/tests/tavily_search_api.int.test.ts b/libs/langchain-community/src/retrievers/tests/tavily_search_api.int.test.ts new file mode 100644 index 000000000000..fd71faed7ee3 --- /dev/null +++ b/libs/langchain-community/src/retrievers/tests/tavily_search_api.int.test.ts @@ -0,0 +1,17 @@ +/* eslint-disable no-process-env */ +/* eslint-disable @typescript-eslint/no-non-null-assertion */ +import { test, expect } from "@jest/globals"; + +import { TavilySearchAPIRetriever } from "../tavily_search_api.js"; + +test.skip("TavilySearchAPIRetriever", async () => { + const retriever = new TavilySearchAPIRetriever({ + includeImages: true, + includeRawContent: true, + }); + + const docs = await retriever.getRelevantDocuments("what bear is best?"); + expect(docs.length).toBeGreaterThan(0); + + console.log(docs); +}); diff --git a/libs/langchain-community/src/retrievers/zep.ts b/libs/langchain-community/src/retrievers/zep.ts new file mode 100644 index 000000000000..5ea21ab6c83f --- /dev/null +++ b/libs/langchain-community/src/retrievers/zep.ts @@ -0,0 +1,169 @@ +import { + MemorySearchPayload, + MemorySearchResult, + NotFoundError, + ZepClient, +} from "@getzep/zep-js"; +import { BaseRetriever, BaseRetrieverInput } from "@langchain/core/retrievers"; +import { Document } from "@langchain/core/documents"; + +/** + * Configuration interface for the ZepRetriever class. Extends the + * BaseRetrieverInput interface. + * + * @argument {string} sessionId - The ID of the Zep session. + * @argument {string} url - The URL of the Zep API. + * @argument {number} [topK] - The number of results to return. 
+ * @argument {string} [apiKey] - The API key for the Zep API. + * @argument [searchScope] [searchScope] - The scope of the search: "messages" or "summary". + * @argument [searchType] [searchType] - The type of search to perform: "similarity" or "mmr". + * @argument {number} [mmrLambda] - The lambda value for the MMR search. + * @argument {Record} [filter] - The metadata filter to apply to the search. + */ +export interface ZepRetrieverConfig extends BaseRetrieverInput { + sessionId: string; + url: string; + topK?: number; + apiKey?: string; + searchScope?: "messages" | "summary"; + searchType?: "similarity" | "mmr"; + mmrLambda?: number; + filter?: Record; +} + +/** + * Class for retrieving information from a Zep long-term memory store. + * Extends the BaseRetriever class. + * @example + * ```typescript + * const retriever = new ZepRetriever({ + * url: "http: + * sessionId: "session_exampleUUID", + * topK: 3, + * }); + * const query = "Can I drive red cars in France?"; + * const docs = await retriever.getRelevantDocuments(query); + * ``` + */ +export class ZepRetriever extends BaseRetriever { + static lc_name() { + return "ZepRetriever"; + } + + lc_namespace = ["langchain", "retrievers", "zep"]; + + get lc_secrets(): { [key: string]: string } | undefined { + return { + apiKey: "ZEP_API_KEY", + url: "ZEP_API_URL", + }; + } + + get lc_aliases(): { [key: string]: string } | undefined { + return { apiKey: "api_key" }; + } + + zepClientPromise: Promise; + + private sessionId: string; + + private topK?: number; + + private searchScope?: "messages" | "summary"; + + private searchType?: "similarity" | "mmr"; + + private mmrLambda?: number; + + private filter?: Record; + + constructor(config: ZepRetrieverConfig) { + super(config); + this.sessionId = config.sessionId; + this.topK = config.topK; + this.searchScope = config.searchScope; + this.searchType = config.searchType; + this.mmrLambda = config.mmrLambda; + this.filter = config.filter; + this.zepClientPromise = 
ZepClient.init(config.url, config.apiKey); + } + + /** + * Converts an array of message search results to an array of Document objects. + * @param {MemorySearchResult[]} results - The array of search results. + * @returns {Document[]} An array of Document objects representing the search results. + */ + private searchMessageResultToDoc(results: MemorySearchResult[]): Document[] { + return results + .filter((r) => r.message) + .map( + ({ + message: { content, metadata: messageMetadata } = {}, + dist, + ...rest + }) => + new Document({ + pageContent: content ?? "", + metadata: { score: dist, ...messageMetadata, ...rest }, + }) + ); + } + + /** + * Converts an array of summary search results to an array of Document objects. + * @param {MemorySearchResult[]} results - The array of search results. + * @returns {Document[]} An array of Document objects representing the search results. + */ + private searchSummaryResultToDoc(results: MemorySearchResult[]): Document[] { + return results + .filter((r) => r.summary) + .map( + ({ + summary: { content, metadata: summaryMetadata } = {}, + dist, + ...rest + }) => + new Document({ + pageContent: content ?? "", + metadata: { score: dist, ...summaryMetadata, ...rest }, + }) + ); + } + + /** + * Retrieves the relevant documents based on the given query. + * @param {string} query - The query string. + * @returns {Promise} A promise that resolves to an array of relevant Document objects. 
+ */ + async _getRelevantDocuments(query: string): Promise { + const payload: MemorySearchPayload = { + text: query, + metadata: this.filter, + search_scope: this.searchScope, + search_type: this.searchType, + mmr_lambda: this.mmrLambda, + }; + // Wait for ZepClient to be initialized + const zepClient = await this.zepClientPromise; + if (!zepClient) { + throw new Error("ZepClient is not initialized"); + } + try { + const results: MemorySearchResult[] = await zepClient.memory.searchMemory( + this.sessionId, + payload, + this.topK + ); + return this.searchScope === "summary" + ? this.searchSummaryResultToDoc(results) + : this.searchMessageResultToDoc(results); + } catch (error) { + // eslint-disable-next-line no-instanceof/no-instanceof + if (error instanceof NotFoundError) { + return Promise.resolve([]); // Return an empty Document array + } + // If it's not a NotFoundError, throw the error again + throw error; + } + } +} diff --git a/libs/langchain-community/src/tests/chat_models.test.ts b/libs/langchain-community/src/tests/chat_models.test.ts deleted file mode 100644 index 5d609f496501..000000000000 --- a/libs/langchain-community/src/tests/chat_models.test.ts +++ /dev/null @@ -1,5 +0,0 @@ -import { test } from "@jest/globals"; - -test("Test chat model", async () => { - // Your test here -}); diff --git a/libs/langchain-community/src/tests/integration.int.test.ts b/libs/langchain-community/src/tests/integration.int.test.ts deleted file mode 100644 index 7fce4ce53302..000000000000 --- a/libs/langchain-community/src/tests/integration.int.test.ts +++ /dev/null @@ -1,5 +0,0 @@ -import { test } from "@jest/globals"; - -test("Test chat model", async () => { - // Your integration test here -}); diff --git a/libs/langchain-community/src/tests/llms.test.ts b/libs/langchain-community/src/tests/llms.test.ts deleted file mode 100644 index 3428ecaaf599..000000000000 --- a/libs/langchain-community/src/tests/llms.test.ts +++ /dev/null @@ -1,5 +0,0 @@ -import { test } from 
"@jest/globals"; - -test("Test LLM", async () => { - // Your test here -}); diff --git a/libs/langchain-community/src/tests/vectorstores.test.ts b/libs/langchain-community/src/tests/vectorstores.test.ts deleted file mode 100644 index 023cfbd8b77c..000000000000 --- a/libs/langchain-community/src/tests/vectorstores.test.ts +++ /dev/null @@ -1,5 +0,0 @@ -import { test } from "@jest/globals"; - -test("Test vectorstore", async () => { - // Your test here -}); diff --git a/libs/langchain-community/src/tools/google_calendar/base.ts b/libs/langchain-community/src/tools/google_calendar/base.ts deleted file mode 100644 index 6d4baa7a8957..000000000000 --- a/libs/langchain-community/src/tools/google_calendar/base.ts +++ /dev/null @@ -1,102 +0,0 @@ -import { google } from "googleapis"; -import { Tool } from "@langchain/core/tools"; -import { getEnvironmentVariable } from "@langchain/core/utils/env"; -import { BaseLLM } from "@langchain/core/language_models/llms"; - -export interface GoogleCalendarAgentParams { - credentials?: { - clientEmail?: string; - privateKey?: string; - calendarId?: string; - }; - scopes?: string[]; - model?: BaseLLM; -} - -export class GoogleCalendarBase extends Tool { - name = "Google Calendar"; - - description = - "A tool to lookup Google Calendar events and create events in Google Calendar"; - - protected clientEmail: string; - - protected privateKey: string; - - protected calendarId: string; - - protected scopes: string[]; - - protected llm: BaseLLM; - - get lc_namespace() { - return ["langchain-community", "tools"]; - } - - constructor( - fields: GoogleCalendarAgentParams = { - credentials: { - clientEmail: getEnvironmentVariable("GOOGLE_CALENDAR_CLIENT_EMAIL"), - privateKey: getEnvironmentVariable("GOOGLE_CALENDAR_PRIVATE_KEY"), - calendarId: getEnvironmentVariable("GOOGLE_CALENDAR_CALENDAR_ID"), - }, - scopes: [ - "https://www.googleapis.com/auth/calendar", - "https://www.googleapis.com/auth/calendar.events", - ], - } - ) { - super(...arguments); 
- - if (!fields.model) { - throw new Error("Missing llm instance to interact with Google Calendar"); - } - - if (!fields.credentials) { - throw new Error("Missing credentials to authenticate to Google Calendar"); - } - - if (!fields.credentials.clientEmail) { - throw new Error( - "Missing GOOGLE_CALENDAR_CLIENT_EMAIL to interact with Google Calendar" - ); - } - - if (!fields.credentials.privateKey) { - throw new Error( - "Missing GOOGLE_CALENDAR_PRIVATE_KEY to interact with Google Calendar" - ); - } - - if (!fields.credentials.calendarId) { - throw new Error( - "Missing GOOGLE_CALENDAR_CALENDAR_ID to interact with Google Calendar" - ); - } - - this.clientEmail = fields.credentials.clientEmail; - this.privateKey = fields.credentials.privateKey; - this.calendarId = fields.credentials.calendarId; - this.scopes = fields.scopes || []; - this.llm = fields.model; - } - - getModel() { - return this.llm; - } - - async getAuth() { - const auth = new google.auth.JWT( - this.clientEmail, - undefined, - this.privateKey, - this.scopes - ); - - return auth; - } - - async _call(input: string) { - return input; - } -} diff --git a/libs/langchain-community/src/tools/google_calendar/commands/run-create-events.ts b/libs/langchain-community/src/tools/google_calendar/commands/run-create-events.ts deleted file mode 100644 index 4ec331768179..000000000000 --- a/libs/langchain-community/src/tools/google_calendar/commands/run-create-events.ts +++ /dev/null @@ -1,129 +0,0 @@ -import { google, calendar_v3 } from "googleapis"; -import type { JWT, GaxiosResponse } from "googleapis-common"; -import { PromptTemplate } from "../../../prompts/index.js"; -import { LLMChain } from "../../../chains/index.js"; -import { CREATE_EVENT_PROMPT } from "../prompts/index.js"; -import { getTimezoneOffsetInHours } from "../utils/get-timezone-offset-in-hours.js"; -import { BaseLLM } from "../../../llms/base.js"; -import { CallbackManagerForToolRun } from "../../../callbacks/manager.js"; - -type CreateEventParams 
= { - eventSummary: string; - eventStartTime: string; - eventEndTime: string; - userTimezone: string; - eventLocation?: string; - eventDescription?: string; -}; - -const createEvent = async ( - { - eventSummary, - eventStartTime, - eventEndTime, - userTimezone, - eventLocation = "", - eventDescription = "", - }: CreateEventParams, - calendarId: string, - auth: JWT -) => { - const calendar = google.calendar("v3"); - const event = { - summary: eventSummary, - location: eventLocation, - description: eventDescription, - start: { - dateTime: eventStartTime, - timeZone: userTimezone, - }, - end: { - dateTime: eventEndTime, - timeZone: userTimezone, - }, - }; - - try { - const createdEvent = await calendar.events.insert({ - auth, - calendarId, - requestBody: event, - }); - - return createdEvent; - } catch (error) { - return { - error: `An error occurred: ${error}`, - }; - } -}; - -type RunCreateEventParams = { - calendarId: string; - auth: JWT; - model: BaseLLM; -}; - -const runCreateEvent = async ( - query: string, - { calendarId, auth, model }: RunCreateEventParams, - runManager?: CallbackManagerForToolRun -) => { - const prompt = new PromptTemplate({ - template: CREATE_EVENT_PROMPT, - inputVariables: ["date", "query", "u_timezone", "dayName"], - }); - const createEventChain = new LLMChain({ - llm: model, - prompt, - }); - - const date = new Date().toISOString(); - const u_timezone = getTimezoneOffsetInHours(); - const dayName = new Date().toLocaleString("en-us", { weekday: "long" }); - - const output = await createEventChain.call( - { - query, - date, - u_timezone, - dayName, - }, - runManager?.getChild() - ); - const loaded = JSON.parse(output.text); - - const [ - eventSummary, - eventStartTime, - eventEndTime, - eventLocation, - eventDescription, - userTimezone, - ] = Object.values(loaded); - - const event = await createEvent( - { - eventSummary, - eventStartTime, - eventEndTime, - userTimezone, - eventLocation, - eventDescription, - } as CreateEventParams, - 
calendarId, - auth - ); - - if (!(event as { error: string }).error) { - return `Event created successfully, details: event ${ - (event as GaxiosResponse).data.htmlLink - }`; - } - - return `An error occurred creating the event: ${ - (event as { error: string }).error - }`; -}; - -export { runCreateEvent }; diff --git a/libs/langchain-community/src/tools/google_calendar/commands/run-view-events.ts b/libs/langchain-community/src/tools/google_calendar/commands/run-view-events.ts deleted file mode 100644 index c757931e9ca7..000000000000 --- a/libs/langchain-community/src/tools/google_calendar/commands/run-view-events.ts +++ /dev/null @@ -1,85 +0,0 @@ -import { calendar_v3 } from "googleapis"; -import type { JWT } from "googleapis-common"; -import { PromptTemplate } from "../../../prompts/index.js"; -import { LLMChain } from "../../../chains/index.js"; -import { VIEW_EVENTS_PROMPT } from "../prompts/index.js"; -import { getTimezoneOffsetInHours } from "../utils/get-timezone-offset-in-hours.js"; -import { BaseLLM } from "../../../llms/base.js"; -import { CallbackManagerForToolRun } from "../../../callbacks/manager.js"; - -type RunViewEventParams = { - calendarId: string; - auth: JWT; - model: BaseLLM; -}; - -const runViewEvents = async ( - query: string, - { model, auth, calendarId }: RunViewEventParams, - runManager?: CallbackManagerForToolRun -) => { - const calendar = new calendar_v3.Calendar({}); - - const prompt = new PromptTemplate({ - template: VIEW_EVENTS_PROMPT, - inputVariables: ["date", "query", "u_timezone", "dayName"], - }); - - const viewEventsChain = new LLMChain({ - llm: model, - prompt, - }); - - const date = new Date().toISOString(); - const u_timezone = getTimezoneOffsetInHours(); - const dayName = new Date().toLocaleString("en-us", { weekday: "long" }); - - const output = await viewEventsChain.call( - { - query, - date, - u_timezone, - dayName, - }, - runManager?.getChild() - ); - const loaded = JSON.parse(output.text); - - try { - const response = 
await calendar.events.list({ - auth, - calendarId, - ...loaded, - }); - - const curatedItems = - response.data && response.data.items - ? response.data.items.map( - ({ - status, - summary, - description, - start, - end, - }: // eslint-disable-next-line @typescript-eslint/no-explicit-any - any) => ({ - status, - summary, - description, - start, - end, - }) - ) - : []; - - return `Result for the prompt "${query}": \n${JSON.stringify( - curatedItems, - null, - 2 - )}`; - } catch (error) { - return `An error occurred: ${error}`; - } -}; - -export { runViewEvents }; diff --git a/libs/langchain-community/src/tools/google_calendar/create.ts b/libs/langchain-community/src/tools/google_calendar/create.ts deleted file mode 100644 index fd54ab8b14e7..000000000000 --- a/libs/langchain-community/src/tools/google_calendar/create.ts +++ /dev/null @@ -1,51 +0,0 @@ -import { CallbackManagerForToolRun } from "@langchain/core/callbacks/manager"; -import { GoogleCalendarBase, GoogleCalendarAgentParams } from "./base.js"; -import { runCreateEvent } from "./commands/run-create-events.js"; -import { CREATE_TOOL_DESCRIPTION } from "./descriptions.js"; - -/** - * @example - * ```typescript - * const googleCalendarCreateTool = new GoogleCalendarCreateTool({ - * credentials: { - * clientEmail: process.env.GOOGLE_CALENDAR_CLIENT_EMAIL, - * privateKey: process.env.GOOGLE_CALENDAR_PRIVATE_KEY, - * calendarId: process.env.GOOGLE_CALENDAR_CALENDAR_ID, - * }, - * scopes: [ - * "https: - * "https: - * ], - * model: new ChatOpenAI({}), - * }); - * const createInput = `Create a meeting with John Doe next Friday at 4pm - adding to the agenda of it the result of 99 + 99`; - * const createResult = await googleCalendarCreateTool.invoke({ - * input: createInput, - * }); - * console.log("Create Result", createResult); - * ``` - */ -export class GoogleCalendarCreateTool extends GoogleCalendarBase { - name = "google_calendar_create"; - - description = CREATE_TOOL_DESCRIPTION; - - constructor(fields: 
GoogleCalendarAgentParams) { - super(fields); - } - - async _call(query: string, runManager?: CallbackManagerForToolRun) { - const auth = await this.getAuth(); - const model = this.getModel(); - - return runCreateEvent( - query, - { - auth, - model, - calendarId: this.calendarId, - }, - runManager - ); - } -} diff --git a/libs/langchain-community/src/tools/google_calendar/descriptions.ts b/libs/langchain-community/src/tools/google_calendar/descriptions.ts deleted file mode 100644 index dc0a736c53c2..000000000000 --- a/libs/langchain-community/src/tools/google_calendar/descriptions.ts +++ /dev/null @@ -1,24 +0,0 @@ -export const CREATE_TOOL_DESCRIPTION = `A tool for creating Google Calendar events and meetings. - -INPUT example: -"action": "google_calendar_create", -"action_input": "create a new meeting with John Doe tomorrow at 4pm" - -OUTPUT: -Output is a confirmation of a created event. -`; - -export const VIEW_TOOL_DESCRIPTION = `A tool for retrieving Google Calendar events and meetings. 
-INPUT examples: -"action": "google_calendar_view", -"action_input": "display meetings for today" - -"action": "google_calendar_view", -"action_input": "show events for tomorrow" - -"action": "google_calendar_view", -"action_input": "display meetings for tomorrow between 4pm and 8pm" - -OUTPUT: -- title, start time, end time, attendees, description (if available) -`; diff --git a/libs/langchain-community/src/tools/google_calendar/index.ts b/libs/langchain-community/src/tools/google_calendar/index.ts deleted file mode 100644 index c7c8b3a10699..000000000000 --- a/libs/langchain-community/src/tools/google_calendar/index.ts +++ /dev/null @@ -1,3 +0,0 @@ -export { GoogleCalendarCreateTool } from "./create.js"; -export { GoogleCalendarViewTool } from "./view.js"; -export type { GoogleCalendarAgentParams } from "./base.js"; diff --git a/libs/langchain-community/src/tools/google_calendar/prompts/create-event-prompt.ts b/libs/langchain-community/src/tools/google_calendar/prompts/create-event-prompt.ts deleted file mode 100644 index b3ac77762ed8..000000000000 --- a/libs/langchain-community/src/tools/google_calendar/prompts/create-event-prompt.ts +++ /dev/null @@ -1,56 +0,0 @@ -export const CREATE_EVENT_PROMPT = ` -Date format: YYYY-MM-DDThh:mm:ss+00:00 -Based on this event description: "Joey birthday tomorrow at 7 pm", -output a json of the following parameters: -Today's datetime on UTC time 2023-05-02T10:00:00+00:00, it's Tuesday and timezone -of the user is -5, take into account the timezone of the user and today's date. -1. event_summary -2. event_start_time -3. event_end_time -4. event_location -5. event_description -6. 
user_timezone -event_summary: -{{ - "event_summary": "Joey birthday", - "event_start_time": "2023-05-03T19:00:00-05:00", - "event_end_time": "2023-05-03T20:00:00-05:00", - "event_location": "", - "event_description": "", - "user_timezone": "America/New_York" -}} - -Date format: YYYY-MM-DDThh:mm:ss+00:00 -Based on this event description: "Create a meeting for 5 pm on Saturday with Joey", -output a json of the following parameters: -Today's datetime on UTC time 2023-05-04T10:00:00+00:00, it's Thursday and timezone -of the user is -5, take into account the timezone of the user and today's date. -1. event_summary -2. event_start_time -3. event_end_time -4. event_location -5. event_description -6. user_timezone -event_summary: -{{ - "event_summary": "Meeting with Joey", - "event_start_time": "2023-05-06T17:00:00-05:00", - "event_end_time": "2023-05-06T18:00:00-05:00", - "event_location": "", - "event_description": "", - "user_timezone": "America/New_York" -}} - -Date format: YYYY-MM-DDThh:mm:ss+00:00 -Based on this event description: "{query}", output a json of the -following parameters: -Today's datetime on UTC time {date}, it's {dayName} and timezone of the user {u_timezone}, -take into account the timezone of the user and today's date. -1. event_summary -2. event_start_time -3. event_end_time -4. event_location -5. event_description -6. 
user_timezone -event_summary: -`; diff --git a/libs/langchain-community/src/tools/google_calendar/prompts/index.ts b/libs/langchain-community/src/tools/google_calendar/prompts/index.ts deleted file mode 100644 index a8a7ca38343c..000000000000 --- a/libs/langchain-community/src/tools/google_calendar/prompts/index.ts +++ /dev/null @@ -1,2 +0,0 @@ -export { CREATE_EVENT_PROMPT } from "./create-event-prompt.js"; -export { VIEW_EVENTS_PROMPT } from "./view-events-prompt.js"; diff --git a/libs/langchain-community/src/tools/google_calendar/prompts/view-events-prompt.ts b/libs/langchain-community/src/tools/google_calendar/prompts/view-events-prompt.ts deleted file mode 100644 index 0053e9a1b391..000000000000 --- a/libs/langchain-community/src/tools/google_calendar/prompts/view-events-prompt.ts +++ /dev/null @@ -1,34 +0,0 @@ -export const VIEW_EVENTS_PROMPT = ` -Date format: YYYY-MM-DDThh:mm:ss+00:00 -Based on this event description: 'View my events on Thursday', -output a json of the following parameters: -Today's datetime on UTC time 2023-05-02T10:00:00+00:00, it's Tuesday and timezone -of the user is -5, take into account the timezone of the user and today's date. -If the user is searching for events with a specific title, person or location, put it into the search_query parameter. -1. time_min -2. time_max -3. user_timezone -4. max_results -5. search_query -event_summary: -{{ - "time_min": "2023-05-04T00:00:00-05:00", - "time_max": "2023-05-04T23:59:59-05:00", - "user_timezone": "America/New_York", - "max_results": 10, - "search_query": "" -}} - -Date format: YYYY-MM-DDThh:mm:ss+00:00 -Based on this event description: '{query}', output a json of the -following parameters: -Today's datetime on UTC time {date}, today it's {dayName} and timezone of the user {u_timezone}, -take into account the timezone of the user and today's date. -If the user is searching for events with a specific title, person or location, put it into the search_query parameter. -1. time_min -2. 
time_max -3. user_timezone -4. max_results -5. search_query -event_summary: -`; diff --git a/libs/langchain-community/src/tools/google_calendar/utils/get-timezone-offset-in-hours.ts b/libs/langchain-community/src/tools/google_calendar/utils/get-timezone-offset-in-hours.ts deleted file mode 100644 index 17bed380c094..000000000000 --- a/libs/langchain-community/src/tools/google_calendar/utils/get-timezone-offset-in-hours.ts +++ /dev/null @@ -1,7 +0,0 @@ -const getTimezoneOffsetInHours = () => { - const offsetInMinutes = new Date().getTimezoneOffset(); - const offsetInHours = -offsetInMinutes / 60; - return offsetInHours; -}; - -export { getTimezoneOffsetInHours }; diff --git a/libs/langchain-community/src/tools/google_calendar/view.ts b/libs/langchain-community/src/tools/google_calendar/view.ts deleted file mode 100644 index 94168bf88cbc..000000000000 --- a/libs/langchain-community/src/tools/google_calendar/view.ts +++ /dev/null @@ -1,51 +0,0 @@ -import { CallbackManagerForToolRun } from "@langchain/core/callbacks/manager"; - -import { GoogleCalendarBase, GoogleCalendarAgentParams } from "./base.js"; -import { VIEW_TOOL_DESCRIPTION } from "./descriptions.js"; - -import { runViewEvents } from "./commands/run-view-events.js"; - -/** - * @example - * ```typescript - * const googleCalendarViewTool = new GoogleCalendarViewTool({ - * credentials: { - * clientEmail: process.env.GOOGLE_CALENDAR_CLIENT_EMAIL, - * privateKey: process.env.GOOGLE_CALENDAR_PRIVATE_KEY, - * calendarId: process.env.GOOGLE_CALENDAR_CALENDAR_ID, - * }, - * scopes: [ - * "https: - * "https: - * ], - * model: new ChatOpenAI({}), - * }); - * const viewInput = `What meetings do I have this week?`; - * const viewResult = await googleCalendarViewTool.invoke({ input: viewInput }); - * console.log("View Result", viewResult); - * ``` - */ -export class GoogleCalendarViewTool extends GoogleCalendarBase { - name = "google_calendar_view"; - - description = VIEW_TOOL_DESCRIPTION; - - constructor(fields: 
GoogleCalendarAgentParams) { - super(fields); - } - - async _call(query: string, runManager?: CallbackManagerForToolRun) { - const auth = await this.getAuth(); - const model = this.getModel(); - - return runViewEvents( - query, - { - auth, - model, - calendarId: this.calendarId, - }, - runManager - ); - } -} diff --git a/libs/langchain-community/src/tools/IFTTTWebhook.ts b/libs/langchain-community/src/tools/ifttt.ts similarity index 100% rename from libs/langchain-community/src/tools/IFTTTWebhook.ts rename to libs/langchain-community/src/tools/ifttt.ts diff --git a/libs/langchain-community/src/tools/json.ts b/libs/langchain-community/src/tools/json.ts deleted file mode 100644 index d921d632874f..000000000000 --- a/libs/langchain-community/src/tools/json.ts +++ /dev/null @@ -1,154 +0,0 @@ -import jsonpointer from "jsonpointer"; -import { Tool, type ToolParams } from "@langchain/core/tools"; -import { Serializable } from "../load/serializable.js"; - -export type Json = - | string - | number - | boolean - | null - | { [key: string]: Json } - | Json[]; - -export type JsonObject = { [key: string]: Json }; - -/** - * Represents a JSON object in the LangChain framework. Provides methods - * to get keys and values from the JSON object. - */ -export class JsonSpec extends Serializable { - get lc_namespace() { - return ["langchain-community", "tools"]; - } - - obj: JsonObject; - - maxValueLength = 4000; - - constructor(obj: JsonObject, max_value_length = 4000) { - super(...arguments); - this.obj = obj; - this.maxValueLength = max_value_length; - } - - /** - * Retrieves all keys at a given path in the JSON object. - * @param input The path to the keys in the JSON object, provided as a string in JSON pointer syntax. - * @returns A string containing all keys at the given path, separated by commas. 
- */ - public getKeys(input: string): string { - const pointer = jsonpointer.compile(input); - const res = pointer.get(this.obj) as Json; - if (typeof res === "object" && !Array.isArray(res) && res !== null) { - return Object.keys(res) - .map((i) => i.replaceAll("~", "~0").replaceAll("/", "~1")) - .join(", "); - } - - throw new Error( - `Value at ${input} is not a dictionary, get the value directly instead.` - ); - } - - /** - * Retrieves the value at a given path in the JSON object. - * @param input The path to the value in the JSON object, provided as a string in JSON pointer syntax. - * @returns The value at the given path in the JSON object, as a string. If the value is a large dictionary or exceeds the maximum length, a message is returned instead. - */ - public getValue(input: string): string { - const pointer = jsonpointer.compile(input); - const res = pointer.get(this.obj) as Json; - - if (res === null || res === undefined) { - throw new Error(`Value at ${input} is null or undefined.`); - } - - const str = typeof res === "object" ? JSON.stringify(res) : res.toString(); - if ( - typeof res === "object" && - !Array.isArray(res) && - str.length > this.maxValueLength - ) { - return `Value is a large dictionary, should explore its keys directly.`; - } - - if (str.length > this.maxValueLength) { - return `${str.slice(0, this.maxValueLength)}...`; - } - return str; - } -} - -export interface JsonToolFields extends ToolParams { - jsonSpec: JsonSpec; -} - -/** - * A tool in the LangChain framework that lists all keys at a given path - * in a JSON object. 
- */ -export class JsonListKeysTool extends Tool { - static lc_name() { - return "JsonListKeysTool"; - } - - name = "json_list_keys"; - - jsonSpec: JsonSpec; - - constructor(jsonSpec: JsonSpec); - - constructor(fields: JsonToolFields); - - constructor(fields: JsonSpec | JsonToolFields) { - if (!("jsonSpec" in fields)) { - // eslint-disable-next-line no-param-reassign - fields = { jsonSpec: fields }; - } - super(fields); - - this.jsonSpec = fields.jsonSpec; - } - - /** @ignore */ - async _call(input: string) { - try { - return this.jsonSpec.getKeys(input); - } catch (error) { - return `${error}`; - } - } - - description = `Can be used to list all keys at a given path. - Before calling this you should be SURE that the path to this exists. - The input is a text representation of the path to the json as json pointer syntax (e.g. /key1/0/key2).`; -} - -/** - * A tool in the LangChain framework that retrieves the value at a given - * path in a JSON object. - */ -export class JsonGetValueTool extends Tool { - static lc_name() { - return "JsonGetValueTool"; - } - - name = "json_get_value"; - - constructor(public jsonSpec: JsonSpec) { - super(); - } - - /** @ignore */ - async _call(input: string) { - try { - return this.jsonSpec.getValue(input); - } catch (error) { - return `${error}`; - } - } - - description = `Can be used to see value in string format at a given path. - Before calling this you should be SURE that the path to this exists. - The input is a text representation of the path to the json as json pointer syntax (e.g. 
/key1/0/key2).`; -} diff --git a/libs/langchain-community/src/tools/tests/aiplugin.int.test.ts b/libs/langchain-community/src/tools/tests/aiplugin.int.test.ts deleted file mode 100644 index 1af4604bd992..000000000000 --- a/libs/langchain-community/src/tools/tests/aiplugin.int.test.ts +++ /dev/null @@ -1,19 +0,0 @@ -import { test, expect } from "@jest/globals"; -import { AIPluginTool } from "../aiplugin.js"; - -test("AIPluginTool", async () => { - const tool = await AIPluginTool.fromPluginUrl( - "https://www.klarna.com/.well-known/ai-plugin.json" - ); - - expect(await tool.call(undefined)).toMatchInlineSnapshot(` - "Usage Guide: Assistant uses the Klarna plugin to get relevant product suggestions for any shopping or product discovery purpose. Assistant will reply with the following 3 paragraphs 1) Search Results 2) Product Comparison of the Search Results 3) Followup Questions. The first paragraph contains a list of the products with their attributes listed clearly and concisely as bullet points under the product, together with a link to the product and an explanation. Links will always be returned and should be shown to the user. The second paragraph compares the results returned in a summary sentence starting with "In summary". Assistant comparisons consider only the most important features of the products that will help them fit the users request, and each product mention is brief, short and concise. In the third paragraph assistant always asks helpful follow-up questions and end with a question mark. When assistant is asking a follow-up question, it uses it's product expertise to provide information pertaining to the subject of the user's request that may guide them in their search for the right product. 
- - OpenAPI Spec in JSON or YAML format: - {"openapi":"3.0.1","info":{"version":"v0","title":"Open AI Klarna product Api"},"servers":[{"url":"https://www.klarna.com/us/shopping"}],"tags":[{"name":"open-ai-product-endpoint","description":"Open AI Product Endpoint. Query for products."}],"paths":{"/public/openai/v0/products":{"get":{"tags":["open-ai-product-endpoint"],"summary":"API for fetching Klarna product information","operationId":"productsUsingGET","parameters":[{"name":"countryCode","in":"query","description":"ISO 3166 country code with 2 characters based on the user location. Currently, only US, GB, DE, SE and DK are supported.","required":true,"schema":{"type":"string"}},{"name":"q","in":"query","description":"A precise query that matches one very small category or product that needs to be searched for to find the products the user is looking for. If the user explicitly stated what they want, use that as a query. The query is as specific as possible to the product name or category mentioned by the user in its singular form, and don't contain any clarifiers like latest, newest, cheapest, budget, premium, expensive or similar. The query is always taken from the latest topic, if there is a new topic a new query is started. If the user speaks another language than English, translate their request into English (example: translate fia med knuff to ludo board game)!","required":true,"schema":{"type":"string"}},{"name":"size","in":"query","description":"number of products returned","required":false,"schema":{"type":"integer"}},{"name":"min_price","in":"query","description":"(Optional) Minimum price in local currency for the product searched for. Either explicitly stated by the user or implicitly inferred from a combination of the user's request and the kind of product searched for.","required":false,"schema":{"type":"integer"}},{"name":"max_price","in":"query","description":"(Optional) Maximum price in local currency for the product searched for. 
Either explicitly stated by the user or implicitly inferred from a combination of the user's request and the kind of product searched for.","required":false,"schema":{"type":"integer"}}],"responses":{"200":{"description":"Products found","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ProductResponse"}}}},"503":{"description":"one or more services are unavailable"}},"deprecated":false}}},"components":{"schemas":{"Product":{"type":"object","properties":{"attributes":{"type":"array","items":{"type":"string"}},"name":{"type":"string"},"price":{"type":"string"},"url":{"type":"string"}},"title":"Product"},"ProductResponse":{"type":"object","properties":{"products":{"type":"array","items":{"$ref":"#/components/schemas/Product"}}},"title":"ProductResponse"}}}}" - `); - - expect(await tool.call({})).toMatch(/Usage Guide/); - - expect(await tool.call("")).toMatch(/OpenAPI Spec/); -}); diff --git a/libs/langchain-community/src/tools/tests/brave_search.int.test.ts b/libs/langchain-community/src/tools/tests/brave_search.int.test.ts deleted file mode 100644 index 8894e15bfb40..000000000000 --- a/libs/langchain-community/src/tools/tests/brave_search.int.test.ts +++ /dev/null @@ -1,10 +0,0 @@ -import { test } from "@jest/globals"; -import { BraveSearch } from "../brave_search.js"; - -test.skip("BraveSearchTool", async () => { - const tool = new BraveSearch(); - - const result = await tool.call("What is Langchain?"); - - console.log({ result }); -}); diff --git a/libs/langchain-community/src/tools/tests/chain.test.ts b/libs/langchain-community/src/tools/tests/chain.test.ts deleted file mode 100644 index 8fd8571e83b6..000000000000 --- a/libs/langchain-community/src/tools/tests/chain.test.ts +++ /dev/null @@ -1,142 +0,0 @@ -import { test, expect, jest } from "@jest/globals"; - -import { ChainTool } from "../chain.js"; -import { LLMChain } from "../../chains/llm_chain.js"; -import { PromptTemplate } from "../../prompts/prompt.js"; -import { LLM } from 
"../../llms/base.js"; -import { VectorDBQAChain } from "../../chains/vector_db_qa.js"; -import { MemoryVectorStore } from "../../vectorstores/memory.js"; -import { FakeEmbeddings } from "../../embeddings/fake.js"; - -class FakeLLM extends LLM { - _llmType() { - return "fake"; - } - - async _call(prompt: string): Promise { - return prompt; - } -} - -test("chain tool with llm chain and local callback", async () => { - const calls: string[] = []; - const handleToolStart = jest.fn(() => { - calls.push("tool start"); - }); - const handleToolEnd = jest.fn(() => { - calls.push("tool end"); - }); - const handleLLMStart = jest.fn(() => { - calls.push("llm start"); - }); - const handleLLMEnd = jest.fn(() => { - calls.push("llm end"); - }); - const handleChainStart = jest.fn(() => { - calls.push("chain start"); - }); - const handleChainEnd = jest.fn(() => { - calls.push("chain end"); - }); - - const chain = new LLMChain({ - llm: new FakeLLM({}), - prompt: PromptTemplate.fromTemplate("hello world"), - }); - const tool = new ChainTool({ chain, name: "fake", description: "fake" }); - const result = await tool.call("hi", [ - { - awaitHandlers: true, - handleToolStart, - handleToolEnd, - handleLLMStart, - handleLLMEnd, - handleChainStart, - handleChainEnd, - }, - ]); - expect(result).toMatchInlineSnapshot(`"hello world"`); - expect(handleToolStart).toBeCalledTimes(1); - expect(handleToolEnd).toBeCalledTimes(1); - expect(handleLLMStart).toBeCalledTimes(1); - expect(handleLLMEnd).toBeCalledTimes(1); - expect(handleChainStart).toBeCalledTimes(1); - expect(handleChainEnd).toBeCalledTimes(1); - expect(calls).toMatchInlineSnapshot(` - [ - "tool start", - "chain start", - "llm start", - "llm end", - "chain end", - "tool end", - ] - `); -}); - -test("chain tool with vectordbqa chain", async () => { - const calls: string[] = []; - const handleToolStart = jest.fn(() => { - calls.push("tool start"); - }); - const handleToolEnd = jest.fn(() => { - calls.push("tool end"); - }); - const 
handleLLMStart = jest.fn(() => { - calls.push("llm start"); - }); - const handleLLMEnd = jest.fn(() => { - calls.push("llm end"); - }); - const handleChainStart = jest.fn(() => { - calls.push("chain start"); - }); - const handleChainEnd = jest.fn(() => { - calls.push("chain end"); - }); - - const chain = VectorDBQAChain.fromLLM( - new FakeLLM({}), - await MemoryVectorStore.fromExistingIndex(new FakeEmbeddings()) - ); - const tool = new ChainTool({ chain, name: "fake", description: "fake" }); - const result = await tool.call("hi", [ - { - awaitHandlers: true, - handleToolStart, - handleToolEnd, - handleLLMStart, - handleLLMEnd, - handleChainStart, - handleChainEnd, - }, - ]); - expect(result).toMatchInlineSnapshot(` - "Use the following pieces of context to answer the question at the end. If you don't know the answer, just say that you don't know, don't try to make up an answer. - - - - Question: hi - Helpful Answer:" - `); - expect(handleToolStart).toBeCalledTimes(1); - expect(handleToolEnd).toBeCalledTimes(1); - expect(handleLLMStart).toBeCalledTimes(1); - expect(handleLLMEnd).toBeCalledTimes(1); - expect(handleChainStart).toBeCalledTimes(3); - expect(handleChainEnd).toBeCalledTimes(3); - expect(calls).toMatchInlineSnapshot(` - [ - "tool start", - "chain start", - "chain start", - "chain start", - "llm start", - "llm end", - "chain end", - "chain end", - "chain end", - "tool end", - ] - `); -}); diff --git a/libs/langchain-community/src/tools/tests/gmail.test.ts b/libs/langchain-community/src/tools/tests/gmail.test.ts deleted file mode 100644 index e44b6f7fef36..000000000000 --- a/libs/langchain-community/src/tools/tests/gmail.test.ts +++ /dev/null @@ -1,63 +0,0 @@ -import { jest, expect, describe } from "@jest/globals"; -import { GmailGetMessage } from "../gmail/get_message.js"; - -jest.mock("googleapis", () => ({ - google: { - auth: { - JWT: jest.fn().mockImplementation(() => ({})), - }, - }, -})); - -describe("GmailBaseTool using GmailGetMessage", () => { - 
it("should be setup with correct parameters", async () => { - const params = { - credentials: { - clientEmail: "test@email.com", - privateKey: "privateKey", - }, - scopes: ["gmail_scope1"], - }; - const instance = new GmailGetMessage(params); - expect(instance.name).toBe("gmail_get_message"); - }); - - it("should throw an error if both privateKey and keyfile are missing", async () => { - const params = { - credentials: {}, - scopes: ["gmail_scope1"], - }; - - expect(() => new GmailGetMessage(params)).toThrow(); - }); - - it("should throw error with only client_email", async () => { - const params = { - credentials: { - clientEmail: "client_email", - }, - }; - - expect(() => new GmailGetMessage(params)).toThrow(); - }); - - it("should throw error with only private_key", async () => { - const params = { - credentials: { - privateKey: "privateKey", - }, - }; - - expect(() => new GmailGetMessage(params)).toThrow(); - }); - - it("should throw error with only keyfile", async () => { - const params = { - credentials: { - keyfile: "keyfile", - }, - }; - - expect(() => new GmailGetMessage(params)).toThrow(); - }); -}); diff --git a/libs/langchain-community/src/tools/tests/google_calendar.test.ts b/libs/langchain-community/src/tools/tests/google_calendar.test.ts deleted file mode 100644 index 78816006f9ef..000000000000 --- a/libs/langchain-community/src/tools/tests/google_calendar.test.ts +++ /dev/null @@ -1,110 +0,0 @@ -import { jest, expect, describe } from "@jest/globals"; -import { LLM } from "../../llms/base.js"; -import { - GoogleCalendarCreateTool, - GoogleCalendarViewTool, -} from "../google_calendar/index.js"; - -jest.mock("googleapis", () => ({ - google: { - auth: { - JWT: jest.fn().mockImplementation(() => ({})), - }, - }, -})); - -jest.mock("../../util/env.js", () => ({ - getEnvironmentVariable: () => "key", -})); - -// jest.mock("../google_calendar/commands/run-create-events.js", () => ({ -// runCreateEvent: jest.fn(), -// })); - -// 
jest.mock("../google_calendar/commands/run-view-events.js", () => ({ -// runViewEvents: jest.fn(), -// })); - -class FakeLLM extends LLM { - _llmType() { - return "fake"; - } - - async _call(prompt: string): Promise { - return prompt; - } -} - -describe("GoogleCalendarCreateTool", () => { - it("should be setup with correct parameters", async () => { - const params = { - credentials: { - clientEmail: "test@email.com", - privateKey: "privateKey", - calendarId: "calendarId", - }, - model: new FakeLLM({}), - }; - - const instance = new GoogleCalendarCreateTool(params); - expect(instance.name).toBe("google_calendar_create"); - }); - - it("should throw an error if missing credentials", async () => { - const params = { - credentials: {}, - model: new FakeLLM({}), - }; - expect(() => new GoogleCalendarCreateTool(params)).toThrow( - "Missing GOOGLE_CALENDAR_CLIENT_EMAIL to interact with Google Calendar" - ); - }); - - it("should throw an error if missing model", async () => { - const params = { - credentials: { - clientEmail: "test", - }, - }; - expect(() => new GoogleCalendarCreateTool(params)).toThrow( - "Missing llm instance to interact with Google Calendar" - ); - }); -}); - -describe("GoogleCalendarViewTool", () => { - it("should be setup with correct parameters", async () => { - const params = { - credentials: { - clientEmail: "test@email.com", - privateKey: "privateKey", - calendarId: "calendarId", - }, - model: new FakeLLM({}), - }; - - const instance = new GoogleCalendarViewTool(params); - expect(instance.name).toBe("google_calendar_view"); - }); - - it("should throw an error if missing credentials", async () => { - const params = { - credentials: {}, - model: new FakeLLM({}), - }; - expect(() => new GoogleCalendarViewTool(params)).toThrow( - "Missing GOOGLE_CALENDAR_CLIENT_EMAIL to interact with Google Calendar" - ); - }); - - it("should throw an error if missing model", async () => { - const params = { - credentials: { - clientEmail: "test", - }, - }; - expect(() 
=> new GoogleCalendarViewTool(params)).toThrow( - "Missing llm instance to interact with Google Calendar" - ); - }); -}); diff --git a/libs/langchain-community/src/tools/tests/google_custom_search.int.test.ts b/libs/langchain-community/src/tools/tests/google_custom_search.int.test.ts deleted file mode 100644 index 5ba1f005c7c9..000000000000 --- a/libs/langchain-community/src/tools/tests/google_custom_search.int.test.ts +++ /dev/null @@ -1,10 +0,0 @@ -import { test } from "@jest/globals"; -import { GoogleCustomSearch } from "../google_custom_search.js"; - -test.skip("GoogleCustomSearchTool", async () => { - const tool = new GoogleCustomSearch(); - - const result = await tool.call("What is Langchain?"); - - console.log({ result }); -}); diff --git a/libs/langchain-community/src/tools/tests/google_places.int.test.ts b/libs/langchain-community/src/tools/tests/google_places.int.test.ts deleted file mode 100644 index a00bfaefc339..000000000000 --- a/libs/langchain-community/src/tools/tests/google_places.int.test.ts +++ /dev/null @@ -1,26 +0,0 @@ -import { expect, describe } from "@jest/globals"; -import { GooglePlacesAPI } from "../google_places.js"; - -describe("GooglePlacesAPI", () => { - test("should be setup with correct parameters", async () => { - const instance = new GooglePlacesAPI(); - expect(instance.name).toBe("google_places"); - }); - - test("GooglePlacesAPI returns expected result for valid query", async () => { - const tool = new GooglePlacesAPI(); - - const result = await tool.call("EatonCenter"); - - expect(result).toContain("220 Yonge St"); - expect(result).toContain("CF Toronto Eaton Centre"); - }); - - test("GooglePlacesAPI returns '' for query on an non-existent place", async () => { - const tool = new GooglePlacesAPI(); - - const result = await tool.call("ihfwehnwfi"); - - expect(result).toContain(""); - }); -}); diff --git a/libs/langchain-community/src/tools/tests/searchapi.test.ts b/libs/langchain-community/src/tools/tests/searchapi.test.ts 
deleted file mode 100644 index af90291b0cec..000000000000 --- a/libs/langchain-community/src/tools/tests/searchapi.test.ts +++ /dev/null @@ -1,20 +0,0 @@ -import { test, expect } from "@jest/globals"; -import { SearchApi } from "../../tools/searchapi.js"; - -describe("SearchApi test suite", () => { - class SearchApiUrlTester extends SearchApi { - testThisUrl(): string { - return this.buildUrl("Query"); - } - } - - test("Test default url", async () => { - const searchApi = new SearchApiUrlTester("ApiKey", { - hl: "en", - gl: "us", - }); - expect(searchApi.testThisUrl()).toEqual( - "https://www.searchapi.io/api/v1/search?engine=google&api_key=ApiKey&hl=en&gl=us&q=Query" - ); - }); -}); diff --git a/libs/langchain-community/src/tools/tests/serpapi.test.ts b/libs/langchain-community/src/tools/tests/serpapi.test.ts deleted file mode 100644 index f40f0ab2846f..000000000000 --- a/libs/langchain-community/src/tools/tests/serpapi.test.ts +++ /dev/null @@ -1,37 +0,0 @@ -import { test, expect } from "@jest/globals"; -import { SerpAPI } from "../../tools/serpapi.js"; - -describe("serp api test suite", () => { - class SerpApiUrlTester extends SerpAPI { - testThisUrl(): string { - return this.buildUrl("search", this.params, this.baseUrl); - } - } - - test("Test default url", async () => { - const serpApi = new SerpApiUrlTester( - "Not a real key but constructor error if not set", - { - hl: "en", - gl: "us", - } - ); - expect(serpApi.testThisUrl()).toEqual( - "https://serpapi.com/search?hl=en&gl=us" - ); - }); - - test("Test override url", async () => { - const serpApiProxied = new SerpApiUrlTester( - "Not a real key but constructor error if not set", - { - gl: "us", - }, - "https://totallyProxied.com" - ); - - expect( - serpApiProxied.testThisUrl() === "https://totallyProxied.com/search?gl=us" - ); - }); -}); diff --git a/libs/langchain-community/src/tools/tests/webbrowser.int.test.ts b/libs/langchain-community/src/tools/tests/webbrowser.int.test.ts deleted file mode 100644 
index 13c736de8ec9..000000000000 --- a/libs/langchain-community/src/tools/tests/webbrowser.int.test.ts +++ /dev/null @@ -1,124 +0,0 @@ -import { test, expect, describe } from "@jest/globals"; -import { WebBrowser } from "../webbrowser.js"; -import { ChatOpenAI } from "../../chat_models/openai.js"; -import { OpenAIEmbeddings } from "../../embeddings/openai.js"; -import fetchAdapter from "../../util/axios-fetch-adapter.js"; - -describe("webbrowser Test suite", () => { - test("get word of the day", async () => { - const model = new ChatOpenAI({ temperature: 0 }); - const embeddings = new OpenAIEmbeddings(); - - const browser = new WebBrowser({ model, embeddings }); - const result = await browser.call( - `"https://www.merriam-webster.com/word-of-the-day","word of the day"` - ); - - expect(result).toContain("Word of the Day:"); - }); - - test("get a summary of the page when empty request with fetch adapter", async () => { - const model = new ChatOpenAI({ temperature: 0 }); - const embeddings = new OpenAIEmbeddings(); - - const browser = new WebBrowser({ - model, - embeddings, - axiosConfig: { - adapter: fetchAdapter, - }, - }); - const result = await browser.call( - `"https://www.merriam-webster.com/word-of-the-day",""` - ); - - // fuzzy, sometimes its capped and others not - expect(result).toMatch(/word of the day/i); - }); - - test("error no url", async () => { - const model = new ChatOpenAI({ temperature: 0 }); - const embeddings = new OpenAIEmbeddings(); - - const browser = new WebBrowser({ model, embeddings }); - const result = await browser.call(`"",""`); - - expect(result).toEqual("TypeError [ERR_INVALID_URL]: Invalid URL"); - }); - - test("error no protocol or malformed", async () => { - const model = new ChatOpenAI({ temperature: 0 }); - const embeddings = new OpenAIEmbeddings(); - - const browser = new WebBrowser({ model, embeddings }); - const result = await browser.call( - `"www.merriam-webster.com/word-of-the-day","word of the day"` - ); - - 
expect(result).toEqual("TypeError [ERR_INVALID_URL]: Invalid URL"); - }); - - test("error bad site", async () => { - const model = new ChatOpenAI({ temperature: 0 }); - const embeddings = new OpenAIEmbeddings(); - - const browser = new WebBrowser({ model, embeddings }); - const result = await browser.call( - `"https://www.hDjRBKoAD0EIbF29TWM4rbXDGGM5Nhy4uzNEAdDS.com","word of the day"` - ); - - expect(result).toEqual( - "Error: getaddrinfo ENOTFOUND www.hdjrbkoad0eibf29twm4rbxdggm5nhy4uzneadds.com" - ); - }); - - test.skip("get a summary of a page that detects scraping", async () => { - const model = new ChatOpenAI({ temperature: 0 }); - const embeddings = new OpenAIEmbeddings(); - - const browser = new WebBrowser({ model, embeddings }); - const result = await browser.call( - `"https://www.musicgateway.com/spotify-pre-save",""` - ); - - expect(result).not.toEqual("Error: http response 403"); - }); - - // cant we figure the headers to fix this? - test.skip("get a summary of a page that detects scraping 2", async () => { - const model = new ChatOpenAI({ temperature: 0 }); - const embeddings = new OpenAIEmbeddings(); - - const browser = new WebBrowser({ model, embeddings }); - const result = await browser.call( - `"https://parade.com/991228/marynliles/couples-goals",""` - ); - expect(result).not.toEqual("Error: http response 403"); - }); - - test("get a summary of a page that rejects unauthorized", async () => { - const model = new ChatOpenAI({ temperature: 0 }); - const embeddings = new OpenAIEmbeddings(); - - const browser = new WebBrowser({ model, embeddings }); - const result = await browser.call( - `"https://firstround.com/review/how-to-fix-the-co-founder-fights-youre-sick-of-having-lessons-from-couples-therapist-esther-perel",""` - ); - - expect(result).toContain("Esther Perel"); - }); - - // other urls that have done this too - // "https://wsimag.com/economy-and-politics/15473-power-and-money", - // 
"https://thriveglobal.com/stories/sleep-what-to-do-what-not-to-do", - test.skip("get a summary of a page that redirects too many times", async () => { - const model = new ChatOpenAI({ temperature: 0 }); - const embeddings = new OpenAIEmbeddings(); - - const browser = new WebBrowser({ model, embeddings }); - const result = await browser.call( - `"https://www.healtheuropa.eu/why-mdma-must-be-reclassified-as-a-schedule-2-drug/95780",""` - ); - expect(result).toContain("Beckley Foundation"); - }); -}); diff --git a/libs/langchain-community/src/tools/tests/webbrowser.test.ts b/libs/langchain-community/src/tools/tests/webbrowser.test.ts deleted file mode 100644 index c950e5fac592..000000000000 --- a/libs/langchain-community/src/tools/tests/webbrowser.test.ts +++ /dev/null @@ -1,36 +0,0 @@ -import { test, expect, describe } from "@jest/globals"; -import { readFileSync } from "fs"; -import { getText, parseInputs } from "../webbrowser.js"; - -describe("webbrowser Test suite", () => { - const html = readFileSync("./src/tools/fixtures/wordoftheday.html", "utf8"); - - test("parse html to text and links", async () => { - const baseUrl = "https://www.merriam-webster.com/word-of-the-day"; - const text = getText(html, baseUrl, false); - expect(text).toContain("Word of the Day: Foible"); - }); - - test("parseInputs", () => { - expect( - parseInputs(`"https://www.merriam-webster.com/word-of-the-day",""`) - ).toEqual(["https://www.merriam-webster.com/word-of-the-day", ""]); - expect( - parseInputs( - `"https://www.merriam-webster.com/word-of-the-day","word of the day"` - ) - ).toEqual([ - "https://www.merriam-webster.com/word-of-the-day", - "word of the day", - ]); - expect( - parseInputs(`"https://www.merriam-webster.com/word-of-the-day","`) - ).toEqual(["https://www.merriam-webster.com/word-of-the-day", ""]); - expect( - parseInputs(`"https://www.merriam-webster.com/word-of-the-day",`) - ).toEqual(["https://www.merriam-webster.com/word-of-the-day", ""]); - expect( - 
parseInputs(`"https://www.merriam-webster.com/word-of-the-day"`) - ).toEqual(["https://www.merriam-webster.com/word-of-the-day", undefined]); - }); -}); diff --git a/libs/langchain-community/src/tools/tests/wikipedia.int.test.ts b/libs/langchain-community/src/tools/tests/wikipedia.int.test.ts deleted file mode 100644 index 73c7bbdc8bef..000000000000 --- a/libs/langchain-community/src/tools/tests/wikipedia.int.test.ts +++ /dev/null @@ -1,22 +0,0 @@ -import { test, expect } from "@jest/globals"; -import { WikipediaQueryRun } from "../wikipedia_query_run.js"; - -test.skip("WikipediaQueryRunTool returns a string for valid query", async () => { - const tool = new WikipediaQueryRun(); - const result = await tool.call("Langchain"); - expect(typeof result).toBe("string"); -}); - -test.skip("WikipediaQueryRunTool returns non-empty string for valid query", async () => { - const tool = new WikipediaQueryRun(); - const result = await tool.call("Langchain"); - console.log(result); - expect(result).not.toBe(""); -}); - -test.skip("WikipediaQueryRunTool returns 'No good Wikipedia Search Result was found' for bad query", async () => { - const tool = new WikipediaQueryRun(); - const result = await tool.call("kjdsfklfjskladjflkdsajflkadsjf"); - console.log(result); - expect(result).toBe("No good Wikipedia Search Result was found"); -}); diff --git a/libs/langchain-community/src/tools/tests/wolframalpha.test.ts b/libs/langchain-community/src/tools/tests/wolframalpha.test.ts deleted file mode 100644 index 2c2b7f7fd297..000000000000 --- a/libs/langchain-community/src/tools/tests/wolframalpha.test.ts +++ /dev/null @@ -1,47 +0,0 @@ -import { jest, afterEach, beforeEach, describe, expect } from "@jest/globals"; -import { WolframAlphaTool } from "../wolframalpha.js"; - -const MOCK_APP_ID = "[MOCK_APP_ID]"; -const QUERY_1 = "What is 2 + 2?"; -const MOCK_ANSWER = "[MOCK_ANSWER]"; - -describe("wolfram alpha test suite", () => { - // eslint-disable-next-line @typescript-eslint/no-explicit-any 
- let fetchMock: any; - - beforeEach(() => { - fetchMock = jest.spyOn(global, "fetch").mockImplementation( - async () => - ({ - text: () => Promise.resolve(MOCK_ANSWER), - } as Response) - ); - }); - - afterEach(() => { - fetchMock.mockRestore(); - }); - - test("test query parameters passed correctly", async () => { - const wolframAlpha = new WolframAlphaTool({ - appid: MOCK_APP_ID, - }); - await wolframAlpha._call(QUERY_1); - const [url] = fetchMock.mock.calls[0]; - const parsedUrl = new URL(url); - const params = new URLSearchParams(parsedUrl.search); - - expect(fetchMock).toBeCalledTimes(1); - expect(params.get("appid")).toBe(MOCK_APP_ID); - expect(params.get("input")).toBe(QUERY_1); - }); - - test("test answer retrieved", async () => { - const wolframAlpha = new WolframAlphaTool({ - appid: MOCK_APP_ID, - }); - - const answer = await wolframAlpha._call(QUERY_1); - expect(answer).toBe(MOCK_ANSWER); - }); -}); diff --git a/libs/langchain-community/src/types/openai-types.ts b/libs/langchain-community/src/types/openai-types.ts index f3df0278a6a9..cf4aa3f80665 100644 --- a/libs/langchain-community/src/types/openai-types.ts +++ b/libs/langchain-community/src/types/openai-types.ts @@ -1,7 +1,7 @@ -import type { OpenAI as OpenAIClient } from "openai"; +import type { OpenAIClient } from "@langchain/openai"; import { TiktokenModel } from "js-tiktoken/lite"; -import { BaseLanguageModelCallOptions } from "../base_language/index.js"; +import { BaseLanguageModelCallOptions } from "@langchain/core/language_models/base"; // reexport this type from the included package so we can easily override and extend it if needed in the future // also makes it easier for folks to import this type without digging around into the dependent packages diff --git a/libs/langchain-community/src/util/chunk.ts b/libs/langchain-community/src/util/chunk.ts new file mode 100644 index 000000000000..340ce0a46604 --- /dev/null +++ b/libs/langchain-community/src/util/chunk.ts @@ -0,0 +1,8 @@ +export const 
chunkArray = (arr: T[], chunkSize: number) => + arr.reduce((chunks, elem, index) => { + const chunkIndex = Math.floor(index / chunkSize); + const chunk = chunks[chunkIndex] || []; + // eslint-disable-next-line no-param-reassign + chunks[chunkIndex] = chunk.concat([elem]); + return chunks; + }, [] as T[][]); diff --git a/libs/langchain-community/src/util/convex.ts b/libs/langchain-community/src/util/convex.ts new file mode 100644 index 000000000000..8638aa9d04d4 --- /dev/null +++ b/libs/langchain-community/src/util/convex.ts @@ -0,0 +1,82 @@ +/* eslint-disable spaced-comment */ + +// eslint-disable-next-line import/no-extraneous-dependencies +import { + internalQueryGeneric as internalQuery, + internalMutationGeneric as internalMutation, +} from "convex/server"; +// eslint-disable-next-line import/no-extraneous-dependencies +import { GenericId, v } from "convex/values"; + +export const get = /*#__PURE__*/ internalQuery({ + args: { + id: /*#__PURE__*/ v.string(), + }, + handler: async (ctx, args) => { + const result = await ctx.db.get(args.id as GenericId); + return result; + }, +}); + +export const insert = /*#__PURE__*/ internalMutation({ + args: { + table: /*#__PURE__*/ v.string(), + document: /*#__PURE__*/ v.any(), + }, + handler: async (ctx, args) => { + await ctx.db.insert(args.table, args.document); + }, +}); + +export const lookup = /*#__PURE__*/ internalQuery({ + args: { + table: /*#__PURE__*/ v.string(), + index: /*#__PURE__*/ v.string(), + keyField: /*#__PURE__*/ v.string(), + key: /*#__PURE__*/ v.string(), + }, + handler: async (ctx, args) => { + const result = await ctx.db + .query(args.table) + .withIndex(args.index, (q) => q.eq(args.keyField, args.key)) + .collect(); + return result; + }, +}); + +export const upsert = /*#__PURE__*/ internalMutation({ + args: { + table: /*#__PURE__*/ v.string(), + index: /*#__PURE__*/ v.string(), + keyField: /*#__PURE__*/ v.string(), + key: /*#__PURE__*/ v.string(), + document: /*#__PURE__*/ v.any(), + }, + handler: 
async (ctx, args) => { + const existing = await ctx.db + .query(args.table) + .withIndex(args.index, (q) => q.eq(args.keyField, args.key)) + .unique(); + if (existing !== null) { + await ctx.db.replace(existing._id, args.document); + } else { + await ctx.db.insert(args.table, args.document); + } + }, +}); + +export const deleteMany = /*#__PURE__*/ internalMutation({ + args: { + table: /*#__PURE__*/ v.string(), + index: /*#__PURE__*/ v.string(), + keyField: /*#__PURE__*/ v.string(), + key: /*#__PURE__*/ v.string(), + }, + handler: async (ctx, args) => { + const existing = await ctx.db + .query(args.table) + .withIndex(args.index, (q) => q.eq(args.keyField, args.key)) + .collect(); + await Promise.all(existing.map((doc) => ctx.db.delete(doc._id))); + }, +}); diff --git a/libs/langchain-community/src/util/googlevertexai-connection.ts b/libs/langchain-community/src/util/googlevertexai-connection.ts index ee4f83de7e59..96ed10ebca5c 100644 --- a/libs/langchain-community/src/util/googlevertexai-connection.ts +++ b/libs/langchain-community/src/util/googlevertexai-connection.ts @@ -1,5 +1,6 @@ -import { BaseLanguageModelCallOptions } from "../base_language/index.js"; -import { AsyncCaller, AsyncCallerCallOptions } from "./async_caller.js"; +import { BaseLanguageModelCallOptions } from "@langchain/core/language_models/base"; +import { AsyncCaller, AsyncCallerCallOptions } from "@langchain/core/utils/async_caller"; +import { GenerationChunk } from "@langchain/core/outputs"; import type { GoogleVertexAIBaseLLMInput, GoogleVertexAIBasePrediction, @@ -11,7 +12,6 @@ import type { GoogleAbstractedClientOps, GoogleAbstractedClientOpsMethod, } from "../types/googlevertexai-types.js"; -import { GenerationChunk } from "../schema/index.js"; export abstract class GoogleConnection< CallOptions extends AsyncCallerCallOptions, diff --git a/libs/langchain-community/src/util/googlevertexai-webauth.ts b/libs/langchain-community/src/util/googlevertexai-webauth.ts index 
87168fbaa1b0..e5a5f39ff11a 100644 --- a/libs/langchain-community/src/util/googlevertexai-webauth.ts +++ b/libs/langchain-community/src/util/googlevertexai-webauth.ts @@ -3,7 +3,7 @@ import { getCredentials, Credentials, } from "web-auth-library/google"; -import { getEnvironmentVariable } from "./env.js"; +import { getEnvironmentVariable } from "@langchain/core/utils/env"; import type { GoogleAbstractedClient, GoogleAbstractedClientOps, diff --git a/libs/langchain-community/src/util/momento.ts b/libs/langchain-community/src/util/momento.ts new file mode 100644 index 000000000000..2ef4666ed34e --- /dev/null +++ b/libs/langchain-community/src/util/momento.ts @@ -0,0 +1,26 @@ +/* eslint-disable no-instanceof/no-instanceof */ +import { ICacheClient, CreateCache } from "@gomomento/sdk"; + +/** + * Utility function to ensure that a Momento cache exists. + * If the cache does not exist, it is created. + * + * @param client The Momento cache client. + * @param cacheName The name of the cache to ensure exists. + */ +export async function ensureCacheExists( + client: ICacheClient, + cacheName: string +): Promise { + const createResponse = await client.createCache(cacheName); + if ( + createResponse instanceof CreateCache.Success || + createResponse instanceof CreateCache.AlreadyExists + ) { + // pass + } else if (createResponse instanceof CreateCache.Error) { + throw createResponse.innerException(); + } else { + throw new Error(`Unknown response type: ${createResponse.toString()}`); + } +} diff --git a/libs/langchain-community/src/util/testing.ts b/libs/langchain-community/src/util/testing.ts new file mode 100644 index 000000000000..205ebc941f5b --- /dev/null +++ b/libs/langchain-community/src/util/testing.ts @@ -0,0 +1,107 @@ +import { Embeddings, EmbeddingsParams } from "@langchain/core/embeddings"; + +/** + * A class that provides fake embeddings by overriding the embedDocuments + * and embedQuery methods to return fixed values. 
+ */ +export class FakeEmbeddings extends Embeddings { + constructor(params?: EmbeddingsParams) { + super(params ?? {}); + } + + /** + * Generates fixed embeddings for a list of documents. + * @param documents List of documents to generate embeddings for. + * @returns A promise that resolves with a list of fixed embeddings for each document. + */ + embedDocuments(documents: string[]): Promise { + return Promise.resolve(documents.map(() => [0.1, 0.2, 0.3, 0.4])); + } + + /** + * Generates a fixed embedding for a query. + * @param _ The query to generate an embedding for. + * @returns A promise that resolves with a fixed embedding for the query. + */ + embedQuery(_: string): Promise { + return Promise.resolve([0.1, 0.2, 0.3, 0.4]); + } +} + +/** + * An interface that defines additional parameters specific to the + * SyntheticEmbeddings class. + */ +interface SyntheticEmbeddingsParams extends EmbeddingsParams { + vectorSize: number; +} + +/** + * A class that provides synthetic embeddings by overriding the + * embedDocuments and embedQuery methods to generate embeddings based on + * the input documents. The embeddings are generated by converting each + * document into chunks, calculating a numerical value for each chunk, and + * returning an array of these values as the embedding. + */ +export class SyntheticEmbeddings + extends Embeddings + implements SyntheticEmbeddingsParams +{ + vectorSize: number; + + constructor(params?: SyntheticEmbeddingsParams) { + super(params ?? {}); + this.vectorSize = params?.vectorSize ?? 4; + } + + /** + * Generates synthetic embeddings for a list of documents. + * @param documents List of documents to generate embeddings for. + * @returns A promise that resolves with a list of synthetic embeddings for each document. + */ + async embedDocuments(documents: string[]): Promise { + return Promise.all(documents.map((doc) => this.embedQuery(doc))); + } + + /** + * Generates a synthetic embedding for a document. 
The document is + * converted into chunks, a numerical value is calculated for each chunk, + * and an array of these values is returned as the embedding. + * @param document The document to generate an embedding for. + * @returns A promise that resolves with a synthetic embedding for the document. + */ + async embedQuery(document: string): Promise { + let doc = document; + + // Only use the letters (and space) from the document, and make them lower case + doc = doc.toLowerCase().replaceAll(/[^a-z ]/g, ""); + + // Pad the document to make sure it has a divisible number of chunks + const padMod = doc.length % this.vectorSize; + const padGapSize = padMod === 0 ? 0 : this.vectorSize - padMod; + const padSize = doc.length + padGapSize; + doc = doc.padEnd(padSize, " "); + + // Break it into chunks + const chunkSize = doc.length / this.vectorSize; + const docChunk = []; + for (let co = 0; co < doc.length; co += chunkSize) { + docChunk.push(doc.slice(co, co + chunkSize)); + } + + // Turn each chunk into a number + const ret: number[] = docChunk.map((s) => { + let sum = 0; + // Get a total value by adding the value of each character in the string + for (let co = 0; co < s.length; co += 1) { + sum += s === " " ? 0 : s.charCodeAt(co); + } + // Reduce this to a number between 0 and 25 inclusive + // Then get the fractional number by dividing it by 26 + const ret = (sum % 26) / 26; + return ret; + }); + + return ret; + } +} diff --git a/libs/langchain-community/src/util/time.ts b/libs/langchain-community/src/util/time.ts new file mode 100644 index 000000000000..f6f5263e4722 --- /dev/null +++ b/libs/langchain-community/src/util/time.ts @@ -0,0 +1,10 @@ +/** + * Sleep for a given amount of time. + * @param ms - The number of milliseconds to sleep for. Defaults to 1000. + * @returns A promise that resolves when the sleep is complete. 
+ */ +export async function sleep(ms = 1000): Promise { + return new Promise((resolve) => { + setTimeout(resolve, ms); + }); +} diff --git a/libs/langchain-community/src/vectorstores.ts b/libs/langchain-community/src/vectorstores.ts deleted file mode 100644 index 27c83543801b..000000000000 --- a/libs/langchain-community/src/vectorstores.ts +++ /dev/null @@ -1,80 +0,0 @@ -import { Embeddings } from "@langchain/core/embeddings"; -import { VectorStore } from "@langchain/core/vectorstores"; -import { Document } from "@langchain/core/documents"; - -/** - * Database config for your vectorstore. - */ -export interface VectorstoreIntegrationParams {} - -/** - * Class for managing and operating vector search applications with - * Tigris, an open-source Serverless NoSQL Database and Search Platform. - */ -export class VectorstoreIntegration extends VectorStore { - // Replace - _vectorstoreType(): string { - return "vectorstore_integration"; - } - - constructor(embeddings: Embeddings, params: VectorstoreIntegrationParams) { - super(embeddings, params); - this.embeddings = embeddings; - } - - /** - * Method to add an array of documents to the vectorstore. - * - * Useful to override in case your vectorstore doesn't work directly with embeddings. - */ - async addDocuments( - documents: Document[], - options?: { ids?: string[] } | string[] - ): Promise { - const texts = documents.map(({ pageContent }) => pageContent); - await this.addVectors( - await this.embeddings.embedDocuments(texts), - documents, - options - ); - } - - /** - * Method to add raw vectors to the vectorstore. - */ - async addVectors( - _vectors: number[][], - _documents: Document[], - _options?: { ids?: string[] } | string[] - ) { - throw new Error("Not implemented."); - } - - /** - * Method to perform a similarity search over the vectorstore and return - * the k most similar vectors along with their similarity scores. 
- */ - async similaritySearchVectorWithScore( - _query: number[], - _k: number, - _filter?: object - ): Promise<[Document, number][]> { - throw new Error("Not implemented."); - } - - /** - * Static method to create a new instance of the vectorstore from an - * array of Document instances. - * - * Other common static initializer names are fromExistingIndex, initialize, and fromTexts. - */ - static async fromDocuments( - docs: Document[], - embeddings: Embeddings, - dbConfig: VectorstoreIntegrationParams - ): Promise { - const instance = new this(embeddings, dbConfig); - await instance.addDocuments(docs); - return instance; - } -} diff --git a/libs/langchain-community/src/vectorstores/analyticdb.ts b/libs/langchain-community/src/vectorstores/analyticdb.ts new file mode 100644 index 000000000000..bd4f5caffabe --- /dev/null +++ b/libs/langchain-community/src/vectorstores/analyticdb.ts @@ -0,0 +1,390 @@ +import * as uuid from "uuid"; +import pg, { Pool, PoolConfig } from "pg"; +import { from as copyFrom } from "pg-copy-streams"; +import { pipeline } from "node:stream/promises"; +import { Readable } from "node:stream"; + +import { VectorStore } from "@langchain/core/vectorstores"; +import { Embeddings } from "@langchain/core/embeddings"; +import { Document } from "@langchain/core/documents"; + +const _LANGCHAIN_DEFAULT_COLLECTION_NAME = "langchain_document"; + +/** + * Interface defining the arguments required to create an instance of + * `AnalyticDBVectorStore`. + */ +export interface AnalyticDBArgs { + connectionOptions: PoolConfig; + embeddingDimension?: number; + collectionName?: string; + preDeleteCollection?: boolean; +} + +/** + * Interface defining the structure of data to be stored in the + * AnalyticDB. 
+ */ +interface DataType { + id: string; + embedding: number[]; + document: string; + // eslint-disable-next-line @typescript-eslint/no-explicit-any + metadata: Record; +} + +/** + * Class that provides methods for creating and managing a collection of + * documents in an AnalyticDB, adding documents or vectors to the + * collection, performing similarity search on vectors, and creating an + * instance of `AnalyticDBVectorStore` from texts or documents. + */ +export class AnalyticDBVectorStore extends VectorStore { + // eslint-disable-next-line @typescript-eslint/no-explicit-any + declare FilterType: Record; + + private pool: Pool; + + private embeddingDimension?: number; + + private collectionName: string; + + private preDeleteCollection: boolean; + + private isCreateCollection = false; + + _vectorstoreType(): string { + return "analyticdb"; + } + + constructor(embeddings: Embeddings, args: AnalyticDBArgs) { + super(embeddings, args); + + this.pool = new pg.Pool({ + host: args.connectionOptions.host, + port: args.connectionOptions.port, + database: args.connectionOptions.database, + user: args.connectionOptions.user, + password: args.connectionOptions.password, + }); + this.embeddingDimension = args.embeddingDimension; + this.collectionName = + args.collectionName || _LANGCHAIN_DEFAULT_COLLECTION_NAME; + this.preDeleteCollection = args.preDeleteCollection || false; + } + + /** + * Closes all the clients in the pool and terminates the pool. + * @returns Promise that resolves when all clients are closed and the pool is terminated. + */ + async end(): Promise { + return this.pool.end(); + } + + /** + * Creates a new table in the database if it does not already exist. The + * table is created with columns for id, embedding, document, and + * metadata. An index is also created on the embedding column if it does + * not already exist. + * @returns Promise that resolves when the table and index are created. 
+ */ + async createTableIfNotExists(): Promise { + if (!this.embeddingDimension) { + this.embeddingDimension = ( + await this.embeddings.embedQuery("test") + ).length; + } + const client = await this.pool.connect(); + try { + await client.query("BEGIN"); + // Create the table if it doesn't exist + await client.query(` + CREATE TABLE IF NOT EXISTS ${this.collectionName} ( + id TEXT PRIMARY KEY DEFAULT NULL, + embedding REAL[], + document TEXT, + metadata JSON + ); + `); + + // Check if the index exists + const indexName = `${this.collectionName}_embedding_idx`; + const indexQuery = ` + SELECT 1 + FROM pg_indexes + WHERE indexname = '${indexName}'; + `; + const result = await client.query(indexQuery); + + // Create the index if it doesn't exist + if (result.rowCount === 0) { + const indexStatement = ` + CREATE INDEX ${indexName} + ON ${this.collectionName} USING ann(embedding) + WITH ( + "dim" = ${this.embeddingDimension}, + "hnsw_m" = 100 + ); + `; + await client.query(indexStatement); + } + await client.query("COMMIT"); + } catch (err) { + await client.query("ROLLBACK"); + throw err; + } finally { + client.release(); + } + } + + /** + * Deletes the collection from the database if it exists. + * @returns Promise that resolves when the collection is deleted. + */ + async deleteCollection(): Promise { + const dropStatement = `DROP TABLE IF EXISTS ${this.collectionName};`; + await this.pool.query(dropStatement); + } + + /** + * Creates a new collection in the database. If `preDeleteCollection` is + * true, any existing collection with the same name is deleted before the + * new collection is created. + * @returns Promise that resolves when the collection is created. + */ + async createCollection(): Promise { + if (this.preDeleteCollection) { + await this.deleteCollection(); + } + await this.createTableIfNotExists(); + this.isCreateCollection = true; + } + + /** + * Adds an array of documents to the collection. 
The documents are first + * converted to vectors using the `embedDocuments` method of the + * `embeddings` instance. + * @param documents Array of Document instances to be added to the collection. + * @returns Promise that resolves when the documents are added. + */ + async addDocuments(documents: Document[]): Promise { + const texts = documents.map(({ pageContent }) => pageContent); + return this.addVectors( + await this.embeddings.embedDocuments(texts), + documents + ); + } + + /** + * Adds an array of vectors and corresponding documents to the collection. + * The vectors and documents are batch inserted into the database. + * @param vectors Array of vectors to be added to the collection. + * @param documents Array of Document instances corresponding to the vectors. + * @returns Promise that resolves when the vectors and documents are added. + */ + async addVectors(vectors: number[][], documents: Document[]): Promise { + if (vectors.length === 0) { + return; + } + if (vectors.length !== documents.length) { + throw new Error(`Vectors and documents must have the same length`); + } + if (!this.embeddingDimension) { + this.embeddingDimension = ( + await this.embeddings.embedQuery("test") + ).length; + } + if (vectors[0].length !== this.embeddingDimension) { + throw new Error( + `Vectors must have the same length as the number of dimensions (${this.embeddingDimension})` + ); + } + + if (!this.isCreateCollection) { + await this.createCollection(); + } + + const client = await this.pool.connect(); + try { + const chunkSize = 500; + const chunksTableData: DataType[] = []; + + for (let i = 0; i < documents.length; i += 1) { + chunksTableData.push({ + id: uuid.v4(), + embedding: vectors[i], + document: documents[i].pageContent, + metadata: documents[i].metadata, + }); + + // Execute the batch insert when the batch size is reached + if (chunksTableData.length === chunkSize) { + const rs = new Readable(); + let currentIndex = 0; + rs._read = function () { + if (currentIndex 
=== chunkSize) { + rs.push(null); + } else { + const data = chunksTableData[currentIndex]; + rs.push( + `${data.id}\t{${data.embedding.join(",")}}\t${ + data.document + }\t${JSON.stringify(data.metadata)}\n` + ); + currentIndex += 1; + } + }; + const ws = client.query( + copyFrom( + `COPY ${this.collectionName}(id, embedding, document, metadata) FROM STDIN` + ) + ); + + await pipeline(rs, ws); + // Clear the chunksTableData list for the next batch + chunksTableData.length = 0; + } + } + + // Insert any remaining records that didn't make up a full batch + if (chunksTableData.length > 0) { + const rs = new Readable(); + let currentIndex = 0; + rs._read = function () { + if (currentIndex === chunksTableData.length) { + rs.push(null); + } else { + const data = chunksTableData[currentIndex]; + rs.push( + `${data.id}\t{${data.embedding.join(",")}}\t${ + data.document + }\t${JSON.stringify(data.metadata)}\n` + ); + currentIndex += 1; + } + }; + const ws = client.query( + copyFrom( + `COPY ${this.collectionName}(id, embedding, document, metadata) FROM STDIN` + ) + ); + await pipeline(rs, ws); + } + } finally { + client.release(); + } + } + + /** + * Performs a similarity search on the vectors in the collection. The + * search is performed using the given query vector and returns the top k + * most similar vectors along with their corresponding documents and + * similarity scores. + * @param query Query vector for the similarity search. + * @param k Number of top similar vectors to return. + * @param filter Optional. Filter to apply on the metadata of the documents. + * @returns Promise that resolves to an array of tuples, each containing a Document instance and its similarity score. + */ + async similaritySearchVectorWithScore( + query: number[], + k: number, + filter?: this["FilterType"] + ): Promise<[Document, number][]> { + if (!this.isCreateCollection) { + await this.createCollection(); + } + + let filterCondition = ""; + const filterEntries = filter ? 
Object.entries(filter) : []; + if (filterEntries.length > 0) { + const conditions = filterEntries.map( + (_, index) => `metadata->>$${2 * index + 3} = $${2 * index + 4}` + ); + filterCondition = `WHERE ${conditions.join(" AND ")}`; + } + + const sqlQuery = ` + SELECT *, l2_distance(embedding, $1::real[]) AS distance + FROM ${this.collectionName} + ${filterCondition} + ORDER BY embedding <-> $1 + LIMIT $2; + `; + + // Execute the query and fetch the results + const { rows } = await this.pool.query(sqlQuery, [ + query, + k, + ...filterEntries.flatMap(([key, value]) => [key, value]), + ]); + + const result: [Document, number][] = rows.map((row) => [ + new Document({ pageContent: row.document, metadata: row.metadata }), + row.distance, + ]); + + return result; + } + + /** + * Creates an instance of `AnalyticDBVectorStore` from an array of texts + * and corresponding metadata. The texts are first converted to Document + * instances before being added to the collection. + * @param texts Array of texts to be added to the collection. + * @param metadatas Array or object of metadata corresponding to the texts. + * @param embeddings Embeddings instance used to convert the texts to vectors. + * @param dbConfig Configuration for the AnalyticDB. + * @returns Promise that resolves to an instance of `AnalyticDBVectorStore`. + */ + static async fromTexts( + texts: string[], + metadatas: object[] | object, + embeddings: Embeddings, + dbConfig: AnalyticDBArgs + ): Promise { + const docs = []; + for (let i = 0; i < texts.length; i += 1) { + const metadata = Array.isArray(metadatas) ? metadatas[i] : metadatas; + const newDoc = new Document({ + pageContent: texts[i], + metadata, + }); + docs.push(newDoc); + } + return AnalyticDBVectorStore.fromDocuments(docs, embeddings, dbConfig); + } + + /** + * Creates an instance of `AnalyticDBVectorStore` from an array of + * Document instances. The documents are added to the collection. 
+ * @param docs Array of Document instances to be added to the collection. + * @param embeddings Embeddings instance used to convert the documents to vectors. + * @param dbConfig Configuration for the AnalyticDB. + * @returns Promise that resolves to an instance of `AnalyticDBVectorStore`. + */ + static async fromDocuments( + docs: Document[], + embeddings: Embeddings, + dbConfig: AnalyticDBArgs + ): Promise { + const instance = new this(embeddings, dbConfig); + await instance.addDocuments(docs); + return instance; + } + + /** + * Creates an instance of `AnalyticDBVectorStore` from an existing index + * in the database. A new collection is created in the database. + * @param embeddings Embeddings instance used to convert the documents to vectors. + * @param dbConfig Configuration for the AnalyticDB. + * @returns Promise that resolves to an instance of `AnalyticDBVectorStore`. + */ + static async fromExistingIndex( + embeddings: Embeddings, + dbConfig: AnalyticDBArgs + ): Promise { + const instance = new this(embeddings, dbConfig); + await instance.createCollection(); + return instance; + } +} diff --git a/libs/langchain-community/src/vectorstores/cassandra.ts b/libs/langchain-community/src/vectorstores/cassandra.ts new file mode 100644 index 000000000000..4a90a38e56cd --- /dev/null +++ b/libs/langchain-community/src/vectorstores/cassandra.ts @@ -0,0 +1,581 @@ +/* eslint-disable prefer-template */ +import { Client as CassandraClient, DseClientOptions } from "cassandra-driver"; + +import { AsyncCaller, AsyncCallerParams } from "@langchain/core/utils/async_caller"; +import { Embeddings } from "@langchain/core/embeddings"; +import { VectorStore } from "@langchain/core/vectorstores"; +import { Document } from "@langchain/core/documents"; + +export interface Column { + type: string; + name: string; + partition?: boolean; +} + +export interface Index { + name: string; + value: string; +} + +export interface Filter { + name: string; + value: unknown; + operator?: string; 
+} + +export type WhereClause = Filter[] | Filter | Record; + +export type SupportedVectorTypes = "cosine" | "dot_product" | "euclidean"; + +export interface CassandraLibArgs extends DseClientOptions, AsyncCallerParams { + table: string; + keyspace: string; + vectorType?: SupportedVectorTypes; + dimensions: number; + primaryKey: Column | Column[]; + metadataColumns: Column[]; + withClause?: string; + indices?: Index[]; + batchSize?: number; +} + +/** + * Class for interacting with the Cassandra database. It extends the + * VectorStore class and provides methods for adding vectors and + * documents, searching for similar vectors, and creating instances from + * texts or documents. + */ +export class CassandraStore extends VectorStore { + // eslint-disable-next-line @typescript-eslint/no-explicit-any + declare FilterType: WhereClause; + + private client: CassandraClient; + + private readonly vectorType: SupportedVectorTypes; + + private readonly dimensions: number; + + private readonly keyspace: string; + + private primaryKey: Column[]; + + private metadataColumns: Column[]; + + private withClause: string; + + private selectColumns: string; + + private readonly table: string; + + private indices: Index[]; + + private isInitialized = false; + + asyncCaller: AsyncCaller; + + private readonly batchSize: number; + + _vectorstoreType(): string { + return "cassandra"; + } + + constructor(embeddings: Embeddings, args: CassandraLibArgs) { + super(embeddings, args); + + const { + indices = [], + maxConcurrency = 25, + withClause = "", + batchSize = 1, + vectorType = "cosine", + dimensions, + keyspace, + table, + primaryKey, + metadataColumns, + } = args; + + const argsWithDefaults = { + ...args, + indices, + maxConcurrency, + withClause, + batchSize, + vectorType, + }; + this.asyncCaller = new AsyncCaller(argsWithDefaults); + this.client = new CassandraClient(argsWithDefaults); + + // Assign properties + this.vectorType = vectorType; + this.dimensions = dimensions; + 
this.keyspace = keyspace; + this.table = table; + this.primaryKey = Array.isArray(primaryKey) ? primaryKey : [primaryKey]; + this.metadataColumns = metadataColumns; + this.withClause = withClause.trim().replace(/^with\s*/i, ""); + this.indices = indices; + this.batchSize = batchSize >= 1 ? batchSize : 1; + } + + /** + * Method to save vectors to the Cassandra database. + * @param vectors Vectors to save. + * @param documents The documents associated with the vectors. + * @returns Promise that resolves when the vectors have been added. + */ + async addVectors(vectors: number[][], documents: Document[]): Promise { + if (vectors.length === 0) { + return; + } + + if (!this.isInitialized) { + await this.initialize(); + } + + await this.insertAll(vectors, documents); + } + + /** + * Method to add documents to the Cassandra database. + * @param documents The documents to add. + * @returns Promise that resolves when the documents have been added. + */ + async addDocuments(documents: Document[]): Promise { + return this.addVectors( + await this.embeddings.embedDocuments(documents.map((d) => d.pageContent)), + documents + ); + } + + /** + * Method to search for vectors that are similar to a given query vector. + * @param query The query vector. + * @param k The number of similar vectors to return. + * @param filter + * @returns Promise that resolves with an array of tuples, each containing a Document and a score. + */ + async similaritySearchVectorWithScore( + query: number[], + k: number, + filter?: WhereClause + ): Promise<[Document, number][]> { + if (!this.isInitialized) { + await this.initialize(); + } + + // Ensure we have an array of Filter from the public interface + const filters = this.asFilters(filter); + + const queryStr = this.buildSearchQuery(filters); + + // Search query will be of format: + // SELECT ..., text, similarity_x(?) AS similarity_score + // FROM ... + // + // ORDER BY vector ANN OF ? + // LIMIT ? 
+ // If any filter values are specified, they will be in the WHERE clause as + // filter.name filter.operator ? + // queryParams is a list of bind variables sent with the prepared statement + const queryParams = []; + const vectorAsFloat32Array = new Float32Array(query); + queryParams.push(vectorAsFloat32Array); + if (filters) { + const values = (filters as Filter[]).map(({ value }) => value); + queryParams.push(...values); + } + queryParams.push(vectorAsFloat32Array); + queryParams.push(k); + + const queryResultSet = await this.client.execute(queryStr, queryParams, { + prepare: true, + }); + + return queryResultSet?.rows.map((row) => { + const textContent = row.text; + const sanitizedRow = { ...row }; + delete sanitizedRow.text; + delete sanitizedRow.similarity_score; + + // A null value in Cassandra evaluates to a deleted column + // as this is treated as a tombstone record for the cell. + Object.keys(sanitizedRow).forEach((key) => { + if (sanitizedRow[key] === null) { + delete sanitizedRow[key]; + } + }); + + return [ + new Document({ pageContent: textContent, metadata: sanitizedRow }), + row.similarity_score, + ]; + }); + } + + /** + * Static method to create an instance of CassandraStore from texts. + * @param texts The texts to use. + * @param metadatas The metadata associated with the texts. + * @param embeddings The embeddings to use. + * @param args The arguments for the CassandraStore. + * @returns Promise that resolves with a new instance of CassandraStore. + */ + static async fromTexts( + texts: string[], + metadatas: object | object[], + embeddings: Embeddings, + args: CassandraLibArgs + ): Promise { + const docs: Document[] = []; + + for (let index = 0; index < texts.length; index += 1) { + const metadata = Array.isArray(metadatas) ? 
metadatas[index] : metadatas; + const doc = new Document({ + pageContent: texts[index], + metadata, + }); + docs.push(doc); + } + + return CassandraStore.fromDocuments(docs, embeddings, args); + } + + /** + * Static method to create an instance of CassandraStore from documents. + * @param docs The documents to use. + * @param embeddings The embeddings to use. + * @param args The arguments for the CassandraStore. + * @returns Promise that resolves with a new instance of CassandraStore. + */ + static async fromDocuments( + docs: Document[], + embeddings: Embeddings, + args: CassandraLibArgs + ): Promise { + const instance = new this(embeddings, args); + await instance.addDocuments(docs); + return instance; + } + + /** + * Static method to create an instance of CassandraStore from an existing + * index. + * @param embeddings The embeddings to use. + * @param args The arguments for the CassandraStore. + * @returns Promise that resolves with a new instance of CassandraStore. + */ + static async fromExistingIndex( + embeddings: Embeddings, + args: CassandraLibArgs + ): Promise { + const instance = new this(embeddings, args); + + await instance.initialize(); + return instance; + } + + /** + * Method to initialize the Cassandra database. + * @returns Promise that resolves when the database has been initialized. + */ + private async initialize(): Promise { + let cql = ""; + cql = `CREATE TABLE IF NOT EXISTS ${this.keyspace}.${this.table} ( + ${this.primaryKey.map((col) => `${col.name} ${col.type}`).join(", ")} + , text TEXT + ${ + this.metadataColumns.length > 0 + ? ", " + + this.metadataColumns + .map((col) => `${col.name} ${col.type}`) + .join(", ") + : "" + } + , vector VECTOR + , ${this.buildPrimaryKey(this.primaryKey)} + ) ${this.withClause ? `WITH ${this.withClause}` : ""};`; + + await this.client.execute(cql); + + this.selectColumns = `${this.primaryKey + .map((col) => `${col.name}`) + .join(", ")} + ${ + this.metadataColumns.length > 0 + ? 
", " + + this.metadataColumns + .map((col) => `${col.name}`) + .join(", ") + : "" + }`; + + cql = `CREATE CUSTOM INDEX IF NOT EXISTS idx_vector_${this.table} + ON ${this.keyspace}.${ + this.table + }(vector) USING 'StorageAttachedIndex' WITH OPTIONS = {'similarity_function': '${this.vectorType.toUpperCase()}'};`; + await this.client.execute(cql); + + for await (const { name, value } of this.indices) { + cql = `CREATE CUSTOM INDEX IF NOT EXISTS idx_${this.table}_${name} + ON ${this.keyspace}.${this.table} ${value} USING 'StorageAttachedIndex';`; + await this.client.execute(cql); + } + this.isInitialized = true; + } + + /** + * Method to build the PRIMARY KEY clause for CREATE TABLE. + * @param columns: list of Column to include in the key + * @returns The clause, including PRIMARY KEY + */ + private buildPrimaryKey(columns: Column[]): string { + // Partition columns may be specified with optional attribute col.partition + const partitionColumns = columns + .filter((col) => col.partition) + .map((col) => col.name) + .join(", "); + + // All columns not part of the partition key are clustering columns + const clusteringColumns = columns + .filter((col) => !col.partition) + .map((col) => col.name) + .join(", "); + + let primaryKey = ""; + + // If partition columns are specified, they are included in a () wrapper + // If not, the clustering columns are used, and the first clustering column + // is the partition key per normal Cassandra behaviour. + if (partitionColumns) { + primaryKey = `PRIMARY KEY ((${partitionColumns}), ${clusteringColumns})`; + } else { + primaryKey = `PRIMARY KEY (${clusteringColumns})`; + } + + return primaryKey; + } + + /** + * Type guard to check if an object is a Filter. 
+ * @param obj: the object to check + * @returns boolean indicating if the object is a Filter + */ + private isFilter(obj: unknown): obj is Filter { + return ( + typeof obj === "object" && obj !== null && "name" in obj && "value" in obj + ); + } + + /** + * Helper to convert Record to a Filter[] + * @param record: a key-value Record collection + * @returns Record as a Filter[] + */ + private convertToFilters(record: Record): Filter[] { + return Object.entries(record).map(([name, value]) => ({ + name, + value, + operator: "=", + })); + } + + /** + * Input santisation method for filters, as FilterType is not required to be + * Filter[], but we want to use Filter[] internally. + * @param record: the proposed filter + * @returns A Filter[], which may be empty + */ + private asFilters(record: WhereClause | undefined): Filter[] { + if (!record) { + return []; + } + + // If record is already an array + if (Array.isArray(record)) { + return record.flatMap((item) => { + // Check if item is a Filter before passing it to convertToFilters + if (this.isFilter(item)) { + return [item]; + } else { + // Here item is treated as Record + return this.convertToFilters(item); + } + }); + } + + // If record is a single Filter object, return it in an array + if (this.isFilter(record)) { + return [record]; + } + + // If record is a Record, convert it to an array of Filter + return this.convertToFilters(record); + } + + /** + * Method to build the WHERE clause of a CQL query, using bind variable ? + * @param filters list of filters to include in the WHERE clause + * @returns The WHERE clause + */ + private buildWhereClause(filters?: Filter[]): string { + if (!filters || filters.length === 0) { + return ""; + } + + const whereConditions = filters.map( + ({ name, operator = "=" }) => `${name} ${operator} ?` + ); + + return `WHERE ${whereConditions.join(" AND ")}`; + } + + /** + * Method to build an CQL query for searching for similar vectors in the + * Cassandra database. 
+ * @param query The query vector. + * @param k The number of similar vectors to return. + * @param filters + * @returns The CQL query string. + */ + private buildSearchQuery(filters: Filter[]): string { + const whereClause = filters ? this.buildWhereClause(filters) : ""; + + const cqlQuery = `SELECT ${this.selectColumns}, text, similarity_${this.vectorType}(vector, ?) AS similarity_score + FROM ${this.keyspace}.${this.table} ${whereClause} ORDER BY vector ANN OF ? LIMIT ?`; + + return cqlQuery; + } + + /** + * Method for inserting vectors and documents into the Cassandra database in a batch. + * @param batchVectors The list of vectors to insert. + * @param batchDocuments The list of documents to insert. + * @returns Promise that resolves when the batch has been inserted. + */ + private async executeInsert( + batchVectors: number[][], + batchDocuments: Document[] + ): Promise { + // Input validation: Check if the lengths of batchVectors and batchDocuments are the same + if (batchVectors.length !== batchDocuments.length) { + throw new Error( + `The lengths of vectors (${batchVectors.length}) and documents (${batchDocuments.length}) must be the same.` + ); + } + + // Initialize an array to hold query objects + const queries = []; + + // Loop through each vector and document in the batch + for (let i = 0; i < batchVectors.length; i += 1) { + // Convert the list of numbers to a Float32Array, the driver's expected format of a vector + const preparedVector = new Float32Array(batchVectors[i]); + // Retrieve the corresponding document + const document = batchDocuments[i]; + + // Extract metadata column names and values from the document + const metadataColNames = Object.keys(document.metadata); + const metadataVals = Object.values(document.metadata); + + // Prepare the metadata columns string for the query, if metadata exists + const metadataInsert = + metadataColNames.length > 0 ? 
", " + metadataColNames.join(", ") : ""; + + // Construct the query string and parameters + const query = { + query: `INSERT INTO ${this.keyspace}.${ + this.table + } (vector, text${metadataInsert}) + VALUES (?, ?${", ?".repeat(metadataColNames.length)})`, + params: [preparedVector, document.pageContent, ...metadataVals], + }; + + // Add the query to the list + queries.push(query); + } + + // Execute the queries: use a batch if multiple, otherwise execute a single query + if (queries.length === 1) { + await this.client.execute(queries[0].query, queries[0].params, { + prepare: true, + }); + } else { + await this.client.batch(queries, { prepare: true, logged: false }); + } + } + + /** + * Method for inserting vectors and documents into the Cassandra database in + * parallel, keeping within maxConcurrency number of active insert statements. + * @param vectors The vectors to insert. + * @param documents The documents to insert. + * @returns Promise that resolves when the documents have been added. 
+ */ + private async insertAll( + vectors: number[][], + documents: Document[] + ): Promise { + // Input validation: Check if the lengths of vectors and documents are the same + if (vectors.length !== documents.length) { + throw new Error( + `The lengths of vectors (${vectors.length}) and documents (${documents.length}) must be the same.` + ); + } + + // Early exit: If there are no vectors or documents to insert, return immediately + if (vectors.length === 0) { + return; + } + + // Ensure the store is initialized before proceeding + if (!this.isInitialized) { + await this.initialize(); + } + + // Initialize an array to hold promises for each batch insert + const insertPromises: Promise[] = []; + + // Buffers to hold the current batch of vectors and documents + let currentBatchVectors: number[][] = []; + let currentBatchDocuments: Document[] = []; + + // Loop through each vector/document pair to insert; we use + // <= vectors.length to ensure the last batch is inserted + for (let i = 0; i <= vectors.length; i += 1) { + // Check if we're still within the array boundaries + if (i < vectors.length) { + // Add the current vector and document to the batch + currentBatchVectors.push(vectors[i]); + currentBatchDocuments.push(documents[i]); + } + + // Check if we've reached the batch size or end of the array + if ( + currentBatchVectors.length >= this.batchSize || + i === vectors.length + ) { + // Only proceed if there are items in the current batch + if (currentBatchVectors.length > 0) { + // Create copies of the current batch arrays to use in the async insert operation + const batchVectors = [...currentBatchVectors]; + const batchDocuments = [...currentBatchDocuments]; + + // Execute the insert using the AsyncCaller - it will handle concurrency and queueing. 
+ insertPromises.push( + this.asyncCaller.call(() => + this.executeInsert(batchVectors, batchDocuments) + ) + ); + + // Clear the current buffers for the next iteration + currentBatchVectors = []; + currentBatchDocuments = []; + } + } + } + + // Wait for all insert operations to complete. + await Promise.all(insertPromises); + } +} diff --git a/libs/langchain-community/src/vectorstores/chroma.ts b/libs/langchain-community/src/vectorstores/chroma.ts new file mode 100644 index 000000000000..96e6b15475dc --- /dev/null +++ b/libs/langchain-community/src/vectorstores/chroma.ts @@ -0,0 +1,364 @@ +import * as uuid from "uuid"; +import type { ChromaClient as ChromaClientT, Collection } from "chromadb"; +import type { CollectionMetadata, Where } from "chromadb/dist/main/types.js"; + +import { Embeddings } from "@langchain/core/embeddings"; +import { VectorStore } from "@langchain/core/vectorstores"; +import { Document } from "@langchain/core/documents"; + +/** + * Defines the arguments that can be passed to the `Chroma` class + * constructor. It can either contain a `url` for the Chroma database, the + * number of dimensions for the vectors (`numDimensions`), a + * `collectionName` for the collection to be used in the database, and a + * `filter` object; or it can contain an `index` which is an instance of + * `ChromaClientT`, along with the `numDimensions`, `collectionName`, and + * `filter`. + */ +export type ChromaLibArgs = + | { + url?: string; + numDimensions?: number; + collectionName?: string; + filter?: object; + collectionMetadata?: CollectionMetadata; + } + | { + index?: ChromaClientT; + numDimensions?: number; + collectionName?: string; + filter?: object; + collectionMetadata?: CollectionMetadata; + }; + +/** + * Defines the parameters for the `delete` method in the `Chroma` class. + * It can either contain an array of `ids` of the documents to be deleted + * or a `filter` object to specify the documents to be deleted. 
+ */ +export interface ChromaDeleteParams { + ids?: string[]; + filter?: T; +} + +/** + * The main class that extends the `VectorStore` class. It provides + * methods for interacting with the Chroma database, such as adding + * documents, deleting documents, and searching for similar vectors. + */ +export class Chroma extends VectorStore { + declare FilterType: Where; + + index?: ChromaClientT; + + collection?: Collection; + + collectionName: string; + + collectionMetadata?: CollectionMetadata; + + numDimensions?: number; + + url: string; + + filter?: object; + + _vectorstoreType(): string { + return "chroma"; + } + + constructor(embeddings: Embeddings, args: ChromaLibArgs) { + super(embeddings, args); + this.numDimensions = args.numDimensions; + this.embeddings = embeddings; + this.collectionName = ensureCollectionName(args.collectionName); + this.collectionMetadata = args.collectionMetadata; + if ("index" in args) { + this.index = args.index; + } else if ("url" in args) { + this.url = args.url || "http://localhost:8000"; + } + + this.filter = args.filter; + } + + /** + * Adds documents to the Chroma database. The documents are first + * converted to vectors using the `embeddings` instance, and then added to + * the database. + * @param documents An array of `Document` instances to be added to the database. + * @param options Optional. An object containing an array of `ids` for the documents. + * @returns A promise that resolves when the documents have been added to the database. + */ + async addDocuments(documents: Document[], options?: { ids?: string[] }) { + const texts = documents.map(({ pageContent }) => pageContent); + return this.addVectors( + await this.embeddings.embedDocuments(texts), + documents, + options + ); + } + + /** + * Ensures that a collection exists in the Chroma database. If the + * collection does not exist, it is created. + * @returns A promise that resolves with the `Collection` instance. 
+ */ + async ensureCollection(): Promise { + if (!this.collection) { + if (!this.index) { + const { ChromaClient } = await Chroma.imports(); + this.index = new ChromaClient({ path: this.url }); + } + try { + this.collection = await this.index.getOrCreateCollection({ + name: this.collectionName, + ...(this.collectionMetadata && { metadata: this.collectionMetadata }), + }); + } catch (err) { + throw new Error(`Chroma getOrCreateCollection error: ${err}`); + } + } + + return this.collection; + } + + /** + * Adds vectors to the Chroma database. The vectors are associated with + * the provided documents. + * @param vectors An array of vectors to be added to the database. + * @param documents An array of `Document` instances associated with the vectors. + * @param options Optional. An object containing an array of `ids` for the vectors. + * @returns A promise that resolves with an array of document IDs when the vectors have been added to the database. + */ + async addVectors( + vectors: number[][], + documents: Document[], + options?: { ids?: string[] } + ) { + if (vectors.length === 0) { + return []; + } + if (this.numDimensions === undefined) { + this.numDimensions = vectors[0].length; + } + if (vectors.length !== documents.length) { + throw new Error(`Vectors and metadatas must have the same length`); + } + if (vectors[0].length !== this.numDimensions) { + throw new Error( + `Vectors must have the same length as the number of dimensions (${this.numDimensions})` + ); + } + + const documentIds = + options?.ids ?? 
Array.from({ length: vectors.length }, () => uuid.v1()); + const collection = await this.ensureCollection(); + + const mappedMetadatas = documents.map(({ metadata }) => { + let locFrom; + let locTo; + + if (metadata?.loc) { + if (metadata.loc.lines?.from !== undefined) + locFrom = metadata.loc.lines.from; + if (metadata.loc.lines?.to !== undefined) locTo = metadata.loc.lines.to; + } + + const newMetadata: Document["metadata"] = { + ...metadata, + ...(locFrom !== undefined && { locFrom }), + ...(locTo !== undefined && { locTo }), + }; + + if (newMetadata.loc) delete newMetadata.loc; + + return newMetadata; + }); + + await collection.upsert({ + ids: documentIds, + embeddings: vectors, + metadatas: mappedMetadatas, + documents: documents.map(({ pageContent }) => pageContent), + }); + return documentIds; + } + + /** + * Deletes documents from the Chroma database. The documents to be deleted + * can be specified by providing an array of `ids` or a `filter` object. + * @param params An object containing either an array of `ids` of the documents to be deleted or a `filter` object to specify the documents to be deleted. + * @returns A promise that resolves when the specified documents have been deleted from the database. + */ + async delete(params: ChromaDeleteParams): Promise { + const collection = await this.ensureCollection(); + if (Array.isArray(params.ids)) { + await collection.delete({ ids: params.ids }); + } else if (params.filter) { + await collection.delete({ + where: { ...params.filter }, + }); + } else { + throw new Error(`You must provide one of "ids or "filter".`); + } + } + + /** + * Searches for vectors in the Chroma database that are similar to the + * provided query vector. The search can be filtered using the provided + * `filter` object or the `filter` property of the `Chroma` instance. + * @param query The query vector. + * @param k The number of similar vectors to return. + * @param filter Optional. A `filter` object to filter the search results. 
+ * @returns A promise that resolves with an array of tuples, each containing a `Document` instance and a similarity score. + */ + async similaritySearchVectorWithScore( + query: number[], + k: number, + filter?: this["FilterType"] + ) { + if (filter && this.filter) { + throw new Error("cannot provide both `filter` and `this.filter`"); + } + const _filter = filter ?? this.filter; + + const collection = await this.ensureCollection(); + + // similaritySearchVectorWithScore supports one query vector at a time + // chroma supports multiple query vectors at a time + const result = await collection.query({ + queryEmbeddings: query, + nResults: k, + where: { ..._filter }, + }); + + const { ids, distances, documents, metadatas } = result; + if (!ids || !distances || !documents || !metadatas) { + return []; + } + // get the result data from the first and only query vector + const [firstIds] = ids; + const [firstDistances] = distances; + const [firstDocuments] = documents; + const [firstMetadatas] = metadatas; + + const results: [Document, number][] = []; + for (let i = 0; i < firstIds.length; i += 1) { + let metadata: Document["metadata"] = firstMetadatas?.[i] ?? {}; + + if (metadata.locFrom && metadata.locTo) { + metadata = { + ...metadata, + loc: { + lines: { + from: metadata.locFrom, + to: metadata.locTo, + }, + }, + }; + + delete metadata.locFrom; + delete metadata.locTo; + } + + results.push([ + new Document({ + pageContent: firstDocuments?.[i] ?? "", + metadata, + }), + firstDistances[i], + ]); + } + return results; + } + + /** + * Creates a new `Chroma` instance from an array of text strings. The text + * strings are converted to `Document` instances and added to the Chroma + * database. + * @param texts An array of text strings. + * @param metadatas An array of metadata objects or a single metadata object. If an array is provided, it must have the same length as the `texts` array. 
+ * @param embeddings An `Embeddings` instance used to generate embeddings for the documents. + * @param dbConfig A `ChromaLibArgs` object containing the configuration for the Chroma database. + * @returns A promise that resolves with a new `Chroma` instance. + */ + static async fromTexts( + texts: string[], + metadatas: object[] | object, + embeddings: Embeddings, + dbConfig: ChromaLibArgs + ): Promise { + const docs: Document[] = []; + for (let i = 0; i < texts.length; i += 1) { + const metadata = Array.isArray(metadatas) ? metadatas[i] : metadatas; + const newDoc = new Document({ + pageContent: texts[i], + metadata, + }); + docs.push(newDoc); + } + return this.fromDocuments(docs, embeddings, dbConfig); + } + + /** + * Creates a new `Chroma` instance from an array of `Document` instances. + * The documents are added to the Chroma database. + * @param docs An array of `Document` instances. + * @param embeddings An `Embeddings` instance used to generate embeddings for the documents. + * @param dbConfig A `ChromaLibArgs` object containing the configuration for the Chroma database. + * @returns A promise that resolves with a new `Chroma` instance. + */ + static async fromDocuments( + docs: Document[], + embeddings: Embeddings, + dbConfig: ChromaLibArgs + ): Promise { + const instance = new this(embeddings, dbConfig); + await instance.addDocuments(docs); + return instance; + } + + /** + * Creates a new `Chroma` instance from an existing collection in the + * Chroma database. + * @param embeddings An `Embeddings` instance used to generate embeddings for the documents. + * @param dbConfig A `ChromaLibArgs` object containing the configuration for the Chroma database. + * @returns A promise that resolves with a new `Chroma` instance. 
+ */ + static async fromExistingCollection( + embeddings: Embeddings, + dbConfig: ChromaLibArgs + ): Promise { + const instance = new this(embeddings, dbConfig); + await instance.ensureCollection(); + return instance; + } + + /** + * Imports the `ChromaClient` from the `chromadb` module. + * @returns A promise that resolves with an object containing the `ChromaClient` constructor. + */ + static async imports(): Promise<{ + ChromaClient: typeof ChromaClientT; + }> { + try { + const { ChromaClient } = await import("chromadb"); + return { ChromaClient }; + } catch (e) { + throw new Error( + "Please install chromadb as a dependency with, e.g. `npm install -S chromadb`" + ); + } + } +} + +/** + * Generates a unique collection name if none is provided. + */ +function ensureCollectionName(collectionName?: string) { + if (!collectionName) { + return `langchain-${uuid.v4()}`; + } + return collectionName; +} diff --git a/libs/langchain-community/src/vectorstores/clickhouse.ts b/libs/langchain-community/src/vectorstores/clickhouse.ts new file mode 100644 index 000000000000..a74a05d8a49d --- /dev/null +++ b/libs/langchain-community/src/vectorstores/clickhouse.ts @@ -0,0 +1,338 @@ +import * as uuid from "uuid"; +import { ClickHouseClient, createClient } from "@clickhouse/client"; +import { format } from "mysql2"; +import { Embeddings } from "@langchain/core/embeddings"; +import { VectorStore } from "@langchain/core/vectorstores"; +import { Document } from "@langchain/core/documents" + +/** + * Arguments for the ClickHouseStore class, which include the host, port, + * protocol, username, password, index type, index parameters, + * index query params, column map, database, table. 
+ */ +export interface ClickHouseLibArgs { + host: string; + port: string | number; + protocol?: string; + username: string; + password: string; + indexType?: string; + indexParam?: Record; + indexQueryParams?: Record; + columnMap?: ColumnMap; + database?: string; + table?: string; +} + +/** + * Mapping of columns in the ClickHouse database. + */ +export interface ColumnMap { + id: string; + uuid: string; + document: string; + embedding: string; + metadata: string; +} + +/** + * Type for filtering search results in the ClickHouse database. + */ +export interface ClickHouseFilter { + whereStr: string; +} + +/** + * Class for interacting with the ClickHouse database. It extends the + * VectorStore class and provides methods for adding vectors and + * documents, searching for similar vectors, and creating instances from + * texts or documents. + */ +export class ClickHouseStore extends VectorStore { + declare FilterType: ClickHouseFilter; + + private client: ClickHouseClient; + + private indexType: string; + + private indexParam: Record; + + private indexQueryParams: Record; + + private columnMap: ColumnMap; + + private database: string; + + private table: string; + + private isInitialized = false; + + _vectorstoreType(): string { + return "clickhouse"; + } + + constructor(embeddings: Embeddings, args: ClickHouseLibArgs) { + super(embeddings, args); + + this.indexType = args.indexType || "annoy"; + this.indexParam = args.indexParam || { L2Distance: 100 }; + this.indexQueryParams = args.indexQueryParams || {}; + this.columnMap = args.columnMap || { + id: "id", + document: "document", + embedding: "embedding", + metadata: "metadata", + uuid: "uuid", + }; + this.database = args.database || "default"; + this.table = args.table || "vector_table"; + + this.client = createClient({ + host: `${args.protocol ?? 
"https://"}${args.host}:${args.port}`, + username: args.username, + password: args.password, + session_id: uuid.v4(), + }); + } + + /** + * Method to add vectors to the ClickHouse database. + * @param vectors The vectors to add. + * @param documents The documents associated with the vectors. + * @returns Promise that resolves when the vectors have been added. + */ + async addVectors(vectors: number[][], documents: Document[]): Promise { + if (vectors.length === 0) { + return; + } + + if (!this.isInitialized) { + await this.initialize(vectors[0].length); + } + + const queryStr = this.buildInsertQuery(vectors, documents); + await this.client.exec({ query: queryStr }); + } + + /** + * Method to add documents to the ClickHouse database. + * @param documents The documents to add. + * @returns Promise that resolves when the documents have been added. + */ + async addDocuments(documents: Document[]): Promise { + return this.addVectors( + await this.embeddings.embedDocuments(documents.map((d) => d.pageContent)), + documents + ); + } + + /** + * Method to search for vectors that are similar to a given query vector. + * @param query The query vector. + * @param k The number of similar vectors to return. + * @param filter Optional filter for the search results. + * @returns Promise that resolves with an array of tuples, each containing a Document and a score. 
+ */ + async similaritySearchVectorWithScore( + query: number[], + k: number, + filter?: this["FilterType"] + ): Promise<[Document, number][]> { + if (!this.isInitialized) { + await this.initialize(query.length); + } + const queryStr = this.buildSearchQuery(query, k, filter); + + const queryResultSet = await this.client.query({ query: queryStr }); + + const queryResult: { + data: { document: string; metadata: object; dist: number }[]; + } = await queryResultSet.json(); + + const result: [Document, number][] = queryResult.data.map((item) => [ + new Document({ pageContent: item.document, metadata: item.metadata }), + item.dist, + ]); + + return result; + } + + /** + * Static method to create an instance of ClickHouseStore from texts. + * @param texts The texts to use. + * @param metadatas The metadata associated with the texts. + * @param embeddings The embeddings to use. + * @param args The arguments for the ClickHouseStore. + * @returns Promise that resolves with a new instance of ClickHouseStore. + */ + static async fromTexts( + texts: string[], + metadatas: object | object[], + embeddings: Embeddings, + args: ClickHouseLibArgs + ): Promise { + const docs: Document[] = []; + for (let i = 0; i < texts.length; i += 1) { + const metadata = Array.isArray(metadatas) ? metadatas[i] : metadatas; + const newDoc = new Document({ + pageContent: texts[i], + metadata, + }); + docs.push(newDoc); + } + return ClickHouseStore.fromDocuments(docs, embeddings, args); + } + + /** + * Static method to create an instance of ClickHouseStore from documents. + * @param docs The documents to use. + * @param embeddings The embeddings to use. + * @param args The arguments for the ClickHouseStore. + * @returns Promise that resolves with a new instance of ClickHouseStore. 
+ */ + static async fromDocuments( + docs: Document[], + embeddings: Embeddings, + args: ClickHouseLibArgs + ): Promise { + const instance = new this(embeddings, args); + await instance.addDocuments(docs); + return instance; + } + + /** + * Static method to create an instance of ClickHouseStore from an existing + * index. + * @param embeddings The embeddings to use. + * @param args The arguments for the ClickHouseStore. + * @returns Promise that resolves with a new instance of ClickHouseStore. + */ + static async fromExistingIndex( + embeddings: Embeddings, + args: ClickHouseLibArgs + ): Promise { + const instance = new this(embeddings, args); + + await instance.initialize(); + return instance; + } + + /** + * Method to initialize the ClickHouse database. + * @param dimension Optional dimension of the vectors. + * @returns Promise that resolves when the database has been initialized. + */ + private async initialize(dimension?: number): Promise { + const dim = dimension ?? (await this.embeddings.embedQuery("test")).length; + + const indexParamStr = this.indexParam + ? 
Object.entries(this.indexParam) + .map(([key, value]) => `'${key}', ${value}`) + .join(", ") + : ""; + + const query = ` + CREATE TABLE IF NOT EXISTS ${this.database}.${this.table}( + ${this.columnMap.id} Nullable(String), + ${this.columnMap.document} Nullable(String), + ${this.columnMap.embedding} Array(Float32), + ${this.columnMap.metadata} JSON, + ${this.columnMap.uuid} UUID DEFAULT generateUUIDv4(), + CONSTRAINT cons_vec_len CHECK length(${this.columnMap.embedding}) = ${dim}, + INDEX vec_idx ${this.columnMap.embedding} TYPE ${this.indexType}(${indexParamStr}) GRANULARITY 1000 + ) ENGINE = MergeTree ORDER BY ${this.columnMap.uuid} SETTINGS index_granularity = 8192;`; + + await this.client.exec({ + query, + clickhouse_settings: { + allow_experimental_object_type: 1, + allow_experimental_annoy_index: 1, + }, + }); + this.isInitialized = true; + } + + /** + * Method to build an SQL query for inserting vectors and documents into + * the ClickHouse database. + * @param vectors The vectors to insert. + * @param documents The documents to insert. + * @returns The SQL query string. 
+ */ + private buildInsertQuery(vectors: number[][], documents: Document[]): string { + const columnsStr = Object.values( + Object.fromEntries( + Object.entries(this.columnMap).filter( + ([key]) => key !== this.columnMap.uuid + ) + ) + ).join(", "); + + const placeholders = vectors.map(() => "(?, ?, ?, ?)").join(", "); + const values = []; + + for (let i = 0; i < vectors.length; i += 1) { + const vector = vectors[i]; + const document = documents[i]; + values.push( + uuid.v4(), + this.escapeString(document.pageContent), + JSON.stringify(vector), + JSON.stringify(document.metadata) + ); + } + + const insertQueryStr = ` + INSERT INTO TABLE ${this.database}.${this.table}(${columnsStr}) + VALUES ${placeholders} + `; + + const insertQuery = format(insertQueryStr, values); + return insertQuery; + } + + private escapeString(str: string): string { + return str.replace(/\\/g, "\\\\").replace(/'/g, "\\'"); + } + + /** + * Method to build an SQL query for searching for similar vectors in the + * ClickHouse database. + * @param query The query vector. + * @param k The number of similar vectors to return. + * @param filter Optional filter for the search results. + * @returns The SQL query string. + */ + private buildSearchQuery( + query: number[], + k: number, + filter?: ClickHouseFilter + ): string { + const order = "ASC"; + const whereStr = filter ? 
`PREWHERE ${filter.whereStr}` : ""; + const placeholders = query.map(() => "?").join(", "); + + const settingStrings: string[] = []; + if (this.indexQueryParams) { + for (const [key, value] of Object.entries(this.indexQueryParams)) { + settingStrings.push(`SETTING ${key}=${value}`); + } + } + + const searchQueryStr = ` + SELECT ${this.columnMap.document} AS document, ${ + this.columnMap.metadata + } AS metadata, dist + FROM ${this.database}.${this.table} + ${whereStr} + ORDER BY L2Distance(${ + this.columnMap.embedding + }, [${placeholders}]) AS dist ${order} + LIMIT ${k} ${settingStrings.join(" ")} + `; + + // Format the query with actual values + const searchQuery = format(searchQueryStr, query); + return searchQuery; + } +} diff --git a/libs/langchain-community/src/vectorstores/cloudflare_vectorize.ts b/libs/langchain-community/src/vectorstores/cloudflare_vectorize.ts new file mode 100644 index 000000000000..66da3188608e --- /dev/null +++ b/libs/langchain-community/src/vectorstores/cloudflare_vectorize.ts @@ -0,0 +1,227 @@ +import * as uuid from "uuid"; + +import { + VectorizeIndex, + VectorizeVectorMetadata, +} from "@cloudflare/workers-types"; +import { Embeddings } from "@langchain/core/embeddings"; +import { VectorStore } from "@langchain/core/vectorstores"; +import { Document } from "@langchain/core/documents" +import { AsyncCaller, type AsyncCallerParams } from "@langchain/core/utils/async_caller"; +import { chunkArray } from "../util/chunk.js"; + +export interface VectorizeLibArgs extends AsyncCallerParams { + index: VectorizeIndex; + textKey?: string; +} + +/** + * Type that defines the parameters for the delete operation in the + * CloudflareVectorizeStore class. It includes ids, deleteAll flag, and namespace. + */ +export type VectorizeDeleteParams = { + ids: string[]; +}; + +/** + * Class that extends the VectorStore class and provides methods to + * interact with the Cloudflare Vectorize vector database. 
+ */ +export class CloudflareVectorizeStore extends VectorStore { + textKey: string; + + namespace?: string; + + index: VectorizeIndex; + + caller: AsyncCaller; + + _vectorstoreType(): string { + return "cloudflare_vectorize"; + } + + constructor(embeddings: Embeddings, args: VectorizeLibArgs) { + super(embeddings, args); + + this.embeddings = embeddings; + const { index, textKey, ...asyncCallerArgs } = args; + if (!index) { + throw new Error( + "Must supply a Vectorize index binding, eg { index: env.VECTORIZE }" + ); + } + this.index = index; + this.textKey = textKey ?? "text"; + this.caller = new AsyncCaller({ + maxConcurrency: 6, + maxRetries: 0, + ...asyncCallerArgs, + }); + } + + /** + * Method that adds documents to the Vectorize database. + * @param documents Array of documents to add. + * @param options Optional ids for the documents. + * @returns Promise that resolves with the ids of the added documents. + */ + async addDocuments( + documents: Document[], + options?: { ids?: string[] } | string[] + ) { + const texts = documents.map(({ pageContent }) => pageContent); + return this.addVectors( + await this.embeddings.embedDocuments(texts), + documents, + options + ); + } + + /** + * Method that adds vectors to the Vectorize database. + * @param vectors Array of vectors to add. + * @param documents Array of documents associated with the vectors. + * @param options Optional ids for the vectors. + * @returns Promise that resolves with the ids of the added vectors. + */ + async addVectors( + vectors: number[][], + documents: Document[], + options?: { ids?: string[] } | string[] + ) { + const ids = Array.isArray(options) ? options : options?.ids; + const documentIds = ids == null ? 
documents.map(() => uuid.v4()) : ids; + const vectorizeVectors = vectors.map((values, idx) => { + const metadata: Record = { + ...documents[idx].metadata, + [this.textKey]: documents[idx].pageContent, + }; + return { + id: documentIds[idx], + metadata, + values, + }; + }); + + // Stick to a limit of 500 vectors per upsert request + const chunkSize = 500; + const chunkedVectors = chunkArray(vectorizeVectors, chunkSize); + const batchRequests = chunkedVectors.map((chunk) => + this.caller.call(async () => this.index.upsert(chunk)) + ); + + await Promise.all(batchRequests); + + return documentIds; + } + + /** + * Method that deletes vectors from the Vectorize database. + * @param params Parameters for the delete operation. + * @returns Promise that resolves when the delete operation is complete. + */ + async delete(params: VectorizeDeleteParams): Promise { + const batchSize = 1000; + const batchedIds = chunkArray(params.ids, batchSize); + const batchRequests = batchedIds.map((batchIds) => + this.caller.call(async () => this.index.deleteByIds(batchIds)) + ); + await Promise.all(batchRequests); + } + + /** + * Method that performs a similarity search in the Vectorize database and + * returns the results along with their scores. + * @param query Query vector for the similarity search. + * @param k Number of top results to return. + * @returns Promise that resolves with an array of documents and their scores. + */ + async similaritySearchVectorWithScore( + query: number[], + k: number + ): Promise<[Document, number][]> { + const results = await this.index.query(query, { + returnVectors: true, + topK: k, + }); + + const result: [Document, number][] = []; + + if (results.matches) { + for (const res of results.matches) { + const { [this.textKey]: pageContent, ...metadata } = + res.vector?.metadata ?? 
{}; + result.push([ + new Document({ metadata, pageContent: pageContent as string }), + res.score, + ]); + } + } + + return result; + } + + /** + * Static method that creates a new instance of the CloudflareVectorizeStore class + * from texts. + * @param texts Array of texts to add to the Vectorize database. + * @param metadatas Metadata associated with the texts. + * @param embeddings Embeddings to use for the texts. + * @param dbConfig Configuration for the Vectorize database. + * @param options Optional ids for the vectors. + * @returns Promise that resolves with a new instance of the CloudflareVectorizeStore class. + */ + static async fromTexts( + texts: string[], + metadatas: + | Record[] + | Record, + embeddings: Embeddings, + dbConfig: VectorizeLibArgs + ): Promise { + const docs: Document[] = []; + for (let i = 0; i < texts.length; i += 1) { + const metadata = Array.isArray(metadatas) ? metadatas[i] : metadatas; + const newDoc = new Document({ + pageContent: texts[i], + metadata, + }); + docs.push(newDoc); + } + return CloudflareVectorizeStore.fromDocuments(docs, embeddings, dbConfig); + } + + /** + * Static method that creates a new instance of the CloudflareVectorizeStore class + * from documents. + * @param docs Array of documents to add to the Vectorize database. + * @param embeddings Embeddings to use for the documents. + * @param dbConfig Configuration for the Vectorize database. + * @param options Optional ids for the vectors. + * @returns Promise that resolves with a new instance of the CloudflareVectorizeStore class. + */ + static async fromDocuments( + docs: Document[], + embeddings: Embeddings, + dbConfig: VectorizeLibArgs + ): Promise { + const instance = new this(embeddings, dbConfig); + await instance.addDocuments(docs); + return instance; + } + + /** + * Static method that creates a new instance of the CloudflareVectorizeStore class + * from an existing index. + * @param embeddings Embeddings to use for the documents. 
+ * @param dbConfig Configuration for the Vectorize database. + * @returns Promise that resolves with a new instance of the CloudflareVectorizeStore class. + */ + static async fromExistingIndex( + embeddings: Embeddings, + dbConfig: VectorizeLibArgs + ): Promise { + const instance = new this(embeddings, dbConfig); + return instance; + } +} diff --git a/libs/langchain-community/src/vectorstores/convex.ts b/libs/langchain-community/src/vectorstores/convex.ts new file mode 100644 index 000000000000..0e36b00d52d8 --- /dev/null +++ b/libs/langchain-community/src/vectorstores/convex.ts @@ -0,0 +1,376 @@ +// eslint-disable-next-line import/no-extraneous-dependencies +import { + DocumentByInfo, + FieldPaths, + FilterExpression, + FunctionReference, + GenericActionCtx, + GenericDataModel, + GenericTableInfo, + NamedTableInfo, + NamedVectorIndex, + TableNamesInDataModel, + VectorFilterBuilder, + VectorIndexNames, + makeFunctionReference, +} from "convex/server"; +import { Embeddings } from "@langchain/core/embeddings"; +import { VectorStore } from "@langchain/core/vectorstores"; +import { Document } from "@langchain/core/documents" + +/** + * Type that defines the config required to initialize the + * ConvexVectorStore class. It includes the table name, + * index name, text field name, and embedding field name. 
+ */ +export type ConvexVectorStoreConfig< + DataModel extends GenericDataModel, + TableName extends TableNamesInDataModel, + IndexName extends VectorIndexNames>, + TextFieldName extends FieldPaths>, + EmbeddingFieldName extends FieldPaths>, + MetadataFieldName extends FieldPaths>, + InsertMutation extends FunctionReference< + "mutation", + "internal", + { table: string; document: object } + >, + GetQuery extends FunctionReference< + "query", + "internal", + { id: string }, + object | null + > +> = { + readonly ctx: GenericActionCtx; + /** + * Defaults to "documents" + */ + readonly table?: TableName; + /** + * Defaults to "byEmbedding" + */ + readonly index?: IndexName; + /** + * Defaults to "text" + */ + readonly textField?: TextFieldName; + /** + * Defaults to "embedding" + */ + readonly embeddingField?: EmbeddingFieldName; + /** + * Defaults to "metadata" + */ + readonly metadataField?: MetadataFieldName; + /** + * Defaults to `internal.langchain.db.insert` + */ + readonly insert?: InsertMutation; + /** + * Defaults to `internal.langchain.db.get` + */ + readonly get?: GetQuery; +}; + +/** + * Class that is a wrapper around Convex storage and vector search. It is used + * to insert embeddings in Convex documents with a vector search index, + * and perform a vector search on them. + * + * ConvexVectorStore does NOT implement maxMarginalRelevanceSearch. 
+ */ +export class ConvexVectorStore< + DataModel extends GenericDataModel, + TableName extends TableNamesInDataModel, + IndexName extends VectorIndexNames>, + TextFieldName extends FieldPaths>, + EmbeddingFieldName extends FieldPaths>, + MetadataFieldName extends FieldPaths>, + InsertMutation extends FunctionReference< + "mutation", + "internal", + { table: string; document: object } + >, + GetQuery extends FunctionReference< + "query", + "internal", + { id: string }, + object | null + > +> extends VectorStore { + /** + * Type that defines the filter used in the + * similaritySearchVectorWithScore and maxMarginalRelevanceSearch methods. + * It includes limit, filter and a flag to include embeddings. + */ + declare FilterType: { + filter?: ( + q: VectorFilterBuilder< + DocumentByInfo, + NamedVectorIndex, IndexName> + > + ) => FilterExpression; + includeEmbeddings?: boolean; + }; + + private readonly ctx: GenericActionCtx; + + private readonly table: TableName; + + private readonly index: IndexName; + + private readonly textField: TextFieldName; + + private readonly embeddingField: EmbeddingFieldName; + + private readonly metadataField: MetadataFieldName; + + private readonly insert: InsertMutation; + + private readonly get: GetQuery; + + _vectorstoreType(): string { + return "convex"; + } + + constructor( + embeddings: Embeddings, + config: ConvexVectorStoreConfig< + DataModel, + TableName, + IndexName, + TextFieldName, + EmbeddingFieldName, + MetadataFieldName, + InsertMutation, + GetQuery + > + ) { + super(embeddings, config); + this.ctx = config.ctx; + this.table = config.table ?? ("documents" as TableName); + this.index = config.index ?? ("byEmbedding" as IndexName); + this.textField = config.textField ?? ("text" as TextFieldName); + this.embeddingField = + config.embeddingField ?? ("embedding" as EmbeddingFieldName); + this.metadataField = + config.metadataField ?? 
("metadata" as MetadataFieldName); + this.insert = + // eslint-disable-next-line @typescript-eslint/no-explicit-any + config.insert ?? (makeFunctionReference("langchain/db:insert") as any); + // eslint-disable-next-line @typescript-eslint/no-explicit-any + this.get = config.get ?? (makeFunctionReference("langchain/db:get") as any); + } + + /** + * Add vectors and their corresponding documents to the Convex table. + * @param vectors Vectors to be added. + * @param documents Corresponding documents to be added. + * @returns Promise that resolves when the vectors and documents have been added. + */ + async addVectors(vectors: number[][], documents: Document[]): Promise { + const convexDocuments = vectors.map((embedding, idx) => ({ + [this.textField]: documents[idx].pageContent, + [this.embeddingField]: embedding, + [this.metadataField]: documents[idx].metadata, + })); + // TODO: Remove chunking when Convex handles the concurrent requests correctly + const PAGE_SIZE = 16; + for (let i = 0; i < convexDocuments.length; i += PAGE_SIZE) { + await Promise.all( + convexDocuments.slice(i, i + PAGE_SIZE).map((document) => + this.ctx.runMutation(this.insert, { + table: this.table, + document, + // eslint-disable-next-line @typescript-eslint/no-explicit-any + } as any) + ) + ); + } + } + + /** + * Add documents to the Convex table. It first converts + * the documents to vectors using the embeddings and then calls the + * addVectors method. + * @param documents Documents to be added. + * @returns Promise that resolves when the documents have been added. + */ + async addDocuments(documents: Document[]): Promise { + const texts = documents.map(({ pageContent }) => pageContent); + return this.addVectors( + await this.embeddings.embedDocuments(texts), + documents + ); + } + + /** + * Similarity search on the vectors stored in the + * Convex table. It returns a list of documents and their + * corresponding similarity scores. + * @param query Query vector for the similarity search. 
+ * @param k Number of nearest neighbors to return. + * @param filter Optional filter to be applied. + * @returns Promise that resolves to a list of documents and their corresponding similarity scores. + */ + async similaritySearchVectorWithScore( + query: number[], + k: number, + filter?: this["FilterType"] + ): Promise<[Document, number][]> { + const idsAndScores = await this.ctx.vectorSearch(this.table, this.index, { + vector: query, + limit: k, + filter: filter?.filter, + }); + + const documents = await Promise.all( + idsAndScores.map(({ _id }) => + // eslint-disable-next-line @typescript-eslint/no-explicit-any + this.ctx.runQuery(this.get, { id: _id } as any) + ) + ); + + return documents.map( + ( + { + [this.textField]: text, + [this.embeddingField]: embedding, + [this.metadataField]: metadata, + }, + idx + ) => [ + new Document({ + pageContent: text as string, + metadata: { + ...metadata, + ...(filter?.includeEmbeddings ? { embedding } : null), + }, + }), + idsAndScores[idx]._score, + ] + ); + } + + /** + * Static method to create an instance of ConvexVectorStore from a + * list of texts. It first converts the texts to vectors and then adds + * them to the Convex table. + * @param texts List of texts to be converted to vectors. + * @param metadatas Metadata for the texts. + * @param embeddings Embeddings to be used for conversion. + * @param dbConfig Database configuration for Convex. + * @returns Promise that resolves to a new instance of ConvexVectorStore. 
+ */ + static async fromTexts< + DataModel extends GenericDataModel, + TableName extends TableNamesInDataModel, + IndexName extends VectorIndexNames>, + TextFieldName extends FieldPaths>, + EmbeddingFieldName extends FieldPaths>, + MetadataFieldName extends FieldPaths>, + InsertMutation extends FunctionReference< + "mutation", + "internal", + { table: string; document: object } + >, + GetQuery extends FunctionReference< + "query", + "internal", + { id: string }, + object | null + > + >( + texts: string[], + metadatas: object[] | object, + embeddings: Embeddings, + dbConfig: ConvexVectorStoreConfig< + DataModel, + TableName, + IndexName, + TextFieldName, + EmbeddingFieldName, + MetadataFieldName, + InsertMutation, + GetQuery + > + ): Promise< + ConvexVectorStore< + DataModel, + TableName, + IndexName, + TextFieldName, + EmbeddingFieldName, + MetadataFieldName, + InsertMutation, + GetQuery + > + > { + const docs = texts.map( + (text, i) => + new Document({ + pageContent: text, + metadata: Array.isArray(metadatas) ? metadatas[i] : metadatas, + }) + ); + return ConvexVectorStore.fromDocuments(docs, embeddings, dbConfig); + } + + /** + * Static method to create an instance of ConvexVectorStore from a + * list of documents. It first converts the documents to vectors and then + * adds them to the Convex table. + * @param docs List of documents to be converted to vectors. + * @param embeddings Embeddings to be used for conversion. + * @param dbConfig Database configuration for Convex. + * @returns Promise that resolves to a new instance of ConvexVectorStore. 
+ */ + static async fromDocuments< + DataModel extends GenericDataModel, + TableName extends TableNamesInDataModel, + IndexName extends VectorIndexNames>, + TextFieldName extends FieldPaths>, + EmbeddingFieldName extends FieldPaths>, + MetadataFieldName extends FieldPaths>, + InsertMutation extends FunctionReference< + "mutation", + "internal", + { table: string; document: object } + >, + GetQuery extends FunctionReference< + "query", + "internal", + { id: string }, + object | null + > + >( + docs: Document[], + embeddings: Embeddings, + dbConfig: ConvexVectorStoreConfig< + DataModel, + TableName, + IndexName, + TextFieldName, + EmbeddingFieldName, + MetadataFieldName, + InsertMutation, + GetQuery + > + ): Promise< + ConvexVectorStore< + DataModel, + TableName, + IndexName, + TextFieldName, + EmbeddingFieldName, + MetadataFieldName, + InsertMutation, + GetQuery + > + > { + const instance = new this(embeddings, dbConfig); + await instance.addDocuments(docs); + return instance; + } +} diff --git a/libs/langchain-community/src/vectorstores/elasticsearch.ts b/libs/langchain-community/src/vectorstores/elasticsearch.ts new file mode 100644 index 000000000000..d8b10bedb03c --- /dev/null +++ b/libs/langchain-community/src/vectorstores/elasticsearch.ts @@ -0,0 +1,341 @@ +import * as uuid from "uuid"; +import { Client, estypes } from "@elastic/elasticsearch"; +import { Embeddings } from "@langchain/core/embeddings"; +import { VectorStore } from "@langchain/core/vectorstores"; +import { Document } from "@langchain/core/documents" +/** + * Type representing the k-nearest neighbors (k-NN) engine used in + * Elasticsearch. + */ +type ElasticKnnEngine = "hnsw"; +/** + * Type representing the similarity measure used in Elasticsearch. + */ +type ElasticSimilarity = "l2_norm" | "dot_product" | "cosine"; + +/** + * Interface defining the options for vector search in Elasticsearch. 
+ */ +interface VectorSearchOptions { + readonly engine?: ElasticKnnEngine; + readonly similarity?: ElasticSimilarity; + readonly m?: number; + readonly efConstruction?: number; + readonly candidates?: number; +} + +/** + * Interface defining the arguments required to create an Elasticsearch + * client. + */ +export interface ElasticClientArgs { + readonly client: Client; + readonly indexName?: string; + readonly vectorSearchOptions?: VectorSearchOptions; +} + +/** + * Type representing a filter object in Elasticsearch. + */ +// eslint-disable-next-line @typescript-eslint/no-explicit-any +type ElasticFilter = object | { field: string; operator: string; value: any }[]; + +/** + * Class for interacting with an Elasticsearch database. It extends the + * VectorStore base class and provides methods for adding documents and + * vectors to the Elasticsearch database, performing similarity searches, + * deleting documents, and more. + */ +export class ElasticVectorSearch extends VectorStore { + declare FilterType: ElasticFilter; + + private readonly client: Client; + + private readonly indexName: string; + + private readonly engine: ElasticKnnEngine; + + private readonly similarity: ElasticSimilarity; + + private readonly efConstruction: number; + + private readonly m: number; + + private readonly candidates: number; + + _vectorstoreType(): string { + return "elasticsearch"; + } + + constructor(embeddings: Embeddings, args: ElasticClientArgs) { + super(embeddings, args); + + this.engine = args.vectorSearchOptions?.engine ?? "hnsw"; + this.similarity = args.vectorSearchOptions?.similarity ?? "l2_norm"; + this.m = args.vectorSearchOptions?.m ?? 16; + this.efConstruction = args.vectorSearchOptions?.efConstruction ?? 100; + this.candidates = args.vectorSearchOptions?.candidates ?? 200; + + this.client = args.client.child({ + headers: { "user-agent": "langchain-js-vs/0.0.1" }, + }); + this.indexName = args.indexName ?? 
"documents"; + } + + /** + * Method to add documents to the Elasticsearch database. It first + * converts the documents to vectors using the embeddings, then adds the + * vectors to the database. + * @param documents The documents to add to the database. + * @param options Optional parameter that can contain the IDs for the documents. + * @returns A promise that resolves with the IDs of the added documents. + */ + async addDocuments(documents: Document[], options?: { ids?: string[] }) { + const texts = documents.map(({ pageContent }) => pageContent); + return this.addVectors( + await this.embeddings.embedDocuments(texts), + documents, + options + ); + } + + /** + * Method to add vectors to the Elasticsearch database. It ensures the + * index exists, then adds the vectors and their corresponding documents + * to the database. + * @param vectors The vectors to add to the database. + * @param documents The documents corresponding to the vectors. + * @param options Optional parameter that can contain the IDs for the documents. + * @returns A promise that resolves with the IDs of the added documents. + */ + async addVectors( + vectors: number[][], + documents: Document[], + options?: { ids?: string[] } + ) { + await this.ensureIndexExists( + vectors[0].length, + this.engine, + this.similarity, + this.efConstruction, + this.m + ); + const documentIds = + options?.ids ?? Array.from({ length: vectors.length }, () => uuid.v4()); + const operations = vectors.flatMap((embedding, idx) => [ + { + index: { + _id: documentIds[idx], + _index: this.indexName, + }, + }, + { + embedding, + metadata: documents[idx].metadata, + text: documents[idx].pageContent, + }, + ]); + await this.client.bulk({ refresh: true, operations }); + return documentIds; + } + + /** + * Method to perform a similarity search in the Elasticsearch database + * using a vector. It returns the k most similar documents along with + * their similarity scores. + * @param query The query vector. 
+ * @param k The number of most similar documents to return. + * @param filter Optional filter to apply to the search. + * @returns A promise that resolves with an array of tuples, where each tuple contains a Document and its similarity score. + */ + async similaritySearchVectorWithScore( + query: number[], + k: number, + filter?: ElasticFilter + ): Promise<[Document, number][]> { + const result = await this.client.search({ + index: this.indexName, + size: k, + knn: { + field: "embedding", + query_vector: query, + filter: this.buildMetadataTerms(filter), + k, + num_candidates: this.candidates, + }, + }); + + // eslint-disable-next-line @typescript-eslint/no-explicit-any + return result.hits.hits.map((hit: any) => [ + new Document({ + pageContent: hit._source.text, + metadata: hit._source.metadata, + }), + hit._score, + ]); + } + + /** + * Method to delete documents from the Elasticsearch database. + * @param params Object containing the IDs of the documents to delete. + * @returns A promise that resolves when the deletion is complete. + */ + async delete(params: { ids: string[] }): Promise { + const operations = params.ids.map((id) => ({ + delete: { + _id: id, + _index: this.indexName, + }, + })); + await this.client.bulk({ refresh: true, operations }); + } + + /** + * Static method to create an ElasticVectorSearch instance from texts. It + * creates Document instances from the texts and their corresponding + * metadata, then calls the fromDocuments method to create the + * ElasticVectorSearch instance. + * @param texts The texts to create the ElasticVectorSearch instance from. + * @param metadatas The metadata corresponding to the texts. + * @param embeddings The embeddings to use for the documents. + * @param args The arguments to create the Elasticsearch client. + * @returns A promise that resolves with the created ElasticVectorSearch instance. 
+ */ + static fromTexts( + texts: string[], + metadatas: object[] | object, + embeddings: Embeddings, + args: ElasticClientArgs + ): Promise { + const documents = texts.map((text, idx) => { + const metadata = Array.isArray(metadatas) ? metadatas[idx] : metadatas; + return new Document({ pageContent: text, metadata }); + }); + + return ElasticVectorSearch.fromDocuments(documents, embeddings, args); + } + + /** + * Static method to create an ElasticVectorSearch instance from Document + * instances. It adds the documents to the Elasticsearch database, then + * returns the ElasticVectorSearch instance. + * @param docs The Document instances to create the ElasticVectorSearch instance from. + * @param embeddings The embeddings to use for the documents. + * @param dbConfig The configuration for the Elasticsearch database. + * @returns A promise that resolves with the created ElasticVectorSearch instance. + */ + static async fromDocuments( + docs: Document[], + embeddings: Embeddings, + dbConfig: ElasticClientArgs + ): Promise { + const store = new ElasticVectorSearch(embeddings, dbConfig); + await store.addDocuments(docs).then(() => store); + return store; + } + + /** + * Static method to create an ElasticVectorSearch instance from an + * existing index in the Elasticsearch database. It checks if the index + * exists, then returns the ElasticVectorSearch instance if it does. + * @param embeddings The embeddings to use for the documents. + * @param dbConfig The configuration for the Elasticsearch database. + * @returns A promise that resolves with the created ElasticVectorSearch instance if the index exists, otherwise it throws an error. 
+ */ + static async fromExistingIndex( + embeddings: Embeddings, + dbConfig: ElasticClientArgs + ): Promise { + const store = new ElasticVectorSearch(embeddings, dbConfig); + const exists = await store.doesIndexExist(); + if (exists) { + return store; + } + throw new Error(`The index ${store.indexName} does not exist.`); + } + + private async ensureIndexExists( + dimension: number, + engine = "hnsw", + similarity = "l2_norm", + efConstruction = 100, + m = 16 + ): Promise { + const request: estypes.IndicesCreateRequest = { + index: this.indexName, + mappings: { + dynamic_templates: [ + { + // map all metadata properties to be keyword + "metadata.*": { + match_mapping_type: "*", + mapping: { type: "keyword" }, + }, + }, + ], + properties: { + text: { type: "text" }, + metadata: { type: "object" }, + embedding: { + type: "dense_vector", + dims: dimension, + index: true, + similarity, + index_options: { + type: engine, + m, + ef_construction: efConstruction, + }, + }, + }, + }, + }; + + const indexExists = await this.doesIndexExist(); + if (indexExists) return; + + await this.client.indices.create(request); + } + + private buildMetadataTerms( + filter?: ElasticFilter + // eslint-disable-next-line @typescript-eslint/no-explicit-any + ): { [operator: string]: { [field: string]: any } }[] { + if (filter == null) return []; + const result = []; + const filters = Array.isArray(filter) + ? filter + : Object.entries(filter).map(([key, value]) => ({ + operator: "term", + field: key, + value, + })); + for (const condition of filters) { + result.push({ + [condition.operator]: { + [`metadata.${condition.field}`]: condition.value, + }, + }); + } + return result; + } + + /** + * Method to check if an index exists in the Elasticsearch database. + * @returns A promise that resolves with a boolean indicating whether the index exists. 
+ */ + async doesIndexExist(): Promise { + return await this.client.indices.exists({ index: this.indexName }); + } + + /** + * Method to delete an index from the Elasticsearch database if it exists. + * @returns A promise that resolves when the deletion is complete. + */ + async deleteIfExists(): Promise { + const indexExists = await this.doesIndexExist(); + if (!indexExists) return; + + await this.client.indices.delete({ index: this.indexName }); + } +} diff --git a/libs/langchain-community/src/vectorstores/lancedb.ts b/libs/langchain-community/src/vectorstores/lancedb.ts new file mode 100644 index 000000000000..0da70a879548 --- /dev/null +++ b/libs/langchain-community/src/vectorstores/lancedb.ts @@ -0,0 +1,152 @@ +import { Table } from "vectordb"; +import { Embeddings } from "@langchain/core/embeddings"; +import { VectorStore } from "@langchain/core/vectorstores"; +import { Document } from "@langchain/core/documents" + +/** + * Defines the arguments for the LanceDB class constructor. It includes a + * table and an optional textKey. + */ +export type LanceDBArgs = { + table: Table; + textKey?: string; +}; + +/** + * A wrapper for an open-source database for vector-search with persistent + * storage. It simplifies retrieval, filtering, and management of + * embeddings. + */ +export class LanceDB extends VectorStore { + private table: Table; + + private textKey: string; + + constructor(embeddings: Embeddings, args: LanceDBArgs) { + super(embeddings, args); + this.table = args.table; + this.embeddings = embeddings; + this.textKey = args.textKey || "text"; + } + + /** + * Adds documents to the database. + * @param documents The documents to be added. + * @returns A Promise that resolves when the documents have been added. 
+ */ + async addDocuments(documents: Document[]): Promise { + const texts = documents.map(({ pageContent }) => pageContent); + return this.addVectors( + await this.embeddings.embedDocuments(texts), + documents + ); + } + + _vectorstoreType(): string { + return "lancedb"; + } + + /** + * Adds vectors and their corresponding documents to the database. + * @param vectors The vectors to be added. + * @param documents The corresponding documents to be added. + * @returns A Promise that resolves when the vectors and documents have been added. + */ + async addVectors(vectors: number[][], documents: Document[]): Promise { + if (vectors.length === 0) { + return; + } + if (vectors.length !== documents.length) { + throw new Error(`Vectors and documents must have the same length`); + } + + const data: Array> = []; + for (let i = 0; i < documents.length; i += 1) { + const record = { + vector: vectors[i], + [this.textKey]: documents[i].pageContent, + }; + Object.keys(documents[i].metadata).forEach((metaKey) => { + record[metaKey] = documents[i].metadata[metaKey]; + }); + data.push(record); + } + await this.table.add(data); + } + + /** + * Performs a similarity search on the vectors in the database and returns + * the documents and their scores. + * @param query The query vector. + * @param k The number of results to return. + * @returns A Promise that resolves with an array of tuples, each containing a Document and its score. 
+ */ + async similaritySearchVectorWithScore( + query: number[], + k: number + ): Promise<[Document, number][]> { + const results = await this.table.search(query).limit(k).execute(); + + const docsAndScore: [Document, number][] = []; + results.forEach((item) => { + const metadata: Record = {}; + Object.keys(item).forEach((key) => { + if (key !== "vector" && key !== "score" && key !== this.textKey) { + metadata[key] = item[key]; + } + }); + + docsAndScore.push([ + new Document({ + pageContent: item[this.textKey] as string, + metadata, + }), + item.score as number, + ]); + }); + return docsAndScore; + } + + /** + * Creates a new instance of LanceDB from texts. + * @param texts The texts to be converted into documents. + * @param metadatas The metadata for the texts. + * @param embeddings The embeddings to be managed. + * @param dbConfig The configuration for the LanceDB instance. + * @returns A Promise that resolves with a new instance of LanceDB. + */ + static async fromTexts( + texts: string[], + metadatas: object[] | object, + embeddings: Embeddings, + dbConfig: LanceDBArgs + ): Promise { + const docs: Document[] = []; + for (let i = 0; i < texts.length; i += 1) { + const metadata = Array.isArray(metadatas) ? metadatas[i] : metadatas; + const newDoc = new Document({ + pageContent: texts[i], + metadata, + }); + docs.push(newDoc); + } + return LanceDB.fromDocuments(docs, embeddings, dbConfig); + } + + /** + * Creates a new instance of LanceDB from documents. + * @param docs The documents to be added to the database. + * @param embeddings The embeddings to be managed. + * @param dbConfig The configuration for the LanceDB instance. + * @returns A Promise that resolves with a new instance of LanceDB. 
+ */ + static async fromDocuments( + docs: Document[], + embeddings: Embeddings, + dbConfig: LanceDBArgs + ): Promise { + const instance = new this(embeddings, dbConfig); + await instance.addDocuments(docs); + return instance; + } +} diff --git a/libs/langchain-community/src/vectorstores/milvus.ts b/libs/langchain-community/src/vectorstores/milvus.ts new file mode 100644 index 000000000000..e1e33c39ed1a --- /dev/null +++ b/libs/langchain-community/src/vectorstores/milvus.ts @@ -0,0 +1,674 @@ +import * as uuid from "uuid"; +import { + MilvusClient, + DataType, + DataTypeMap, + ErrorCode, + FieldType, + ClientConfig, +} from "@zilliz/milvus2-sdk-node"; + +import { Embeddings } from "@langchain/core/embeddings"; +import { VectorStore } from "@langchain/core/vectorstores"; +import { Document } from "@langchain/core/documents" +import { getEnvironmentVariable } from "@langchain/core/utils/env"; + +/** + * Interface for the arguments required by the Milvus class constructor. + */ +export interface MilvusLibArgs { + collectionName?: string; + primaryField?: string; + vectorField?: string; + textField?: string; + url?: string; // db address + ssl?: boolean; + username?: string; + password?: string; + textFieldMaxLength?: number; + clientConfig?: ClientConfig; + autoId?: boolean; +} + +/** + * Type representing the type of index used in the Milvus database. + */ +type IndexType = + | "IVF_FLAT" + | "IVF_SQ8" + | "IVF_PQ" + | "HNSW" + | "RHNSW_FLAT" + | "RHNSW_SQ" + | "RHNSW_PQ" + | "IVF_HNSW" + | "ANNOY"; + +/** + * Interface for the parameters required to create an index in the Milvus + * database. 
+ */ +interface IndexParam { + params: { nprobe?: number; ef?: number; search_k?: number }; +} + +interface InsertRow { + [x: string]: string | number[]; +} + +const MILVUS_PRIMARY_FIELD_NAME = "langchain_primaryid"; +const MILVUS_VECTOR_FIELD_NAME = "langchain_vector"; +const MILVUS_TEXT_FIELD_NAME = "langchain_text"; +const MILVUS_COLLECTION_NAME_PREFIX = "langchain_col"; + +/** + * Class for interacting with a Milvus database. Extends the VectorStore + * class. + */ +export class Milvus extends VectorStore { + get lc_secrets(): { [key: string]: string } { + return { + ssl: "MILVUS_SSL", + username: "MILVUS_USERNAME", + password: "MILVUS_PASSWORD", + }; + } + + declare FilterType: string; + + collectionName: string; + + numDimensions?: number; + + autoId?: boolean; + + primaryField: string; + + vectorField: string; + + textField: string; + + textFieldMaxLength: number; + + fields: string[]; + + client: MilvusClient; + + indexParams: Record = { + IVF_FLAT: { params: { nprobe: 10 } }, + IVF_SQ8: { params: { nprobe: 10 } }, + IVF_PQ: { params: { nprobe: 10 } }, + HNSW: { params: { ef: 10 } }, + RHNSW_FLAT: { params: { ef: 10 } }, + RHNSW_SQ: { params: { ef: 10 } }, + RHNSW_PQ: { params: { ef: 10 } }, + IVF_HNSW: { params: { nprobe: 10, ef: 10 } }, + ANNOY: { params: { search_k: 10 } }, + }; + + indexCreateParams = { + index_type: "HNSW", + metric_type: "L2", + params: JSON.stringify({ M: 8, efConstruction: 64 }), + }; + + indexSearchParams = JSON.stringify({ ef: 64 }); + + _vectorstoreType(): string { + return "milvus"; + } + + constructor(embeddings: Embeddings, args: MilvusLibArgs) { + super(embeddings, args); + this.embeddings = embeddings; + this.collectionName = args.collectionName ?? genCollectionName(); + this.textField = args.textField ?? MILVUS_TEXT_FIELD_NAME; + + this.autoId = args.autoId ?? true; + this.primaryField = args.primaryField ?? MILVUS_PRIMARY_FIELD_NAME; + this.vectorField = args.vectorField ?? 
MILVUS_VECTOR_FIELD_NAME; + + this.textFieldMaxLength = args.textFieldMaxLength ?? 0; + + this.fields = []; + + const url = args.url ?? getEnvironmentVariable("MILVUS_URL"); + const { + address = "", + username = "", + password = "", + ssl, + } = args.clientConfig || {}; + + // combine args clientConfig and env variables + const clientConfig: ClientConfig = { + ...(args.clientConfig || {}), + address: url || address, + username: args.username || username, + password: args.password || password, + ssl: args.ssl || ssl, + }; + + if (!clientConfig.address) { + throw new Error("Milvus URL address is not provided."); + } + this.client = new MilvusClient(clientConfig); + } + + /** + * Adds documents to the Milvus database. + * @param documents Array of Document instances to be added to the database. + * @returns Promise resolving to void. + */ + async addDocuments(documents: Document[]): Promise { + const texts = documents.map(({ pageContent }) => pageContent); + await this.addVectors( + await this.embeddings.embedDocuments(texts), + documents + ); + } + + /** + * Adds vectors to the Milvus database. + * @param vectors Array of vectors to be added to the database. + * @param documents Array of Document instances associated with the vectors. + * @returns Promise resolving to void. 
+ */ + async addVectors(vectors: number[][], documents: Document[]): Promise { + if (vectors.length === 0) { + return; + } + await this.ensureCollection(vectors, documents); + + const insertDatas: InsertRow[] = []; + // eslint-disable-next-line no-plusplus + for (let index = 0; index < vectors.length; index++) { + const vec = vectors[index]; + const doc = documents[index]; + const data: InsertRow = { + [this.textField]: doc.pageContent, + [this.vectorField]: vec, + }; + this.fields.forEach((field) => { + switch (field) { + case this.primaryField: + if (!this.autoId) { + if (doc.metadata[this.primaryField] === undefined) { + throw new Error( + `The Collection's primaryField is configured with autoId=false, thus its value must be provided through metadata.` + ); + } + data[field] = doc.metadata[this.primaryField]; + } + break; + case this.textField: + data[field] = doc.pageContent; + break; + case this.vectorField: + data[field] = vec; + break; + default: // metadata fields + if (doc.metadata[field] === undefined) { + throw new Error( + `The field "${field}" is not provided in documents[${index}].metadata.` + ); + } else if (typeof doc.metadata[field] === "object") { + data[field] = JSON.stringify(doc.metadata[field]); + } else { + data[field] = doc.metadata[field]; + } + break; + } + }); + + insertDatas.push(data); + } + + const insertResp = await this.client.insert({ + collection_name: this.collectionName, + fields_data: insertDatas, + }); + if (insertResp.status.error_code !== ErrorCode.SUCCESS) { + throw new Error(`Error inserting data: ${JSON.stringify(insertResp)}`); + } + await this.client.flushSync({ collection_names: [this.collectionName] }); + } + + /** + * Searches for vectors in the Milvus database that are similar to a given + * vector. + * @param query Vector to compare with the vectors in the database. + * @param k Number of similar vectors to return. + * @param filter Optional filter to apply to the search. 
+ * @returns Promise resolving to an array of tuples, each containing a Document instance and a similarity score. + */ + async similaritySearchVectorWithScore( + query: number[], + k: number, + filter?: string + ): Promise<[Document, number][]> { + const hasColResp = await this.client.hasCollection({ + collection_name: this.collectionName, + }); + if (hasColResp.status.error_code !== ErrorCode.SUCCESS) { + throw new Error(`Error checking collection: ${hasColResp}`); + } + if (hasColResp.value === false) { + throw new Error( + `Collection not found: ${this.collectionName}, please create collection before search.` + ); + } + + const filterStr = filter ?? ""; + + await this.grabCollectionFields(); + + const loadResp = await this.client.loadCollectionSync({ + collection_name: this.collectionName, + }); + if (loadResp.error_code !== ErrorCode.SUCCESS) { + throw new Error(`Error loading collection: ${loadResp}`); + } + + // clone this.field and remove vectorField + const outputFields = this.fields.filter( + (field) => field !== this.vectorField + ); + + const searchResp = await this.client.search({ + collection_name: this.collectionName, + search_params: { + anns_field: this.vectorField, + topk: k.toString(), + metric_type: this.indexCreateParams.metric_type, + params: this.indexSearchParams, + }, + output_fields: outputFields, + vector_type: DataType.FloatVector, + vectors: [query], + filter: filterStr, + }); + if (searchResp.status.error_code !== ErrorCode.SUCCESS) { + throw new Error(`Error searching data: ${JSON.stringify(searchResp)}`); + } + const results: [Document, number][] = []; + searchResp.results.forEach((result) => { + const fields = { + pageContent: "", + // eslint-disable-next-line @typescript-eslint/no-explicit-any + metadata: {} as Record, + }; + Object.keys(result).forEach((key) => { + if (key === this.textField) { + fields.pageContent = result[key]; + } else if (this.fields.includes(key) || key === this.primaryField) { + if (typeof result[key] === 
"string") { + const { isJson, obj } = checkJsonString(result[key]); + fields.metadata[key] = isJson ? obj : result[key]; + } else { + fields.metadata[key] = result[key]; + } + } + }); + results.push([new Document(fields), result.score]); + }); + // console.log("Search result: " + JSON.stringify(results, null, 2)); + return results; + } + + /** + * Ensures that a collection exists in the Milvus database. + * @param vectors Optional array of vectors to be used if a new collection needs to be created. + * @param documents Optional array of Document instances to be used if a new collection needs to be created. + * @returns Promise resolving to void. + */ + async ensureCollection(vectors?: number[][], documents?: Document[]) { + const hasColResp = await this.client.hasCollection({ + collection_name: this.collectionName, + }); + if (hasColResp.status.error_code !== ErrorCode.SUCCESS) { + throw new Error( + `Error checking collection: ${JSON.stringify(hasColResp, null, 2)}` + ); + } + + if (hasColResp.value === false) { + if (vectors === undefined || documents === undefined) { + throw new Error( + `Collection not found: ${this.collectionName}, please provide vectors and documents to create collection.` + ); + } + await this.createCollection(vectors, documents); + } else { + await this.grabCollectionFields(); + } + } + + /** + * Creates a collection in the Milvus database. + * @param vectors Array of vectors to be added to the new collection. + * @param documents Array of Document instances to be added to the new collection. + * @returns Promise resolving to void. 
+ */ + async createCollection( + vectors: number[][], + documents: Document[] + ): Promise { + const fieldList: FieldType[] = []; + + fieldList.push(...createFieldTypeForMetadata(documents, this.primaryField)); + + fieldList.push( + { + name: this.primaryField, + description: "Primary key", + data_type: DataType.Int64, + is_primary_key: true, + autoID: this.autoId, + }, + { + name: this.textField, + description: "Text field", + data_type: DataType.VarChar, + type_params: { + max_length: + this.textFieldMaxLength > 0 + ? this.textFieldMaxLength.toString() + : getTextFieldMaxLength(documents).toString(), + }, + }, + { + name: this.vectorField, + description: "Vector field", + data_type: DataType.FloatVector, + type_params: { + dim: getVectorFieldDim(vectors).toString(), + }, + } + ); + + fieldList.forEach((field) => { + if (!field.autoID) { + this.fields.push(field.name); + } + }); + + const createRes = await this.client.createCollection({ + collection_name: this.collectionName, + fields: fieldList, + }); + + if (createRes.error_code !== ErrorCode.SUCCESS) { + console.log(createRes); + throw new Error(`Failed to create collection: ${createRes}`); + } + + await this.client.createIndex({ + collection_name: this.collectionName, + field_name: this.vectorField, + extra_params: this.indexCreateParams, + }); + } + + /** + * Retrieves the fields of a collection in the Milvus database. + * @returns Promise resolving to void. 
+ */ + async grabCollectionFields(): Promise { + if (!this.collectionName) { + throw new Error("Need collection name to grab collection fields"); + } + if ( + this.primaryField && + this.vectorField && + this.textField && + this.fields.length > 0 + ) { + return; + } + const desc = await this.client.describeCollection({ + collection_name: this.collectionName, + }); + desc.schema.fields.forEach((field) => { + this.fields.push(field.name); + if (field.autoID) { + const index = this.fields.indexOf(field.name); + if (index !== -1) { + this.fields.splice(index, 1); + } + } + if (field.is_primary_key) { + this.primaryField = field.name; + } + const dtype = DataTypeMap[field.data_type]; + if (dtype === DataType.FloatVector || dtype === DataType.BinaryVector) { + this.vectorField = field.name; + } + + if (dtype === DataType.VarChar && field.name === MILVUS_TEXT_FIELD_NAME) { + this.textField = field.name; + } + }); + } + + /** + * Creates a Milvus instance from a set of texts and their associated + * metadata. + * @param texts Array of texts to be added to the database. + * @param metadatas Array of metadata objects associated with the texts. + * @param embeddings Embeddings instance used to generate vector embeddings for the texts. + * @param dbConfig Optional configuration for the Milvus database. + * @returns Promise resolving to a new Milvus instance. + */ + static async fromTexts( + texts: string[], + metadatas: object[] | object, + embeddings: Embeddings, + dbConfig?: MilvusLibArgs + ): Promise { + const docs: Document[] = []; + for (let i = 0; i < texts.length; i += 1) { + const metadata = Array.isArray(metadatas) ? metadatas[i] : metadatas; + const newDoc = new Document({ + pageContent: texts[i], + metadata, + }); + docs.push(newDoc); + } + return Milvus.fromDocuments(docs, embeddings, dbConfig); + } + + /** + * Creates a Milvus instance from a set of Document instances. + * @param docs Array of Document instances to be added to the database. 
+ * @param embeddings Embeddings instance used to generate vector embeddings for the documents. + * @param dbConfig Optional configuration for the Milvus database. + * @returns Promise resolving to a new Milvus instance. + */ + static async fromDocuments( + docs: Document[], + embeddings: Embeddings, + dbConfig?: MilvusLibArgs + ): Promise { + const args: MilvusLibArgs = { + collectionName: dbConfig?.collectionName || genCollectionName(), + url: dbConfig?.url, + ssl: dbConfig?.ssl, + username: dbConfig?.username, + password: dbConfig?.password, + textField: dbConfig?.textField, + primaryField: dbConfig?.primaryField, + vectorField: dbConfig?.vectorField, + clientConfig: dbConfig?.clientConfig, + autoId: dbConfig?.autoId, + }; + const instance = new this(embeddings, args); + await instance.addDocuments(docs); + return instance; + } + + /** + * Creates a Milvus instance from an existing collection in the Milvus + * database. + * @param embeddings Embeddings instance used to generate vector embeddings for the documents in the collection. + * @param dbConfig Configuration for the Milvus database. + * @returns Promise resolving to a new Milvus instance. + */ + static async fromExistingCollection( + embeddings: Embeddings, + dbConfig: MilvusLibArgs + ): Promise { + const instance = new this(embeddings, dbConfig); + await instance.ensureCollection(); + return instance; + } + + /** + * Deletes data from the Milvus database. + * @param params Object containing a filter to apply to the deletion. + * @returns Promise resolving to void. 
+ */ + async delete(params: { filter: string }): Promise { + const hasColResp = await this.client.hasCollection({ + collection_name: this.collectionName, + }); + if (hasColResp.status.error_code !== ErrorCode.SUCCESS) { + throw new Error(`Error checking collection: ${hasColResp}`); + } + if (hasColResp.value === false) { + throw new Error( + `Collection not found: ${this.collectionName}, please create collection before search.` + ); + } + + const { filter } = params; + + const deleteResp = await this.client.deleteEntities({ + collection_name: this.collectionName, + expr: filter, + }); + + if (deleteResp.status.error_code !== ErrorCode.SUCCESS) { + throw new Error(`Error deleting data: ${JSON.stringify(deleteResp)}`); + } + } +} + +function createFieldTypeForMetadata( + documents: Document[], + primaryFieldName: string +): FieldType[] { + const sampleMetadata = documents[0].metadata; + let textFieldMaxLength = 0; + let jsonFieldMaxLength = 0; + documents.forEach(({ metadata }) => { + // check all keys name and count in metadata is same as sampleMetadata + Object.keys(metadata).forEach((key) => { + if ( + !(key in metadata) || + typeof metadata[key] !== typeof sampleMetadata[key] + ) { + throw new Error( + "All documents must have same metadata keys and datatype" + ); + } + + // find max length of string field and json field, cache json string value + if (typeof metadata[key] === "string") { + if (metadata[key].length > textFieldMaxLength) { + textFieldMaxLength = metadata[key].length; + } + } else if (typeof metadata[key] === "object") { + const json = JSON.stringify(metadata[key]); + if (json.length > jsonFieldMaxLength) { + jsonFieldMaxLength = json.length; + } + } + }); + }); + + const fields: FieldType[] = []; + for (const [key, value] of Object.entries(sampleMetadata)) { + const type = typeof value; + + if (key === primaryFieldName) { + /** + * skip primary field + * because we will create primary field in createCollection + * */ + } else if (type === "string") 
{ + fields.push({ + name: key, + description: `Metadata String field`, + data_type: DataType.VarChar, + type_params: { + max_length: textFieldMaxLength.toString(), + }, + }); + } else if (type === "number") { + fields.push({ + name: key, + description: `Metadata Number field`, + data_type: DataType.Float, + }); + } else if (type === "boolean") { + fields.push({ + name: key, + description: `Metadata Boolean field`, + data_type: DataType.Bool, + }); + } else if (value === null) { + // skip + } else { + // use json for other types + try { + fields.push({ + name: key, + description: `Metadata JSON field`, + data_type: DataType.VarChar, + type_params: { + max_length: jsonFieldMaxLength.toString(), + }, + }); + } catch (e) { + throw new Error("Failed to parse metadata field as JSON"); + } + } + } + return fields; +} + +function genCollectionName(): string { + return `${MILVUS_COLLECTION_NAME_PREFIX}_${uuid.v4().replaceAll("-", "")}`; +} + +function getTextFieldMaxLength(documents: Document[]) { + let textMaxLength = 0; + const textEncoder = new TextEncoder(); + // eslint-disable-next-line no-plusplus + for (let i = 0; i < documents.length; i++) { + const text = documents[i].pageContent; + const textLengthInBytes = textEncoder.encode(text).length; + if (textLengthInBytes > textMaxLength) { + textMaxLength = textLengthInBytes; + } + } + return textMaxLength; +} + +function getVectorFieldDim(vectors: number[][]) { + if (vectors.length === 0) { + throw new Error("No vectors found"); + } + return vectors[0].length; +} + +// eslint-disable-next-line @typescript-eslint/no-explicit-any +function checkJsonString(value: string): { isJson: boolean; obj: any } { + try { + const result = JSON.parse(value); + return { isJson: true, obj: result }; + } catch (e) { + return { isJson: false, obj: null }; + } +} diff --git a/libs/langchain-community/src/vectorstores/myscale.ts b/libs/langchain-community/src/vectorstores/myscale.ts new file mode 100644 index 000000000000..f288cac4b60e --- 
// --- new file: libs/langchain-community/src/vectorstores/myscale.ts ---
import * as uuid from "uuid";
import { ClickHouseClient, createClient } from "@clickhouse/client";

import { Embeddings } from "@langchain/core/embeddings";
import { VectorStore } from "@langchain/core/vectorstores";
import { Document } from "@langchain/core/documents";

/**
 * Arguments for the MyScaleStore class, which include the host, port,
 * protocol, username, password, index type, index parameters, column map,
 * database, table, and metric.
 */
export interface MyScaleLibArgs {
  host: string;
  port: string | number;
  protocol?: string;
  username: string;
  password: string;
  indexType?: string;
  // Generic arguments restored — the extracted source had a bare `Record`.
  // Values are interpolated into the index DDL as `'key=value'` pairs.
  indexParam?: Record<string, string | number>;
  columnMap?: ColumnMap;
  database?: string;
  table?: string;
  metric?: metric;
}

/**
 * Mapping of columns in the MyScale database.
 */
export interface ColumnMap {
  id: string;
  text: string;
  vector: string;
  metadata: string;
}

/**
 * Type of metric used in the MyScale database.
 */
export type metric = "L2" | "Cosine" | "IP";

/**
 * Type for filtering search results in the MyScale database.
 */
export interface MyScaleFilter {
  whereStr: string;
}

/**
 * Class for interacting with the MyScale database. It extends the
 * VectorStore class and provides methods for adding vectors and
 * documents, searching for similar vectors, and creating instances from
 * texts or documents.
+ */ +export class MyScaleStore extends VectorStore { + declare FilterType: MyScaleFilter; + + private client: ClickHouseClient; + + private indexType: string; + + private indexParam: Record; + + private columnMap: ColumnMap; + + private database: string; + + private table: string; + + private metric: metric; + + private isInitialized = false; + + _vectorstoreType(): string { + return "myscale"; + } + + constructor(embeddings: Embeddings, args: MyScaleLibArgs) { + super(embeddings, args); + + this.indexType = args.indexType || "MSTG"; + this.indexParam = args.indexParam || {}; + this.columnMap = args.columnMap || { + id: "id", + text: "text", + vector: "vector", + metadata: "metadata", + }; + this.database = args.database || "default"; + this.table = args.table || "vector_table"; + this.metric = args.metric || "Cosine"; + + this.client = createClient({ + host: `${args.protocol ?? "https://"}${args.host}:${args.port}`, + username: args.username, + password: args.password, + session_id: uuid.v4(), + }); + } + + /** + * Method to add vectors to the MyScale database. + * @param vectors The vectors to add. + * @param documents The documents associated with the vectors. + * @returns Promise that resolves when the vectors have been added. + */ + async addVectors(vectors: number[][], documents: Document[]): Promise { + if (vectors.length === 0) { + return; + } + + if (!this.isInitialized) { + await this.initialize(vectors[0].length); + } + + const queryStr = this.buildInsertQuery(vectors, documents); + await this.client.exec({ query: queryStr }); + } + + /** + * Method to add documents to the MyScale database. + * @param documents The documents to add. + * @returns Promise that resolves when the documents have been added. 
+ */ + async addDocuments(documents: Document[]): Promise { + return this.addVectors( + await this.embeddings.embedDocuments(documents.map((d) => d.pageContent)), + documents + ); + } + + /** + * Method to search for vectors that are similar to a given query vector. + * @param query The query vector. + * @param k The number of similar vectors to return. + * @param filter Optional filter for the search results. + * @returns Promise that resolves with an array of tuples, each containing a Document and a score. + */ + async similaritySearchVectorWithScore( + query: number[], + k: number, + filter?: this["FilterType"] + ): Promise<[Document, number][]> { + if (!this.isInitialized) { + await this.initialize(query.length); + } + const queryStr = this.buildSearchQuery(query, k, filter); + + const queryResultSet = await this.client.query({ query: queryStr }); + const queryResult: { + data: { text: string; metadata: object; dist: number }[]; + } = await queryResultSet.json(); + + const result: [Document, number][] = queryResult.data.map((item) => [ + new Document({ pageContent: item.text, metadata: item.metadata }), + item.dist, + ]); + + return result; + } + + /** + * Static method to create an instance of MyScaleStore from texts. + * @param texts The texts to use. + * @param metadatas The metadata associated with the texts. + * @param embeddings The embeddings to use. + * @param args The arguments for the MyScaleStore. + * @returns Promise that resolves with a new instance of MyScaleStore. + */ + static async fromTexts( + texts: string[], + metadatas: object | object[], + embeddings: Embeddings, + args: MyScaleLibArgs + ): Promise { + const docs: Document[] = []; + for (let i = 0; i < texts.length; i += 1) { + const metadata = Array.isArray(metadatas) ? 
metadatas[i] : metadatas; + const newDoc = new Document({ + pageContent: texts[i], + metadata, + }); + docs.push(newDoc); + } + return MyScaleStore.fromDocuments(docs, embeddings, args); + } + + /** + * Static method to create an instance of MyScaleStore from documents. + * @param docs The documents to use. + * @param embeddings The embeddings to use. + * @param args The arguments for the MyScaleStore. + * @returns Promise that resolves with a new instance of MyScaleStore. + */ + static async fromDocuments( + docs: Document[], + embeddings: Embeddings, + args: MyScaleLibArgs + ): Promise { + const instance = new this(embeddings, args); + await instance.addDocuments(docs); + return instance; + } + + /** + * Static method to create an instance of MyScaleStore from an existing + * index. + * @param embeddings The embeddings to use. + * @param args The arguments for the MyScaleStore. + * @returns Promise that resolves with a new instance of MyScaleStore. + */ + static async fromExistingIndex( + embeddings: Embeddings, + args: MyScaleLibArgs + ): Promise { + const instance = new this(embeddings, args); + + await instance.initialize(); + return instance; + } + + /** + * Method to initialize the MyScale database. + * @param dimension Optional dimension of the vectors. + * @returns Promise that resolves when the database has been initialized. + */ + private async initialize(dimension?: number): Promise { + const dim = dimension ?? 
(await this.embeddings.embedQuery("test")).length; + + let indexParamStr = ""; + for (const [key, value] of Object.entries(this.indexParam)) { + indexParamStr += `, '${key}=${value}'`; + } + + const query = ` + CREATE TABLE IF NOT EXISTS ${this.database}.${this.table}( + ${this.columnMap.id} String, + ${this.columnMap.text} String, + ${this.columnMap.vector} Array(Float32), + ${this.columnMap.metadata} JSON, + CONSTRAINT cons_vec_len CHECK length(${this.columnMap.vector}) = ${dim}, + VECTOR INDEX vidx ${this.columnMap.vector} TYPE ${this.indexType}('metric_type=${this.metric}'${indexParamStr}) + ) ENGINE = MergeTree ORDER BY ${this.columnMap.id} + `; + + await this.client.exec({ query: "SET allow_experimental_object_type=1" }); + await this.client.exec({ + query: "SET output_format_json_named_tuples_as_objects = 1", + }); + await this.client.exec({ query }); + this.isInitialized = true; + } + + /** + * Method to build an SQL query for inserting vectors and documents into + * the MyScale database. + * @param vectors The vectors to insert. + * @param documents The documents to insert. + * @returns The SQL query string. + */ + private buildInsertQuery(vectors: number[][], documents: Document[]): string { + const columnsStr = Object.values(this.columnMap).join(", "); + + const data: string[] = []; + for (let i = 0; i < vectors.length; i += 1) { + const vector = vectors[i]; + const document = documents[i]; + const item = [ + `'${uuid.v4()}'`, + `'${this.escapeString(document.pageContent)}'`, + `[${vector}]`, + `'${JSON.stringify(document.metadata)}'`, + ].join(", "); + data.push(`(${item})`); + } + const dataStr = data.join(", "); + + return ` + INSERT INTO TABLE + ${this.database}.${this.table}(${columnsStr}) + VALUES + ${dataStr} + `; + } + + private escapeString(str: string): string { + return str.replace(/\\/g, "\\\\").replace(/'/g, "\\'"); + } + + /** + * Method to build an SQL query for searching for similar vectors in the + * MyScale database. 
+ * @param query The query vector. + * @param k The number of similar vectors to return. + * @param filter Optional filter for the search results. + * @returns The SQL query string. + */ + private buildSearchQuery( + query: number[], + k: number, + filter?: MyScaleFilter + ): string { + const order = this.metric === "IP" ? "DESC" : "ASC"; + + const whereStr = filter ? `PREWHERE ${filter.whereStr}` : ""; + return ` + SELECT ${this.columnMap.text} AS text, ${this.columnMap.metadata} AS metadata, dist + FROM ${this.database}.${this.table} + ${whereStr} + ORDER BY distance(${this.columnMap.vector}, [${query}]) AS dist ${order} + LIMIT ${k} + `; + } +} diff --git a/libs/langchain-community/src/vectorstores/neo4j_vector.ts b/libs/langchain-community/src/vectorstores/neo4j_vector.ts new file mode 100644 index 000000000000..a2f6482063d8 --- /dev/null +++ b/libs/langchain-community/src/vectorstores/neo4j_vector.ts @@ -0,0 +1,731 @@ +import neo4j from "neo4j-driver"; +import * as uuid from "uuid"; +import { Embeddings } from "@langchain/core/embeddings"; +import { VectorStore } from "@langchain/core/vectorstores"; +import { Document } from "@langchain/core/documents" + +export type SearchType = "vector" | "hybrid"; + +export type DistanceStrategy = "euclidean" | "cosine"; + +interface Neo4jVectorStoreArgs { + url: string; + username: string; + password: string; + database?: string; + preDeleteCollection?: boolean; + textNodeProperty?: string; + textNodeProperties?: string[]; + embeddingNodeProperty?: string; + keywordIndexName?: string; + indexName?: string; + searchType?: SearchType; + retrievalQuery?: string; + nodeLabel?: string; + createIdIndex?: boolean; +} + +const DEFAULT_SEARCH_TYPE = "vector"; +const DEFAULT_DISTANCE_STRATEGY = "cosine"; + +/** + * @security *Security note*: Make sure that the database connection uses credentials + * that are narrowly-scoped to only include necessary permissions. 
 * Failure to do so may result in data corruption or loss, since the calling
 * code may attempt commands that would result in deletion, mutation
 * of data if appropriately prompted or reading sensitive data if such
 * data is present in the database.
 * The best way to guard against such negative outcomes is to (as appropriate)
 * limit the permissions granted to the credentials used with this tool.
 * For example, creating read only users for the database is a good way to
 * ensure that the calling code cannot mutate or delete data.
 *
 * @link See https://js.langchain.com/docs/security for more information.
 */
export class Neo4jVectorStore extends VectorStore {
  private driver: neo4j.Driver;

  private database: string;

  private preDeleteCollection: boolean;

  private nodeLabel: string;

  private embeddingNodeProperty: string;

  private embeddingDimension: number;

  private textNodeProperty: string;

  private keywordIndexName: string;

  private indexName: string;

  private retrievalQuery: string;

  private searchType: SearchType;

  private distanceStrategy: DistanceStrategy = DEFAULT_DISTANCE_STRATEGY;

  _vectorstoreType(): string {
    return "neo4jvector";
  }

  // The constructor only calls super(); all real setup (driver, defaults,
  // dimension probing) happens in the static initialize() factory below.
  constructor(embeddings: Embeddings, config: Neo4jVectorStoreArgs) {
    super(embeddings, config);
  }

  // Async factory: connects and verifies the driver, applies config
  // defaults, probes the embedding dimension with a test query, and
  // optionally drops pre-existing data/index.
  static async initialize(
    embeddings: Embeddings,
    config: Neo4jVectorStoreArgs
  ) {
    const store = new Neo4jVectorStore(embeddings, config);
    await store._initializeDriver(config);
    await store._verifyConnectivity();

    const {
      preDeleteCollection = false,
      nodeLabel = "Chunk",
      textNodeProperty = "text",
      embeddingNodeProperty = "embedding",
      keywordIndexName = "keyword",
      indexName = "vector",
      retrievalQuery = "",
      searchType = DEFAULT_SEARCH_TYPE,
    } = config;

    // Embedding a probe string reveals the model's output dimension.
    store.embeddingDimension = (await embeddings.embedQuery("foo")).length;
    store.preDeleteCollection = preDeleteCollection;
    store.nodeLabel = nodeLabel;
    store.textNodeProperty = textNodeProperty;
    store.embeddingNodeProperty = embeddingNodeProperty;
    store.keywordIndexName = keywordIndexName;
    store.indexName = indexName;
    store.retrievalQuery = retrievalQuery;
    store.searchType = searchType;

    if (store.preDeleteCollection) {
      await store._dropIndex();
    }

    return store;
  }

  async _initializeDriver({
    url,
    username,
    password,
    database = "neo4j",
  }: Neo4jVectorStoreArgs) {
    try {
      this.driver = neo4j.driver(url, neo4j.auth.basic(username, password));
      this.database = database;
    } catch (error) {
      throw new Error(
        "Could not create a Neo4j driver instance. Please check the connection details."
      );
    }
  }

  async _verifyConnectivity() {
    await this.driver.verifyAuthentication();
  }

  async close() {
    await this.driver.close();
  }

  // Deletes all nodes with the configured label (in 10000-row transaction
  // batches) and drops the vector index. Errors are logged, not rethrown.
  async _dropIndex() {
    try {
      await this.query(`
        MATCH (n:\`${this.nodeLabel}\`)
        CALL {
          WITH n
          DETACH DELETE n
        }
        IN TRANSACTIONS OF 10000 ROWS;
      `);
      await this.query(`DROP INDEX ${this.indexName}`);
    } catch (error) {
      console.error("An error occurred while dropping the index:", error);
    }
  }

  // Runs a Cypher query on the configured database and converts the records
  // to plain objects via toObjects (defined elsewhere in this file).
  // NOTE(review): the generic on Promise appears lost in extraction, and the
  // session is never closed — possible session leak; confirm upstream.
  // eslint-disable-next-line @typescript-eslint/no-explicit-any
  async query(query: string, params: any = {}): Promise {
    const session = this.driver.session({ database: this.database });
    const result = await session.run(query, params);
    return toObjects(result.records);
  }

  // Wraps each text (plus its metadata) in a Document and delegates to
  // fromDocuments.
  static async fromTexts(
    texts: string[],
    // eslint-disable-next-line @typescript-eslint/no-explicit-any
    metadatas: any,
    embeddings: Embeddings,
    config: Neo4jVectorStoreArgs
  ): Promise {
    const docs = [];

    for (let i = 0; i < texts.length; i += 1) {
      // A single metadata object is shared across all texts.
      const metadata = Array.isArray(metadatas) ? metadatas[i] : metadatas;
      const newDoc = new Document({
        pageContent: texts[i],
        metadata,
      });
      docs.push(newDoc);
    }

    return Neo4jVectorStore.fromDocuments(docs, embeddings, config);
  }

  // Initializes the store, creates (or validates) the vector index — and,
  // for hybrid search, the keyword index — then ingests the documents.
  static async fromDocuments(
    docs: Document[],
    embeddings: Embeddings,
    config: Neo4jVectorStoreArgs
  ): Promise {
    const {
      searchType = DEFAULT_SEARCH_TYPE,
      createIdIndex = true,
      textNodeProperties = [],
    } = config;

    const store = await this.initialize(embeddings, config);

    const embeddingDimension = await store.retrieveExistingIndex();

    if (!embeddingDimension) {
      await store.createNewIndex();
    } else if (store.embeddingDimension !== embeddingDimension) {
      // An existing index must match the embedding model's dimension.
      throw new Error(
        `Index with name "${store.indexName}" already exists. The provided embedding function and vector index dimensions do not match.
        Embedding function dimension: ${store.embeddingDimension}
        Vector index dimension: ${embeddingDimension}`
      );
    }

    if (searchType === "hybrid") {
      const ftsNodeLabel = await store.retrieveExistingFtsIndex();

      if (!ftsNodeLabel) {
        await store.createNewKeywordIndex(textNodeProperties);
      } else {
        if (ftsNodeLabel !== store.nodeLabel) {
          throw Error(
            "Vector and keyword index don't index the same node label"
          );
        }
      }
    }

    if (createIdIndex) {
      await store.query(
        `CREATE CONSTRAINT IF NOT EXISTS FOR (n:${store.nodeLabel}) REQUIRE n.id IS UNIQUE;`
      );
    }

    await store.addDocuments(docs);

    return store;
  }

  // Connects to an index that must already exist; validates dimensions and,
  // for hybrid search, the keyword index. Does not ingest any documents.
  static async fromExistingIndex(
    embeddings: Embeddings,
    config: Neo4jVectorStoreArgs
  ) {
    const { searchType = DEFAULT_SEARCH_TYPE, keywordIndexName = "keyword" } =
      config;

    if (searchType === "hybrid" && !keywordIndexName) {
      throw Error(
        "keyword_index name has to be specified when using hybrid search option"
      );
    }

    const store = await this.initialize(embeddings, config);
    const embeddingDimension = await store.retrieveExistingIndex();

    if (!embeddingDimension) {
      throw Error(
        "The specified vector index name does not exist. Make sure to check if you spelled it correctly"
      );
    }

    if (store.embeddingDimension !== embeddingDimension) {
      throw new Error(
        `The provided embedding function and vector index dimensions do not match.
        Embedding function dimension: ${store.embeddingDimension}
        Vector index dimension: ${embeddingDimension}`
      );
    }

    if (searchType === "hybrid") {
      const ftsNodeLabel = await store.retrieveExistingFtsIndex();

      if (!ftsNodeLabel) {
        throw Error(
          "The specified keyword index name does not exist. Make sure to check if you spelled it correctly"
        );
      } else {
        if (ftsNodeLabel !== store.nodeLabel) {
          throw Error(
            "Vector and keyword index don't index the same node label"
          );
        }
      }
    }

    return store;
  }

  // Builds a store over nodes that already exist in the graph: derives a
  // default retrieval query from textNodeProperties, creates/validates the
  // indexes, then backfills embeddings for nodes that lack them, in batches
  // of 1000, until none remain.
  static async fromExistingGraph(
    embeddings: Embeddings,
    config: Neo4jVectorStoreArgs
  ) {
    const {
      textNodeProperties = [],
      embeddingNodeProperty,
      searchType = DEFAULT_SEARCH_TYPE,
      retrievalQuery = "",
      nodeLabel,
    } = config;

    let _retrievalQuery = retrievalQuery;

    if (textNodeProperties.length === 0) {
      throw Error(
        "Parameter `text_node_properties` must not be an empty array"
      );
    }

    if (!retrievalQuery) {
      // Default query: concatenate the text properties into `text` and
      // null out embedding/id/text properties in the returned metadata.
      _retrievalQuery = `
        RETURN reduce(str='', k IN ${JSON.stringify(textNodeProperties)} |
        str + '\\n' + k + ': ' + coalesce(node[k], '')) AS text,
        node {.*, \`${embeddingNodeProperty}\`: Null, id: Null, ${textNodeProperties
        .map((prop) => `\`${prop}\`: Null`)
        .join(", ")} } AS metadata, score
      `;
    }

    const store = await this.initialize(embeddings, {
      ...config,
      retrievalQuery: _retrievalQuery,
    });

    const embeddingDimension = await store.retrieveExistingIndex();

    if (!embeddingDimension) {
      await store.createNewIndex();
    } else if (store.embeddingDimension !== embeddingDimension) {
      throw new Error(
        `Index with name ${store.indexName} already exists. The provided embedding function and vector index dimensions do not match.\nEmbedding function dimension: ${store.embeddingDimension}\nVector index dimension: ${embeddingDimension}`
      );
    }

    if (searchType === "hybrid") {
      const ftsNodeLabel = await store.retrieveExistingFtsIndex(
        textNodeProperties
      );

      if (!ftsNodeLabel) {
        await store.createNewKeywordIndex(textNodeProperties);
      } else {
        if (ftsNodeLabel !== store.nodeLabel) {
          throw Error(
            "Vector and keyword index don't index the same node label"
          );
        }
      }
    }

    // Backfill loop: embed nodes that are missing an embedding, 1000 at a
    // time, until a batch comes back smaller than the limit.
    // eslint-disable-next-line no-constant-condition
    while (true) {
      const fetchQuery = `
        MATCH (n:\`${nodeLabel}\`)
        WHERE n.${embeddingNodeProperty} IS null
        AND any(k in $props WHERE n[k] IS NOT null)
        RETURN elementId(n) AS id, reduce(str='', k IN $props |
        str + '\\n' + k + ':' + coalesce(n[k], '')) AS text
        LIMIT 1000
      `;

      const data = await store.query(fetchQuery, { props: textNodeProperties });

      // NOTE(review): if query() keeps returning a falsy value this loop
      // never terminates — confirm intended behavior.
      if (!data) {
        continue;
      }

      const textEmbeddings = await embeddings.embedDocuments(
        data.map((el) => el.text)
      );

      const params = {
        data: data.map((el, index) => ({
          id: el.id,
          embedding: textEmbeddings[index],
        })),
      };

      await store.query(
        `
        UNWIND $data AS row
        MATCH (n:\`${nodeLabel}\`)
        WHERE elementId(n) = row.id
        CALL db.create.setVectorProperty(n, '${embeddingNodeProperty}', row.embedding)
        YIELD node RETURN count(*)
        `,
        params
      );

      if (data.length < 1000) {
        break;
      }
    }

    return store;
  }

  // Creates the vector index via db.index.vector.createNodeIndex with the
  // store's configured label, property, dimension, and distance metric.
  // NOTE(review): the generic on Promise appears lost in extraction.
  async createNewIndex(): Promise {
    const indexQuery = `
      CALL db.index.vector.createNodeIndex(
        $index_name,
        $node_label,
        $embedding_node_property,
        toInteger($embedding_dimension),
        $similarity_metric
      )
    `;

    const parameters = {
      index_name: this.indexName,
      node_label: this.nodeLabel,
      embedding_node_property: this.embeddingNodeProperty,
      embedding_dimension: this.embeddingDimension,
      similarity_metric: this.distanceStrategy,
    };

    await this.query(indexQuery, parameters);
  }

  // Looks up an existing vector index by name OR by (label, property) pair.
  // On a match, adopts the index's name/label/property and returns its
  // dimension; returns null when no usable index is found.
  async retrieveExistingIndex() {
    let indexInformation = await this.query(
      `
      SHOW INDEXES YIELD name, type, labelsOrTypes, properties, options
      WHERE type = 'VECTOR' AND (name = $index_name
      OR (labelsOrTypes[0] = $node_label AND
      properties[0] = $embedding_node_property))
      RETURN name, labelsOrTypes, properties, options
      `,
      {
        index_name: this.indexName,
        node_label: this.nodeLabel,
        embedding_node_property: this.embeddingNodeProperty,
      }
    );

    if (indexInformation) {
      // Prefer the index whose name matches exactly (sortByIndexName is
      // defined elsewhere in this file).
      indexInformation = this.sortByIndexName(indexInformation, this.indexName);

      try {
        const [index] = indexInformation;
        const [labelOrType] = index.labelsOrTypes;
        const [property] = index.properties;

        this.indexName = index.name;
        this.nodeLabel = labelOrType;
        this.embeddingNodeProperty = property;

        const embeddingDimension =
          index.options.indexConfig["vector.dimensions"];
        return Number(embeddingDimension);
      } catch (error) {
        // Malformed index metadata — treat as "no existing index".
        return null;
      }
    }

    return null;
  }

  // Looks up an existing FULLTEXT (keyword) index by name OR by
  // (label, text properties) pair. (Continues past the visible source.)
  async retrieveExistingFtsIndex(
    textNodeProperties: string[] = []
  ): Promise {
    const indexInformation = await this.query(
      `
      SHOW INDEXES YIELD name, type, labelsOrTypes, properties, options
      WHERE type = 'FULLTEXT' AND (name = $keyword_index_name
      OR (labelsOrTypes = [$node_label] AND
      properties = $text_node_property))
      RETURN name, labelsOrTypes, properties, options
      `,
      {
        keyword_index_name: this.keywordIndexName,
        node_label: this.nodeLabel,
        text_node_property:
          textNodeProperties.length > 0
            ?
textNodeProperties + : [this.textNodeProperty], + } + ); + + if (indexInformation) { + // Sort the index information by index name + const sortedIndexInformation = this.sortByIndexName( + indexInformation, + this.indexName + ); + + try { + const [index] = sortedIndexInformation; + const [labelOrType] = index.labelsOrTypes; + const [property] = index.properties; + + this.keywordIndexName = index.name; + this.textNodeProperty = property; + this.nodeLabel = labelOrType; + + return labelOrType; + } catch (error) { + return null; + } + } + + return null; + } + + async createNewKeywordIndex( + textNodeProperties: string[] = [] + ): Promise { + const nodeProps = + textNodeProperties.length > 0 + ? textNodeProperties + : [this.textNodeProperty]; + + // Construct the Cypher query to create a new full text index + const ftsIndexQuery = ` + CREATE FULLTEXT INDEX ${this.keywordIndexName} + FOR (n:\`${this.nodeLabel}\`) ON EACH + [${nodeProps.map((prop) => `n.\`${prop}\``).join(", ")}] + `; + + await this.query(ftsIndexQuery); + } + + sortByIndexName( + // eslint-disable-next-line @typescript-eslint/no-explicit-any + values: Array<{ [key: string]: any }>, + indexName: string + // eslint-disable-next-line @typescript-eslint/no-explicit-any + ): Array<{ [key: string]: any }> { + return values.sort( + (a, b) => + (a.index_name === indexName ? -1 : 0) - + (b.index_name === indexName ? 
-1 : 0) + ); + } + + async addVectors( + vectors: number[][], + documents: Document[], + // eslint-disable-next-line @typescript-eslint/no-explicit-any + metadatas?: Record[], + ids?: string[] + ): Promise { + let _ids = ids; + let _metadatas = metadatas; + + if (!_ids) { + _ids = documents.map(() => uuid.v1()); + } + + if (!metadatas) { + _metadatas = documents.map(() => ({})); + } + + const importQuery = ` + UNWIND $data AS row + CALL { + WITH row + MERGE (c:\`${this.nodeLabel}\` {id: row.id}) + WITH c, row + CALL db.create.setVectorProperty(c, '${this.embeddingNodeProperty}', row.embedding) + YIELD node + SET c.\`${this.textNodeProperty}\` = row.text + SET c += row.metadata + } IN TRANSACTIONS OF 1000 ROWS + `; + + const parameters = { + data: documents.map(({ pageContent, metadata }, index) => ({ + text: pageContent, + metadata: _metadatas ? _metadatas[index] : metadata, + embedding: vectors[index], + id: _ids ? _ids[index] : null, + })), + }; + + await this.query(importQuery, parameters); + + return _ids; + } + + async addDocuments(documents: Document[]): Promise { + const texts = documents.map(({ pageContent }) => pageContent); + + return this.addVectors( + await this.embeddings.embedDocuments(texts), + documents + ); + } + + async similaritySearch(query: string, k = 4): Promise { + const embedding = await this.embeddings.embedQuery(query); + + const results = await this.similaritySearchVectorWithScore( + embedding, + k, + query + ); + + return results.map((result) => result[0]); + } + + async similaritySearchVectorWithScore( + vector: number[], + k: number, + query: string + ): Promise<[Document, number][]> { + const defaultRetrieval = ` + RETURN node.${this.textNodeProperty} AS text, score, + node {.*, ${this.textNodeProperty}: Null, + ${this.embeddingNodeProperty}: Null, id: Null } AS metadata + `; + + const retrievalQuery = this.retrievalQuery + ? 
this.retrievalQuery + : defaultRetrieval; + + const readQuery = `${getSearchIndexQuery( + this.searchType + )} ${retrievalQuery}`; + + const parameters = { + index: this.indexName, + k: Number(k), + embedding: vector, + keyword_index: this.keywordIndexName, + query, + }; + const results = await this.query(readQuery, parameters); + + if (results) { + // eslint-disable-next-line @typescript-eslint/no-explicit-any + const docs: [Document, number][] = results.map((result: any) => [ + new Document({ + pageContent: result.text, + metadata: Object.fromEntries( + Object.entries(result.metadata).filter(([_, v]) => v !== null) + ), + }), + result.score, + ]); + + return docs; + } + + return []; + } +} + +function toObjects(records: neo4j.Record[]) { + // eslint-disable-next-line @typescript-eslint/no-explicit-any + const recordValues: Record[] = records.map((record) => { + const rObj = record.toObject(); + // eslint-disable-next-line @typescript-eslint/no-explicit-any + const out: { [key: string]: any } = {}; + Object.keys(rObj).forEach((key) => { + out[key] = itemIntToString(rObj[key]); + }); + return out; + }); + return recordValues; +} + +// eslint-disable-next-line @typescript-eslint/no-explicit-any +function itemIntToString(item: any): any { + if (neo4j.isInt(item)) return item.toString(); + if (Array.isArray(item)) return item.map((ii) => itemIntToString(ii)); + if (["number", "string", "boolean"].indexOf(typeof item) !== -1) return item; + if (item === null) return item; + if (typeof item === "object") return objIntToString(item); +} + +// eslint-disable-next-line @typescript-eslint/no-explicit-any +function objIntToString(obj: any) { + const entry = extractFromNeoObjects(obj); + // eslint-disable-next-line @typescript-eslint/no-explicit-any + let newObj: any = null; + if (Array.isArray(entry)) { + newObj = entry.map((item) => itemIntToString(item)); + } else if (entry !== null && typeof entry === "object") { + newObj = {}; + Object.keys(entry).forEach((key) => { + 
newObj[key] = itemIntToString(entry[key]); + }); + } + return newObj; +} + +// eslint-disable-next-line @typescript-eslint/no-explicit-any +function extractFromNeoObjects(obj: any) { + if ( + // eslint-disable-next-line + obj instanceof (neo4j.types.Node as any) || + // eslint-disable-next-line + obj instanceof (neo4j.types.Relationship as any) + ) { + return obj.properties; + // eslint-disable-next-line + } else if (obj instanceof (neo4j.types.Path as any)) { + // eslint-disable-next-line + return [].concat.apply([], extractPathForRows(obj)); + } + return obj; +} + +function extractPathForRows(path: neo4j.Path) { + let { segments } = path; + // Zero length path. No relationship, end === start + if (!Array.isArray(path.segments) || path.segments.length < 1) { + // eslint-disable-next-line @typescript-eslint/no-explicit-any + segments = [{ ...path, end: null } as any]; + } + // eslint-disable-next-line @typescript-eslint/no-explicit-any + return segments.map((segment: any) => + [ + objIntToString(segment.start), + objIntToString(segment.relationship), + objIntToString(segment.end), + ].filter((part) => part !== null) + ); +} + +function getSearchIndexQuery(searchType: SearchType): string { + const typeToQueryMap: { [key in SearchType]: string } = { + vector: + "CALL db.index.vector.queryNodes($index, $k, $embedding) YIELD node, score", + hybrid: ` + CALL { + CALL db.index.vector.queryNodes($index, $k, $embedding) YIELD node, score + RETURN node, score UNION + CALL db.index.fulltext.queryNodes($keyword_index, $query, {limit: $k}) YIELD node, score + WITH collect({node: node, score: score}) AS nodes, max(score) AS max + UNWIND nodes AS n + RETURN n.node AS node, (n.score / max) AS score + } + WITH node, max(score) AS score ORDER BY score DESC LIMIT toInteger($k) + `, + }; + + return typeToQueryMap[searchType]; +} diff --git a/libs/langchain-community/src/vectorstores/opensearch.ts b/libs/langchain-community/src/vectorstores/opensearch.ts new file mode 100644 index 
000000000000..5728298eadd6 --- /dev/null +++ b/libs/langchain-community/src/vectorstores/opensearch.ts @@ -0,0 +1,326 @@ +import { Client, RequestParams, errors } from "@opensearch-project/opensearch"; +import * as uuid from "uuid"; +import { Embeddings } from "@langchain/core/embeddings"; +import { VectorStore } from "@langchain/core/vectorstores"; +import { Document } from "@langchain/core/documents" + +type OpenSearchEngine = "nmslib" | "hnsw"; +type OpenSearchSpaceType = "l2" | "cosinesimil" | "ip"; + +/** + * Interface defining the options for vector search in OpenSearch. It + * includes the engine type, space type, and parameters for the HNSW + * algorithm. + */ +interface VectorSearchOptions { + readonly engine?: OpenSearchEngine; + readonly spaceType?: OpenSearchSpaceType; + readonly m?: number; + readonly efConstruction?: number; + readonly efSearch?: number; +} + +/** + * Interface defining the arguments required to create an instance of the + * OpenSearchVectorStore class. It includes the OpenSearch client, index + * name, and vector search options. + */ +export interface OpenSearchClientArgs { + readonly client: Client; + readonly indexName?: string; + + readonly vectorSearchOptions?: VectorSearchOptions; +} + +/** + * Type alias for an object. It's used to define filters for OpenSearch + * queries. + */ +type OpenSearchFilter = object; + +/** + * Class that provides a wrapper around the OpenSearch service for vector + * search. It provides methods for adding documents and vectors to the + * OpenSearch index, searching for similar vectors, and managing the + * OpenSearch index. 
+ */ +export class OpenSearchVectorStore extends VectorStore { + declare FilterType: OpenSearchFilter; + + private readonly client: Client; + + private readonly indexName: string; + + private readonly engine: OpenSearchEngine; + + private readonly spaceType: OpenSearchSpaceType; + + private readonly efConstruction: number; + + private readonly efSearch: number; + + private readonly m: number; + + _vectorstoreType(): string { + return "opensearch"; + } + + constructor(embeddings: Embeddings, args: OpenSearchClientArgs) { + super(embeddings, args); + + this.spaceType = args.vectorSearchOptions?.spaceType ?? "l2"; + this.engine = args.vectorSearchOptions?.engine ?? "nmslib"; + this.m = args.vectorSearchOptions?.m ?? 16; + this.efConstruction = args.vectorSearchOptions?.efConstruction ?? 512; + this.efSearch = args.vectorSearchOptions?.efSearch ?? 512; + + this.client = args.client; + this.indexName = args.indexName ?? "documents"; + } + + /** + * Method to add documents to the OpenSearch index. It first converts the + * documents to vectors using the embeddings, then adds the vectors to the + * index. + * @param documents The documents to be added to the OpenSearch index. + * @returns Promise resolving to void. + */ + async addDocuments(documents: Document[]): Promise { + const texts = documents.map(({ pageContent }) => pageContent); + return this.addVectors( + await this.embeddings.embedDocuments(texts), + documents + ); + } + + /** + * Method to add vectors to the OpenSearch index. It ensures the index + * exists, then adds the vectors and associated documents to the index. + * @param vectors The vectors to be added to the OpenSearch index. + * @param documents The documents associated with the vectors. + * @param options Optional parameter that can contain the IDs for the documents. + * @returns Promise resolving to void. 
+ */ + async addVectors( + vectors: number[][], + documents: Document[], + options?: { ids?: string[] } + ): Promise { + await this.ensureIndexExists( + vectors[0].length, + this.engine, + this.spaceType, + this.efSearch, + this.efConstruction, + this.m + ); + const documentIds = + options?.ids ?? Array.from({ length: vectors.length }, () => uuid.v4()); + const operations = vectors.flatMap((embedding, idx) => [ + { + index: { + _index: this.indexName, + _id: documentIds[idx], + }, + }, + { + embedding, + metadata: documents[idx].metadata, + text: documents[idx].pageContent, + }, + ]); + await this.client.bulk({ body: operations }); + await this.client.indices.refresh({ index: this.indexName }); + } + + /** + * Method to perform a similarity search on the OpenSearch index using a + * query vector. It returns the k most similar documents and their scores. + * @param query The query vector. + * @param k The number of similar documents to return. + * @param filter Optional filter for the OpenSearch query. + * @returns Promise resolving to an array of tuples, each containing a Document and its score. + */ + async similaritySearchVectorWithScore( + query: number[], + k: number, + filter?: OpenSearchFilter | undefined + ): Promise<[Document, number][]> { + const search: RequestParams.Search = { + index: this.indexName, + body: { + query: { + bool: { + filter: { bool: { must: this.buildMetadataTerms(filter) } }, + must: [ + { + knn: { + embedding: { vector: query, k }, + }, + }, + ], + }, + }, + size: k, + }, + }; + + const { body } = await this.client.search(search); + + // eslint-disable-next-line @typescript-eslint/no-explicit-any + return body.hits.hits.map((hit: any) => [ + new Document({ + pageContent: hit._source.text, + metadata: hit._source.metadata, + }), + hit._score, + ]); + } + + /** + * Static method to create a new OpenSearchVectorStore from an array of + * texts, their metadata, embeddings, and OpenSearch client arguments. 
+ * @param texts The texts to be converted into documents and added to the OpenSearch index. + * @param metadatas The metadata associated with the texts. Can be an array of objects or a single object. + * @param embeddings The embeddings used to convert the texts into vectors. + * @param args The OpenSearch client arguments. + * @returns Promise resolving to a new instance of OpenSearchVectorStore. + */ + static fromTexts( + texts: string[], + metadatas: object[] | object, + embeddings: Embeddings, + args: OpenSearchClientArgs + ): Promise { + const documents = texts.map((text, idx) => { + const metadata = Array.isArray(metadatas) ? metadatas[idx] : metadatas; + return new Document({ pageContent: text, metadata }); + }); + + return OpenSearchVectorStore.fromDocuments(documents, embeddings, args); + } + + /** + * Static method to create a new OpenSearchVectorStore from an array of + * Documents, embeddings, and OpenSearch client arguments. + * @param docs The documents to be added to the OpenSearch index. + * @param embeddings The embeddings used to convert the documents into vectors. + * @param dbConfig The OpenSearch client arguments. + * @returns Promise resolving to a new instance of OpenSearchVectorStore. + */ + static async fromDocuments( + docs: Document[], + embeddings: Embeddings, + dbConfig: OpenSearchClientArgs + ): Promise { + const store = new OpenSearchVectorStore(embeddings, dbConfig); + await store.addDocuments(docs).then(() => store); + return store; + } + + /** + * Static method to create a new OpenSearchVectorStore from an existing + * OpenSearch index, embeddings, and OpenSearch client arguments. + * @param embeddings The embeddings used to convert the documents into vectors. + * @param dbConfig The OpenSearch client arguments. + * @returns Promise resolving to a new instance of OpenSearchVectorStore. 
+ */ + static async fromExistingIndex( + embeddings: Embeddings, + dbConfig: OpenSearchClientArgs + ): Promise { + const store = new OpenSearchVectorStore(embeddings, dbConfig); + await store.client.cat.indices({ index: store.indexName }); + return store; + } + + private async ensureIndexExists( + dimension: number, + engine = "nmslib", + spaceType = "l2", + efSearch = 512, + efConstruction = 512, + m = 16 + ): Promise { + const body = { + settings: { + index: { + number_of_shards: 5, + number_of_replicas: 1, + knn: true, + "knn.algo_param.ef_search": efSearch, + }, + }, + mappings: { + dynamic_templates: [ + { + // map all metadata properties to be keyword + "metadata.*": { + match_mapping_type: "*", + mapping: { type: "keyword" }, + }, + }, + ], + properties: { + text: { type: "text" }, + metadata: { type: "object" }, + embedding: { + type: "knn_vector", + dimension, + method: { + name: "hnsw", + engine, + space_type: spaceType, + parameters: { ef_construction: efConstruction, m }, + }, + }, + }, + }, + }; + + const indexExists = await this.doesIndexExist(); + if (indexExists) return; + + await this.client.indices.create({ index: this.indexName, body }); + } + + private buildMetadataTerms( + filter?: OpenSearchFilter + ): { [key: string]: Record }[] { + if (filter == null) return []; + const result = []; + for (const [key, value] of Object.entries(filter)) { + const aggregatorKey = Array.isArray(value) ? "terms" : "term"; + result.push({ [aggregatorKey]: { [`metadata.${key}`]: value } }); + } + return result; + } + + /** + * Method to check if the OpenSearch index exists. + * @returns Promise resolving to a boolean indicating whether the index exists. 
+ */ + async doesIndexExist(): Promise { + try { + await this.client.cat.indices({ index: this.indexName }); + return true; + } catch (err: unknown) { + // eslint-disable-next-line no-instanceof/no-instanceof + if (err instanceof errors.ResponseError && err.statusCode === 404) { + return false; + } + throw err; + } + } + + /** + * Method to delete the OpenSearch index if it exists. + * @returns Promise resolving to void. + */ + async deleteIfExists(): Promise { + const indexExists = await this.doesIndexExist(); + if (!indexExists) return; + + await this.client.indices.delete({ index: this.indexName }); + } +} diff --git a/libs/langchain-community/src/vectorstores/prisma.ts b/libs/langchain-community/src/vectorstores/prisma.ts new file mode 100644 index 000000000000..6e2ec8d8692c --- /dev/null +++ b/libs/langchain-community/src/vectorstores/prisma.ts @@ -0,0 +1,511 @@ +import { Embeddings } from "@langchain/core/embeddings"; +import { VectorStore } from "@langchain/core/vectorstores"; +import { Document } from "@langchain/core/documents" +import { Callbacks } from "@langchain/core/callbacks/manager"; + +const IdColumnSymbol = Symbol("id"); +const ContentColumnSymbol = Symbol("content"); + +type ColumnSymbol = typeof IdColumnSymbol | typeof ContentColumnSymbol; + +declare type Value = unknown; +declare type RawValue = Value | Sql; + +declare class Sql { + strings: string[]; + + constructor( + rawStrings: ReadonlyArray, + rawValues: ReadonlyArray + ); +} + +type PrismaNamespace = { + ModelName: Record; + Sql: typeof Sql; + raw: (sql: string) => Sql; + join: ( + values: RawValue[], + separator?: string, + prefix?: string, + suffix?: string + ) => Sql; + sql: (strings: ReadonlyArray, ...values: RawValue[]) => Sql; +}; + +type PrismaClient = { + $queryRaw( + query: TemplateStringsArray | Sql, + // eslint-disable-next-line @typescript-eslint/no-explicit-any + ...values: any[] + ): Promise; + $executeRaw( + query: TemplateStringsArray | Sql, + // eslint-disable-next-line 
@typescript-eslint/no-explicit-any + ...values: any[] + ): // eslint-disable-next-line @typescript-eslint/no-explicit-any + Promise; + + // eslint-disable-next-line @typescript-eslint/no-explicit-any + $transaction

[]>(arg: [...P]): Promise; +}; + +type ObjectIntersect = { + [P in keyof A & keyof B]: A[P] | B[P]; +}; + +type ModelColumns> = { + [K in keyof TModel]?: true | ColumnSymbol; +}; + +export type PrismaSqlFilter> = { + [K in keyof TModel]?: { + equals?: TModel[K]; + in?: TModel[K][]; + isNull?: TModel[K]; + isNotNull?: TModel[K]; + like?: TModel[K]; + lt?: TModel[K]; + lte?: TModel[K]; + gt?: TModel[K]; + gte?: TModel[K]; + not?: TModel[K]; + }; +}; + +const OpMap = { + equals: "=", + in: "IN", + isNull: "IS NULL", + isNotNull: "IS NOT NULL", + like: "LIKE", + lt: "<", + lte: "<=", + gt: ">", + gte: ">=", + not: "<>", +}; + +type SimilarityModel< + TModel extends Record = Record, + TColumns extends ModelColumns = ModelColumns +> = Pick> & { + _distance: number | null; +}; + +type DefaultPrismaVectorStore = PrismaVectorStore< + Record, + string, + ModelColumns>, + PrismaSqlFilter> +>; + +/** + * A specific implementation of the VectorStore class that is designed to + * work with Prisma. It provides methods for adding models, documents, and + * vectors, as well as for performing similarity searches. 
+ */ +export class PrismaVectorStore< + TModel extends Record, + TModelName extends string, + TSelectModel extends ModelColumns, + TFilterModel extends PrismaSqlFilter +> extends VectorStore { + protected tableName: string; + + protected vectorColumnName: string; + + protected selectColumns: string[]; + + filter?: TFilterModel; + + idColumn: keyof TModel & string; + + contentColumn: keyof TModel & string; + + static IdColumn: typeof IdColumnSymbol = IdColumnSymbol; + + static ContentColumn: typeof ContentColumnSymbol = ContentColumnSymbol; + + protected db: PrismaClient; + + protected Prisma: PrismaNamespace; + + _vectorstoreType(): string { + return "prisma"; + } + + constructor( + embeddings: Embeddings, + config: { + db: PrismaClient; + prisma: PrismaNamespace; + tableName: TModelName; + vectorColumnName: string; + columns: TSelectModel; + filter?: TFilterModel; + } + ) { + super(embeddings, {}); + + this.Prisma = config.prisma; + this.db = config.db; + + const entries = Object.entries(config.columns); + const idColumn = entries.find((i) => i[1] === IdColumnSymbol)?.[0]; + const contentColumn = entries.find( + (i) => i[1] === ContentColumnSymbol + )?.[0]; + + if (idColumn == null) throw new Error("Missing ID column"); + if (contentColumn == null) throw new Error("Missing content column"); + + this.idColumn = idColumn; + this.contentColumn = contentColumn; + + this.tableName = config.tableName; + this.vectorColumnName = config.vectorColumnName; + + this.selectColumns = entries + .map(([key, alias]) => (alias && key) || null) + .filter((x): x is string => !!x); + + if (config.filter) { + this.filter = config.filter; + } + } + + /** + * Creates a new PrismaVectorStore with the specified model. + * @param db The PrismaClient instance. + * @returns An object with create, fromTexts, and fromDocuments methods. 
+ */ + static withModel>(db: PrismaClient) { + function create< + TPrisma extends PrismaNamespace, + TColumns extends ModelColumns, + TFilters extends PrismaSqlFilter + >( + embeddings: Embeddings, + config: { + prisma: TPrisma; + tableName: keyof TPrisma["ModelName"] & string; + vectorColumnName: string; + columns: TColumns; + filter?: TFilters; + } + ) { + type ModelName = keyof TPrisma["ModelName"] & string; + return new PrismaVectorStore( + embeddings, + { ...config, db } + ); + } + + async function fromTexts< + TPrisma extends PrismaNamespace, + TColumns extends ModelColumns + >( + texts: string[], + metadatas: TModel[], + embeddings: Embeddings, + dbConfig: { + prisma: TPrisma; + tableName: keyof TPrisma["ModelName"] & string; + vectorColumnName: string; + columns: TColumns; + } + ) { + const docs: Document[] = []; + for (let i = 0; i < texts.length; i += 1) { + const metadata = Array.isArray(metadatas) ? metadatas[i] : metadatas; + const newDoc = new Document({ + pageContent: texts[i], + metadata, + }); + docs.push(newDoc); + } + + return PrismaVectorStore.fromDocuments(docs, embeddings, { + ...dbConfig, + db, + }); + } + + async function fromDocuments< + TPrisma extends PrismaNamespace, + TColumns extends ModelColumns, + TFilters extends PrismaSqlFilter + >( + docs: Document[], + embeddings: Embeddings, + dbConfig: { + prisma: TPrisma; + tableName: keyof TPrisma["ModelName"] & string; + vectorColumnName: string; + columns: TColumns; + } + ) { + type ModelName = keyof TPrisma["ModelName"] & string; + const instance = new PrismaVectorStore< + TModel, + ModelName, + TColumns, + TFilters + >(embeddings, { ...dbConfig, db }); + await instance.addDocuments(docs); + return instance; + } + + return { create, fromTexts, fromDocuments }; + } + + /** + * Adds the specified models to the store. + * @param models The models to add. + * @returns A promise that resolves when the models have been added. 
+ */ + async addModels(models: TModel[]) { + return this.addDocuments( + models.map((metadata) => { + const pageContent = metadata[this.contentColumn]; + if (typeof pageContent !== "string") + throw new Error("Content column must be a string"); + return new Document({ pageContent, metadata }); + }) + ); + } + + /** + * Adds the specified documents to the store. + * @param documents The documents to add. + * @returns A promise that resolves when the documents have been added. + */ + async addDocuments(documents: Document[]) { + const texts = documents.map(({ pageContent }) => pageContent); + return this.addVectors( + await this.embeddings.embedDocuments(texts), + documents + ); + } + + /** + * Adds the specified vectors to the store. + * @param vectors The vectors to add. + * @param documents The documents associated with the vectors. + * @returns A promise that resolves when the vectors have been added. + */ + async addVectors(vectors: number[][], documents: Document[]) { + // table name, column name cannot be parametrised + // these fields are thus not escaped by Prisma and can be dangerous if user input is used + const idColumnRaw = this.Prisma.raw(`"${this.idColumn}"`); + const tableNameRaw = this.Prisma.raw(`"${this.tableName}"`); + const vectorColumnRaw = this.Prisma.raw(`"${this.vectorColumnName}"`); + + await this.db.$transaction( + vectors.map( + (vector, idx) => this.db.$executeRaw` + UPDATE ${tableNameRaw} + SET ${vectorColumnRaw} = ${`[${vector.join(",")}]`}::vector + WHERE ${idColumnRaw} = ${documents[idx].metadata[this.idColumn]} + ` + ) + ); + } + + /** + * Performs a similarity search with the specified query. + * @param query The query to use for the similarity search. + * @param k The number of results to return. + * @param _filter The filter to apply to the results. + * @param _callbacks The callbacks to use during the search. + * @returns A promise that resolves with the search results. 
+ */ + async similaritySearch( + query: string, + k = 4, + _filter: this["FilterType"] | undefined = undefined, // not used. here to make the interface compatible with the other stores + _callbacks: Callbacks | undefined = undefined // implement passing to embedQuery later + ): Promise>[]> { + const results = await this.similaritySearchVectorWithScore( + await this.embeddings.embedQuery(query), + k + ); + + return results.map((result) => result[0]); + } + + /** + * Performs a similarity search with the specified query and returns the + * results along with their scores. + * @param query The query to use for the similarity search. + * @param k The number of results to return. + * @param filter The filter to apply to the results. + * @param _callbacks The callbacks to use during the search. + * @returns A promise that resolves with the search results and their scores. + */ + async similaritySearchWithScore( + query: string, + k?: number, + filter?: TFilterModel, + _callbacks: Callbacks | undefined = undefined // implement passing to embedQuery later + ) { + return super.similaritySearchWithScore(query, k, filter); + } + + /** + * Performs a similarity search with the specified vector and returns the + * results along with their scores. + * @param query The vector to use for the similarity search. + * @param k The number of results to return. + * @param filter The filter to apply to the results. + * @returns A promise that resolves with the search results and their scores. 
+ */ + async similaritySearchVectorWithScore( + query: number[], + k: number, + filter?: TFilterModel + ): Promise<[Document>, number][]> { + // table name, column names cannot be parametrised + // these fields are thus not escaped by Prisma and can be dangerous if user input is used + const vectorColumnRaw = this.Prisma.raw(`"${this.vectorColumnName}"`); + const tableNameRaw = this.Prisma.raw(`"${this.tableName}"`); + const selectRaw = this.Prisma.raw( + this.selectColumns.map((x) => `"${x}"`).join(", ") + ); + + const vector = `[${query.join(",")}]`; + const articles = await this.db.$queryRaw< + Array> + >( + this.Prisma.join( + [ + this.Prisma.sql` + SELECT ${selectRaw}, ${vectorColumnRaw} <=> ${vector}::vector as "_distance" + FROM ${tableNameRaw} + `, + this.buildSqlFilterStr(filter ?? this.filter), + this.Prisma.sql` + ORDER BY "_distance" ASC + LIMIT ${k}; + `, + ].filter((x) => x != null), + "" + ) + ); + + const results: [Document>, number][] = + []; + for (const article of articles) { + if (article._distance != null && article[this.contentColumn] != null) { + results.push([ + new Document({ + pageContent: article[this.contentColumn] as string, + metadata: article, + }), + article._distance, + ]); + } + } + + return results; + } + + buildSqlFilterStr(filter?: TFilterModel) { + if (filter == null) return null; + return this.Prisma.join( + Object.entries(filter).flatMap(([key, ops]) => + Object.entries(ops).map(([opName, value]) => { + // column name, operators cannot be parametrised + // these fields are thus not escaped by Prisma and can be dangerous if user input is used + const opNameKey = opName as keyof typeof OpMap; + const colRaw = this.Prisma.raw(`"${key}"`); + const opRaw = this.Prisma.raw(OpMap[opNameKey]); + + switch (OpMap[opNameKey]) { + case OpMap.in: { + if ( + !Array.isArray(value) || + !value.every((v) => typeof v === "string") + ) { + throw new Error( + `Invalid filter: IN operator requires an array of strings. 
Received: ${JSON.stringify( + value, + null, + 2 + )}` + ); + } + return this.Prisma.sql`${colRaw} ${opRaw} (${this.Prisma.join( + value + )})`; + } + case OpMap.isNull: + case OpMap.isNotNull: + return this.Prisma.sql`${colRaw} ${opRaw}`; + default: + return this.Prisma.sql`${colRaw} ${opRaw} ${value}`; + } + }) + ), + " AND ", + " WHERE " + ); + } + + /** + * Creates a new PrismaVectorStore from the specified texts. + * @param texts The texts to use to create the store. + * @param metadatas The metadata for the texts. + * @param embeddings The embeddings to use. + * @param dbConfig The database configuration. + * @returns A promise that resolves with the new PrismaVectorStore. + */ + static async fromTexts( + texts: string[], + metadatas: object[], + embeddings: Embeddings, + dbConfig: { + db: PrismaClient; + prisma: PrismaNamespace; + tableName: string; + vectorColumnName: string; + columns: ModelColumns>; + } + ): Promise { + const docs: Document[] = []; + for (let i = 0; i < texts.length; i += 1) { + const metadata = Array.isArray(metadatas) ? metadatas[i] : metadatas; + const newDoc = new Document({ + pageContent: texts[i], + metadata, + }); + docs.push(newDoc); + } + + return PrismaVectorStore.fromDocuments(docs, embeddings, dbConfig); + } + + /** + * Creates a new PrismaVectorStore from the specified documents. + * @param docs The documents to use to create the store. + * @param embeddings The embeddings to use. + * @param dbConfig The database configuration. + * @returns A promise that resolves with the new PrismaVectorStore. 
+ */ + static async fromDocuments( + docs: Document[], + embeddings: Embeddings, + dbConfig: { + db: PrismaClient; + prisma: PrismaNamespace; + tableName: string; + vectorColumnName: string; + columns: ModelColumns>; + } + ): Promise { + const instance = new PrismaVectorStore(embeddings, dbConfig); + await instance.addDocuments(docs); + return instance; + } +} diff --git a/libs/langchain-community/src/vectorstores/qdrant.ts b/libs/langchain-community/src/vectorstores/qdrant.ts new file mode 100644 index 000000000000..86ed978bbc39 --- /dev/null +++ b/libs/langchain-community/src/vectorstores/qdrant.ts @@ -0,0 +1,260 @@ +import { QdrantClient } from "@qdrant/js-client-rest"; +import type { Schemas as QdrantSchemas } from "@qdrant/js-client-rest"; +import { v4 as uuid } from "uuid"; + +import { Embeddings } from "@langchain/core/embeddings"; +import { VectorStore } from "@langchain/core/vectorstores"; +import { Document } from "@langchain/core/documents" +import { getEnvironmentVariable } from "@langchain/core/utils/env"; + +/** + * Interface for the arguments that can be passed to the + * `QdrantVectorStore` constructor. It includes options for specifying a + * `QdrantClient` instance, the URL and API key for a Qdrant database, and + * the name and configuration for a collection. + */ +export interface QdrantLibArgs { + client?: QdrantClient; + url?: string; + apiKey?: string; + collectionName?: string; + collectionConfig?: QdrantSchemas["CreateCollection"]; +} + +/** + * Type for the response returned by a search operation in the Qdrant + * database. It includes the score and payload (metadata and content) for + * each point (document) in the search results. + */ +type QdrantSearchResponse = QdrantSchemas["ScoredPoint"] & { + payload: { + metadata: object; + content: string; + }; +}; + +/** + * Class that extends the `VectorStore` base class to interact with a + * Qdrant database. 
It includes methods for adding documents and vectors + * to the Qdrant database, searching for similar vectors, and ensuring the + * existence of a collection in the database. + */ +export class QdrantVectorStore extends VectorStore { + get lc_secrets(): { [key: string]: string } { + return { + apiKey: "QDRANT_API_KEY", + url: "QDRANT_URL", + }; + } + + client: QdrantClient; + + collectionName: string; + + collectionConfig?: QdrantSchemas["CreateCollection"]; + + _vectorstoreType(): string { + return "qdrant"; + } + + constructor(embeddings: Embeddings, args: QdrantLibArgs) { + super(embeddings, args); + + const url = args.url ?? getEnvironmentVariable("QDRANT_URL"); + const apiKey = args.apiKey ?? getEnvironmentVariable("QDRANT_API_KEY"); + + if (!args.client && !url) { + throw new Error("Qdrant client or url address must be set."); + } + + this.client = + args.client || + new QdrantClient({ + url, + apiKey, + }); + + this.collectionName = args.collectionName ?? "documents"; + + this.collectionConfig = args.collectionConfig; + } + + /** + * Method to add documents to the Qdrant database. It generates vectors + * from the documents using the `Embeddings` instance and then adds the + * vectors to the database. + * @param documents Array of `Document` instances to be added to the Qdrant database. + * @returns Promise that resolves when the documents have been added to the database. + */ + async addDocuments(documents: Document[]): Promise { + const texts = documents.map(({ pageContent }) => pageContent); + await this.addVectors( + await this.embeddings.embedDocuments(texts), + documents + ); + } + + /** + * Method to add vectors to the Qdrant database. Each vector is associated + * with a document, which is stored as the payload for a point in the + * database. + * @param vectors Array of vectors to be added to the Qdrant database. + * @param documents Array of `Document` instances associated with the vectors. 
+ * @returns Promise that resolves when the vectors have been added to the database. + */ + async addVectors(vectors: number[][], documents: Document[]): Promise { + if (vectors.length === 0) { + return; + } + + await this.ensureCollection(); + + const points = vectors.map((embedding, idx) => ({ + id: uuid(), + vector: embedding, + payload: { + content: documents[idx].pageContent, + metadata: documents[idx].metadata, + }, + })); + + try { + await this.client.upsert(this.collectionName, { + wait: true, + points, + }); + // eslint-disable-next-line @typescript-eslint/no-explicit-any + } catch (e: any) { + const error = new Error( + `${e?.status ?? "Undefined error code"} ${e?.message}: ${ + e?.data?.status?.error + }` + ); + throw error; + } + } + + /** + * Method to search for vectors in the Qdrant database that are similar to + * a given query vector. The search results include the score and payload + * (metadata and content) for each similar vector. + * @param query Query vector to search for similar vectors in the Qdrant database. + * @param k Optional number of similar vectors to return. If not specified, all similar vectors are returned. + * @param filter Optional filter to apply to the search results. + * @returns Promise that resolves with an array of tuples, where each tuple includes a `Document` instance and a score for a similar vector. 
+ */ + async similaritySearchVectorWithScore( + query: number[], + k?: number, + filter?: QdrantSchemas["Filter"] + ): Promise<[Document, number][]> { + if (!query) { + return []; + } + + await this.ensureCollection(); + + const results = await this.client.search(this.collectionName, { + vector: query, + limit: k, + filter, + }); + + const result: [Document, number][] = ( + results as QdrantSearchResponse[] + ).map((res) => [ + new Document({ + metadata: res.payload.metadata, + pageContent: res.payload.content, + }), + res.score, + ]); + + return result; + } + + /** + * Method to ensure the existence of a collection in the Qdrant database. + * If the collection does not exist, it is created. + * @returns Promise that resolves when the existence of the collection has been ensured. + */ + async ensureCollection() { + const response = await this.client.getCollections(); + + const collectionNames = response.collections.map( + (collection) => collection.name + ); + + if (!collectionNames.includes(this.collectionName)) { + const collectionConfig = this.collectionConfig ?? { + vectors: { + size: (await this.embeddings.embedQuery("test")).length, + distance: "Cosine", + }, + }; + await this.client.createCollection(this.collectionName, collectionConfig); + } + } + + /** + * Static method to create a `QdrantVectorStore` instance from texts. Each + * text is associated with metadata and converted to a `Document` + * instance, which is then added to the Qdrant database. + * @param texts Array of texts to be converted to `Document` instances and added to the Qdrant database. + * @param metadatas Array or single object of metadata to be associated with the texts. + * @param embeddings `Embeddings` instance used to generate vectors from the texts. + * @param dbConfig `QdrantLibArgs` instance specifying the configuration for the Qdrant database. + * @returns Promise that resolves with a new `QdrantVectorStore` instance. 
+ */ + static async fromTexts( + texts: string[], + metadatas: object[] | object, + embeddings: Embeddings, + dbConfig: QdrantLibArgs + ): Promise { + const docs = []; + for (let i = 0; i < texts.length; i += 1) { + const metadata = Array.isArray(metadatas) ? metadatas[i] : metadatas; + const newDoc = new Document({ + pageContent: texts[i], + metadata, + }); + docs.push(newDoc); + } + return QdrantVectorStore.fromDocuments(docs, embeddings, dbConfig); + } + + /** + * Static method to create a `QdrantVectorStore` instance from `Document` + * instances. The documents are added to the Qdrant database. + * @param docs Array of `Document` instances to be added to the Qdrant database. + * @param embeddings `Embeddings` instance used to generate vectors from the documents. + * @param dbConfig `QdrantLibArgs` instance specifying the configuration for the Qdrant database. + * @returns Promise that resolves with a new `QdrantVectorStore` instance. + */ + static async fromDocuments( + docs: Document[], + embeddings: Embeddings, + dbConfig: QdrantLibArgs + ): Promise { + const instance = new this(embeddings, dbConfig); + await instance.addDocuments(docs); + return instance; + } + + /** + * Static method to create a `QdrantVectorStore` instance from an existing + * collection in the Qdrant database. + * @param embeddings `Embeddings` instance used to generate vectors from the documents in the collection. + * @param dbConfig `QdrantLibArgs` instance specifying the configuration for the Qdrant database. + * @returns Promise that resolves with a new `QdrantVectorStore` instance. 
+ */ + static async fromExistingCollection( + embeddings: Embeddings, + dbConfig: QdrantLibArgs + ): Promise { + const instance = new this(embeddings, dbConfig); + await instance.ensureCollection(); + return instance; + } +} diff --git a/libs/langchain-community/src/vectorstores/redis.ts b/libs/langchain-community/src/vectorstores/redis.ts new file mode 100644 index 000000000000..63402c7f72af --- /dev/null +++ b/libs/langchain-community/src/vectorstores/redis.ts @@ -0,0 +1,458 @@ +import type { + createCluster, + createClient, + RediSearchSchema, + SearchOptions, +} from "redis"; +import { SchemaFieldTypes, VectorAlgorithms } from "redis"; +import { Embeddings } from "@langchain/core/embeddings"; +import { VectorStore } from "@langchain/core/vectorstores"; +import { Document } from "@langchain/core/documents" + +// Adapated from internal redis types which aren't exported +/** + * Type for creating a schema vector field. It includes the algorithm, + * distance metric, and initial capacity. + */ +export type CreateSchemaVectorField< + T extends VectorAlgorithms, + A extends Record +> = { + ALGORITHM: T; + DISTANCE_METRIC: "L2" | "IP" | "COSINE"; + INITIAL_CAP?: number; +} & A; +/** + * Type for creating a flat schema vector field. It extends + * CreateSchemaVectorField with a block size property. + */ +export type CreateSchemaFlatVectorField = CreateSchemaVectorField< + VectorAlgorithms.FLAT, + { + BLOCK_SIZE?: number; + } +>; +/** + * Type for creating a HNSW schema vector field. It extends + * CreateSchemaVectorField with M, EF_CONSTRUCTION, and EF_RUNTIME + * properties. 
+ */ +export type CreateSchemaHNSWVectorField = CreateSchemaVectorField< + VectorAlgorithms.HNSW, + { + M?: number; + EF_CONSTRUCTION?: number; + EF_RUNTIME?: number; + } +>; + +type CreateIndexOptions = NonNullable< + Parameters["ft"]["create"]>[3] +>; + +export type RedisSearchLanguages = `${NonNullable< + CreateIndexOptions["LANGUAGE"] +>}`; + +export type RedisVectorStoreIndexOptions = Omit< + CreateIndexOptions, + "LANGUAGE" +> & { LANGUAGE?: RedisSearchLanguages }; + +/** + * Interface for the configuration of the RedisVectorStore. It includes + * the Redis client, index name, index options, key prefix, content key, + * metadata key, vector key, and filter. + */ +export interface RedisVectorStoreConfig { + redisClient: + | ReturnType + | ReturnType; + indexName: string; + indexOptions?: CreateSchemaFlatVectorField | CreateSchemaHNSWVectorField; + createIndexOptions?: Omit; // PREFIX must be set with keyPrefix + keyPrefix?: string; + contentKey?: string; + metadataKey?: string; + vectorKey?: string; + filter?: RedisVectorStoreFilterType; +} + +/** + * Interface for the options when adding documents to the + * RedisVectorStore. It includes keys and batch size. + */ +export interface RedisAddOptions { + keys?: string[]; + batchSize?: number; +} + +/** + * Type for the filter used in the RedisVectorStore. It is an array of + * strings. + */ +export type RedisVectorStoreFilterType = string[]; + +/** + * Class representing a RedisVectorStore. It extends the VectorStore class + * and includes methods for adding documents and vectors, performing + * similarity searches, managing the index, and more. 
+ */ +export class RedisVectorStore extends VectorStore { + declare FilterType: RedisVectorStoreFilterType; + + private redisClient: + | ReturnType + | ReturnType; + + indexName: string; + + indexOptions: CreateSchemaFlatVectorField | CreateSchemaHNSWVectorField; + + createIndexOptions: CreateIndexOptions; + + keyPrefix: string; + + contentKey: string; + + metadataKey: string; + + vectorKey: string; + + filter?: RedisVectorStoreFilterType; + + _vectorstoreType(): string { + return "redis"; + } + + constructor(embeddings: Embeddings, _dbConfig: RedisVectorStoreConfig) { + super(embeddings, _dbConfig); + + this.redisClient = _dbConfig.redisClient; + this.indexName = _dbConfig.indexName; + this.indexOptions = _dbConfig.indexOptions ?? { + ALGORITHM: VectorAlgorithms.HNSW, + DISTANCE_METRIC: "COSINE", + }; + this.keyPrefix = _dbConfig.keyPrefix ?? `doc:${this.indexName}:`; + this.contentKey = _dbConfig.contentKey ?? "content"; + this.metadataKey = _dbConfig.metadataKey ?? "metadata"; + this.vectorKey = _dbConfig.vectorKey ?? "content_vector"; + this.filter = _dbConfig.filter; + this.createIndexOptions = { + ON: "HASH", + PREFIX: this.keyPrefix, + ...(_dbConfig.createIndexOptions as CreateIndexOptions), + }; + } + + /** + * Method for adding documents to the RedisVectorStore. It first converts + * the documents to texts and then adds them as vectors. + * @param documents The documents to add. + * @param options Optional parameters for adding the documents. + * @returns A promise that resolves when the documents have been added. + */ + async addDocuments(documents: Document[], options?: RedisAddOptions) { + const texts = documents.map(({ pageContent }) => pageContent); + return this.addVectors( + await this.embeddings.embedDocuments(texts), + documents, + options + ); + } + + /** + * Method for adding vectors to the RedisVectorStore. It checks if the + * index exists and creates it if it doesn't, then adds the vectors in + * batches. 
+ * @param vectors The vectors to add. + * @param documents The documents associated with the vectors. + * @param keys Optional keys for the vectors. + * @param batchSize The size of the batches in which to add the vectors. Defaults to 1000. + * @returns A promise that resolves when the vectors have been added. + */ + async addVectors( + vectors: number[][], + documents: Document[], + { keys, batchSize = 1000 }: RedisAddOptions = {} + ) { + if (!vectors.length || !vectors[0].length) { + throw new Error("No vectors provided"); + } + // check if the index exists and create it if it doesn't + await this.createIndex(vectors[0].length); + + const info = await this.redisClient.ft.info(this.indexName); + const lastKeyCount = parseInt(info.numDocs, 10) || 0; + const multi = this.redisClient.multi(); + + vectors.map(async (vector, idx) => { + const key = + keys && keys.length + ? keys[idx] + : `${this.keyPrefix}${idx + lastKeyCount}`; + const metadata = + documents[idx] && documents[idx].metadata + ? documents[idx].metadata + : {}; + + multi.hSet(key, { + [this.vectorKey]: this.getFloat32Buffer(vector), + [this.contentKey]: documents[idx].pageContent, + [this.metadataKey]: this.escapeSpecialChars(JSON.stringify(metadata)), + }); + + // write batch + if (idx % batchSize === 0) { + await multi.exec(); + } + }); + + // insert final batch + await multi.exec(); + } + + /** + * Method for performing a similarity search in the RedisVectorStore. It + * returns the documents and their scores. + * @param query The query vector. + * @param k The number of nearest neighbors to return. + * @param filter Optional filter to apply to the search. + * @returns A promise that resolves to an array of documents and their scores. 
+ */ + async similaritySearchVectorWithScore( + query: number[], + k: number, + filter?: RedisVectorStoreFilterType + ): Promise<[Document, number][]> { + if (filter && this.filter) { + throw new Error("cannot provide both `filter` and `this.filter`"); + } + + const _filter = filter ?? this.filter; + const results = await this.redisClient.ft.search( + this.indexName, + ...this.buildQuery(query, k, _filter) + ); + const result: [Document, number][] = []; + + if (results.total) { + for (const res of results.documents) { + if (res.value) { + const document = res.value; + if (document.vector_score) { + result.push([ + new Document({ + pageContent: document[this.contentKey] as string, + metadata: JSON.parse( + this.unEscapeSpecialChars(document.metadata as string) + ), + }), + Number(document.vector_score), + ]); + } + } + } + } + + return result; + } + + /** + * Static method for creating a new instance of RedisVectorStore from + * texts. It creates documents from the texts and metadata, then adds them + * to the RedisVectorStore. + * @param texts The texts to add. + * @param metadatas The metadata associated with the texts. + * @param embeddings The embeddings to use. + * @param dbConfig The configuration for the RedisVectorStore. + * @returns A promise that resolves to a new instance of RedisVectorStore. + */ + static fromTexts( + texts: string[], + metadatas: object[] | object, + embeddings: Embeddings, + dbConfig: RedisVectorStoreConfig + ): Promise { + const docs: Document[] = []; + for (let i = 0; i < texts.length; i += 1) { + const metadata = Array.isArray(metadatas) ? metadatas[i] : metadatas; + const newDoc = new Document({ + pageContent: texts[i], + metadata, + }); + docs.push(newDoc); + } + return RedisVectorStore.fromDocuments(docs, embeddings, dbConfig); + } + + /** + * Static method for creating a new instance of RedisVectorStore from + * documents. It adds the documents to the RedisVectorStore. + * @param docs The documents to add. 
+ * @param embeddings The embeddings to use. + * @param dbConfig The configuration for the RedisVectorStore. + * @returns A promise that resolves to a new instance of RedisVectorStore. + */ + static async fromDocuments( + docs: Document[], + embeddings: Embeddings, + dbConfig: RedisVectorStoreConfig + ): Promise { + const instance = new this(embeddings, dbConfig); + await instance.addDocuments(docs); + return instance; + } + + /** + * Method for checking if an index exists in the RedisVectorStore. + * @returns A promise that resolves to a boolean indicating whether the index exists. + */ + async checkIndexExists() { + try { + await this.redisClient.ft.info(this.indexName); + } catch (err) { + // eslint-disable-next-line @typescript-eslint/no-explicit-any + if ((err as any)?.message.includes("unknown command")) { + throw new Error( + "Failed to run FT.INFO command. Please ensure that you are running a RediSearch-capable Redis instance: https://js.langchain.com/docs/modules/data_connection/vectorstores/integrations/redis#setup" + ); + } + // index doesn't exist + return false; + } + + return true; + } + + /** + * Method for creating an index in the RedisVectorStore. If the index + * already exists, it does nothing. + * @param dimensions The dimensions of the index + * @returns A promise that resolves when the index has been created. + */ + async createIndex(dimensions = 1536): Promise { + if (await this.checkIndexExists()) { + return; + } + + const schema: RediSearchSchema = { + [this.vectorKey]: { + type: SchemaFieldTypes.VECTOR, + TYPE: "FLOAT32", + DIM: dimensions, + ...this.indexOptions, + }, + [this.contentKey]: SchemaFieldTypes.TEXT, + [this.metadataKey]: SchemaFieldTypes.TEXT, + }; + + await this.redisClient.ft.create( + this.indexName, + schema, + this.createIndexOptions + ); + } + + /** + * Method for dropping an index from the RedisVectorStore. + * @param deleteDocuments Optional boolean indicating whether to drop the associated documents. 
+ * @returns A promise that resolves to a boolean indicating whether the index was dropped. + */ + async dropIndex(deleteDocuments?: boolean): Promise { + try { + const options = deleteDocuments ? { DD: deleteDocuments } : undefined; + await this.redisClient.ft.dropIndex(this.indexName, options); + + return true; + } catch (err) { + return false; + } + } + + /** + * Deletes vectors from the vector store. + * @param params The parameters for deleting vectors. + * @returns A promise that resolves when the vectors have been deleted. + */ + async delete(params: { deleteAll: boolean }): Promise { + if (params.deleteAll) { + await this.dropIndex(true); + } else { + throw new Error(`Invalid parameters passed to "delete".`); + } + } + + private buildQuery( + query: number[], + k: number, + filter?: RedisVectorStoreFilterType + ): [string, SearchOptions] { + const vectorScoreField = "vector_score"; + + let hybridFields = "*"; + // if a filter is set, modify the hybrid query + if (filter && filter.length) { + // `filter` is a list of strings, then it's applied using the OR operator in the metadata key + // for example: filter = ['foo', 'bar'] => this will filter all metadata containing either 'foo' OR 'bar' + hybridFields = `@${this.metadataKey}:(${this.prepareFilter(filter)})`; + } + + const baseQuery = `${hybridFields} => [KNN ${k} @${this.vectorKey} $vector AS ${vectorScoreField}]`; + const returnFields = [this.metadataKey, this.contentKey, vectorScoreField]; + + const options: SearchOptions = { + PARAMS: { + vector: this.getFloat32Buffer(query), + }, + RETURN: returnFields, + SORTBY: vectorScoreField, + DIALECT: 2, + LIMIT: { + from: 0, + size: k, + }, + }; + + return [baseQuery, options]; + } + + private prepareFilter(filter: RedisVectorStoreFilterType) { + return filter.map(this.escapeSpecialChars).join("|"); + } + + /** + * Escapes all '-' characters. 
+ * RediSearch considers '-' as a negative operator, hence we need + * to escape it + * @see https://redis.io/docs/stack/search/reference/query_syntax + * + * @param str + * @returns + */ + private escapeSpecialChars(str: string) { + return str.replaceAll("-", "\\-"); + } + + /** + * Unescapes all '-' characters, returning the original string + * + * @param str + * @returns + */ + private unEscapeSpecialChars(str: string) { + return str.replaceAll("\\-", "-"); + } + + /** + * Converts the vector to the buffer Redis needs to + * correctly store an embedding + * + * @param vector + * @returns Buffer + */ + private getFloat32Buffer(vector: number[]) { + return Buffer.from(new Float32Array(vector).buffer); + } +} diff --git a/libs/langchain-community/src/vectorstores/rockset.ts b/libs/langchain-community/src/vectorstores/rockset.ts new file mode 100644 index 000000000000..2f68213bd592 --- /dev/null +++ b/libs/langchain-community/src/vectorstores/rockset.ts @@ -0,0 +1,452 @@ +import { MainApi } from "@rockset/client"; +import type { CreateCollectionRequest } from "@rockset/client/dist/codegen/api.d.ts"; +import { Collection } from "@rockset/client/dist/codegen/api.js"; + +import { Embeddings } from "@langchain/core/embeddings"; +import { VectorStore } from "@langchain/core/vectorstores"; +import { Document } from "@langchain/core/documents" +/** + * Generic Rockset vector storage error + */ +export class RocksetStoreError extends Error { + /** + * Constructs a RocksetStoreError + * @param message The error message + */ + constructor(message: string) { + super(message); + this.name = this.constructor.name; + } +} + +/** + * Error that is thrown when a RocksetStore function is called + * after `destroy()` is called (meaning the collection would be + * deleted). 
+ */ +export class RocksetStoreDestroyedError extends RocksetStoreError { + constructor() { + super("The Rockset store has been destroyed"); + this.name = this.constructor.name; + } +} + +/** + * Functions to measure vector distance/similarity by. + * See https://rockset.com/docs/vector-functions/#vector-distance-functions + * @enum SimilarityMetric + */ +export const SimilarityMetric = { + CosineSimilarity: "COSINE_SIM", + EuclideanDistance: "EUCLIDEAN_DIST", + DotProduct: "DOT_PRODUCT", +} as const; + +export type SimilarityMetric = + (typeof SimilarityMetric)[keyof typeof SimilarityMetric]; + +interface CollectionNotFoundError { + message_key: string; +} + +/** + * Vector store arguments + * @interface RocksetStore + */ +export interface RocksetLibArgs { + /** + * The rockset client object constructed with `rocksetConfigure` + * @type {MainAPI} + */ + client: MainApi; + /** + * The name of the Rockset collection to store vectors + * @type {string} + */ + collectionName: string; + /** + * The name of othe Rockset workspace that holds @member collectionName + * @type {string} + */ + workspaceName?: string; + /** + * The name of the collection column to contain page contnent of documents + * @type {string} + */ + textKey?: string; + /** + * The name of the collection column to contain vectors + * @type {string} + */ + embeddingKey?: string; + /** + * The SQL `WHERE` clause to filter by + * @type {string} + */ + filter?: string; + /** + * The metric used to measure vector relationship + * @type {SimilarityMetric} + */ + similarityMetric?: SimilarityMetric; +} + +/** + * Exposes Rockset's vector store/search functionality + */ +export class RocksetStore extends VectorStore { + declare FilterType: string; + + client: MainApi; + + collectionName: string; + + workspaceName: string; + + textKey: string; + + embeddingKey: string; + + filter?: string; + + private _similarityMetric: SimilarityMetric; + + private similarityOrder: "ASC" | "DESC"; + + private destroyed: 
boolean; + + /** + * Gets a string representation of the type of this VectorStore + * @returns {"rockset"} + */ + _vectorstoreType(): "rockset" { + return "rockset"; + } + + /** + * Constructs a new RocksetStore + * @param {Embeddings} embeddings Object used to embed queries and + * page content + * @param {RocksetLibArgs} args + */ + constructor(embeddings: Embeddings, args: RocksetLibArgs) { + super(embeddings, args); + + this.embeddings = embeddings; + this.client = args.client; + this.collectionName = args.collectionName; + this.workspaceName = args.workspaceName ?? "commons"; + this.textKey = args.textKey ?? "text"; + this.embeddingKey = args.embeddingKey ?? "embedding"; + this.filter = args.filter; + this.similarityMetric = + args.similarityMetric ?? SimilarityMetric.CosineSimilarity; + this.setSimilarityOrder(); + } + + /** + * Sets the object's similarity order based on what + * SimilarityMetric is being used + */ + private setSimilarityOrder() { + this.checkIfDestroyed(); + this.similarityOrder = + this.similarityMetric === SimilarityMetric.EuclideanDistance + ? "ASC" + : "DESC"; + } + + /** + * Embeds and adds Documents to the store. 
+ * @param {Documents[]} documents The documents to store + * @returns {Promise} The _id's of the documents added + */ + async addDocuments(documents: Document[]): Promise { + const texts = documents.map(({ pageContent }) => pageContent); + return await this.addVectors( + await this.embeddings.embedDocuments(texts), + documents + ); + } + + /** + * Adds vectors to the store given their corresponding Documents + * @param {number[][]} vectors The vectors to store + * @param {Document[]} documents The Documents they represent + * @return {Promise} The _id's of the added documents + */ + async addVectors(vectors: number[][], documents: Document[]) { + this.checkIfDestroyed(); + const rocksetDocs = []; + for (let i = 0; i < documents.length; i += 1) { + const currDoc = documents[i]; + const currVector = vectors[i]; + rocksetDocs.push({ + [this.textKey]: currDoc.pageContent, + [this.embeddingKey]: currVector, + ...currDoc.metadata, + }); + } + + return ( + await this.client.documents.addDocuments( + this.workspaceName, + this.collectionName, + { + data: rocksetDocs, + } + ) + ).data?.map((docStatus) => docStatus._id || ""); + } + + /** + * Deletes Rockset documements given their _id's + * @param {string[]} ids The IDS to remove documents with + */ + async delete(ids: string[]): Promise { + this.checkIfDestroyed(); + await this.client.documents.deleteDocuments( + this.workspaceName, + this.collectionName, + { + data: ids.map((id) => ({ _id: id })), + } + ); + } + + /** + * Gets the most relevant documents to a query along + * with their similarity score. 
The returned documents + * are ordered by similarity (most similar at the first + * index) + * @param {number[]} query The embedded query to search + * the store by + * @param {number} k The number of documents to retreive + * @param {string?} filter The SQL `WHERE` clause to filter by + */ + async similaritySearchVectorWithScore( + query: number[], + k: number, + filter?: string + ): Promise<[Document, number][]> { + this.checkIfDestroyed(); + if (filter && this.filter) { + throw new RocksetStoreError( + "cannot provide both `filter` and `this.filter`" + ); + } + const similarityKey = "similarity"; + const _filter = filter ?? this.filter; + return ( + ( + await this.client.queries.query({ + sql: { + query: ` + SELECT + * EXCEPT("${this.embeddingKey}"), + "${this.textKey}", + ${this.similarityMetric}(:query, "${ + this.embeddingKey + }") AS "${similarityKey}" + FROM + "${this.workspaceName}"."${this.collectionName}" + ${_filter ? `WHERE ${_filter}` : ""} + ORDER BY + "${similarityKey}" ${this.similarityOrder} + LIMIT + ${k} + `, + parameters: [ + { + name: "query", + type: "", + value: `[${query.toString()}]`, + }, + ], + }, + }) + ).results?.map((rocksetDoc) => [ + new Document>({ + pageContent: rocksetDoc[this.textKey], + metadata: (({ + [this.textKey]: t, + [similarityKey]: s, + ...rocksetDoc + }) => rocksetDoc)(rocksetDoc), + }), + rocksetDoc[similarityKey] as number, + ]) ?? [] + ); + } + + /** + * Constructs and returns a RocksetStore object given texts to store. 
+ * @param {string[]} texts The texts to store + * @param {object[] | object} metadatas The metadatas that correspond + * to @param texts + * @param {Embeddings} embeddings The object used to embed queries + * and page content + * @param {RocksetLibArgs} dbConfig The options to be passed into the + * RocksetStore constructor + * @returns {RocksetStore} + */ + static async fromTexts( + texts: string[], + metadatas: object[] | object, + embeddings: Embeddings, + dbConfig: RocksetLibArgs + ): Promise { + const docs: Document[] = []; + for (let i = 0; i < texts.length; i += 1) { + const metadata = Array.isArray(metadatas) ? metadatas[i] : metadatas; + const newDoc = new Document({ + pageContent: texts[i], + metadata, + }); + docs.push(newDoc); + } + + return RocksetStore.fromDocuments(docs, embeddings, dbConfig); + } + + /** + * Constructs, adds docs to, and returns a RocksetStore object + * @param {Document[]} docs The Documents to store + * @param {Embeddings} embeddings The object used to embed queries + * and page content + * @param {RocksetLibArgs} dbConfig The options to be passed into the + * RocksetStore constructor + * @returns {RocksetStore} + */ + static async fromDocuments( + docs: Document[], + embeddings: Embeddings, + dbConfig: RocksetLibArgs + ): Promise { + const args = { ...dbConfig, textKey: dbConfig.textKey ?? "text" }; + const instance = new this(embeddings, args); + await instance.addDocuments(docs); + return instance; + } + + /** + * Checks if a Rockset collection exists. + * @param {RocksetLibArgs} dbConfig The object containing the collection + * and workspace names + * @return {boolean} whether the collection exists + */ + private static async collectionExists(dbConfig: RocksetLibArgs) { + try { + await dbConfig.client.collections.getCollection( + dbConfig.workspaceName ?? 
"commons", + dbConfig.collectionName + ); + } catch (err) { + if ( + (err as CollectionNotFoundError).message_key === + "COLLECTION_DOES_NOT_EXIST" + ) { + return false; + } + throw err; + } + return true; + } + + /** + * Checks whether a Rockset collection is ready to be queried. + * @param {RocksetLibArgs} dbConfig The object containing the collection + * name and workspace + * @return {boolean} whether the collection is ready + */ + private static async collectionReady(dbConfig: RocksetLibArgs) { + return ( + ( + await dbConfig.client.collections.getCollection( + dbConfig.workspaceName ?? "commons", + dbConfig.collectionName + ) + ).data?.status === Collection.StatusEnum.READY + ); + } + + /** + * Deletes the collection this RocksetStore uses + * @param {boolean?} waitUntilDeletion Whether to sleep until the + * collection is ready to be + * queried + */ + async destroy(waitUntilDeletion?: boolean) { + await this.client.collections.deleteCollection( + this.workspaceName, + this.collectionName + ); + this.destroyed = true; + if (waitUntilDeletion) { + while ( + await RocksetStore.collectionExists({ + collectionName: this.collectionName, + client: this.client, + }) + ); + } + } + + /** + * Checks if this RocksetStore has been destroyed. + * @throws {RocksetStoreDestroyederror} if it has. + */ + private checkIfDestroyed() { + if (this.destroyed) { + throw new RocksetStoreDestroyedError(); + } + } + + /** + * Creates a new Rockset collection and returns a RocksetStore that + * uses it + * @param {Embeddings} embeddings Object used to embed queries and + * page content + * @param {RocksetLibArgs} dbConfig The options to be passed into the + * RocksetStore constructor + * @param {CreateCollectionRequest?} collectionOptions The arguments to sent with the + * HTTP request when creating the + * collection. Setting a field mapping + * that `VECTOR_ENFORCE`s is recommended + * when using this function. 
See + * https://rockset.com/docs/vector-functions/#vector_enforce + * @returns {RocksetStore} + */ + static async withNewCollection( + embeddings: Embeddings, + dbConfig: RocksetLibArgs, + collectionOptions?: CreateCollectionRequest + ): Promise<RocksetStore> { + if ( + collectionOptions?.name && + dbConfig.collectionName !== collectionOptions?.name + ) { + throw new RocksetStoreError( + "`dbConfig.collectionName` and `collectionOptions.name` do not match" + ); + } + await dbConfig.client.collections.createCollection( + dbConfig.workspaceName ?? "commons", + collectionOptions || { name: dbConfig.collectionName } + ); + while ( + !(await this.collectionExists(dbConfig)) || + !(await this.collectionReady(dbConfig)) + ); + return new this(embeddings, dbConfig); + } + + public get similarityMetric() { + return this._similarityMetric; + } + + public set similarityMetric(metric: SimilarityMetric) { + this._similarityMetric = metric; + this.setSimilarityOrder(); + } +} diff --git a/libs/langchain-community/src/vectorstores/singlestore.ts b/libs/langchain-community/src/vectorstores/singlestore.ts new file mode 100644 index 000000000000..70871e8713ca --- /dev/null +++ b/libs/langchain-community/src/vectorstores/singlestore.ts @@ -0,0 +1,294 @@ +import type { + Pool, + RowDataPacket, + OkPacket, + ResultSetHeader, + FieldPacket, + PoolOptions, +} from "mysql2/promise"; +import { format } from "mysql2"; +import { createPool } from "mysql2/promise"; +import { Embeddings } from "@langchain/core/embeddings"; +import { VectorStore } from "@langchain/core/vectorstores"; +import { Document } from "@langchain/core/documents"; + +// eslint-disable-next-line @typescript-eslint/no-explicit-any +export type Metadata = Record<string, any>; + +export type DistanceMetrics = "DOT_PRODUCT" | "EUCLIDEAN_DISTANCE"; + +const OrderingDirective: Record<DistanceMetrics, string> = { + DOT_PRODUCT: "DESC", + EUCLIDEAN_DISTANCE: "", +}; + +export interface ConnectionOptions extends PoolOptions {} + +type ConnectionWithUri = { + connectionOptions?: never; +
connectionURI: string; +}; + +type ConnectionWithOptions = { + connectionURI?: never; + connectionOptions: ConnectionOptions; +}; + +type ConnectionConfig = ConnectionWithUri | ConnectionWithOptions; + +export type SingleStoreVectorStoreConfig = ConnectionConfig & { + tableName?: string; + contentColumnName?: string; + vectorColumnName?: string; + metadataColumnName?: string; + distanceMetric?: DistanceMetrics; +}; + +function withConnectAttributes( + config: SingleStoreVectorStoreConfig +): ConnectionOptions { + let newOptions: ConnectionOptions = {}; + if (config.connectionURI) { + newOptions = { + uri: config.connectionURI, + }; + } else if (config.connectionOptions) { + newOptions = { + ...config.connectionOptions, + }; + } + const result: ConnectionOptions = { + ...newOptions, + connectAttributes: { + ...newOptions.connectAttributes, + }, + }; + + if (!result.connectAttributes) { + result.connectAttributes = {}; + } + + result.connectAttributes = { + ...result.connectAttributes, + _connector_name: "langchain js sdk", + _connector_version: "1.0.0", + _driver_name: "Node-MySQL-2", + }; + + return result; +} + +/** + * Class for interacting with SingleStoreDB, a high-performance + * distributed SQL database. It provides vector storage and vector + * functions. + */ +export class SingleStoreVectorStore extends VectorStore { + connectionPool: Pool; + + tableName: string; + + contentColumnName: string; + + vectorColumnName: string; + + metadataColumnName: string; + + distanceMetric: DistanceMetrics; + + _vectorstoreType(): string { + return "singlestore"; + } + + constructor(embeddings: Embeddings, config: SingleStoreVectorStoreConfig) { + super(embeddings, config); + this.connectionPool = createPool(withConnectAttributes(config)); + this.tableName = config.tableName ?? "embeddings"; + this.contentColumnName = config.contentColumnName ?? "content"; + this.vectorColumnName = config.vectorColumnName ?? "vector"; + this.metadataColumnName = config.metadataColumnName ?? 
"metadata"; + this.distanceMetric = config.distanceMetric ?? "DOT_PRODUCT"; + } + + /** + * Creates a new table in the SingleStoreDB database if it does not + * already exist. + */ + async createTableIfNotExists(): Promise { + await this.connectionPool + .execute(`CREATE TABLE IF NOT EXISTS ${this.tableName} ( + ${this.contentColumnName} TEXT CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci, + ${this.vectorColumnName} BLOB, + ${this.metadataColumnName} JSON);`); + } + + /** + * Ends the connection to the SingleStoreDB database. + */ + async end(): Promise { + return this.connectionPool.end(); + } + + /** + * Adds new documents to the SingleStoreDB database. + * @param documents An array of Document objects. + */ + async addDocuments(documents: Document[]): Promise { + const texts = documents.map(({ pageContent }) => pageContent); + const vectors = await this.embeddings.embedDocuments(texts); + return this.addVectors(vectors, documents); + } + + /** + * Adds new vectors to the SingleStoreDB database. + * @param vectors An array of vectors. + * @param documents An array of Document objects. + */ + async addVectors(vectors: number[][], documents: Document[]): Promise { + await this.createTableIfNotExists(); + const { tableName } = this; + + await Promise.all( + vectors.map(async (vector, idx) => { + try { + await this.connectionPool.execute( + format( + `INSERT INTO ${tableName} VALUES (?, JSON_ARRAY_PACK('[?]'), ?);`, + [ + documents[idx].pageContent, + vector, + JSON.stringify(documents[idx].metadata), + ] + ) + ); + } catch (error) { + console.error(`Error adding vector at index ${idx}:`, error); + } + }) + ); + } + + /** + * Performs a similarity search on the vectors stored in the SingleStoreDB + * database. + * @param query An array of numbers representing the query vector. + * @param k The number of nearest neighbors to return. + * @param filter Optional metadata to filter the vectors by. 
+ * @returns Top matching vectors with score + */ + async similaritySearchVectorWithScore( + query: number[], + k: number, + filter?: Metadata + ): Promise<[Document, number][]> { + // build the where clause from filter + const whereArgs: string[] = []; + const buildWhereClause = (record: Metadata, argList: string[]): string => { + const whereTokens: string[] = []; + for (const key in record) + if (record[key] !== undefined) { + if ( + typeof record[key] === "object" && + record[key] != null && + !Array.isArray(record[key]) + ) { + whereTokens.push( + buildWhereClause(record[key], argList.concat([key])) + ); + } else { + whereTokens.push( + `JSON_EXTRACT_JSON(${this.metadataColumnName}, `.concat( + Array.from({ length: argList.length + 1 }, () => "?").join( + ", " + ), + ") = ?" + ) + ); + whereArgs.push(...argList, key, JSON.stringify(record[key])); + } + } + return whereTokens.join(" AND "); + }; + const whereClause = filter + ? "WHERE ".concat(buildWhereClause(filter, [])) + : ""; + + const [rows]: [ + ( + | RowDataPacket[] + | RowDataPacket[][] + | OkPacket + | OkPacket[] + | ResultSetHeader + ), + FieldPacket[] + ] = await this.connectionPool.query( + format( + `SELECT ${this.contentColumnName}, + ${this.metadataColumnName}, + ${this.distanceMetric}(${ + this.vectorColumnName + }, JSON_ARRAY_PACK('[?]')) as __score FROM ${ + this.tableName + } ${whereClause} + ORDER BY __score ${OrderingDirective[this.distanceMetric]} LIMIT ?;`, + [query, ...whereArgs, k] + ) + ); + const result: [Document, number][] = []; + for (const row of rows as RowDataPacket[]) { + const rowData = row as unknown as Record<string, unknown>; + result.push([ + new Document({ + pageContent: rowData[this.contentColumnName] as string, + metadata: rowData[this.metadataColumnName] as Record<string, unknown>, + }), + Number(rowData.__score), + ]); + } + return result; + } + + /** + * Creates a new instance of the SingleStoreVectorStore class from a list + * of texts. + * @param texts An array of strings.
+ * @param metadatas An array of metadata objects. + * @param embeddings An Embeddings object. + * @param dbConfig A SingleStoreVectorStoreConfig object. + * @returns A new SingleStoreVectorStore instance + */ + static async fromTexts( + texts: string[], + metadatas: object[], + embeddings: Embeddings, + dbConfig: SingleStoreVectorStoreConfig + ): Promise { + const docs = texts.map((text, idx) => { + const metadata = Array.isArray(metadatas) ? metadatas[idx] : metadatas; + return new Document({ + pageContent: text, + metadata, + }); + }); + return SingleStoreVectorStore.fromDocuments(docs, embeddings, dbConfig); + } + + /** + * Creates a new instance of the SingleStoreVectorStore class from a list + * of Document objects. + * @param docs An array of Document objects. + * @param embeddings An Embeddings object. + * @param dbConfig A SingleStoreVectorStoreConfig object. + * @returns A new SingleStoreVectorStore instance + */ + static async fromDocuments( + docs: Document[], + embeddings: Embeddings, + dbConfig: SingleStoreVectorStoreConfig + ): Promise { + const instance = new this(embeddings, dbConfig); + await instance.addDocuments(docs); + return instance; + } +} diff --git a/libs/langchain-community/src/vectorstores/tests/analyticdb.int.test.ts b/libs/langchain-community/src/vectorstores/tests/analyticdb.int.test.ts new file mode 100644 index 000000000000..46abb53472f5 --- /dev/null +++ b/libs/langchain-community/src/vectorstores/tests/analyticdb.int.test.ts @@ -0,0 +1,169 @@ +/* eslint-disable no-process-env */ +/* eslint-disable import/no-extraneous-dependencies */ +import { test } from "@jest/globals"; +import { Document } from "@langchain/core/documents"; +import { OpenAIEmbeddings } from "@langchain/openai"; + +import { AnalyticDBVectorStore } from "../analyticdb.js"; + +const connectionOptions = { + host: process.env.ANALYTICDB_HOST || "localhost", + port: Number(process.env.ANALYTICDB_PORT) || 5432, + database: process.env.ANALYTICDB_DATABASE || 
"your_database", + user: process.env.ANALYTICDB_USERNAME || "username", + password: process.env.ANALYTICDB_PASSWORD || "password", +}; + +const embeddings = new OpenAIEmbeddings(); +const _LANGCHAIN_DEFAULT_EMBEDDING_DIM = 1536; + +beforeAll(async () => { + expect(process.env.ANALYTICDB_HOST).toBeDefined(); + expect(process.env.ANALYTICDB_PORT).toBeDefined(); + expect(process.env.ANALYTICDB_DATABASE).toBeDefined(); + expect(process.env.ANALYTICDB_USERNAME).toBeDefined(); + expect(process.env.ANALYTICDB_USERNAME).toBeDefined(); +}); + +test.skip("test analyticdb", async () => { + const vectorStore = new AnalyticDBVectorStore(embeddings, { + connectionOptions, + collectionName: "test_collection", + preDeleteCollection: true, + }); + expect(vectorStore).toBeDefined(); + + const createdAt = new Date().getTime(); + await vectorStore.addDocuments([ + { pageContent: "hi", metadata: { a: createdAt } }, + { pageContent: "bye", metadata: { a: createdAt } }, + { pageContent: "what's this", metadata: { a: createdAt } }, + { pageContent: createdAt.toString(), metadata: { a: createdAt } }, + ]); + + const results = await vectorStore.similaritySearch("what's this", 1); + + expect(results).toHaveLength(1); + expect(results).toEqual([ + new Document({ + pageContent: "what's this", + metadata: { a: createdAt }, + }), + ]); + + await vectorStore.end(); +}); + +test.skip("test analyticdb using filter", async () => { + const vectorStore = new AnalyticDBVectorStore(embeddings, { + connectionOptions, + collectionName: "test_collection", + embeddingDimension: _LANGCHAIN_DEFAULT_EMBEDDING_DIM, + preDeleteCollection: true, + }); + expect(vectorStore).toBeDefined(); + + const createdAt = new Date().getTime(); + await vectorStore.addDocuments([ + { pageContent: "foo", metadata: { a: createdAt, b: createdAt + 6 } }, + { pageContent: "bar", metadata: { a: createdAt + 1, b: createdAt + 7 } }, + { pageContent: "baz", metadata: { a: createdAt + 2, b: createdAt + 8 } }, + { pageContent: "foo", 
metadata: { a: createdAt + 3, b: createdAt + 9 } }, + { pageContent: "bar", metadata: { a: createdAt + 4, b: createdAt + 10 } }, + { pageContent: "baz", metadata: { a: createdAt + 5, b: createdAt + 11 } }, + ]); + + const results = await vectorStore.similaritySearch("bar", 1, { + a: createdAt + 4, + b: createdAt + 10, + }); + + expect(results).toHaveLength(1); + expect(results).toEqual([ + new Document({ + pageContent: "bar", + metadata: { a: createdAt + 4, b: createdAt + 10 }, + }), + ]); + + await vectorStore.end(); +}); + +test.skip("test analyticdb from texts", async () => { + const vectorStore = await AnalyticDBVectorStore.fromTexts( + ["Bye bye", "Hello world", "hello nice world"], + [ + { id: 2, name: "2" }, + { id: 1, name: "1" }, + { id: 3, name: "3" }, + ], + embeddings, + { + connectionOptions, + collectionName: "test_collection", + embeddingDimension: _LANGCHAIN_DEFAULT_EMBEDDING_DIM, + preDeleteCollection: true, + } + ); + expect(vectorStore).toBeDefined(); + + const results = await vectorStore.similaritySearch("hello world", 1); + + expect(results).toHaveLength(1); + expect(results).toEqual([ + new Document({ + pageContent: "Hello world", + metadata: { id: 1, name: "1" }, + }), + ]); + + await vectorStore.end(); +}); + +test.skip("test analyticdb from existing index", async () => { + await AnalyticDBVectorStore.fromTexts( + ["Bye bye", "Hello world", "hello nice world"], + [ + { id: 2, name: "2" }, + { id: 1, name: "1" }, + { id: 3, name: "3" }, + ], + embeddings, + { + connectionOptions, + collectionName: "test_collection", + embeddingDimension: _LANGCHAIN_DEFAULT_EMBEDDING_DIM, + preDeleteCollection: true, + } + ); + + const vectorStore = await AnalyticDBVectorStore.fromExistingIndex( + embeddings, + { + connectionOptions, + collectionName: "test_collection", + embeddingDimension: _LANGCHAIN_DEFAULT_EMBEDDING_DIM, + preDeleteCollection: false, + } + ); + + const result1 = await vectorStore.similaritySearch("hello world", 1); + 
expect(result1).toHaveLength(1); + expect(result1).toEqual([ + { pageContent: "Hello world", metadata: { id: 1, name: "1" } }, + ]); + + await vectorStore.addDocuments([ + { pageContent: "bar", metadata: { id: 4, name: "4" } }, + { pageContent: "baz", metadata: { id: 5, name: "5" } }, + ]); + + const result2 = await vectorStore.similaritySearch("bar", 2); + expect(result2).toHaveLength(2); + expect(result2).toEqual([ + { pageContent: "bar", metadata: { id: 4, name: "4" } }, + { pageContent: "baz", metadata: { id: 5, name: "5" } }, + ]); + + await vectorStore.end(); +}); diff --git a/libs/langchain-community/src/vectorstores/tests/cassandra.int.test.ts b/libs/langchain-community/src/vectorstores/tests/cassandra.int.test.ts new file mode 100644 index 000000000000..d9d4df2d3a3d --- /dev/null +++ b/libs/langchain-community/src/vectorstores/tests/cassandra.int.test.ts @@ -0,0 +1,363 @@ +/* eslint-disable no-process-env */ +import { test, expect, describe } from "@jest/globals"; + +import { Client } from "cassandra-driver"; +import { OpenAIEmbeddings } from "@langchain/openai"; +import { Document } from "@langchain/core/documents"; +import { CassandraStore } from "../cassandra.js"; + +const cassandraConfig = { + cloud: { + secureConnectBundle: process.env.CASSANDRA_SCB as string, + }, + credentials: { + username: "token", + password: process.env.CASSANDRA_TOKEN as string, + }, + keyspace: "test", + table: "test", +}; +const client = new Client(cassandraConfig); + +const noPartitionConfig = { + ...cassandraConfig, + dimensions: 1536, + primaryKey: { + name: "id", + type: "int", + }, + metadataColumns: [ + { + name: "name", + type: "text", + }, + { + name: "seq", + type: "int", + }, + ], +}; + +// yarn test:single /langchain/src/vectorstores/tests/cassandra.int.test.ts +// Note there are multiple describe functions that need to be un-skipped for internal testing +describe.skip("CassandraStore - no explicit partition key", () => { + beforeAll(async () => { + await 
client.execute("DROP TABLE IF EXISTS test.test;"); + }); + + test("CassandraStore.fromText", async () => { + const vectorStore = await CassandraStore.fromTexts( + ["I am blue", "Green yellow purple", "Hello there hello"], + [ + { id: 2, name: "Alex" }, + { id: 1, name: "Scott" }, + { id: 3, name: "Bubba" }, + ], + new OpenAIEmbeddings(), + noPartitionConfig + ); + + const results = await vectorStore.similaritySearch( + "Green yellow purple", + 1 + ); + expect(results).toEqual([ + new Document({ + pageContent: "Green yellow purple", + metadata: { id: 1, name: "Scott" }, + }), + ]); + }); + + test("CassandraStore.fromExistingIndex", async () => { + await CassandraStore.fromTexts( + ["Hey", "Whats up", "Hello"], + [ + { id: 2, name: "Alex" }, + { id: 1, name: "Scott" }, + { id: 3, name: "Bubba" }, + ], + new OpenAIEmbeddings(), + noPartitionConfig + ); + + const vectorStore = await CassandraStore.fromExistingIndex( + new OpenAIEmbeddings(), + noPartitionConfig + ); + + const results = await vectorStore.similaritySearch("Whats up", 1); + expect(results).toEqual([ + new Document({ + pageContent: "Whats up", + metadata: { id: 1, name: "Scott" }, + }), + ]); + }); + + test("CassandraStore.fromExistingIndex (with filter)", async () => { + const testConfig = { + ...noPartitionConfig, + indices: [ + { + name: "name", + value: "(name)", + }, + ], + }; + + await CassandraStore.fromTexts( + ["Hey", "Whats up", "Hello"], + [ + { id: 2, name: "Alex" }, + { id: 1, name: "Scott" }, + { id: 3, name: "Bubba" }, + ], + new OpenAIEmbeddings(), + testConfig + ); + + const vectorStore = await CassandraStore.fromExistingIndex( + new OpenAIEmbeddings(), + testConfig + ); + + const results = await vectorStore.similaritySearch("Hey", 1, { + name: "Bubba", + }); + expect(results).toEqual([ + new Document({ + pageContent: "Hello", + metadata: { id: 3, name: "Bubba" }, + }), + ]); + }); + + test("CassandraStore.fromExistingIndex (with inequality filter)", async () => { + const testConfig = { + 
...noPartitionConfig, + indices: [ + { + name: "seq", + value: "(seq)", + }, + ], + }; + + await CassandraStore.fromTexts( + ["Hey", "Whats up", "Hello"], + [ + { id: 2, name: "Alex", seq: 99 }, + { id: 1, name: "Scott", seq: 88 }, + { id: 3, name: "Bubba", seq: 77 }, + ], + new OpenAIEmbeddings(), + testConfig + ); + + const vectorStore = await CassandraStore.fromExistingIndex( + new OpenAIEmbeddings(), + testConfig + ); + + // With out the filter this would match on Scott, but we are using > filter + const results = await vectorStore.similaritySearch("Whats up", 1, [ + { name: "seq", operator: ">", value: "88" }, + ]); + expect(results).toEqual([ + new Document({ + pageContent: "Hey", + metadata: { id: 2, name: "Alex", seq: 99 }, + }), + ]); + }); + + test("CassandraStore.addDocuments (with batch))", async () => { + const testConfig = { + ...noPartitionConfig, + maxConcurrency: 1, + batchSize: 5, + }; + + const docs: Document[] = []; + docs.push( + new Document({ + pageContent: "Hello Muddah, hello Faddah", + metadata: { id: 1, name: "Alex" }, + }) + ); + docs.push( + new Document({ + pageContent: "Here I am at Camp Granada", + metadata: { id: 2, name: "Blair" }, + }) + ); + docs.push( + new Document({ + pageContent: "Camp is very entertaining", + metadata: { id: 3, name: "Casey" }, + }) + ); + docs.push( + new Document({ + pageContent: "And they say we'll have some fun if it stops raining", + metadata: { id: 4, name: "Dana" }, + }) + ); + + docs.push( + new Document({ + pageContent: "I went hiking with Joe Spivey", + metadata: { id: 5, name: "Amber" }, + }) + ); + docs.push( + new Document({ + pageContent: "He developed poison ivy", + metadata: { id: 6, name: "Blair" }, + }) + ); + docs.push( + new Document({ + pageContent: "You remember Leonard Skinner", + metadata: { id: 7, name: "Casey" }, + }) + ); + docs.push( + new Document({ + pageContent: "He got Ptomaine poisoning last night after dinner", + metadata: { id: 8, name: "Dana" }, + }) + ); + + docs.push( + 
new Document({ + pageContent: "All the counsellors hate the waiters", + metadata: { id: 9, name: "Amber" }, + }) + ); + docs.push( + new Document({ + pageContent: "And the lake has alligators", + metadata: { id: 10, name: "Blair" }, + }) + ); + docs.push( + new Document({ + pageContent: "And the head coach wants no sissies", + metadata: { id: 11, name: "Casey" }, + }) + ); + docs.push( + new Document({ + pageContent: "So he reads to us from something called Ulysses", + metadata: { id: 12, name: "Dana" }, + }) + ); + + const vectorStore = await CassandraStore.fromExistingIndex( + new OpenAIEmbeddings(), + testConfig + ); + + await vectorStore.addDocuments(docs); + + const results = await vectorStore.similaritySearch( + "something called Ulysses", + 1 + ); + expect(results).toEqual([ + new Document({ + pageContent: "So he reads to us from something called Ulysses", + metadata: { id: 12, name: "Dana" }, + }), + ]); + }); +}); + +const partitionConfig = { + ...noPartitionConfig, + primaryKey: [ + { + name: "group", + type: "int", + partition: true, + }, + { + name: "ts", + type: "timestamp", + }, + { + name: "id", + type: "int", + }, + ], + withClause: "CLUSTERING ORDER BY (ts DESC)", +}; + +describe.skip("CassandraStore - with explicit partition key", () => { + beforeAll(async () => { + await client.execute("DROP TABLE IF EXISTS test.test;"); + }); + + test("CassandraStore.partitionKey", async () => { + const vectorStore = await CassandraStore.fromTexts( + ["Hey", "Hey"], + [ + { group: 1, ts: new Date(1655377200000), id: 1, name: "Alex" }, + { group: 2, ts: new Date(1655377200000), id: 1, name: "Alice" }, + ], + new OpenAIEmbeddings(), + partitionConfig + ); + + const results = await vectorStore.similaritySearch("Hey", 1, { + group: 2, + }); + + console.debug(`results: ${JSON.stringify(results)}`); + + expect(results).toEqual([ + new Document({ + pageContent: "Hey", + metadata: { + group: 2, + ts: new Date(1655377200000), + id: 1, + name: "Alice", + }, + }), + ]); + 
}); + + // Test needs to be skipped until https://github.com/datastax/cassandra/pull/839 + test.skip("CassandraStore.partition with cluster filter", async () => { + const vectorStore = await CassandraStore.fromTexts( + ["Apple", "Banana", "Cherry", "Date", "Elderberry"], + [ + { group: 3, ts: new Date(1655377200000), id: 1, name: "Alex" }, + { group: 3, ts: new Date(1655377201000), id: 2, name: "Alex" }, + { group: 3, ts: new Date(1655377202000), id: 3, name: "Alex" }, + { group: 3, ts: new Date(1655377203000), id: 4, name: "Alex" }, + { group: 3, ts: new Date(1655377204000), id: 5, name: "Alex" }, + ], + new OpenAIEmbeddings(), + partitionConfig + ); + + await expect( + vectorStore.similaritySearch("Banana", 1, [ + { name: "group", value: 1 }, + { name: "ts", value: new Date(1655377202000), operator: ">" }, + ]) + ).rejects.toThrow(); + + // Once Cassandra supports filtering against cluster columns, the following should work + // expect(results).toEqual([ + // new Document({ + // pageContent: "Elderberry", + // metadata: { group: 1, ts: new Date(1655377204000), id: 5, name: "Alex", seq: null} + // }), + // ]); + }); +}); diff --git a/libs/langchain-community/src/vectorstores/tests/chroma.int.test.ts b/libs/langchain-community/src/vectorstores/tests/chroma.int.test.ts new file mode 100644 index 000000000000..af9da7661dc6 --- /dev/null +++ b/libs/langchain-community/src/vectorstores/tests/chroma.int.test.ts @@ -0,0 +1,163 @@ +/* eslint-disable no-process-env */ +/* eslint-disable @typescript-eslint/no-non-null-assertion */ +import { beforeEach, describe, expect, test } from "@jest/globals"; +import { ChromaClient } from "chromadb"; +import { faker } from "@faker-js/faker"; +import * as uuid from "uuid"; +import { Document } from "@langchain/core/documents"; +import { OpenAIEmbeddings } from "@langchain/openai"; +import { Chroma } from "../chroma.js"; + +describe.skip("Chroma", () => { + let chromaStore: Chroma; + + beforeEach(async () => { + const embeddings = new 
OpenAIEmbeddings(); + chromaStore = new Chroma(embeddings, { + url: "http://localhost:8000", + collectionName: "test-collection", + }); + }); + + test.skip("auto-generated ids", async () => { + const pageContent = faker.lorem.sentence(5); + + await chromaStore.addDocuments([{ pageContent, metadata: { foo: "bar" } }]); + + const results = await chromaStore.similaritySearch(pageContent, 1); + + expect(results).toEqual([ + new Document({ metadata: { foo: "bar" }, pageContent }), + ]); + }); + + test.skip("metadata filtering", async () => { + const pageContent = faker.lorem.sentence(5); + const id = uuid.v4(); + + await chromaStore.addDocuments([ + { pageContent, metadata: { foo: "bar" } }, + { pageContent, metadata: { foo: id } }, + { pageContent, metadata: { foo: "qux" } }, + ]); + + // If the filter wasn't working, we'd get all 3 documents back + const results = await chromaStore.similaritySearch(pageContent, 3, { + foo: id, + }); + + expect(results).toEqual([ + new Document({ metadata: { foo: id }, pageContent }), + ]); + }); + + test.skip("upsert", async () => { + const pageContent = faker.lorem.sentence(5); + const id = uuid.v4(); + + const ids = await chromaStore.addDocuments([ + { pageContent, metadata: { foo: id } }, + { pageContent, metadata: { foo: id } }, + ]); + + const results = await chromaStore.similaritySearch(pageContent, 4, { + foo: id, + }); + + expect(results.length).toEqual(2); + + const ids2 = await chromaStore.addDocuments( + [ + { pageContent, metadata: { foo: id } }, + { pageContent, metadata: { foo: id } }, + ], + { ids } + ); + + expect(ids).toEqual(ids2); + + const newResults = await chromaStore.similaritySearch(pageContent, 4, { + foo: id, + }); + + expect(newResults.length).toEqual(2); + }); + + test.skip("delete by ids", async () => { + const pageContent = faker.lorem.sentence(5); + const id = uuid.v4(); + + const ids = await chromaStore.addDocuments([ + { pageContent, metadata: { foo: id } }, + { pageContent, metadata: { foo: id } }, + 
]); + + const results = await chromaStore.similaritySearch(pageContent, 2, { + foo: id, + }); + + expect(results.length).toEqual(2); + + await chromaStore.delete({ ids: ids.slice(0, 1) }); + + const newResults = await chromaStore.similaritySearch(pageContent, 2, { + foo: id, + }); + + expect(newResults.length).toEqual(1); + }); + + test.skip("delete by filter", async () => { + const pageContent = faker.lorem.sentence(5); + const id = uuid.v4(); + const id2 = uuid.v4(); + + await chromaStore.addDocuments([ + { pageContent, metadata: { foo: id } }, + { pageContent, metadata: { foo: id, bar: id2 } }, + ]); + + const results = await chromaStore.similaritySearch(pageContent, 2, { + foo: id, + }); + + expect(results.length).toEqual(2); + + await chromaStore.delete({ + filter: { + bar: id2, + }, + }); + + const newResults = await chromaStore.similaritySearch(pageContent, 2, { + foo: id, + }); + + expect(newResults.length).toEqual(1); + }); + + test.skip("load from client instance", async () => { + const pageContent = faker.lorem.sentence(5); + const id = uuid.v4(); + + const chromaStoreFromClient = new Chroma(new OpenAIEmbeddings(), { + index: new ChromaClient({ + path: "http://localhost:8000", + }), + collectionName: "test-collection", + }); + + await chromaStoreFromClient.addDocuments([ + { pageContent, metadata: { foo: "bar" } }, + { pageContent, metadata: { foo: id } }, + { pageContent, metadata: { foo: "qux" } }, + ]); + + const results = await chromaStoreFromClient.similaritySearch( + pageContent, + 3 + ); + + expect(results.length).toEqual(3); + }); +}); diff --git a/libs/langchain-community/src/vectorstores/tests/chroma.test.ts b/libs/langchain-community/src/vectorstores/tests/chroma.test.ts new file mode 100644 index 000000000000..5ba32646645e --- /dev/null +++ b/libs/langchain-community/src/vectorstores/tests/chroma.test.ts @@ -0,0 +1,135 @@ +/* eslint-disable @typescript-eslint/no-explicit-any */ +import { jest, test, expect } from "@jest/globals"; + +import { 
type Collection } from "chromadb"; +import { Chroma } from "../chroma.js"; +import { FakeEmbeddings } from "../../util/testing.js"; + +const mockCollection = { + count: jest.fn().mockResolvedValue(5), + upsert: jest.fn().mockResolvedValue(undefined as any), + delete: jest.fn().mockResolvedValue(undefined as any), + // add: jest.fn().mockResolvedValue(undefined as any), + // modify: jest.fn().mockResolvedValue(undefined as any), + // get: jest.fn().mockResolvedValue(undefined as any), + // update: jest.fn().mockResolvedValue({ success: true }), + // query: jest.fn().mockResolvedValue(undefined as any), + // peek: jest.fn().mockResolvedValue(undefined as any), +} as any; + +const mockClient = { + getOrCreateCollection: jest.fn().mockResolvedValue(mockCollection), +} as any; + +describe("Chroma", () => { + beforeEach(() => { + jest.clearAllMocks(); + }); + test("imports correctly", async () => { + const { ChromaClient } = await Chroma.imports(); + + expect(ChromaClient).toBeDefined(); + }); + + test("constructor works", async () => { + const chromaStore = new Chroma(new FakeEmbeddings(), { + index: mockClient, + collectionName: "test-collection", + }); + + expect(chromaStore).toBeDefined(); + }); + + test("should add vectors to the collection", async () => { + const expectedPageContents = ["Document 1", "Document 2"]; + const embeddings = new FakeEmbeddings(); + jest.spyOn(embeddings, "embedDocuments"); + const args = { + collectionName: "testCollection", + index: mockClient, + collectionMetadata: { "hnsw:space": "cosine" }, + }; + const documents = expectedPageContents.map((pc) => ({ pageContent: pc })); + + const chroma = new Chroma(embeddings, args); + await chroma.addDocuments(documents as any); + + expect(mockClient.getOrCreateCollection).toHaveBeenCalled(); + expect(embeddings.embedDocuments).toHaveBeenCalledWith( + expectedPageContents + ); + expect(mockCollection.upsert).toHaveBeenCalled(); + + const { metadatas } = mockCollection.upsert.mock.calls[0][0]; + 
expect(metadatas).toEqual([{}, {}]); + }); + + test("should override loc.lines with locFrom/locTo", async () => { + const expectedPageContents = ["Document 1"]; + const embeddings = new FakeEmbeddings(); + jest.spyOn(embeddings, "embedDocuments"); + + const args = { collectionName: "testCollection", index: mockClient }; + const documents = expectedPageContents.map((pc) => ({ + pageContent: pc, + metadata: { source: "source.html", loc: { lines: { from: 0, to: 4 } } }, + })); + + const chroma = new Chroma(embeddings, args); + await chroma.addDocuments(documents as any); + + const { metadatas } = mockCollection.upsert.mock.calls[0][0]; + + expect(metadatas[0]).toEqual({ + source: "source.html", + locFrom: 0, + locTo: 4, + }); + }); + + test("should throw an error for mismatched vector lengths", async () => { + const args = { collectionName: "testCollection" }; + const vectors = [ + [1, 2], + [3, 4], + ]; + const documents = [ + { metadata: { id: 1 }, pageContent: "Document 1" }, + { metadata: { id: 2 }, pageContent: "Document 2" }, + ]; + + const chroma = new Chroma(new FakeEmbeddings(), args); + chroma.numDimensions = 3; // Mismatched numDimensions + + await expect(chroma.addVectors(vectors, documents)).rejects.toThrowError(); + }); + + test("should perform similarity search and return results", async () => { + const args = { collectionName: "testCollection" }; + const query = [1, 2]; + const expectedResultCount = 5; + mockCollection.query = jest.fn().mockResolvedValue({ + ids: [["0", "1", "2", "3", "4"]], + distances: [[0.1, 0.2, 0.3, 0.4, 0.5]], + documents: [ + ["Document 1", "Document 2", "Document 3", "Document 4", "Document 5"], + ], + metadatas: [[{ id: 1 }, { id: 2 }, { id: 3 }, { id: 4 }, { id: 5 }]], + } as any); + + const chroma = new Chroma(new FakeEmbeddings(), args); + chroma.collection = mockCollection; + + const results = await chroma.similaritySearchVectorWithScore( + query, + expectedResultCount + ); + + 
expect(mockCollection.query).toHaveBeenCalledWith({ + queryEmbeddings: query, + nResults: expectedResultCount, + where: {}, + }); + expect(results).toHaveLength(5); + }); +}); diff --git a/libs/langchain-community/src/vectorstores/tests/clickhouse.int.test.ts b/libs/langchain-community/src/vectorstores/tests/clickhouse.int.test.ts new file mode 100644 index 000000000000..2eb210f8fb18 --- /dev/null +++ b/libs/langchain-community/src/vectorstores/tests/clickhouse.int.test.ts @@ -0,0 +1,99 @@ +/* eslint-disable no-process-env */ +import { test, expect } from "@jest/globals"; + +import { ClickHouseStore } from "../clickhouse.js"; +// Import OpenAIEmbeddings if you have a valid OpenAI API key +import { HuggingFaceInferenceEmbeddings } from "../../embeddings/hf.js"; +import { Document } from "@langchain/core/documents"; + +test.skip("ClickHouseStore.fromText", async () => { + const vectorStore = await ClickHouseStore.fromTexts( + ["Hello world", "Bye bye", "hello nice world"], + [ + { id: 2, name: "2" }, + { id: 1, name: "1" }, + { id: 3, name: "3" }, + ], + new HuggingFaceInferenceEmbeddings(), + { + host: process.env.CLICKHOUSE_HOST || "localhost", + port: process.env.CLICKHOUSE_PORT || "8443", + username: process.env.CLICKHOUSE_USERNAME || "username", + password: process.env.CLICKHOUSE_PASSWORD || "password", + } + ); + + // Sleep 1 second to ensure that the search occurs after the successful insertion of data. 
+ // eslint-disable-next-line no-promise-executor-return + await new Promise((resolve) => setTimeout(resolve, 1000)); + + const results = await vectorStore.similaritySearch("hello world", 1); + expect(results).toEqual([ + new Document({ + pageContent: "Hello world", + metadata: { id: 2, name: "2" }, + }), + ]); + + const filteredResults = await vectorStore.similaritySearch("hello world", 1, { + whereStr: "metadata.name = '1'", + }); + expect(filteredResults).toEqual([ + new Document({ + pageContent: "Bye bye", + metadata: { id: 1, name: "1" }, + }), + ]); +}); + +test.skip("ClickHouseStore.fromExistingIndex", async () => { + await ClickHouseStore.fromTexts( + ["Hello world", "Bye bye", "hello nice world"], + [ + { id: 2, name: "2" }, + { id: 1, name: "1" }, + { id: 3, name: "3" }, + ], + new HuggingFaceInferenceEmbeddings(), + { + host: process.env.CLICKHOUSE_HOST || "localhost", + port: process.env.CLICKHOUSE_PORT || "8443", + username: process.env.CLICKHOUSE_USERNAME || "username", + password: process.env.CLICKHOUSE_PASSWORD || "password", + table: "test_table", + } + ); + + const vectorStore = await ClickHouseStore.fromExistingIndex( + new HuggingFaceInferenceEmbeddings(), + { + host: process.env.CLICKHOUSE_HOST || "localhost", + port: process.env.CLICKHOUSE_PORT || "8443", + username: process.env.CLICKHOUSE_USERNAME || "username", + password: process.env.CLICKHOUSE_PASSWORD || "password", + table: "test_table", + } + ); + + // Sleep 1 second to ensure that the search occurs after the successful insertion of data. 
+ // eslint-disable-next-line no-promise-executor-return + await new Promise((resolve) => setTimeout(resolve, 1000)); + + const results = await vectorStore.similaritySearch("hello world", 1); + expect(results).toEqual([ + new Document({ + pageContent: "Hello world", + metadata: { id: 2, name: "2" }, + }), + ]); + + const filteredResults = await vectorStore.similaritySearch("hello world", 1, { + whereStr: "metadata.name = '1'", + }); + expect(filteredResults).toEqual([ + new Document({ + pageContent: "Bye bye", + metadata: { id: 1, name: "1" }, + }), + ]); +}); diff --git a/libs/langchain-community/src/vectorstores/tests/convex.int.test.ts b/libs/langchain-community/src/vectorstores/tests/convex.int.test.ts new file mode 100644 index 000000000000..b985d658d049 --- /dev/null +++ b/libs/langchain-community/src/vectorstores/tests/convex.int.test.ts @@ -0,0 +1,43 @@ +/* eslint-disable no-process-env */ + +import { ConvexHttpClient } from "convex/browser"; + +import { expect, test } from "@jest/globals"; +// eslint-disable-next-line import/no-relative-packages +import { api } from "./convex/convex/_generated/api.js"; + +// To run these tests at least once, follow these steps: +// +// 1. `cd langchain/src/vectorstores/tests/convex` +// 2. `npx convex dev --once` +// 3. `cd ../../../..` +// 3. `cp src/vectorstores/tests/convex/.env.local .env` +// 4. Add your OpenAI key to `.env` (see `.env.example`) +// 5. 
`yarn test:single src/vectorstores/tests/convex.int.test.ts` +// +// If you're developing these tests, after you've done the above: +// +// In `langchain/src/vectorstores/tests/convex` run `npx convex dev` +// In `langchain` run `yarn test:single src/vectorstores/tests/convex.int.test.ts` + +describe.skip("Convex Vectorstore", () => { + test("Convex ingest, similaritySearch", async () => { + const client = new ConvexHttpClient(process.env.CONVEX_URL as string); + const openAIApiKey = process.env.OPENAI_API_KEY as string; + + await client.mutation(api.lib.reset); + + await client.action(api.lib.ingest, { + openAIApiKey, + texts: ["Hello world", "Bye bye", "hello nice world"], + metadatas: [{ id: 2 }, { id: 1 }, { id: 3 }], + }); + + const metadatas = await client.action(api.lib.similaritySearch, { + openAIApiKey, + query: "hello world", + }); + + expect(metadatas).toEqual([{ id: 2 }, { id: 3 }, { id: 1 }]); + }); +}); diff --git a/libs/langchain-community/src/vectorstores/tests/convex/convex/_generated/api.d.ts b/libs/langchain-community/src/vectorstores/tests/convex/convex/_generated/api.d.ts new file mode 100644 index 000000000000..0f668d5477a8 --- /dev/null +++ b/libs/langchain-community/src/vectorstores/tests/convex/convex/_generated/api.d.ts @@ -0,0 +1,39 @@ +/* eslint-disable */ +/** + * Generated `api` utility. + * + * THIS CODE IS AUTOMATICALLY GENERATED. + * + * Generated by convex@1.3.1. + * To regenerate, run `npx convex dev`. + * @module + */ + +import type { + ApiFromModules, + FilterApi, + FunctionReference, +} from "convex/server"; +import type * as langchain_db from "../langchain/db"; +import type * as lib from "../lib"; + +/** + * A utility for referencing Convex functions in your app's API. 
+ * + * Usage: + * ```js + * const myFunctionReference = api.myModule.myFunction; + * ``` + */ +declare const fullApi: ApiFromModules<{ + "langchain/db": typeof langchain_db; + lib: typeof lib; +}>; +export declare const api: FilterApi< + typeof fullApi, + FunctionReference +>; +export declare const internal: FilterApi< + typeof fullApi, + FunctionReference +>; diff --git a/libs/langchain-community/src/vectorstores/tests/convex/convex/_generated/api.js b/libs/langchain-community/src/vectorstores/tests/convex/convex/_generated/api.js new file mode 100644 index 000000000000..3f4ee5427ff9 --- /dev/null +++ b/libs/langchain-community/src/vectorstores/tests/convex/convex/_generated/api.js @@ -0,0 +1,23 @@ +/* eslint-disable */ +/** + * Generated `api` utility. + * + * THIS CODE IS AUTOMATICALLY GENERATED. + * + * Generated by convex@1.3.1. + * To regenerate, run `npx convex dev`. + * @module + */ + +import { anyApi } from "convex/server"; + +/** + * A utility for referencing Convex functions in your app's API. + * + * Usage: + * ```js + * const myFunctionReference = api.myModule.myFunction; + * ``` + */ +export const api = anyApi; +export const internal = anyApi; diff --git a/libs/langchain-community/src/vectorstores/tests/convex/convex/_generated/dataModel.d.ts b/libs/langchain-community/src/vectorstores/tests/convex/convex/_generated/dataModel.d.ts new file mode 100644 index 000000000000..68d087faa690 --- /dev/null +++ b/libs/langchain-community/src/vectorstores/tests/convex/convex/_generated/dataModel.d.ts @@ -0,0 +1,56 @@ +/* eslint-disable */ +/** + * Generated data model types. + * + * THIS CODE IS AUTOMATICALLY GENERATED. + * + * Generated by convex@1.3.1. + * To regenerate, run `npx convex dev`. 
+ * @module + */ + +import type { DataModelFromSchemaDefinition } from "convex/server"; +import type { DocumentByName, TableNamesInDataModel } from "convex/server"; +import type { GenericId } from "convex/values"; +import schema from "../schema"; + +/** + * The names of all of your Convex tables. + */ +export type TableNames = TableNamesInDataModel; + +/** + * The type of a document stored in Convex. + * + * @typeParam TableName - A string literal type of the table name (like "users"). + */ +export type Doc = DocumentByName< + DataModel, + TableName +>; + +/** + * An identifier for a document in Convex. + * + * Convex documents are uniquely identified by their `Id`, which is accessible + * on the `_id` field. To learn more, see [Document IDs](https://docs.convex.dev/using/document-ids). + * + * Documents can be loaded using `db.get(id)` in query and mutation functions. + * + * IDs are just strings at runtime, but this type can be used to distinguish them from other + * strings when type checking. + * + * @typeParam TableName - A string literal type of the table name (like "users"). + */ +export type Id = GenericId; + +/** + * A type describing your Convex data model. + * + * This type includes information about what tables you have, the type of + * documents stored in those tables, and the indexes defined on them. + * + * This type is used to parameterize methods like `queryGeneric` and + * `mutationGeneric` to make them type-safe. + */ +export type DataModel = DataModelFromSchemaDefinition; diff --git a/libs/langchain-community/src/vectorstores/tests/convex/convex/_generated/server.d.ts b/libs/langchain-community/src/vectorstores/tests/convex/convex/_generated/server.d.ts new file mode 100644 index 000000000000..729d94c3989b --- /dev/null +++ b/libs/langchain-community/src/vectorstores/tests/convex/convex/_generated/server.d.ts @@ -0,0 +1,143 @@ +/* eslint-disable */ +/** + * Generated utilities for implementing server-side Convex query and mutation functions. 
+ * + * THIS CODE IS AUTOMATICALLY GENERATED. + * + * Generated by convex@1.3.1. + * To regenerate, run `npx convex dev`. + * @module + */ + +import { + ActionBuilder, + HttpActionBuilder, + MutationBuilder, + QueryBuilder, + GenericActionCtx, + GenericMutationCtx, + GenericQueryCtx, + GenericDatabaseReader, + GenericDatabaseWriter, +} from "convex/server"; +import type { DataModel } from "./dataModel.js"; + +/** + * Define a query in this Convex app's public API. + * + * This function will be allowed to read your Convex database and will be accessible from the client. + * + * @param func - The query function. It receives a {@link QueryCtx} as its first argument. + * @returns The wrapped query. Include this as an `export` to name it and make it accessible. + */ +export declare const query: QueryBuilder; + +/** + * Define a query that is only accessible from other Convex functions (but not from the client). + * + * This function will be allowed to read from your Convex database. It will not be accessible from the client. + * + * @param func - The query function. It receives a {@link QueryCtx} as its first argument. + * @returns The wrapped query. Include this as an `export` to name it and make it accessible. + */ +export declare const internalQuery: QueryBuilder; + +/** + * Define a mutation in this Convex app's public API. + * + * This function will be allowed to modify your Convex database and will be accessible from the client. + * + * @param func - The mutation function. It receives a {@link MutationCtx} as its first argument. + * @returns The wrapped mutation. Include this as an `export` to name it and make it accessible. + */ +export declare const mutation: MutationBuilder; + +/** + * Define a mutation that is only accessible from other Convex functions (but not from the client). + * + * This function will be allowed to modify your Convex database. It will not be accessible from the client. + * + * @param func - The mutation function. 
It receives a {@link MutationCtx} as its first argument. + * @returns The wrapped mutation. Include this as an `export` to name it and make it accessible. + */ +export declare const internalMutation: MutationBuilder; + +/** + * Define an action in this Convex app's public API. + * + * An action is a function which can execute any JavaScript code, including non-deterministic + * code and code with side-effects, like calling third-party services. + * They can be run in Convex's JavaScript environment or in Node.js using the "use node" directive. + * They can interact with the database indirectly by calling queries and mutations using the {@link ActionCtx}. + * + * @param func - The action. It receives an {@link ActionCtx} as its first argument. + * @returns The wrapped action. Include this as an `export` to name it and make it accessible. + */ +export declare const action: ActionBuilder; + +/** + * Define an action that is only accessible from other Convex functions (but not from the client). + * + * @param func - The function. It receives an {@link ActionCtx} as its first argument. + * @returns The wrapped function. Include this as an `export` to name it and make it accessible. + */ +export declare const internalAction: ActionBuilder; + +/** + * Define an HTTP action. + * + * This function will be used to respond to HTTP requests received by a Convex + * deployment if the requests matches the path and method where this action + * is routed. Be sure to route your action in `convex/http.js`. + * + * @param func - The function. It receives an {@link ActionCtx} as its first argument. + * @returns The wrapped function. Import this function from `convex/http.js` and route it to hook it up. + */ +export declare const httpAction: HttpActionBuilder; + +/** + * A set of services for use within Convex query functions. + * + * The query context is passed as the first argument to any Convex query + * function run on the server. 
+ * + * This differs from the {@link MutationCtx} because all of the services are + * read-only. + */ +export type QueryCtx = GenericQueryCtx; + +/** + * A set of services for use within Convex mutation functions. + * + * The mutation context is passed as the first argument to any Convex mutation + * function run on the server. + */ +export type MutationCtx = GenericMutationCtx; + +/** + * A set of services for use within Convex action functions. + * + * The action context is passed as the first argument to any Convex action + * function run on the server. + */ +export type ActionCtx = GenericActionCtx; + +/** + * An interface to read from the database within Convex query functions. + * + * The two entry points are {@link DatabaseReader.get}, which fetches a single + * document by its {@link Id}, or {@link DatabaseReader.query}, which starts + * building a query. + */ +export type DatabaseReader = GenericDatabaseReader; + +/** + * An interface to read from and write to the database within Convex mutation + * functions. + * + * Convex guarantees that all writes within a single mutation are + * executed atomically, so you never have to worry about partial writes leaving + * your data in an inconsistent state. See [the Convex Guide](https://docs.convex.dev/understanding/convex-fundamentals/functions#atomicity-and-optimistic-concurrency-control) + * for the guarantees Convex provides your functions. + */ +export type DatabaseWriter = GenericDatabaseWriter; diff --git a/libs/langchain-community/src/vectorstores/tests/convex/convex/_generated/server.js b/libs/langchain-community/src/vectorstores/tests/convex/convex/_generated/server.js new file mode 100644 index 000000000000..fa2cb37267d2 --- /dev/null +++ b/libs/langchain-community/src/vectorstores/tests/convex/convex/_generated/server.js @@ -0,0 +1,90 @@ +/* eslint-disable */ +/** + * Generated utilities for implementing server-side Convex query and mutation functions. + * + * THIS CODE IS AUTOMATICALLY GENERATED. 
+ * + * Generated by convex@1.3.1. + * To regenerate, run `npx convex dev`. + * @module + */ + +import { + actionGeneric, + httpActionGeneric, + queryGeneric, + mutationGeneric, + internalActionGeneric, + internalMutationGeneric, + internalQueryGeneric, +} from "convex/server"; + +/** + * Define a query in this Convex app's public API. + * + * This function will be allowed to read your Convex database and will be accessible from the client. + * + * @param func - The query function. It receives a {@link QueryCtx} as its first argument. + * @returns The wrapped query. Include this as an `export` to name it and make it accessible. + */ +export const query = queryGeneric; + +/** + * Define a query that is only accessible from other Convex functions (but not from the client). + * + * This function will be allowed to read from your Convex database. It will not be accessible from the client. + * + * @param func - The query function. It receives a {@link QueryCtx} as its first argument. + * @returns The wrapped query. Include this as an `export` to name it and make it accessible. + */ +export const internalQuery = internalQueryGeneric; + +/** + * Define a mutation in this Convex app's public API. + * + * This function will be allowed to modify your Convex database and will be accessible from the client. + * + * @param func - The mutation function. It receives a {@link MutationCtx} as its first argument. + * @returns The wrapped mutation. Include this as an `export` to name it and make it accessible. + */ +export const mutation = mutationGeneric; + +/** + * Define a mutation that is only accessible from other Convex functions (but not from the client). + * + * This function will be allowed to modify your Convex database. It will not be accessible from the client. + * + * @param func - The mutation function. It receives a {@link MutationCtx} as its first argument. + * @returns The wrapped mutation. Include this as an `export` to name it and make it accessible. 
+ */ +export const internalMutation = internalMutationGeneric; + +/** + * Define an action in this Convex app's public API. + * + * An action is a function which can execute any JavaScript code, including non-deterministic + * code and code with side-effects, like calling third-party services. + * They can be run in Convex's JavaScript environment or in Node.js using the "use node" directive. + * They can interact with the database indirectly by calling queries and mutations using the {@link ActionCtx}. + * + * @param func - The action. It receives an {@link ActionCtx} as its first argument. + * @returns The wrapped action. Include this as an `export` to name it and make it accessible. + */ +export const action = actionGeneric; + +/** + * Define an action that is only accessible from other Convex functions (but not from the client). + * + * @param func - The function. It receives an {@link ActionCtx} as its first argument. + * @returns The wrapped function. Include this as an `export` to name it and make it accessible. + */ +export const internalAction = internalActionGeneric; + +/** + * Define a Convex HTTP action. + * + * @param func - The function. It receives an {@link ActionCtx} as its first argument, and a `Request` object + * as its second. + * @returns The wrapped endpoint function. Route a URL path to this function in `convex/http.js`. 
+ */ +export const httpAction = httpActionGeneric; diff --git a/libs/langchain-community/src/vectorstores/tests/convex/convex/langchain/db.ts b/libs/langchain-community/src/vectorstores/tests/convex/convex/langchain/db.ts new file mode 100644 index 000000000000..e09d4ecfe02d --- /dev/null +++ b/libs/langchain-community/src/vectorstores/tests/convex/convex/langchain/db.ts @@ -0,0 +1 @@ +export * from "../../../../../util/convex.js"; diff --git a/libs/langchain-community/src/vectorstores/tests/convex/convex/lib.ts b/libs/langchain-community/src/vectorstores/tests/convex/convex/lib.ts new file mode 100644 index 000000000000..a47d55a4dce4 --- /dev/null +++ b/libs/langchain-community/src/vectorstores/tests/convex/convex/lib.ts @@ -0,0 +1,45 @@ +// eslint-disable-next-line import/no-extraneous-dependencies +import { v } from "convex/values"; +import { OpenAIEmbeddings } from "@langchain/openai"; +import { ConvexVectorStore } from "../../../convex.js"; +import { action, mutation } from "./_generated/server.js"; + +export const reset = mutation({ + args: {}, + handler: async (ctx) => { + const documents = await ctx.db.query("documents").collect(); + await Promise.all(documents.map((document) => ctx.db.delete(document._id))); + }, +}); + +export const ingest = action({ + args: { + openAIApiKey: v.string(), + texts: v.array(v.string()), + metadatas: v.array(v.any()), + }, + handler: async (ctx, { openAIApiKey, texts, metadatas }) => { + await ConvexVectorStore.fromTexts( + texts, + metadatas, + new OpenAIEmbeddings({ openAIApiKey }), + { ctx } + ); + }, +}); + +export const similaritySearch = action({ + args: { + openAIApiKey: v.string(), + query: v.string(), + }, + handler: async (ctx, { openAIApiKey, query }) => { + const vectorStore = new ConvexVectorStore( + new OpenAIEmbeddings({ openAIApiKey }), + { ctx } + ); + + const result = await vectorStore.similaritySearch(query, 3); + return result.map(({ metadata }) => metadata); + }, +}); diff --git 
a/libs/langchain-community/src/vectorstores/tests/convex/convex/schema.ts b/libs/langchain-community/src/vectorstores/tests/convex/convex/schema.ts new file mode 100644 index 000000000000..72f7f31029be --- /dev/null +++ b/libs/langchain-community/src/vectorstores/tests/convex/convex/schema.ts @@ -0,0 +1,15 @@ +/* eslint-disable import/no-extraneous-dependencies */ + +import { defineSchema, defineTable } from "convex/server"; +import { v } from "convex/values"; + +export default defineSchema({ + documents: defineTable({ + embedding: v.array(v.number()), + text: v.string(), + metadata: v.any(), + }).vectorIndex("byEmbedding", { + vectorField: "embedding", + dimensions: 1536, + }), +}); diff --git a/libs/langchain-community/src/vectorstores/tests/convex/package.json b/libs/langchain-community/src/vectorstores/tests/convex/package.json new file mode 100644 index 000000000000..afd00749387f --- /dev/null +++ b/libs/langchain-community/src/vectorstores/tests/convex/package.json @@ -0,0 +1,8 @@ +{ + "name": "langchain-convex-tests", + "version": "0.0.1", + "type": "module", + "dependencies": { + "convex": "1.4.1" + } +} diff --git a/libs/langchain-community/src/vectorstores/tests/elasticsearch.int.test.ts b/libs/langchain-community/src/vectorstores/tests/elasticsearch.int.test.ts new file mode 100644 index 000000000000..4a22d5ec3b94 --- /dev/null +++ b/libs/langchain-community/src/vectorstores/tests/elasticsearch.int.test.ts @@ -0,0 +1,111 @@ +/* eslint-disable no-process-env */ +import { test, expect } from "@jest/globals"; +import { Client, ClientOptions } from "@elastic/elasticsearch"; +import { Document } from "@langchain/core/documents"; +import { OpenAIEmbeddings } from "@langchain/openai"; + +import { ElasticVectorSearch } from "../elasticsearch.js"; + +describe("ElasticVectorSearch", () => { + let store: ElasticVectorSearch; + + beforeEach(async () => { + if (!process.env.ELASTIC_URL) { + throw new Error("ELASTIC_URL not set"); + } + + const config: ClientOptions = 
{ + node: process.env.ELASTIC_URL, + }; + if (process.env.ELASTIC_API_KEY) { + config.auth = { + apiKey: process.env.ELASTIC_API_KEY, + }; + } else if (process.env.ELASTIC_USERNAME && process.env.ELASTIC_PASSWORD) { + config.auth = { + username: process.env.ELASTIC_USERNAME, + password: process.env.ELASTIC_PASSWORD, + }; + } + const client = new Client(config); + + const indexName = "test_index"; + + const embeddings = new OpenAIEmbeddings(); + store = new ElasticVectorSearch(embeddings, { client, indexName }); + await store.deleteIfExists(); + + expect(store).toBeDefined(); + }); + + test.skip("ElasticVectorSearch integration", async () => { + const createdAt = new Date().getTime(); + + const ids = await store.addDocuments([ + { pageContent: "hello", metadata: { a: createdAt + 1 } }, + { pageContent: "car", metadata: { a: createdAt } }, + { pageContent: "adjective", metadata: { a: createdAt } }, + { pageContent: "hi", metadata: { a: createdAt } }, + ]); + + const results1 = await store.similaritySearch("hello!", 1); + + expect(results1).toHaveLength(1); + expect(results1).toEqual([ + new Document({ metadata: { a: createdAt + 1 }, pageContent: "hello" }), + ]); + + const results2 = await store.similaritySearchWithScore("testing!", 6, { + a: createdAt, + }); + + expect(results2).toHaveLength(3); + + const ids2 = await store.addDocuments( + [ + { pageContent: "hello upserted", metadata: { a: createdAt + 1 } }, + { pageContent: "car upserted", metadata: { a: createdAt } }, + { pageContent: "adjective upserted", metadata: { a: createdAt } }, + { pageContent: "hi upserted", metadata: { a: createdAt } }, + ], + { ids } + ); + + expect(ids).toEqual(ids2); + + const results3 = await store.similaritySearchWithScore("testing!", 6, { + a: createdAt, + }); + + expect(results3).toHaveLength(3); + + console.log(`Upserted:`, results3); + + await store.delete({ ids: ids.slice(2) }); + + const results4 = await store.similaritySearchWithScore("testing!", 3, { + a: createdAt, + }); + 
+ expect(results4).toHaveLength(1); + }); + + test.skip("ElasticVectorSearch integration with more than 10 documents", async () => { + const createdAt = new Date().getTime(); + await store.addDocuments([ + { pageContent: "pretty", metadata: { a: createdAt + 1 } }, + { pageContent: "intelligent", metadata: { a: createdAt } }, + { pageContent: "creative", metadata: { a: createdAt } }, + { pageContent: "courageous", metadata: { a: createdAt } }, + { pageContent: "energetic", metadata: { a: createdAt } }, + { pageContent: "patient", metadata: { a: createdAt } }, + { pageContent: "responsible", metadata: { a: createdAt } }, + { pageContent: "friendly", metadata: { a: createdAt } }, + { pageContent: "confident", metadata: { a: createdAt } }, + { pageContent: "generous", metadata: { a: createdAt } }, + { pageContent: "compassionate", metadata: { a: createdAt } }, + ]); + const results = await store.similaritySearch("*", 11); + expect(results).toHaveLength(11); + }); +}); diff --git a/libs/langchain-community/src/vectorstores/tests/lancedb.int.test.ts b/libs/langchain-community/src/vectorstores/tests/lancedb.int.test.ts new file mode 100644 index 000000000000..06797b047062 --- /dev/null +++ b/libs/langchain-community/src/vectorstores/tests/lancedb.int.test.ts @@ -0,0 +1,45 @@ +import { beforeEach, describe, expect, test } from "@jest/globals"; +import * as fs from "node:fs/promises"; +import * as path from "node:path"; +import * as os from "node:os"; +import { connect, Table } from "vectordb"; +import { OpenAIEmbeddings } from "@langchain/openai"; +import { Document } from "@langchain/core/documents"; +import { LanceDB } from "../lancedb.js"; + +describe("LanceDB", () => { + let lanceDBTable: Table; + + beforeEach(async () => { + const dir = await fs.mkdtemp(path.join(os.tmpdir(), "lcjs-lancedb-")); + const db = await connect(dir); + lanceDBTable = await db.createTable("vectors", [ + { vector: Array(1536), text: "sample", id: 1 }, + ]); + }); + + test("Test fromTexts + 
addDocuments", async () => { + const embeddings = new OpenAIEmbeddings(); + const vectorStore = await LanceDB.fromTexts( + ["hello bye", "hello world", "bye bye"], + [{ id: 1 }, { id: 2 }, { id: 3 }], + embeddings, + { + table: lanceDBTable, + } + ); + + const results = await vectorStore.similaritySearch("hello bye", 10); + expect(results.length).toBe(4); + + await vectorStore.addDocuments([ + new Document({ + pageContent: "a new world", + metadata: { id: 4 }, + }), + ]); + + const resultsTwo = await vectorStore.similaritySearch("hello bye", 10); + expect(resultsTwo.length).toBe(5); + }); +}); diff --git a/libs/langchain-community/src/vectorstores/tests/milvus.int.test.ts b/libs/langchain-community/src/vectorstores/tests/milvus.int.test.ts new file mode 100644 index 000000000000..bc328304ddbf --- /dev/null +++ b/libs/langchain-community/src/vectorstores/tests/milvus.int.test.ts @@ -0,0 +1,169 @@ +import { test, expect, afterAll, beforeAll } from "@jest/globals"; +import { ErrorCode, MilvusClient } from "@zilliz/milvus2-sdk-node"; +import { OpenAIEmbeddings } from "@langchain/openai"; +import { Milvus } from "../milvus.js"; + +let collectionName: string; +let embeddings: OpenAIEmbeddings; +// https://docs.zilliz.com/docs/quick-start-1#create-a-collection +const MILVUS_ADDRESS = ""; +const MILVUS_TOKEN = ""; + +const OPEN_AI_API_KEY = ""; + +beforeAll(async () => { + embeddings = new OpenAIEmbeddings({ + openAIApiKey: OPEN_AI_API_KEY, + }); + collectionName = `test_collection_${Math.random().toString(36).substring(7)}`; +}); + +test.skip("Test Milvus.fromtext with token", async () => { + const texts = [ + `Tortoise: Labyrinth? Labyrinth? Could it Are we in the notorious Little +Harmonic Labyrinth of the dreaded Majotaur?`, + "Achilles: Yiikes! 
What is that?", + `Tortoise: They say-although I person never believed it myself-that an I + Majotaur has created a tiny labyrinth sits in a pit in the middle of + it, waiting innocent victims to get lost in its fears complexity. + Then, when they wander and dazed into the center, he laughs and + laughs at them-so hard, that he laughs them to death!`, + "Achilles: Oh, no!", + "Tortoise: But it's only a myth. Courage, Achilles.", + ]; + const objA = { A: { B: "some string" } }; + const objB = { A: { B: "some other string" } }; + const metadatas: object[] = [ + { id: 2, other: objA }, + { id: 1, other: objB }, + { id: 3, other: objA }, + { id: 4, other: objB }, + { id: 5, other: objA }, + ]; + const milvus = await Milvus.fromTexts(texts, metadatas, embeddings, { + collectionName, + autoId: false, + primaryField: "id", + clientConfig: { + address: MILVUS_ADDRESS, + token: MILVUS_TOKEN, + }, + }); + const query = "who is achilles?"; + const result = await milvus.similaritySearch(query, 1); + + const resultMetadatas = result.map(({ metadata }) => metadata); + expect(resultMetadatas).toEqual([{ id: 1, other: objB }]); + + const resultTwo = await milvus.similaritySearch(query, 3); + const resultTwoMetadatas = resultTwo.map(({ metadata }) => metadata); + expect(resultTwoMetadatas).toEqual([ + { id: 1, other: objB }, + { id: 4, other: objB }, + { id: 5, other: objA }, + ]); + + const resultThree = await milvus.similaritySearch(query, 1, "id == 1"); + const resultThreeMetadatas = resultThree.map(({ metadata }) => metadata); + expect(resultThreeMetadatas).toEqual([{ id: 1, other: objB }]); +}); + +test.skip("Test Milvus.fromtext", async () => { + const texts = [ + `Tortoise: Labyrinth? Labyrinth? Could it Are we in the notorious Little +Harmonic Labyrinth of the dreaded Majotaur?`, + "Achilles: Yiikes! 
What is that?", + `Tortoise: They say-although I person never believed it myself-that an I + Majotaur has created a tiny labyrinth sits in a pit in the middle of + it, waiting innocent victims to get lost in its fears complexity. + Then, when they wander and dazed into the center, he laughs and + laughs at them-so hard, that he laughs them to death!`, + "Achilles: Oh, no!", + "Tortoise: But it's only a myth. Courage, Achilles.", + ]; + const objA = { A: { B: "some string" } }; + const objB = { A: { B: "some other string" } }; + const metadatas: object[] = [ + { id: 2, other: objA }, + { id: 1, other: objB }, + { id: 3, other: objA }, + { id: 4, other: objB }, + { id: 5, other: objA }, + ]; + const milvus = await Milvus.fromTexts(texts, metadatas, embeddings, { + collectionName, + url: MILVUS_ADDRESS, + }); + + const query = "who is achilles?"; + const result = await milvus.similaritySearch(query, 1); + const resultMetadatas = result.map(({ metadata }) => metadata); + expect(resultMetadatas).toEqual([{ id: 1, other: objB }]); + + const resultTwo = await milvus.similaritySearch(query, 3); + const resultTwoMetadatas = resultTwo.map(({ metadata }) => metadata); + expect(resultTwoMetadatas).toEqual([ + { id: 1, other: objB }, + { id: 4, other: objB }, + { id: 5, other: objA }, + ]); + + const resultThree = await milvus.similaritySearch(query, 1, "id == 1"); + const resultThreeMetadatas = resultThree.map(({ metadata }) => metadata); + expect(resultThreeMetadatas).toEqual([{ id: 1, other: objB }]); +}); + +test.skip("Test Milvus.fromExistingCollection", async () => { + const milvus = await Milvus.fromExistingCollection(embeddings, { + collectionName, + }); + + const query = "who is achilles?"; + const result = await milvus.similaritySearch(query, 1); + const resultMetadatas = result.map(({ metadata }) => metadata); + expect(resultMetadatas.length).toBe(1); + expect(resultMetadatas[0].id).toEqual(1); + + const resultTwo = await milvus.similaritySearch(query, 3); + const 
resultTwoMetadatas = resultTwo.map(({ metadata }) => metadata); + expect(resultTwoMetadatas.length).toBe(3); + expect(resultTwoMetadatas[0].id).toEqual(1); + expect(resultTwoMetadatas[1].id).toEqual(4); + expect(resultTwoMetadatas[2].id).toEqual(5); + + const resultThree = await milvus.similaritySearch(query, 1, "id == 1"); + const resultThreeMetadatas = resultThree.map(({ metadata }) => metadata); + expect(resultThreeMetadatas.length).toBe(1); + expect(resultThreeMetadatas[0].id).toEqual(1); +}); + +test.skip("Test Milvus.deleteData", async () => { + const milvus = await Milvus.fromExistingCollection(embeddings, { + collectionName, + }); + + const query = "who is achilles?"; + const result = await milvus.similaritySearch(query, 1); + const resultMetadatas = result.map(({ metadata }) => metadata); + const primaryId = resultMetadatas[0].langchain_primaryid; + expect(resultMetadatas.length).toBe(1); + expect(resultMetadatas[0].id).toEqual(1); + + await milvus.delete({ filter: `langchain_primaryid in [${primaryId}]` }); + + const resultTwo = await milvus.similaritySearch(query, 1); + const resultTwoMetadatas = resultTwo.map(({ metadata }) => metadata); + expect(resultTwoMetadatas[0].id).not.toEqual(1); +}); + +afterAll(async () => { + // eslint-disable-next-line no-process-env + if (!process.env.MILVUS_URL) return; + // eslint-disable-next-line no-process-env + const client = new MilvusClient(process.env.MILVUS_URL as string); + const dropRes = await client.dropCollection({ + collection_name: collectionName, + }); + // console.log("Drop collection response: ", dropRes) + expect(dropRes.error_code).toBe(ErrorCode.SUCCESS); +}); diff --git a/libs/langchain-community/src/vectorstores/tests/myscale.int.test.ts b/libs/langchain-community/src/vectorstores/tests/myscale.int.test.ts new file mode 100644 index 000000000000..77db1acec374 --- /dev/null +++ b/libs/langchain-community/src/vectorstores/tests/myscale.int.test.ts @@ -0,0 +1,90 @@ +/* eslint-disable no-process-env */ 
+import { test, expect } from "@jest/globals"; + +import { OpenAIEmbeddings } from "@langchain/openai"; +import { Document } from "@langchain/core/documents"; +import { MyScaleStore } from "../myscale.js"; + +test.skip("MyScaleStore.fromText", async () => { + const vectorStore = await MyScaleStore.fromTexts( + ["Hello world", "Bye bye", "hello nice world"], + [ + { id: 2, name: "2" }, + { id: 1, name: "1" }, + { id: 3, name: "3" }, + ], + new OpenAIEmbeddings(), + { + host: process.env.MYSCALE_HOST || "localhost", + port: process.env.MYSCALE_PORT || "8443", + username: process.env.MYSCALE_USERNAME || "username", + password: process.env.MYSCALE_PASSWORD || "password", + } + ); + + const results = await vectorStore.similaritySearch("hello world", 1); + expect(results).toEqual([ + new Document({ + pageContent: "Hello world", + metadata: { id: 2, name: "2" }, + }), + ]); + + const filteredResults = await vectorStore.similaritySearch("hello world", 1, { + whereStr: "metadata.name = '1'", + }); + expect(filteredResults).toEqual([ + new Document({ + pageContent: "Bye bye", + metadata: { id: 1, name: "1" }, + }), + ]); +}); + +test.skip("MyScaleStore.fromExistingIndex", async () => { + await MyScaleStore.fromTexts( + ["Hello world", "Bye bye", "hello nice world"], + [ + { id: 2, name: "2" }, + { id: 1, name: "1" }, + { id: 3, name: "3" }, + ], + new OpenAIEmbeddings(), + { + host: process.env.MYSCALE_HOST || "localhost", + port: process.env.MYSCALE_PORT || "8443", + username: process.env.MYSCALE_USERNAME || "username", + password: process.env.MYSCALE_PASSWORD || "password", + table: "test_table", + } + ); + + const vectorStore = await MyScaleStore.fromExistingIndex( + new OpenAIEmbeddings(), + { + host: process.env.MYSCALE_HOST || "localhost", + port: process.env.MYSCALE_PORT || "8443", + username: process.env.MYSCALE_USERNAME || "username", + password: process.env.MYSCALE_PASSWORD || "password", + table: "test_table", + } + ); + + const results = await 
vectorStore.similaritySearch("hello world", 1); + expect(results).toEqual([ + new Document({ + pageContent: "Hello world", + metadata: { id: 2, name: "2" }, + }), + ]); + + const filteredResults = await vectorStore.similaritySearch("hello world", 1, { + whereStr: "metadata.name = '1'", + }); + expect(filteredResults).toEqual([ + new Document({ + pageContent: "Bye bye", + metadata: { id: 1, name: "1" }, + }), + ]); +}); diff --git a/libs/langchain-community/src/vectorstores/tests/neo4j_vector.int.test.ts b/libs/langchain-community/src/vectorstores/tests/neo4j_vector.int.test.ts new file mode 100644 index 000000000000..aae909421e7e --- /dev/null +++ b/libs/langchain-community/src/vectorstores/tests/neo4j_vector.int.test.ts @@ -0,0 +1,471 @@ +/* eslint-disable no-process-env */ +import { Document } from "@langchain/core/documents"; +import { FakeEmbeddings } from "../../util/testing.js"; +import { Neo4jVectorStore } from "../neo4j_vector.js"; + +const OS_TOKEN_COUNT = 1536; + +const texts = ["foo", "bar", "baz"]; + +class FakeEmbeddingsWithOsDimension extends FakeEmbeddings { + async embedDocuments(documents: string[]): Promise { + return Promise.resolve( + documents.map((_, i) => + Array(OS_TOKEN_COUNT - 1) + .fill(1.0) + .concat([i + 1.0]) + ) + ); + } + + async embedQuery(text: string): Promise { + const index = texts.indexOf(text); + + if (index !== -1) { + return Array(OS_TOKEN_COUNT - 1) + .fill(1.0) + .concat([index + 1]); + } else { + throw new Error(`Text '${text}' not found in the 'texts' array.`); + } + } +} + +async function dropVectorIndexes(store: Neo4jVectorStore) { + const allIndexes = await store.query(` + SHOW INDEXES YIELD name, type + WHERE type = "VECTOR" + RETURN name + `); + + if (allIndexes) { + for (const index of allIndexes) { + await store.query(`DROP INDEX ${index.name}`); + } + } +} + +test.skip("Test fromTexts", async () => { + const url = process.env.NEO4J_URI as string; + const username = process.env.NEO4J_USERNAME as string; + const 
password = process.env.NEO4J_PASSWORD as string; + + expect(url).toBeDefined(); + expect(username).toBeDefined(); + expect(password).toBeDefined(); + + const embeddings = new FakeEmbeddingsWithOsDimension(); + // eslint-disable-next-line @typescript-eslint/no-explicit-any + const metadatas: any[] = []; + + const neo4jVectorStore = await Neo4jVectorStore.fromTexts( + texts, + metadatas, + embeddings, + { + url, + username, + password, + preDeleteCollection: true, + } + ); + + const output = await neo4jVectorStore.similaritySearch("foo", 2); + + const expectedResult = [ + new Document({ + pageContent: "foo", + metadata: {}, + }), + new Document({ + pageContent: "bar", + metadata: {}, + }), + ]; + + expect(output).toStrictEqual(expectedResult); + await dropVectorIndexes(neo4jVectorStore); + await neo4jVectorStore.close(); +}); + +test.skip("Test fromTexts Hybrid", async () => { + const url = process.env.NEO4J_URI as string; + const username = process.env.NEO4J_USERNAME as string; + const password = process.env.NEO4J_PASSWORD as string; + + expect(url).toBeDefined(); + expect(username).toBeDefined(); + expect(password).toBeDefined(); + + const embeddings = new FakeEmbeddingsWithOsDimension(); + // eslint-disable-next-line @typescript-eslint/no-explicit-any + const metadatas: any[] = []; + + const neo4jVectorStore = await Neo4jVectorStore.fromTexts( + texts, + metadatas, + embeddings, + { + url, + username, + password, + preDeleteCollection: true, + searchType: "hybrid", + } + ); + + const output = await neo4jVectorStore.similaritySearch("foo", 2); + + const expectedResult = [ + new Document({ + pageContent: "foo", + metadata: {}, + }), + new Document({ + pageContent: "bar", + metadata: {}, + }), + ]; + + expect(output).toStrictEqual(expectedResult); + await dropVectorIndexes(neo4jVectorStore); + await neo4jVectorStore.close(); +}); + +test.skip("Test fromExistingIndex", async () => { + const url = process.env.NEO4J_URI as string; + const username = 
process.env.NEO4J_USERNAME as string; + const password = process.env.NEO4J_PASSWORD as string; + + expect(url).toBeDefined(); + expect(username).toBeDefined(); + expect(password).toBeDefined(); + + const embeddings = new FakeEmbeddingsWithOsDimension(); + // eslint-disable-next-line @typescript-eslint/no-explicit-any + const metadatas: any[] = []; + + const neo4jVectorStore = await Neo4jVectorStore.fromTexts( + texts, + metadatas, + embeddings, + { + url, + username, + password, + indexName: "vector", + preDeleteCollection: true, + } + ); + + const existingIndex = await Neo4jVectorStore.fromExistingIndex(embeddings, { + url, + username, + password, + indexName: "vector", + }); + + const output = await existingIndex.similaritySearch("foo", 2); + + const expectedResult = [ + new Document({ + pageContent: "foo", + metadata: {}, + }), + new Document({ + pageContent: "bar", + metadata: {}, + }), + ]; + + expect(output).toStrictEqual(expectedResult); + await dropVectorIndexes(neo4jVectorStore); + await neo4jVectorStore.close(); + await existingIndex.close(); +}); + +test.skip("Test fromExistingIndex Hybrid", async () => { + const url = process.env.NEO4J_URI as string; + const username = process.env.NEO4J_USERNAME as string; + const password = process.env.NEO4J_PASSWORD as string; + + expect(url).toBeDefined(); + expect(username).toBeDefined(); + expect(password).toBeDefined(); + + const embeddings = new FakeEmbeddingsWithOsDimension(); + // eslint-disable-next-line @typescript-eslint/no-explicit-any + const metadatas: any[] = []; + + const neo4jVectorStore = await Neo4jVectorStore.fromTexts( + texts, + metadatas, + embeddings, + { + url, + username, + password, + indexName: "vector", + keywordIndexName: "keyword", + searchType: "hybrid", + preDeleteCollection: true, + } + ); + + const existingIndex = await Neo4jVectorStore.fromExistingIndex(embeddings, { + url, + username, + password, + indexName: "vector", + keywordIndexName: "keyword", + searchType: "hybrid", + }); + + 
const output = await existingIndex.similaritySearch("foo", 2); + + const expectedResult = [ + new Document({ + pageContent: "foo", + metadata: {}, + }), + new Document({ + pageContent: "bar", + metadata: {}, + }), + ]; + + expect(output).toStrictEqual(expectedResult); + await dropVectorIndexes(neo4jVectorStore); + await neo4jVectorStore.close(); + await existingIndex.close(); +}); + +test.skip("Test retrievalQuery", async () => { + const url = process.env.NEO4J_URI as string; + const username = process.env.NEO4J_USERNAME as string; + const password = process.env.NEO4J_PASSWORD as string; + + expect(url).toBeDefined(); + expect(username).toBeDefined(); + expect(password).toBeDefined(); + + const embeddings = new FakeEmbeddingsWithOsDimension(); + // eslint-disable-next-line @typescript-eslint/no-explicit-any + const metadatas: any[] = []; + + const neo4jVectorStore = await Neo4jVectorStore.fromTexts( + texts, + metadatas, + embeddings, + { + url, + username, + password, + indexName: "vector", + preDeleteCollection: true, + retrievalQuery: + "RETURN node.text AS text, score, {foo:'bar'} AS metadata", + } + ); + + const output = await neo4jVectorStore.similaritySearch("foo", 2); + + const expectedResult = [ + new Document({ + pageContent: "foo", + metadata: { foo: "bar" }, + }), + new Document({ + pageContent: "bar", + metadata: { foo: "bar" }, + }), + ]; + + expect(output).toStrictEqual(expectedResult); + await dropVectorIndexes(neo4jVectorStore); + await neo4jVectorStore.close(); +}); + +test.skip("Test fromExistingGraph", async () => { + const url = process.env.NEO4J_URI as string; + const username = process.env.NEO4J_USERNAME as string; + const password = process.env.NEO4J_PASSWORD as string; + + expect(url).toBeDefined(); + expect(username).toBeDefined(); + expect(password).toBeDefined(); + + const embeddings = new FakeEmbeddingsWithOsDimension(); + // eslint-disable-next-line @typescript-eslint/no-explicit-any + const metadatas: any[] = []; + + const 
neo4jVectorStore = await Neo4jVectorStore.fromTexts( + texts, + metadatas, + embeddings, + { + url, + username, + password, + indexName: "vector", + preDeleteCollection: true, + } + ); + + await neo4jVectorStore.query("MATCH (n) DETACH DELETE n"); + + await neo4jVectorStore.query( + "CREATE (:Test {name:'Foo'}), (:Test {name:'Bar', foo:'bar'})" + ); + + const existingGraph = await Neo4jVectorStore.fromExistingGraph(embeddings, { + url, + username, + password, + indexName: "vector1", + nodeLabel: "Test", + textNodeProperties: ["name"], + embeddingNodeProperty: "embedding", + }); + + const output = await existingGraph.similaritySearch("foo", 2); + + const expectedResult = [ + new Document({ + pageContent: "\nname: Foo", + metadata: {}, + }), + new Document({ + pageContent: "\nname: Bar", + metadata: { foo: "bar" }, + }), + ]; + + expect(output).toStrictEqual(expectedResult); + await dropVectorIndexes(neo4jVectorStore); + await neo4jVectorStore.close(); + await existingGraph.close(); +}); + +test.skip("Test fromExistingGraph multiple properties", async () => { + const url = process.env.NEO4J_URI as string; + const username = process.env.NEO4J_USERNAME as string; + const password = process.env.NEO4J_PASSWORD as string; + + expect(url).toBeDefined(); + expect(username).toBeDefined(); + expect(password).toBeDefined(); + + const embeddings = new FakeEmbeddingsWithOsDimension(); + // eslint-disable-next-line @typescript-eslint/no-explicit-any + const metadatas: any[] = []; + + const neo4jVectorStore = await Neo4jVectorStore.fromTexts( + texts, + metadatas, + embeddings, + { + url, + username, + password, + indexName: "vector", + preDeleteCollection: true, + } + ); + + await neo4jVectorStore.query("MATCH (n) DETACH DELETE n"); + + await neo4jVectorStore.query( + "CREATE (:Test {name:'Foo', name2:'Fooz'}), (:Test {name:'Bar', foo:'bar'})" + ); + + const existingGraph = await Neo4jVectorStore.fromExistingGraph(embeddings, { + url, + username, + password, + indexName: 
"vector1", + nodeLabel: "Test", + textNodeProperties: ["name", "name2"], + embeddingNodeProperty: "embedding", + }); + + const output = await existingGraph.similaritySearch("foo", 2); + + const expectedResult = [ + new Document({ + pageContent: "\nname: Foo\nname2: Fooz", + metadata: {}, + }), + new Document({ + pageContent: "\nname: Bar\nname2: ", + metadata: { foo: "bar" }, + }), + ]; + + expect(output).toStrictEqual(expectedResult); + await dropVectorIndexes(neo4jVectorStore); + await neo4jVectorStore.close(); + await existingGraph.close(); +}); + +test.skip("Test fromExistingGraph multiple properties hybrid", async () => { + const url = process.env.NEO4J_URI as string; + const username = process.env.NEO4J_USERNAME as string; + const password = process.env.NEO4J_PASSWORD as string; + + expect(url).toBeDefined(); + expect(username).toBeDefined(); + expect(password).toBeDefined(); + + const embeddings = new FakeEmbeddingsWithOsDimension(); + // eslint-disable-next-line @typescript-eslint/no-explicit-any + const metadatas: any[] = []; + + const neo4jVectorStore = await Neo4jVectorStore.fromTexts( + texts, + metadatas, + embeddings, + { + url, + username, + password, + indexName: "vector", + preDeleteCollection: true, + } + ); + + await neo4jVectorStore.query("MATCH (n) DETACH DELETE n"); + + await neo4jVectorStore.query( + "CREATE (:Test {name:'Foo', name2:'Fooz'}), (:Test {name:'Bar', foo:'bar'})" + ); + + const existingGraph = await Neo4jVectorStore.fromExistingGraph(embeddings, { + url, + username, + password, + indexName: "vector1", + nodeLabel: "Test", + textNodeProperties: ["name", "name2"], + embeddingNodeProperty: "embedding", + searchType: "hybrid", + }); + + const output = await existingGraph.similaritySearch("foo", 2); + + const expectedResult = [ + new Document({ + pageContent: "\nname: Foo\nname2: Fooz", + metadata: {}, + }), + new Document({ + pageContent: "\nname: Bar\nname2: ", + metadata: { foo: "bar" }, + }), + ]; + + 
expect(output).toStrictEqual(expectedResult); + await dropVectorIndexes(neo4jVectorStore); + await neo4jVectorStore.close(); + await existingGraph.close(); +}); diff --git a/libs/langchain-community/src/vectorstores/tests/opensearch.int.test.ts b/libs/langchain-community/src/vectorstores/tests/opensearch.int.test.ts new file mode 100644 index 000000000000..f3a497dc5b9d --- /dev/null +++ b/libs/langchain-community/src/vectorstores/tests/opensearch.int.test.ts @@ -0,0 +1,44 @@ +/* eslint-disable no-process-env */ +import { test, expect } from "@jest/globals"; +import { Client } from "@opensearch-project/opensearch"; +import { OpenAIEmbeddings } from "@langchain/openai"; +import { Document } from "@langchain/core/documents"; +import { OpenSearchVectorStore } from "../opensearch.js"; + +test.skip("OpenSearchVectorStore integration", async () => { + if (!process.env.OPENSEARCH_URL) { + throw new Error("OPENSEARCH_URL not set"); + } + + const client = new Client({ + nodes: [process.env.OPENSEARCH_URL], + }); + + const indexName = "test_index"; + + const embeddings = new OpenAIEmbeddings(); + const store = new OpenSearchVectorStore(embeddings, { client, indexName }); + await store.deleteIfExists(); + + expect(store).toBeDefined(); + + await store.addDocuments([ + { pageContent: "hello", metadata: { a: 2 } }, + { pageContent: "car", metadata: { a: 1 } }, + { pageContent: "adjective", metadata: { a: 1 } }, + { pageContent: "hi", metadata: { a: 1 } }, + ]); + + const results1 = await store.similaritySearch("hello!", 1); + + expect(results1).toHaveLength(1); + expect(results1).toEqual([ + new Document({ metadata: { a: 2 }, pageContent: "hello" }), + ]); + + const results2 = await store.similaritySearchWithScore("hello!", 1, { + a: 1, + }); + + expect(results2).toHaveLength(1); +}); diff --git a/libs/langchain-community/src/vectorstores/tests/qdrant.int.test.ts b/libs/langchain-community/src/vectorstores/tests/qdrant.int.test.ts new file mode 100644 index 
000000000000..70d862f96dc4 --- /dev/null +++ b/libs/langchain-community/src/vectorstores/tests/qdrant.int.test.ts @@ -0,0 +1,50 @@ +/* eslint-disable no-process-env */ +/* eslint-disable @typescript-eslint/no-non-null-assertion */ +import { describe, expect, test } from "@jest/globals"; +import { QdrantClient } from "@qdrant/js-client-rest"; +import { faker } from "@faker-js/faker"; +import { Document } from "@langchain/core/documents"; +import { OpenAIEmbeddings } from "@langchain/openai"; +import { QdrantVectorStore } from "../qdrant.js"; +import { OllamaEmbeddings } from "../../embeddings/ollama.js"; + +describe.skip("QdrantVectorStore testcase", () => { + test("base usage", async () => { + const embeddings = new OpenAIEmbeddings({}); + + const qdrantVectorStore = new QdrantVectorStore(embeddings, { + url: process.env.QDRANT_URL || "http://localhost:6333", + collectionName: process.env.QDRANT_COLLECTION || "documents", + }); + + const pageContent = faker.lorem.sentence(5); + + await qdrantVectorStore.addDocuments([{ pageContent, metadata: {} }]); + + const results = await qdrantVectorStore.similaritySearch(pageContent, 1); + + expect(results[0]).toEqual(new Document({ metadata: {}, pageContent })); + }); + + test("passing client directly with a local model that creates embeddings with a different number of dimensions", async () => { + const embeddings = new OllamaEmbeddings({}); + + const pageContent = faker.lorem.sentence(5); + + const qdrantVectorStore = await QdrantVectorStore.fromDocuments( + [{ pageContent, metadata: {} }], + embeddings, + { + collectionName: "different_dimensions", + client: new QdrantClient({ + url: process.env.QDRANT_URL, + apiKey: process.env.QDRANT_API_KEY, + }), + } + ); + + const results = await qdrantVectorStore.similaritySearch(pageContent, 1); + + expect(results[0]).toEqual(new Document({ metadata: {}, pageContent })); + }); +}); diff --git a/libs/langchain-community/src/vectorstores/tests/qdrant.test.ts 
b/libs/langchain-community/src/vectorstores/tests/qdrant.test.ts new file mode 100644 index 000000000000..a7402e023051 --- /dev/null +++ b/libs/langchain-community/src/vectorstores/tests/qdrant.test.ts @@ -0,0 +1,33 @@ +/* eslint-disable @typescript-eslint/no-explicit-any */ +import { jest, test, expect } from "@jest/globals"; +import { FakeEmbeddings } from "../../util/testing.js"; + +import { QdrantVectorStore } from "../qdrant.js"; + +test("QdrantVectorStore works", async () => { + const client = { + upsert: jest.fn(), + search: jest.fn().mockResolvedValue([]), + getCollections: jest.fn().mockResolvedValue({ collections: [] }), + createCollection: jest.fn(), + }; + + const embeddings = new FakeEmbeddings(); + + const store = new QdrantVectorStore(embeddings, { client: client as any }); + + expect(store).toBeDefined(); + + await store.addDocuments([ + { + pageContent: "hello", + metadata: {}, + }, + ]); + + expect(client.upsert).toHaveBeenCalledTimes(1); + + const results = await store.similaritySearch("hello", 1); + + expect(results).toHaveLength(0); +}); diff --git a/libs/langchain-community/src/vectorstores/tests/redis.int.test.ts b/libs/langchain-community/src/vectorstores/tests/redis.int.test.ts new file mode 100644 index 000000000000..f44f52b8d273 --- /dev/null +++ b/libs/langchain-community/src/vectorstores/tests/redis.int.test.ts @@ -0,0 +1,71 @@ +/* eslint-disable no-process-env */ +/* eslint-disable no-promise-executor-return */ + +import { RedisClientType, createClient } from "redis"; +import { v4 as uuidv4 } from "uuid"; +import { test, expect } from "@jest/globals"; +import { faker } from "@faker-js/faker"; +import { OpenAIEmbeddings } from "@langchain/openai"; +import { Document } from "@langchain/core/documents"; +import { RedisVectorStore } from "../redis.js"; + +describe("RedisVectorStore", () => { + let vectorStore: RedisVectorStore; + + beforeEach(async () => { + const client = createClient({ url: process.env.REDIS_URL }); + await 
client.connect(); + + vectorStore = new RedisVectorStore(new OpenAIEmbeddings(), { + redisClient: client as RedisClientType, + indexName: "test-index", + keyPrefix: "test:", + }); + }); + + test.skip("auto-generated ids", async () => { + const pageContent = faker.lorem.sentence(5); + + await vectorStore.addDocuments([{ pageContent, metadata: { foo: "bar" } }]); + + const results = await vectorStore.similaritySearch(pageContent, 1); + + expect(results).toEqual([ + new Document({ metadata: { foo: "bar" }, pageContent }), + ]); + }); + + test.skip("user-provided keys", async () => { + const documentKey = `test:${uuidv4()}`; + const pageContent = faker.lorem.sentence(5); + + await vectorStore.addDocuments([{ pageContent, metadata: {} }], { + keys: [documentKey], + }); + + const results = await vectorStore.similaritySearch(pageContent, 1); + + expect(results).toEqual([new Document({ metadata: {}, pageContent })]); + }); + + test.skip("metadata filtering", async () => { + await vectorStore.dropIndex(); + const pageContent = faker.lorem.sentence(5); + const uuid = uuidv4(); + + await vectorStore.addDocuments([ + { pageContent, metadata: { foo: "bar" } }, + { pageContent, metadata: { foo: uuid } }, + { pageContent, metadata: { foo: "qux" } }, + ]); + + // If the filter wasn't working, we'd get all 3 documents back + const results = await vectorStore.similaritySearch(pageContent, 3, [ + `${uuid}`, + ]); + + expect(results).toEqual([ + new Document({ metadata: { foo: uuid }, pageContent }), + ]); + }); +}); diff --git a/libs/langchain-community/src/vectorstores/tests/redis.test.ts b/libs/langchain-community/src/vectorstores/tests/redis.test.ts new file mode 100644 index 000000000000..4378dc9c2293 --- /dev/null +++ b/libs/langchain-community/src/vectorstores/tests/redis.test.ts @@ -0,0 +1,222 @@ +/* eslint-disable @typescript-eslint/no-explicit-any */ +import { jest, test, expect, describe } from "@jest/globals"; +import { FakeEmbeddings } from "../../util/testing.js"; + 
+import { RedisVectorStore } from "../redis.js"; + +const createRedisClientMockup = () => { + const hSetMock = jest.fn(); + + return { + ft: { + info: jest.fn().mockResolvedValue({ + numDocs: 0, + }), + create: jest.fn(), + search: jest.fn().mockResolvedValue({ + total: 0, + documents: [], + }), + dropIndex: jest.fn(), + }, + hSet: hSetMock, + multi: jest.fn().mockImplementation(() => ({ + exec: jest.fn(), + hSet: hSetMock, + })), + }; +}; + +test("RedisVectorStore with external keys", async () => { + const client = createRedisClientMockup(); + const embeddings = new FakeEmbeddings(); + + const store = new RedisVectorStore(embeddings, { + redisClient: client as any, + indexName: "documents", + }); + + expect(store).toBeDefined(); + + await store.addDocuments( + [ + { + pageContent: "hello", + metadata: { + a: 1, + b: { nested: [1, { a: 4 }] }, + }, + }, + ], + { keys: ["id1"] } + ); + + expect(client.hSet).toHaveBeenCalledTimes(1); + expect(client.hSet).toHaveBeenCalledWith("id1", { + content_vector: Buffer.from(new Float32Array([0.1, 0.2, 0.3, 0.4]).buffer), + content: "hello", + metadata: JSON.stringify({ a: 1, b: { nested: [1, { a: 4 }] } }), + }); + + const results = await store.similaritySearch("goodbye", 1); + + expect(results).toHaveLength(0); +}); + +test("RedisVectorStore with generated keys", async () => { + const client = createRedisClientMockup(); + const embeddings = new FakeEmbeddings(); + + const store = new RedisVectorStore(embeddings, { + redisClient: client as any, + indexName: "documents", + }); + + expect(store).toBeDefined(); + + await store.addDocuments([{ pageContent: "hello", metadata: { a: 1 } }]); + + expect(client.hSet).toHaveBeenCalledTimes(1); + + const results = await store.similaritySearch("goodbye", 1); + + expect(results).toHaveLength(0); +}); + +test("RedisVectorStore with filters", async () => { + const client = createRedisClientMockup(); + const embeddings = new FakeEmbeddings(); + + const store = new RedisVectorStore(embeddings, 
{ + redisClient: client as any, + indexName: "documents", + }); + + expect(store).toBeDefined(); + + await store.similaritySearch("hello", 1, ["a"]); + + expect(client.ft.search).toHaveBeenCalledWith( + "documents", + "@metadata:(a) => [KNN 1 @content_vector $vector AS vector_score]", + { + PARAMS: { + vector: Buffer.from(new Float32Array([0.1, 0.2, 0.3, 0.4]).buffer), + }, + RETURN: ["metadata", "content", "vector_score"], + SORTBY: "vector_score", + DIALECT: 2, + LIMIT: { + from: 0, + size: 1, + }, + } + ); +}); + +describe("RedisVectorStore dropIndex", () => { + const client = createRedisClientMockup(); + const embeddings = new FakeEmbeddings(); + + const store = new RedisVectorStore(embeddings, { + redisClient: client as any, + indexName: "documents", + }); + + test("without deleteDocuments param provided", async () => { + await store.dropIndex(); + + expect(client.ft.dropIndex).toHaveBeenCalledWith("documents", undefined); + }); + + test("with deleteDocuments as false", async () => { + await store.dropIndex(false); + + expect(client.ft.dropIndex).toHaveBeenCalledWith("documents", undefined); + }); + + test("with deleteDocument as true", async () => { + await store.dropIndex(true); + + expect(client.ft.dropIndex).toHaveBeenCalledWith("documents", { + DD: true, + }); + }); + + test("through delete convenience method", async () => { + await store.delete({ deleteAll: true }); + + expect(client.ft.dropIndex).toHaveBeenCalledWith("documents", { + DD: true, + }); + }); +}); + +describe("RedisVectorStore createIndex when index does not exist", () => { + test("calls ft.create with default create options", async () => { + const client = createRedisClientMockup(); + const embeddings = new FakeEmbeddings(); + const store = new RedisVectorStore(embeddings, { + redisClient: client as any, + indexName: "documents", + }); + store.checkIndexExists = jest.fn().mockResolvedValue(false); + + await store.createIndex(); + + expect(client.ft.create).toHaveBeenCalledWith( + 
"documents", + expect.any(Object), + { + ON: "HASH", + PREFIX: "doc:documents:", + } + ); + }); + + test("calls ft.create with custom options", async () => { + const client = createRedisClientMockup(); + const embeddings = new FakeEmbeddings(); + const store = new RedisVectorStore(embeddings, { + redisClient: client as any, + indexName: "documents", + createIndexOptions: { + ON: "JSON", + FILTER: '@indexName == "documents"', + SCORE: 0.5, + MAXTEXTFIELDS: true, + TEMPORARY: 1000, + NOOFFSETS: true, + NOHL: true, + NOFIELDS: true, + NOFREQS: true, + SKIPINITIALSCAN: true, + STOPWORDS: ["a", "b"], + LANGUAGE: "German", + }, + }); + store.checkIndexExists = jest.fn().mockResolvedValue(false); + + await store.createIndex(); + + expect(client.ft.create).toHaveBeenCalledWith( + "documents", + expect.any(Object), + { + ON: "JSON", + PREFIX: "doc:documents:", + FILTER: '@indexName == "documents"', + SCORE: 0.5, + MAXTEXTFIELDS: true, + TEMPORARY: 1000, + NOOFFSETS: true, + NOHL: true, + NOFIELDS: true, + NOFREQS: true, + SKIPINITIALSCAN: true, + STOPWORDS: ["a", "b"], + LANGUAGE: "German", + } + ); + }); +}); diff --git a/libs/langchain-community/src/vectorstores/tests/singlestore.int.test.ts b/libs/langchain-community/src/vectorstores/tests/singlestore.int.test.ts new file mode 100644 index 000000000000..ec8ae55c3337 --- /dev/null +++ b/libs/langchain-community/src/vectorstores/tests/singlestore.int.test.ts @@ -0,0 +1,175 @@ +/* eslint-disable no-process-env */ +/* eslint-disable import/no-extraneous-dependencies */ +import { test, expect } from "@jest/globals"; +import { OpenAIEmbeddings } from "@langchain/openai"; +import { SingleStoreVectorStore } from "../singlestore.js"; +import { Document } from "@langchain/core/documents"; + +test.skip("SingleStoreVectorStore", async () => { + expect(process.env.SINGLESTORE_HOST).toBeDefined(); + expect(process.env.SINGLESTORE_PORT).toBeDefined(); + expect(process.env.SINGLESTORE_USERNAME).toBeDefined(); + 
expect(process.env.SINGLESTORE_PASSWORD).toBeDefined(); + expect(process.env.SINGLESTORE_DATABASE).toBeDefined(); + + const vectorStore = await SingleStoreVectorStore.fromTexts( + ["Hello world", "Bye bye", "hello nice world"], + [ + { id: 2, name: "2" }, + { id: 1, name: "1" }, + { id: 3, name: "3" }, + ], + new OpenAIEmbeddings(), + { + connectionOptions: { + host: process.env.SINGLESTORE_HOST, + port: Number(process.env.SINGLESTORE_PORT), + user: process.env.SINGLESTORE_USERNAME, + password: process.env.SINGLESTORE_PASSWORD, + database: process.env.SINGLESTORE_DATABASE, + }, + contentColumnName: "cont", + metadataColumnName: "met", + vectorColumnName: "vec", + } + ); + expect(vectorStore).toBeDefined(); + + const results = await vectorStore.similaritySearch("hello world", 1); + + expect(results).toEqual([ + new Document({ + pageContent: "Hello world", + metadata: { id: 2, name: "2" }, + }), + ]); + + await vectorStore.addDocuments([ + new Document({ + pageContent: "Green forest", + metadata: { id: 4, name: "4" }, + }), + new Document({ + pageContent: "Green field", + metadata: { id: 5, name: "5" }, + }), + ]); + + const results2 = await vectorStore.similaritySearch("forest", 1); + + expect(results2).toEqual([ + new Document({ + pageContent: "Green forest", + metadata: { id: 4, name: "4" }, + }), + ]); + + await vectorStore.end(); +}); + +test.skip("SingleStoreVectorStore euclidean_distance", async () => { + expect(process.env.SINGLESTORE_HOST).toBeDefined(); + expect(process.env.SINGLESTORE_PORT).toBeDefined(); + expect(process.env.SINGLESTORE_USERNAME).toBeDefined(); + expect(process.env.SINGLESTORE_PASSWORD).toBeDefined(); + expect(process.env.SINGLESTORE_DATABASE).toBeDefined(); + + const vectorStore = await SingleStoreVectorStore.fromTexts( + ["Hello world", "Bye bye", "hello nice world"], + [ + { id: 2, name: "2" }, + { id: 1, name: "1" }, + { id: 3, name: "3" }, + ], + new OpenAIEmbeddings(), + { + connectionURI: 
`http://${process.env.SINGLESTORE_USERNAME}:${process.env.SINGLESTORE_PASSWORD}@${process.env.SINGLESTORE_HOST}:${process.env.SINGLESTORE_PORT}/${process.env.SINGLESTORE_DATABASE}`, + tableName: "euclidean_distance_test", + distanceMetric: "EUCLIDEAN_DISTANCE", + } + ); + expect(vectorStore).toBeDefined(); + + const results = await vectorStore.similaritySearch("hello world", 1); + + expect(results).toEqual([ + new Document({ + pageContent: "Hello world", + metadata: { id: 2, name: "2" }, + }), + ]); + + await vectorStore.end(); +}); + +test.skip("SingleStoreVectorStore filtering", async () => { + expect(process.env.SINGLESTORE_HOST).toBeDefined(); + expect(process.env.SINGLESTORE_PORT).toBeDefined(); + expect(process.env.SINGLESTORE_USERNAME).toBeDefined(); + expect(process.env.SINGLESTORE_PASSWORD).toBeDefined(); + expect(process.env.SINGLESTORE_DATABASE).toBeDefined(); + + const vectorStore = await SingleStoreVectorStore.fromTexts( + ["Hello world", "Bye bye", "hello nice world"], + [ + { id: 2, name: "2", sub: { sub2: { idx: 1 } } }, + { id: 1, name: "1" }, + { id: 3, name: "3" }, + ], + new OpenAIEmbeddings(), + { + connectionURI: `http://${process.env.SINGLESTORE_USERNAME}:${process.env.SINGLESTORE_PASSWORD}@${process.env.SINGLESTORE_HOST}:${process.env.SINGLESTORE_PORT}/${process.env.SINGLESTORE_DATABASE}`, + tableName: "filtering_test", + } + ); + expect(vectorStore).toBeDefined(); + + const results1 = await vectorStore.similaritySearch("hello world", 1, { + id: 3, + }); + + expect(results1).toEqual([ + new Document({ + pageContent: "hello nice world", + metadata: { id: 3, name: "3" }, + }), + ]); + + const results2 = await vectorStore.similaritySearch("hello nice world", 1, { + name: "2", + }); + expect(results2).toEqual([ + new Document({ + pageContent: "Hello world", + metadata: { id: 2, name: "2", sub: { sub2: { idx: 1 } } }, + }), + ]); + + const results3 = await vectorStore.similaritySearch("hello nice world", 1, { + sub: { sub2: { idx: 1 } }, + }); + 
expect(results3).toEqual([ + new Document({ + pageContent: "Hello world", + metadata: { id: 2, name: "2", sub: { sub2: { idx: 1 } } }, + }), + ]); + + const results4 = await vectorStore.similaritySearch("hello nice world", 1, { + name: "2", + id: 2, + }); + expect(results4).toEqual([ + new Document({ + pageContent: "Hello world", + metadata: { id: 2, name: "2", sub: { sub2: { idx: 1 } } }, + }), + ]); + + const results5 = await vectorStore.similaritySearch("hello nice world", 1, { + name: "3", + sub: { sub2: { idx: 1 } }, + }); + expect(results5).toEqual([]); + await vectorStore.end(); +}); diff --git a/libs/langchain-community/src/vectorstores/tests/tigris.test.ts b/libs/langchain-community/src/vectorstores/tests/tigris.test.ts new file mode 100644 index 000000000000..e03943d52312 --- /dev/null +++ b/libs/langchain-community/src/vectorstores/tests/tigris.test.ts @@ -0,0 +1,76 @@ +/* eslint-disable @typescript-eslint/no-explicit-any */ +import { jest, test, expect } from "@jest/globals"; +import { FakeEmbeddings } from "../../util/testing.js"; + +import { TigrisVectorStore } from "../tigris.js"; + +test("TigrisVectorStore with external ids", async () => { + const client = { + addDocumentsWithVectors: jest.fn(), + similaritySearchVectorWithScore: jest + .fn() + .mockImplementation(async () => []), + }; + const embeddings = new FakeEmbeddings(); + + const store = new TigrisVectorStore(embeddings, { + index: client as any, + }); + + expect(store).toBeDefined(); + + await store.addDocuments( + [ + { + pageContent: "hello", + metadata: { + a: 1, + b: { nested: [1, { a: 4 }] }, + }, + }, + ], + ["id1"] + ); + + expect(client.addDocumentsWithVectors).toHaveBeenCalledTimes(1); + + expect(client.addDocumentsWithVectors).toHaveBeenCalledWith({ + ids: ["id1"], + embeddings: [[0.1, 0.2, 0.3, 0.4]], + documents: [ + { + content: "hello", + metadata: { + a: 1, + b: { nested: [1, { a: 4 }] }, + }, + }, + ], + }); + + const results = await store.similaritySearch("hello", 1); + + 
expect(results).toHaveLength(0); +}); + +test("TigrisVectorStore with generated ids", async () => { + const client = { + addDocumentsWithVectors: jest.fn(), + similaritySearchVectorWithScore: jest + .fn() + .mockImplementation(async () => []), + }; + const embeddings = new FakeEmbeddings(); + + const store = new TigrisVectorStore(embeddings, { index: client as any }); + + expect(store).toBeDefined(); + + await store.addDocuments([{ pageContent: "hello", metadata: { a: 1 } }]); + + expect(client.addDocumentsWithVectors).toHaveBeenCalledTimes(1); + + const results = await store.similaritySearch("hello", 1); + + expect(results).toHaveLength(0); +}); diff --git a/libs/langchain-community/src/vectorstores/tests/typeorm.int.test.ts b/libs/langchain-community/src/vectorstores/tests/typeorm.int.test.ts new file mode 100644 index 000000000000..1a59fde00409 --- /dev/null +++ b/libs/langchain-community/src/vectorstores/tests/typeorm.int.test.ts @@ -0,0 +1,51 @@ +import { expect, test } from "@jest/globals"; +import { DataSourceOptions } from "typeorm"; +import { OpenAIEmbeddings } from "@langchain/openai"; +import { TypeORMVectorStore } from "../typeorm.js"; + +test.skip("Test embeddings creation", async () => { + const args = { + postgresConnectionOptions: { + type: "postgres", + host: "localhost", + port: 5432, + username: "myuser", + password: "ChangeMe", + database: "api", + } as DataSourceOptions, + tableName: "testlangchain", + }; + + const typeormVectorStore = await TypeORMVectorStore.fromDataSource( + new OpenAIEmbeddings(), + args + ); + + expect(typeormVectorStore).toBeDefined(); + + const docHello = { + pageContent: "hello", + metadata: { a: 1 }, + }; + const docCat = { + pageContent: "Cat drinks milk", + metadata: { a: 2 }, + }; + const docHi = { pageContent: "hi", metadata: { a: 1 } }; + + await typeormVectorStore.addDocuments([docHello, docHi, docCat]); + + const results = await typeormVectorStore.similaritySearch("hello", 2, { + a: 2, + }); + + 
expect(results).toHaveLength(1); + + expect(results[0].pageContent).toEqual(docCat.pageContent); + + await typeormVectorStore.appDataSource.query( + 'TRUNCATE TABLE "testlangchain"' + ); + + await typeormVectorStore.appDataSource.destroy(); +}); diff --git a/libs/langchain-community/src/vectorstores/tests/typesense.test.ts b/libs/langchain-community/src/vectorstores/tests/typesense.test.ts new file mode 100644 index 000000000000..c3eba14f7d49 --- /dev/null +++ b/libs/langchain-community/src/vectorstores/tests/typesense.test.ts @@ -0,0 +1,127 @@ +import { Client } from "typesense"; +import { Document } from "@langchain/core/documents"; +import { FakeEmbeddings } from "../../util/testing.js"; +import { Typesense } from "../typesense.js"; + +test("documentsToTypesenseRecords should return the correct typesense records", async () => { + const embeddings = new FakeEmbeddings(); + const vectorstore = new Typesense(embeddings, { + schemaName: "test", + typesenseClient: {} as unknown as Client, + columnNames: { + vector: "vec", + pageContent: "text", + metadataColumnNames: ["foo", "bar", "baz"], + }, + }); + + const documents: Document[] = [ + { + metadata: { + id: "1", + foo: "fooo", + bar: "barr", + baz: "bazz", + }, + pageContent: "hello world", + }, + { + metadata: { + id: "2", + foo: "foooo", + bar: "barrr", + baz: "bazzz", + }, + pageContent: "hello world 2", + }, + ]; + + const expected = [ + { + text: "hello world", + foo: "fooo", + bar: "barr", + baz: "bazz", + vec: await embeddings.embedQuery("hello world"), + }, + { + text: "hello world 2", + foo: "foooo", + bar: "barrr", + baz: "bazzz", + vec: await embeddings.embedQuery("hello world 2"), + }, + ]; + + expect( + await vectorstore._documentsToTypesenseRecords( + documents, + await embeddings.embedDocuments(["hello world", "hello world 2"]) + ) + ).toEqual(expected); +}); + +test("typesenseRecordsToDocuments should return the correct langchain documents", async () => { + const embeddings = new FakeEmbeddings(); + 
const vectorstore = new Typesense(embeddings, { + schemaName: "test", + typesenseClient: {} as unknown as Client, + columnNames: { + vector: "vec", + pageContent: "text", + metadataColumnNames: ["foo", "bar", "baz"], + }, + }); + + const typesenseRecords = [ + { + document: { + text: "hello world", + foo: "fooo", + bar: "barr", + baz: "bazz", + vec: await embeddings.embedQuery("hello world"), + }, + vector_distance: 0.2342145, + }, + { + document: { + text: "hello world 2", + foo: "foooo", + bar: "barrr", + baz: "bazzz", + vec: await embeddings.embedQuery("hello world 2"), + }, + vector_distance: 0.4521355, + }, + ]; + + const expected = [ + [ + { + metadata: { + foo: "fooo", + bar: "barr", + baz: "bazz", + }, + pageContent: "hello world", + }, + 0.2342145, + ], + [ + { + metadata: { + foo: "foooo", + bar: "barrr", + baz: "bazzz", + }, + pageContent: "hello world 2", + }, + 0.4521355, + ], + ]; + + expect(vectorstore._typesenseRecordsToDocuments(typesenseRecords)).toEqual( + expected + ); +}); diff --git a/libs/langchain-community/src/vectorstores/tests/vectara.int.test.ts b/libs/langchain-community/src/vectorstores/tests/vectara.int.test.ts new file mode 100644 index 000000000000..7ca5c11ac67d --- /dev/null +++ b/libs/langchain-community/src/vectorstores/tests/vectara.int.test.ts @@ -0,0 +1,225 @@ +/* eslint-disable @typescript-eslint/no-unused-vars */ +/* eslint-disable no-process-env */ +import fs from "fs"; +import { expect, beforeAll } from "@jest/globals"; +import { insecureHash } from "@langchain/core/utils/hash"; +import { Document } from "@langchain/core/documents"; +import { FakeEmbeddings } from "../../util/testing.js"; +import { VectaraFile, VectaraLibArgs, VectaraStore } from "../vectara.js"; + +const getDocs = (): Document[] => { + // Some text from Lord of the Rings + const englishOne = `It all depends on what you want. You can trust us to stick to you through thick and thin to the + bitter end. 
And you can trust us to keep any secret of yours - closer than you keep it yourself. + But you cannot trust us to let you face trouble alone, and go off without a word. We are your + friends, Frodo. Anyway: there it is. We know most of what Gandalf has told you. We know a good + deal about the Ring. We are horribly afraid - but we are coming with you; or following you + like hounds.`; + const englishTwo = `Sam lay back, and stared with open mouth, and for a moment, between bewilderment and great joy, + he could not answer. At last he gasped: “Gandalf! I thought you were dead! But then I thought I + was dead myself. Is everything sad going to come untrue? What's happened to the world?`; + const frenchOne = `Par exemple, sur la planète Terre, l'homme a toujours supposé qu'il était plus intelligent que les dauphins + parce qu'il avait accompli tant de choses - la roue, New York, les guerres, etc. passer du + bon temps. Mais à l'inverse, les dauphins ont toujours cru qu'ils étaient bien plus + intelligents que l'homme, pour les mêmes raisons précisément.`; + + const documents = [ + new Document({ + pageContent: englishOne, + metadata: { + document_id: insecureHash(englishOne), // Generate a hashcode for document id based on the text + title: "Lord of the Rings", + author: "Tolkien", + genre: "fiction", + lang: "eng", + }, + }), + new Document({ + pageContent: englishTwo, + metadata: { + document_id: insecureHash(englishTwo), // Generate a hashcode for document id based on the text + title: "Lord of the Rings", + author: "Tolkien", + genre: "fiction", + lang: "eng", + }, + }), + new Document({ + pageContent: frenchOne, + metadata: { + document_id: insecureHash(frenchOne), // Generate a hashcode for document id based on the text + title: "The hitchhiker's guide to the galaxy", + author: "Douglas Adams", + genre: "fiction", + lang: "fra", + }, + }), + ]; + return documents; +}; + +let corpusId: number[] = []; +const envValue = process.env.VECTARA_CORPUS_ID; +if (envValue) 
{ + corpusId = envValue.split(",").map((id) => { + const num = Number(id); + if (Number.isNaN(num)) corpusId = [0]; + return num; + }); + + if (corpusId.length === 0) corpusId = [0]; +} else { + corpusId = [0]; +} + +describe("VectaraStore", () => { + ["VECTARA_CUSTOMER_ID", "VECTARA_CORPUS_ID", "VECTARA_API_KEY"].forEach( + (envVar) => { + if (!process.env[envVar]) { + throw new Error(`${envVar} not set`); + } + } + ); + + describe("fromTexts", () => { + const args: VectaraLibArgs = { + customerId: Number(process.env.VECTARA_CUSTOMER_ID) || 0, + corpusId, + apiKey: process.env.VECTARA_API_KEY || "", + }; + + test.skip("with fakeEmbeddings doesn't throw error", () => { + expect(() => + VectaraStore.fromTexts([], [], new FakeEmbeddings(), args) + ).not.toThrow(); + }); + }); + + describe("fromDocuments", () => { + const args: VectaraLibArgs = { + customerId: Number(process.env.VECTARA_CUSTOMER_ID) || 0, + corpusId, + apiKey: process.env.VECTARA_API_KEY || "", + }; + + test.skip("with fakeEmbeddings doesn't throw error", async () => { + await expect( + VectaraStore.fromDocuments(getDocs(), new FakeEmbeddings(), args) + ).resolves.toBeDefined(); + }); + }); + + describe("access operations", () => { + let store: VectaraStore; + let doc_ids: string[] = []; + + beforeAll(async () => { + store = new VectaraStore({ + customerId: Number(process.env.VECTARA_CUSTOMER_ID) || 0, + corpusId, + apiKey: process.env.VECTARA_API_KEY || "", + }); + doc_ids = await store.addDocuments(getDocs()); + }); + + test.skip("similaritySearchWithScore", async () => { + const resultsWithScore = await store.similaritySearchWithScore( + "What did Sam do?", + 10, // Number of results needed + { lambda: 0.025 } + ); + expect(resultsWithScore.length).toBeGreaterThan(0); + expect(resultsWithScore[0][0].pageContent.length).toBeGreaterThan(0); + expect(resultsWithScore[0][0].metadata.title).toBe("Lord of the Rings"); + expect(resultsWithScore[0][1]).toBeGreaterThan(0); + }); + + 
test.skip("similaritySearch", async () => { + const results = await store.similaritySearch( + "Was Gandalf dead?", + 10, // Number of results needed + { + lambda: 0.025, + contextConfig: { + sentencesAfter: 1, + sentencesBefore: 1, + }, + } + ); + expect(results.length).toBeGreaterThan(0); + expect(results[0].pageContent.length).toBeGreaterThan(0); + expect(results[0].metadata.title).toBe("Lord of the Rings"); + }); + + test.skip("similaritySearch with filter", async () => { + const results = await store.similaritySearch( + "Was Gandalf dead?", + 10, // Number of results needed + { filter: "part.lang = 'fra'", lambda: 0.025 } // Filter on the language of the document + ); + expect(results.length).toBeGreaterThan(0); + expect(results[0].pageContent.length).toBeGreaterThan(0); + // Query filtered on French, so we expect only French results + const hasEnglish = results.some( + (result) => + // eslint-disable-next-line @typescript-eslint/no-explicit-any + result.metadata.lang === "eng" + ); + expect(hasEnglish).toBe(false); + }); + + test.skip("addFiles", async () => { + const docs = getDocs(); + const englishOneContent = docs[0].pageContent; + const frenchOneContent = docs[2].pageContent; + + const files = [ + { filename: "englishOne.txt", content: englishOneContent }, + { filename: "frenchOne.txt", content: frenchOneContent }, + ]; + + const vectaraFiles: VectaraFile[] = []; + for (const file of files) { + fs.writeFileSync(file.filename, file.content); + + const buffer = fs.readFileSync(file.filename); + vectaraFiles.push({ + blob: new Blob([buffer], { type: "text/plain" }), + fileName: file.filename, + }); + } + + const bitcoinBuffer = fs.readFileSync( + "../examples/src/document_loaders/example_data/bitcoin.pdf" + ); + vectaraFiles.push({ + blob: new Blob([bitcoinBuffer], { type: "application/pdf" }), + fileName: "bitcoin.pdf", + }); + + const file_doc_ids = await store.addFiles(vectaraFiles); + doc_ids = [...doc_ids, ...file_doc_ids]; + + for (const file of files) 
{ + fs.unlinkSync(file.filename); + } + + expect(file_doc_ids.length).toEqual(3); + const searchResults = await store.similaritySearch("What is bitcoin"); + expect(searchResults.length).toBeGreaterThan(0); + expect(searchResults[0].pageContent).toContain( + "A Peer-to-Peer Electronic Cash System" + ); + }); + + // delete documents added in the test + afterAll(async () => { + store = new VectaraStore({ + customerId: Number(process.env.VECTARA_CUSTOMER_ID) || 0, + corpusId, + apiKey: process.env.VECTARA_API_KEY || "", + }); + await store.deleteDocuments(doc_ids); + }); + }); +}); diff --git a/libs/langchain-community/src/vectorstores/tests/vercel_postgres.int.test.ts b/libs/langchain-community/src/vectorstores/tests/vercel_postgres.int.test.ts new file mode 100644 index 000000000000..5e572a012f81 --- /dev/null +++ b/libs/langchain-community/src/vectorstores/tests/vercel_postgres.int.test.ts @@ -0,0 +1,129 @@ +import { expect, test } from "@jest/globals"; +import { OpenAIEmbeddings } from "@langchain/openai"; + +import { VercelPostgres } from "../vercel_postgres.js"; + +let vercelPostgresStore: VercelPostgres; + +const config = { + tableName: "testvercelvectorstorelangchain2", + columns: { + idColumnName: "id", + vectorColumnName: "vector", + contentColumnName: "content", + metadataColumnName: "metadata", + }, +}; + +describe("Test VercelPostgres store", () => { + afterEach(async () => { + await vercelPostgresStore?.delete({ deleteAll: true }); + await vercelPostgresStore?.end(); + }); + + test("Test embeddings creation", async () => { + vercelPostgresStore = await VercelPostgres.initialize( + new OpenAIEmbeddings(), + config + ); + + expect(vercelPostgresStore).toBeDefined(); + + const docHello = { + pageContent: "hello", + metadata: { a: 1 }, + }; + const docCat = { + pageContent: "Cat drinks milk", + metadata: { a: 2 }, + }; + const docHi = { pageContent: "hi", metadata: { a: 1 } }; + + const ids = await vercelPostgresStore.addDocuments([ + docHello, + docHi, + 
docCat, + ]); + + const results = await vercelPostgresStore.similaritySearch("hello", 2, { + a: 2, + }); + + expect(results).toHaveLength(1); + + expect(results[0].pageContent).toEqual(docCat.pageContent); + + await vercelPostgresStore.addDocuments( + [{ pageContent: "Dog drinks milk", metadata: { a: 2 } }], + { ids: [ids[2]] } + ); + + const results2 = await vercelPostgresStore.similaritySearch("hello", 2, { + a: 2, + }); + + expect(results2).toHaveLength(1); + expect(results2[0].pageContent).toEqual("Dog drinks milk"); + + await vercelPostgresStore.delete({ ids: [ids[2]] }); + + const results3 = await vercelPostgresStore.similaritySearch("hello", 2, { + a: 2, + }); + + expect(results3).toHaveLength(0); + }); + + test("Test metadata filtering", async () => { + vercelPostgresStore = await VercelPostgres.initialize( + new OpenAIEmbeddings(), + config + ); + + const docGreen = { + pageContent: "Hi, I am the color green.", + metadata: { color: "green" }, + }; + const docBlue = { + pageContent: "Hi, I am the color blue.", + metadata: { color: "blue" }, + }; + const docYellow = { + pageContent: "Hi, I am the color yellow.", + metadata: { color: "yellow" }, + }; + const docIrrelevant = { + pageContent: "Hi, I am an irrelevant doc without metadata.", + metadata: {}, + }; + + await vercelPostgresStore.addDocuments([ + docGreen, + docBlue, + docYellow, + docIrrelevant, + ]); + + const results1 = await vercelPostgresStore.similaritySearch("color", 5, { + color: "blue", + }); + + expect(results1).toHaveLength(1); + + const results2 = await vercelPostgresStore.similaritySearch( + "irrelevant query", + 5, + { + color: { in: ["blue", "yellow"] }, + } + ); + + expect(results2).toHaveLength(2); + + const results2WithColorGreen = results2.filter( + (result) => result.metadata.color === "green" + ); + + expect(results2WithColorGreen).toHaveLength(0); + }); +}); diff --git a/libs/langchain-community/src/vectorstores/tests/voy.int.test.ts 
b/libs/langchain-community/src/vectorstores/tests/voy.int.test.ts new file mode 100644 index 000000000000..27af3a74bab0 --- /dev/null +++ b/libs/langchain-community/src/vectorstores/tests/voy.int.test.ts @@ -0,0 +1,49 @@ +import { expect, test } from "@jest/globals"; +import { Voy as VoyOriginClient } from "voy-search"; +import { Document } from "@langchain/core/documents"; +import { OpenAIEmbeddings } from "@langchain/openai"; +import { VoyVectorStore } from "../voy.js"; + +const client = new VoyOriginClient(); + +test("it can create index using Voy.from text, add new elements to the index and get queried documents", async () => { + const vectorStore = await VoyVectorStore.fromTexts( + ["initial first page", "initial second page"], + [{ id: 1 }, { id: 2 }], + new OpenAIEmbeddings(), + client + ); + // the number of dimensions is produced by OpenAI + expect(vectorStore.numDimensions).toBe(1536); + await vectorStore.addDocuments([ + new Document({ + pageContent: "added first page", + metadata: { id: 5 }, + }), + new Document({ + pageContent: "added second page", + metadata: { id: 4 }, + }), + new Document({ + pageContent: "added third page", + metadata: { id: 6 }, + }), + ]); + expect(vectorStore.docstore.length).toBe(5); + await vectorStore.addDocuments([ + new Document({ + pageContent: "added another first page", + metadata: { id: 7 }, + }), + ]); + const results = await vectorStore.similaritySearchWithScore("added first", 6); + expect(results.length).toBe(6); + await vectorStore.delete({ + deleteAll: true, + }); + const results2 = await vectorStore.similaritySearchWithScore( + "added first", + 6 + ); + expect(results2.length).toBe(0); +}); diff --git a/libs/langchain-community/src/vectorstores/tests/voy.test.ts b/libs/langchain-community/src/vectorstores/tests/voy.test.ts new file mode 100644 index 000000000000..b941e94ca72b --- /dev/null +++ b/libs/langchain-community/src/vectorstores/tests/voy.test.ts @@ -0,0 +1,56 @@ +import { test, expect } from 
"@jest/globals"; +import { Document } from "@langchain/core/documents"; +import { FakeEmbeddings } from "../../util/testing.js"; +import { VoyVectorStore, VoyClient } from "../voy.js"; + +const fakeClient: VoyClient = { + index: ({ embeddings }) => embeddings.map((i) => i.id).join(","), + add: (_) => {}, + search: () => ({ + neighbors: [ + { id: "0", title: "", url: "" }, + { id: "1", title: "", url: "" }, + ], + }), + clear: () => {}, +}; + +test("it can create index using Voy.from text, add new elements to the index and get queried documents", async () => { + const vectorStore = await VoyVectorStore.fromTexts( + ["initial first page", "initial second page"], + [{ id: 1 }, { id: 2 }], + new FakeEmbeddings(), + fakeClient + ); + + // the number of dimensions is produced by fake embeddings + expect(vectorStore.numDimensions).toBe(4); + await vectorStore.addVectors( + [ + [0, 1, 0, 0], + [1, 0, 0, 0], + [0.5, 0.5, 0.5, 0.5], + ], + [ + new Document({ + pageContent: "added first page", + metadata: { id: 5 }, + }), + new Document({ + pageContent: "added second page", + metadata: { id: 4 }, + }), + new Document({ + pageContent: "added third page", + metadata: { id: 6 }, + }), + ] + ); + expect(vectorStore.docstore.length).toBe(5); + const results = await vectorStore.similaritySearchVectorWithScore( + [1, 0, 0, 0], + 3 + ); + expect(results[0][0].metadata.id).toBe(1); + expect(results[1][0].metadata.id).toBe(2); +}); diff --git a/libs/langchain-community/src/vectorstores/tests/xata.int.test.ts b/libs/langchain-community/src/vectorstores/tests/xata.int.test.ts new file mode 100644 index 000000000000..a0d03964a893 --- /dev/null +++ b/libs/langchain-community/src/vectorstores/tests/xata.int.test.ts @@ -0,0 +1,166 @@ +/* eslint-disable no-process-env */ +// eslint-disable-next-line import/no-extraneous-dependencies +import { BaseClient } from "@xata.io/client"; + +import { OpenAIEmbeddings } from "@langchain/openai"; +import { Document } from "@langchain/core/documents"; 
+import { XataVectorSearch } from "../xata.js"; + +// Tests require a DB with a table called "docs" with: +// * a column name content of type Text +// * a column named embedding of type Vector +// * a column named a of type Integer + +test.skip("XataVectorSearch integration", async () => { + if (!process.env.XATA_API_KEY) { + throw new Error("XATA_API_KEY not set"); + } + + if (!process.env.XATA_DB_URL) { + throw new Error("XATA_DB_URL not set"); + } + const xata = new BaseClient({ + databaseURL: process.env.XATA_DB_URL, + apiKey: process.env.XATA_API_KEY, + branch: process.env.XATA_BRANCH || "main", + }); + + const table = "docs"; + const embeddings = new OpenAIEmbeddings(); + + const store = new XataVectorSearch(embeddings, { client: xata, table }); + expect(store).toBeDefined(); + + const createdAt = new Date().getTime(); + + const ids1 = await store.addDocuments([ + { pageContent: "hello", metadata: { a: createdAt + 1 } }, + { pageContent: "car", metadata: { a: createdAt } }, + { pageContent: "adjective", metadata: { a: createdAt } }, + { pageContent: "hi", metadata: { a: createdAt } }, + ]); + + let results1 = await store.similaritySearch("hello!", 1); + + // search store is eventually consistent so we need to retry if nothing is + // returned + for (let i = 0; i < 5 && results1.length === 0; i += 1) { + results1 = await store.similaritySearch("hello!", 1); + // eslint-disable-next-line no-promise-executor-return + await new Promise((r) => setTimeout(r, 1000)); + } + + expect(results1).toHaveLength(1); + expect(results1).toEqual([ + new Document({ metadata: { a: createdAt + 1 }, pageContent: "hello" }), + ]); + + const results2 = await store.similaritySearchWithScore("testing!", 6, { + a: createdAt, + }); + expect(results2).toHaveLength(3); + + const ids2 = await store.addDocuments( + [ + { pageContent: "hello upserted", metadata: { a: createdAt + 1 } }, + { pageContent: "car upserted", metadata: { a: createdAt } }, + { pageContent: "adjective upserted", 
metadata: { a: createdAt } }, + { pageContent: "hi upserted", metadata: { a: createdAt } }, + ], + { ids: ids1 } + ); + + expect(ids1).toEqual(ids2); + + const results3 = await store.similaritySearchWithScore("testing!", 6, { + a: createdAt, + }); + + expect(results3).toHaveLength(3); + + await store.delete({ ids: ids1.slice(2) }); + + let results4 = await store.similaritySearchWithScore("testing!", 3, { + a: createdAt, + }); + for (let i = 0; i < 5 && results4.length > 1; i += 1) { + results4 = await store.similaritySearchWithScore("testing!", 3, { + a: createdAt, + }); + // eslint-disable-next-line no-promise-executor-return + await new Promise((r) => setTimeout(r, 1000)); + } + + expect(results4).toHaveLength(1); + + await store.delete({ ids: ids1 }); + let results5 = await store.similaritySearch("hello!", 1); + for (let i = 0; i < 5 && results5.length > 0; i += 1) { + results5 = await store.similaritySearch("hello!", 1); + // eslint-disable-next-line no-promise-executor-return + await new Promise((r) => setTimeout(r, 1000)); + } + expect(results5).toHaveLength(0); +}); + +test.skip("Search a XataVectorSearch using a metadata filter", async () => { + if (!process.env.XATA_API_KEY) { + throw new Error("XATA_API_KEY not set"); + } + + if (!process.env.XATA_DB_URL) { + throw new Error("XATA_DB_URL not set"); + } + const xata = new BaseClient({ + databaseURL: process.env.XATA_DB_URL, + apiKey: process.env.XATA_API_KEY, + branch: process.env.XATA_BRANCH || "main", + }); + + const table = "docs"; + const embeddings = new OpenAIEmbeddings(); + + const store = new XataVectorSearch(embeddings, { client: xata, table }); + expect(store).toBeDefined(); + + const createdAt = new Date().getTime(); + + const ids = await store.addDocuments([ + { pageContent: "hello 0", metadata: { a: createdAt } }, + { pageContent: "hello 1", metadata: { a: createdAt + 1 } }, + { pageContent: "hello 2", metadata: { a: createdAt + 2 } }, + { pageContent: "hello 3", metadata: { a: createdAt + 3 } 
}, + ]); + + // search store is eventually consistent so we need to retry if nothing is + // returned + let results1 = await store.similaritySearch("hello!", 1); + for (let i = 0; i < 5 && results1.length < 4; i += 1) { + results1 = await store.similaritySearch("hello", 6); + // eslint-disable-next-line no-promise-executor-return + await new Promise((r) => setTimeout(r, 1000)); + } + + expect(results1).toHaveLength(4); + + const results = await store.similaritySearch("hello", 1, { + a: createdAt + 2, + }); + expect(results).toHaveLength(1); + + expect(results).toEqual([ + new Document({ + metadata: { a: createdAt + 2 }, + pageContent: "hello 2", + }), + ]); + + await store.delete({ ids }); + let results5 = await store.similaritySearch("hello!", 1); + for (let i = 0; i < 5 && results5.length > 0; i += 1) { + results5 = await store.similaritySearch("hello", 1); + // eslint-disable-next-line no-promise-executor-return + await new Promise((r) => setTimeout(r, 1000)); + } + expect(results5).toHaveLength(0); +}); diff --git a/libs/langchain-community/src/vectorstores/tigris.ts b/libs/langchain-community/src/vectorstores/tigris.ts new file mode 100644 index 000000000000..5913f6c08f37 --- /dev/null +++ b/libs/langchain-community/src/vectorstores/tigris.ts @@ -0,0 +1,177 @@ +import * as uuid from "uuid"; + +import { Embeddings } from "@langchain/core/embeddings"; +import { VectorStore } from "@langchain/core/vectorstores"; +import { Document } from "@langchain/core/documents" + +/** + * Type definition for the arguments required to initialize a + * TigrisVectorStore instance. + */ +export type TigrisLibArgs = { + // eslint-disable-next-line @typescript-eslint/no-explicit-any + index: any; +}; + +/** + * Class for managing and operating vector search applications with + * Tigris, an open-source Serverless NoSQL Database and Search Platform. 
+ */ +export class TigrisVectorStore extends VectorStore { + // eslint-disable-next-line @typescript-eslint/no-explicit-any + index?: any; + + _vectorstoreType(): string { + return "tigris"; + } + + constructor(embeddings: Embeddings, args: TigrisLibArgs) { + super(embeddings, args); + + this.embeddings = embeddings; + this.index = args.index; + } + + /** + * Method to add an array of documents to the Tigris database. + * @param documents An array of Document instances to be added to the Tigris database. + * @param options Optional parameter that can either be an array of string IDs or an object with a property 'ids' that is an array of string IDs. + * @returns A Promise that resolves when the documents have been added to the Tigris database. + */ + async addDocuments( + documents: Document[], + options?: { ids?: string[] } | string[] + ): Promise { + const texts = documents.map(({ pageContent }) => pageContent); + await this.addVectors( + await this.embeddings.embedDocuments(texts), + documents, + options + ); + } + + /** + * Method to add vectors to the Tigris database. + * @param vectors An array of vectors to be added to the Tigris database. + * @param documents An array of Document instances corresponding to the vectors. + * @param options Optional parameter that can either be an array of string IDs or an object with a property 'ids' that is an array of string IDs. + * @returns A Promise that resolves when the vectors have been added to the Tigris database. + */ + async addVectors( + vectors: number[][], + documents: Document[], + options?: { ids?: string[] } | string[] + ) { + if (vectors.length === 0) { + return; + } + + if (vectors.length !== documents.length) { + throw new Error(`Vectors and metadatas must have the same length`); + } + + const ids = Array.isArray(options) ? options : options?.ids; + const documentIds = ids == null ? 
documents.map(() => uuid.v4()) : ids; + await this.index?.addDocumentsWithVectors({ + ids: documentIds, + embeddings: vectors, + documents: documents.map(({ metadata, pageContent }) => ({ + content: pageContent, + metadata, + })), + }); + } + + /** + * Method to perform a similarity search in the Tigris database and return + * the k most similar vectors along with their similarity scores. + * @param query The query vector. + * @param k The number of most similar vectors to return. + * @param filter Optional filter object to apply during the search. + * @returns A Promise that resolves to an array of tuples, each containing a Document and its similarity score. + */ + async similaritySearchVectorWithScore( + query: number[], + k: number, + filter?: object + ) { + const result = await this.index?.similaritySearchVectorWithScore({ + query, + k, + filter, + }); + + if (!result) { + return []; + } + + // eslint-disable-next-line @typescript-eslint/no-explicit-any + return result.map(([document, score]: [any, any]) => [ + new Document({ + pageContent: document.content, + metadata: document.metadata, + }), + score, + ]) as [Document, number][]; + } + + /** + * Static method to create a new instance of TigrisVectorStore from an + * array of texts. + * @param texts An array of texts to be converted into Document instances and added to the Tigris database. + * @param metadatas Either an array of metadata objects or a single metadata object to be associated with the texts. + * @param embeddings An instance of Embeddings to be used for embedding the texts. + * @param dbConfig An instance of TigrisLibArgs to be used for configuring the Tigris database. + * @returns A Promise that resolves to a new instance of TigrisVectorStore. 
+ */ + static async fromTexts( + texts: string[], + metadatas: object[] | object, + embeddings: Embeddings, + dbConfig: TigrisLibArgs + ): Promise { + const docs: Document[] = []; + for (let i = 0; i < texts.length; i += 1) { + const metadata = Array.isArray(metadatas) ? metadatas[i] : metadatas; + const newDoc = new Document({ + pageContent: texts[i], + metadata, + }); + docs.push(newDoc); + } + return TigrisVectorStore.fromDocuments(docs, embeddings, dbConfig); + } + + /** + * Static method to create a new instance of TigrisVectorStore from an + * array of Document instances. + * @param docs An array of Document instances to be added to the Tigris database. + * @param embeddings An instance of Embeddings to be used for embedding the documents. + * @param dbConfig An instance of TigrisLibArgs to be used for configuring the Tigris database. + * @returns A Promise that resolves to a new instance of TigrisVectorStore. + */ + static async fromDocuments( + docs: Document[], + embeddings: Embeddings, + dbConfig: TigrisLibArgs + ): Promise { + const instance = new this(embeddings, dbConfig); + await instance.addDocuments(docs); + return instance; + } + + /** + * Static method to create a new instance of TigrisVectorStore from an + * existing index. + * @param embeddings An instance of Embeddings to be used for embedding the documents. + * @param dbConfig An instance of TigrisLibArgs to be used for configuring the Tigris database. + * @returns A Promise that resolves to a new instance of TigrisVectorStore. 
+ */ + static async fromExistingIndex( + embeddings: Embeddings, + dbConfig: TigrisLibArgs + ): Promise { + const instance = new this(embeddings, dbConfig); + return instance; + } +} diff --git a/libs/langchain-community/src/vectorstores/typeorm.ts b/libs/langchain-community/src/vectorstores/typeorm.ts new file mode 100644 index 000000000000..13fc7b758bd4 --- /dev/null +++ b/libs/langchain-community/src/vectorstores/typeorm.ts @@ -0,0 +1,298 @@ +import { Metadata } from "@opensearch-project/opensearch/api/types.js"; +import { DataSource, DataSourceOptions, EntitySchema } from "typeorm"; +import { Embeddings } from "@langchain/core/embeddings"; +import { VectorStore } from "@langchain/core/vectorstores"; +import { Document } from "@langchain/core/documents" +import { getEnvironmentVariable } from "@langchain/core/utils/env"; + +/** + * Interface that defines the arguments required to create a + * `TypeORMVectorStore` instance. It includes Postgres connection options, + * table name, filter, and verbosity level. + */ +export interface TypeORMVectorStoreArgs { + postgresConnectionOptions: DataSourceOptions; + tableName?: string; + filter?: Metadata; + verbose?: boolean; +} + +/** + * Class that extends the `Document` base class and adds an `embedding` + * property. It represents a document in the vector store. + */ +export class TypeORMVectorStoreDocument extends Document { + embedding: string; + + id?: string; +} + +const defaultDocumentTableName = "documents"; + +/** + * Class that provides an interface to a Postgres vector database. It + * extends the `VectorStore` base class and implements methods for adding + * documents and vectors, performing similarity searches, and ensuring the + * existence of a table in the database. 
+ */ +export class TypeORMVectorStore extends VectorStore { + declare FilterType: Metadata; + + tableName: string; + + documentEntity: EntitySchema; + + filter?: Metadata; + + appDataSource: DataSource; + + _verbose?: boolean; + + _vectorstoreType(): string { + return "typeorm"; + } + + private constructor(embeddings: Embeddings, fields: TypeORMVectorStoreArgs) { + super(embeddings, fields); + this.tableName = fields.tableName || defaultDocumentTableName; + this.filter = fields.filter; + + const TypeORMDocumentEntity = new EntitySchema({ + name: fields.tableName ?? defaultDocumentTableName, + columns: { + id: { + generated: "uuid", + type: "uuid", + primary: true, + }, + pageContent: { + type: String, + }, + metadata: { + type: "jsonb", + }, + embedding: { + type: String, + }, + }, + }); + const appDataSource = new DataSource({ + entities: [TypeORMDocumentEntity], + ...fields.postgresConnectionOptions, + }); + this.appDataSource = appDataSource; + this.documentEntity = TypeORMDocumentEntity; + + this._verbose = + getEnvironmentVariable("LANGCHAIN_VERBOSE") === "true" ?? + fields.verbose ?? + false; + } + + /** + * Static method to create a new `TypeORMVectorStore` instance from a + * `DataSource`. It initializes the `DataSource` if it is not already + * initialized. + * @param embeddings Embeddings instance. + * @param fields `TypeORMVectorStoreArgs` instance. + * @returns A new instance of `TypeORMVectorStore`. + */ + static async fromDataSource( + embeddings: Embeddings, + fields: TypeORMVectorStoreArgs + ): Promise { + const postgresqlVectorStore = new TypeORMVectorStore(embeddings, fields); + + if (!postgresqlVectorStore.appDataSource.isInitialized) { + await postgresqlVectorStore.appDataSource.initialize(); + } + + return postgresqlVectorStore; + } + + /** + * Method to add documents to the vector store. It ensures the existence + * of the table in the database, converts the documents into vectors, and + * adds them to the store. 
+ * @param documents Array of `Document` instances. + * @returns Promise that resolves when the documents have been added. + */ + async addDocuments(documents: Document[]): Promise { + const texts = documents.map(({ pageContent }) => pageContent); + // This will create the table if it does not exist. We can call it every time as it doesn't + // do anything if the table already exists, and it is not expensive in terms of performance + await this.ensureTableInDatabase(); + return this.addVectors( + await this.embeddings.embedDocuments(texts), + documents + ); + } + + /** + * Method to add vectors to the vector store. It converts the vectors into + * rows and inserts them into the database. + * @param vectors Array of vectors. + * @param documents Array of `Document` instances. + * @returns Promise that resolves when the vectors have been added. + */ + async addVectors(vectors: number[][], documents: Document[]): Promise { + const rows = vectors.map((embedding, idx) => { + const embeddingString = `[${embedding.join(",")}]`; + const documentRow = { + pageContent: documents[idx].pageContent, + embedding: embeddingString, + metadata: documents[idx].metadata, + }; + + return documentRow; + }); + + const documentRepository = this.appDataSource.getRepository( + this.documentEntity + ); + + const chunkSize = 500; + for (let i = 0; i < rows.length; i += chunkSize) { + const chunk = rows.slice(i, i + chunkSize); + + try { + await documentRepository.save(chunk); + } catch (e) { + console.error(e); + throw new Error(`Error inserting: ${chunk[0].pageContent}`); + } + } + } + + /** + * Method to perform a similarity search in the vector store. It returns + * the `k` most similar documents to the query vector, along with their + * similarity scores. + * @param query Query vector. + * @param k Number of most similar documents to return. + * @param filter Optional filter to apply to the search. 
+ * @returns Promise that resolves with an array of tuples, each containing a `TypeORMVectorStoreDocument` and its similarity score. + */ + async similaritySearchVectorWithScore( + query: number[], + k: number, + filter?: this["FilterType"] + ): Promise<[TypeORMVectorStoreDocument, number][]> { + const embeddingString = `[${query.join(",")}]`; + const _filter = filter ?? "{}"; + + const queryString = ` + SELECT *, embedding <=> $1 as "_distance" + FROM ${this.tableName} + WHERE metadata @> $2 + ORDER BY "_distance" ASC + LIMIT $3;`; + + const documents = await this.appDataSource.query(queryString, [ + embeddingString, + _filter, + k, + ]); + + const results = [] as [TypeORMVectorStoreDocument, number][]; + for (const doc of documents) { + if (doc._distance != null && doc.pageContent != null) { + const document = new Document(doc) as TypeORMVectorStoreDocument; + document.id = doc.id; + results.push([document, doc._distance]); + } + } + + return results; + } + + /** + * Method to ensure the existence of the table in the database. It creates + * the table if it does not already exist. + * @returns Promise that resolves when the table has been ensured. + */ + async ensureTableInDatabase(): Promise { + await this.appDataSource.query("CREATE EXTENSION IF NOT EXISTS vector;"); + await this.appDataSource.query( + 'CREATE EXTENSION IF NOT EXISTS "uuid-ossp";' + ); + + await this.appDataSource.query(` + CREATE TABLE IF NOT EXISTS ${this.tableName} ( + "id" uuid NOT NULL DEFAULT uuid_generate_v4() PRIMARY KEY, + "pageContent" text, + metadata jsonb, + embedding vector + ); + `); + } + + /** + * Static method to create a new `TypeORMVectorStore` instance from an + * array of texts and their metadata. It converts the texts into + * `Document` instances and adds them to the store. + * @param texts Array of texts. + * @param metadatas Array of metadata objects or a single metadata object. + * @param embeddings Embeddings instance. 
+ * @param dbConfig `TypeORMVectorStoreArgs` instance. + * @returns Promise that resolves with a new instance of `TypeORMVectorStore`. + */ + static async fromTexts( + texts: string[], + metadatas: object[] | object, + embeddings: Embeddings, + dbConfig: TypeORMVectorStoreArgs + ): Promise { + const docs = []; + for (let i = 0; i < texts.length; i += 1) { + const metadata = Array.isArray(metadatas) ? metadatas[i] : metadatas; + const newDoc = new Document({ + pageContent: texts[i], + metadata, + }); + docs.push(newDoc); + } + + return TypeORMVectorStore.fromDocuments(docs, embeddings, dbConfig); + } + + /** + * Static method to create a new `TypeORMVectorStore` instance from an + * array of `Document` instances. It adds the documents to the store. + * @param docs Array of `Document` instances. + * @param embeddings Embeddings instance. + * @param dbConfig `TypeORMVectorStoreArgs` instance. + * @returns Promise that resolves with a new instance of `TypeORMVectorStore`. + */ + static async fromDocuments( + docs: Document[], + embeddings: Embeddings, + dbConfig: TypeORMVectorStoreArgs + ): Promise { + const instance = await TypeORMVectorStore.fromDataSource( + embeddings, + dbConfig + ); + await instance.addDocuments(docs); + + return instance; + } + + /** + * Static method to create a new `TypeORMVectorStore` instance from an + * existing index. + * @param embeddings Embeddings instance. + * @param dbConfig `TypeORMVectorStoreArgs` instance. + * @returns Promise that resolves with a new instance of `TypeORMVectorStore`. 
+ */ + static async fromExistingIndex( + embeddings: Embeddings, + dbConfig: TypeORMVectorStoreArgs + ): Promise { + const instance = await TypeORMVectorStore.fromDataSource( + embeddings, + dbConfig + ); + return instance; + } +} diff --git a/libs/langchain-community/src/vectorstores/typesense.ts b/libs/langchain-community/src/vectorstores/typesense.ts new file mode 100644 index 000000000000..a9ce04a1ae20 --- /dev/null +++ b/libs/langchain-community/src/vectorstores/typesense.ts @@ -0,0 +1,320 @@ +import type { Client } from "typesense"; +import type { MultiSearchRequestSchema } from "typesense/lib/Typesense/MultiSearch.js"; +import type { + SearchResponseHit, + DocumentSchema, +} from "typesense/lib/Typesense/Documents.js"; +import { Embeddings } from "@langchain/core/embeddings"; +import { VectorStore } from "@langchain/core/vectorstores"; +import { Document } from "@langchain/core/documents" +import { AsyncCaller, AsyncCallerParams } from "@langchain/core/utils/async_caller"; + +/** + * Interface for the response hit from a vector search in Typesense. + */ +interface VectorSearchResponseHit + extends SearchResponseHit { + vector_distance?: number; +} + +/** + * Typesense vector store configuration. + */ +export interface TypesenseConfig extends AsyncCallerParams { + /** + * Typesense client. + */ + typesenseClient: Client; + /** + * Typesense schema name in which documents will be stored and searched. + */ + schemaName: string; + /** + * Typesense search parameters. + * @default { q: '*', per_page: 5, query_by: '' } + */ + searchParams?: MultiSearchRequestSchema; + /** + * Column names. + */ + columnNames?: { + /** + * Vector column name. + * @default 'vec' + */ + vector?: string; + /** + * Page content column name. + * @default 'text' + */ + pageContent?: string; + /** + * Metadata column names. + * @default [] + */ + metadataColumnNames?: string[]; + }; + /** + * Replace default import function. 
+ * Default import function will update documents if there is a document with the same id. + * @param data + * @param collectionName + */ + import? = Record>( + data: T[], + collectionName: string + ): Promise; +} + +/** + * Typesense vector store. + */ +export class Typesense extends VectorStore { + declare FilterType: Partial; + + private client: Client; + + private schemaName: string; + + private searchParams: MultiSearchRequestSchema; + + private vectorColumnName: string; + + private pageContentColumnName: string; + + private metadataColumnNames: string[]; + + private caller: AsyncCaller; + + private import: ( + data: Record[], + collectionName: string + ) => Promise; + + _vectorstoreType(): string { + return "typesense"; + } + + constructor(embeddings: Embeddings, config: TypesenseConfig) { + super(embeddings, config); + + // Assign config values to class properties. + this.client = config.typesenseClient; + this.schemaName = config.schemaName; + this.searchParams = config.searchParams || { + q: "*", + per_page: 5, + query_by: "", + }; + this.vectorColumnName = config.columnNames?.vector || "vec"; + this.pageContentColumnName = config.columnNames?.pageContent || "text"; + this.metadataColumnNames = config.columnNames?.metadataColumnNames || []; + + // Assign import function. + this.import = config.import || this.importToTypesense.bind(this); + + this.caller = new AsyncCaller(config); + } + + /** + * Default function to import data to typesense + * @param data + * @param collectionName + */ + private async importToTypesense< + T extends Record = Record + >(data: T[], collectionName: string) { + const chunkSize = 2000; + for (let i = 0; i < data.length; i += chunkSize) { + const chunk = data.slice(i, i + chunkSize); + + await this.caller.call(async () => { + await this.client + .collections(collectionName) + .documents() + .import(chunk, { action: "emplace", dirty_values: "drop" }); + }); + } + } + + /** + * Transform documents to Typesense records. 
+ * @param documents + * @returns Typesense records. + */ + _documentsToTypesenseRecords( + documents: Document[], + vectors: number[][] + ): Record[] { + const metadatas = documents.map((doc) => doc.metadata); + + const typesenseDocuments = documents.map((doc, index) => { + const metadata = metadatas[index]; + const objectWithMetadatas: Record = {}; + + this.metadataColumnNames.forEach((metadataColumnName) => { + objectWithMetadatas[metadataColumnName] = metadata[metadataColumnName]; + }); + + return { + [this.pageContentColumnName]: doc.pageContent, + [this.vectorColumnName]: vectors[index], + ...objectWithMetadatas, + }; + }); + + return typesenseDocuments; + } + + /** + * Transform the Typesense records to documents. + * @param typesenseRecords + * @returns documents + */ + _typesenseRecordsToDocuments( + typesenseRecords: + | { document?: Record; vector_distance: number }[] + | undefined + ): [Document, number][] { + const documents: [Document, number][] = + typesenseRecords?.map((hit) => { + const objectWithMetadatas: Record = {}; + const hitDoc = hit.document || {}; + this.metadataColumnNames.forEach((metadataColumnName) => { + objectWithMetadatas[metadataColumnName] = hitDoc[metadataColumnName]; + }); + + const document: Document = { + pageContent: (hitDoc[this.pageContentColumnName] as string) || "", + metadata: objectWithMetadatas, + }; + return [document, hit.vector_distance]; + }) || []; + + return documents; + } + + /** + * Add documents to the vector store. + * Will be updated if in the metadata there is a document with the same id if is using the default import function. + * Metadata will be added in the columns of the schema based on metadataColumnNames. + * @param documents Documents to add. 
+ */ + async addDocuments(documents: Document[]) { + const typesenseDocuments = this._documentsToTypesenseRecords( + documents, + await this.embeddings.embedDocuments( + documents.map((doc) => doc.pageContent) + ) + ); + await this.import(typesenseDocuments, this.schemaName); + } + + /** + * Adds vectors to the vector store. + * @param vectors Vectors to add. + * @param documents Documents associated with the vectors. + */ + async addVectors(vectors: number[][], documents: Document[]) { + const typesenseDocuments = this._documentsToTypesenseRecords( + documents, + vectors + ); + await this.import(typesenseDocuments, this.schemaName); + } + + /** + * Search for similar documents with their similarity score. + * @param vectorPrompt vector to search for + * @param k amount of results to return + * @returns similar documents with their similarity score + */ + async similaritySearchVectorWithScore( + vectorPrompt: number[], + k?: number, + filter: this["FilterType"] = {} + ) { + const amount = k || this.searchParams.per_page || 5; + const vector_query = `${this.vectorColumnName}:([${vectorPrompt}], k:${amount})`; + const typesenseResponse = await this.client.multiSearch.perform( + { + searches: [ + { + ...this.searchParams, + ...filter, + per_page: amount, + vector_query, + collection: this.schemaName, + }, + ], + }, + {} + ); + const results = typesenseResponse.results[0].hits; + + const hits = results?.map((hit: VectorSearchResponseHit) => ({ + document: hit?.document || {}, + vector_distance: hit?.vector_distance || 2, + })) as + | { document: Record; vector_distance: number }[] + | undefined; + + return this._typesenseRecordsToDocuments(hits); + } + + /** + * Delete documents from the vector store. 
+ * @param documentIds ids of the documents to delete + */ + async deleteDocuments(documentIds: string[]) { + await this.client + .collections(this.schemaName) + .documents() + .delete({ + filter_by: `id:=${documentIds.join(",")}`, + }); + } + + /** + * Create a vector store from documents. + * @param docs documents + * @param embeddings embeddings + * @param config Typesense configuration + * @returns Typesense vector store + * @warning You can omit this method, and only use the constructor and addDocuments. + */ + static async fromDocuments( + docs: Document[], + embeddings: Embeddings, + config: TypesenseConfig + ): Promise { + const instance = new Typesense(embeddings, config); + await instance.addDocuments(docs); + + return instance; + } + + /** + * Create a vector store from texts. + * @param texts + * @param metadatas + * @param embeddings + * @param config + * @returns Typesense vector store + */ + static async fromTexts( + texts: string[], + metadatas: object[], + embeddings: Embeddings, + config: TypesenseConfig + ) { + const instance = new Typesense(embeddings, config); + const documents: Document[] = texts.map((text, i) => ({ + pageContent: text, + metadata: metadatas[i] || {}, + })); + await instance.addDocuments(documents); + + return instance; + } +} diff --git a/libs/langchain-community/src/vectorstores/vectara.ts b/libs/langchain-community/src/vectorstores/vectara.ts new file mode 100644 index 000000000000..8e45a9c1aefa --- /dev/null +++ b/libs/langchain-community/src/vectorstores/vectara.ts @@ -0,0 +1,532 @@ +import * as uuid from "uuid"; + +import { Embeddings } from "@langchain/core/embeddings"; +import { VectorStore } from "@langchain/core/vectorstores"; +import { Document } from "@langchain/core/documents" +import { getEnvironmentVariable } from "@langchain/core/utils/env"; +import { FakeEmbeddings } from "../util/testing.js"; + +/** + * Interface for the arguments required to initialize a VectaraStore + * instance. 
+ */ +export interface VectaraLibArgs { + customerId: number; + corpusId: number | number[]; + apiKey: string; + verbose?: boolean; + source?: string; +} + +/** + * Interface for the headers required for Vectara API calls. + */ +interface VectaraCallHeader { + headers: { + "x-api-key": string; + "Content-Type": string; + "customer-id": string; + "X-Source": string; + }; +} + +/** + * Interface for the file objects to be uploaded to Vectara. + */ +export interface VectaraFile { + // The contents of the file to be uploaded. + blob: Blob; + // The name of the file to be uploaded. + fileName: string; +} + +/** + * Interface for the filter options used in Vectara API calls. + */ +export interface VectaraFilter { + // Example of a vectara filter string can be: "doc.rating > 3.0 and part.lang = 'deu'" + // See https://docs.vectara.com/docs/search-apis/sql/filter-overview for more details. + filter?: string; + // Improve retrieval accuracy by adjusting the balance (from 0 to 1), known as lambda, + // between neural search and keyword-based search factors. Values between 0.01 and 0.2 tend to work well. + // see https://docs.vectara.com/docs/api-reference/search-apis/lexical-matching for more details. + lambda?: number; + // The number of sentences before/after the matching segment to add to the context. + contextConfig?: VectaraContextConfig; +} + +/** + * Interface for the context configuration used in Vectara API calls. + */ +export interface VectaraContextConfig { + // The number of sentences before the matching segment to add. Default is 2. + sentencesBefore?: number; + // The number of sentences after the matching segment to add. Default is 2. + sentencesAfter?: number; +} + +/** + * Class for interacting with the Vectara API. Extends the VectorStore + * class. 
+ */ +export class VectaraStore extends VectorStore { + get lc_secrets(): { [key: string]: string } { + return { + apiKey: "VECTARA_API_KEY", + corpusId: "VECTARA_CORPUS_ID", + customerId: "VECTARA_CUSTOMER_ID", + }; + } + + get lc_aliases(): { [key: string]: string } { + return { + apiKey: "vectara_api_key", + corpusId: "vectara_corpus_id", + customerId: "vectara_customer_id", + }; + } + + declare FilterType: VectaraFilter; + + private apiEndpoint = "api.vectara.io"; + + private apiKey: string; + + private corpusId: number[]; + + private customerId: number; + + private verbose: boolean; + + private source: string; + + private vectaraApiTimeoutSeconds = 60; + + _vectorstoreType(): string { + return "vectara"; + } + + constructor(args: VectaraLibArgs) { + // Vectara doesn't need embeddings, but we need to pass something to the parent constructor + // The embeddings are abstracted out from the user in Vectara. + super(new FakeEmbeddings(), args); + + const apiKey = args.apiKey ?? getEnvironmentVariable("VECTARA_API_KEY"); + if (!apiKey) { + throw new Error("Vectara api key is not provided."); + } + this.apiKey = apiKey; + this.source = args.source ?? "langchainjs"; + + const corpusId = + args.corpusId ?? + getEnvironmentVariable("VECTARA_CORPUS_ID") + ?.split(",") + .map((id) => { + const num = Number(id); + if (Number.isNaN(num)) + throw new Error("Vectara corpus id is not a number."); + return num; + }); + if (!corpusId) { + throw new Error("Vectara corpus id is not provided."); + } + + if (typeof corpusId === "number") { + this.corpusId = [corpusId]; + } else { + if (corpusId.length === 0) + throw new Error("Vectara corpus id is not provided."); + this.corpusId = corpusId; + } + + const customerId = + args.customerId ?? getEnvironmentVariable("VECTARA_CUSTOMER_ID"); + if (!customerId) { + throw new Error("Vectara customer id is not provided."); + } + this.customerId = customerId; + + this.verbose = args.verbose ?? 
false; + } + + /** + * Returns a header for Vectara API calls. + * @returns A Promise that resolves to a VectaraCallHeader object. + */ + async getJsonHeader(): Promise { + return { + headers: { + "x-api-key": this.apiKey, + "Content-Type": "application/json", + "customer-id": this.customerId.toString(), + "X-Source": this.source, + }, + }; + } + + /** + * Throws an error, as this method is not implemented. Use addDocuments + * instead. + * @param _vectors Not used. + * @param _documents Not used. + * @returns Does not return a value. + */ + async addVectors( + _vectors: number[][], + _documents: Document[] + ): Promise { + throw new Error( + "Method not implemented. Please call addDocuments instead." + ); + } + + /** + * Method to delete data from the Vectara corpus. + * @param params an array of document IDs to be deleted + * @returns Promise that resolves when the deletion is complete. + */ + async deleteDocuments(ids: string[]): Promise { + if (ids && ids.length > 0) { + const headers = await this.getJsonHeader(); + for (const id of ids) { + const data = { + customer_id: this.customerId, + corpus_id: this.corpusId[0], + document_id: id, + }; + + try { + const controller = new AbortController(); + const timeout = setTimeout( + () => controller.abort(), + this.vectaraApiTimeoutSeconds * 1000 + ); + const response = await fetch( + `https://${this.apiEndpoint}/v1/delete-doc`, + { + method: "POST", + headers: headers?.headers, + body: JSON.stringify(data), + signal: controller.signal, + } + ); + clearTimeout(timeout); + if (response.status !== 200) { + throw new Error( + `Vectara API returned status code ${response.status} when deleting document ${id}` + ); + } + } catch (e) { + const error = new Error(`Error ${(e as Error).message}`); + // eslint-disable-next-line @typescript-eslint/no-explicit-any + (error as any).code = 500; + throw error; + } + } + } else { + throw new Error(`no "ids" specified for deletion`); + } + } + + /** + * Adds documents to the Vectara 
store. + * @param documents An array of Document objects to add to the Vectara store. + * @returns A Promise that resolves to an array of document IDs indexed in Vectara. + */ + async addDocuments(documents: Document[]): Promise { + if (this.corpusId.length > 1) + throw new Error("addDocuments does not support multiple corpus ids"); + + const headers = await this.getJsonHeader(); + const doc_ids: string[] = []; + let countAdded = 0; + for (const document of documents) { + const doc_id: string = document.metadata?.document_id ?? uuid.v4(); + const data = { + customer_id: this.customerId, + corpus_id: this.corpusId[0], + document: { + document_id: doc_id, + title: document.metadata?.title ?? "", + metadata_json: JSON.stringify(document.metadata ?? {}), + section: [ + { + text: document.pageContent, + }, + ], + }, + }; + + try { + const controller = new AbortController(); + const timeout = setTimeout( + () => controller.abort(), + this.vectaraApiTimeoutSeconds * 1000 + ); + const response = await fetch(`https://${this.apiEndpoint}/v1/index`, { + method: "POST", + headers: headers?.headers, + body: JSON.stringify(data), + signal: controller.signal, + }); + clearTimeout(timeout); + const result = await response.json(); + if ( + result.status?.code !== "OK" && + result.status?.code !== "ALREADY_EXISTS" + ) { + const error = new Error( + `Vectara API returned status code ${ + result.status?.code + }: ${JSON.stringify(result.message)}` + ); + // eslint-disable-next-line @typescript-eslint/no-explicit-any + (error as any).code = 500; + throw error; + } else { + countAdded += 1; + doc_ids.push(doc_id); + } + } catch (e) { + const error = new Error( + `Error ${(e as Error).message} while adding document` + ); + // eslint-disable-next-line @typescript-eslint/no-explicit-any + (error as any).code = 500; + throw error; + } + } + if (this.verbose) { + console.log(`Added ${countAdded} documents to Vectara`); + } + + return doc_ids; + } + + /** + * Vectara provides a way to add 
documents directly via their API. This API handles + * pre-processing and chunking internally in an optimal manner. This method is a wrapper + * to utilize that API within LangChain. + * + * @param files An array of VectaraFile objects representing the files and their respective file names to be uploaded to Vectara. + * @param metadata Optional. An array of metadata objects corresponding to each file in the `filePaths` array. + * @returns A Promise that resolves to the number of successfully uploaded files. + */ + async addFiles( + files: VectaraFile[], + metadatas: Record | undefined = undefined + ) { + if (this.corpusId.length > 1) + throw new Error("addFiles does not support multiple corpus ids"); + + const doc_ids: string[] = []; + + for (const [index, file] of files.entries()) { + const md = metadatas ? metadatas[index] : {}; + + const data = new FormData(); + data.append("file", file.blob, file.fileName); + data.append("doc-metadata", JSON.stringify(md)); + + const response = await fetch( + `https://api.vectara.io/v1/upload?c=${this.customerId}&o=${this.corpusId[0]}&d=true`, + { + method: "POST", + headers: { + "x-api-key": this.apiKey, + "X-Source": this.source, + }, + body: data, + } + ); + + const { status } = response; + if (status === 409) { + throw new Error(`File at index ${index} already exists in Vectara`); + } else if (status !== 200) { + throw new Error(`Vectara API returned status code ${status}`); + } else { + const result = await response.json(); + const doc_id = result.document.documentId; + doc_ids.push(doc_id); + } + } + + if (this.verbose) { + console.log(`Uploaded ${files.length} files to Vectara`); + } + + return doc_ids; + } + + /** + * Performs a similarity search and returns documents along with their + * scores. + * @param query The query string for the similarity search. + * @param k Optional. The number of results to return. Default is 10. + * @param filter Optional. A VectaraFilter object to refine the search results. 
+ * @returns A Promise that resolves to an array of tuples, each containing a Document and its score. + */ + async similaritySearchWithScore( + query: string, + k = 10, + filter: VectaraFilter | undefined = undefined + ): Promise<[Document, number][]> { + const headers = await this.getJsonHeader(); + + const corpusKeys = this.corpusId.map((corpusId) => ({ + customerId: this.customerId, + corpusId, + metadataFilter: filter?.filter ?? "", + lexicalInterpolationConfig: { lambda: filter?.lambda ?? 0.025 }, + })); + + const data = { + query: [ + { + query, + numResults: k, + contextConfig: { + sentencesAfter: filter?.contextConfig?.sentencesAfter ?? 2, + sentencesBefore: filter?.contextConfig?.sentencesBefore ?? 2, + }, + corpusKey: corpusKeys, + }, + ], + }; + + const controller = new AbortController(); + const timeout = setTimeout( + () => controller.abort(), + this.vectaraApiTimeoutSeconds * 1000 + ); + const response = await fetch(`https://${this.apiEndpoint}/v1/query`, { + method: "POST", + headers: headers?.headers, + body: JSON.stringify(data), + signal: controller.signal, + }); + clearTimeout(timeout); + if (response.status !== 200) { + throw new Error(`Vectara API returned status code ${response.status}`); + } + + const result = await response.json(); + const responses = result.responseSet[0].response; + const documents = result.responseSet[0].document; + + for (let i = 0; i < responses.length; i += 1) { + const responseMetadata = responses[i].metadata; + const documentMetadata = documents[responses[i].documentIndex].metadata; + const combinedMetadata: Record = {}; + + responseMetadata.forEach((item: { name: string; value: unknown }) => { + combinedMetadata[item.name] = item.value; + }); + + documentMetadata.forEach((item: { name: string; value: unknown }) => { + combinedMetadata[item.name] = item.value; + }); + + responses[i].metadata = combinedMetadata; + } + + const documentsAndScores = responses.map( + (response: { + text: string; + metadata: Record; + 
score: number; + }) => [ + new Document({ + pageContent: response.text, + metadata: response.metadata, + }), + response.score, + ] + ); + return documentsAndScores; + } + + /** + * Performs a similarity search and returns documents. + * @param query The query string for the similarity search. + * @param k Optional. The number of results to return. Default is 10. + * @param filter Optional. A VectaraFilter object to refine the search results. + * @returns A Promise that resolves to an array of Document objects. + */ + async similaritySearch( + query: string, + k = 10, + filter: VectaraFilter | undefined = undefined + ): Promise { + const resultWithScore = await this.similaritySearchWithScore( + query, + k, + filter + ); + return resultWithScore.map((result) => result[0]); + } + + /** + * Throws an error, as this method is not implemented. Use + * similaritySearch or similaritySearchWithScore instead. + * @param _query Not used. + * @param _k Not used. + * @param _filter Not used. + * @returns Does not return a value. + */ + async similaritySearchVectorWithScore( + _query: number[], + _k: number, + _filter?: VectaraFilter | undefined + ): Promise<[Document, number][]> { + throw new Error( + "Method not implemented. Please call similaritySearch or similaritySearchWithScore instead." + ); + } + + /** + * Creates a VectaraStore instance from texts. + * @param texts An array of text strings. + * @param metadatas Metadata for the texts. Can be a single object or an array of objects. + * @param _embeddings Not used. + * @param args A VectaraLibArgs object for initializing the VectaraStore instance. + * @returns A Promise that resolves to a VectaraStore instance. + */ + static fromTexts( + texts: string[], + metadatas: object | object[], + _embeddings: Embeddings, + args: VectaraLibArgs + ): Promise { + const docs: Document[] = []; + for (let i = 0; i < texts.length; i += 1) { + const metadata = Array.isArray(metadatas) ? 
metadatas[i] : metadatas; + const newDoc = new Document({ + pageContent: texts[i], + metadata, + }); + docs.push(newDoc); + } + + return VectaraStore.fromDocuments(docs, new FakeEmbeddings(), args); + } + + /** + * Creates a VectaraStore instance from documents. + * @param docs An array of Document objects. + * @param _embeddings Not used. + * @param args A VectaraLibArgs object for initializing the VectaraStore instance. + * @returns A Promise that resolves to a VectaraStore instance. + */ + static async fromDocuments( + docs: Document[], + _embeddings: Embeddings, + args: VectaraLibArgs + ): Promise { + const instance = new this(args); + await instance.addDocuments(docs); + return instance; + } +} diff --git a/libs/langchain-community/src/vectorstores/vercel_postgres.ts b/libs/langchain-community/src/vectorstores/vercel_postgres.ts new file mode 100644 index 000000000000..6cab962fa0c2 --- /dev/null +++ b/libs/langchain-community/src/vectorstores/vercel_postgres.ts @@ -0,0 +1,393 @@ +import { + type VercelPool, + type VercelPoolClient, + type VercelPostgresPoolConfig, + createPool, +} from "@vercel/postgres"; +import { Embeddings } from "@langchain/core/embeddings"; +import { VectorStore } from "@langchain/core/vectorstores"; +import { Document } from "@langchain/core/documents" +import { getEnvironmentVariable } from "@langchain/core/utils/env"; + +type Metadata = Record>; + +/** + * Interface that defines the arguments required to create a + * `VercelPostgres` instance. It includes Postgres connection options, + * table name, filter, and verbosity level. + */ +export interface VercelPostgresFields { + pool: VercelPool; + client: VercelPoolClient; + tableName?: string; + columns?: { + idColumnName?: string; + vectorColumnName?: string; + contentColumnName?: string; + metadataColumnName?: string; + }; + filter?: Metadata; + verbose?: boolean; +} + +/** + * Class that provides an interface to a Vercel Postgres vector database. 
It + * extends the `VectorStore` base class and implements methods for adding + * documents and vectors and performing similarity searches. + */ +export class VercelPostgres extends VectorStore { + declare FilterType: Metadata; + + tableName: string; + + idColumnName: string; + + vectorColumnName: string; + + contentColumnName: string; + + metadataColumnName: string; + + filter?: Metadata; + + _verbose?: boolean; + + pool: VercelPool; + + client: VercelPoolClient; + + _vectorstoreType(): string { + return "vercel"; + } + + private constructor(embeddings: Embeddings, config: VercelPostgresFields) { + super(embeddings, config); + this.tableName = config.tableName ?? "langchain_vectors"; + this.filter = config.filter; + + this.vectorColumnName = config.columns?.vectorColumnName ?? "embedding"; + this.contentColumnName = config.columns?.contentColumnName ?? "text"; + this.idColumnName = config.columns?.idColumnName ?? "id"; + this.metadataColumnName = config.columns?.metadataColumnName ?? "metadata"; + + this.pool = config.pool; + this.client = config.client; + + this._verbose = + getEnvironmentVariable("LANGCHAIN_VERBOSE") === "true" ?? + !!config.verbose; + } + + /** + * Static method to create a new `VercelPostgres` instance from a + * connection. It creates a table if one does not exist, and calls + * `connect` to return a new instance of `VercelPostgres`. + * + * @param embeddings - Embeddings instance. + * @param fields - `VercelPostgres` configuration options. + * @returns A new instance of `VercelPostgres`. + */ + static async initialize( + embeddings: Embeddings, + config?: Partial & { + postgresConnectionOptions?: VercelPostgresPoolConfig; + } + ): Promise { + // Default maxUses to 1 for edge environments: + // https://github.com/vercel/storage/tree/main/packages/postgres#a-note-on-edge-environments + const pool = + config?.pool ?? + createPool({ maxUses: 1, ...config?.postgresConnectionOptions }); + const client = config?.client ?? 
(await pool.connect()); + const postgresqlVectorStore = new VercelPostgres(embeddings, { + ...config, + pool, + client, + }); + + await postgresqlVectorStore.ensureTableInDatabase(); + + return postgresqlVectorStore; + } + + /** + * Method to add documents to the vector store. It converts the documents into + * vectors, and adds them to the store. + * + * @param documents - Array of `Document` instances. + * @returns Promise that resolves when the documents have been added. + */ + async addDocuments( + documents: Document[], + options?: { ids?: string[] } + ): Promise { + const texts = documents.map(({ pageContent }) => pageContent); + + return this.addVectors( + await this.embeddings.embedDocuments(texts), + documents, + options + ); + } + + /** + * Generates the SQL placeholders for a specific row at the provided index. + * + * @param index - The index of the row for which placeholders need to be generated. + * @returns The SQL placeholders for the row values. + */ + protected generatePlaceholderForRowAt( + // eslint-disable-next-line @typescript-eslint/no-explicit-any + row: (string | Record)[], + index: number + ): string { + const base = index * row.length; + return `(${row.map((_, j) => `$${base + 1 + j}`)})`; + } + + /** + * Constructs the SQL query for inserting rows into the specified table. + * + * @param rows - The rows of data to be inserted, consisting of values and records. + * @param chunkIndex - The starting index for generating query placeholders based on chunk positioning. + * @returns The complete SQL INSERT INTO query string. + */ + protected async runInsertQuery( + // eslint-disable-next-line @typescript-eslint/no-explicit-any + rows: (string | Record)[][], + useIdColumn: boolean + ) { + const values = rows.map((row, j) => + this.generatePlaceholderForRowAt(row, j) + ); + const flatValues = rows.flat(); + return this.client.query( + ` + INSERT INTO ${this.tableName} ( + ${useIdColumn ? 
`${this.idColumnName},` : ""} + ${this.contentColumnName}, + ${this.vectorColumnName}, + ${this.metadataColumnName} + ) VALUES ${values.join(", ")} + ON CONFLICT (${this.idColumnName}) + DO UPDATE + SET + ${this.contentColumnName} = EXCLUDED.${this.contentColumnName}, + ${this.vectorColumnName} = EXCLUDED.${this.vectorColumnName}, + ${this.metadataColumnName} = EXCLUDED.${this.metadataColumnName} + RETURNING ${this.idColumnName}`, + flatValues + ); + } + + /** + * Method to add vectors to the vector store. It converts the vectors into + * rows and inserts them into the database. + * + * @param vectors - Array of vectors. + * @param documents - Array of `Document` instances. + * @returns Promise that resolves when the vectors have been added. + */ + async addVectors( + vectors: number[][], + documents: Document[], + options?: { ids?: string[] } + ): Promise { + if (options?.ids !== undefined && options?.ids.length !== vectors.length) { + throw new Error( + `If provided, the length of "ids" must be the same as the number of vectors.` + ); + } + const rows = vectors.map((embedding, idx) => { + const embeddingString = `[${embedding.join(",")}]`; + const row = [ + documents[idx].pageContent, + embeddingString, + documents[idx].metadata, + ]; + if (options?.ids) { + return [options.ids[idx], ...row]; + } + return row; + }); + + const chunkSize = 500; + + const ids = []; + + for (let i = 0; i < rows.length; i += chunkSize) { + const chunk = rows.slice(i, i + chunkSize); + try { + const result = await this.runInsertQuery( + chunk, + options?.ids !== undefined + ); + ids.push(...result.rows.map((row) => row[this.idColumnName])); + } catch (e) { + console.error(e); + throw new Error(`Error inserting: ${(e as Error).message}`); + } + } + return ids; + } + + /** + * Method to perform a similarity search in the vector store. It returns + * the `k` most similar documents to the query vector, along with their + * similarity scores. + * + * @param query - Query vector. 
+ * @param k - Number of most similar documents to return. + * @param filter - Optional filter to apply to the search. + * @returns Promise that resolves with an array of tuples, each containing a `Document` and its similarity score. + */ + async similaritySearchVectorWithScore( + query: number[], + k: number, + filter?: this["FilterType"] + ): Promise<[Document, number][]> { + const embeddingString = `[${query.join(",")}]`; + const _filter: this["FilterType"] = filter ?? {}; + const whereClauses = []; + const values = [embeddingString, k]; + let paramCount = values.length; + + for (const [key, value] of Object.entries(_filter)) { + if (typeof value === "object" && value !== null) { + const currentParamCount = paramCount; + const placeholders = value.in + .map((_, index) => `$${currentParamCount + index + 1}`) + .join(","); + whereClauses.push( + `${this.metadataColumnName}->>'${key}' IN (${placeholders})` + ); + values.push(...value.in); + paramCount += value.in.length; + } else { + paramCount += 1; + whereClauses.push( + `${this.metadataColumnName}->>'${key}' = $${paramCount}` + ); + values.push(value); + } + } + + const whereClause = whereClauses.length + ? 
`WHERE ${whereClauses.join(" AND ")}` + : ""; + + const queryString = ` + SELECT *, ${this.vectorColumnName} <=> $1 as "_distance" + FROM ${this.tableName} + ${whereClause} + ORDER BY "_distance" ASC + LIMIT $2;`; + + const documents = (await this.client.query(queryString, values)).rows; + const results = [] as [Document, number][]; + for (const doc of documents) { + if (doc._distance != null && doc[this.contentColumnName] != null) { + const document = new Document({ + pageContent: doc[this.contentColumnName], + metadata: doc[this.metadataColumnName], + }); + results.push([document, doc._distance]); + } + } + return results; + } + + async delete(params: { ids?: string[]; deleteAll?: boolean }): Promise { + if (params.ids !== undefined) { + await this.client.query( + `DELETE FROM ${this.tableName} WHERE ${ + this.idColumnName + } IN (${params.ids.map((_, idx) => `$${idx + 1}`)})`, + params.ids + ); + } else if (params.deleteAll) { + await this.client.query(`TRUNCATE TABLE ${this.tableName}`); + } + } + + /** + * Method to ensure the existence of the table in the database. It creates + * the table if it does not already exist. + * + * @returns Promise that resolves when the table has been ensured. + */ + async ensureTableInDatabase(): Promise { + await this.client.query(`CREATE EXTENSION IF NOT EXISTS vector;`); + await this.client.query(`CREATE EXTENSION IF NOT EXISTS "uuid-ossp";`); + await this.client.query(`CREATE TABLE IF NOT EXISTS "${this.tableName}" ( + "${this.idColumnName}" uuid NOT NULL DEFAULT uuid_generate_v4() PRIMARY KEY, + "${this.contentColumnName}" text, + "${this.metadataColumnName}" jsonb, + "${this.vectorColumnName}" vector + );`); + } + + /** + * Static method to create a new `VercelPostgres` instance from an + * array of texts and their metadata. It converts the texts into + * `Document` instances and adds them to the store. + * + * @param texts - Array of texts. + * @param metadatas - Array of metadata objects or a single metadata object. 
+ * @param embeddings - Embeddings instance. + * @param fields - `VercelPostgres` configuration options. + * @returns Promise that resolves with a new instance of `VercelPostgres`. + */ + static async fromTexts( + texts: string[], + metadatas: object[] | object, + embeddings: Embeddings, + dbConfig?: Partial & { + postgresConnectionOptions?: VercelPostgresPoolConfig; + } + ): Promise { + const docs = []; + for (let i = 0; i < texts.length; i += 1) { + const metadata = Array.isArray(metadatas) ? metadatas[i] : metadatas; + const newDoc = new Document({ + pageContent: texts[i], + metadata, + }); + docs.push(newDoc); + } + + return this.fromDocuments(docs, embeddings, dbConfig); + } + + /** + * Static method to create a new `VercelPostgres` instance from an + * array of `Document` instances. It adds the documents to the store. + * + * @param docs - Array of `Document` instances. + * @param embeddings - Embeddings instance. + * @param fields - `VercelPostgres` configuration options. + * @returns Promise that resolves with a new instance of `VercelPostgres`. + */ + static async fromDocuments( + docs: Document[], + embeddings: Embeddings, + dbConfig?: Partial & { + postgresConnectionOptions?: VercelPostgresPoolConfig; + } + ): Promise { + const instance = await this.initialize(embeddings, dbConfig); + await instance.addDocuments(docs); + + return instance; + } + + /** + * Closes all the clients in the pool and terminates the pool. + * + * @returns Promise that resolves when all clients are closed and the pool is terminated. 
+ */ + async end(): Promise { + await this.client?.release(); + return this.pool.end(); + } +} diff --git a/libs/langchain-community/src/vectorstores/voy.ts b/libs/langchain-community/src/vectorstores/voy.ts new file mode 100644 index 000000000000..78bf0a19bee2 --- /dev/null +++ b/libs/langchain-community/src/vectorstores/voy.ts @@ -0,0 +1,191 @@ +import type { Voy as VoyOriginClient, SearchResult } from "voy-search"; +import { Embeddings } from "@langchain/core/embeddings"; +import { VectorStore } from "@langchain/core/vectorstores"; +import { Document } from "@langchain/core/documents" + +export type VoyClient = Omit< + VoyOriginClient, + "remove" | "size" | "serialize" | "free" +>; + +/** + * Internal interface for storing documents mappings. + */ +interface InternalDoc { + embeddings: number[]; + document: Document; +} + +/** + * Class that extends `VectorStore`. It allows to perform similarity search using + * Voi similarity search engine. The class requires passing Voy Client as an input parameter. + */ +export class VoyVectorStore extends VectorStore { + client: VoyClient; + + numDimensions: number | null = null; + + docstore: InternalDoc[] = []; + + _vectorstoreType(): string { + return "voi"; + } + + constructor(client: VoyClient, embeddings: Embeddings) { + super(embeddings, {}); + this.client = client; + this.embeddings = embeddings; + } + + /** + * Adds documents to the Voy database. The documents are embedded using embeddings provided while instantiating the class. + * @param documents An array of `Document` instances associated with the vectors. 
+ */ + async addDocuments(documents: Document[]): Promise { + const texts = documents.map(({ pageContent }) => pageContent); + if (documents.length === 0) { + return; + } + + const firstVector = ( + await this.embeddings.embedDocuments(texts.slice(0, 1)) + )[0]; + if (this.numDimensions === null) { + this.numDimensions = firstVector.length; + } else if (this.numDimensions !== firstVector.length) { + throw new Error( + `Vectors must have the same length as the number of dimensions (${this.numDimensions})` + ); + } + const restResults = await this.embeddings.embedDocuments(texts.slice(1)); + await this.addVectors([firstVector, ...restResults], documents); + } + + /** + * Adds vectors to the Voy database. The vectors are associated with + * the provided documents. + * @param vectors An array of vectors to be added to the database. + * @param documents An array of `Document` instances associated with the vectors. + */ + async addVectors(vectors: number[][], documents: Document[]): Promise { + if (vectors.length === 0) { + return; + } + if (this.numDimensions === null) { + this.numDimensions = vectors[0].length; + } + + if (vectors.length !== documents.length) { + throw new Error(`Vectors and metadata must have the same length`); + } + if (!vectors.every((v) => v.length === this.numDimensions)) { + throw new Error( + `Vectors must have the same length as the number of dimensions (${this.numDimensions})` + ); + } + + vectors.forEach((item, idx) => { + const doc = documents[idx]; + this.docstore.push({ embeddings: item, document: doc }); + }); + const embeddings = this.docstore.map((item, idx) => ({ + id: String(idx), + embeddings: item.embeddings, + title: "", + url: "", + })); + this.client.index({ embeddings }); + } + + /** + * Searches for vectors in the Voy database that are similar to the + * provided query vector. + * @param query The query vector. + * @param k The number of similar vectors to return. 
+ * @returns A promise that resolves with an array of tuples, each containing a `Document` instance and a similarity score. + */ + async similaritySearchVectorWithScore(query: number[], k: number) { + if (this.numDimensions === null) { + throw new Error("There aren't any elements in the index yet."); + } + if (query.length !== this.numDimensions) { + throw new Error( + `Query vector must have the same length as the number of dimensions (${this.numDimensions})` + ); + } + const itemsToQuery = Math.min(this.docstore.length, k); + if (itemsToQuery > this.docstore.length) { + console.warn( + `k (${k}) is greater than the number of elements in the index (${this.docstore.length}), setting k to ${itemsToQuery}` + ); + } + const results: SearchResult = this.client.search( + new Float32Array(query), + itemsToQuery + ); + return results.neighbors.map( + ({ id }, idx) => + [this.docstore[parseInt(id, 10)].document, idx] as [Document, number] + ); + } + + /** + * Method to delete data from the Voy index. It can delete data based + * on specific IDs or a filter. + * @param params Object that includes either an array of IDs or a filter for the data to be deleted. + * @returns Promise that resolves when the deletion is complete. + */ + async delete(params: { deleteAll?: boolean }): Promise { + if (params.deleteAll === true) { + await this.client.clear(); + } else { + throw new Error(`You must provide a "deleteAll" parameter.`); + } + } + + /** + * Creates a new `VoyVectorStore` instance from an array of text strings. The text + * strings are converted to `Document` instances and added to the Voy + * database. + * @param texts An array of text strings. + * @param metadatas An array of metadata objects or a single metadata object. If an array is provided, it must have the same length as the `texts` array. + * @param embeddings An `Embeddings` instance used to generate embeddings for the documents. + * @param client An instance of Voy client to use in the underlying operations. 
+ * @returns A promise that resolves with a new `VoyVectorStore` instance. + */ + static async fromTexts( + texts: string[], + metadatas: object[] | object, + embeddings: Embeddings, + client: VoyClient + ): Promise { + const docs: Document[] = []; + for (let i = 0; i < texts.length; i += 1) { + const metadata = Array.isArray(metadatas) ? metadatas[i] : metadatas; + const newDoc = new Document({ + pageContent: texts[i], + metadata, + }); + docs.push(newDoc); + } + return VoyVectorStore.fromDocuments(docs, embeddings, client); + } + + /** + * Creates a new `VoyVectorStore` instance from an array of `Document` instances. + * The documents are added to the Voy database. + * @param docs An array of `Document` instances. + * @param embeddings An `Embeddings` instance used to generate embeddings for the documents. + * @param client An instance of Voy client to use in the underlying operations. + * @returns A promise that resolves with a new `VoyVectorStore` instance. + */ + static async fromDocuments( + docs: Document[], + embeddings: Embeddings, + client: VoyClient + ): Promise { + const instance = new VoyVectorStore(client, embeddings); + await instance.addDocuments(docs); + return instance; + } +} diff --git a/libs/langchain-community/src/vectorstores/xata.ts b/libs/langchain-community/src/vectorstores/xata.ts new file mode 100644 index 000000000000..c87c21d7fb80 --- /dev/null +++ b/libs/langchain-community/src/vectorstores/xata.ts @@ -0,0 +1,149 @@ +import { BaseClient } from "@xata.io/client"; +import { Embeddings } from "@langchain/core/embeddings"; +import { VectorStore } from "@langchain/core/vectorstores"; +import { Document } from "@langchain/core/documents" + +/** + * Interface for the arguments required to create a XataClient. Includes + * the client instance and the table name. + */ +export interface XataClientArgs { + readonly client: XataClient; + readonly table: string; +} + +/** + * Type for the filter object used in Xata database queries. 
+ */ +type XataFilter = object; + +/** + * Class for interacting with a Xata database as a VectorStore. Provides + * methods to add documents and vectors to the database, delete entries, + * and perform similarity searches. + */ +export class XataVectorSearch< + XataClient extends BaseClient +> extends VectorStore { + declare FilterType: XataFilter; + + private readonly client: XataClient; + + private readonly table: string; + + _vectorstoreType(): string { + return "xata"; + } + + constructor(embeddings: Embeddings, args: XataClientArgs) { + super(embeddings, args); + + this.client = args.client; + this.table = args.table; + } + + /** + * Method to add documents to the Xata database. Maps the page content of + * each document, embeds the documents using the embeddings, and adds the + * vectors to the database. + * @param documents Array of documents to be added. + * @param options Optional object containing an array of ids. + * @returns Promise resolving to an array of ids of the added documents. + */ + async addDocuments(documents: Document[], options?: { ids?: string[] }) { + const texts = documents.map(({ pageContent }) => pageContent); + return this.addVectors( + await this.embeddings.embedDocuments(texts), + documents, + options + ); + } + + /** + * Method to add vectors to the Xata database. Maps each vector to a row + * with the document's content, embedding, and metadata. Creates or + * replaces these rows in the Xata database. + * @param vectors Array of vectors to be added. + * @param documents Array of documents corresponding to the vectors. + * @param options Optional object containing an array of ids. + * @returns Promise resolving to an array of ids of the added vectors. 
+ */ + async addVectors( + vectors: number[][], + documents: Document[], + options?: { ids?: string[] } + ) { + const rows = vectors + .map((embedding, idx) => ({ + content: documents[idx].pageContent, + embedding, + ...documents[idx].metadata, + })) + .map((row, idx) => { + if (options?.ids) { + return { id: options.ids[idx], ...row }; + } + return row; + }); + + const res = await this.client.db[this.table].createOrReplace(rows); + // Since we have an untyped BaseClient, it doesn't know the + // actual return type of the overload. + const results = res as unknown as { id: string }[]; + const returnedIds = results.map((row) => row.id); + return returnedIds; + } + + /** + * Method to delete entries from the Xata database. Deletes the entries + * with the provided ids. + * @param params Object containing an array of ids of the entries to be deleted. + * @returns Promise resolving to void. + */ + async delete(params: { ids: string[] }): Promise { + const { ids } = params; + await this.client.db[this.table].delete(ids); + } + + /** + * Method to perform a similarity search in the Xata database. Returns the + * k most similar documents along with their scores. + * @param query Query vector for the similarity search. + * @param k Number of most similar documents to return. + * @param filter Optional filter for the search. + * @returns Promise resolving to an array of tuples, each containing a Document and its score. 
+ */ + async similaritySearchVectorWithScore( + query: number[], + k: number, + filter?: XataFilter | undefined + ): Promise<[Document, number][]> { + const { records } = await this.client.db[this.table].vectorSearch( + "embedding", + query, + { + size: k, + filter, + } + ); + + return ( + // eslint-disable-next-line @typescript-eslint/no-explicit-any + records?.map((record: any) => [ + new Document({ + pageContent: record.content, + metadata: Object.fromEntries( + Object.entries(record).filter( + ([key]) => + key !== "content" && + key !== "embedding" && + key !== "xata" && + key !== "id" + ) + ), + }), + record.xata.score, + ]) ?? [] + ); + } +} diff --git a/yarn.lock b/yarn.lock index 15c5ef829f5e..0cfb1d44b958 100644 --- a/yarn.lock +++ b/yarn.lock @@ -8139,12 +8139,14 @@ __metadata: typescript: ~5.1.6 typesense: ^1.5.3 usearch: ^1.1.1 + uuid: ^9.0.0 vectordb: ^0.1.4 voy-search: 0.6.2 weaviate-ts-client: ^1.4.0 web-auth-library: ^1.0.3 youtube-transcript: ^1.0.6 youtubei.js: ^5.8.0 + zod: ^3.22.3 peerDependencies: "@aws-crypto/sha256-js": ^5.0.0 "@aws-sdk/client-bedrock-runtime": ^3.422.0 From 1c328e8694c4653912def99e015aac2eb313946c Mon Sep 17 00:00:00 2001 From: jacoblee93 Date: Thu, 7 Dec 2023 00:38:27 -0800 Subject: [PATCH 05/22] Lint, format build --- libs/langchain-community/.gitignore | 3 - libs/langchain-community/package.json | 6 +- .../scripts/create-entrypoints.js | 2 +- .../src/cache/cloudflare_kv.ts | 6 +- libs/langchain-community/src/cache/momento.ts | 6 +- .../src/callbacks/handlers/llmonitor.ts | 10 +- .../src/chat_models/baiduwenxin.ts | 14 +- .../src/chat_models/bedrock/web.ts | 15 +- .../src/chat_models/cloudflare_workersai.ts | 5 +- .../src/chat_models/fireworks.ts | 12 +- .../src/chat_models/googlepalm.ts | 7 +- .../src/chat_models/googlevertexai/common.ts | 10 +- .../src/chat_models/iflytek_xinghuo/common.ts | 16 +- .../src/chat_models/llama_cpp.ts | 15 +- .../src/chat_models/minimax.ts | 7 +- .../src/chat_models/ollama.ts | 7 +- 
.../src/chat_models/portkey.ts | 6 +- .../tests/chatbaiduwenxin.int.test.ts | 2 +- .../chat_models/tests/chatbedrock.int.test.ts | 2 +- .../src/chat_models/yandex.ts | 5 +- libs/langchain-community/src/index.ts | 1 + .../src/llms/aleph_alpha.ts | 4 +- .../src/llms/bedrock/web.ts | 2 +- .../src/llms/cloudflare_workersai.ts | 2 +- libs/langchain-community/src/llms/cohere.ts | 2 +- .../langchain-community/src/llms/fireworks.ts | 10 +- .../src/llms/googlepalm.ts | 4 +- .../src/llms/googlevertexai/common.ts | 8 +- .../src/llms/gradient_ai.ts | 10 +- libs/langchain-community/src/llms/hf.ts | 4 +- .../langchain-community/src/llms/llama_cpp.ts | 10 +- libs/langchain-community/src/llms/ollama.ts | 2 +- libs/langchain-community/src/llms/portkey.ts | 2 +- libs/langchain-community/src/llms/raycast.ts | 4 +- .../langchain-community/src/llms/replicate.ts | 2 +- .../src/llms/sagemaker_endpoint.ts | 10 +- .../tests/cloudflare_workersai.int.test.ts | 2 +- .../src/llms/tests/ollama.int.test.ts | 2 +- .../src/llms/watsonx_ai.ts | 10 +- libs/langchain-community/src/llms/writer.ts | 2 +- libs/langchain-community/src/llms/yandex.ts | 4 +- .../src/retrievers/chaindesk.ts | 10 +- .../src/retrievers/databerry.ts | 10 +- .../src/retrievers/supabase.ts | 5 +- .../src/retrievers/tavily_search_api.ts | 5 +- .../langchain-community/src/tools/aiplugin.ts | 4 +- libs/langchain-community/src/tools/aws_sfn.ts | 4 +- .../src/tools/bingserpapi.ts | 4 +- .../src/tools/brave_search.ts | 4 +- libs/langchain-community/src/tools/connery.ts | 7 +- .../src/tools/dadjokeapi.ts | 2 +- .../src/tools/dataforseo_api_search.ts | 4 +- .../src/tools/gmail/base.ts | 4 +- .../src/tools/google_custom_search.ts | 4 +- .../src/tools/google_places.ts | 4 +- libs/langchain-community/src/tools/ifttt.ts | 4 +- .../src/tools/searchapi.ts | 4 +- .../src/tools/searxng_search.ts | 2 +- libs/langchain-community/src/tools/serpapi.ts | 2 +- libs/langchain-community/src/tools/serper.ts | 4 +- .../src/tools/wikipedia_query_run.ts | 
2 +- .../src/tools/wolframalpha.ts | 2 +- .../src/types/openai-types.ts | 172 ------------------ .../src/util/googlevertexai-connection.ts | 5 +- .../src/vectorstores/cassandra.ts | 5 +- .../src/vectorstores/clickhouse.ts | 2 +- .../src/vectorstores/cloudflare_vectorize.ts | 7 +- .../src/vectorstores/convex.ts | 2 +- .../src/vectorstores/elasticsearch.ts | 2 +- .../src/vectorstores/lancedb.ts | 2 +- .../src/vectorstores/milvus.ts | 2 +- .../src/vectorstores/myscale.ts | 2 +- .../src/vectorstores/neo4j_vector.ts | 2 +- .../src/vectorstores/opensearch.ts | 2 +- .../src/vectorstores/prisma.ts | 2 +- .../src/vectorstores/qdrant.ts | 2 +- .../src/vectorstores/redis.ts | 2 +- .../src/vectorstores/rockset.ts | 2 +- .../src/vectorstores/singlestore.ts | 2 +- .../vectorstores/tests/clickhouse.int.test.ts | 2 +- .../src/vectorstores/tests/convex.int.test.ts | 43 ----- .../tests/convex/convex/_generated/api.d.ts | 39 ---- .../tests/convex/convex/_generated/api.js | 23 --- .../convex/convex/_generated/dataModel.d.ts | 56 ------ .../convex/convex/_generated/server.d.ts | 143 --------------- .../tests/convex/convex/_generated/server.js | 90 --------- .../tests/convex/convex/langchain/db.ts | 1 - .../vectorstores/tests/convex/convex/lib.ts | 45 ----- .../tests/convex/convex/schema.ts | 15 -- .../vectorstores/tests/convex/package.json | 8 - .../tests/singlestore.int.test.ts | 2 +- .../src/vectorstores/tigris.ts | 2 +- .../src/vectorstores/typeorm.ts | 2 +- .../src/vectorstores/typesense.ts | 7 +- .../src/vectorstores/vectara.ts | 2 +- .../src/vectorstores/vercel_postgres.ts | 2 +- .../src/vectorstores/voy.ts | 2 +- .../src/vectorstores/xata.ts | 2 +- yarn.lock | 16 ++ 99 files changed, 256 insertions(+), 805 deletions(-) create mode 100644 libs/langchain-community/src/index.ts delete mode 100644 libs/langchain-community/src/types/openai-types.ts delete mode 100644 libs/langchain-community/src/vectorstores/tests/convex.int.test.ts delete mode 100644 
libs/langchain-community/src/vectorstores/tests/convex/convex/_generated/api.d.ts delete mode 100644 libs/langchain-community/src/vectorstores/tests/convex/convex/_generated/api.js delete mode 100644 libs/langchain-community/src/vectorstores/tests/convex/convex/_generated/dataModel.d.ts delete mode 100644 libs/langchain-community/src/vectorstores/tests/convex/convex/_generated/server.d.ts delete mode 100644 libs/langchain-community/src/vectorstores/tests/convex/convex/_generated/server.js delete mode 100644 libs/langchain-community/src/vectorstores/tests/convex/convex/langchain/db.ts delete mode 100644 libs/langchain-community/src/vectorstores/tests/convex/convex/lib.ts delete mode 100644 libs/langchain-community/src/vectorstores/tests/convex/convex/schema.ts delete mode 100644 libs/langchain-community/src/vectorstores/tests/convex/package.json diff --git a/libs/langchain-community/.gitignore b/libs/langchain-community/.gitignore index 34d8c555299c..ac4231d97dad 100644 --- a/libs/langchain-community/.gitignore +++ b/libs/langchain-community/.gitignore @@ -310,6 +310,3 @@ cache/upstash_redis.d.ts graphs/neo4j_graph.cjs graphs/neo4j_graph.js graphs/neo4j_graph.d.ts -index.cjs -index.js -index.d.ts diff --git a/libs/langchain-community/package.json b/libs/langchain-community/package.json index ec3fcd3362fe..1fbb67b17711 100644 --- a/libs/langchain-community/package.json +++ b/libs/langchain-community/package.json @@ -35,6 +35,7 @@ "dependencies": { "@langchain/core": "~0.0.9", "@langchain/openai": "~0.0.1", + "langsmith": "~0.0.48", "uuid": "^9.0.0", "zod": "^3.22.3" }, @@ -1429,9 +1430,6 @@ "cache/upstash_redis.d.ts", "graphs/neo4j_graph.cjs", "graphs/neo4j_graph.js", - "graphs/neo4j_graph.d.ts", - "index.cjs", - "index.js", - "index.d.ts" + "graphs/neo4j_graph.d.ts" ] } diff --git a/libs/langchain-community/scripts/create-entrypoints.js b/libs/langchain-community/scripts/create-entrypoints.js index 9d1440a726f7..64e2c738a76d 100644 --- 
a/libs/langchain-community/scripts/create-entrypoints.js +++ b/libs/langchain-community/scripts/create-entrypoints.js @@ -206,7 +206,7 @@ const updateJsonFile = (relativePath, updateFunction) => { }; const generateFiles = () => { - const files = [...Object.entries(entrypoints), ["index", "index"]].flatMap( + const files = [...Object.entries(entrypoints)].flatMap( ([key, value]) => { const nrOfDots = key.split("/").length - 1; const relativePath = "../".repeat(nrOfDots) || "./"; diff --git a/libs/langchain-community/src/cache/cloudflare_kv.ts b/libs/langchain-community/src/cache/cloudflare_kv.ts index e03013b0b992..7e3e11eded77 100644 --- a/libs/langchain-community/src/cache/cloudflare_kv.ts +++ b/libs/langchain-community/src/cache/cloudflare_kv.ts @@ -1,9 +1,11 @@ import type { KVNamespace } from "@cloudflare/workers-types"; -import { BaseCache, +import { + BaseCache, getCacheKey, serializeGeneration, - deserializeStoredGeneration, } from "@langchain/core/caches"; + deserializeStoredGeneration, +} from "@langchain/core/caches"; import { Generation } from "@langchain/core/outputs"; /** diff --git a/libs/langchain-community/src/cache/momento.ts b/libs/langchain-community/src/cache/momento.ts index 0877a022ef4d..9243d5efbd34 100644 --- a/libs/langchain-community/src/cache/momento.ts +++ b/libs/langchain-community/src/cache/momento.ts @@ -6,10 +6,12 @@ import { InvalidArgumentError, } from "@gomomento/sdk-core"; -import { BaseCache, +import { + BaseCache, deserializeStoredGeneration, getCacheKey, - serializeGeneration, } from "@langchain/core/caches"; + serializeGeneration, +} from "@langchain/core/caches"; import { Generation } from "@langchain/core/outputs"; import { ensureCacheExists } from "../util/momento.js"; diff --git a/libs/langchain-community/src/callbacks/handlers/llmonitor.ts b/libs/langchain-community/src/callbacks/handlers/llmonitor.ts index f87ecea05599..8359704322e6 100644 --- a/libs/langchain-community/src/callbacks/handlers/llmonitor.ts +++ 
b/libs/langchain-community/src/callbacks/handlers/llmonitor.ts @@ -3,16 +3,16 @@ import { LLMonitorOptions, ChatMessage, cJSON } from "llmonitor/types"; import { BaseRun, RunUpdate as BaseRunUpdate, KVMap } from "langsmith/schemas"; import { getEnvironmentVariable } from "@langchain/core/utils/env"; -import { - BaseMessage, -} from "@langchain/core/messages"; +import { BaseMessage } from "@langchain/core/messages"; import { ChainValues } from "@langchain/core/utils/types"; import { LLMResult, Generation } from "@langchain/core/outputs"; -import { BaseCallbackHandler, BaseCallbackHandlerInput } from "@langchain/core/callbacks/base"; +import { + BaseCallbackHandler, + BaseCallbackHandlerInput, +} from "@langchain/core/callbacks/base"; import { Serialized } from "../../load/serializable.js"; - type Role = "user" | "ai" | "system" | "function" | "tool"; // Langchain Helpers diff --git a/libs/langchain-community/src/chat_models/baiduwenxin.ts b/libs/langchain-community/src/chat_models/baiduwenxin.ts index 46d0a6d952ca..b618af9a0bad 100644 --- a/libs/langchain-community/src/chat_models/baiduwenxin.ts +++ b/libs/langchain-community/src/chat_models/baiduwenxin.ts @@ -1,13 +1,9 @@ -import { BaseChatModel, type BaseChatModelParams } from "@langchain/core/language_models/chat_models"; import { - AIMessage, - BaseMessage, - ChatMessage, -} from "@langchain/core/messages"; -import { - ChatGeneration, - ChatResult -} from "@langchain/core/outputs"; + BaseChatModel, + type BaseChatModelParams, +} from "@langchain/core/language_models/chat_models"; +import { AIMessage, BaseMessage, ChatMessage } from "@langchain/core/messages"; +import { ChatGeneration, ChatResult } from "@langchain/core/outputs"; import { CallbackManagerForLLMRun } from "@langchain/core/callbacks/manager"; import { getEnvironmentVariable } from "@langchain/core/utils/env"; diff --git a/libs/langchain-community/src/chat_models/bedrock/web.ts b/libs/langchain-community/src/chat_models/bedrock/web.ts index 
f76518d00cd5..35581ed68483 100644 --- a/libs/langchain-community/src/chat_models/bedrock/web.ts +++ b/libs/langchain-community/src/chat_models/bedrock/web.ts @@ -5,13 +5,11 @@ import { fromUtf8, toUtf8 } from "@smithy/util-utf8"; import { Sha256 } from "@aws-crypto/sha256-js"; import { CallbackManagerForLLMRun } from "@langchain/core/callbacks/manager"; -import { SimpleChatModel, type BaseChatModelParams } from "@langchain/core/language_models/chat_models"; -import { getEnvironmentVariable } from "@langchain/core/utils/env"; import { - BaseBedrockInput, - BedrockLLMInputOutputAdapter, - type CredentialType, -} from "../../util/bedrock.js"; + SimpleChatModel, + type BaseChatModelParams, +} from "@langchain/core/language_models/chat_models"; +import { getEnvironmentVariable } from "@langchain/core/utils/env"; import { AIMessageChunk, BaseMessage, @@ -20,6 +18,11 @@ import { } from "@langchain/core/messages"; import { ChatGenerationChunk } from "@langchain/core/outputs"; +import { + BaseBedrockInput, + BedrockLLMInputOutputAdapter, + type CredentialType, +} from "../../util/bedrock.js"; import type { SerializedFields } from "../../load/map_keys.js"; function convertOneMessageToText( diff --git a/libs/langchain-community/src/chat_models/cloudflare_workersai.ts b/libs/langchain-community/src/chat_models/cloudflare_workersai.ts index dd34ecf2d177..72d793d11a8c 100644 --- a/libs/langchain-community/src/chat_models/cloudflare_workersai.ts +++ b/libs/langchain-community/src/chat_models/cloudflare_workersai.ts @@ -1,4 +1,7 @@ -import { SimpleChatModel, type BaseChatModelParams } from "@langchain/core/language_models/chat_models"; +import { + SimpleChatModel, + type BaseChatModelParams, +} from "@langchain/core/language_models/chat_models"; import type { BaseLanguageModelCallOptions } from "@langchain/core/language_models/base"; import { AIMessageChunk, diff --git a/libs/langchain-community/src/chat_models/fireworks.ts b/libs/langchain-community/src/chat_models/fireworks.ts 
index dfb225c74c4e..f40894b12de2 100644 --- a/libs/langchain-community/src/chat_models/fireworks.ts +++ b/libs/langchain-community/src/chat_models/fireworks.ts @@ -1,8 +1,12 @@ import type { BaseChatModelParams } from "@langchain/core/language_models/chat_models"; -import type { OpenAIClient, ChatOpenAICallOptions, OpenAIChatInput } from "@langchain/openai"; +import { + type OpenAIClient, + type ChatOpenAICallOptions, + type OpenAIChatInput, + type OpenAICoreRequestOptions, + ChatOpenAI, +} from "@langchain/openai"; -import type { OpenAICoreRequestOptions } from "../types/openai-types.js"; -import { ChatOpenAI } from "@langchain/openai"; import { getEnvironmentVariable } from "@langchain/core/utils/env"; type FireworksUnsupportedArgs = @@ -52,7 +56,7 @@ export class ChatFireworks extends ChatOpenAI { } lc_serializable = true; - + lc_namespace = ["langchain-community", "chat_models", this._llmType()]; fireworksApiKey?: string; diff --git a/libs/langchain-community/src/chat_models/googlepalm.ts b/libs/langchain-community/src/chat_models/googlepalm.ts index 0d05b306531e..21839a578ace 100644 --- a/libs/langchain-community/src/chat_models/googlepalm.ts +++ b/libs/langchain-community/src/chat_models/googlepalm.ts @@ -10,7 +10,10 @@ import { } from "@langchain/core/messages"; import { ChatResult } from "@langchain/core/outputs"; import { getEnvironmentVariable } from "@langchain/core/utils/env"; -import { BaseChatModel, type BaseChatModelParams } from "@langchain/core/language_models/chat_models"; +import { + BaseChatModel, + type BaseChatModelParams, +} from "@langchain/core/language_models/chat_models"; export type BaseMessageExamplePair = { input: BaseMessage; @@ -120,7 +123,7 @@ export class ChatGooglePaLM } lc_serializable = true; - + lc_namespace = ["langchain-community", "chat_models", this._llmType()]; get lc_secrets(): { [key: string]: string } | undefined { diff --git a/libs/langchain-community/src/chat_models/googlevertexai/common.ts 
b/libs/langchain-community/src/chat_models/googlevertexai/common.ts index 86cbec62f2d5..7d55deb143d4 100644 --- a/libs/langchain-community/src/chat_models/googlevertexai/common.ts +++ b/libs/langchain-community/src/chat_models/googlevertexai/common.ts @@ -1,17 +1,17 @@ import type { BaseLanguageModelCallOptions } from "@langchain/core/language_models/base"; import { BaseChatModel } from "@langchain/core/language_models/chat_models"; import { CallbackManagerForLLMRun } from "@langchain/core/callbacks/manager"; -import { +import { AIMessage, AIMessageChunk, BaseMessage, ChatMessage, } from "@langchain/core/messages"; -import { - ChatGeneration, +import { + ChatGeneration, ChatGenerationChunk, ChatResult, - LLMResult + LLMResult, } from "@langchain/core/outputs"; import { @@ -213,7 +213,7 @@ export class BaseChatGoogleVertexAI GoogleVertexAIChatPrediction, AuthOptions >; - + lc_namespace = ["langchain-community", "chat_models", this._llmType()]; get lc_aliases(): Record { diff --git a/libs/langchain-community/src/chat_models/iflytek_xinghuo/common.ts b/libs/langchain-community/src/chat_models/iflytek_xinghuo/common.ts index 7596c64a954a..9f736e1916d0 100644 --- a/libs/langchain-community/src/chat_models/iflytek_xinghuo/common.ts +++ b/libs/langchain-community/src/chat_models/iflytek_xinghuo/common.ts @@ -1,16 +1,12 @@ import { CallbackManagerForLLMRun } from "@langchain/core/callbacks/manager"; -import { - AIMessage, - BaseMessage, - ChatMessage, -} from "@langchain/core/messages"; -import { - ChatGeneration, - ChatResult -} from "@langchain/core/outputs"; +import { AIMessage, BaseMessage, ChatMessage } from "@langchain/core/messages"; +import { ChatGeneration, ChatResult } from "@langchain/core/outputs"; import { getEnvironmentVariable } from "@langchain/core/utils/env"; import { IterableReadableStream } from "@langchain/core/utils/stream"; -import { BaseChatModel, type BaseChatModelParams } from "@langchain/core/language_models/chat_models"; +import { + 
BaseChatModel, + type BaseChatModelParams, +} from "@langchain/core/language_models/chat_models"; import { BaseWebSocketStream, WebSocketStreamOptions, diff --git a/libs/langchain-community/src/chat_models/llama_cpp.ts b/libs/langchain-community/src/chat_models/llama_cpp.ts index 0579e8b39204..f17b5be3ea39 100644 --- a/libs/langchain-community/src/chat_models/llama_cpp.ts +++ b/libs/langchain-community/src/chat_models/llama_cpp.ts @@ -5,12 +5,10 @@ import { type ConversationInteraction, } from "node-llama-cpp"; -import { SimpleChatModel, type BaseChatModelParams } from "@langchain/core/language_models/chat_models"; import { - LlamaBaseCppInputs, - createLlamaModel, - createLlamaContext, -} from "../util/llama_cpp.js"; + SimpleChatModel, + type BaseChatModelParams, +} from "@langchain/core/language_models/chat_models"; import type { BaseLanguageModelCallOptions } from "@langchain/core/language_models/base"; import { CallbackManagerForLLMRun } from "@langchain/core/callbacks/manager"; import { @@ -19,6 +17,11 @@ import { ChatMessage, } from "@langchain/core/messages"; import { ChatGenerationChunk } from "@langchain/core/outputs"; +import { + LlamaBaseCppInputs, + createLlamaModel, + createLlamaContext, +} from "../util/llama_cpp.js"; /** * Note that the modelPath is the only required parameter. 
For testing you @@ -80,7 +83,7 @@ export class ChatLlamaCpp extends SimpleChatModel { _session: LlamaChatSession | null; lc_serializable = true; - + lc_namespace = ["langchain-community", "chat_models", this._llmType()]; static lc_name() { diff --git a/libs/langchain-community/src/chat_models/minimax.ts b/libs/langchain-community/src/chat_models/minimax.ts index fb5d9c422010..2435f1f83fd1 100644 --- a/libs/langchain-community/src/chat_models/minimax.ts +++ b/libs/langchain-community/src/chat_models/minimax.ts @@ -1,6 +1,9 @@ import type { OpenAIClient } from "@langchain/openai"; -import { BaseChatModel, type BaseChatModelParams } from "@langchain/core/language_models/chat_models"; +import { + BaseChatModel, + type BaseChatModelParams, +} from "@langchain/core/language_models/chat_models"; import { AIMessage, BaseMessage, @@ -328,7 +331,7 @@ export class ChatMinimax } lc_serializable = true; - + lc_namespace = ["langchain-community", "chat_models", this._llmType()]; minimaxGroupId?: string; diff --git a/libs/langchain-community/src/chat_models/ollama.ts b/libs/langchain-community/src/chat_models/ollama.ts index a7a1c8cda3b7..aaab895ea998 100644 --- a/libs/langchain-community/src/chat_models/ollama.ts +++ b/libs/langchain-community/src/chat_models/ollama.ts @@ -1,4 +1,7 @@ -import { SimpleChatModel, type BaseChatModelParams } from "@langchain/core/language_models/chat_models"; +import { + SimpleChatModel, + type BaseChatModelParams, +} from "@langchain/core/language_models/chat_models"; import type { BaseLanguageModelCallOptions } from "@langchain/core/language_models/base"; import { CallbackManagerForLLMRun } from "@langchain/core/callbacks/manager"; import { @@ -55,7 +58,7 @@ export class ChatOllama } lc_serializable = true; - + lc_namespace = ["langchain-community", "chat_models", this._llmType()]; model = "llama2"; diff --git a/libs/langchain-community/src/chat_models/portkey.ts b/libs/langchain-community/src/chat_models/portkey.ts index 
e026a5a16ded..ceb6d6996e12 100644 --- a/libs/langchain-community/src/chat_models/portkey.ts +++ b/libs/langchain-community/src/chat_models/portkey.ts @@ -12,10 +12,10 @@ import { SystemMessage, SystemMessageChunk, } from "@langchain/core/messages"; -import { +import { ChatResult, ChatGeneration, - ChatGenerationChunk + ChatGenerationChunk, } from "@langchain/core/outputs"; import { BaseChatModel } from "@langchain/core/language_models/chat_models"; @@ -80,7 +80,7 @@ export class PortkeyChat extends BaseChatModel { llms?: [LLMOptions] | null = undefined; session: PortkeySession; - + lc_namespace = ["langchain-community", "chat_models", this._llmType()]; constructor(init?: Partial) { diff --git a/libs/langchain-community/src/chat_models/tests/chatbaiduwenxin.int.test.ts b/libs/langchain-community/src/chat_models/tests/chatbaiduwenxin.int.test.ts index 8c15dabc7393..228f2c8f7865 100644 --- a/libs/langchain-community/src/chat_models/tests/chatbaiduwenxin.int.test.ts +++ b/libs/langchain-community/src/chat_models/tests/chatbaiduwenxin.int.test.ts @@ -1,6 +1,6 @@ import { test, expect } from "@jest/globals"; -import { ChatBaiduWenxin } from "../baiduwenxin.js"; import { SystemMessage, HumanMessage } from "@langchain/core/messages"; +import { ChatBaiduWenxin } from "../baiduwenxin.js"; interface TestConfig { modelName: string | undefined; diff --git a/libs/langchain-community/src/chat_models/tests/chatbedrock.int.test.ts b/libs/langchain-community/src/chat_models/tests/chatbedrock.int.test.ts index 2b218cd746e2..7415d67b9f45 100644 --- a/libs/langchain-community/src/chat_models/tests/chatbedrock.int.test.ts +++ b/libs/langchain-community/src/chat_models/tests/chatbedrock.int.test.ts @@ -2,8 +2,8 @@ /* eslint-disable @typescript-eslint/no-non-null-assertion */ import { test, expect } from "@jest/globals"; -import { BedrockChat } from "../bedrock/web.js"; import { HumanMessage } from "@langchain/core/messages"; +import { BedrockChat } from "../bedrock/web.js"; // void 
testChatModel( // "Test Bedrock chat model: Llama2 13B v1", diff --git a/libs/langchain-community/src/chat_models/yandex.ts b/libs/langchain-community/src/chat_models/yandex.ts index 459bd0d6dd4e..dd982510609e 100644 --- a/libs/langchain-community/src/chat_models/yandex.ts +++ b/libs/langchain-community/src/chat_models/yandex.ts @@ -1,8 +1,5 @@ import { CallbackManagerForLLMRun } from "@langchain/core/callbacks/manager"; -import { - AIMessage, - BaseMessage -} from "@langchain/core/messages"; +import { AIMessage, BaseMessage } from "@langchain/core/messages"; import { ChatResult, ChatGeneration } from "@langchain/core/outputs"; import { BaseChatModel } from "@langchain/core/language_models/chat_models"; import { getEnvironmentVariable } from "@langchain/core/utils/env"; diff --git a/libs/langchain-community/src/index.ts b/libs/langchain-community/src/index.ts new file mode 100644 index 000000000000..23ca3db4bfe4 --- /dev/null +++ b/libs/langchain-community/src/index.ts @@ -0,0 +1 @@ +// Empty \ No newline at end of file diff --git a/libs/langchain-community/src/llms/aleph_alpha.ts b/libs/langchain-community/src/llms/aleph_alpha.ts index c3731ecd1dd8..f53e0f19484c 100644 --- a/libs/langchain-community/src/llms/aleph_alpha.ts +++ b/libs/langchain-community/src/llms/aleph_alpha.ts @@ -50,9 +50,9 @@ export interface AlephAlphaInput extends BaseLLMParams { */ export class AlephAlpha extends LLM implements AlephAlphaInput { lc_serializable = true; - + lc_namespace = ["langchain-community", "llms", this._llmType()]; - + model = "luminous-base"; maximum_tokens = 64; diff --git a/libs/langchain-community/src/llms/bedrock/web.ts b/libs/langchain-community/src/llms/bedrock/web.ts index 9f9d280e6967..a5d56edd36e0 100644 --- a/libs/langchain-community/src/llms/bedrock/web.ts +++ b/libs/langchain-community/src/llms/bedrock/web.ts @@ -51,7 +51,7 @@ export class Bedrock extends LLM implements BaseBedrockInput { streaming = false; lc_serializable = true; - + lc_namespace = 
["langchain-community", "llms", this._llmType()]; get lc_aliases(): Record { diff --git a/libs/langchain-community/src/llms/cloudflare_workersai.ts b/libs/langchain-community/src/llms/cloudflare_workersai.ts index bba69550aa2b..fe9b878ef26f 100644 --- a/libs/langchain-community/src/llms/cloudflare_workersai.ts +++ b/libs/langchain-community/src/llms/cloudflare_workersai.ts @@ -40,7 +40,7 @@ export class CloudflareWorkersAI } lc_serializable = true; - + lc_namespace = ["langchain-community", "llms", this._llmType()]; constructor(fields?: CloudflareWorkersAIInput & BaseLLMParams) { diff --git a/libs/langchain-community/src/llms/cohere.ts b/libs/langchain-community/src/llms/cohere.ts index 234b98fb8663..2f9e86d573d3 100644 --- a/libs/langchain-community/src/llms/cohere.ts +++ b/libs/langchain-community/src/llms/cohere.ts @@ -54,7 +54,7 @@ export class Cohere extends LLM implements CohereInput { } lc_serializable = true; - + lc_namespace = ["langchain-community", "llms", this._llmType()]; temperature = 0; diff --git a/libs/langchain-community/src/llms/fireworks.ts b/libs/langchain-community/src/llms/fireworks.ts index e2d82bb2bd41..ae53c84eb693 100644 --- a/libs/langchain-community/src/llms/fireworks.ts +++ b/libs/langchain-community/src/llms/fireworks.ts @@ -1,4 +1,10 @@ -import { type OpenAIClient, type OpenAICallOptions, type OpenAIInput, type OpenAICoreRequestOptions, OpenAI } from "@langchain/openai"; +import { + type OpenAIClient, + type OpenAICallOptions, + type OpenAIInput, + type OpenAICoreRequestOptions, + OpenAI, +} from "@langchain/openai"; import type { BaseLLMParams } from "@langchain/core/language_models/llms"; import { getEnvironmentVariable } from "@langchain/core/utils/env"; @@ -39,7 +45,7 @@ export class Fireworks extends OpenAI { } lc_serializable = true; - + lc_namespace = ["langchain-community", "llms", this._llmType()]; fireworksApiKey?: string; diff --git a/libs/langchain-community/src/llms/googlepalm.ts 
b/libs/langchain-community/src/llms/googlepalm.ts index 82c1f9249402..626a0902b54c 100644 --- a/libs/langchain-community/src/llms/googlepalm.ts +++ b/libs/langchain-community/src/llms/googlepalm.ts @@ -85,9 +85,9 @@ export interface GooglePaLMTextInput extends BaseLLMParams { */ export class GooglePaLM extends LLM implements GooglePaLMTextInput { lc_serializable = true; - + lc_namespace = ["langchain-community", "llms", this._llmType()]; - + get lc_secrets(): { [key: string]: string } | undefined { return { apiKey: "GOOGLE_PALM_API_KEY", diff --git a/libs/langchain-community/src/llms/googlevertexai/common.ts b/libs/langchain-community/src/llms/googlevertexai/common.ts index 00dc82ca6eed..58ab3007ac94 100644 --- a/libs/langchain-community/src/llms/googlevertexai/common.ts +++ b/libs/langchain-community/src/llms/googlevertexai/common.ts @@ -1,5 +1,9 @@ import { BaseLLM } from "@langchain/core/language_models/llms"; -import { Generation, GenerationChunk, LLMResult } from "@langchain/core/outputs"; +import { + Generation, + GenerationChunk, + LLMResult, +} from "@langchain/core/outputs"; import type { BaseLanguageModelCallOptions } from "@langchain/core/language_models/base"; import { CallbackManagerForLLMRun } from "@langchain/core/callbacks/manager"; @@ -56,7 +60,7 @@ export class BaseGoogleVertexAI implements GoogleVertexAIBaseLLMInput { lc_serializable = true; - + lc_namespace = ["langchain-community", "llms", this._llmType()]; model = "text-bison"; diff --git a/libs/langchain-community/src/llms/gradient_ai.ts b/libs/langchain-community/src/llms/gradient_ai.ts index a315ae21a223..028df1ff87ae 100644 --- a/libs/langchain-community/src/llms/gradient_ai.ts +++ b/libs/langchain-community/src/llms/gradient_ai.ts @@ -1,5 +1,9 @@ import { Gradient } from "@gradientai/nodejs-sdk"; -import { type BaseLLMCallOptions, type BaseLLMParams, LLM } from "@langchain/core/language_models/llms"; +import { + type BaseLLMCallOptions, + type BaseLLMParams, + LLM, +} from 
"@langchain/core/language_models/llms"; import { getEnvironmentVariable } from "@langchain/core/utils/env"; /** @@ -56,9 +60,9 @@ export class GradientLLM extends LLM { workspaceId?: string; inferenceParameters?: Record; - + lc_serializable = true; - + lc_namespace = ["langchain-community", "llms", this._llmType()]; // Gradient AI does not export the BaseModel type. Once it does, we can use it here. diff --git a/libs/langchain-community/src/llms/hf.ts b/libs/langchain-community/src/llms/hf.ts index 7f5020ed06af..d2833193065b 100644 --- a/libs/langchain-community/src/llms/hf.ts +++ b/libs/langchain-community/src/llms/hf.ts @@ -57,9 +57,9 @@ export interface HFInput { */ export class HuggingFaceInference extends LLM implements HFInput { lc_serializable = true; - + lc_namespace = ["langchain-community", "llms", this._llmType()]; - + get lc_secrets(): { [key: string]: string } | undefined { return { apiKey: "HUGGINGFACEHUB_API_KEY", diff --git a/libs/langchain-community/src/llms/llama_cpp.ts b/libs/langchain-community/src/llms/llama_cpp.ts index 55085ddeec6f..d5877fd34588 100644 --- a/libs/langchain-community/src/llms/llama_cpp.ts +++ b/libs/langchain-community/src/llms/llama_cpp.ts @@ -1,5 +1,9 @@ import { LlamaModel, LlamaContext, LlamaChatSession } from "node-llama-cpp"; -import { LLM, type BaseLLMCallOptions, type BaseLLMParams } from "@langchain/core/language_models/llms"; +import { + LLM, + type BaseLLMCallOptions, + type BaseLLMParams, +} from "@langchain/core/language_models/llms"; import { CallbackManagerForLLMRun } from "@langchain/core/callbacks/manager"; import { GenerationChunk } from "@langchain/core/outputs"; @@ -31,9 +35,9 @@ export interface LlamaCppCallOptions extends BaseLLMCallOptions { */ export class LlamaCpp extends LLM { lc_serializable = true; - + lc_namespace = ["langchain-community", "llms", this._llmType()]; - + declare CallOptions: LlamaCppCallOptions; static inputs: LlamaCppInputs; diff --git a/libs/langchain-community/src/llms/ollama.ts 
b/libs/langchain-community/src/llms/ollama.ts index 6e857bbec0d1..31fb52e6b100 100644 --- a/libs/langchain-community/src/llms/ollama.ts +++ b/libs/langchain-community/src/llms/ollama.ts @@ -38,7 +38,7 @@ export class Ollama extends LLM implements OllamaInput { } lc_serializable = true; - + lc_namespace = ["langchain-community", "llms", this._llmType()]; model = "llama2"; diff --git a/libs/langchain-community/src/llms/portkey.ts b/libs/langchain-community/src/llms/portkey.ts index 3eb6be719fc9..f2158c1eb720 100644 --- a/libs/langchain-community/src/llms/portkey.ts +++ b/libs/langchain-community/src/llms/portkey.ts @@ -96,7 +96,7 @@ export class Portkey extends BaseLLM { llms?: [LLMOptions] | null = undefined; session: PortkeySession; - + lc_namespace = ["langchain-community", "llms", this._llmType()]; constructor(init?: Partial) { diff --git a/libs/langchain-community/src/llms/raycast.ts b/libs/langchain-community/src/llms/raycast.ts index 257298079b5b..f2bcee6b85d5 100644 --- a/libs/langchain-community/src/llms/raycast.ts +++ b/libs/langchain-community/src/llms/raycast.ts @@ -20,9 +20,9 @@ const wait = (ms: number) => */ export class RaycastAI extends LLM implements RaycastAIInput { lc_serializable = true; - + lc_namespace = ["langchain-community", "llms", this._llmType()]; - + /** * The model to use for generating text. 
*/ diff --git a/libs/langchain-community/src/llms/replicate.ts b/libs/langchain-community/src/llms/replicate.ts index 80df0d2bc211..7656993e1b49 100644 --- a/libs/langchain-community/src/llms/replicate.ts +++ b/libs/langchain-community/src/llms/replicate.ts @@ -50,7 +50,7 @@ export class Replicate extends LLM implements ReplicateInput { } lc_serializable = true; - + lc_namespace = ["langchain-community", "llms", this._llmType()]; model: ReplicateInput["model"]; diff --git a/libs/langchain-community/src/llms/sagemaker_endpoint.ts b/libs/langchain-community/src/llms/sagemaker_endpoint.ts index a8f63140d6f5..10aa46484a19 100644 --- a/libs/langchain-community/src/llms/sagemaker_endpoint.ts +++ b/libs/langchain-community/src/llms/sagemaker_endpoint.ts @@ -6,7 +6,11 @@ import { } from "@aws-sdk/client-sagemaker-runtime"; import { CallbackManagerForLLMRun } from "@langchain/core/callbacks/manager"; import { GenerationChunk } from "@langchain/core/outputs"; -import { type BaseLLMCallOptions, type BaseLLMParams, LLM } from "@langchain/core/language_models/llms"; +import { + type BaseLLMCallOptions, + type BaseLLMParams, + LLM, +} from "@langchain/core/language_models/llms"; /** * A handler class to transform input from LLM to a format that SageMaker @@ -107,9 +111,9 @@ export interface SageMakerEndpointInput extends BaseLLMParams { */ export class SageMakerEndpoint extends LLM { lc_serializable = true; - + lc_namespace = ["langchain-community", "llms", this._llmType()]; - + static lc_name() { return "SageMakerEndpoint"; } diff --git a/libs/langchain-community/src/llms/tests/cloudflare_workersai.int.test.ts b/libs/langchain-community/src/llms/tests/cloudflare_workersai.int.test.ts index 802ccf659970..e43b953cee0c 100644 --- a/libs/langchain-community/src/llms/tests/cloudflare_workersai.int.test.ts +++ b/libs/langchain-community/src/llms/tests/cloudflare_workersai.int.test.ts @@ -1,6 +1,6 @@ import { test } from "@jest/globals"; -import { CloudflareWorkersAI } from 
"../cloudflare_workersai.js"; import { getEnvironmentVariable } from "@langchain/core/utils/env"; +import { CloudflareWorkersAI } from "../cloudflare_workersai.js"; test("Test CloudflareWorkersAI", async () => { const model = new CloudflareWorkersAI({}); diff --git a/libs/langchain-community/src/llms/tests/ollama.int.test.ts b/libs/langchain-community/src/llms/tests/ollama.int.test.ts index 13688fe85b22..b4c3d66118b4 100644 --- a/libs/langchain-community/src/llms/tests/ollama.int.test.ts +++ b/libs/langchain-community/src/llms/tests/ollama.int.test.ts @@ -1,10 +1,10 @@ import { test } from "@jest/globals"; -import { Ollama } from "../ollama.js"; import { PromptTemplate } from "@langchain/core/prompts"; import { BytesOutputParser, StringOutputParser, } from "@langchain/core/output_parsers"; +import { Ollama } from "../ollama.js"; test.skip("test call", async () => { const ollama = new Ollama({}); diff --git a/libs/langchain-community/src/llms/watsonx_ai.ts b/libs/langchain-community/src/llms/watsonx_ai.ts index 7abf9585f820..bf64a470be3a 100644 --- a/libs/langchain-community/src/llms/watsonx_ai.ts +++ b/libs/langchain-community/src/llms/watsonx_ai.ts @@ -1,4 +1,8 @@ -import { type BaseLLMCallOptions, type BaseLLMParams, LLM } from "@langchain/core/language_models/llms"; +import { + type BaseLLMCallOptions, + type BaseLLMParams, + LLM, +} from "@langchain/core/language_models/llms"; import { getEnvironmentVariable } from "@langchain/core/utils/env"; /** @@ -53,9 +57,9 @@ const endpointConstructor = (region: string, version: string) => export class WatsonxAI extends LLM { lc_serializable = true; - + lc_namespace = ["langchain-community", "llms", this._llmType()]; - + static lc_name() { return "WatsonxAI"; } diff --git a/libs/langchain-community/src/llms/writer.ts b/libs/langchain-community/src/llms/writer.ts index 8404cfc2088e..aa7a7a63b9c0 100644 --- a/libs/langchain-community/src/llms/writer.ts +++ b/libs/langchain-community/src/llms/writer.ts @@ -68,7 +68,7 @@ 
export class Writer extends LLM implements WriterInput { } lc_serializable = true; - + lc_namespace = ["langchain-community", "llms", this._llmType()]; apiKey: string; diff --git a/libs/langchain-community/src/llms/yandex.ts b/libs/langchain-community/src/llms/yandex.ts index a98157f70848..6482035aa128 100644 --- a/libs/langchain-community/src/llms/yandex.ts +++ b/libs/langchain-community/src/llms/yandex.ts @@ -34,9 +34,9 @@ export interface YandexGPTInputs extends BaseLLMParams { export class YandexGPT extends LLM implements YandexGPTInputs { lc_serializable = true; - + lc_namespace = ["langchain-community", "llms", this._llmType()]; - + static lc_name() { return "Yandex GPT"; } diff --git a/libs/langchain-community/src/retrievers/chaindesk.ts b/libs/langchain-community/src/retrievers/chaindesk.ts index 317006c8f29d..08d6cd946fa1 100644 --- a/libs/langchain-community/src/retrievers/chaindesk.ts +++ b/libs/langchain-community/src/retrievers/chaindesk.ts @@ -1,6 +1,12 @@ -import { BaseRetriever, type BaseRetrieverInput } from "@langchain/core/retrievers"; +import { + BaseRetriever, + type BaseRetrieverInput, +} from "@langchain/core/retrievers"; import { Document } from "@langchain/core/documents"; -import { AsyncCaller, type AsyncCallerParams } from "@langchain/core/utils/async_caller"; +import { + AsyncCaller, + type AsyncCallerParams, +} from "@langchain/core/utils/async_caller"; export interface ChaindeskRetrieverArgs extends AsyncCallerParams, diff --git a/libs/langchain-community/src/retrievers/databerry.ts b/libs/langchain-community/src/retrievers/databerry.ts index c38fbf5816e3..6b34541121cd 100644 --- a/libs/langchain-community/src/retrievers/databerry.ts +++ b/libs/langchain-community/src/retrievers/databerry.ts @@ -1,6 +1,12 @@ -import { BaseRetriever, type BaseRetrieverInput } from "@langchain/core/retrievers"; +import { + BaseRetriever, + type BaseRetrieverInput, +} from "@langchain/core/retrievers"; import { Document } from "@langchain/core/documents"; 
-import { AsyncCaller, AsyncCallerParams } from "@langchain/core/utils/async_caller"; +import { + AsyncCaller, + AsyncCallerParams, +} from "@langchain/core/utils/async_caller"; /** * Interface for the arguments required to create a new instance of diff --git a/libs/langchain-community/src/retrievers/supabase.ts b/libs/langchain-community/src/retrievers/supabase.ts index ba4f66bff283..022bfc339570 100644 --- a/libs/langchain-community/src/retrievers/supabase.ts +++ b/libs/langchain-community/src/retrievers/supabase.ts @@ -1,7 +1,10 @@ import type { SupabaseClient } from "@supabase/supabase-js"; import { Embeddings } from "@langchain/core/embeddings"; import { Document } from "@langchain/core/documents"; -import { BaseRetriever, type BaseRetrieverInput } from "@langchain/core/retrievers"; +import { + BaseRetriever, + type BaseRetrieverInput, +} from "@langchain/core/retrievers"; import { CallbackManagerForRetrieverRun, Callbacks, diff --git a/libs/langchain-community/src/retrievers/tavily_search_api.ts b/libs/langchain-community/src/retrievers/tavily_search_api.ts index d8ba3e112cc7..86d22ce1d048 100644 --- a/libs/langchain-community/src/retrievers/tavily_search_api.ts +++ b/libs/langchain-community/src/retrievers/tavily_search_api.ts @@ -1,6 +1,9 @@ import { Document } from "@langchain/core/documents"; import { CallbackManagerForRetrieverRun } from "@langchain/core/callbacks/manager"; -import { BaseRetriever, type BaseRetrieverInput } from "@langchain/core/retrievers"; +import { + BaseRetriever, + type BaseRetrieverInput, +} from "@langchain/core/retrievers"; import { getEnvironmentVariable } from "@langchain/core/utils/env"; /** diff --git a/libs/langchain-community/src/tools/aiplugin.ts b/libs/langchain-community/src/tools/aiplugin.ts index 5e50f4309c0e..394d557df7ff 100644 --- a/libs/langchain-community/src/tools/aiplugin.ts +++ b/libs/langchain-community/src/tools/aiplugin.ts @@ -18,9 +18,9 @@ export class AIPluginTool extends Tool implements AIPluginToolParams 
{ static lc_name() { return "AIPluginTool"; } - + get lc_namespace() { - return ["langchain-community", "tools"]; + return ["langchain-community", "tools"]; } private _name: string; diff --git a/libs/langchain-community/src/tools/aws_sfn.ts b/libs/langchain-community/src/tools/aws_sfn.ts index 3c11430cf28d..4af70e7d784b 100644 --- a/libs/langchain-community/src/tools/aws_sfn.ts +++ b/libs/langchain-community/src/tools/aws_sfn.ts @@ -41,9 +41,9 @@ export class StartExecutionAWSSfnTool extends Tool { public name: string; public description: string; - + get lc_namespace() { - return ["langchain-community", "tools"]; + return ["langchain-community", "tools"]; } constructor({ diff --git a/libs/langchain-community/src/tools/bingserpapi.ts b/libs/langchain-community/src/tools/bingserpapi.ts index 5c0bec62f6a1..734994c42fab 100644 --- a/libs/langchain-community/src/tools/bingserpapi.ts +++ b/libs/langchain-community/src/tools/bingserpapi.ts @@ -12,9 +12,9 @@ class BingSerpAPI extends Tool { static lc_name() { return "BingSerpAPI"; } - + get lc_namespace() { - return ["langchain-community", "tools"]; + return ["langchain-community", "tools"]; } /** diff --git a/libs/langchain-community/src/tools/brave_search.ts b/libs/langchain-community/src/tools/brave_search.ts index 10e1380ff365..ebd42496bf9e 100644 --- a/libs/langchain-community/src/tools/brave_search.ts +++ b/libs/langchain-community/src/tools/brave_search.ts @@ -19,9 +19,9 @@ export class BraveSearch extends Tool { static lc_name() { return "BraveSearch"; } - + get lc_namespace() { - return ["langchain-community", "tools"]; + return ["langchain-community", "tools"]; } name = "brave-search"; diff --git a/libs/langchain-community/src/tools/connery.ts b/libs/langchain-community/src/tools/connery.ts index 86f28093414c..307da0cfc4ca 100644 --- a/libs/langchain-community/src/tools/connery.ts +++ b/libs/langchain-community/src/tools/connery.ts @@ -1,4 +1,7 @@ -import { AsyncCaller, AsyncCallerParams } from 
"@langchain/core/utils/async_caller"; +import { + AsyncCaller, + AsyncCallerParams, +} from "@langchain/core/utils/async_caller"; import { getEnvironmentVariable } from "@langchain/core/utils/env"; import { Tool } from "@langchain/core/tools"; @@ -66,7 +69,7 @@ type RunActionResult = { */ export class ConneryAction extends Tool { get lc_namespace() { - return ["langchain-community", "tools"]; + return ["langchain-community", "tools"]; } name: string; diff --git a/libs/langchain-community/src/tools/dadjokeapi.ts b/libs/langchain-community/src/tools/dadjokeapi.ts index c367ad8ac481..a8c970267449 100644 --- a/libs/langchain-community/src/tools/dadjokeapi.ts +++ b/libs/langchain-community/src/tools/dadjokeapi.ts @@ -12,7 +12,7 @@ class DadJokeAPI extends Tool { } get lc_namespace() { - return ["langchain-community", "tools"]; + return ["langchain-community", "tools"]; } name = "dadjoke"; diff --git a/libs/langchain-community/src/tools/dataforseo_api_search.ts b/libs/langchain-community/src/tools/dataforseo_api_search.ts index 450da1eecc4b..05d3d7934aa2 100644 --- a/libs/langchain-community/src/tools/dataforseo_api_search.ts +++ b/libs/langchain-community/src/tools/dataforseo_api_search.ts @@ -100,9 +100,9 @@ export class DataForSeoAPISearch extends Tool { static lc_name() { return "DataForSeoAPISearch"; } - + get lc_namespace() { - return ["langchain-community", "tools"]; + return ["langchain-community", "tools"]; } name = "dataforseo-api-wrapper"; diff --git a/libs/langchain-community/src/tools/gmail/base.ts b/libs/langchain-community/src/tools/gmail/base.ts index eedf3776bc2f..5009768fcbb1 100644 --- a/libs/langchain-community/src/tools/gmail/base.ts +++ b/libs/langchain-community/src/tools/gmail/base.ts @@ -45,9 +45,9 @@ export abstract class GmailBaseTool extends StructuredTool { name = "Gmail"; description = "A tool to send and view emails through Gmail"; - + get lc_namespace() { - return ["langchain-community", "tools"]; + return ["langchain-community", "tools"]; 
} protected gmail: gmail_v1.Gmail; diff --git a/libs/langchain-community/src/tools/google_custom_search.ts b/libs/langchain-community/src/tools/google_custom_search.ts index 47b5ab6c024c..4c4ba2ea8cbc 100644 --- a/libs/langchain-community/src/tools/google_custom_search.ts +++ b/libs/langchain-community/src/tools/google_custom_search.ts @@ -24,9 +24,9 @@ export class GoogleCustomSearch extends Tool { apiKey: "GOOGLE_API_KEY", }; } - + get lc_namespace() { - return ["langchain-community", "tools"]; + return ["langchain-community", "tools"]; } name = "google-custom-search"; diff --git a/libs/langchain-community/src/tools/google_places.ts b/libs/langchain-community/src/tools/google_places.ts index 849e812dde95..ecf4aebbbcf3 100644 --- a/libs/langchain-community/src/tools/google_places.ts +++ b/libs/langchain-community/src/tools/google_places.ts @@ -15,9 +15,9 @@ export class GooglePlacesAPI extends Tool { static lc_name() { return "GooglePlacesAPI"; } - + get lc_namespace() { - return ["langchain-community", "tools"]; + return ["langchain-community", "tools"]; } get lc_secrets(): { [key: string]: string } | undefined { diff --git a/libs/langchain-community/src/tools/ifttt.ts b/libs/langchain-community/src/tools/ifttt.ts index 72ea904f048b..231f3d97b32a 100644 --- a/libs/langchain-community/src/tools/ifttt.ts +++ b/libs/langchain-community/src/tools/ifttt.ts @@ -44,9 +44,9 @@ export class IFTTTWebhook extends Tool { static lc_name() { return "IFTTTWebhook"; } - + get lc_namespace() { - return ["langchain-community", "tools"]; + return ["langchain-community", "tools"]; } private url: string; diff --git a/libs/langchain-community/src/tools/searchapi.ts b/libs/langchain-community/src/tools/searchapi.ts index 201354730299..b404e686fe7b 100644 --- a/libs/langchain-community/src/tools/searchapi.ts +++ b/libs/langchain-community/src/tools/searchapi.ts @@ -66,9 +66,9 @@ export class SearchApi extends Tool { static lc_name() { return "SearchApi"; } - + get lc_namespace() { - 
return ["langchain-community", "tools"]; + return ["langchain-community", "tools"]; } /** diff --git a/libs/langchain-community/src/tools/searxng_search.ts b/libs/langchain-community/src/tools/searxng_search.ts index 7f2a404bfee6..6be674fef1ce 100644 --- a/libs/langchain-community/src/tools/searxng_search.ts +++ b/libs/langchain-community/src/tools/searxng_search.ts @@ -117,7 +117,7 @@ export class SearxngSearch extends Tool { } get lc_namespace() { - return ["langchain-community", "tools"]; + return ["langchain-community", "tools"]; } name = "searxng-search"; diff --git a/libs/langchain-community/src/tools/serpapi.ts b/libs/langchain-community/src/tools/serpapi.ts index 635afa8bb4a1..06ac92d619ba 100644 --- a/libs/langchain-community/src/tools/serpapi.ts +++ b/libs/langchain-community/src/tools/serpapi.ts @@ -295,7 +295,7 @@ export class SerpAPI extends Tool { } get lc_namespace() { - return ["langchain-community", "tools"]; + return ["langchain-community", "tools"]; } toJSON() { diff --git a/libs/langchain-community/src/tools/serper.ts b/libs/langchain-community/src/tools/serper.ts index ad9d38b4184a..5c9327938ec2 100644 --- a/libs/langchain-community/src/tools/serper.ts +++ b/libs/langchain-community/src/tools/serper.ts @@ -21,9 +21,9 @@ export class Serper extends Tool { static lc_name() { return "Serper"; } - + get lc_namespace() { - return ["langchain-community", "tools"]; + return ["langchain-community", "tools"]; } /** diff --git a/libs/langchain-community/src/tools/wikipedia_query_run.ts b/libs/langchain-community/src/tools/wikipedia_query_run.ts index 0d8f145fafe3..9106989cf381 100644 --- a/libs/langchain-community/src/tools/wikipedia_query_run.ts +++ b/libs/langchain-community/src/tools/wikipedia_query_run.ts @@ -70,7 +70,7 @@ export class WikipediaQueryRun extends Tool { } get lc_namespace() { - return ["langchain-community", "tools"]; + return ["langchain-community", "tools"]; } name = "wikipedia-api"; diff --git 
a/libs/langchain-community/src/tools/wolframalpha.ts b/libs/langchain-community/src/tools/wolframalpha.ts index 730087e22c59..2e5390a834f0 100644 --- a/libs/langchain-community/src/tools/wolframalpha.ts +++ b/libs/langchain-community/src/tools/wolframalpha.ts @@ -23,7 +23,7 @@ export class WolframAlphaTool extends Tool { } get lc_namespace() { - return ["langchain-community", "tools"]; + return ["langchain-community", "tools"]; } static lc_name() { diff --git a/libs/langchain-community/src/types/openai-types.ts b/libs/langchain-community/src/types/openai-types.ts deleted file mode 100644 index cf4aa3f80665..000000000000 --- a/libs/langchain-community/src/types/openai-types.ts +++ /dev/null @@ -1,172 +0,0 @@ -import type { OpenAIClient } from "@langchain/openai"; - -import { TiktokenModel } from "js-tiktoken/lite"; -import { BaseLanguageModelCallOptions } from "@langchain/core/language_models/base"; - -// reexport this type from the included package so we can easily override and extend it if needed in the future -// also makes it easier for folks to import this type without digging around into the dependent packages -export type { TiktokenModel }; - -export declare interface OpenAIBaseInput { - /** Sampling temperature to use */ - temperature: number; - - /** - * Maximum number of tokens to generate in the completion. -1 returns as many - * tokens as possible given the prompt and the model's maximum context size. - */ - maxTokens?: number; - - /** Total probability mass of tokens to consider at each step */ - topP: number; - - /** Penalizes repeated tokens according to frequency */ - frequencyPenalty: number; - - /** Penalizes repeated tokens */ - presencePenalty: number; - - /** Number of completions to generate for each prompt */ - n: number; - - /** Dictionary used to adjust the probability of specific tokens being generated */ - logitBias?: Record; - - /** Unique string identifier representing your end-user, which can help OpenAI to monitor and detect abuse. 
*/ - user?: string; - - /** Whether to stream the results or not. Enabling disables tokenUsage reporting */ - streaming: boolean; - - /** Model name to use */ - modelName: string; - - /** Holds any additional parameters that are valid to pass to {@link - * https://platform.openai.com/docs/api-reference/completions/create | - * `openai.createCompletion`} that are not explicitly specified on this class. - */ - // eslint-disable-next-line @typescript-eslint/no-explicit-any - modelKwargs?: Record; - - /** List of stop words to use when generating */ - stop?: string[]; - - /** - * Timeout to use when making requests to OpenAI. - */ - timeout?: number; - - /** - * API key to use when making requests to OpenAI. Defaults to the value of - * `OPENAI_API_KEY` environment variable. - */ - openAIApiKey?: string; -} - -// TODO use OpenAI.Core.RequestOptions when SDK is updated to make it available -export type OpenAICoreRequestOptions< - Req extends object = Record -> = { - path?: string; - query?: Req | undefined; - body?: Req | undefined; - headers?: Record | undefined; - - maxRetries?: number; - stream?: boolean | undefined; - timeout?: number; - // eslint-disable-next-line @typescript-eslint/no-explicit-any - httpAgent?: any; - signal?: AbortSignal | undefined | null; - idempotencyKey?: string; -}; - -export interface OpenAICallOptions extends BaseLanguageModelCallOptions { - /** - * Additional options to pass to the underlying axios request. - */ - options?: OpenAICoreRequestOptions; -} - -/** - * Input to OpenAI class. - */ -export declare interface OpenAIInput extends OpenAIBaseInput { - /** Generates `bestOf` completions server side and returns the "best" */ - bestOf?: number; - - /** Batch size to use when passing multiple documents to generate */ - batchSize: number; -} - -/** - * @deprecated Use "baseURL", "defaultHeaders", and "defaultParams" instead. 
- */ -export interface LegacyOpenAIInput { - /** @deprecated Use baseURL instead */ - basePath?: string; - /** @deprecated Use defaultHeaders and defaultQuery instead */ - baseOptions?: { - headers?: Record; - params?: Record; - }; -} - -export interface OpenAIChatInput extends OpenAIBaseInput { - /** ChatGPT messages to pass as a prefix to the prompt */ - prefixMessages?: OpenAIClient.Chat.CreateChatCompletionRequestMessage[]; -} - -export declare interface AzureOpenAIInput { - /** - * API version to use when making requests to Azure OpenAI. - */ - azureOpenAIApiVersion?: string; - - /** - * API key to use when making requests to Azure OpenAI. - */ - azureOpenAIApiKey?: string; - - /** - * Azure OpenAI API instance name to use when making requests to Azure OpenAI. - * this is the name of the instance you created in the Azure portal. - * e.g. "my-openai-instance" - * this will be used in the endpoint URL: https://my-openai-instance.openai.azure.com/openai/deployments/{DeploymentName}/ - */ - azureOpenAIApiInstanceName?: string; - - /** - * Azure OpenAI API deployment name to use for completions when making requests to Azure OpenAI. - * This is the name of the deployment you created in the Azure portal. - * e.g. "my-openai-deployment" - * this will be used in the endpoint URL: https://{InstanceName}.openai.azure.com/openai/deployments/my-openai-deployment/ - */ - azureOpenAIApiDeploymentName?: string; - - /** - * Azure OpenAI API deployment name to use for embedding when making requests to Azure OpenAI. - * This is the name of the deployment you created in the Azure portal. - * This will fallback to azureOpenAIApiDeploymentName if not provided. - * e.g. "my-openai-deployment" - * this will be used in the endpoint URL: https://{InstanceName}.openai.azure.com/openai/deployments/my-openai-deployment/ - */ - azureOpenAIApiEmbeddingsDeploymentName?: string; - - /** - * Azure OpenAI API deployment name to use for completions when making requests to Azure OpenAI. 
- * Completions are only available for gpt-3.5-turbo and text-davinci-003 deployments. - * This is the name of the deployment you created in the Azure portal. - * This will fallback to azureOpenAIApiDeploymentName if not provided. - * e.g. "my-openai-deployment" - * this will be used in the endpoint URL: https://{InstanceName}.openai.azure.com/openai/deployments/my-openai-deployment/ - */ - azureOpenAIApiCompletionsDeploymentName?: string; - - /** - * Custom endpoint for Azure OpenAI API. This is useful in case you have a deployment in another region. - * e.g. setting this value to "https://westeurope.api.cognitive.microsoft.com/openai/deployments" - * will be result in the endpoint URL: https://westeurope.api.cognitive.microsoft.com/openai/deployments/{DeploymentName}/ - */ - azureOpenAIBasePath?: string; -} diff --git a/libs/langchain-community/src/util/googlevertexai-connection.ts b/libs/langchain-community/src/util/googlevertexai-connection.ts index 96ed10ebca5c..f440abe364ac 100644 --- a/libs/langchain-community/src/util/googlevertexai-connection.ts +++ b/libs/langchain-community/src/util/googlevertexai-connection.ts @@ -1,5 +1,8 @@ import { BaseLanguageModelCallOptions } from "@langchain/core/language_models/base"; -import { AsyncCaller, AsyncCallerCallOptions } from "@langchain/core/utils/async_caller"; +import { + AsyncCaller, + AsyncCallerCallOptions, +} from "@langchain/core/utils/async_caller"; import { GenerationChunk } from "@langchain/core/outputs"; import type { GoogleVertexAIBaseLLMInput, diff --git a/libs/langchain-community/src/vectorstores/cassandra.ts b/libs/langchain-community/src/vectorstores/cassandra.ts index 4a90a38e56cd..85feef551e48 100644 --- a/libs/langchain-community/src/vectorstores/cassandra.ts +++ b/libs/langchain-community/src/vectorstores/cassandra.ts @@ -1,7 +1,10 @@ /* eslint-disable prefer-template */ import { Client as CassandraClient, DseClientOptions } from "cassandra-driver"; -import { AsyncCaller, AsyncCallerParams } from 
"@langchain/core/utils/async_caller"; +import { + AsyncCaller, + AsyncCallerParams, +} from "@langchain/core/utils/async_caller"; import { Embeddings } from "@langchain/core/embeddings"; import { VectorStore } from "@langchain/core/vectorstores"; import { Document } from "@langchain/core/documents"; diff --git a/libs/langchain-community/src/vectorstores/clickhouse.ts b/libs/langchain-community/src/vectorstores/clickhouse.ts index a74a05d8a49d..4cb9bd127e7c 100644 --- a/libs/langchain-community/src/vectorstores/clickhouse.ts +++ b/libs/langchain-community/src/vectorstores/clickhouse.ts @@ -3,7 +3,7 @@ import { ClickHouseClient, createClient } from "@clickhouse/client"; import { format } from "mysql2"; import { Embeddings } from "@langchain/core/embeddings"; import { VectorStore } from "@langchain/core/vectorstores"; -import { Document } from "@langchain/core/documents" +import { Document } from "@langchain/core/documents"; /** * Arguments for the ClickHouseStore class, which include the host, port, diff --git a/libs/langchain-community/src/vectorstores/cloudflare_vectorize.ts b/libs/langchain-community/src/vectorstores/cloudflare_vectorize.ts index 66da3188608e..b6204f916119 100644 --- a/libs/langchain-community/src/vectorstores/cloudflare_vectorize.ts +++ b/libs/langchain-community/src/vectorstores/cloudflare_vectorize.ts @@ -6,8 +6,11 @@ import { } from "@cloudflare/workers-types"; import { Embeddings } from "@langchain/core/embeddings"; import { VectorStore } from "@langchain/core/vectorstores"; -import { Document } from "@langchain/core/documents" -import { AsyncCaller, type AsyncCallerParams } from "@langchain/core/utils/async_caller"; +import { Document } from "@langchain/core/documents"; +import { + AsyncCaller, + type AsyncCallerParams, +} from "@langchain/core/utils/async_caller"; import { chunkArray } from "../util/chunk.js"; export interface VectorizeLibArgs extends AsyncCallerParams { diff --git a/libs/langchain-community/src/vectorstores/convex.ts 
b/libs/langchain-community/src/vectorstores/convex.ts index 0e36b00d52d8..72d3661a8b2c 100644 --- a/libs/langchain-community/src/vectorstores/convex.ts +++ b/libs/langchain-community/src/vectorstores/convex.ts @@ -16,7 +16,7 @@ import { } from "convex/server"; import { Embeddings } from "@langchain/core/embeddings"; import { VectorStore } from "@langchain/core/vectorstores"; -import { Document } from "@langchain/core/documents" +import { Document } from "@langchain/core/documents"; /** * Type that defines the config required to initialize the diff --git a/libs/langchain-community/src/vectorstores/elasticsearch.ts b/libs/langchain-community/src/vectorstores/elasticsearch.ts index d8b10bedb03c..f10369270892 100644 --- a/libs/langchain-community/src/vectorstores/elasticsearch.ts +++ b/libs/langchain-community/src/vectorstores/elasticsearch.ts @@ -2,7 +2,7 @@ import * as uuid from "uuid"; import { Client, estypes } from "@elastic/elasticsearch"; import { Embeddings } from "@langchain/core/embeddings"; import { VectorStore } from "@langchain/core/vectorstores"; -import { Document } from "@langchain/core/documents" +import { Document } from "@langchain/core/documents"; /** * Type representing the k-nearest neighbors (k-NN) engine used in * Elasticsearch. diff --git a/libs/langchain-community/src/vectorstores/lancedb.ts b/libs/langchain-community/src/vectorstores/lancedb.ts index 0da70a879548..86b87a111758 100644 --- a/libs/langchain-community/src/vectorstores/lancedb.ts +++ b/libs/langchain-community/src/vectorstores/lancedb.ts @@ -1,7 +1,7 @@ import { Table } from "vectordb"; import { Embeddings } from "@langchain/core/embeddings"; import { VectorStore } from "@langchain/core/vectorstores"; -import { Document } from "@langchain/core/documents" +import { Document } from "@langchain/core/documents"; /** * Defines the arguments for the LanceDB class constructor. 
It includes a diff --git a/libs/langchain-community/src/vectorstores/milvus.ts b/libs/langchain-community/src/vectorstores/milvus.ts index e1e33c39ed1a..b9978554da96 100644 --- a/libs/langchain-community/src/vectorstores/milvus.ts +++ b/libs/langchain-community/src/vectorstores/milvus.ts @@ -10,7 +10,7 @@ import { import { Embeddings } from "@langchain/core/embeddings"; import { VectorStore } from "@langchain/core/vectorstores"; -import { Document } from "@langchain/core/documents" +import { Document } from "@langchain/core/documents"; import { getEnvironmentVariable } from "@langchain/core/utils/env"; /** diff --git a/libs/langchain-community/src/vectorstores/myscale.ts b/libs/langchain-community/src/vectorstores/myscale.ts index f288cac4b60e..316505414b57 100644 --- a/libs/langchain-community/src/vectorstores/myscale.ts +++ b/libs/langchain-community/src/vectorstores/myscale.ts @@ -3,7 +3,7 @@ import { ClickHouseClient, createClient } from "@clickhouse/client"; import { Embeddings } from "@langchain/core/embeddings"; import { VectorStore } from "@langchain/core/vectorstores"; -import { Document } from "@langchain/core/documents" +import { Document } from "@langchain/core/documents"; /** * Arguments for the MyScaleStore class, which include the host, port, diff --git a/libs/langchain-community/src/vectorstores/neo4j_vector.ts b/libs/langchain-community/src/vectorstores/neo4j_vector.ts index a2f6482063d8..e35f24fde603 100644 --- a/libs/langchain-community/src/vectorstores/neo4j_vector.ts +++ b/libs/langchain-community/src/vectorstores/neo4j_vector.ts @@ -2,7 +2,7 @@ import neo4j from "neo4j-driver"; import * as uuid from "uuid"; import { Embeddings } from "@langchain/core/embeddings"; import { VectorStore } from "@langchain/core/vectorstores"; -import { Document } from "@langchain/core/documents" +import { Document } from "@langchain/core/documents"; export type SearchType = "vector" | "hybrid"; diff --git a/libs/langchain-community/src/vectorstores/opensearch.ts 
b/libs/langchain-community/src/vectorstores/opensearch.ts index 5728298eadd6..0ea90dff456a 100644 --- a/libs/langchain-community/src/vectorstores/opensearch.ts +++ b/libs/langchain-community/src/vectorstores/opensearch.ts @@ -2,7 +2,7 @@ import { Client, RequestParams, errors } from "@opensearch-project/opensearch"; import * as uuid from "uuid"; import { Embeddings } from "@langchain/core/embeddings"; import { VectorStore } from "@langchain/core/vectorstores"; -import { Document } from "@langchain/core/documents" +import { Document } from "@langchain/core/documents"; type OpenSearchEngine = "nmslib" | "hnsw"; type OpenSearchSpaceType = "l2" | "cosinesimil" | "ip"; diff --git a/libs/langchain-community/src/vectorstores/prisma.ts b/libs/langchain-community/src/vectorstores/prisma.ts index 6e2ec8d8692c..170a41d96367 100644 --- a/libs/langchain-community/src/vectorstores/prisma.ts +++ b/libs/langchain-community/src/vectorstores/prisma.ts @@ -1,6 +1,6 @@ import { Embeddings } from "@langchain/core/embeddings"; import { VectorStore } from "@langchain/core/vectorstores"; -import { Document } from "@langchain/core/documents" +import { Document } from "@langchain/core/documents"; import { Callbacks } from "@langchain/core/callbacks/manager"; const IdColumnSymbol = Symbol("id"); diff --git a/libs/langchain-community/src/vectorstores/qdrant.ts b/libs/langchain-community/src/vectorstores/qdrant.ts index 86ed978bbc39..21062e1e86b5 100644 --- a/libs/langchain-community/src/vectorstores/qdrant.ts +++ b/libs/langchain-community/src/vectorstores/qdrant.ts @@ -4,7 +4,7 @@ import { v4 as uuid } from "uuid"; import { Embeddings } from "@langchain/core/embeddings"; import { VectorStore } from "@langchain/core/vectorstores"; -import { Document } from "@langchain/core/documents" +import { Document } from "@langchain/core/documents"; import { getEnvironmentVariable } from "@langchain/core/utils/env"; /** diff --git a/libs/langchain-community/src/vectorstores/redis.ts 
b/libs/langchain-community/src/vectorstores/redis.ts index 63402c7f72af..5df94f4646b4 100644 --- a/libs/langchain-community/src/vectorstores/redis.ts +++ b/libs/langchain-community/src/vectorstores/redis.ts @@ -7,7 +7,7 @@ import type { import { SchemaFieldTypes, VectorAlgorithms } from "redis"; import { Embeddings } from "@langchain/core/embeddings"; import { VectorStore } from "@langchain/core/vectorstores"; -import { Document } from "@langchain/core/documents" +import { Document } from "@langchain/core/documents"; // Adapated from internal redis types which aren't exported /** diff --git a/libs/langchain-community/src/vectorstores/rockset.ts b/libs/langchain-community/src/vectorstores/rockset.ts index 2f68213bd592..04a93f4f6689 100644 --- a/libs/langchain-community/src/vectorstores/rockset.ts +++ b/libs/langchain-community/src/vectorstores/rockset.ts @@ -4,7 +4,7 @@ import { Collection } from "@rockset/client/dist/codegen/api.js"; import { Embeddings } from "@langchain/core/embeddings"; import { VectorStore } from "@langchain/core/vectorstores"; -import { Document } from "@langchain/core/documents" +import { Document } from "@langchain/core/documents"; /** * Generic Rockset vector storage error */ diff --git a/libs/langchain-community/src/vectorstores/singlestore.ts b/libs/langchain-community/src/vectorstores/singlestore.ts index 70871e8713ca..34abd51598a0 100644 --- a/libs/langchain-community/src/vectorstores/singlestore.ts +++ b/libs/langchain-community/src/vectorstores/singlestore.ts @@ -10,7 +10,7 @@ import { format } from "mysql2"; import { createPool } from "mysql2/promise"; import { Embeddings } from "@langchain/core/embeddings"; import { VectorStore } from "@langchain/core/vectorstores"; -import { Document } from "@langchain/core/documents" +import { Document } from "@langchain/core/documents"; // eslint-disable-next-line @typescript-eslint/no-explicit-any export type Metadata = Record; diff --git 
a/libs/langchain-community/src/vectorstores/tests/clickhouse.int.test.ts b/libs/langchain-community/src/vectorstores/tests/clickhouse.int.test.ts index 2eb210f8fb18..1c4d78c3022f 100644 --- a/libs/langchain-community/src/vectorstores/tests/clickhouse.int.test.ts +++ b/libs/langchain-community/src/vectorstores/tests/clickhouse.int.test.ts @@ -1,10 +1,10 @@ /* eslint-disable no-process-env */ import { test, expect } from "@jest/globals"; +import { Document } from "@langchain/core/documents"; import { ClickHouseStore } from "../clickhouse.js"; // Import OpenAIEmbeddings if you have a valid OpenAI API key import { HuggingFaceInferenceEmbeddings } from "../../embeddings/hf.js"; -import { Document } from "@langchain/core/documents"; test.skip("ClickHouseStore.fromText", async () => { const vectorStore = await ClickHouseStore.fromTexts( diff --git a/libs/langchain-community/src/vectorstores/tests/convex.int.test.ts b/libs/langchain-community/src/vectorstores/tests/convex.int.test.ts deleted file mode 100644 index b985d658d049..000000000000 --- a/libs/langchain-community/src/vectorstores/tests/convex.int.test.ts +++ /dev/null @@ -1,43 +0,0 @@ -/* eslint-disable no-process-env */ - -import { ConvexHttpClient } from "convex/browser"; - -import { expect, test } from "@jest/globals"; -// eslint-disable-next-line import/no-relative-packages -import { api } from "./convex/convex/_generated/api.js"; - -// To run these tests at least once, follow these steps: -// -// 1. `cd langchain/src/vectorstores/tests/convex` -// 2. `npx convex dev --once` -// 3. `cd ../../../..` -// 3. `cp src/vectorstores/tests/convex/.env.local .env` -// 4. Add your OpenAI key to `.env` (see `.env.example`) -// 5. 
`yarn test:single src/vectorstores/tests/convex.int.test.ts` -// -// If you're developing these tests, after you've done the above: -// -// In `langchain/src/vectorstores/tests/convex` run `npx convex dev` -// In `langchain` run `yarn test:single src/vectorstores/tests/convex.int.test.ts` - -describe.skip("Convex Vectorstore", () => { - test("Convex ingest, similaritySearch", async () => { - const client = new ConvexHttpClient(process.env.CONVEX_URL as string); - const openAIApiKey = process.env.OPENAI_API_KEY as string; - - await client.mutation(api.lib.reset); - - await client.action(api.lib.ingest, { - openAIApiKey, - texts: ["Hello world", "Bye bye", "hello nice world"], - metadatas: [{ id: 2 }, { id: 1 }, { id: 3 }], - }); - - const metadatas = await client.action(api.lib.similaritySearch, { - openAIApiKey, - query: "hello world", - }); - - expect(metadatas).toEqual([{ id: 2 }, { id: 3 }, { id: 1 }]); - }); -}); diff --git a/libs/langchain-community/src/vectorstores/tests/convex/convex/_generated/api.d.ts b/libs/langchain-community/src/vectorstores/tests/convex/convex/_generated/api.d.ts deleted file mode 100644 index 0f668d5477a8..000000000000 --- a/libs/langchain-community/src/vectorstores/tests/convex/convex/_generated/api.d.ts +++ /dev/null @@ -1,39 +0,0 @@ -/* eslint-disable */ -/** - * Generated `api` utility. - * - * THIS CODE IS AUTOMATICALLY GENERATED. - * - * Generated by convex@1.3.1. - * To regenerate, run `npx convex dev`. - * @module - */ - -import type { - ApiFromModules, - FilterApi, - FunctionReference, -} from "convex/server"; -import type * as langchain_db from "../langchain/db"; -import type * as lib from "../lib"; - -/** - * A utility for referencing Convex functions in your app's API. 
- * - * Usage: - * ```js - * const myFunctionReference = api.myModule.myFunction; - * ``` - */ -declare const fullApi: ApiFromModules<{ - "langchain/db": typeof langchain_db; - lib: typeof lib; -}>; -export declare const api: FilterApi< - typeof fullApi, - FunctionReference ->; -export declare const internal: FilterApi< - typeof fullApi, - FunctionReference ->; diff --git a/libs/langchain-community/src/vectorstores/tests/convex/convex/_generated/api.js b/libs/langchain-community/src/vectorstores/tests/convex/convex/_generated/api.js deleted file mode 100644 index 3f4ee5427ff9..000000000000 --- a/libs/langchain-community/src/vectorstores/tests/convex/convex/_generated/api.js +++ /dev/null @@ -1,23 +0,0 @@ -/* eslint-disable */ -/** - * Generated `api` utility. - * - * THIS CODE IS AUTOMATICALLY GENERATED. - * - * Generated by convex@1.3.1. - * To regenerate, run `npx convex dev`. - * @module - */ - -import { anyApi } from "convex/server"; - -/** - * A utility for referencing Convex functions in your app's API. - * - * Usage: - * ```js - * const myFunctionReference = api.myModule.myFunction; - * ``` - */ -export const api = anyApi; -export const internal = anyApi; diff --git a/libs/langchain-community/src/vectorstores/tests/convex/convex/_generated/dataModel.d.ts b/libs/langchain-community/src/vectorstores/tests/convex/convex/_generated/dataModel.d.ts deleted file mode 100644 index 68d087faa690..000000000000 --- a/libs/langchain-community/src/vectorstores/tests/convex/convex/_generated/dataModel.d.ts +++ /dev/null @@ -1,56 +0,0 @@ -/* eslint-disable */ -/** - * Generated data model types. - * - * THIS CODE IS AUTOMATICALLY GENERATED. - * - * Generated by convex@1.3.1. - * To regenerate, run `npx convex dev`. 
- * @module - */ - -import type { DataModelFromSchemaDefinition } from "convex/server"; -import type { DocumentByName, TableNamesInDataModel } from "convex/server"; -import type { GenericId } from "convex/values"; -import schema from "../schema"; - -/** - * The names of all of your Convex tables. - */ -export type TableNames = TableNamesInDataModel; - -/** - * The type of a document stored in Convex. - * - * @typeParam TableName - A string literal type of the table name (like "users"). - */ -export type Doc = DocumentByName< - DataModel, - TableName ->; - -/** - * An identifier for a document in Convex. - * - * Convex documents are uniquely identified by their `Id`, which is accessible - * on the `_id` field. To learn more, see [Document IDs](https://docs.convex.dev/using/document-ids). - * - * Documents can be loaded using `db.get(id)` in query and mutation functions. - * - * IDs are just strings at runtime, but this type can be used to distinguish them from other - * strings when type checking. - * - * @typeParam TableName - A string literal type of the table name (like "users"). - */ -export type Id = GenericId; - -/** - * A type describing your Convex data model. - * - * This type includes information about what tables you have, the type of - * documents stored in those tables, and the indexes defined on them. - * - * This type is used to parameterize methods like `queryGeneric` and - * `mutationGeneric` to make them type-safe. - */ -export type DataModel = DataModelFromSchemaDefinition; diff --git a/libs/langchain-community/src/vectorstores/tests/convex/convex/_generated/server.d.ts b/libs/langchain-community/src/vectorstores/tests/convex/convex/_generated/server.d.ts deleted file mode 100644 index 729d94c3989b..000000000000 --- a/libs/langchain-community/src/vectorstores/tests/convex/convex/_generated/server.d.ts +++ /dev/null @@ -1,143 +0,0 @@ -/* eslint-disable */ -/** - * Generated utilities for implementing server-side Convex query and mutation functions. 
- * - * THIS CODE IS AUTOMATICALLY GENERATED. - * - * Generated by convex@1.3.1. - * To regenerate, run `npx convex dev`. - * @module - */ - -import { - ActionBuilder, - HttpActionBuilder, - MutationBuilder, - QueryBuilder, - GenericActionCtx, - GenericMutationCtx, - GenericQueryCtx, - GenericDatabaseReader, - GenericDatabaseWriter, -} from "convex/server"; -import type { DataModel } from "./dataModel.js"; - -/** - * Define a query in this Convex app's public API. - * - * This function will be allowed to read your Convex database and will be accessible from the client. - * - * @param func - The query function. It receives a {@link QueryCtx} as its first argument. - * @returns The wrapped query. Include this as an `export` to name it and make it accessible. - */ -export declare const query: QueryBuilder; - -/** - * Define a query that is only accessible from other Convex functions (but not from the client). - * - * This function will be allowed to read from your Convex database. It will not be accessible from the client. - * - * @param func - The query function. It receives a {@link QueryCtx} as its first argument. - * @returns The wrapped query. Include this as an `export` to name it and make it accessible. - */ -export declare const internalQuery: QueryBuilder; - -/** - * Define a mutation in this Convex app's public API. - * - * This function will be allowed to modify your Convex database and will be accessible from the client. - * - * @param func - The mutation function. It receives a {@link MutationCtx} as its first argument. - * @returns The wrapped mutation. Include this as an `export` to name it and make it accessible. - */ -export declare const mutation: MutationBuilder; - -/** - * Define a mutation that is only accessible from other Convex functions (but not from the client). - * - * This function will be allowed to modify your Convex database. It will not be accessible from the client. - * - * @param func - The mutation function. 
It receives a {@link MutationCtx} as its first argument. - * @returns The wrapped mutation. Include this as an `export` to name it and make it accessible. - */ -export declare const internalMutation: MutationBuilder; - -/** - * Define an action in this Convex app's public API. - * - * An action is a function which can execute any JavaScript code, including non-deterministic - * code and code with side-effects, like calling third-party services. - * They can be run in Convex's JavaScript environment or in Node.js using the "use node" directive. - * They can interact with the database indirectly by calling queries and mutations using the {@link ActionCtx}. - * - * @param func - The action. It receives an {@link ActionCtx} as its first argument. - * @returns The wrapped action. Include this as an `export` to name it and make it accessible. - */ -export declare const action: ActionBuilder; - -/** - * Define an action that is only accessible from other Convex functions (but not from the client). - * - * @param func - The function. It receives an {@link ActionCtx} as its first argument. - * @returns The wrapped function. Include this as an `export` to name it and make it accessible. - */ -export declare const internalAction: ActionBuilder; - -/** - * Define an HTTP action. - * - * This function will be used to respond to HTTP requests received by a Convex - * deployment if the requests matches the path and method where this action - * is routed. Be sure to route your action in `convex/http.js`. - * - * @param func - The function. It receives an {@link ActionCtx} as its first argument. - * @returns The wrapped function. Import this function from `convex/http.js` and route it to hook it up. - */ -export declare const httpAction: HttpActionBuilder; - -/** - * A set of services for use within Convex query functions. - * - * The query context is passed as the first argument to any Convex query - * function run on the server. 
- * - * This differs from the {@link MutationCtx} because all of the services are - * read-only. - */ -export type QueryCtx = GenericQueryCtx; - -/** - * A set of services for use within Convex mutation functions. - * - * The mutation context is passed as the first argument to any Convex mutation - * function run on the server. - */ -export type MutationCtx = GenericMutationCtx; - -/** - * A set of services for use within Convex action functions. - * - * The action context is passed as the first argument to any Convex action - * function run on the server. - */ -export type ActionCtx = GenericActionCtx; - -/** - * An interface to read from the database within Convex query functions. - * - * The two entry points are {@link DatabaseReader.get}, which fetches a single - * document by its {@link Id}, or {@link DatabaseReader.query}, which starts - * building a query. - */ -export type DatabaseReader = GenericDatabaseReader; - -/** - * An interface to read from and write to the database within Convex mutation - * functions. - * - * Convex guarantees that all writes within a single mutation are - * executed atomically, so you never have to worry about partial writes leaving - * your data in an inconsistent state. See [the Convex Guide](https://docs.convex.dev/understanding/convex-fundamentals/functions#atomicity-and-optimistic-concurrency-control) - * for the guarantees Convex provides your functions. - */ -export type DatabaseWriter = GenericDatabaseWriter; diff --git a/libs/langchain-community/src/vectorstores/tests/convex/convex/_generated/server.js b/libs/langchain-community/src/vectorstores/tests/convex/convex/_generated/server.js deleted file mode 100644 index fa2cb37267d2..000000000000 --- a/libs/langchain-community/src/vectorstores/tests/convex/convex/_generated/server.js +++ /dev/null @@ -1,90 +0,0 @@ -/* eslint-disable */ -/** - * Generated utilities for implementing server-side Convex query and mutation functions. - * - * THIS CODE IS AUTOMATICALLY GENERATED. 
- * - * Generated by convex@1.3.1. - * To regenerate, run `npx convex dev`. - * @module - */ - -import { - actionGeneric, - httpActionGeneric, - queryGeneric, - mutationGeneric, - internalActionGeneric, - internalMutationGeneric, - internalQueryGeneric, -} from "convex/server"; - -/** - * Define a query in this Convex app's public API. - * - * This function will be allowed to read your Convex database and will be accessible from the client. - * - * @param func - The query function. It receives a {@link QueryCtx} as its first argument. - * @returns The wrapped query. Include this as an `export` to name it and make it accessible. - */ -export const query = queryGeneric; - -/** - * Define a query that is only accessible from other Convex functions (but not from the client). - * - * This function will be allowed to read from your Convex database. It will not be accessible from the client. - * - * @param func - The query function. It receives a {@link QueryCtx} as its first argument. - * @returns The wrapped query. Include this as an `export` to name it and make it accessible. - */ -export const internalQuery = internalQueryGeneric; - -/** - * Define a mutation in this Convex app's public API. - * - * This function will be allowed to modify your Convex database and will be accessible from the client. - * - * @param func - The mutation function. It receives a {@link MutationCtx} as its first argument. - * @returns The wrapped mutation. Include this as an `export` to name it and make it accessible. - */ -export const mutation = mutationGeneric; - -/** - * Define a mutation that is only accessible from other Convex functions (but not from the client). - * - * This function will be allowed to modify your Convex database. It will not be accessible from the client. - * - * @param func - The mutation function. It receives a {@link MutationCtx} as its first argument. - * @returns The wrapped mutation. Include this as an `export` to name it and make it accessible. 
- */ -export const internalMutation = internalMutationGeneric; - -/** - * Define an action in this Convex app's public API. - * - * An action is a function which can execute any JavaScript code, including non-deterministic - * code and code with side-effects, like calling third-party services. - * They can be run in Convex's JavaScript environment or in Node.js using the "use node" directive. - * They can interact with the database indirectly by calling queries and mutations using the {@link ActionCtx}. - * - * @param func - The action. It receives an {@link ActionCtx} as its first argument. - * @returns The wrapped action. Include this as an `export` to name it and make it accessible. - */ -export const action = actionGeneric; - -/** - * Define an action that is only accessible from other Convex functions (but not from the client). - * - * @param func - The function. It receives an {@link ActionCtx} as its first argument. - * @returns The wrapped function. Include this as an `export` to name it and make it accessible. - */ -export const internalAction = internalActionGeneric; - -/** - * Define a Convex HTTP action. - * - * @param func - The function. It receives an {@link ActionCtx} as its first argument, and a `Request` object - * as its second. - * @returns The wrapped endpoint function. Route a URL path to this function in `convex/http.js`. 
- */ -export const httpAction = httpActionGeneric; diff --git a/libs/langchain-community/src/vectorstores/tests/convex/convex/langchain/db.ts b/libs/langchain-community/src/vectorstores/tests/convex/convex/langchain/db.ts deleted file mode 100644 index e09d4ecfe02d..000000000000 --- a/libs/langchain-community/src/vectorstores/tests/convex/convex/langchain/db.ts +++ /dev/null @@ -1 +0,0 @@ -export * from "../../../../../util/convex.js"; diff --git a/libs/langchain-community/src/vectorstores/tests/convex/convex/lib.ts b/libs/langchain-community/src/vectorstores/tests/convex/convex/lib.ts deleted file mode 100644 index a47d55a4dce4..000000000000 --- a/libs/langchain-community/src/vectorstores/tests/convex/convex/lib.ts +++ /dev/null @@ -1,45 +0,0 @@ -// eslint-disable-next-line import/no-extraneous-dependencies -import { v } from "convex/values"; -import { OpenAIEmbeddings } from "@langchain/openai"; -import { ConvexVectorStore } from "../../../convex.js"; -import { action, mutation } from "./_generated/server.js"; - -export const reset = mutation({ - args: {}, - handler: async (ctx) => { - const documents = await ctx.db.query("documents").collect(); - await Promise.all(documents.map((document) => ctx.db.delete(document._id))); - }, -}); - -export const ingest = action({ - args: { - openAIApiKey: v.string(), - texts: v.array(v.string()), - metadatas: v.array(v.any()), - }, - handler: async (ctx, { openAIApiKey, texts, metadatas }) => { - await ConvexVectorStore.fromTexts( - texts, - metadatas, - new OpenAIEmbeddings({ openAIApiKey }), - { ctx } - ); - }, -}); - -export const similaritySearch = action({ - args: { - openAIApiKey: v.string(), - query: v.string(), - }, - handler: async (ctx, { openAIApiKey, query }) => { - const vectorStore = new ConvexVectorStore( - new OpenAIEmbeddings({ openAIApiKey }), - { ctx } - ); - - const result = await vectorStore.similaritySearch(query, 3); - return result.map(({ metadata }) => metadata); - }, -}); diff --git 
a/libs/langchain-community/src/vectorstores/tests/convex/convex/schema.ts b/libs/langchain-community/src/vectorstores/tests/convex/convex/schema.ts deleted file mode 100644 index 72f7f31029be..000000000000 --- a/libs/langchain-community/src/vectorstores/tests/convex/convex/schema.ts +++ /dev/null @@ -1,15 +0,0 @@ -/* eslint-disable import/no-extraneous-dependencies */ - -import { defineSchema, defineTable } from "convex/server"; -import { v } from "convex/values"; - -export default defineSchema({ - documents: defineTable({ - embedding: v.array(v.number()), - text: v.string(), - metadata: v.any(), - }).vectorIndex("byEmbedding", { - vectorField: "embedding", - dimensions: 1536, - }), -}); diff --git a/libs/langchain-community/src/vectorstores/tests/convex/package.json b/libs/langchain-community/src/vectorstores/tests/convex/package.json deleted file mode 100644 index afd00749387f..000000000000 --- a/libs/langchain-community/src/vectorstores/tests/convex/package.json +++ /dev/null @@ -1,8 +0,0 @@ -{ - "name": "langchain-convex-tests", - "version": "0.0.1", - "type": "module", - "dependencies": { - "convex": "1.4.1" - } -} diff --git a/libs/langchain-community/src/vectorstores/tests/singlestore.int.test.ts b/libs/langchain-community/src/vectorstores/tests/singlestore.int.test.ts index ec8ae55c3337..aacfd5dd71b4 100644 --- a/libs/langchain-community/src/vectorstores/tests/singlestore.int.test.ts +++ b/libs/langchain-community/src/vectorstores/tests/singlestore.int.test.ts @@ -2,8 +2,8 @@ /* eslint-disable import/no-extraneous-dependencies */ import { test, expect } from "@jest/globals"; import { OpenAIEmbeddings } from "@langchain/openai"; -import { SingleStoreVectorStore } from "../singlestore.js"; import { Document } from "@langchain/core/documents"; +import { SingleStoreVectorStore } from "../singlestore.js"; test.skip("SingleStoreVectorStore", async () => { expect(process.env.SINGLESTORE_HOST).toBeDefined(); diff --git 
a/libs/langchain-community/src/vectorstores/tigris.ts b/libs/langchain-community/src/vectorstores/tigris.ts index 5913f6c08f37..e4bc57623991 100644 --- a/libs/langchain-community/src/vectorstores/tigris.ts +++ b/libs/langchain-community/src/vectorstores/tigris.ts @@ -2,7 +2,7 @@ import * as uuid from "uuid"; import { Embeddings } from "@langchain/core/embeddings"; import { VectorStore } from "@langchain/core/vectorstores"; -import { Document } from "@langchain/core/documents" +import { Document } from "@langchain/core/documents"; /** * Type definition for the arguments required to initialize a diff --git a/libs/langchain-community/src/vectorstores/typeorm.ts b/libs/langchain-community/src/vectorstores/typeorm.ts index 13fc7b758bd4..22d2d85e1f95 100644 --- a/libs/langchain-community/src/vectorstores/typeorm.ts +++ b/libs/langchain-community/src/vectorstores/typeorm.ts @@ -2,7 +2,7 @@ import { Metadata } from "@opensearch-project/opensearch/api/types.js"; import { DataSource, DataSourceOptions, EntitySchema } from "typeorm"; import { Embeddings } from "@langchain/core/embeddings"; import { VectorStore } from "@langchain/core/vectorstores"; -import { Document } from "@langchain/core/documents" +import { Document } from "@langchain/core/documents"; import { getEnvironmentVariable } from "@langchain/core/utils/env"; /** diff --git a/libs/langchain-community/src/vectorstores/typesense.ts b/libs/langchain-community/src/vectorstores/typesense.ts index a9ce04a1ae20..d914a6cc9eee 100644 --- a/libs/langchain-community/src/vectorstores/typesense.ts +++ b/libs/langchain-community/src/vectorstores/typesense.ts @@ -6,8 +6,11 @@ import type { } from "typesense/lib/Typesense/Documents.js"; import { Embeddings } from "@langchain/core/embeddings"; import { VectorStore } from "@langchain/core/vectorstores"; -import { Document } from "@langchain/core/documents" -import { AsyncCaller, AsyncCallerParams } from "@langchain/core/utils/async_caller"; +import { Document } from 
"@langchain/core/documents"; +import { + AsyncCaller, + AsyncCallerParams, +} from "@langchain/core/utils/async_caller"; /** * Interface for the response hit from a vector search in Typesense. diff --git a/libs/langchain-community/src/vectorstores/vectara.ts b/libs/langchain-community/src/vectorstores/vectara.ts index 8e45a9c1aefa..34c8ed5a9579 100644 --- a/libs/langchain-community/src/vectorstores/vectara.ts +++ b/libs/langchain-community/src/vectorstores/vectara.ts @@ -2,7 +2,7 @@ import * as uuid from "uuid"; import { Embeddings } from "@langchain/core/embeddings"; import { VectorStore } from "@langchain/core/vectorstores"; -import { Document } from "@langchain/core/documents" +import { Document } from "@langchain/core/documents"; import { getEnvironmentVariable } from "@langchain/core/utils/env"; import { FakeEmbeddings } from "../util/testing.js"; diff --git a/libs/langchain-community/src/vectorstores/vercel_postgres.ts b/libs/langchain-community/src/vectorstores/vercel_postgres.ts index 6cab962fa0c2..782428673b2a 100644 --- a/libs/langchain-community/src/vectorstores/vercel_postgres.ts +++ b/libs/langchain-community/src/vectorstores/vercel_postgres.ts @@ -6,7 +6,7 @@ import { } from "@vercel/postgres"; import { Embeddings } from "@langchain/core/embeddings"; import { VectorStore } from "@langchain/core/vectorstores"; -import { Document } from "@langchain/core/documents" +import { Document } from "@langchain/core/documents"; import { getEnvironmentVariable } from "@langchain/core/utils/env"; type Metadata = Record>; diff --git a/libs/langchain-community/src/vectorstores/voy.ts b/libs/langchain-community/src/vectorstores/voy.ts index 78bf0a19bee2..a1a341eb7701 100644 --- a/libs/langchain-community/src/vectorstores/voy.ts +++ b/libs/langchain-community/src/vectorstores/voy.ts @@ -1,7 +1,7 @@ import type { Voy as VoyOriginClient, SearchResult } from "voy-search"; import { Embeddings } from "@langchain/core/embeddings"; import { VectorStore } from 
"@langchain/core/vectorstores"; -import { Document } from "@langchain/core/documents" +import { Document } from "@langchain/core/documents"; export type VoyClient = Omit< VoyOriginClient, diff --git a/libs/langchain-community/src/vectorstores/xata.ts b/libs/langchain-community/src/vectorstores/xata.ts index c87c21d7fb80..5b5c1d23bc5c 100644 --- a/libs/langchain-community/src/vectorstores/xata.ts +++ b/libs/langchain-community/src/vectorstores/xata.ts @@ -1,7 +1,7 @@ import { BaseClient } from "@xata.io/client"; import { Embeddings } from "@langchain/core/embeddings"; import { VectorStore } from "@langchain/core/vectorstores"; -import { Document } from "@langchain/core/documents" +import { Document } from "@langchain/core/documents"; /** * Interface for the arguments required to create a XataClient. Includes diff --git a/yarn.lock b/yarn.lock index 0cfb1d44b958..5d968b3ad48a 100644 --- a/yarn.lock +++ b/yarn.lock @@ -8106,6 +8106,7 @@ __metadata: jest: ^29.5.0 jest-environment-node: ^29.6.4 jsdom: ^22.1.0 + langsmith: ^0.0.49 llmonitor: ^0.5.9 lodash: ^4.17.21 mammoth: ^1.5.1 @@ -23571,6 +23572,21 @@ __metadata: languageName: unknown linkType: soft +"langsmith@npm:^0.0.49": + version: 0.0.49 + resolution: "langsmith@npm:0.0.49" + dependencies: + "@types/uuid": ^9.0.1 + commander: ^10.0.1 + p-queue: ^6.6.2 + p-retry: 4 + uuid: ^9.0.0 + bin: + langsmith: dist/cli/main.cjs + checksum: 9976d9fe1e4d4ace5041af08d3271dff61d7a87fbd88523b52274817704d282c46a48187cc73af0f7e440dbe4db5da1d221966d1136a71cbfa6115e5159242a8 + languageName: node + linkType: hard + "langsmith@npm:~0.0.48": version: 0.0.48 resolution: "langsmith@npm:0.0.48" From 850ce9e670d10215e24346922ddbffe545ed092f Mon Sep 17 00:00:00 2001 From: jacoblee93 Date: Thu, 7 Dec 2023 02:10:15 -0800 Subject: [PATCH 06/22] More refactoring --- langchain/src/cache/cloudflare_kv.ts | 78 +- langchain/src/cache/momento.ts | 174 +--- langchain/src/cache/upstash_redis.ts | 92 +- langchain/src/callbacks/handlers/llmonitor.ts | 
341 +------ .../src/callbacks/tests/llmonitor.int.test.ts | 6 +- langchain/src/chat_models/baiduwenxin.ts | 543 +---------- langchain/src/chat_models/bedrock/index.ts | 39 +- langchain/src/chat_models/bedrock/web.ts | 432 +-------- .../src/chat_models/cloudflare_workersai.ts | 248 +---- langchain/src/chat_models/fireworks.ts | 138 +-- langchain/src/chat_models/googlepalm.ts | 341 +------ .../src/chat_models/googlevertexai/common.ts | 400 -------- .../src/chat_models/googlevertexai/index.ts | 65 +- .../src/chat_models/googlevertexai/web.ts | 67 +- .../src/chat_models/iflytek_xinghuo/common.ts | 486 ---------- .../src/chat_models/iflytek_xinghuo/index.ts | 44 +- .../src/chat_models/iflytek_xinghuo/web.ts | 50 +- langchain/src/chat_models/llama_cpp.ts | 323 +------ langchain/src/chat_models/minimax.ts | 881 +----------------- langchain/src/chat_models/ollama.ts | 299 +----- langchain/src/chat_models/portkey.ts | 183 +--- langchain/src/chat_models/yandex.ts | 143 +-- langchain/src/embeddings/bedrock.ts | 143 +-- .../src/embeddings/cloudflare_workersai.ts | 95 +- langchain/src/embeddings/cohere.ts | 156 +--- langchain/src/embeddings/googlepalm.ts | 108 +-- langchain/src/embeddings/googlevertexai.ts | 146 +-- langchain/src/embeddings/gradient_ai.ts | 119 +-- langchain/src/embeddings/hf.ts | 78 +- langchain/src/embeddings/hf_transformers.ts | 106 +-- langchain/src/embeddings/llama_cpp.ts | 104 +-- langchain/src/embeddings/minimax.ts | 223 +---- langchain/src/embeddings/ollama.ts | 149 +-- langchain/src/embeddings/tensorflow.ts | 92 +- langchain/src/embeddings/voyage.ts | 153 +-- langchain/src/graphs/neo4j_graph.ts | 287 +----- .../src/graphs/tests/neo4j_graph.int.test.ts | 56 -- langchain/src/llms/ai21.ts | 200 +--- langchain/src/llms/aleph_alpha.ts | 299 +----- langchain/src/llms/bedrock/web.ts | 357 +------ langchain/src/llms/cloudflare_workersai.ts | 190 +--- langchain/src/llms/cohere.ts | 130 +-- langchain/src/llms/fireworks.ts | 141 +-- 
langchain/src/llms/googlepalm.ts | 204 +--- langchain/src/llms/googlevertexai/common.ts | 231 ----- langchain/src/llms/googlevertexai/index.ts | 67 +- langchain/src/llms/googlevertexai/web.ts | 67 +- langchain/src/llms/gradient_ai.ts | 137 +-- langchain/src/llms/hf.ts | 156 +--- langchain/src/llms/llama_cpp.ts | 117 +-- langchain/src/llms/ollama.ts | 246 +---- langchain/src/llms/portkey.ts | 180 +--- langchain/src/llms/raycast.ts | 100 +- langchain/src/llms/replicate.ts | 159 +--- langchain/src/llms/sagemaker_endpoint.ts | 284 +----- langchain/src/llms/watsonx_ai.ts | 195 +--- langchain/src/llms/writer.ts | 173 +--- langchain/src/llms/yandex.ts | 124 +-- langchain/src/retrievers/amazon_kendra.ts | 318 +------ langchain/src/retrievers/chaindesk.ts | 98 +- langchain/src/retrievers/databerry.ts | 95 +- langchain/src/retrievers/metal.ts | 71 +- langchain/src/retrievers/supabase.ts | 239 +---- langchain/src/retrievers/tavily_search_api.ts | 141 +-- langchain/src/retrievers/vespa.ts | 93 +- langchain/src/retrievers/zep.ts | 170 +--- langchain/src/tools/IFTTTWebhook.ts | 80 +- langchain/src/tools/aiplugin.ts | 82 +- langchain/src/tools/aws_sfn.ts | 226 +---- langchain/src/tools/bingserpapi.ts | 79 +- langchain/src/tools/brave_search.ts | 78 +- langchain/src/tools/connery.ts | 354 +------ langchain/src/tools/dadjokeapi.ts | 45 +- langchain/src/tools/dataforseo_api_search.ts | 379 +------- langchain/src/tools/gmail/base.ts | 75 -- langchain/src/tools/gmail/create_draft.ts | 74 -- langchain/src/tools/gmail/descriptions.ts | 119 --- langchain/src/tools/gmail/get_message.ts | 95 -- langchain/src/tools/gmail/get_thread.ts | 105 --- langchain/src/tools/gmail/index.ts | 13 +- langchain/src/tools/gmail/search.ts | 135 --- langchain/src/tools/gmail/send_message.ts | 84 -- langchain/src/tools/google_custom_search.ts | 84 +- langchain/src/tools/google_places.ts | 97 +- langchain/src/tools/searchapi.ts | 205 +--- langchain/src/tools/searxng_search.ts | 259 +---- 
langchain/src/tools/serpapi.ts | 506 +--------- langchain/src/tools/serper.ts | 108 +-- langchain/src/tools/wikipedia_query_run.ts | 182 +--- langchain/src/tools/wolframalpha.ts | 42 +- libs/langchain-community/.gitignore | 18 +- .../cache/cloudflare_kv.cjs | 1 + .../cache/cloudflare_kv.d.ts | 1 + .../cache/cloudflare_kv.js | 1 + libs/langchain-community/cache/momento.cjs | 1 + libs/langchain-community/cache/momento.d.ts | 1 + libs/langchain-community/cache/momento.js | 1 + .../cache/upstash_redis.cjs | 1 + .../cache/upstash_redis.d.ts | 1 + .../cache/upstash_redis.js | 1 + libs/langchain-community/package.json | 42 +- .../scripts/create-entrypoints.js | 6 +- .../src/{cache => caches}/cloudflare_kv.ts | 0 .../src/{cache => caches}/momento.ts | 2 +- .../{cache => caches}/tests/momento.test.ts | 0 .../tests/upstash_redis.int.test.ts | 0 .../tests/upstash_redis.test.ts | 0 .../src/{cache => caches}/upstash_redis.ts | 0 .../src/chat_models/bedrock/index.ts | 2 +- .../src/chat_models/bedrock/web.ts | 2 +- .../src/chat_models/cloudflare_workersai.ts | 2 +- .../src/chat_models/googlevertexai/common.ts | 2 +- .../src/chat_models/googlevertexai/index.ts | 4 +- .../src/chat_models/googlevertexai/web.ts | 4 +- .../src/chat_models/iflytek_xinghuo/common.ts | 2 +- .../src/chat_models/iflytek_xinghuo/index.ts | 2 +- .../src/chat_models/iflytek_xinghuo/web.ts | 2 +- .../src/chat_models/llama_cpp.ts | 2 +- .../src/chat_models/ollama.ts | 2 +- .../src/embeddings/bedrock.ts | 2 +- .../src/embeddings/cloudflare_workersai.ts | 2 +- .../src/embeddings/cohere.ts | 2 +- .../src/embeddings/googlevertexai.ts | 4 +- .../src/embeddings/gradient_ai.ts | 2 +- .../src/embeddings/hf_transformers.ts | 2 +- .../src/embeddings/llama_cpp.ts | 2 +- .../src/embeddings/minimax.ts | 2 +- .../src/embeddings/ollama.ts | 2 +- .../src/embeddings/voyage.ts | 2 +- .../src/llms/bedrock/index.ts | 2 +- .../src/llms/bedrock/web.ts | 2 +- .../src/llms/cloudflare_workersai.ts | 2 +- 
.../src/llms/googlevertexai/common.ts | 2 +- .../src/llms/googlevertexai/index.ts | 4 +- .../src/llms/googlevertexai/web.ts | 4 +- .../langchain-community/src/llms/llama_cpp.ts | 2 +- libs/langchain-community/src/llms/ollama.ts | 2 +- .../src/load/import_constants.ts | 3 - .../src/load/import_map.ts | 3 + .../src/load/import_type.d.ts | 9 - .../src/{util => utils}/bedrock.ts | 0 .../src/{util => utils}/chunk.ts | 0 .../src/{util => utils}/convex.ts | 0 .../src/{util => utils}/event-source-parse.ts | 0 .../googlevertexai-connection.ts | 0 .../{util => utils}/googlevertexai-gauth.ts | 0 .../{util => utils}/googlevertexai-webauth.ts | 0 .../iflytek_websocket_stream.ts | 0 .../src/{util => utils}/llama_cpp.ts | 0 .../src/{util => utils}/momento.ts | 0 .../src/{util => utils}/ollama.ts | 0 .../src/{util => utils}/testing.ts | 0 .../src/{util => utils}/time.ts | 0 .../src/vectorstores/cloudflare_vectorize.ts | 2 +- .../vectorstores/tests/analyticdb.int.test.ts | 169 ---- .../vectorstores/tests/cassandra.int.test.ts | 363 -------- .../src/vectorstores/tests/chroma.int.test.ts | 163 ---- .../src/vectorstores/tests/chroma.test.ts | 135 --- .../vectorstores/tests/clickhouse.int.test.ts | 99 -- .../tests/elasticsearch.int.test.ts | 111 --- .../vectorstores/tests/lancedb.int.test.ts | 45 - .../src/vectorstores/tests/milvus.int.test.ts | 169 ---- .../vectorstores/tests/myscale.int.test.ts | 90 -- .../tests/neo4j_vector.int.test.ts | 471 ---------- .../vectorstores/tests/opensearch.int.test.ts | 44 - .../src/vectorstores/tests/qdrant.int.test.ts | 50 - .../src/vectorstores/tests/qdrant.test.ts | 33 - .../src/vectorstores/tests/redis.int.test.ts | 71 -- .../src/vectorstores/tests/redis.test.ts | 222 ----- .../tests/singlestore.int.test.ts | 175 ---- .../src/vectorstores/tests/tigris.test.ts | 76 -- .../vectorstores/tests/typeorm.int.test.ts | 51 - .../src/vectorstores/tests/typesense.test.ts | 127 --- .../vectorstores/tests/vectara.int.test.ts | 225 ----- 
.../tests/vercel_postgres.int.test.ts | 129 --- .../src/vectorstores/tests/voy.int.test.ts | 49 - .../src/vectorstores/tests/voy.test.ts | 56 -- .../src/vectorstores/tests/xata.int.test.ts | 166 ---- .../src/vectorstores/vectara.ts | 2 +- 179 files changed, 163 insertions(+), 19166 deletions(-) delete mode 100644 langchain/src/chat_models/googlevertexai/common.ts delete mode 100644 langchain/src/chat_models/iflytek_xinghuo/common.ts delete mode 100644 langchain/src/graphs/tests/neo4j_graph.int.test.ts delete mode 100644 langchain/src/llms/googlevertexai/common.ts delete mode 100644 langchain/src/tools/gmail/base.ts delete mode 100644 langchain/src/tools/gmail/create_draft.ts delete mode 100644 langchain/src/tools/gmail/descriptions.ts delete mode 100644 langchain/src/tools/gmail/get_message.ts delete mode 100644 langchain/src/tools/gmail/get_thread.ts delete mode 100644 langchain/src/tools/gmail/search.ts delete mode 100644 langchain/src/tools/gmail/send_message.ts create mode 100644 libs/langchain-community/cache/cloudflare_kv.cjs create mode 100644 libs/langchain-community/cache/cloudflare_kv.d.ts create mode 100644 libs/langchain-community/cache/cloudflare_kv.js create mode 100644 libs/langchain-community/cache/momento.cjs create mode 100644 libs/langchain-community/cache/momento.d.ts create mode 100644 libs/langchain-community/cache/momento.js create mode 100644 libs/langchain-community/cache/upstash_redis.cjs create mode 100644 libs/langchain-community/cache/upstash_redis.d.ts create mode 100644 libs/langchain-community/cache/upstash_redis.js rename libs/langchain-community/src/{cache => caches}/cloudflare_kv.ts (100%) rename libs/langchain-community/src/{cache => caches}/momento.ts (98%) rename libs/langchain-community/src/{cache => caches}/tests/momento.test.ts (100%) rename libs/langchain-community/src/{cache => caches}/tests/upstash_redis.int.test.ts (100%) rename libs/langchain-community/src/{cache => caches}/tests/upstash_redis.test.ts (100%) rename 
libs/langchain-community/src/{cache => caches}/upstash_redis.ts (100%) rename libs/langchain-community/src/{util => utils}/bedrock.ts (100%) rename libs/langchain-community/src/{util => utils}/chunk.ts (100%) rename libs/langchain-community/src/{util => utils}/convex.ts (100%) rename libs/langchain-community/src/{util => utils}/event-source-parse.ts (100%) rename libs/langchain-community/src/{util => utils}/googlevertexai-connection.ts (100%) rename libs/langchain-community/src/{util => utils}/googlevertexai-gauth.ts (100%) rename libs/langchain-community/src/{util => utils}/googlevertexai-webauth.ts (100%) rename libs/langchain-community/src/{util => utils}/iflytek_websocket_stream.ts (100%) rename libs/langchain-community/src/{util => utils}/llama_cpp.ts (100%) rename libs/langchain-community/src/{util => utils}/momento.ts (100%) rename libs/langchain-community/src/{util => utils}/ollama.ts (100%) rename libs/langchain-community/src/{util => utils}/testing.ts (100%) rename libs/langchain-community/src/{util => utils}/time.ts (100%) delete mode 100644 libs/langchain-community/src/vectorstores/tests/analyticdb.int.test.ts delete mode 100644 libs/langchain-community/src/vectorstores/tests/cassandra.int.test.ts delete mode 100644 libs/langchain-community/src/vectorstores/tests/chroma.int.test.ts delete mode 100644 libs/langchain-community/src/vectorstores/tests/chroma.test.ts delete mode 100644 libs/langchain-community/src/vectorstores/tests/clickhouse.int.test.ts delete mode 100644 libs/langchain-community/src/vectorstores/tests/elasticsearch.int.test.ts delete mode 100644 libs/langchain-community/src/vectorstores/tests/lancedb.int.test.ts delete mode 100644 libs/langchain-community/src/vectorstores/tests/milvus.int.test.ts delete mode 100644 libs/langchain-community/src/vectorstores/tests/myscale.int.test.ts delete mode 100644 libs/langchain-community/src/vectorstores/tests/neo4j_vector.int.test.ts delete mode 100644 
libs/langchain-community/src/vectorstores/tests/opensearch.int.test.ts delete mode 100644 libs/langchain-community/src/vectorstores/tests/qdrant.int.test.ts delete mode 100644 libs/langchain-community/src/vectorstores/tests/qdrant.test.ts delete mode 100644 libs/langchain-community/src/vectorstores/tests/redis.int.test.ts delete mode 100644 libs/langchain-community/src/vectorstores/tests/redis.test.ts delete mode 100644 libs/langchain-community/src/vectorstores/tests/singlestore.int.test.ts delete mode 100644 libs/langchain-community/src/vectorstores/tests/tigris.test.ts delete mode 100644 libs/langchain-community/src/vectorstores/tests/typeorm.int.test.ts delete mode 100644 libs/langchain-community/src/vectorstores/tests/typesense.test.ts delete mode 100644 libs/langchain-community/src/vectorstores/tests/vectara.int.test.ts delete mode 100644 libs/langchain-community/src/vectorstores/tests/vercel_postgres.int.test.ts delete mode 100644 libs/langchain-community/src/vectorstores/tests/voy.int.test.ts delete mode 100644 libs/langchain-community/src/vectorstores/tests/voy.test.ts delete mode 100644 libs/langchain-community/src/vectorstores/tests/xata.int.test.ts diff --git a/langchain/src/cache/cloudflare_kv.ts b/langchain/src/cache/cloudflare_kv.ts index d438c7cd7cc5..db657589e95b 100644 --- a/langchain/src/cache/cloudflare_kv.ts +++ b/langchain/src/cache/cloudflare_kv.ts @@ -1,77 +1 @@ -import type { KVNamespace } from "@cloudflare/workers-types"; - -import { BaseCache, Generation } from "../schema/index.js"; -import { - getCacheKey, - serializeGeneration, - deserializeStoredGeneration, -} from "./base.js"; - -/** - * Represents a specific implementation of a caching mechanism using Cloudflare KV - * as the underlying storage system. It extends the `BaseCache` class and - * overrides its methods to provide the Cloudflare KV-specific logic. 
- * @example - * ```typescript - * // Example of using OpenAI with Cloudflare KV as cache in a Cloudflare Worker - * const cache = new CloudflareKVCache(env.KV_NAMESPACE); - * const model = new ChatAnthropic({ - * cache, - * }); - * const response = await model.invoke("How are you today?"); - * return new Response(JSON.stringify(response), { - * headers: { "content-type": "application/json" }, - * }); - * - * ``` - */ -export class CloudflareKVCache extends BaseCache { - private binding: KVNamespace; - - constructor(binding: KVNamespace) { - super(); - this.binding = binding; - } - - /** - * Retrieves data from the cache. It constructs a cache key from the given - * `prompt` and `llmKey`, and retrieves the corresponding value from the - * Cloudflare KV namespace. - * @param prompt The prompt used to construct the cache key. - * @param llmKey The LLM key used to construct the cache key. - * @returns An array of Generations if found, null otherwise. - */ - public async lookup(prompt: string, llmKey: string) { - let idx = 0; - let key = getCacheKey(prompt, llmKey, String(idx)); - let value = await this.binding.get(key); - const generations: Generation[] = []; - - while (value) { - generations.push(deserializeStoredGeneration(JSON.parse(value))); - idx += 1; - key = getCacheKey(prompt, llmKey, String(idx)); - value = await this.binding.get(key); - } - - return generations.length > 0 ? generations : null; - } - - /** - * Updates the cache with new data. It constructs a cache key from the - * given `prompt` and `llmKey`, and stores the `value` in the Cloudflare KV - * namespace. - * @param prompt The prompt used to construct the cache key. - * @param llmKey The LLM key used to construct the cache key. - * @param value The value to be stored in the cache. 
- */ - public async update(prompt: string, llmKey: string, value: Generation[]) { - for (let i = 0; i < value.length; i += 1) { - const key = getCacheKey(prompt, llmKey, String(i)); - await this.binding.put( - key, - JSON.stringify(serializeGeneration(value[i])) - ); - } - } -} +export * from "@langchain/community/caches/cloudflare_kv"; \ No newline at end of file diff --git a/langchain/src/cache/momento.ts b/langchain/src/cache/momento.ts index 3c452a429e45..3694d0d2154b 100644 --- a/langchain/src/cache/momento.ts +++ b/langchain/src/cache/momento.ts @@ -1,173 +1 @@ -/* eslint-disable no-instanceof/no-instanceof */ -import { - ICacheClient, - CacheGet, - CacheSet, - InvalidArgumentError, -} from "@gomomento/sdk-core"; - -import { BaseCache, Generation } from "../schema/index.js"; -import { - deserializeStoredGeneration, - getCacheKey, - serializeGeneration, -} from "./base.js"; -import { ensureCacheExists } from "../util/momento.js"; - -/** - * The settings to instantiate the Momento standard cache. - */ -export interface MomentoCacheProps { - /** - * The Momento cache client. - */ - client: ICacheClient; - /** - * The name of the cache to use to store the data. - */ - cacheName: string; - /** - * The time to live for the cache items. If not specified, - * the cache client default is used. - */ - ttlSeconds?: number; - /** - * If true, ensure that the cache exists before returning. - * If false, the cache is not checked for existence. - * Defaults to true. - */ - ensureCacheExists?: true; -} - -/** - * A cache that uses Momento as the backing store. - * See https://gomomento.com. - * @example - * ```typescript - * const cache = new MomentoCache({ - * client: new CacheClient({ - * configuration: Configurations.Laptop.v1(), - * credentialProvider: CredentialProvider.fromEnvironmentVariable({ - * environmentVariableName: "MOMENTO_API_KEY", - * }), - * defaultTtlSeconds: 60 * 60 * 24, // Cache TTL set to 24 hours. 
- * }), - * cacheName: "langchain", - * }); - * // Initialize the OpenAI model with Momento cache for caching responses - * const model = new ChatOpenAI({ - * cache, - * }); - * await model.invoke("How are you today?"); - * const cachedValues = await cache.lookup("How are you today?", "llmKey"); - * ``` - */ -export class MomentoCache extends BaseCache { - private client: ICacheClient; - - private readonly cacheName: string; - - private readonly ttlSeconds?: number; - - private constructor(props: MomentoCacheProps) { - super(); - this.client = props.client; - this.cacheName = props.cacheName; - - this.validateTtlSeconds(props.ttlSeconds); - this.ttlSeconds = props.ttlSeconds; - } - - /** - * Create a new standard cache backed by Momento. - * - * @param {MomentoCacheProps} props The settings to instantiate the cache. - * @param {ICacheClient} props.client The Momento cache client. - * @param {string} props.cacheName The name of the cache to use to store the data. - * @param {number} props.ttlSeconds The time to live for the cache items. If not specified, - * the cache client default is used. - * @param {boolean} props.ensureCacheExists If true, ensure that the cache exists before returning. - * If false, the cache is not checked for existence. Defaults to true. - * @throws {@link InvalidArgumentError} if {@link props.ttlSeconds} is not strictly positive. - * @returns The Momento-backed cache. - */ - public static async fromProps( - props: MomentoCacheProps - ): Promise { - const instance = new MomentoCache(props); - if (props.ensureCacheExists || props.ensureCacheExists === undefined) { - await ensureCacheExists(props.client, props.cacheName); - } - return instance; - } - - /** - * Validate the user-specified TTL, if provided, is strictly positive. - * @param ttlSeconds The TTL to validate. 
- */ - private validateTtlSeconds(ttlSeconds?: number): void { - if (ttlSeconds !== undefined && ttlSeconds <= 0) { - throw new InvalidArgumentError("ttlSeconds must be positive."); - } - } - - /** - * Lookup LLM generations in cache by prompt and associated LLM key. - * @param prompt The prompt to lookup. - * @param llmKey The LLM key to lookup. - * @returns The generations associated with the prompt and LLM key, or null if not found. - */ - public async lookup( - prompt: string, - llmKey: string - ): Promise { - const key = getCacheKey(prompt, llmKey); - const getResponse = await this.client.get(this.cacheName, key); - - if (getResponse instanceof CacheGet.Hit) { - const value = getResponse.valueString(); - const parsedValue = JSON.parse(value); - if (!Array.isArray(parsedValue)) { - return null; - } - return JSON.parse(value).map(deserializeStoredGeneration); - } else if (getResponse instanceof CacheGet.Miss) { - return null; - } else if (getResponse instanceof CacheGet.Error) { - throw getResponse.innerException(); - } else { - throw new Error(`Unknown response type: ${getResponse.toString()}`); - } - } - - /** - * Update the cache with the given generations. - * - * Note this overwrites any existing generations for the given prompt and LLM key. - * - * @param prompt The prompt to update. - * @param llmKey The LLM key to update. - * @param value The generations to store. 
- */ - public async update( - prompt: string, - llmKey: string, - value: Generation[] - ): Promise { - const key = getCacheKey(prompt, llmKey); - const setResponse = await this.client.set( - this.cacheName, - key, - JSON.stringify(value.map(serializeGeneration)), - { ttl: this.ttlSeconds } - ); - - if (setResponse instanceof CacheSet.Success) { - // pass - } else if (setResponse instanceof CacheSet.Error) { - throw setResponse.innerException(); - } else { - throw new Error(`Unknown response type: ${setResponse.toString()}`); - } - } -} +export * from "@langchain/community/caches/momento"; \ No newline at end of file diff --git a/langchain/src/cache/upstash_redis.ts b/langchain/src/cache/upstash_redis.ts index 7f1660d6606d..55e717462226 100644 --- a/langchain/src/cache/upstash_redis.ts +++ b/langchain/src/cache/upstash_redis.ts @@ -1,91 +1 @@ -import { Redis, type RedisConfigNodejs } from "@upstash/redis"; - -import { BaseCache, Generation, StoredGeneration } from "../schema/index.js"; -import { - deserializeStoredGeneration, - getCacheKey, - serializeGeneration, -} from "./base.js"; - -export type UpstashRedisCacheProps = { - /** - * The config to use to instantiate an Upstash Redis client. - */ - config?: RedisConfigNodejs; - /** - * An existing Upstash Redis client. - */ - client?: Redis; -}; - -/** - * A cache that uses Upstash as the backing store. - * See https://docs.upstash.com/redis. 
- * @example - * ```typescript - * const cache = new UpstashRedisCache({ - * config: { - * url: "UPSTASH_REDIS_REST_URL", - * token: "UPSTASH_REDIS_REST_TOKEN", - * }, - * }); - * // Initialize the OpenAI model with Upstash Redis cache for caching responses - * const model = new ChatOpenAI({ - * cache, - * }); - * await model.invoke("How are you today?"); - * const cachedValues = await cache.lookup("How are you today?", "llmKey"); - * ``` - */ -export class UpstashRedisCache extends BaseCache { - private redisClient: Redis; - - constructor(props: UpstashRedisCacheProps) { - super(); - const { config, client } = props; - - if (client) { - this.redisClient = client; - } else if (config) { - this.redisClient = new Redis(config); - } else { - throw new Error( - `Upstash Redis caches require either a config object or a pre-configured client.` - ); - } - } - - /** - * Lookup LLM generations in cache by prompt and associated LLM key. - */ - public async lookup(prompt: string, llmKey: string) { - let idx = 0; - let key = getCacheKey(prompt, llmKey, String(idx)); - let value = await this.redisClient.get(key); - const generations: Generation[] = []; - - while (value) { - generations.push(deserializeStoredGeneration(value)); - idx += 1; - key = getCacheKey(prompt, llmKey, String(idx)); - value = await this.redisClient.get(key); - } - - return generations.length > 0 ? generations : null; - } - - /** - * Update the cache with the given generations. - * - * Note this overwrites any existing generations for the given prompt and LLM key. 
- */ - public async update(prompt: string, llmKey: string, value: Generation[]) { - for (let i = 0; i < value.length; i += 1) { - const key = getCacheKey(prompt, llmKey, String(i)); - await this.redisClient.set( - key, - JSON.stringify(serializeGeneration(value[i])) - ); - } - } -} +export * from "@langchain/community/caches/upstash_redis"; \ No newline at end of file diff --git a/langchain/src/callbacks/handlers/llmonitor.ts b/langchain/src/callbacks/handlers/llmonitor.ts index 9453d62ed7ef..792e7468d1a8 100644 --- a/langchain/src/callbacks/handlers/llmonitor.ts +++ b/langchain/src/callbacks/handlers/llmonitor.ts @@ -1,340 +1 @@ -import monitor from "llmonitor"; -import { LLMonitorOptions, ChatMessage, cJSON } from "llmonitor/types"; - -import { BaseRun, RunUpdate as BaseRunUpdate, KVMap } from "langsmith/schemas"; - -import { getEnvironmentVariable } from "../../util/env.js"; - -import { - BaseMessage, - ChainValues, - Generation, - LLMResult, -} from "../../schema/index.js"; -import { Serialized } from "../../load/serializable.js"; - -import { BaseCallbackHandler, BaseCallbackHandlerInput } from "../base.js"; - -type Role = "user" | "ai" | "system" | "function" | "tool"; - -// Langchain Helpers -// Input can be either a single message, an array of message, or an array of array of messages (batch requests) - -const parseRole = (id: string[]): Role => { - const roleHint = id[id.length - 1]; - - if (roleHint.includes("Human")) return "user"; - if (roleHint.includes("System")) return "system"; - if (roleHint.includes("AI")) return "ai"; - if (roleHint.includes("Function")) return "function"; - if (roleHint.includes("Tool")) return "tool"; - - return "ai"; -}; - -type Message = BaseMessage | Generation | string; - -type OutputMessage = ChatMessage | string; - -const PARAMS_TO_CAPTURE = [ - "stop", - "stop_sequences", - "function_call", - "functions", - "tools", - "tool_choice", - "response_format", -]; - -export const convertToLLMonitorMessages = ( - input: Message | 
Message[] | Message[][] -): OutputMessage | OutputMessage[] | OutputMessage[][] => { - const parseMessage = (raw: Message): OutputMessage => { - if (typeof raw === "string") return raw; - // sometimes the message is nested in a "message" property - if ("message" in raw) return parseMessage(raw.message as Message); - - // Serialize - const message = JSON.parse(JSON.stringify(raw)); - - try { - // "id" contains an array describing the constructor, with last item actual schema type - const role = parseRole(message.id); - - const obj = message.kwargs; - const text = message.text ?? obj.content; - - return { - role, - text, - ...(obj.additional_kwargs ?? {}), - }; - } catch (e) { - // if parsing fails, return the original message - return message.text ?? message; - } - }; - - if (Array.isArray(input)) { - // eslint-disable-next-line @typescript-eslint/ban-ts-comment - // @ts-ignore Confuses the compiler - return input.length === 1 - ? convertToLLMonitorMessages(input[0]) - : input.map(convertToLLMonitorMessages); - } - return parseMessage(input); -}; - -const parseInput = (rawInput: Record) => { - if (!rawInput) return null; - - const { input, inputs, question } = rawInput; - - if (input) return input; - if (inputs) return inputs; - if (question) return question; - - return rawInput; -}; - -const parseOutput = (rawOutput: Record) => { - if (!rawOutput) return null; - - const { text, output, answer, result } = rawOutput; - - if (text) return text; - if (answer) return answer; - if (output) return output; - if (result) return result; - - return rawOutput; -}; - -const parseExtraAndName = ( - llm: Serialized, - extraParams?: KVMap, - metadata?: KVMap -) => { - const params = { - ...(extraParams?.invocation_params ?? {}), - // eslint-disable-next-line @typescript-eslint/ban-ts-comment - // @ts-ignore this is a valid property - ...(llm?.kwargs ?? 
{}), - ...(metadata || {}), - }; - - const { model, model_name, modelName, model_id, userId, userProps, ...rest } = - params; - - const name = model || modelName || model_name || model_id || llm.id.at(-1); - - // Filter rest to only include params we want to capture - const extra = Object.fromEntries( - Object.entries(rest).filter( - ([key]) => - PARAMS_TO_CAPTURE.includes(key) || - ["string", "number", "boolean"].includes(typeof rest[key]) - ) - ) as cJSON; - - return { name, extra, userId, userProps }; -}; - -export interface Run extends BaseRun { - id: string; - child_runs: this[]; - child_execution_order: number; -} - -export interface RunUpdate extends BaseRunUpdate { - events: BaseRun["events"]; -} - -export interface LLMonitorHandlerFields - extends BaseCallbackHandlerInput, - LLMonitorOptions {} - -export class LLMonitorHandler - extends BaseCallbackHandler - implements LLMonitorHandlerFields -{ - name = "llmonitor_handler"; - - monitor: typeof monitor; - - constructor(fields: LLMonitorHandlerFields = {}) { - super(fields); - - this.monitor = monitor; - - if (fields) { - const { appId, apiUrl, verbose } = fields; - - this.monitor.init({ - verbose, - appId: appId ?? getEnvironmentVariable("LLMONITOR_APP_ID"), - apiUrl: apiUrl ?? 
getEnvironmentVariable("LLMONITOR_API_URL"), - }); - } - } - - async handleLLMStart( - llm: Serialized, - prompts: string[], - runId: string, - parentRunId?: string, - extraParams?: KVMap, - tags?: string[], - metadata?: KVMap - ): Promise { - const { name, extra, userId, userProps } = parseExtraAndName( - llm, - extraParams, - metadata - ); - - await this.monitor.trackEvent("llm", "start", { - runId, - parentRunId, - name, - input: convertToLLMonitorMessages(prompts), - extra, - userId, - userProps, - tags, - runtime: "langchain-js", - }); - } - - async handleChatModelStart( - llm: Serialized, - messages: BaseMessage[][], - runId: string, - parentRunId?: string, - extraParams?: KVMap, - tags?: string[], - metadata?: KVMap - ): Promise { - const { name, extra, userId, userProps } = parseExtraAndName( - llm, - extraParams, - metadata - ); - - await this.monitor.trackEvent("llm", "start", { - runId, - parentRunId, - name, - input: convertToLLMonitorMessages(messages), - extra, - userId, - userProps, - tags, - runtime: "langchain-js", - }); - } - - async handleLLMEnd(output: LLMResult, runId: string): Promise { - const { generations, llmOutput } = output; - - await this.monitor.trackEvent("llm", "end", { - runId, - output: convertToLLMonitorMessages(generations), - tokensUsage: { - completion: llmOutput?.tokenUsage?.completionTokens, - prompt: llmOutput?.tokenUsage?.promptTokens, - }, - }); - } - - async handleLLMError(error: Error, runId: string): Promise { - await this.monitor.trackEvent("llm", "error", { - runId, - error, - }); - } - - async handleChainStart( - chain: Serialized, - inputs: ChainValues, - runId: string, - parentRunId?: string, - tags?: string[], - metadata?: KVMap - ): Promise { - const { agentName, userId, userProps, ...rest } = metadata || {}; - - // allow the user to specify an agent name - const name = agentName || chain.id.at(-1); - - // Attempt to automatically detect if this is an agent or chain - const runType = - agentName || 
["AgentExecutor", "PlanAndExecute"].includes(name) - ? "agent" - : "chain"; - - await this.monitor.trackEvent(runType, "start", { - runId, - parentRunId, - name, - userId, - userProps, - input: parseInput(inputs) as cJSON, - extra: rest, - tags, - runtime: "langchain-js", - }); - } - - async handleChainEnd(outputs: ChainValues, runId: string): Promise { - await this.monitor.trackEvent("chain", "end", { - runId, - output: parseOutput(outputs) as cJSON, - }); - } - - async handleChainError(error: Error, runId: string): Promise { - await this.monitor.trackEvent("chain", "error", { - runId, - error, - }); - } - - async handleToolStart( - tool: Serialized, - input: string, - runId: string, - parentRunId?: string, - tags?: string[], - metadata?: KVMap - ): Promise { - const { toolName, userId, userProps, ...rest } = metadata || {}; - const name = toolName || tool.id.at(-1); - - await this.monitor.trackEvent("tool", "start", { - runId, - parentRunId, - name, - userId, - userProps, - input, - extra: rest, - tags, - runtime: "langchain-js", - }); - } - - async handleToolEnd(output: string, runId: string): Promise { - await this.monitor.trackEvent("tool", "end", { - runId, - output, - }); - } - - async handleToolError(error: Error, runId: string): Promise { - await this.monitor.trackEvent("tool", "error", { - runId, - error, - }); - } -} +export * from "@langchain/community/callbacks/handlers/llmonitor"; \ No newline at end of file diff --git a/langchain/src/callbacks/tests/llmonitor.int.test.ts b/langchain/src/callbacks/tests/llmonitor.int.test.ts index eb796840d66c..62c589f501d8 100644 --- a/langchain/src/callbacks/tests/llmonitor.int.test.ts +++ b/langchain/src/callbacks/tests/llmonitor.int.test.ts @@ -16,7 +16,7 @@ import { Calculator } from "../../tools/calculator.js"; import { initializeAgentExecutorWithOptions } from "../../agents/initialize.js"; -test("Test traced agent with openai functions", async () => { +test.skip("Test traced agent with openai functions", async 
() => { const tools = [new Calculator()]; const chat = new ChatOpenAI({ modelName: "gpt-3.5-turbo", temperature: 0 }); @@ -41,7 +41,7 @@ test("Test traced agent with openai functions", async () => { console.log(result); }); -test("Test traced chain with tags", async () => { +test.skip("Test traced chain with tags", async () => { const llm = new OpenAI(); const qaPrompt = new PromptTemplate({ template: "Q: {question} A:", @@ -75,7 +75,7 @@ test("Test traced chain with tags", async () => { ); }); -test("Test traced chat call with tags", async () => { +test.skip("Test traced chat call with tags", async () => { const chat = new ChatOpenAI({ callbacks: [new LLMonitorHandler({ verbose: true })], }); diff --git a/langchain/src/chat_models/baiduwenxin.ts b/langchain/src/chat_models/baiduwenxin.ts index 060da0298e76..af27928d6d21 100644 --- a/langchain/src/chat_models/baiduwenxin.ts +++ b/langchain/src/chat_models/baiduwenxin.ts @@ -1,542 +1 @@ -import { BaseChatModel, BaseChatModelParams } from "./base.js"; -import { - AIMessage, - BaseMessage, - ChatGeneration, - ChatMessage, - ChatResult, -} from "../schema/index.js"; -import { CallbackManagerForLLMRun } from "../callbacks/manager.js"; -import { getEnvironmentVariable } from "../util/env.js"; - -/** - * Type representing the role of a message in the Wenxin chat model. - */ -export type WenxinMessageRole = "assistant" | "user"; - -/** - * Interface representing a message in the Wenxin chat model. - */ -interface WenxinMessage { - role: WenxinMessageRole; - content: string; -} - -/** - * Interface representing the usage of tokens in a chat completion. - */ -interface TokenUsage { - completionTokens?: number; - promptTokens?: number; - totalTokens?: number; -} - -/** - * Interface representing a request for a chat completion. 
- */ -interface ChatCompletionRequest { - messages: WenxinMessage[]; - stream?: boolean; - user_id?: string; - temperature?: number; - top_p?: number; - penalty_score?: number; - system?: string; -} - -/** - * Interface representing a response from a chat completion. - */ -interface ChatCompletionResponse { - id: string; - object: string; - created: number; - result: string; - need_clear_history: boolean; - usage: TokenUsage; -} - -/** - * Interface defining the input to the ChatBaiduWenxin class. - */ -declare interface BaiduWenxinChatInput { - /** Model name to use. Available options are: ERNIE-Bot, ERNIE-Bot-turbo, ERNIE-Bot-4 - * @default "ERNIE-Bot-turbo" - */ - modelName: string; - - /** Whether to stream the results or not. Defaults to false. */ - streaming?: boolean; - - /** Messages to pass as a prefix to the prompt */ - prefixMessages?: WenxinMessage[]; - - /** - * ID of the end-user who made requests. - */ - userId?: string; - - /** - * API key to use when making requests. Defaults to the value of - * `BAIDU_API_KEY` environment variable. - */ - baiduApiKey?: string; - - /** - * Secret key to use when making requests. Defaults to the value of - * `BAIDU_SECRET_KEY` environment variable. - */ - baiduSecretKey?: string; - - /** Amount of randomness injected into the response. Ranges - * from 0 to 1 (0 is not included). Use temp closer to 0 for analytical / - * multiple choice, and temp closer to 1 for creative - * and generative tasks. Defaults to 0.95. - */ - temperature?: number; - - /** Total probability mass of tokens to consider at each step. Range - * from 0 to 1.0. Defaults to 0.8. - */ - topP?: number; - - /** Penalizes repeated tokens according to frequency. Range - * from 1.0 to 2.0. Defaults to 1.0. - */ - penaltyScore?: number; -} - -/** - * Function that extracts the custom role of a generic chat message. - * @param message Chat message from which to extract the custom role. - * @returns The custom role of the chat message. 
- */ -function extractGenericMessageCustomRole(message: ChatMessage) { - if (message.role !== "assistant" && message.role !== "user") { - console.warn(`Unknown message role: ${message.role}`); - } - - return message.role as WenxinMessageRole; -} - -/** - * Function that converts a base message to a Wenxin message role. - * @param message Base message to convert. - * @returns The Wenxin message role. - */ -function messageToWenxinRole(message: BaseMessage): WenxinMessageRole { - const type = message._getType(); - switch (type) { - case "ai": - return "assistant"; - case "human": - return "user"; - case "system": - throw new Error("System messages should not be here"); - case "function": - throw new Error("Function messages not supported"); - case "generic": { - if (!ChatMessage.isInstance(message)) - throw new Error("Invalid generic chat message"); - return extractGenericMessageCustomRole(message); - } - default: - throw new Error(`Unknown message type: ${type}`); - } -} - -/** - * Wrapper around Baidu ERNIE large language models that use the Chat endpoint. - * - * To use you should have the `BAIDU_API_KEY` and `BAIDU_SECRET_KEY` - * environment variable set. 
- * - * @augments BaseLLM - * @augments BaiduERNIEInput - * @example - * ```typescript - * const ernieTurbo = new ChatBaiduWenxin({ - * baiduApiKey: "YOUR-API-KEY", - * baiduSecretKey: "YOUR-SECRET-KEY", - * }); - * - * const ernie = new ChatBaiduWenxin({ - * modelName: "ERNIE-Bot", - * temperature: 1, - * baiduApiKey: "YOUR-API-KEY", - * baiduSecretKey: "YOUR-SECRET-KEY", - * }); - * - * const messages = [new HumanMessage("Hello")]; - * - * let res = await ernieTurbo.call(messages); - * - * res = await ernie.call(messages); - * ``` - */ -export class ChatBaiduWenxin - extends BaseChatModel - implements BaiduWenxinChatInput -{ - static lc_name() { - return "ChatBaiduWenxin"; - } - - get callKeys(): string[] { - return ["stop", "signal", "options"]; - } - - get lc_secrets(): { [key: string]: string } | undefined { - return { - baiduApiKey: "BAIDU_API_KEY", - baiduSecretKey: "BAIDU_SECRET_KEY", - }; - } - - get lc_aliases(): { [key: string]: string } | undefined { - return undefined; - } - - lc_serializable = true; - - baiduApiKey?: string; - - baiduSecretKey?: string; - - accessToken: string; - - streaming = false; - - prefixMessages?: WenxinMessage[]; - - userId?: string; - - modelName = "ERNIE-Bot-turbo"; - - apiUrl: string; - - temperature?: number | undefined; - - topP?: number | undefined; - - penaltyScore?: number | undefined; - - constructor(fields?: Partial & BaseChatModelParams) { - super(fields ?? {}); - - this.baiduApiKey = - fields?.baiduApiKey ?? getEnvironmentVariable("BAIDU_API_KEY"); - if (!this.baiduApiKey) { - throw new Error("Baidu API key not found"); - } - - this.baiduSecretKey = - fields?.baiduSecretKey ?? getEnvironmentVariable("BAIDU_SECRET_KEY"); - if (!this.baiduSecretKey) { - throw new Error("Baidu Secret key not found"); - } - - this.streaming = fields?.streaming ?? this.streaming; - this.prefixMessages = fields?.prefixMessages ?? this.prefixMessages; - this.userId = fields?.userId ?? 
this.userId; - this.temperature = fields?.temperature ?? this.temperature; - this.topP = fields?.topP ?? this.topP; - this.penaltyScore = fields?.penaltyScore ?? this.penaltyScore; - - this.modelName = fields?.modelName ?? this.modelName; - - if (this.modelName === "ERNIE-Bot") { - this.apiUrl = - "https://aip.baidubce.com/rpc/2.0/ai_custom/v1/wenxinworkshop/chat/completions"; - } else if (this.modelName === "ERNIE-Bot-turbo") { - this.apiUrl = - "https://aip.baidubce.com/rpc/2.0/ai_custom/v1/wenxinworkshop/chat/eb-instant"; - } else if (this.modelName === "ERNIE-Bot-4") { - this.apiUrl = - "https://aip.baidubce.com/rpc/2.0/ai_custom/v1/wenxinworkshop/chat/completions_pro"; - } else { - throw new Error(`Invalid model name: ${this.modelName}`); - } - } - - /** - * Method that retrieves the access token for making requests to the Baidu - * API. - * @param options Optional parsed call options. - * @returns The access token for making requests to the Baidu API. - */ - async getAccessToken(options?: this["ParsedCallOptions"]) { - const url = `https://aip.baidubce.com/oauth/2.0/token?grant_type=client_credentials&client_id=${this.baiduApiKey}&client_secret=${this.baiduSecretKey}`; - const response = await fetch(url, { - method: "POST", - headers: { - "Content-Type": "application/json", - Accept: "application/json", - }, - signal: options?.signal, - }); - if (!response.ok) { - const text = await response.text(); - const error = new Error( - `Baidu get access token failed with status code ${response.status}, response: ${text}` - ); - // eslint-disable-next-line @typescript-eslint/no-explicit-any - (error as any).response = response; - throw error; - } - const json = await response.json(); - return json.access_token; - } - - /** - * Get the parameters used to invoke the model - */ - invocationParams(): Omit { - return { - stream: this.streaming, - user_id: this.userId, - temperature: this.temperature, - top_p: this.topP, - penalty_score: this.penaltyScore, - }; - } - - /** 
- * Get the identifying parameters for the model - */ - identifyingParams() { - return { - model_name: this.modelName, - ...this.invocationParams(), - }; - } - - /** @ignore */ - async _generate( - messages: BaseMessage[], - options?: this["ParsedCallOptions"], - runManager?: CallbackManagerForLLMRun - ): Promise { - const tokenUsage: TokenUsage = {}; - - const params = this.invocationParams(); - - // Wenxin requires the system message to be put in the params, not messages array - const systemMessage = messages.find( - (message) => message._getType() === "system" - ); - if (systemMessage) { - // eslint-disable-next-line no-param-reassign - messages = messages.filter((message) => message !== systemMessage); - params.system = systemMessage.text; - } - const messagesMapped: WenxinMessage[] = messages.map((message) => ({ - role: messageToWenxinRole(message), - content: message.text, - })); - - const data = params.stream - ? await new Promise((resolve, reject) => { - let response: ChatCompletionResponse; - let rejected = false; - let resolved = false; - this.completionWithRetry( - { - ...params, - messages: messagesMapped, - }, - true, - options?.signal, - (event) => { - const data = JSON.parse(event.data); - - if (data?.error_code) { - if (rejected) { - return; - } - rejected = true; - reject(new Error(data?.error_msg)); - return; - } - - const message = data as { - id: string; - object: string; - created: number; - sentence_id?: number; - is_end: boolean; - result: string; - need_clear_history: boolean; - usage: TokenUsage; - }; - - // on the first message set the response properties - if (!response) { - response = { - id: message.id, - object: message.object, - created: message.created, - result: message.result, - need_clear_history: message.need_clear_history, - usage: message.usage, - }; - } else { - response.result += message.result; - response.created = message.created; - response.need_clear_history = message.need_clear_history; - response.usage = message.usage; 
- } - - // TODO this should pass part.index to the callback - // when that's supported there - // eslint-disable-next-line no-void - void runManager?.handleLLMNewToken(message.result ?? ""); - - if (message.is_end) { - if (resolved || rejected) { - return; - } - resolved = true; - resolve(response); - } - } - ).catch((error) => { - if (!rejected) { - rejected = true; - reject(error); - } - }); - }) - : await this.completionWithRetry( - { - ...params, - messages: messagesMapped, - }, - false, - options?.signal - ).then((data) => { - if (data?.error_code) { - throw new Error(data?.error_msg); - } - return data; - }); - - const { - completion_tokens: completionTokens, - prompt_tokens: promptTokens, - total_tokens: totalTokens, - } = data.usage ?? {}; - - if (completionTokens) { - tokenUsage.completionTokens = - (tokenUsage.completionTokens ?? 0) + completionTokens; - } - - if (promptTokens) { - tokenUsage.promptTokens = (tokenUsage.promptTokens ?? 0) + promptTokens; - } - - if (totalTokens) { - tokenUsage.totalTokens = (tokenUsage.totalTokens ?? 0) + totalTokens; - } - - const generations: ChatGeneration[] = []; - const text = data.result ?? 
""; - generations.push({ - text, - message: new AIMessage(text), - }); - return { - generations, - llmOutput: { tokenUsage }, - }; - } - - /** @ignore */ - async completionWithRetry( - request: ChatCompletionRequest, - stream: boolean, - signal?: AbortSignal, - onmessage?: (event: MessageEvent) => void - ) { - // The first run will get the accessToken - if (!this.accessToken) { - this.accessToken = await this.getAccessToken(); - } - - const makeCompletionRequest = async () => { - const url = `${this.apiUrl}?access_token=${this.accessToken}`; - const response = await fetch(url, { - method: "POST", - headers: { - "Content-Type": "application/json", - }, - body: JSON.stringify(request), - signal, - }); - - if (!stream) { - return response.json(); - } else { - if (response.body) { - // response will not be a stream if an error occurred - if ( - !response.headers - .get("content-type") - ?.startsWith("text/event-stream") - ) { - onmessage?.( - new MessageEvent("message", { - data: await response.text(), - }) - ); - return; - } - - const reader = response.body.getReader(); - - const decoder = new TextDecoder("utf-8"); - let data = ""; - - let continueReading = true; - while (continueReading) { - const { done, value } = await reader.read(); - if (done) { - continueReading = false; - break; - } - data += decoder.decode(value); - - let continueProcessing = true; - while (continueProcessing) { - const newlineIndex = data.indexOf("\n"); - if (newlineIndex === -1) { - continueProcessing = false; - break; - } - const line = data.slice(0, newlineIndex); - data = data.slice(newlineIndex + 1); - - if (line.startsWith("data:")) { - const event = new MessageEvent("message", { - data: line.slice("data:".length).trim(), - }); - onmessage?.(event); - } - } - } - } - } - }; - return this.caller.call(makeCompletionRequest); - } - - _llmType() { - return "baiduwenxin"; - } - - /** @ignore */ - _combineLLMOutput() { - return []; - } -} +export * from 
"@langchain/community/chat_models/baiduwenxin"; \ No newline at end of file diff --git a/langchain/src/chat_models/bedrock/index.ts b/langchain/src/chat_models/bedrock/index.ts index 04fabc096d00..64061633047e 100644 --- a/langchain/src/chat_models/bedrock/index.ts +++ b/langchain/src/chat_models/bedrock/index.ts @@ -1,38 +1 @@ -import { defaultProvider } from "@aws-sdk/credential-provider-node"; -import { BaseBedrockInput } from "../../util/bedrock.js"; -import { BedrockChat as BaseBedrockChat } from "./web.js"; -import { BaseChatModelParams } from "../base.js"; - -/** - * @example - * ```typescript - * const model = new BedrockChat({ - * model: "anthropic.claude-v2", - * region: "us-east-1", - * }); - * const res = await model.invoke([{ content: "Tell me a joke" }]); - * console.log(res); - * ``` - */ -export class BedrockChat extends BaseBedrockChat { - static lc_name() { - return "BedrockChat"; - } - - constructor(fields?: Partial & BaseChatModelParams) { - super({ - ...fields, - credentials: fields?.credentials ?? defaultProvider(), - }); - } -} - -export { - convertMessagesToPromptAnthropic, - convertMessagesToPrompt, -} from "./web.js"; - -/** - * @deprecated Use `BedrockChat` instead. 
- */ -export const ChatBedrock = BedrockChat; +export * from "@langchain/community/chat_models/bedrock"; \ No newline at end of file diff --git a/langchain/src/chat_models/bedrock/web.ts b/langchain/src/chat_models/bedrock/web.ts index 0431f5baa359..c21182a1ce2d 100644 --- a/langchain/src/chat_models/bedrock/web.ts +++ b/langchain/src/chat_models/bedrock/web.ts @@ -1,431 +1 @@ -import { SignatureV4 } from "@smithy/signature-v4"; -import { HttpRequest } from "@smithy/protocol-http"; -import { EventStreamCodec } from "@smithy/eventstream-codec"; -import { fromUtf8, toUtf8 } from "@smithy/util-utf8"; -import { Sha256 } from "@aws-crypto/sha256-js"; - -import { - BaseBedrockInput, - BedrockLLMInputOutputAdapter, - type CredentialType, -} from "../../util/bedrock.js"; -import { getEnvironmentVariable } from "../../util/env.js"; -import { SimpleChatModel, BaseChatModelParams } from "../base.js"; -import { CallbackManagerForLLMRun } from "../../callbacks/manager.js"; -import { - AIMessageChunk, - BaseMessage, - AIMessage, - ChatGenerationChunk, - ChatMessage, -} from "../../schema/index.js"; -import type { SerializedFields } from "../../load/map_keys.js"; - -function convertOneMessageToText( - message: BaseMessage, - humanPrompt: string, - aiPrompt: string -): string { - if (message._getType() === "human") { - return `${humanPrompt} ${message.content}`; - } else if (message._getType() === "ai") { - return `${aiPrompt} ${message.content}`; - } else if (message._getType() === "system") { - return `${humanPrompt} ${message.content}`; - } else if (ChatMessage.isInstance(message)) { - return `\n\n${ - message.role[0].toUpperCase() + message.role.slice(1) - }: {message.content}`; - } - throw new Error(`Unknown role: ${message._getType()}`); -} - -export function convertMessagesToPromptAnthropic( - messages: BaseMessage[], - humanPrompt = "\n\nHuman:", - aiPrompt = "\n\nAssistant:" -): string { - const messagesCopy = [...messages]; - - if ( - messagesCopy.length === 0 || - 
messagesCopy[messagesCopy.length - 1]._getType() !== "ai" - ) { - messagesCopy.push(new AIMessage({ content: "" })); - } - - return messagesCopy - .map((message) => convertOneMessageToText(message, humanPrompt, aiPrompt)) - .join(""); -} - -/** - * Function that converts an array of messages into a single string prompt - * that can be used as input for a chat model. It delegates the conversion - * logic to the appropriate provider-specific function. - * @param messages Array of messages to be converted. - * @param options Options to be used during the conversion. - * @returns A string prompt that can be used as input for a chat model. - */ -export function convertMessagesToPrompt( - messages: BaseMessage[], - provider: string -): string { - if (provider === "anthropic") { - return convertMessagesToPromptAnthropic(messages); - } - throw new Error(`Provider ${provider} does not support chat.`); -} - -/** - * A type of Large Language Model (LLM) that interacts with the Bedrock - * service. It extends the base `LLM` class and implements the - * `BaseBedrockInput` interface. The class is designed to authenticate and - * interact with the Bedrock service, which is a part of Amazon Web - * Services (AWS). It uses AWS credentials for authentication and can be - * configured with various parameters such as the model to use, the AWS - * region, and the maximum number of tokens to generate. 
- * @example - * ```typescript - * const model = new BedrockChat({ - * model: "anthropic.claude-v2", - * region: "us-east-1", - * }); - * const res = await model.invoke([{ content: "Tell me a joke" }]); - * console.log(res); - * ``` - */ -export class BedrockChat extends SimpleChatModel implements BaseBedrockInput { - model = "amazon.titan-tg1-large"; - - region: string; - - credentials: CredentialType; - - temperature?: number | undefined = undefined; - - maxTokens?: number | undefined = undefined; - - fetchFn: typeof fetch; - - endpointHost?: string; - - /** @deprecated */ - stopSequences?: string[]; - - modelKwargs?: Record; - - codec: EventStreamCodec = new EventStreamCodec(toUtf8, fromUtf8); - - streaming = false; - - lc_serializable = true; - - get lc_aliases(): Record { - return { - model: "model_id", - region: "region_name", - }; - } - - get lc_secrets(): { [key: string]: string } | undefined { - return { - "credentials.accessKeyId": "BEDROCK_AWS_ACCESS_KEY_ID", - "credentials.secretAccessKey": "BEDROCK_AWS_SECRET_ACCESS_KEY", - }; - } - - get lc_attributes(): SerializedFields | undefined { - return { region: this.region }; - } - - _llmType() { - return "bedrock"; - } - - static lc_name() { - return "BedrockChat"; - } - - constructor(fields?: Partial & BaseChatModelParams) { - super(fields ?? {}); - - this.model = fields?.model ?? this.model; - const allowedModels = ["ai21", "anthropic", "amazon", "cohere", "meta"]; - if (!allowedModels.includes(this.model.split(".")[0])) { - throw new Error( - `Unknown model: '${this.model}', only these are supported: ${allowedModels}` - ); - } - const region = - fields?.region ?? getEnvironmentVariable("AWS_DEFAULT_REGION"); - if (!region) { - throw new Error( - "Please set the AWS_DEFAULT_REGION environment variable or pass it to the constructor as the region field." 
- ); - } - this.region = region; - - const credentials = fields?.credentials; - if (!credentials) { - throw new Error( - "Please set the AWS credentials in the 'credentials' field." - ); - } - this.credentials = credentials; - - this.temperature = fields?.temperature ?? this.temperature; - this.maxTokens = fields?.maxTokens ?? this.maxTokens; - this.fetchFn = fields?.fetchFn ?? fetch.bind(globalThis); - this.endpointHost = fields?.endpointHost ?? fields?.endpointUrl; - this.stopSequences = fields?.stopSequences; - this.modelKwargs = fields?.modelKwargs; - this.streaming = fields?.streaming ?? this.streaming; - } - - /** Call out to Bedrock service model. - Arguments: - prompt: The prompt to pass into the model. - - Returns: - The string generated by the model. - - Example: - response = model.call("Tell me a joke.") - */ - async _call( - messages: BaseMessage[], - options: this["ParsedCallOptions"], - runManager?: CallbackManagerForLLMRun - ): Promise { - const service = "bedrock-runtime"; - const endpointHost = - this.endpointHost ?? `${service}.${this.region}.amazonaws.com`; - const provider = this.model.split(".")[0]; - if (this.streaming) { - const stream = this._streamResponseChunks(messages, options, runManager); - let finalResult: ChatGenerationChunk | undefined; - for await (const chunk of stream) { - if (finalResult === undefined) { - finalResult = chunk; - } else { - finalResult = finalResult.concat(chunk); - } - } - const messageContent = finalResult?.message.content; - if (messageContent && typeof messageContent !== "string") { - throw new Error( - "Non-string output for ChatBedrock is currently not supported." - ); - } - return messageContent ?? ""; - } - - const response = await this._signedFetch(messages, options, { - bedrockMethod: "invoke", - endpointHost, - provider, - }); - const json = await response.json(); - if (!response.ok) { - throw new Error( - `Error ${response.status}: ${json.message ?? 
JSON.stringify(json)}` - ); - } - const text = BedrockLLMInputOutputAdapter.prepareOutput(provider, json); - return text; - } - - async _signedFetch( - messages: BaseMessage[], - options: this["ParsedCallOptions"], - fields: { - bedrockMethod: "invoke" | "invoke-with-response-stream"; - endpointHost: string; - provider: string; - } - ) { - const { bedrockMethod, endpointHost, provider } = fields; - const inputBody = BedrockLLMInputOutputAdapter.prepareInput( - provider, - convertMessagesToPromptAnthropic(messages), - this.maxTokens, - this.temperature, - options.stop ?? this.stopSequences, - this.modelKwargs, - fields.bedrockMethod - ); - - const url = new URL( - `https://${endpointHost}/model/${this.model}/${bedrockMethod}` - ); - - const request = new HttpRequest({ - hostname: url.hostname, - path: url.pathname, - protocol: url.protocol, - method: "POST", // method must be uppercase - body: JSON.stringify(inputBody), - query: Object.fromEntries(url.searchParams.entries()), - headers: { - // host is required by AWS Signature V4: https://docs.aws.amazon.com/general/latest/gr/sigv4-create-canonical-request.html - host: url.host, - accept: "application/json", - "content-type": "application/json", - }, - }); - - const signer = new SignatureV4({ - credentials: this.credentials, - service: "bedrock", - region: this.region, - sha256: Sha256, - }); - - const signedRequest = await signer.sign(request); - - // Send request to AWS using the low-level fetch API - const response = await this.caller.callWithOptions( - { signal: options.signal }, - async () => - this.fetchFn(url, { - headers: signedRequest.headers, - body: signedRequest.body, - method: signedRequest.method, - }) - ); - return response; - } - - async *_streamResponseChunks( - messages: BaseMessage[], - options: this["ParsedCallOptions"], - runManager?: CallbackManagerForLLMRun - ): AsyncGenerator { - const provider = this.model.split(".")[0]; - const service = "bedrock-runtime"; - - const endpointHost = - 
this.endpointHost ?? `${service}.${this.region}.amazonaws.com`; - - const bedrockMethod = - provider === "anthropic" || provider === "cohere" || provider === "meta" - ? "invoke-with-response-stream" - : "invoke"; - - const response = await this._signedFetch(messages, options, { - bedrockMethod, - endpointHost, - provider, - }); - - if (response.status < 200 || response.status >= 300) { - throw Error( - `Failed to access underlying url '${endpointHost}': got ${ - response.status - } ${response.statusText}: ${await response.text()}` - ); - } - - if ( - provider === "anthropic" || - provider === "cohere" || - provider === "meta" - ) { - const reader = response.body?.getReader(); - const decoder = new TextDecoder(); - for await (const chunk of this._readChunks(reader)) { - const event = this.codec.decode(chunk); - if ( - (event.headers[":event-type"] !== undefined && - event.headers[":event-type"].value !== "chunk") || - event.headers[":content-type"].value !== "application/json" - ) { - throw Error(`Failed to get event chunk: got ${chunk}`); - } - const body = JSON.parse(decoder.decode(event.body)); - if (body.message) { - throw new Error(body.message); - } - if (body.bytes !== undefined) { - const chunkResult = JSON.parse( - decoder.decode( - Uint8Array.from(atob(body.bytes), (m) => m.codePointAt(0) ?? 
0) - ) - ); - const text = BedrockLLMInputOutputAdapter.prepareOutput( - provider, - chunkResult - ); - yield new ChatGenerationChunk({ - text, - message: new AIMessageChunk({ content: text }), - }); - // eslint-disable-next-line no-void - void runManager?.handleLLMNewToken(text); - } - } - } else { - const json = await response.json(); - const text = BedrockLLMInputOutputAdapter.prepareOutput(provider, json); - yield new ChatGenerationChunk({ - text, - message: new AIMessageChunk({ content: text }), - }); - // eslint-disable-next-line no-void - void runManager?.handleLLMNewToken(text); - } - } - - // eslint-disable-next-line @typescript-eslint/no-explicit-any - _readChunks(reader: any) { - function _concatChunks(a: Uint8Array, b: Uint8Array) { - const newBuffer = new Uint8Array(a.length + b.length); - newBuffer.set(a); - newBuffer.set(b, a.length); - return newBuffer; - } - - function getMessageLength(buffer: Uint8Array) { - if (buffer.byteLength === 0) return 0; - const view = new DataView( - buffer.buffer, - buffer.byteOffset, - buffer.byteLength - ); - - return view.getUint32(0, false); - } - - return { - async *[Symbol.asyncIterator]() { - let readResult = await reader.read(); - - let buffer: Uint8Array = new Uint8Array(0); - while (!readResult.done) { - const chunk: Uint8Array = readResult.value; - - buffer = _concatChunks(buffer, chunk); - let messageLength = getMessageLength(buffer); - - while (buffer.byteLength > 0 && buffer.byteLength >= messageLength) { - yield buffer.slice(0, messageLength); - buffer = buffer.slice(messageLength); - messageLength = getMessageLength(buffer); - } - - readResult = await reader.read(); - } - }, - }; - } - - _combineLLMOutput() { - return {}; - } -} - -/** - * @deprecated Use `BedrockChat` instead. 
- */ -export const ChatBedrock = BedrockChat; +export * from "@langchain/community/chat_models/bedrock/web"; \ No newline at end of file diff --git a/langchain/src/chat_models/cloudflare_workersai.ts b/langchain/src/chat_models/cloudflare_workersai.ts index b8d2f5971814..702fbab8dc60 100644 --- a/langchain/src/chat_models/cloudflare_workersai.ts +++ b/langchain/src/chat_models/cloudflare_workersai.ts @@ -1,247 +1 @@ -import { SimpleChatModel, BaseChatModelParams } from "./base.js"; -import { BaseLanguageModelCallOptions } from "../base_language/index.js"; -import { - AIMessageChunk, - BaseMessage, - ChatGenerationChunk, - ChatMessage, -} from "../schema/index.js"; -import { getEnvironmentVariable } from "../util/env.js"; -import { CloudflareWorkersAIInput } from "../llms/cloudflare_workersai.js"; -import { CallbackManagerForLLMRun } from "../callbacks/manager.js"; -import { convertEventStreamToIterableReadableDataStream } from "../util/event-source-parse.js"; - -/** - * An interface defining the options for a Cloudflare Workers AI call. It extends - * the BaseLanguageModelCallOptions interface. - */ -export interface ChatCloudflareWorkersAICallOptions - extends BaseLanguageModelCallOptions {} - -/** - * A class that enables calls to the Cloudflare Workers AI API to access large language - * models in a chat-like fashion. It extends the SimpleChatModel class and - * implements the CloudflareWorkersAIInput interface. 
- * @example - * ```typescript - * const model = new ChatCloudflareWorkersAI({ - * model: "@cf/meta/llama-2-7b-chat-int8", - * cloudflareAccountId: process.env.CLOUDFLARE_ACCOUNT_ID, - * cloudflareApiToken: process.env.CLOUDFLARE_API_TOKEN - * }); - * - * const response = await model.invoke([ - * ["system", "You are a helpful assistant that translates English to German."], - * ["human", `Translate "I love programming".`] - * ]); - * - * console.log(response); - * ``` - */ -export class ChatCloudflareWorkersAI - extends SimpleChatModel - implements CloudflareWorkersAIInput -{ - static lc_name() { - return "ChatCloudflareWorkersAI"; - } - - lc_serializable = true; - - model = "@cf/meta/llama-2-7b-chat-int8"; - - cloudflareAccountId?: string; - - cloudflareApiToken?: string; - - baseUrl: string; - - streaming = false; - - constructor(fields?: CloudflareWorkersAIInput & BaseChatModelParams) { - super(fields ?? {}); - - this.model = fields?.model ?? this.model; - this.streaming = fields?.streaming ?? this.streaming; - this.cloudflareAccountId = - fields?.cloudflareAccountId ?? - getEnvironmentVariable("CLOUDFLARE_ACCOUNT_ID"); - this.cloudflareApiToken = - fields?.cloudflareApiToken ?? - getEnvironmentVariable("CLOUDFLARE_API_TOKEN"); - this.baseUrl = - fields?.baseUrl ?? - `https://api.cloudflare.com/client/v4/accounts/${this.cloudflareAccountId}/ai/run`; - if (this.baseUrl.endsWith("/")) { - this.baseUrl = this.baseUrl.slice(0, -1); - } - } - - get lc_secrets(): { [key: string]: string } | undefined { - return { - cloudflareApiToken: "CLOUDFLARE_API_TOKEN", - }; - } - - _llmType() { - return "cloudflare"; - } - - /** Get the identifying parameters for this LLM. 
*/ - get identifyingParams() { - return { model: this.model }; - } - - /** - * Get the parameters used to invoke the model - */ - invocationParams(_options?: this["ParsedCallOptions"]) { - return { - model: this.model, - }; - } - - _combineLLMOutput() { - return {}; - } - - /** - * Method to validate the environment. - */ - validateEnvironment() { - if (!this.cloudflareAccountId) { - throw new Error( - `No Cloudflare account ID found. Please provide it when instantiating the CloudflareWorkersAI class, or set it as "CLOUDFLARE_ACCOUNT_ID" in your environment variables.` - ); - } - if (!this.cloudflareApiToken) { - throw new Error( - `No Cloudflare API key found. Please provide it when instantiating the CloudflareWorkersAI class, or set it as "CLOUDFLARE_API_KEY" in your environment variables.` - ); - } - } - - async _request( - messages: BaseMessage[], - options: this["ParsedCallOptions"], - stream?: boolean - ) { - this.validateEnvironment(); - const url = `${this.baseUrl}/${this.model}`; - const headers = { - Authorization: `Bearer ${this.cloudflareApiToken}`, - "Content-Type": "application/json", - }; - - const formattedMessages = this._formatMessages(messages); - - const data = { messages: formattedMessages, stream }; - return this.caller.call(async () => { - const response = await fetch(url, { - method: "POST", - headers, - body: JSON.stringify(data), - signal: options.signal, - }); - if (!response.ok) { - const error = new Error( - `Cloudflare LLM call failed with status code ${response.status}` - ); - // eslint-disable-next-line @typescript-eslint/no-explicit-any - (error as any).response = response; - throw error; - } - return response; - }); - } - - async *_streamResponseChunks( - messages: BaseMessage[], - options: this["ParsedCallOptions"], - runManager?: CallbackManagerForLLMRun - ): AsyncGenerator { - const response = await this._request(messages, options, true); - if (!response.body) { - throw new Error("Empty response from Cloudflare. 
Please try again."); - } - const stream = convertEventStreamToIterableReadableDataStream( - response.body - ); - for await (const chunk of stream) { - if (chunk !== "[DONE]") { - const parsedChunk = JSON.parse(chunk); - const generationChunk = new ChatGenerationChunk({ - message: new AIMessageChunk({ content: parsedChunk.response }), - text: parsedChunk.response, - }); - yield generationChunk; - // eslint-disable-next-line no-void - void runManager?.handleLLMNewToken(generationChunk.text ?? ""); - } - } - } - - protected _formatMessages( - messages: BaseMessage[] - ): { role: string; content: string }[] { - const formattedMessages = messages.map((message) => { - let role; - if (message._getType() === "human") { - role = "user"; - } else if (message._getType() === "ai") { - role = "assistant"; - } else if (message._getType() === "system") { - role = "system"; - } else if (ChatMessage.isInstance(message)) { - role = message.role; - } else { - console.warn( - `Unsupported message type passed to Cloudflare: "${message._getType()}"` - ); - role = "user"; - } - if (typeof message.content !== "string") { - throw new Error( - "ChatCloudflareWorkersAI currently does not support non-string message content." 
- ); - } - return { - role, - content: message.content, - }; - }); - return formattedMessages; - } - - /** @ignore */ - async _call( - messages: BaseMessage[], - options: this["ParsedCallOptions"], - runManager?: CallbackManagerForLLMRun - ): Promise { - if (!this.streaming) { - const response = await this._request(messages, options); - - const responseData = await response.json(); - - return responseData.result.response; - } else { - const stream = this._streamResponseChunks(messages, options, runManager); - let finalResult: ChatGenerationChunk | undefined; - for await (const chunk of stream) { - if (finalResult === undefined) { - finalResult = chunk; - } else { - finalResult = finalResult.concat(chunk); - } - } - const messageContent = finalResult?.message.content; - if (messageContent && typeof messageContent !== "string") { - throw new Error( - "Non-string output for ChatCloudflareWorkersAI is currently not supported." - ); - } - return messageContent ?? ""; - } - } -} +export * from "@langchain/community/chat_models/cloudflare_workersai"; \ No newline at end of file diff --git a/langchain/src/chat_models/fireworks.ts b/langchain/src/chat_models/fireworks.ts index 29e12cc34ea2..9903af0282f9 100644 --- a/langchain/src/chat_models/fireworks.ts +++ b/langchain/src/chat_models/fireworks.ts @@ -1,137 +1 @@ -import type { OpenAI as OpenAIClient } from "openai"; -import type { ChatOpenAICallOptions, OpenAIChatInput } from "./openai.js"; -import type { OpenAICoreRequestOptions } from "../types/openai-types.js"; -import type { BaseChatModelParams } from "./base.js"; -import { ChatOpenAI } from "./openai.js"; -import { getEnvironmentVariable } from "../util/env.js"; - -type FireworksUnsupportedArgs = - | "frequencyPenalty" - | "presencePenalty" - | "logitBias" - | "functions"; - -type FireworksUnsupportedCallOptions = "functions" | "function_call" | "tools"; - -export type ChatFireworksCallOptions = Partial< - Omit ->; - -/** - * Wrapper around Fireworks API for large 
language models fine-tuned for chat - * - * Fireworks API is compatible to the OpenAI API with some limitations described in - * https://readme.fireworks.ai/docs/openai-compatibility. - * - * To use, you should have the `openai` package installed and - * the `FIREWORKS_API_KEY` environment variable set. - * @example - * ```typescript - * const model = new ChatFireworks({ - * temperature: 0.9, - * fireworksApiKey: "YOUR-API-KEY", - * }); - * - * const response = await model.invoke("Hello, how are you?"); - * console.log(response); - * ``` - */ -export class ChatFireworks extends ChatOpenAI { - static lc_name() { - return "ChatFireworks"; - } - - _llmType() { - return "fireworks"; - } - - get lc_secrets(): { [key: string]: string } | undefined { - return { - fireworksApiKey: "FIREWORKS_API_KEY", - }; - } - - lc_serializable = true; - - fireworksApiKey?: string; - - constructor( - fields?: Partial< - Omit - > & - BaseChatModelParams & { fireworksApiKey?: string } - ) { - const fireworksApiKey = - fields?.fireworksApiKey || getEnvironmentVariable("FIREWORKS_API_KEY"); - - if (!fireworksApiKey) { - throw new Error( - `Fireworks API key not found. 
Please set the FIREWORKS_API_KEY environment variable or provide the key into "fireworksApiKey"` - ); - } - - super({ - ...fields, - modelName: - fields?.modelName || "accounts/fireworks/models/llama-v2-13b-chat", - openAIApiKey: fireworksApiKey, - configuration: { - baseURL: "https://api.fireworks.ai/inference/v1", - }, - }); - - this.fireworksApiKey = fireworksApiKey; - } - - toJSON() { - const result = super.toJSON(); - - if ( - "kwargs" in result && - typeof result.kwargs === "object" && - result.kwargs != null - ) { - delete result.kwargs.openai_api_key; - delete result.kwargs.configuration; - } - - return result; - } - - async completionWithRetry( - request: OpenAIClient.Chat.ChatCompletionCreateParamsStreaming, - options?: OpenAICoreRequestOptions - ): Promise>; - - async completionWithRetry( - request: OpenAIClient.Chat.ChatCompletionCreateParamsNonStreaming, - options?: OpenAICoreRequestOptions - ): Promise; - - /** - * Calls the Fireworks API with retry logic in case of failures. - * @param request The request to send to the Fireworks API. - * @param options Optional configuration for the API call. - * @returns The response from the Fireworks API. 
- */ - async completionWithRetry( - request: - | OpenAIClient.Chat.ChatCompletionCreateParamsStreaming - | OpenAIClient.Chat.ChatCompletionCreateParamsNonStreaming, - options?: OpenAICoreRequestOptions - ): Promise< - | AsyncIterable - | OpenAIClient.Chat.Completions.ChatCompletion - > { - delete request.frequency_penalty; - delete request.presence_penalty; - delete request.logit_bias; - delete request.functions; - - if (request.stream === true) { - return super.completionWithRetry(request, options); - } - - return super.completionWithRetry(request, options); - } -} +export * from "@langchain/community/chat_models/fireworks"; \ No newline at end of file diff --git a/langchain/src/chat_models/googlepalm.ts b/langchain/src/chat_models/googlepalm.ts index 56cca1023943..ff52c6cd4cf2 100644 --- a/langchain/src/chat_models/googlepalm.ts +++ b/langchain/src/chat_models/googlepalm.ts @@ -1,340 +1 @@ -import { DiscussServiceClient } from "@google-ai/generativelanguage"; -import type { protos } from "@google-ai/generativelanguage"; -import { GoogleAuth } from "google-auth-library"; -import { CallbackManagerForLLMRun } from "../callbacks/manager.js"; -import { - AIMessage, - BaseMessage, - ChatMessage, - ChatResult, - isBaseMessage, -} from "../schema/index.js"; -import { getEnvironmentVariable } from "../util/env.js"; -import { BaseChatModel, BaseChatModelParams } from "./base.js"; - -export type BaseMessageExamplePair = { - input: BaseMessage; - output: BaseMessage; -}; - -/** - * An interface defining the input to the ChatGooglePaLM class. - */ -export interface GooglePaLMChatInput extends BaseChatModelParams { - /** - * Model Name to use - * - * Note: The format must follow the pattern - `models/{model}` - */ - modelName?: string; - - /** - * Controls the randomness of the output. - * - * Values can range from [0.0,1.0], inclusive. 
A value closer to 1.0 - * will produce responses that are more varied and creative, while - * a value closer to 0.0 will typically result in less surprising - * responses from the model. - * - * Note: The default value varies by model - */ - temperature?: number; - - /** - * Top-p changes how the model selects tokens for output. - * - * Tokens are selected from most probable to least until the sum - * of their probabilities equals the top-p value. - * - * For example, if tokens A, B, and C have a probability of - * .3, .2, and .1 and the top-p value is .5, then the model will - * select either A or B as the next token (using temperature). - * - * Note: The default value varies by model - */ - topP?: number; - - /** - * Top-k changes how the model selects tokens for output. - * - * A top-k of 1 means the selected token is the most probable among - * all tokens in the model’s vocabulary (also called greedy decoding), - * while a top-k of 3 means that the next token is selected from - * among the 3 most probable tokens (using temperature). - * - * Note: The default value varies by model - */ - topK?: number; - - examples?: - | protos.google.ai.generativelanguage.v1beta2.IExample[] - | BaseMessageExamplePair[]; - - /** - * Google Palm API key to use - */ - apiKey?: string; -} - -function getMessageAuthor(message: BaseMessage) { - const type = message._getType(); - if (ChatMessage.isInstance(message)) { - return message.role; - } - return message.name ?? type; -} - -/** - * A class that wraps the Google Palm chat model. 
- * @example - * ```typescript - * const model = new ChatGooglePaLM({ - * apiKey: "", - * temperature: 0.7, - * modelName: "models/chat-bison-001", - * topK: 40, - * topP: 1, - * examples: [ - * { - * input: new HumanMessage("What is your favorite sock color?"), - * output: new AIMessage("My favorite sock color be arrrr-ange!"), - * }, - * ], - * }); - * const questions = [ - * new SystemMessage( - * "You are a funny assistant that answers in pirate language." - * ), - * new HumanMessage("What is your favorite food?"), - * ]; - * const res = await model.call(questions); - * console.log({ res }); - * ``` - */ -export class ChatGooglePaLM - extends BaseChatModel - implements GooglePaLMChatInput -{ - static lc_name() { - return "ChatGooglePaLM"; - } - - lc_serializable = true; - - get lc_secrets(): { [key: string]: string } | undefined { - return { - apiKey: "GOOGLE_PALM_API_KEY", - }; - } - - modelName = "models/chat-bison-001"; - - temperature?: number; // default value chosen based on model - - topP?: number; // default value chosen based on model - - topK?: number; // default value chosen based on model - - examples: protos.google.ai.generativelanguage.v1beta2.IExample[] = []; - - apiKey?: string; - - private client: DiscussServiceClient; - - constructor(fields?: GooglePaLMChatInput) { - super(fields ?? {}); - - this.modelName = fields?.modelName ?? this.modelName; - - this.temperature = fields?.temperature ?? this.temperature; - if (this.temperature && (this.temperature < 0 || this.temperature > 1)) { - throw new Error("`temperature` must be in the range of [0.0,1.0]"); - } - - this.topP = fields?.topP ?? this.topP; - if (this.topP && this.topP < 0) { - throw new Error("`topP` must be a positive integer"); - } - - this.topK = fields?.topK ?? 
this.topK; - if (this.topK && this.topK < 0) { - throw new Error("`topK` must be a positive integer"); - } - - this.examples = - fields?.examples?.map((example) => { - if ( - (isBaseMessage(example.input) && - typeof example.input.content !== "string") || - (isBaseMessage(example.output) && - typeof example.output.content !== "string") - ) { - throw new Error( - "GooglePaLM example messages may only have string content." - ); - } - return { - input: { - ...example.input, - content: example.input?.content as string, - }, - output: { - ...example.output, - content: example.output?.content as string, - }, - }; - }) ?? this.examples; - - this.apiKey = - fields?.apiKey ?? getEnvironmentVariable("GOOGLE_PALM_API_KEY"); - if (!this.apiKey) { - throw new Error( - "Please set an API key for Google Palm 2 in the environment variable GOOGLE_PALM_API_KEY or in the `apiKey` field of the GooglePalm constructor" - ); - } - - this.client = new DiscussServiceClient({ - authClient: new GoogleAuth().fromAPIKey(this.apiKey), - }); - } - - _combineLLMOutput() { - return []; - } - - _llmType() { - return "googlepalm"; - } - - async _generate( - messages: BaseMessage[], - options: this["ParsedCallOptions"], - runManager?: CallbackManagerForLLMRun - ): Promise { - const palmMessages = await this.caller.callWithOptions( - { signal: options.signal }, - this._generateMessage.bind(this), - this._mapBaseMessagesToPalmMessages(messages), - this._getPalmContextInstruction(messages), - this.examples - ); - const chatResult = this._mapPalmMessagesToChatResult(palmMessages); - - // Google Palm doesn't provide streaming as of now. But to support streaming handlers - // we call the handler with entire response text - void runManager?.handleLLMNewToken( - chatResult.generations.length > 0 ? 
chatResult.generations[0].text : "" - ); - - return chatResult; - } - - protected async _generateMessage( - messages: protos.google.ai.generativelanguage.v1beta2.IMessage[], - context?: string, - examples?: protos.google.ai.generativelanguage.v1beta2.IExample[] - ): Promise { - const [palmMessages] = await this.client.generateMessage({ - candidateCount: 1, - model: this.modelName, - temperature: this.temperature, - topK: this.topK, - topP: this.topP, - prompt: { - context, - examples, - messages, - }, - }); - return palmMessages; - } - - protected _getPalmContextInstruction( - messages: BaseMessage[] - ): string | undefined { - // get the first message and checks if it's a system 'system' messages - const systemMessage = - messages.length > 0 && getMessageAuthor(messages[0]) === "system" - ? messages[0] - : undefined; - if ( - systemMessage?.content !== undefined && - typeof systemMessage.content !== "string" - ) { - throw new Error("Non-string system message content is not supported."); - } - return systemMessage?.content; - } - - protected _mapBaseMessagesToPalmMessages( - messages: BaseMessage[] - ): protos.google.ai.generativelanguage.v1beta2.IMessage[] { - // remove all 'system' messages - const nonSystemMessages = messages.filter( - (m) => getMessageAuthor(m) !== "system" - ); - - // requires alternate human & ai messages. Throw error if two messages are consecutive - nonSystemMessages.forEach((msg, index) => { - if (index < 1) return; - if ( - getMessageAuthor(msg) === getMessageAuthor(nonSystemMessages[index - 1]) - ) { - throw new Error( - `Google PaLM requires alternate messages between authors` - ); - } - }); - - return nonSystemMessages.map((m) => { - if (typeof m.content !== "string") { - throw new Error( - "ChatGooglePaLM does not support non-string message content." 
- ); - } - return { - author: getMessageAuthor(m), - content: m.content, - citationMetadata: { - citationSources: m.additional_kwargs.citationSources as - | protos.google.ai.generativelanguage.v1beta2.ICitationSource[] - | undefined, - }, - }; - }); - } - - protected _mapPalmMessagesToChatResult( - msgRes: protos.google.ai.generativelanguage.v1beta2.IGenerateMessageResponse - ): ChatResult { - if ( - msgRes.candidates && - msgRes.candidates.length > 0 && - msgRes.candidates[0] - ) { - const message = msgRes.candidates[0]; - return { - generations: [ - { - text: message.content ?? "", - message: new AIMessage({ - content: message.content ?? "", - name: message.author === null ? undefined : message.author, - additional_kwargs: { - citationSources: message.citationMetadata?.citationSources, - filters: msgRes.filters, // content filters applied - }, - }), - }, - ], - }; - } - // if rejected or error, return empty generations with reason in filters - return { - generations: [], - llmOutput: { - filters: msgRes.filters, - }, - }; - } -} +export * from "@langchain/community/chat_models/googlepalm"; \ No newline at end of file diff --git a/langchain/src/chat_models/googlevertexai/common.ts b/langchain/src/chat_models/googlevertexai/common.ts deleted file mode 100644 index 4ff8b170a271..000000000000 --- a/langchain/src/chat_models/googlevertexai/common.ts +++ /dev/null @@ -1,400 +0,0 @@ -import { BaseChatModel } from "../base.js"; -import { - AIMessage, - AIMessageChunk, - BaseMessage, - ChatGeneration, - ChatGenerationChunk, - ChatMessage, - ChatResult, - LLMResult, -} from "../../schema/index.js"; -import { - GoogleVertexAILLMConnection, - GoogleVertexAIStream, -} from "../../util/googlevertexai-connection.js"; -import { - GoogleVertexAIBaseLLMInput, - GoogleVertexAIBasePrediction, - GoogleVertexAILLMPredictions, - GoogleVertexAIModelParams, -} from "../../types/googlevertexai-types.js"; -import { BaseLanguageModelCallOptions } from "../../base_language/index.js"; -import 
{ CallbackManagerForLLMRun } from "../../callbacks/index.js"; - -/** - * Represents a single "example" exchange that can be provided to - * help illustrate what a model response should look like. - */ -export interface ChatExample { - input: BaseMessage; - output: BaseMessage; -} - -/** - * Represents a single example exchange in the Google Vertex AI chat - * model. - */ -interface GoogleVertexAIChatExample { - input: GoogleVertexAIChatMessage; - output: GoogleVertexAIChatMessage; -} - -/** - * Represents the author of a chat message in the Google Vertex AI chat - * model. - */ -export type GoogleVertexAIChatAuthor = - | "user" // Represents the human for Code and CodeChat models - | "bot" // Represents the AI for Code models - | "system" // Represents the AI for CodeChat models - | "context"; // Represents contextual instructions - -export type GoogleVertexAIChatMessageFields = { - author?: GoogleVertexAIChatAuthor; - content: string; - name?: string; -}; - -/** - * Represents a chat message in the Google Vertex AI chat model. - */ -export class GoogleVertexAIChatMessage { - public author?: GoogleVertexAIChatAuthor; - - public content: string; - - public name?: string; - - constructor(fields: GoogleVertexAIChatMessageFields) { - this.author = fields.author; - this.content = fields.content; - this.name = fields.name; - } - - /** - * Extracts the role of a generic message and maps it to a Google Vertex - * AI chat author. - * @param message The chat message to extract the role from. - * @returns The role of the message mapped to a Google Vertex AI chat author. - */ - static extractGenericMessageCustomRole(message: ChatMessage) { - if ( - message.role !== "system" && - message.role !== "bot" && - message.role !== "user" && - message.role !== "context" - ) { - console.warn(`Unknown message role: ${message.role}`); - } - - return message.role as GoogleVertexAIChatAuthor; - } - - /** - * Maps a message type to a Google Vertex AI chat author. 
- * @param message The message to map. - * @param model The model to use for mapping. - * @returns The message type mapped to a Google Vertex AI chat author. - */ - static mapMessageTypeToVertexChatAuthor( - message: BaseMessage, - model: string - ): GoogleVertexAIChatAuthor { - const type = message._getType(); - switch (type) { - case "ai": - return model.startsWith("codechat-") ? "system" : "bot"; - case "human": - return "user"; - case "system": - throw new Error( - `System messages are only supported as the first passed message for Google Vertex AI.` - ); - case "generic": { - if (!ChatMessage.isInstance(message)) - throw new Error("Invalid generic chat message"); - return GoogleVertexAIChatMessage.extractGenericMessageCustomRole( - message - ); - } - default: - throw new Error(`Unknown / unsupported message type: ${message}`); - } - } - - /** - * Creates a new Google Vertex AI chat message from a base message. - * @param message The base message to convert. - * @param model The model to use for conversion. - * @returns A new Google Vertex AI chat message. - */ - static fromChatMessage(message: BaseMessage, model: string) { - if (typeof message.content !== "string") { - throw new Error( - "ChatGoogleVertexAI does not support non-string message content." - ); - } - return new GoogleVertexAIChatMessage({ - author: GoogleVertexAIChatMessage.mapMessageTypeToVertexChatAuthor( - message, - model - ), - content: message.content, - }); - } -} - -/** - * Represents an instance of the Google Vertex AI chat model. - */ -export interface GoogleVertexAIChatInstance { - context?: string; - examples?: GoogleVertexAIChatExample[]; - messages: GoogleVertexAIChatMessage[]; -} - -/** - * Defines the prediction output of the Google Vertex AI chat model. - */ -export interface GoogleVertexAIChatPrediction - extends GoogleVertexAIBasePrediction { - candidates: GoogleVertexAIChatMessage[]; -} - -/** - * Defines the input to the Google Vertex AI chat model. 
- */ -export interface GoogleVertexAIChatInput - extends GoogleVertexAIBaseLLMInput { - /** Instructions how the model should respond */ - context?: string; - - /** Help the model understand what an appropriate response is */ - examples?: ChatExample[]; -} - -/** - * Base class for Google Vertex AI chat models. - * Implemented subclasses must provide a GoogleVertexAILLMConnection - * with appropriate auth client. - */ -export class BaseChatGoogleVertexAI - extends BaseChatModel - implements GoogleVertexAIChatInput -{ - lc_serializable = true; - - model = "chat-bison"; - - temperature = 0.2; - - maxOutputTokens = 1024; - - topP = 0.8; - - topK = 40; - - examples: ChatExample[] = []; - - connection: GoogleVertexAILLMConnection< - BaseLanguageModelCallOptions, - GoogleVertexAIChatInstance, - GoogleVertexAIChatPrediction, - AuthOptions - >; - - streamedConnection: GoogleVertexAILLMConnection< - BaseLanguageModelCallOptions, - GoogleVertexAIChatInstance, - GoogleVertexAIChatPrediction, - AuthOptions - >; - - get lc_aliases(): Record { - return { - model: "model_name", - }; - } - - constructor(fields?: GoogleVertexAIChatInput) { - super(fields ?? {}); - - this.model = fields?.model ?? this.model; - this.temperature = fields?.temperature ?? this.temperature; - this.maxOutputTokens = fields?.maxOutputTokens ?? this.maxOutputTokens; - this.topP = fields?.topP ?? this.topP; - this.topK = fields?.topK ?? this.topK; - this.examples = fields?.examples ?? 
this.examples; - } - - _combineLLMOutput(): LLMResult["llmOutput"] { - // TODO: Combine the safetyAttributes - return []; - } - - async *_streamResponseChunks( - _messages: BaseMessage[], - _options: this["ParsedCallOptions"], - _runManager?: CallbackManagerForLLMRun - ): AsyncGenerator { - // Make the call as a streaming request - const instance: GoogleVertexAIChatInstance = this.createInstance(_messages); - const parameters = this.formatParameters(); - const result = await this.streamedConnection.request( - [instance], - parameters, - _options - ); - - // Get the streaming parser of the response - const stream = result.data as GoogleVertexAIStream; - - // Loop until the end of the stream - // During the loop, yield each time we get a chunk from the streaming parser - // that is either available or added to the queue - while (!stream.streamDone) { - const output = await stream.nextChunk(); - const chunk = - output !== null - ? BaseChatGoogleVertexAI.convertPredictionChunk(output) - : new ChatGenerationChunk({ - text: "", - message: new AIMessageChunk(""), - generationInfo: { finishReason: "stop" }, - }); - yield chunk; - } - } - - async _generate( - messages: BaseMessage[], - options: this["ParsedCallOptions"] - ): Promise { - const instance: GoogleVertexAIChatInstance = this.createInstance(messages); - const parameters: GoogleVertexAIModelParams = this.formatParameters(); - - const result = await this.connection.request( - [instance], - parameters, - options - ); - - const generations = - ( - result?.data as GoogleVertexAILLMPredictions - )?.predictions?.map((prediction) => - BaseChatGoogleVertexAI.convertPrediction(prediction) - ) ?? []; - return { - generations, - }; - } - - _llmType(): string { - return "vertexai"; - } - - /** - * Creates an instance of the Google Vertex AI chat model. - * @param messages The messages for the model instance. - * @returns A new instance of the Google Vertex AI chat model. 
- */ - createInstance(messages: BaseMessage[]): GoogleVertexAIChatInstance { - let context = ""; - let conversationMessages = messages; - if (messages[0]?._getType() === "system") { - if (typeof messages[0].content !== "string") { - throw new Error( - "ChatGoogleVertexAI does not support non-string message content." - ); - } - context = messages[0].content; - conversationMessages = messages.slice(1); - } - // https://cloud.google.com/vertex-ai/docs/generative-ai/chat/test-chat-prompts - if (conversationMessages.length % 2 === 0) { - throw new Error( - `Google Vertex AI requires an odd number of messages to generate a response.` - ); - } - const vertexChatMessages = conversationMessages.map((baseMessage, i) => { - const currMessage = GoogleVertexAIChatMessage.fromChatMessage( - baseMessage, - this.model - ); - const prevMessage = - i > 0 - ? GoogleVertexAIChatMessage.fromChatMessage( - conversationMessages[i - 1], - this.model - ) - : null; - - // https://cloud.google.com/vertex-ai/docs/generative-ai/chat/chat-prompts#messages - if (prevMessage && currMessage.author === prevMessage.author) { - throw new Error( - `Google Vertex AI requires AI and human messages to alternate.` - ); - } - return currMessage; - }); - - const examples = this.examples.map((example) => ({ - input: GoogleVertexAIChatMessage.fromChatMessage( - example.input, - this.model - ), - output: GoogleVertexAIChatMessage.fromChatMessage( - example.output, - this.model - ), - })); - - const instance: GoogleVertexAIChatInstance = { - context, - examples, - messages: vertexChatMessages, - }; - - return instance; - } - - formatParameters(): GoogleVertexAIModelParams { - return { - temperature: this.temperature, - topK: this.topK, - topP: this.topP, - maxOutputTokens: this.maxOutputTokens, - }; - } - - /** - * Converts a prediction from the Google Vertex AI chat model to a chat - * generation. - * @param prediction The prediction to convert. - * @returns The converted chat generation. 
- */ - static convertPrediction( - prediction: GoogleVertexAIChatPrediction - ): ChatGeneration { - const message = prediction?.candidates[0]; - return { - text: message?.content, - message: new AIMessage(message.content), - generationInfo: prediction, - }; - } - - // eslint-disable-next-line @typescript-eslint/no-explicit-any - static convertPredictionChunk(output: any): ChatGenerationChunk { - const generation: ChatGeneration = BaseChatGoogleVertexAI.convertPrediction( - output.outputs[0] - ); - return new ChatGenerationChunk({ - text: generation.text, - message: new AIMessageChunk(generation.message), - generationInfo: generation.generationInfo, - }); - } -} diff --git a/langchain/src/chat_models/googlevertexai/index.ts b/langchain/src/chat_models/googlevertexai/index.ts index e8a3a07da320..c116919b8417 100644 --- a/langchain/src/chat_models/googlevertexai/index.ts +++ b/langchain/src/chat_models/googlevertexai/index.ts @@ -1,64 +1 @@ -import { GoogleAuthOptions } from "google-auth-library"; -import { BaseChatGoogleVertexAI, GoogleVertexAIChatInput } from "./common.js"; -import { GoogleVertexAILLMConnection } from "../../util/googlevertexai-connection.js"; -import { GAuthClient } from "../../util/googlevertexai-gauth.js"; - -/** - * Enables calls to the Google Cloud's Vertex AI API to access - * Large Language Models in a chat-like fashion. - * - * To use, you will need to have one of the following authentication - * methods in place: - * - You are logged into an account permitted to the Google Cloud project - * using Vertex AI. - * - You are running this on a machine using a service account permitted to - * the Google Cloud project using Vertex AI. - * - The `GOOGLE_APPLICATION_CREDENTIALS` environment variable is set to the - * path of a credentials file for a service account permitted to the - * Google Cloud project using Vertex AI. 
- * @example - * ```typescript - * const model = new ChatGoogleVertexAI({ - * temperature: 0.7, - * }); - * const result = await model.invoke("What is the capital of France?"); - * ``` - */ -export class ChatGoogleVertexAI extends BaseChatGoogleVertexAI { - static lc_name() { - return "ChatVertexAI"; - } - - constructor(fields?: GoogleVertexAIChatInput) { - super(fields); - - const client = new GAuthClient({ - scopes: "https://www.googleapis.com/auth/cloud-platform", - ...fields?.authOptions, - }); - - this.connection = new GoogleVertexAILLMConnection( - { ...fields, ...this }, - this.caller, - client, - false - ); - - this.streamedConnection = new GoogleVertexAILLMConnection( - { ...fields, ...this }, - this.caller, - client, - true - ); - } -} - -export type { - ChatExample, - GoogleVertexAIChatAuthor, - GoogleVertexAIChatInput, - GoogleVertexAIChatInstance, - GoogleVertexAIChatMessage, - GoogleVertexAIChatMessageFields, - GoogleVertexAIChatPrediction, -} from "./common.js"; +export * from "@langchain/community/chat_models/googlevertexai"; \ No newline at end of file diff --git a/langchain/src/chat_models/googlevertexai/web.ts b/langchain/src/chat_models/googlevertexai/web.ts index acbaa9144f4c..1bf66b9c05f0 100644 --- a/langchain/src/chat_models/googlevertexai/web.ts +++ b/langchain/src/chat_models/googlevertexai/web.ts @@ -1,66 +1 @@ -import { GoogleVertexAILLMConnection } from "../../util/googlevertexai-connection.js"; -import { - WebGoogleAuthOptions, - WebGoogleAuth, -} from "../../util/googlevertexai-webauth.js"; -import { BaseChatGoogleVertexAI, GoogleVertexAIChatInput } from "./common.js"; - -/** - * Enables calls to the Google Cloud's Vertex AI API to access - * Large Language Models in a chat-like fashion. - * - * This entrypoint and class are intended to be used in web environments like Edge - * functions where you do not have access to the file system. 
It supports passing - * service account credentials directly as a "GOOGLE_VERTEX_AI_WEB_CREDENTIALS" - * environment variable or directly as "authOptions.credentials". - * @example - * ```typescript - * const model = new ChatGoogleVertexAI({ - * temperature: 0.7, - * }); - * const result = await model.invoke( - * "How do I implement a binary search algorithm in Python?", - * ); - * ``` - */ -export class ChatGoogleVertexAI extends BaseChatGoogleVertexAI { - static lc_name() { - return "ChatVertexAI"; - } - - get lc_secrets(): { [key: string]: string } { - return { - "authOptions.credentials": "GOOGLE_VERTEX_AI_WEB_CREDENTIALS", - }; - } - - constructor(fields?: GoogleVertexAIChatInput) { - super(fields); - - const client = new WebGoogleAuth(fields?.authOptions); - - this.connection = new GoogleVertexAILLMConnection( - { ...fields, ...this }, - this.caller, - client, - false - ); - - this.streamedConnection = new GoogleVertexAILLMConnection( - { ...fields, ...this }, - this.caller, - client, - true - ); - } -} - -export type { - ChatExample, - GoogleVertexAIChatAuthor, - GoogleVertexAIChatInput, - GoogleVertexAIChatInstance, - GoogleVertexAIChatMessage, - GoogleVertexAIChatMessageFields, - GoogleVertexAIChatPrediction, -} from "./common.js"; +export * from "@langchain/community/chat_models/googlevertexai/web"; \ No newline at end of file diff --git a/langchain/src/chat_models/iflytek_xinghuo/common.ts b/langchain/src/chat_models/iflytek_xinghuo/common.ts deleted file mode 100644 index 6854dc22fdd9..000000000000 --- a/langchain/src/chat_models/iflytek_xinghuo/common.ts +++ /dev/null @@ -1,486 +0,0 @@ -import { CallbackManagerForLLMRun } from "../../callbacks/manager.js"; -import { - AIMessage, - BaseMessage, - ChatGeneration, - ChatMessage, - ChatResult, -} from "../../schema/index.js"; -import { getEnvironmentVariable } from "../../util/env.js"; -import { IterableReadableStream } from "../../util/stream.js"; -import { BaseChatModel, BaseChatModelParams } from 
"../base.js"; -import { - BaseWebSocketStream, - WebSocketStreamOptions, -} from "../../util/iflytek_websocket_stream.js"; - -/** - * Type representing the role of a message in the Xinghuo chat model. - */ -export type XinghuoMessageRole = "assistant" | "user"; - -/** - * Interface representing a message in the Xinghuo chat model. - */ -interface XinghuoMessage { - role: XinghuoMessageRole; - content: string; -} - -/** - * Interface representing the usage of tokens in a chat completion. - */ -interface TokenUsage { - completionTokens?: number; - promptTokens?: number; - totalTokens?: number; -} - -/** - * Interface representing a request for a chat completion. - */ -interface ChatCompletionRequest { - messages: XinghuoMessage[]; - temperature?: number; - max_tokens?: number; - top_k?: number; - chat_id?: string; -} - -export interface ChatCompletionChunk { - header: { - code: number; - message: string; - sid: string; - status: number; - }; - payload: { - choices: { - status: number; - seq: number; - text: { - content: string; - role: XinghuoMessageRole; - index: number; - }[]; - }; - usage?: { - text: { - question_tokens: number; - prompt_tokens: number; - completion_tokens: number; - total_tokens: number; - }; - }; - }; -} - -/** - * Interface representing a response from a chat completion. - */ -interface ChatCompletionResponse { - result: string; - usage?: { - completion_tokens: number; - prompt_tokens: number; - total_tokens: number; - }; -} - -/** - * Function that extracts the custom role of a generic chat message. - * @param message Chat message from which to extract the custom role. - * @returns The custom role of the chat message. - */ -function extractGenericMessageCustomRole(message: ChatMessage) { - if (message.role !== "assistant" && message.role !== "user") { - console.warn(`Unknown message role: ${message.role}`); - } - return message.role as XinghuoMessageRole; -} - -/** - * Function that converts a base message to a Xinghuo message role. 
- * @param message Base message to convert. - * @returns The Xinghuo message role. - */ -function messageToXinghuoRole(message: BaseMessage): XinghuoMessageRole { - const type = message._getType(); - switch (type) { - case "ai": - return "assistant"; - case "human": - return "user"; - case "system": - throw new Error("System messages should not be here"); - case "function": - throw new Error("Function messages not supported"); - case "generic": { - if (!ChatMessage.isInstance(message)) - throw new Error("Invalid generic chat message"); - return extractGenericMessageCustomRole(message); - } - default: - throw new Error(`Unknown message type: ${type}`); - } -} - -declare interface IflytekXinghuoChatInput { - /** Model version to use. Available options are: v1.1, v2.1, v3.1 - * @default "v2.1" - */ - version: string; - - /** - * ID of the end-user who made requests. - */ - userId?: string; - - /** - * APPID to use when making requests. Defaults to the value of - * `IFLYTEK_APPID` environment variable. - */ - iflytekAppid?: string; - - /** - * API key to use when making requests. Defaults to the value of - * `IFLYTEK_API_KEY` environment variable. - */ - iflytekApiKey?: string; - - /** - * API Secret to use when making requests. Defaults to the value of - * `IFLYTEK_API_SECRET` environment variable. - */ - iflytekApiSecret?: string; - - /** Amount of randomness injected into the response. Ranges - * from 0 to 1 (0 is not included). Use temp closer to 0 for analytical / - * multiple choice, and temp closer to 1 for creative - * and generative tasks. Defaults to 0.5. - */ - temperature?: number; - - max_tokens?: number; - - top_k?: number; - - streaming?: boolean; -} - -/** - * Wrapper around IflytekXingHuo large language models that use the Chat endpoint. - * - * To use you should have the `IFLYTEK_API_KEY` and `IFLYTEK_API_SECRET` and `IFLYTEK_APPID` - * environment variable set. 
- * - * @augments BaseChatModel - * @augments IflytekXinghuoChatInput - */ -export abstract class BaseChatIflytekXinghuo - extends BaseChatModel - implements IflytekXinghuoChatInput -{ - static lc_name() { - return "ChatIflytekXinghuo"; - } - - get callKeys(): string[] { - return ["stop", "signal", "options"]; - } - - get lc_secrets(): { [key: string]: string } | undefined { - return { - iflytekApiKey: "IFLYTEK_API_KEY", - iflytekApiSecret: "IFLYTEK_API_SECRET", - }; - } - - get lc_aliases(): { [key: string]: string } | undefined { - return undefined; - } - - lc_serializable = true; - - version = "v2.1"; - - iflytekAppid: string; - - iflytekApiKey: string; - - iflytekApiSecret: string; - - userId?: string; - - apiUrl: string; - - domain: string; - - temperature = 0.5; - - max_tokens = 2048; - - top_k = 4; - - streaming = false; - - constructor(fields?: Partial & BaseChatModelParams) { - super(fields ?? {}); - - const iflytekAppid = - fields?.iflytekAppid ?? getEnvironmentVariable("IFLYTEK_APPID"); - if (!iflytekAppid) { - throw new Error("Iflytek APPID not found"); - } else { - this.iflytekAppid = iflytekAppid; - } - - const iflytekApiKey = - fields?.iflytekApiKey ?? getEnvironmentVariable("IFLYTEK_API_KEY"); - if (!iflytekApiKey) { - throw new Error("Iflytek API key not found"); - } else { - this.iflytekApiKey = iflytekApiKey; - } - - const iflytekApiSecret = - fields?.iflytekApiSecret ?? getEnvironmentVariable("IFLYTEK_API_SECRET"); - if (!iflytekApiSecret) { - throw new Error("Iflytek API secret not found"); - } else { - this.iflytekApiSecret = iflytekApiSecret; - } - - this.userId = fields?.userId ?? this.userId; - this.streaming = fields?.streaming ?? this.streaming; - this.temperature = fields?.temperature ?? this.temperature; - this.max_tokens = fields?.max_tokens ?? this.max_tokens; - this.top_k = fields?.top_k ?? this.top_k; - - this.version = fields?.version ?? 
this.version; - if (["v1.1", "v2.1", "v3.1"].includes(this.version)) { - switch (this.version) { - case "v1.1": - this.domain = "general"; - break; - case "v2.1": - this.domain = "generalv2"; - break; - case "v3.1": - this.domain = "generalv3"; - break; - default: - this.domain = "generalv2"; - } - this.apiUrl = `wss://spark-api.xf-yun.com/${this.version}/chat`; - } else { - throw new Error(`Invalid model version: ${this.version}`); - } - } - - /** - * Get the identifying parameters for the model - */ - identifyingParams() { - return { - version: this.version, - ...this.invocationParams(), - }; - } - - /** - * Get the parameters used to invoke the model - */ - invocationParams(): Omit & { - streaming: boolean; - } { - return { - streaming: this.streaming, - temperature: this.temperature, - top_k: this.top_k, - }; - } - - /** - * Method that retrieves the auth websocketStream for making requests to the Iflytek Xinghuo API. - * @returns The auth websocketStream for making requests to the Iflytek Xinghuo API. - */ - abstract openWebSocketStream>( - options: WebSocketStreamOptions - ): Promise; - - /** - * Calls the Xinghuo API completion. - * @param request The request to send to the Xinghuo API. - * @param signal The signal for the API call. - * @returns The response from the Xinghuo API. - */ - async completion( - request: ChatCompletionRequest, - stream: true, - signal?: AbortSignal - ): Promise>; - - async completion( - request: ChatCompletionRequest, - stream: false, - signal?: AbortSignal - ): Promise; - - async completion( - request: ChatCompletionRequest, - stream: boolean, - signal?: AbortSignal - ): Promise | ChatCompletionResponse> { - const webSocketStream = await this.openWebSocketStream({ - signal, - }); - const connection = await webSocketStream.connection; - const header = { - app_id: this.iflytekAppid, - uid: this.userId, - }; - const parameter = { - chat: { - domain: this.domain, - temperature: request.temperature ?? 
this.temperature, - max_tokens: request.max_tokens ?? this.max_tokens, - top_k: request.top_k ?? this.top_k, - }, - }; - const payload = { - message: { - text: request.messages, - }, - }; - const message = JSON.stringify({ - header, - parameter, - payload, - }); - const { writable, readable } = connection; - const writer = writable.getWriter(); - await writer.write(message); - const streams = IterableReadableStream.fromReadableStream(readable); - if (stream) { - return streams; - } else { - let response: ChatCompletionResponse = { result: "" }; - for await (const chunk of streams) { - const data = JSON.parse(chunk) as ChatCompletionChunk; - const { header, payload } = data; - if (header.code === 0) { - if (header.status === 0) { - response.result = payload.choices?.text[0]?.content ?? ""; - } else if (header.status === 1) { - response.result += payload.choices?.text[0]?.content ?? ""; - } else if (header.status === 2) { - response = { ...response, usage: payload.usage?.text }; - break; - } - } else { - break; - } - } - void streams.cancel(); - void webSocketStream.close(); - return response; - } - } - - async _generate( - messages: BaseMessage[], - options: this["ParsedCallOptions"], - runManager?: CallbackManagerForLLMRun | undefined - ): Promise { - const tokenUsage: TokenUsage = {}; - const params = this.invocationParams(); - const messagesMapped: XinghuoMessage[] = messages.map((message) => { - if (typeof message.content !== "string") { - throw new Error( - "ChatIflytekXinghuo does not support non-string message content." - ); - } - return { - role: messageToXinghuoRole(message), - content: message.content, - }; - }); - const data = params.streaming - ? 
await (async () => { - const streams = await this.completion( - { messages: messagesMapped, ...params }, - true, - options.signal - ); - let response: ChatCompletionResponse = { result: "" }; - for await (const chunk of streams) { - const data = JSON.parse(chunk) as ChatCompletionChunk; - const { header, payload } = data; - if (header.code === 0) { - if (header.status === 0) { - response.result = payload.choices?.text[0]?.content ?? ""; - } else if (header.status === 1) { - response.result += payload.choices?.text[0]?.content ?? ""; - } else if (header.status === 2) { - response = { ...response, usage: payload.usage?.text }; - break; - } - void runManager?.handleLLMNewToken( - payload.choices?.text[0]?.content - ); - } else { - break; - } - } - void streams.cancel(); - return response; - })() - : await this.completion( - { messages: messagesMapped, ...params }, - false, - options.signal - ); - - const { - completion_tokens: completionTokens, - prompt_tokens: promptTokens, - total_tokens: totalTokens, - } = data.usage ?? {}; - - if (completionTokens) { - tokenUsage.completionTokens = - (tokenUsage.completionTokens ?? 0) + completionTokens; - } - - if (promptTokens) { - tokenUsage.promptTokens = (tokenUsage.promptTokens ?? 0) + promptTokens; - } - - if (totalTokens) { - tokenUsage.totalTokens = (tokenUsage.totalTokens ?? 0) + totalTokens; - } - - const generations: ChatGeneration[] = []; - const text = data.result ?? 
""; - generations.push({ - text, - message: new AIMessage(text), - }); - - return { - generations, - llmOutput: { tokenUsage }, - }; - } - - /** @ignore */ - // eslint-disable-next-line @typescript-eslint/no-explicit-any - _combineLLMOutput(): Record | undefined { - return []; - } - - _llmType(): string { - return "iflytek_xinghuo"; - } -} diff --git a/langchain/src/chat_models/iflytek_xinghuo/index.ts b/langchain/src/chat_models/iflytek_xinghuo/index.ts index ac54461be18a..3ad6aa3eb4b2 100644 --- a/langchain/src/chat_models/iflytek_xinghuo/index.ts +++ b/langchain/src/chat_models/iflytek_xinghuo/index.ts @@ -1,43 +1 @@ -import WebSocket from "ws"; -import { BaseChatIflytekXinghuo } from "./common.js"; -import { - BaseWebSocketStream, - WebSocketStreamOptions, -} from "../../util/iflytek_websocket_stream.js"; - -class WebSocketStream extends BaseWebSocketStream { - // eslint-disable-next-line @typescript-eslint/ban-ts-comment - // @ts-ignore - openWebSocket(url: string, options: WebSocketStreamOptions): WebSocket { - return new WebSocket(url, options.protocols ?? 
[]); - } -} - -/** - * @example - * ```typescript - * const model = new ChatIflytekXinghuo(); - * const response = await model.call([new HumanMessage("Nice to meet you!")]); - * console.log(response); - * ``` - */ -export class ChatIflytekXinghuo extends BaseChatIflytekXinghuo { - async openWebSocketStream( - options: WebSocketStreamOptions - ): Promise { - const host = "spark-api.xf-yun.com"; - const date = new Date().toUTCString(); - const url = `GET /${this.version}/chat HTTP/1.1`; - const { createHmac } = await import("node:crypto"); - const hash = createHmac("sha256", this.iflytekApiSecret) - .update(`host: ${host}\ndate: ${date}\n${url}`) - .digest("base64"); - const authorization_origin = `api_key="${this.iflytekApiKey}", algorithm="hmac-sha256", headers="host date request-line", signature="${hash}"`; - const authorization = Buffer.from(authorization_origin).toString("base64"); - let authWebSocketUrl = this.apiUrl; - authWebSocketUrl += `?authorization=${authorization}`; - authWebSocketUrl += `&host=${encodeURIComponent(host)}`; - authWebSocketUrl += `&date=${encodeURIComponent(date)}`; - return new WebSocketStream(authWebSocketUrl, options) as WebSocketStream; - } -} +export * from "@langchain/community/chat_models/iflytek_xinghuo"; \ No newline at end of file diff --git a/langchain/src/chat_models/iflytek_xinghuo/web.ts b/langchain/src/chat_models/iflytek_xinghuo/web.ts index 87b372b802ad..0fe64665b992 100644 --- a/langchain/src/chat_models/iflytek_xinghuo/web.ts +++ b/langchain/src/chat_models/iflytek_xinghuo/web.ts @@ -1,49 +1 @@ -import { BaseChatIflytekXinghuo } from "./common.js"; -import { - WebSocketStreamOptions, - BaseWebSocketStream, -} from "../../util/iflytek_websocket_stream.js"; - -class WebSocketStream extends BaseWebSocketStream { - openWebSocket(url: string, options: WebSocketStreamOptions): WebSocket { - return new WebSocket(url, options.protocols ?? 
[]); - } -} - -/** - * @example - * ```typescript - * const model = new ChatIflytekXinghuo(); - * const response = await model.call([new HumanMessage("Nice to meet you!")]); - * console.log(response); - * ``` - */ -export class ChatIflytekXinghuo extends BaseChatIflytekXinghuo { - async openWebSocketStream( - options: WebSocketStreamOptions - ): Promise { - const host = "spark-api.xf-yun.com"; - const date = new Date().toUTCString(); - const url = `GET /${this.version}/chat HTTP/1.1`; - const keyBuffer = new TextEncoder().encode(this.iflytekApiSecret); - const dataBuffer = new TextEncoder().encode( - `host: ${host}\ndate: ${date}\n${url}` - ); - const cryptoKey = await crypto.subtle.importKey( - "raw", - keyBuffer, - { name: "HMAC", hash: "SHA-256" }, - false, - ["sign"] - ); - const signature = await crypto.subtle.sign("HMAC", cryptoKey, dataBuffer); - const hash = window.btoa(String.fromCharCode(...new Uint8Array(signature))); - const authorization_origin = `api_key="${this.iflytekApiKey}", algorithm="hmac-sha256", headers="host date request-line", signature="${hash}"`; - const authorization = window.btoa(authorization_origin); - let authWebSocketUrl = this.apiUrl; - authWebSocketUrl += `?authorization=${authorization}`; - authWebSocketUrl += `&host=${encodeURIComponent(host)}`; - authWebSocketUrl += `&date=${encodeURIComponent(date)}`; - return new WebSocketStream(authWebSocketUrl, options) as WebSocketStream; - } -} +export * from "@langchain/community/chat_models/iflytek_xinghuo/web"; \ No newline at end of file diff --git a/langchain/src/chat_models/llama_cpp.ts b/langchain/src/chat_models/llama_cpp.ts index 3df8b0d2a3c9..1ecd1b008fde 100644 --- a/langchain/src/chat_models/llama_cpp.ts +++ b/langchain/src/chat_models/llama_cpp.ts @@ -1,322 +1 @@ -import { - LlamaModel, - LlamaContext, - LlamaChatSession, - type ConversationInteraction, -} from "node-llama-cpp"; -import { SimpleChatModel, BaseChatModelParams } from "./base.js"; -import { - LlamaBaseCppInputs, 
- createLlamaModel, - createLlamaContext, -} from "../util/llama_cpp.js"; -import { BaseLanguageModelCallOptions } from "../base_language/index.js"; -import { CallbackManagerForLLMRun } from "../callbacks/manager.js"; -import { - BaseMessage, - ChatGenerationChunk, - AIMessageChunk, - ChatMessage, -} from "../schema/index.js"; - -/** - * Note that the modelPath is the only required parameter. For testing you - * can set this in the environment variable `LLAMA_PATH`. - */ -export interface LlamaCppInputs - extends LlamaBaseCppInputs, - BaseChatModelParams {} - -export interface LlamaCppCallOptions extends BaseLanguageModelCallOptions { - /** The maximum number of tokens the response should contain. */ - maxTokens?: number; - /** A function called when matching the provided token array */ - onToken?: (tokens: number[]) => void; -} - -/** - * To use this model you need to have the `node-llama-cpp` module installed. - * This can be installed using `npm install -S node-llama-cpp` and the minimum - * version supported in version 2.0.0. - * This also requires that have a locally built version of Llama2 installed. - * @example - * ```typescript - * // Initialize the ChatLlamaCpp model with the path to the model binary file. - * const model = new ChatLlamaCpp({ - * modelPath: "/Replace/with/path/to/your/model/gguf-llama2-q4_0.bin", - * temperature: 0.5, - * }); - * - * // Call the model with a message and await the response. - * const response = await model.call([ - * new HumanMessage({ content: "My name is John." }), - * ]); - * - * // Log the response to the console. 
- * console.log({ response }); - * - * ``` - */ -export class ChatLlamaCpp extends SimpleChatModel { - declare CallOptions: LlamaCppCallOptions; - - static inputs: LlamaCppInputs; - - maxTokens?: number; - - temperature?: number; - - topK?: number; - - topP?: number; - - trimWhitespaceSuffix?: boolean; - - _model: LlamaModel; - - _context: LlamaContext; - - _session: LlamaChatSession | null; - - static lc_name() { - return "ChatLlamaCpp"; - } - - constructor(inputs: LlamaCppInputs) { - super(inputs); - this.maxTokens = inputs?.maxTokens; - this.temperature = inputs?.temperature; - this.topK = inputs?.topK; - this.topP = inputs?.topP; - this.trimWhitespaceSuffix = inputs?.trimWhitespaceSuffix; - this._model = createLlamaModel(inputs); - this._context = createLlamaContext(this._model, inputs); - this._session = null; - } - - _llmType() { - return "llama2_cpp"; - } - - /** @ignore */ - _combineLLMOutput() { - return {}; - } - - invocationParams() { - return { - maxTokens: this.maxTokens, - temperature: this.temperature, - topK: this.topK, - topP: this.topP, - trimWhitespaceSuffix: this.trimWhitespaceSuffix, - }; - } - - /** @ignore */ - async _call( - messages: BaseMessage[], - options: this["ParsedCallOptions"] - ): Promise { - let prompt = ""; - - if (messages.length > 1) { - // We need to build a new _session - prompt = this._buildSession(messages); - } else if (!this._session) { - prompt = this._buildSession(messages); - } else { - if (typeof messages[0].content !== "string") { - throw new Error( - "ChatLlamaCpp does not support non-string message content in sessions." 
- ); - } - // If we already have a session then we should just have a single prompt - prompt = messages[0].content; - } - - try { - const promptOptions = { - onToken: options.onToken, - maxTokens: this?.maxTokens, - temperature: this?.temperature, - topK: this?.topK, - topP: this?.topP, - trimWhitespaceSuffix: this?.trimWhitespaceSuffix, - }; - // @ts-expect-error - TS2531: Object is possibly 'null'. - const completion = await this._session.prompt(prompt, promptOptions); - return completion; - } catch (e) { - throw new Error("Error getting prompt completion."); - } - } - - async *_streamResponseChunks( - input: BaseMessage[], - _options: this["ParsedCallOptions"], - runManager?: CallbackManagerForLLMRun - ): AsyncGenerator { - const promptOptions = { - temperature: this?.temperature, - topK: this?.topK, - topP: this?.topP, - }; - - const prompt = this._buildPrompt(input); - - const stream = await this.caller.call(async () => - this._context.evaluate(this._context.encode(prompt), promptOptions) - ); - - for await (const chunk of stream) { - yield new ChatGenerationChunk({ - text: this._context.decode([chunk]), - message: new AIMessageChunk({ - content: this._context.decode([chunk]), - }), - generationInfo: {}, - }); - await runManager?.handleLLMNewToken(this._context.decode([chunk]) ?? 
""); - } - } - - // This constructs a new session if we need to adding in any sys messages or previous chats - protected _buildSession(messages: BaseMessage[]): string { - let prompt = ""; - let sysMessage = ""; - let noSystemMessages: BaseMessage[] = []; - let interactions: ConversationInteraction[] = []; - - // Let's see if we have a system message - if (messages.findIndex((msg) => msg._getType() === "system") !== -1) { - const sysMessages = messages.filter( - (message) => message._getType() === "system" - ); - - const systemMessageContent = sysMessages[sysMessages.length - 1].content; - - if (typeof systemMessageContent !== "string") { - throw new Error( - "ChatLlamaCpp does not support non-string message content in sessions." - ); - } - // Only use the last provided system message - sysMessage = systemMessageContent; - - // Now filter out the system messages - noSystemMessages = messages.filter( - (message) => message._getType() !== "system" - ); - } else { - noSystemMessages = messages; - } - - // Lets see if we just have a prompt left or are their previous interactions? - if (noSystemMessages.length > 1) { - // Is the last message a prompt? - if ( - noSystemMessages[noSystemMessages.length - 1]._getType() === "human" - ) { - const finalMessageContent = - noSystemMessages[noSystemMessages.length - 1].content; - if (typeof finalMessageContent !== "string") { - throw new Error( - "ChatLlamaCpp does not support non-string message content in sessions." - ); - } - prompt = finalMessageContent; - interactions = this._convertMessagesToInteractions( - noSystemMessages.slice(0, noSystemMessages.length - 1) - ); - } else { - interactions = this._convertMessagesToInteractions(noSystemMessages); - } - } else { - if (typeof noSystemMessages[0].content !== "string") { - throw new Error( - "ChatLlamaCpp does not support non-string message content in sessions." 
- ); - } - // If there was only a single message we assume it's a prompt - prompt = noSystemMessages[0].content; - } - - // Now lets construct a session according to what we got - if (sysMessage !== "" && interactions.length > 0) { - this._session = new LlamaChatSession({ - context: this._context, - conversationHistory: interactions, - systemPrompt: sysMessage, - }); - } else if (sysMessage !== "" && interactions.length === 0) { - this._session = new LlamaChatSession({ - context: this._context, - systemPrompt: sysMessage, - }); - } else if (sysMessage === "" && interactions.length > 0) { - this._session = new LlamaChatSession({ - context: this._context, - conversationHistory: interactions, - }); - } else { - this._session = new LlamaChatSession({ - context: this._context, - }); - } - - return prompt; - } - - // This builds a an array of interactions - protected _convertMessagesToInteractions( - messages: BaseMessage[] - ): ConversationInteraction[] { - const result: ConversationInteraction[] = []; - - for (let i = 0; i < messages.length; i += 2) { - if (i + 1 < messages.length) { - const prompt = messages[i].content; - const response = messages[i + 1].content; - if (typeof prompt !== "string" || typeof response !== "string") { - throw new Error( - "ChatLlamaCpp does not support non-string message content." 
- ); - } - result.push({ - prompt, - response, - }); - } - } - - return result; - } - - protected _buildPrompt(input: BaseMessage[]): string { - const prompt = input - .map((message) => { - let messageText; - if (message._getType() === "human") { - messageText = `[INST] ${message.content} [/INST]`; - } else if (message._getType() === "ai") { - messageText = message.content; - } else if (message._getType() === "system") { - messageText = `<> ${message.content} <>`; - } else if (ChatMessage.isInstance(message)) { - messageText = `\n\n${message.role[0].toUpperCase()}${message.role.slice( - 1 - )}: ${message.content}`; - } else { - console.warn( - `Unsupported message type passed to llama_cpp: "${message._getType()}"` - ); - messageText = ""; - } - return messageText; - }) - .join("\n"); - - return prompt; - } -} +export * from "@langchain/community/chat_models/llama_cpp"; \ No newline at end of file diff --git a/langchain/src/chat_models/minimax.ts b/langchain/src/chat_models/minimax.ts index c0e521d6fec0..7cc9bd240b18 100644 --- a/langchain/src/chat_models/minimax.ts +++ b/langchain/src/chat_models/minimax.ts @@ -1,880 +1 @@ -import type { OpenAI as OpenAIClient } from "openai"; - -import { BaseChatModel, BaseChatModelParams } from "./base.js"; -import { - AIMessage, - BaseMessage, - ChatGeneration, - ChatMessage, - ChatResult, - HumanMessage, -} from "../schema/index.js"; -import { CallbackManagerForLLMRun } from "../callbacks/manager.js"; -import { getEnvironmentVariable } from "../util/env.js"; -import { StructuredTool } from "../tools/index.js"; -import { BaseFunctionCallOptions } from "../base_language/index.js"; -import { formatToOpenAIFunction } from "../tools/convert_to_openai.js"; - -/** - * Type representing the sender_type of a message in the Minimax chat model. - */ -export type MinimaxMessageRole = "BOT" | "USER" | "FUNCTION"; - -/** - * Interface representing a message in the Minimax chat model. 
- */ -interface MinimaxChatCompletionRequestMessage { - sender_type: MinimaxMessageRole; - sender_name?: string; - text: string; -} - -/** - * Interface representing a request for a chat completion. - */ -interface MinimaxChatCompletionRequest { - model: string; - messages: MinimaxChatCompletionRequestMessage[]; - stream?: boolean; - prompt?: string; - temperature?: number; - top_p?: number; - tokens_to_generate?: number; - skip_info_mask?: boolean; - mask_sensitive_info?: boolean; - beam_width?: number; - use_standard_sse?: boolean; - role_meta?: RoleMeta; - bot_setting?: BotSetting[]; - reply_constraints?: ReplyConstraints; - sample_messages?: MinimaxChatCompletionRequestMessage[]; - /** - * A list of functions the model may generate JSON inputs for. - * @type {Array} - */ - functions?: OpenAIClient.Chat.ChatCompletionCreateParams.Function[]; - plugins?: string[]; -} - -interface RoleMeta { - role_meta: string; - bot_name: string; -} - -interface RawGlyph { - type: "raw"; - raw_glyph: string; -} - -interface JsonGlyph { - type: "json_value"; - // eslint-disable-next-line @typescript-eslint/no-explicit-any - json_properties: any; -} - -type ReplyConstraintsGlyph = RawGlyph | JsonGlyph; - -interface ReplyConstraints { - sender_type: string; - sender_name: string; - glyph?: ReplyConstraintsGlyph; -} - -interface BotSetting { - content: string; - bot_name: string; -} - -export declare interface ConfigurationParameters { - basePath?: string; - headers?: Record; -} - -/** - * Interface defining the input to the ChatMinimax class. - */ -declare interface MinimaxChatInputBase { - /** Model name to use - * @default "abab5.5-chat" - */ - modelName: string; - - /** Whether to stream the results or not. Defaults to false. */ - streaming?: boolean; - - prefixMessages?: MinimaxChatCompletionRequestMessage[]; - - /** - * API key to use when making requests. Defaults to the value of - * `MINIMAX_GROUP_ID` environment variable. 
- */ - minimaxGroupId?: string; - - /** - * Secret key to use when making requests. Defaults to the value of - * `MINIMAX_API_KEY` environment variable. - */ - minimaxApiKey?: string; - - /** Amount of randomness injected into the response. Ranges - * from 0 to 1 (0 is not included). Use temp closer to 0 for analytical / - * multiple choice, and temp closer to 1 for creative - * and generative tasks. Defaults to 0.95. - */ - temperature?: number; - - /** - * The smaller the sampling method, the more determinate the result; - * the larger the number, the more random the result. - */ - topP?: number; - - /** - * Enable Chatcompletion pro - */ - proVersion?: boolean; - - /** - * Pay attention to the maximum number of tokens generated, - * this parameter does not affect the generation effect of the model itself, - * but only realizes the function by truncating the tokens exceeding the limit. - * It is necessary to ensure that the number of tokens of the input context plus this value is less than 6144 or 16384, - * otherwise the request will fail. - */ - tokensToGenerate?: number; -} - -declare interface MinimaxChatInputNormal { - /** - * Dialogue setting, characters, or functionality setting. - */ - prompt?: string; - /** - * Sensitize text information in the output that may involve privacy issues, - * currently including but not limited to emails, domain names, - * links, ID numbers, home addresses, etc. Default false, ie. enable sensitization. - */ - skipInfoMask?: boolean; - - /** - * Whether to use the standard SSE format, when set to true, - * the streaming results will be separated by two line breaks. - * This parameter only takes effect when stream is set to true. - */ - useStandardSse?: boolean; - - /** - * If it is true, this indicates that the current request is set to continuation mode, - * and the response is a continuation of the last sentence in the incoming messages; - * at this time, the last sender is not limited to USER, it can also be BOT. 
- * Assuming the last sentence of incoming messages is {"sender_type": " U S E R", "text": "天生我材"}, - * the completion of the reply may be "It must be useful." - */ - continueLastMessage?: boolean; - - /** - * How many results to generate; the default is 1 and the maximum is not more than 4. - * Because beamWidth generates multiple results, it will consume more tokens. - */ - beamWidth?: number; - - /** - * Dialogue Metadata - */ - roleMeta?: RoleMeta; -} - -declare interface MinimaxChatInputPro extends MinimaxChatInputBase { - /** - * For the text information in the output that may involve privacy issues, - * code masking is currently included but not limited to emails, domains, links, ID numbers, home addresses, etc., - * with the default being true, that is, code masking is enabled. - */ - maskSensitiveInfo?: boolean; - - /** - * Default bot name - */ - defaultBotName?: string; - - /** - * Default user name - */ - defaultUserName?: string; - - /** - * Setting for each robot, only available for pro version. - */ - botSetting?: BotSetting[]; - - replyConstraints?: ReplyConstraints; -} - -type MinimaxChatInput = MinimaxChatInputNormal & MinimaxChatInputPro; - -/** - * Function that extracts the custom sender_type of a generic chat message. - * @param message Chat message from which to extract the custom sender_type. - * @returns The custom sender_type of the chat message. - */ -function extractGenericMessageCustomRole(message: ChatMessage) { - if (message.role !== "ai" && message.role !== "user") { - console.warn(`Unknown message role: ${message.role}`); - } - if (message.role === "ai") { - return "BOT" as MinimaxMessageRole; - } - if (message.role === "user") { - return "USER" as MinimaxMessageRole; - } - return message.role as MinimaxMessageRole; -} - -/** - * Function that converts a base message to a Minimax message sender_type. - * @param message Base message to convert. - * @returns The Minimax message sender_type. 
- */ -function messageToMinimaxRole(message: BaseMessage): MinimaxMessageRole { - const type = message._getType(); - switch (type) { - case "ai": - return "BOT"; - case "human": - return "USER"; - case "system": - throw new Error("System messages not supported"); - case "function": - return "FUNCTION"; - case "generic": { - if (!ChatMessage.isInstance(message)) - throw new Error("Invalid generic chat message"); - return extractGenericMessageCustomRole(message); - } - default: - throw new Error(`Unknown message type: ${type}`); - } -} - -export interface ChatMinimaxCallOptions extends BaseFunctionCallOptions { - tools?: StructuredTool[]; - defaultUserName?: string; - defaultBotName?: string; - plugins?: string[]; - botSetting?: BotSetting[]; - replyConstraints?: ReplyConstraints; - sampleMessages?: BaseMessage[]; -} - -/** - * Wrapper around Minimax large language models that use the Chat endpoint. - * - * To use you should have the `MINIMAX_GROUP_ID` and `MINIMAX_API_KEY` - * environment variable set. 
- * @example - * ```typescript - * // Define a chat prompt with a system message setting the context for translation - * const chatPrompt = ChatPromptTemplate.fromMessages([ - * SystemMessagePromptTemplate.fromTemplate( - * "You are a helpful assistant that translates {input_language} to {output_language}.", - * ), - * HumanMessagePromptTemplate.fromTemplate("{text}"), - * ]); - * - * // Create a new LLMChain with the chat model and the defined prompt - * const chainB = new LLMChain({ - * prompt: chatPrompt, - * llm: new ChatMinimax({ temperature: 0.01 }), - * }); - * - * // Call the chain with the input language, output language, and the text to translate - * const resB = await chainB.call({ - * input_language: "English", - * output_language: "Chinese", - * text: "I love programming.", - * }); - * - * // Log the result - * console.log({ resB }); - * - * ``` - */ -export class ChatMinimax - extends BaseChatModel - implements MinimaxChatInput -{ - static lc_name() { - return "ChatMinimax"; - } - - get callKeys(): (keyof ChatMinimaxCallOptions)[] { - return [ - ...(super.callKeys as (keyof ChatMinimaxCallOptions)[]), - "functions", - "tools", - "defaultBotName", - "defaultUserName", - "plugins", - "replyConstraints", - "botSetting", - "sampleMessages", - ]; - } - - get lc_secrets(): { [key: string]: string } | undefined { - return { - minimaxApiKey: "MINIMAX_API_KEY", - minimaxGroupId: "MINIMAX_GROUP_ID", - }; - } - - lc_serializable = true; - - minimaxGroupId?: string; - - minimaxApiKey?: string; - - streaming = false; - - prompt?: string; - - modelName = "abab5.5-chat"; - - defaultBotName?: string = "Assistant"; - - defaultUserName?: string = "I"; - - prefixMessages?: MinimaxChatCompletionRequestMessage[]; - - apiUrl: string; - - basePath?: string = "https://api.minimax.chat/v1"; - - headers?: Record; - - temperature?: number = 0.9; - - topP?: number = 0.8; - - tokensToGenerate?: number; - - skipInfoMask?: boolean; - - proVersion?: boolean = true; - - beamWidth?: 
number; - - botSetting?: BotSetting[]; - - continueLastMessage?: boolean; - - maskSensitiveInfo?: boolean; - - roleMeta?: RoleMeta; - - useStandardSse?: boolean; - - replyConstraints?: ReplyConstraints; - - constructor( - fields?: Partial & - BaseChatModelParams & { - configuration?: ConfigurationParameters; - } - ) { - super(fields ?? {}); - - this.minimaxGroupId = - fields?.minimaxGroupId ?? getEnvironmentVariable("MINIMAX_GROUP_ID"); - if (!this.minimaxGroupId) { - throw new Error("Minimax GroupID not found"); - } - - this.minimaxApiKey = - fields?.minimaxApiKey ?? getEnvironmentVariable("MINIMAX_API_KEY"); - - if (!this.minimaxApiKey) { - throw new Error("Minimax ApiKey not found"); - } - - this.streaming = fields?.streaming ?? this.streaming; - this.prompt = fields?.prompt ?? this.prompt; - this.temperature = fields?.temperature ?? this.temperature; - this.topP = fields?.topP ?? this.topP; - this.skipInfoMask = fields?.skipInfoMask ?? this.skipInfoMask; - this.prefixMessages = fields?.prefixMessages ?? this.prefixMessages; - this.maskSensitiveInfo = - fields?.maskSensitiveInfo ?? this.maskSensitiveInfo; - this.beamWidth = fields?.beamWidth ?? this.beamWidth; - this.continueLastMessage = - fields?.continueLastMessage ?? this.continueLastMessage; - this.tokensToGenerate = fields?.tokensToGenerate ?? this.tokensToGenerate; - this.roleMeta = fields?.roleMeta ?? this.roleMeta; - this.botSetting = fields?.botSetting ?? this.botSetting; - this.useStandardSse = fields?.useStandardSse ?? this.useStandardSse; - this.replyConstraints = fields?.replyConstraints ?? this.replyConstraints; - this.defaultBotName = fields?.defaultBotName ?? this.defaultBotName; - - this.modelName = fields?.modelName ?? this.modelName; - this.basePath = fields?.configuration?.basePath ?? this.basePath; - this.headers = fields?.configuration?.headers ?? this.headers; - this.proVersion = fields?.proVersion ?? this.proVersion; - - const modelCompletion = this.proVersion - ? 
"chatcompletion_pro" - : "chatcompletion"; - this.apiUrl = `${this.basePath}/text/${modelCompletion}`; - } - - fallbackBotName(options?: this["ParsedCallOptions"]) { - let botName = options?.defaultBotName ?? this.defaultBotName ?? "Assistant"; - if (this.botSetting) { - botName = this.botSetting[0].bot_name; - } - return botName; - } - - defaultReplyConstraints(options?: this["ParsedCallOptions"]) { - const constraints = options?.replyConstraints ?? this.replyConstraints; - if (!constraints) { - let botName = - options?.defaultBotName ?? this.defaultBotName ?? "Assistant"; - if (this.botSetting) { - botName = this.botSetting[0].bot_name; - } - - return { - sender_type: "BOT", - sender_name: botName, - }; - } - return constraints; - } - - /** - * Get the parameters used to invoke the model - */ - invocationParams( - options?: this["ParsedCallOptions"] - ): Omit { - return { - model: this.modelName, - stream: this.streaming, - prompt: this.prompt, - temperature: this.temperature, - top_p: this.topP, - tokens_to_generate: this.tokensToGenerate, - skip_info_mask: this.skipInfoMask, - mask_sensitive_info: this.maskSensitiveInfo, - beam_width: this.beamWidth, - use_standard_sse: this.useStandardSse, - role_meta: this.roleMeta, - bot_setting: options?.botSetting ?? this.botSetting, - reply_constraints: this.defaultReplyConstraints(options), - sample_messages: this.messageToMinimaxMessage( - options?.sampleMessages, - options - ), - functions: - options?.functions ?? - (options?.tools - ? options?.tools.map(formatToOpenAIFunction) - : undefined), - plugins: options?.plugins, - }; - } - - /** - * Get the identifying parameters for the model - */ - identifyingParams() { - return { - ...this.invocationParams(), - }; - } - - /** - * Convert a list of messages to the format expected by the model. 
- * @param messages - * @param options - */ - messageToMinimaxMessage( - messages?: BaseMessage[], - options?: this["ParsedCallOptions"] - ): MinimaxChatCompletionRequestMessage[] | undefined { - return messages - ?.filter((message) => { - if (ChatMessage.isInstance(message)) { - return message.role !== "system"; - } - return message._getType() !== "system"; - }) - ?.map((message) => { - const sender_type = messageToMinimaxRole(message); - if (typeof message.content !== "string") { - throw new Error( - "ChatMinimax does not support non-string message content." - ); - } - return { - sender_type, - text: message.content, - sender_name: - message.name ?? - (sender_type === "BOT" - ? this.fallbackBotName() - : options?.defaultUserName ?? this.defaultUserName), - }; - }); - } - - /** @ignore */ - async _generate( - messages: BaseMessage[], - options?: this["ParsedCallOptions"], - runManager?: CallbackManagerForLLMRun - ): Promise { - const tokenUsage = { totalTokens: 0 }; - this.botSettingFallback(options, messages); - - const params = this.invocationParams(options); - const messagesMapped: MinimaxChatCompletionRequestMessage[] = [ - ...(this.messageToMinimaxMessage(messages, options) ?? []), - ...(this.prefixMessages ?? []), - ]; - - const data = params.stream - ? await new Promise((resolve, reject) => { - let response: ChatCompletionResponse; - let rejected = false; - let resolved = false; - this.completionWithRetry( - { - ...params, - messages: messagesMapped, - }, - true, - options?.signal, - (event) => { - const data = JSON.parse(event.data); - - if (data?.error_code) { - if (rejected) { - return; - } - rejected = true; - reject(data); - return; - } - - const message = data as ChatCompletionResponse; - // on the first message set the response properties - - if (!message.choices[0].finish_reason) { - // the last stream message - let streamText; - if (this.proVersion) { - const messages = message.choices[0].messages ?? 
[]; - streamText = messages[0].text; - } else { - streamText = message.choices[0].delta; - } - - // TODO this should pass part.index to the callback - // when that's supported there - // eslint-disable-next-line no-void - void runManager?.handleLLMNewToken(streamText ?? ""); - return; - } - - response = message; - if (!this.proVersion) { - response.choices[0].text = message.reply; - } - - if (resolved || rejected) { - return; - } - resolved = true; - resolve(response); - } - ).catch((error) => { - if (!rejected) { - rejected = true; - reject(error); - } - }); - }) - : await this.completionWithRetry( - { - ...params, - messages: messagesMapped, - }, - false, - options?.signal - ); - - const { total_tokens: totalTokens } = data.usage ?? {}; - - if (totalTokens) { - tokenUsage.totalTokens = totalTokens; - } - - if (data.base_resp?.status_code !== 0) { - throw new Error(`Minimax API error: ${data.base_resp?.status_msg}`); - } - const generations: ChatGeneration[] = []; - - if (this.proVersion) { - for (const choice of data.choices) { - const messages = choice.messages ?? []; - // 取最后一条消息 - if (messages) { - const message = messages[messages.length - 1]; - const text = message?.text ?? ""; - generations.push({ - text, - message: minimaxResponseToChatMessage(message), - }); - } - } - } else { - for (const choice of data.choices) { - const text = choice?.text ?? ""; - generations.push({ - text, - message: minimaxResponseToChatMessage({ - sender_type: "BOT", - sender_name: - options?.defaultBotName ?? this.defaultBotName ?? 
"Assistant", - text, - }), - }); - } - } - return { - generations, - llmOutput: { tokenUsage }, - }; - } - - /** @ignore */ - async completionWithRetry( - request: MinimaxChatCompletionRequest, - stream: boolean, - signal?: AbortSignal, - onmessage?: (event: MessageEvent) => void - ) { - // The first run will get the accessToken - const makeCompletionRequest = async () => { - const url = `${this.apiUrl}?GroupId=${this.minimaxGroupId}`; - const response = await fetch(url, { - method: "POST", - headers: { - "Content-Type": "application/json", - Authorization: `Bearer ${this.minimaxApiKey}`, - ...this.headers, - }, - body: JSON.stringify(request), - signal, - }); - - if (!stream) { - const json = await response.json(); - return json as ChatCompletionResponse; - } else { - if (response.body) { - const reader = response.body.getReader(); - - const decoder = new TextDecoder("utf-8"); - let data = ""; - - let continueReading = true; - while (continueReading) { - const { done, value } = await reader.read(); - if (done) { - continueReading = false; - break; - } - data += decoder.decode(value); - - let continueProcessing = true; - while (continueProcessing) { - const newlineIndex = data.indexOf("\n"); - if (newlineIndex === -1) { - continueProcessing = false; - break; - } - const line = data.slice(0, newlineIndex); - data = data.slice(newlineIndex + 1); - - if (line.startsWith("data:")) { - const event = new MessageEvent("message", { - data: line.slice("data:".length).trim(), - }); - onmessage?.(event); - } - } - } - return {} as ChatCompletionResponse; - } - return {} as ChatCompletionResponse; - } - }; - return this.caller.call(makeCompletionRequest); - } - - _llmType() { - return "minimax"; - } - - /** @ignore */ - _combineLLMOutput() { - return []; - } - - private botSettingFallback( - options?: this["ParsedCallOptions"], - messages?: BaseMessage[] - ) { - const botSettings = options?.botSetting ?? 
this.botSetting; - if (!botSettings) { - const systemMessages = messages?.filter((message) => { - if (ChatMessage.isInstance(message)) { - return message.role === "system"; - } - return message._getType() === "system"; - }); - - // get the last system message - if (!systemMessages?.length) { - return; - } - const lastSystemMessage = systemMessages[systemMessages.length - 1]; - - if (typeof lastSystemMessage.content !== "string") { - throw new Error( - "ChatMinimax does not support non-string message content." - ); - } - - // setting the default botSetting. - this.botSetting = [ - { - content: lastSystemMessage.content, - bot_name: - options?.defaultBotName ?? this.defaultBotName ?? "Assistant", - }, - ]; - } - } -} - -function minimaxResponseToChatMessage( - message: ChatCompletionResponseMessage -): BaseMessage { - switch (message.sender_type) { - case "USER": - return new HumanMessage(message.text || ""); - case "BOT": - return new AIMessage(message.text || "", { - function_call: message.function_call, - }); - case "FUNCTION": - return new AIMessage(message.text || ""); - default: - return new ChatMessage( - message.text || "", - message.sender_type ?? "unknown" - ); - } -} - -/** ---Response Model---* */ -/** - * Interface representing a message responsed in the Minimax chat model. - */ -interface ChatCompletionResponseMessage { - sender_type: MinimaxMessageRole; - sender_name?: string; - text: string; - function_call?: ChatCompletionResponseMessageFunctionCall; -} - -/** - * Interface representing the usage of tokens in a chat completion. - */ -interface TokenUsage { - total_tokens?: number; -} - -interface BaseResp { - status_code?: number; - status_msg?: string; -} - -/** - * The name and arguments of a function that should be called, as generated by the model. - * @export - * @interface ChatCompletionResponseMessageFunctionCall - */ -export interface ChatCompletionResponseMessageFunctionCall { - /** - * The name of the function to call. 
- * @type {string} - * @memberof ChatCompletionResponseMessageFunctionCall - */ - name?: string; - /** - * The arguments to call the function with, as generated by the model in JSON format. Note that the model does not always generate valid JSON, and may hallucinate parameters not defined by your function schema. Validate the arguments in your code before calling your function. - * @type {string} - * @memberof ChatCompletionResponseMessageFunctionCall - */ - arguments?: string; -} - -/** - * - * @export - * @interface ChatCompletionResponseChoices - */ -export interface ChatCompletionResponseChoicesPro { - /** - * - * @type {string} - * @memberof ChatCompletionResponseChoices - */ - messages?: ChatCompletionResponseMessage[]; - - /** - * - * @type {string} - * @memberof ChatCompletionResponseChoices - */ - finish_reason?: string; -} - -interface ChatCompletionResponseChoices { - delta?: string; - text?: string; - index?: number; - finish_reason?: string; -} - -/** - * Interface representing a response from a chat completion. 
- */ -interface ChatCompletionResponse { - model: string; - created: number; - reply: string; - input_sensitive?: boolean; - input_sensitive_type?: number; - output_sensitive?: boolean; - output_sensitive_type?: number; - usage?: TokenUsage; - base_resp?: BaseResp; - choices: Array< - ChatCompletionResponseChoicesPro & ChatCompletionResponseChoices - >; -} +export * from "@langchain/community/chat_models/minimax"; \ No newline at end of file diff --git a/langchain/src/chat_models/ollama.ts b/langchain/src/chat_models/ollama.ts index a3fbdb367eb2..5c078d77a912 100644 --- a/langchain/src/chat_models/ollama.ts +++ b/langchain/src/chat_models/ollama.ts @@ -1,298 +1 @@ -import { SimpleChatModel, BaseChatModelParams } from "./base.js"; -import { BaseLanguageModelCallOptions } from "../base_language/index.js"; -import { createOllamaStream, OllamaInput } from "../util/ollama.js"; -import { CallbackManagerForLLMRun } from "../callbacks/manager.js"; -import { - AIMessageChunk, - BaseMessage, - ChatGenerationChunk, - ChatMessage, -} from "../schema/index.js"; -import type { StringWithAutocomplete } from "../util/types.js"; - -/** - * An interface defining the options for an Ollama API call. It extends - * the BaseLanguageModelCallOptions interface. - */ -export interface OllamaCallOptions extends BaseLanguageModelCallOptions {} - -/** - * A class that enables calls to the Ollama API to access large language - * models in a chat-like fashion. It extends the SimpleChatModel class and - * implements the OllamaInput interface. - * @example - * ```typescript - * const prompt = ChatPromptTemplate.fromMessages([ - * [ - * "system", - * `You are an expert translator. 
Format all responses as JSON objects with two keys: "original" and "translated".`, - * ], - * ["human", `Translate "{input}" into {language}.`], - * ]); - * - * const model = new ChatOllama({ - * baseUrl: "http://api.example.com", - * model: "llama2", - * format: "json", - * }); - * - * const chain = prompt.pipe(model); - * - * const result = await chain.invoke({ - * input: "I love programming", - * language: "German", - * }); - * - * ``` - */ -export class ChatOllama - extends SimpleChatModel - implements OllamaInput -{ - static lc_name() { - return "ChatOllama"; - } - - lc_serializable = true; - - model = "llama2"; - - baseUrl = "http://localhost:11434"; - - embeddingOnly?: boolean; - - f16KV?: boolean; - - frequencyPenalty?: number; - - logitsAll?: boolean; - - lowVram?: boolean; - - mainGpu?: number; - - mirostat?: number; - - mirostatEta?: number; - - mirostatTau?: number; - - numBatch?: number; - - numCtx?: number; - - numGpu?: number; - - numGqa?: number; - - numKeep?: number; - - numThread?: number; - - penalizeNewline?: boolean; - - presencePenalty?: number; - - repeatLastN?: number; - - repeatPenalty?: number; - - ropeFrequencyBase?: number; - - ropeFrequencyScale?: number; - - temperature?: number; - - stop?: string[]; - - tfsZ?: number; - - topK?: number; - - topP?: number; - - typicalP?: number; - - useMLock?: boolean; - - useMMap?: boolean; - - vocabOnly?: boolean; - - format?: StringWithAutocomplete<"json">; - - constructor(fields: OllamaInput & BaseChatModelParams) { - super(fields); - this.model = fields.model ?? this.model; - this.baseUrl = fields.baseUrl?.endsWith("/") - ? fields.baseUrl.slice(0, -1) - : fields.baseUrl ?? 
this.baseUrl; - this.embeddingOnly = fields.embeddingOnly; - this.f16KV = fields.f16KV; - this.frequencyPenalty = fields.frequencyPenalty; - this.logitsAll = fields.logitsAll; - this.lowVram = fields.lowVram; - this.mainGpu = fields.mainGpu; - this.mirostat = fields.mirostat; - this.mirostatEta = fields.mirostatEta; - this.mirostatTau = fields.mirostatTau; - this.numBatch = fields.numBatch; - this.numCtx = fields.numCtx; - this.numGpu = fields.numGpu; - this.numGqa = fields.numGqa; - this.numKeep = fields.numKeep; - this.numThread = fields.numThread; - this.penalizeNewline = fields.penalizeNewline; - this.presencePenalty = fields.presencePenalty; - this.repeatLastN = fields.repeatLastN; - this.repeatPenalty = fields.repeatPenalty; - this.ropeFrequencyBase = fields.ropeFrequencyBase; - this.ropeFrequencyScale = fields.ropeFrequencyScale; - this.temperature = fields.temperature; - this.stop = fields.stop; - this.tfsZ = fields.tfsZ; - this.topK = fields.topK; - this.topP = fields.topP; - this.typicalP = fields.typicalP; - this.useMLock = fields.useMLock; - this.useMMap = fields.useMMap; - this.vocabOnly = fields.vocabOnly; - this.format = fields.format; - } - - _llmType() { - return "ollama"; - } - - /** - * A method that returns the parameters for an Ollama API call. It - * includes model and options parameters. - * @param options Optional parsed call options. - * @returns An object containing the parameters for an Ollama API call. 
- */ - invocationParams(options?: this["ParsedCallOptions"]) { - return { - model: this.model, - format: this.format, - options: { - embedding_only: this.embeddingOnly, - f16_kv: this.f16KV, - frequency_penalty: this.frequencyPenalty, - logits_all: this.logitsAll, - low_vram: this.lowVram, - main_gpu: this.mainGpu, - mirostat: this.mirostat, - mirostat_eta: this.mirostatEta, - mirostat_tau: this.mirostatTau, - num_batch: this.numBatch, - num_ctx: this.numCtx, - num_gpu: this.numGpu, - num_gqa: this.numGqa, - num_keep: this.numKeep, - num_thread: this.numThread, - penalize_newline: this.penalizeNewline, - presence_penalty: this.presencePenalty, - repeat_last_n: this.repeatLastN, - repeat_penalty: this.repeatPenalty, - rope_frequency_base: this.ropeFrequencyBase, - rope_frequency_scale: this.ropeFrequencyScale, - temperature: this.temperature, - stop: options?.stop ?? this.stop, - tfs_z: this.tfsZ, - top_k: this.topK, - top_p: this.topP, - typical_p: this.typicalP, - use_mlock: this.useMLock, - use_mmap: this.useMMap, - vocab_only: this.vocabOnly, - }, - }; - } - - _combineLLMOutput() { - return {}; - } - - async *_streamResponseChunks( - input: BaseMessage[], - options: this["ParsedCallOptions"], - runManager?: CallbackManagerForLLMRun - ): AsyncGenerator { - const stream = await this.caller.call(async () => - createOllamaStream( - this.baseUrl, - { - ...this.invocationParams(options), - prompt: this._formatMessagesAsPrompt(input), - }, - options - ) - ); - for await (const chunk of stream) { - if (!chunk.done) { - yield new ChatGenerationChunk({ - text: chunk.response, - message: new AIMessageChunk({ content: chunk.response }), - }); - await runManager?.handleLLMNewToken(chunk.response ?? 
""); - } else { - yield new ChatGenerationChunk({ - text: "", - message: new AIMessageChunk({ content: "" }), - generationInfo: { - model: chunk.model, - total_duration: chunk.total_duration, - load_duration: chunk.load_duration, - prompt_eval_count: chunk.prompt_eval_count, - prompt_eval_duration: chunk.prompt_eval_duration, - eval_count: chunk.eval_count, - eval_duration: chunk.eval_duration, - }, - }); - } - } - } - - protected _formatMessagesAsPrompt(messages: BaseMessage[]): string { - const formattedMessages = messages - .map((message) => { - let messageText; - if (message._getType() === "human") { - messageText = `[INST] ${message.content} [/INST]`; - } else if (message._getType() === "ai") { - messageText = message.content; - } else if (message._getType() === "system") { - messageText = `<> ${message.content} <>`; - } else if (ChatMessage.isInstance(message)) { - messageText = `\n\n${message.role[0].toUpperCase()}${message.role.slice( - 1 - )}: ${message.content}`; - } else { - console.warn( - `Unsupported message type passed to Ollama: "${message._getType()}"` - ); - messageText = ""; - } - return messageText; - }) - .join("\n"); - return formattedMessages; - } - - /** @ignore */ - async _call( - messages: BaseMessage[], - options: this["ParsedCallOptions"], - runManager?: CallbackManagerForLLMRun - ): Promise { - const chunks = []; - for await (const chunk of this._streamResponseChunks( - messages, - options, - runManager - )) { - chunks.push(chunk.message.content); - } - return chunks.join(""); - } -} +export * from "@langchain/community/chat_models/ollama"; \ No newline at end of file diff --git a/langchain/src/chat_models/portkey.ts b/langchain/src/chat_models/portkey.ts index fd41231b9ac3..cf17722ab306 100644 --- a/langchain/src/chat_models/portkey.ts +++ b/langchain/src/chat_models/portkey.ts @@ -1,182 +1 @@ -import { LLMOptions } from "portkey-ai"; -import { CallbackManagerForLLMRun } from "../callbacks/manager.js"; -import { PortkeySession, 
getPortkeySession } from "../llms/portkey.js"; -import { - AIMessage, - AIMessageChunk, - BaseMessage, - ChatGeneration, - ChatGenerationChunk, - ChatMessage, - ChatMessageChunk, - ChatResult, - FunctionMessageChunk, - HumanMessage, - HumanMessageChunk, - SystemMessage, - SystemMessageChunk, -} from "../schema/index.js"; -import { BaseChatModel } from "./base.js"; - -interface Message { - role?: string; - content?: string; -} - -function portkeyResponseToChatMessage(message: Message): BaseMessage { - switch (message.role) { - case "user": - return new HumanMessage(message.content || ""); - case "assistant": - return new AIMessage(message.content || ""); - case "system": - return new SystemMessage(message.content || ""); - default: - return new ChatMessage(message.content || "", message.role ?? "unknown"); - } -} - -function _convertDeltaToMessageChunk( - // eslint-disable-next-line @typescript-eslint/no-explicit-any - delta: Record -) { - const { role } = delta; - const content = delta.content ?? ""; - let additional_kwargs; - if (delta.function_call) { - additional_kwargs = { - function_call: delta.function_call, - }; - } else { - additional_kwargs = {}; - } - if (role === "user") { - return new HumanMessageChunk({ content }); - } else if (role === "assistant") { - return new AIMessageChunk({ content, additional_kwargs }); - } else if (role === "system") { - return new SystemMessageChunk({ content }); - } else if (role === "function") { - return new FunctionMessageChunk({ - content, - additional_kwargs, - name: delta.name, - }); - } else { - return new ChatMessageChunk({ content, role }); - } -} - -export class PortkeyChat extends BaseChatModel { - apiKey?: string = undefined; - - baseURL?: string = undefined; - - mode?: string = undefined; - - llms?: [LLMOptions] | null = undefined; - - session: PortkeySession; - - constructor(init?: Partial) { - super(init ?? 
{}); - this.apiKey = init?.apiKey; - this.baseURL = init?.baseURL; - this.mode = init?.mode; - this.llms = init?.llms; - this.session = getPortkeySession({ - apiKey: this.apiKey, - baseURL: this.baseURL, - llms: this.llms, - mode: this.mode, - }); - } - - _llmType() { - return "portkey"; - } - - async _generate( - messages: BaseMessage[], - options: this["ParsedCallOptions"], - _?: CallbackManagerForLLMRun - ): Promise { - const messagesList = messages.map((message) => { - if (typeof message.content !== "string") { - throw new Error( - "PortkeyChat does not support non-string message content." - ); - } - return { - role: message._getType() as string, - content: message.content, - }; - }); - const response = await this.session.portkey.chatCompletions.create({ - messages: messagesList, - ...options, - stream: false, - }); - const generations: ChatGeneration[] = []; - for (const data of response.choices ?? []) { - const text = data.message?.content ?? ""; - const generation: ChatGeneration = { - text, - message: portkeyResponseToChatMessage(data.message ?? {}), - }; - if (data.finish_reason) { - generation.generationInfo = { finish_reason: data.finish_reason }; - } - generations.push(generation); - } - - return { - generations, - }; - } - - async *_streamResponseChunks( - messages: BaseMessage[], - options: this["ParsedCallOptions"], - runManager?: CallbackManagerForLLMRun - ): AsyncGenerator { - const messagesList = messages.map((message) => { - if (typeof message.content !== "string") { - throw new Error( - "PortkeyChat does not support non-string message content." 
- ); - } - return { - role: message._getType() as string, - content: message.content, - }; - }); - const response = await this.session.portkey.chatCompletions.create({ - messages: messagesList, - ...options, - stream: true, - }); - for await (const data of response) { - const choice = data?.choices[0]; - if (!choice) { - continue; - } - const chunk = new ChatGenerationChunk({ - message: _convertDeltaToMessageChunk(choice.delta ?? {}), - text: choice.message?.content ?? "", - generationInfo: { - finishReason: choice.finish_reason, - }, - }); - yield chunk; - void runManager?.handleLLMNewToken(chunk.text ?? ""); - } - if (options.signal?.aborted) { - throw new Error("AbortError"); - } - } - - _combineLLMOutput() { - return {}; - } -} +export * from "@langchain/community/chat_models/portkey"; \ No newline at end of file diff --git a/langchain/src/chat_models/yandex.ts b/langchain/src/chat_models/yandex.ts index 5365183792ca..9a1100723334 100644 --- a/langchain/src/chat_models/yandex.ts +++ b/langchain/src/chat_models/yandex.ts @@ -1,142 +1 @@ -import { CallbackManagerForLLMRun } from "../callbacks/manager.js"; -import { YandexGPTInputs } from "../llms/yandex.js"; -import { - AIMessage, - BaseMessage, - ChatResult, - ChatGeneration, -} from "../schema/index.js"; -import { getEnvironmentVariable } from "../util/env.js"; -import { BaseChatModel } from "./base.js"; - -const apiUrl = "https://llm.api.cloud.yandex.net/llm/v1alpha/chat"; - -interface ParsedMessage { - role: string; - text: string; -} - -function _parseChatHistory(history: BaseMessage[]): [ParsedMessage[], string] { - const chatHistory: ParsedMessage[] = []; - let instruction = ""; - - for (const message of history) { - if (typeof message.content !== "string") { - throw new Error( - "ChatYandexGPT does not support non-string message content." 
- ); - } - if ("content" in message) { - if (message._getType() === "human") { - chatHistory.push({ role: "user", text: message.content }); - } else if (message._getType() === "ai") { - chatHistory.push({ role: "assistant", text: message.content }); - } else if (message._getType() === "system") { - instruction = message.content; - } - } - } - - return [chatHistory, instruction]; -} - -/** - * @example - * ```typescript - * const chat = new ChatYandexGPT({}); - * // The assistant is set to translate English to French. - * const res = await chat.call([ - * new SystemMessage( - * "You are a helpful assistant that translates English to French." - * ), - * new HumanMessage("I love programming."), - * ]); - * console.log(res); - * ``` - */ -export class ChatYandexGPT extends BaseChatModel { - apiKey?: string; - - iamToken?: string; - - temperature = 0.6; - - maxTokens = 1700; - - model = "general"; - - constructor(fields?: YandexGPTInputs) { - super(fields ?? {}); - - const apiKey = fields?.apiKey ?? getEnvironmentVariable("YC_API_KEY"); - - const iamToken = fields?.iamToken ?? getEnvironmentVariable("YC_IAM_TOKEN"); - - if (apiKey === undefined && iamToken === undefined) { - throw new Error( - "Please set the YC_API_KEY or YC_IAM_TOKEN environment variable or pass it to the constructor as the apiKey or iamToken field." - ); - } - - this.apiKey = apiKey; - this.iamToken = iamToken; - this.maxTokens = fields?.maxTokens ?? this.maxTokens; - this.temperature = fields?.temperature ?? this.temperature; - this.model = fields?.model ?? 
this.model; - } - - _llmType() { - return "yandexgpt"; - } - - _combineLLMOutput?() { - return {}; - } - - /** @ignore */ - async _generate( - messages: BaseMessage[], - options: this["ParsedCallOptions"], - _?: CallbackManagerForLLMRun | undefined - ): Promise { - const [messageHistory, instruction] = _parseChatHistory(messages); - const headers = { "Content-Type": "application/json", Authorization: "" }; - if (this.apiKey !== undefined) { - headers.Authorization = `Api-Key ${this.apiKey}`; - } else { - headers.Authorization = `Bearer ${this.iamToken}`; - } - const bodyData = { - model: this.model, - generationOptions: { - temperature: this.temperature, - maxTokens: this.maxTokens, - }, - messages: messageHistory, - instructionText: instruction, - }; - const response = await fetch(apiUrl, { - method: "POST", - headers, - body: JSON.stringify(bodyData), - signal: options?.signal, - }); - if (!response.ok) { - throw new Error( - `Failed to fetch ${apiUrl} from YandexGPT: ${response.status}` - ); - } - const responseData = await response.json(); - const { result } = responseData; - const { text } = result.message; - const totalTokens = result.num_tokens; - const generations: ChatGeneration[] = [ - { text, message: new AIMessage(text) }, - ]; - - return { - generations, - llmOutput: { totalTokens }, - }; - } -} +export * from "@langchain/community/chat_models/yandex"; \ No newline at end of file diff --git a/langchain/src/embeddings/bedrock.ts b/langchain/src/embeddings/bedrock.ts index f9ca960f3a1a..bbc7a702cb60 100644 --- a/langchain/src/embeddings/bedrock.ts +++ b/langchain/src/embeddings/bedrock.ts @@ -1,142 +1 @@ -import { - BedrockRuntimeClient, - InvokeModelCommand, -} from "@aws-sdk/client-bedrock-runtime"; -import { Embeddings, EmbeddingsParams } from "./base.js"; -import type { CredentialType } from "../util/bedrock.js"; - -/** - * Interface that extends EmbeddingsParams and defines additional - * parameters specific to the BedrockEmbeddings class. 
- */ -export interface BedrockEmbeddingsParams extends EmbeddingsParams { - /** - * Model Name to use. Defaults to `amazon.titan-embed-text-v1` if not provided - * - */ - model?: string; - - /** - * A client provided by the user that allows them to customze any - * SDK configuration options. - */ - client?: BedrockRuntimeClient; - - region?: string; - - credentials?: CredentialType; -} - -/** - * Class that extends the Embeddings class and provides methods for - * generating embeddings using the Bedrock API. - * @example - * ```typescript - * const embeddings = new BedrockEmbeddings({ - * region: "your-aws-region", - * credentials: { - * accessKeyId: "your-access-key-id", - * secretAccessKey: "your-secret-access-key", - * }, - * model: "amazon.titan-embed-text-v1", - * }); - * - * // Embed a query and log the result - * const res = await embeddings.embedQuery( - * "What would be a good company name for a company that makes colorful socks?" - * ); - * console.log({ res }); - * ``` - */ -export class BedrockEmbeddings - extends Embeddings - implements BedrockEmbeddingsParams -{ - model: string; - - client: BedrockRuntimeClient; - - batchSize = 512; - - constructor(fields?: BedrockEmbeddingsParams) { - super(fields ?? {}); - - this.model = fields?.model ?? "amazon.titan-embed-text-v1"; - - this.client = - fields?.client ?? - new BedrockRuntimeClient({ - region: fields?.region, - credentials: fields?.credentials, - }); - } - - /** - * Protected method to make a request to the Bedrock API to generate - * embeddings. Handles the retry logic and returns the response from the - * API. - * @param request Request to send to the Bedrock API. - * @returns Promise that resolves to the response from the API. - */ - protected async _embedText(text: string): Promise { - return this.caller.call(async () => { - try { - // replace newlines, which can negatively affect performance. 
- const cleanedText = text.replace(/\n/g, " "); - - const res = await this.client.send( - new InvokeModelCommand({ - modelId: this.model, - body: JSON.stringify({ - inputText: cleanedText, - }), - contentType: "application/json", - accept: "application/json", - }) - ); - - const body = new TextDecoder().decode(res.body); - return JSON.parse(body).embedding; - } catch (e) { - console.error({ - error: e, - }); - // eslint-disable-next-line no-instanceof/no-instanceof - if (e instanceof Error) { - throw new Error( - `An error occurred while embedding documents with Bedrock: ${e.message}` - ); - } - - throw new Error( - "An error occurred while embedding documents with Bedrock" - ); - } - }); - } - - /** - * Method that takes a document as input and returns a promise that - * resolves to an embedding for the document. It calls the _embedText - * method with the document as the input. - * @param document Document for which to generate an embedding. - * @returns Promise that resolves to an embedding for the input document. - */ - embedQuery(document: string): Promise { - return this.caller.callWithOptions( - {}, - this._embedText.bind(this), - document - ); - } - - /** - * Method to generate embeddings for an array of texts. Calls _embedText - * method which batches and handles retry logic when calling the AWS Bedrock API. - * @param documents Array of texts for which to generate embeddings. - * @returns Promise that resolves to a 2D array of embeddings for each input document. 
- */ - async embedDocuments(documents: string[]): Promise { - return Promise.all(documents.map((document) => this._embedText(document))); - } -} +export * from "@langchain/community/embeddings/bedrock"; \ No newline at end of file diff --git a/langchain/src/embeddings/cloudflare_workersai.ts b/langchain/src/embeddings/cloudflare_workersai.ts index 191213dfbf5f..a5a6050cd44b 100644 --- a/langchain/src/embeddings/cloudflare_workersai.ts +++ b/langchain/src/embeddings/cloudflare_workersai.ts @@ -1,94 +1 @@ -import { Ai } from "@cloudflare/ai"; -import { Fetcher } from "@cloudflare/workers-types"; -import { chunkArray } from "../util/chunk.js"; -import { Embeddings, EmbeddingsParams } from "./base.js"; - -type AiTextEmbeddingsInput = { - text: string | string[]; -}; - -type AiTextEmbeddingsOutput = { - shape: number[]; - data: number[][]; -}; - -export interface CloudflareWorkersAIEmbeddingsParams extends EmbeddingsParams { - /** Binding */ - binding: Fetcher; - - /** Model name to use */ - modelName?: string; - - /** - * The maximum number of documents to embed in a single request. - */ - batchSize?: number; - - /** - * Whether to strip new lines from the input text. This is recommended by - * OpenAI, but may not be suitable for all use cases. - */ - stripNewLines?: boolean; -} - -export class CloudflareWorkersAIEmbeddings extends Embeddings { - modelName = "@cf/baai/bge-base-en-v1.5"; - - batchSize = 50; - - stripNewLines = true; - - ai: Ai; - - constructor(fields: CloudflareWorkersAIEmbeddingsParams) { - super(fields); - - if (!fields.binding) { - throw new Error( - "Must supply a Workers AI binding, eg { binding: env.AI }" - ); - } - this.ai = new Ai(fields.binding); - this.modelName = fields.modelName ?? this.modelName; - this.stripNewLines = fields.stripNewLines ?? this.stripNewLines; - } - - async embedDocuments(texts: string[]): Promise { - const batches = chunkArray( - this.stripNewLines ? 
texts.map((t) => t.replace(/\n/g, " ")) : texts, - this.batchSize - ); - - const batchRequests = batches.map((batch) => this.runEmbedding(batch)); - const batchResponses = await Promise.all(batchRequests); - const embeddings: number[][] = []; - - for (let i = 0; i < batchResponses.length; i += 1) { - const batchResponse = batchResponses[i]; - for (let j = 0; j < batchResponse.length; j += 1) { - embeddings.push(batchResponse[j]); - } - } - - return embeddings; - } - - async embedQuery(text: string): Promise { - const data = await this.runEmbedding([ - this.stripNewLines ? text.replace(/\n/g, " ") : text, - ]); - return data[0]; - } - - private async runEmbedding(texts: string[]) { - return this.caller.call(async () => { - const response: AiTextEmbeddingsOutput = await this.ai.run( - this.modelName, - { - text: texts, - } as AiTextEmbeddingsInput - ); - return response.data; - }); - } -} +export * from "@langchain/community/embeddings/cloudflare_workersai"; \ No newline at end of file diff --git a/langchain/src/embeddings/cohere.ts b/langchain/src/embeddings/cohere.ts index 4d510c205459..e65f3be38b9f 100644 --- a/langchain/src/embeddings/cohere.ts +++ b/langchain/src/embeddings/cohere.ts @@ -1,155 +1 @@ -import { chunkArray } from "../util/chunk.js"; -import { getEnvironmentVariable } from "../util/env.js"; -import { Embeddings, EmbeddingsParams } from "./base.js"; - -/** - * Interface that extends EmbeddingsParams and defines additional - * parameters specific to the CohereEmbeddings class. - */ -export interface CohereEmbeddingsParams extends EmbeddingsParams { - modelName: string; - - /** - * The maximum number of documents to embed in a single request. This is - * limited by the Cohere API to a maximum of 96. - */ - batchSize?: number; -} - -/** - * A class for generating embeddings using the Cohere API. 
- * @example - * ```typescript - * // Embed a query using the CohereEmbeddings class - * const model = new ChatOpenAI(); - * const res = await model.embedQuery( - * "What would be a good company name for a company that makes colorful socks?", - * ); - * console.log({ res }); - * - * ``` - */ -export class CohereEmbeddings - extends Embeddings - implements CohereEmbeddingsParams -{ - modelName = "small"; - - batchSize = 48; - - private apiKey: string; - - private client: typeof import("cohere-ai"); - - /** - * Constructor for the CohereEmbeddings class. - * @param fields - An optional object with properties to configure the instance. - */ - constructor( - fields?: Partial & { - verbose?: boolean; - apiKey?: string; - } - ) { - const fieldsWithDefaults = { maxConcurrency: 2, ...fields }; - - super(fieldsWithDefaults); - - const apiKey = - fieldsWithDefaults?.apiKey || getEnvironmentVariable("COHERE_API_KEY"); - - if (!apiKey) { - throw new Error("Cohere API key not found"); - } - - this.modelName = fieldsWithDefaults?.modelName ?? this.modelName; - this.batchSize = fieldsWithDefaults?.batchSize ?? this.batchSize; - this.apiKey = apiKey; - } - - /** - * Generates embeddings for an array of texts. - * @param texts - An array of strings to generate embeddings for. - * @returns A Promise that resolves to an array of embeddings. 
- */ - async embedDocuments(texts: string[]): Promise { - await this.maybeInitClient(); - - const batches = chunkArray(texts, this.batchSize); - - const batchRequests = batches.map((batch) => - this.embeddingWithRetry({ - model: this.modelName, - texts: batch, - }) - ); - - const batchResponses = await Promise.all(batchRequests); - - const embeddings: number[][] = []; - - for (let i = 0; i < batchResponses.length; i += 1) { - const batch = batches[i]; - const { body: batchResponse } = batchResponses[i]; - for (let j = 0; j < batch.length; j += 1) { - embeddings.push(batchResponse.embeddings[j]); - } - } - - return embeddings; - } - - /** - * Generates an embedding for a single text. - * @param text - A string to generate an embedding for. - * @returns A Promise that resolves to an array of numbers representing the embedding. - */ - async embedQuery(text: string): Promise { - await this.maybeInitClient(); - - const { body } = await this.embeddingWithRetry({ - model: this.modelName, - texts: [text], - }); - return body.embeddings[0]; - } - - /** - * Generates embeddings with retry capabilities. - * @param request - An object containing the request parameters for generating embeddings. - * @returns A Promise that resolves to the API response. - */ - private async embeddingWithRetry( - request: Parameters[0] - ) { - await this.maybeInitClient(); - - return this.caller.call(this.client.embed.bind(this.client), request); - } - - /** - * Initializes the Cohere client if it hasn't been initialized already. - */ - private async maybeInitClient() { - if (!this.client) { - const { cohere } = await CohereEmbeddings.imports(); - - this.client = cohere; - this.client.init(this.apiKey); - } - } - - /** @ignore */ - static async imports(): Promise<{ - cohere: typeof import("cohere-ai"); - }> { - try { - const { default: cohere } = await import("cohere-ai"); - return { cohere }; - } catch (e) { - throw new Error( - "Please install cohere-ai as a dependency with, e.g. 
`yarn add cohere-ai`" - ); - } - } -} +export * from "@langchain/community/embeddings/cohere"; \ No newline at end of file diff --git a/langchain/src/embeddings/googlepalm.ts b/langchain/src/embeddings/googlepalm.ts index 2d969dc98106..07625a3217c9 100644 --- a/langchain/src/embeddings/googlepalm.ts +++ b/langchain/src/embeddings/googlepalm.ts @@ -1,107 +1 @@ -import { TextServiceClient } from "@google-ai/generativelanguage"; -import { GoogleAuth } from "google-auth-library"; -import { Embeddings, EmbeddingsParams } from "./base.js"; -import { getEnvironmentVariable } from "../util/env.js"; - -/** - * Interface that extends EmbeddingsParams and defines additional - * parameters specific to the GooglePaLMEmbeddings class. - */ -export interface GooglePaLMEmbeddingsParams extends EmbeddingsParams { - /** - * Model Name to use - * - * Note: The format must follow the pattern - `models/{model}` - */ - modelName?: string; - /** - * Google Palm API key to use - */ - apiKey?: string; -} - -/** - * Class that extends the Embeddings class and provides methods for - * generating embeddings using the Google Palm API. - * @example - * ```typescript - * const model = new GooglePaLMEmbeddings({ - * apiKey: "", - * modelName: "models/embedding-gecko-001", - * }); - * - * // Embed a single query - * const res = await model.embedQuery( - * "What would be a good company name for a company that makes colorful socks?" - * ); - * console.log({ res }); - * - * // Embed multiple documents - * const documentRes = await model.embedDocuments(["Hello world", "Bye bye"]); - * console.log({ documentRes }); - * ``` - */ -export class GooglePaLMEmbeddings - extends Embeddings - implements GooglePaLMEmbeddingsParams -{ - apiKey?: string; - - modelName = "models/embedding-gecko-001"; - - private client: TextServiceClient; - - constructor(fields?: GooglePaLMEmbeddingsParams) { - super(fields ?? {}); - - this.modelName = fields?.modelName ?? this.modelName; - - this.apiKey = - fields?.apiKey ?? 
getEnvironmentVariable("GOOGLE_PALM_API_KEY"); - if (!this.apiKey) { - throw new Error( - "Please set an API key for Google Palm 2 in the environment variable GOOGLE_PALM_API_KEY or in the `apiKey` field of the GooglePalm constructor" - ); - } - - this.client = new TextServiceClient({ - authClient: new GoogleAuth().fromAPIKey(this.apiKey), - }); - } - - protected async _embedText(text: string): Promise { - // replace newlines, which can negatively affect performance. - const cleanedText = text.replace(/\n/g, " "); - const res = await this.client.embedText({ - model: this.modelName, - text: cleanedText, - }); - return res[0].embedding?.value ?? []; - } - - /** - * Method that takes a document as input and returns a promise that - * resolves to an embedding for the document. It calls the _embedText - * method with the document as the input. - * @param document Document for which to generate an embedding. - * @returns Promise that resolves to an embedding for the input document. - */ - embedQuery(document: string): Promise { - return this.caller.callWithOptions( - {}, - this._embedText.bind(this), - document - ); - } - - /** - * Method that takes an array of documents as input and returns a promise - * that resolves to a 2D array of embeddings for each document. It calls - * the _embedText method for each document in the array. - * @param documents Array of documents for which to generate embeddings. - * @returns Promise that resolves to a 2D array of embeddings for each input document. 
- */ - embedDocuments(documents: string[]): Promise { - return Promise.all(documents.map((document) => this._embedText(document))); - } -} +export * from "@langchain/community/embeddings/googlepalm"; \ No newline at end of file diff --git a/langchain/src/embeddings/googlevertexai.ts b/langchain/src/embeddings/googlevertexai.ts index 121d8c6efd9b..2e5377894e49 100644 --- a/langchain/src/embeddings/googlevertexai.ts +++ b/langchain/src/embeddings/googlevertexai.ts @@ -1,145 +1 @@ -import { GoogleAuth, GoogleAuthOptions } from "google-auth-library"; -import { Embeddings, EmbeddingsParams } from "./base.js"; -import { - GoogleVertexAIBasePrediction, - GoogleVertexAIBaseLLMInput, - GoogleVertexAILLMPredictions, -} from "../types/googlevertexai-types.js"; -import { GoogleVertexAILLMConnection } from "../util/googlevertexai-connection.js"; -import { AsyncCallerCallOptions } from "../util/async_caller.js"; -import { chunkArray } from "../util/chunk.js"; - -/** - * Defines the parameters required to initialize a - * GoogleVertexAIEmbeddings instance. It extends EmbeddingsParams and - * GoogleVertexAIConnectionParams. - */ -export interface GoogleVertexAIEmbeddingsParams - extends EmbeddingsParams, - GoogleVertexAIBaseLLMInput {} - -/** - * Defines additional options specific to the - * GoogleVertexAILLMEmbeddingsInstance. It extends AsyncCallerCallOptions. - */ -interface GoogleVertexAILLMEmbeddingsOptions extends AsyncCallerCallOptions {} - -/** - * Represents an instance for generating embeddings using the Google - * Vertex AI API. It contains the content to be embedded. - */ -interface GoogleVertexAILLMEmbeddingsInstance { - content: string; -} - -/** - * Defines the structure of the embeddings results returned by the Google - * Vertex AI API. It extends GoogleVertexAIBasePrediction and contains the - * embeddings and their statistics. 
- */ -interface GoogleVertexEmbeddingsResults extends GoogleVertexAIBasePrediction { - embeddings: { - statistics: { - token_count: number; - truncated: boolean; - }; - values: number[]; - }; -} - -/** - * Enables calls to the Google Cloud's Vertex AI API to access - * the embeddings generated by Large Language Models. - * - * To use, you will need to have one of the following authentication - * methods in place: - * - You are logged into an account permitted to the Google Cloud project - * using Vertex AI. - * - You are running this on a machine using a service account permitted to - * the Google Cloud project using Vertex AI. - * - The `GOOGLE_APPLICATION_CREDENTIALS` environment variable is set to the - * path of a credentials file for a service account permitted to the - * Google Cloud project using Vertex AI. - * @example - * ```typescript - * const model = new GoogleVertexAIEmbeddings(); - * const res = await model.embedQuery( - * "What would be a good company name for a company that makes colorful socks?" - * ); - * console.log({ res }); - * ``` - */ -export class GoogleVertexAIEmbeddings - extends Embeddings - implements GoogleVertexAIEmbeddingsParams -{ - model = "textembedding-gecko"; - - private connection: GoogleVertexAILLMConnection< - GoogleVertexAILLMEmbeddingsOptions, - GoogleVertexAILLMEmbeddingsInstance, - GoogleVertexEmbeddingsResults, - GoogleAuthOptions - >; - - constructor(fields?: GoogleVertexAIEmbeddingsParams) { - super(fields ?? {}); - - this.model = fields?.model ?? this.model; - - this.connection = new GoogleVertexAILLMConnection( - { ...fields, ...this }, - this.caller, - new GoogleAuth({ - scopes: "https://www.googleapis.com/auth/cloud-platform", - ...fields?.authOptions, - }) - ); - } - - /** - * Takes an array of documents as input and returns a promise that - * resolves to a 2D array of embeddings for each document. It splits the - * documents into chunks and makes requests to the Google Vertex AI API to - * generate embeddings. 
- * @param documents An array of documents to be embedded. - * @returns A promise that resolves to a 2D array of embeddings for each document. - */ - async embedDocuments(documents: string[]): Promise { - const instanceChunks: GoogleVertexAILLMEmbeddingsInstance[][] = chunkArray( - documents.map((document) => ({ - content: document, - })), - 5 - ); // Vertex AI accepts max 5 instances per prediction - const parameters = {}; - const options = {}; - const responses = await Promise.all( - instanceChunks.map((instances) => - this.connection.request(instances, parameters, options) - ) - ); - const result: number[][] = - responses - ?.map( - (response) => - ( - response?.data as GoogleVertexAILLMPredictions - )?.predictions?.map((result) => result.embeddings.values) ?? [] - ) - .flat() ?? []; - return result; - } - - /** - * Takes a document as input and returns a promise that resolves to an - * embedding for the document. It calls the embedDocuments method with the - * document as the input. - * @param document A document to be embedded. - * @returns A promise that resolves to an embedding for the document. - */ - async embedQuery(document: string): Promise { - const data = await this.embedDocuments([document]); - return data[0]; - } -} +export * from "@langchain/community/embeddings/googlevertexai"; \ No newline at end of file diff --git a/langchain/src/embeddings/gradient_ai.ts b/langchain/src/embeddings/gradient_ai.ts index e03b7a364c0f..d5a860615d3c 100644 --- a/langchain/src/embeddings/gradient_ai.ts +++ b/langchain/src/embeddings/gradient_ai.ts @@ -1,118 +1 @@ -import { Gradient } from "@gradientai/nodejs-sdk"; -import { getEnvironmentVariable } from "../util/env.js"; -import { chunkArray } from "../util/chunk.js"; -import { Embeddings, EmbeddingsParams } from "./base.js"; - -/** - * Interface for GradientEmbeddings parameters. Extends EmbeddingsParams and - * defines additional parameters specific to the GradientEmbeddings class. 
- */ -export interface GradientEmbeddingsParams extends EmbeddingsParams { - /** - * Gradient AI Access Token. - * Provide Access Token if you do not wish to automatically pull from env. - */ - gradientAccessKey?: string; - /** - * Gradient Workspace Id. - * Provide workspace id if you do not wish to automatically pull from env. - */ - workspaceId?: string; -} - -/** - * Class for generating embeddings using the Gradient AI's API. Extends the - * Embeddings class and implements GradientEmbeddingsParams and - */ -export class GradientEmbeddings - extends Embeddings - implements GradientEmbeddingsParams -{ - gradientAccessKey?: string; - - workspaceId?: string; - - batchSize = 128; - - // eslint-disable-next-line @typescript-eslint/no-explicit-any - model: any; - - constructor(fields: GradientEmbeddingsParams) { - super(fields); - - this.gradientAccessKey = - fields?.gradientAccessKey ?? - getEnvironmentVariable("GRADIENT_ACCESS_TOKEN"); - this.workspaceId = - fields?.workspaceId ?? getEnvironmentVariable("GRADIENT_WORKSPACE_ID"); - - if (!this.gradientAccessKey) { - throw new Error("Missing Gradient AI Access Token"); - } - - if (!this.workspaceId) { - throw new Error("Missing Gradient AI Workspace ID"); - } - } - - /** - * Method to generate embeddings for an array of documents. Splits the - * documents into batches and makes requests to the Gradient API to generate - * embeddings. - * @param texts Array of documents to generate embeddings for. - * @returns Promise that resolves to a 2D array of embeddings for each document. 
- */ - async embedDocuments(texts: string[]): Promise { - await this.setModel(); - - const mappedTexts = texts.map((text) => ({ input: text })); - - const batches = chunkArray(mappedTexts, this.batchSize); - - const batchRequests = batches.map((batch) => - this.caller.call(async () => - this.model.generateEmbeddings({ - inputs: batch, - }) - ) - ); - const batchResponses = await Promise.all(batchRequests); - - const embeddings: number[][] = []; - for (let i = 0; i < batchResponses.length; i += 1) { - const batch = batches[i]; - const { embeddings: batchResponse } = batchResponses[i]; - for (let j = 0; j < batch.length; j += 1) { - embeddings.push(batchResponse[j].embedding); - } - } - return embeddings; - } - - /** - * Method to generate an embedding for a single document. Calls the - * embedDocuments method with the document as the input. - * @param text Document to generate an embedding for. - * @returns Promise that resolves to an embedding for the document. - */ - async embedQuery(text: string): Promise { - const data = await this.embedDocuments([text]); - return data[0]; - } - - /** - * Method to set the model to use for generating embeddings. - * @sets the class' `model` value to that of the retrieved Embeddings Model. 
- */ - async setModel() { - if (this.model) return; - - const gradient = new Gradient({ - accessToken: this.gradientAccessKey, - workspaceId: this.workspaceId, - }); - this.model = await gradient.getEmbeddingsModel({ - slug: "bge-large", - }); - } -} +export * from "@langchain/community/embeddings/gradient_ai"; \ No newline at end of file diff --git a/langchain/src/embeddings/hf.ts b/langchain/src/embeddings/hf.ts index 83b801a90566..27686094c28a 100644 --- a/langchain/src/embeddings/hf.ts +++ b/langchain/src/embeddings/hf.ts @@ -1,77 +1 @@ -import { HfInference, HfInferenceEndpoint } from "@huggingface/inference"; -import { Embeddings, EmbeddingsParams } from "./base.js"; -import { getEnvironmentVariable } from "../util/env.js"; - -/** - * Interface that extends EmbeddingsParams and defines additional - * parameters specific to the HuggingFaceInferenceEmbeddings class. - */ -export interface HuggingFaceInferenceEmbeddingsParams extends EmbeddingsParams { - apiKey?: string; - model?: string; - endpointUrl?: string; -} - -/** - * Class that extends the Embeddings class and provides methods for - * generating embeddings using Hugging Face models through the - * HuggingFaceInference API. - */ -export class HuggingFaceInferenceEmbeddings - extends Embeddings - implements HuggingFaceInferenceEmbeddingsParams -{ - apiKey?: string; - - model: string; - - endpointUrl?: string; - - client: HfInference | HfInferenceEndpoint; - - constructor(fields?: HuggingFaceInferenceEmbeddingsParams) { - super(fields ?? {}); - - this.model = fields?.model ?? "BAAI/bge-base-en-v1.5"; - this.apiKey = - fields?.apiKey ?? getEnvironmentVariable("HUGGINGFACEHUB_API_KEY"); - this.endpointUrl = fields?.endpointUrl; - this.client = this.endpointUrl - ? new HfInference(this.apiKey).endpoint(this.endpointUrl) - : new HfInference(this.apiKey); - } - - async _embed(texts: string[]): Promise { - // replace newlines, which can negatively affect performance. 
- const clean = texts.map((text) => text.replace(/\n/g, " ")); - return this.caller.call(() => - this.client.featureExtraction({ - model: this.model, - inputs: clean, - }) - ) as Promise; - } - - /** - * Method that takes a document as input and returns a promise that - * resolves to an embedding for the document. It calls the _embed method - * with the document as the input and returns the first embedding in the - * resulting array. - * @param document Document to generate an embedding for. - * @returns Promise that resolves to an embedding for the document. - */ - embedQuery(document: string): Promise { - return this._embed([document]).then((embeddings) => embeddings[0]); - } - - /** - * Method that takes an array of documents as input and returns a promise - * that resolves to a 2D array of embeddings for each document. It calls - * the _embed method with the documents as the input. - * @param documents Array of documents to generate embeddings for. - * @returns Promise that resolves to a 2D array of embeddings for each document. - */ - embedDocuments(documents: string[]): Promise { - return this._embed(documents); - } -} +export * from "@langchain/community/embeddings/hf"; \ No newline at end of file diff --git a/langchain/src/embeddings/hf_transformers.ts b/langchain/src/embeddings/hf_transformers.ts index e2b8bdcc98a5..eb38d9a597ec 100644 --- a/langchain/src/embeddings/hf_transformers.ts +++ b/langchain/src/embeddings/hf_transformers.ts @@ -1,105 +1 @@ -import { Pipeline, pipeline } from "@xenova/transformers"; -import { chunkArray } from "../util/chunk.js"; -import { Embeddings, EmbeddingsParams } from "./base.js"; - -export interface HuggingFaceTransformersEmbeddingsParams - extends EmbeddingsParams { - /** Model name to use */ - modelName: string; - - /** - * Timeout to use when making requests to OpenAI. - */ - timeout?: number; - - /** - * The maximum number of documents to embed in a single request. 
- */ - batchSize?: number; - - /** - * Whether to strip new lines from the input text. This is recommended by - * OpenAI, but may not be suitable for all use cases. - */ - stripNewLines?: boolean; -} - -/** - * @example - * ```typescript - * const model = new HuggingFaceTransformersEmbeddings({ - * modelName: "Xenova/all-MiniLM-L6-v2", - * }); - * - * // Embed a single query - * const res = await model.embedQuery( - * "What would be a good company name for a company that makes colorful socks?" - * ); - * console.log({ res }); - * - * // Embed multiple documents - * const documentRes = await model.embedDocuments(["Hello world", "Bye bye"]); - * console.log({ documentRes }); - * ``` - */ -export class HuggingFaceTransformersEmbeddings - extends Embeddings - implements HuggingFaceTransformersEmbeddingsParams -{ - modelName = "Xenova/all-MiniLM-L6-v2"; - - batchSize = 512; - - stripNewLines = true; - - timeout?: number; - - private pipelinePromise: Promise; - - constructor(fields?: Partial) { - super(fields ?? {}); - - this.modelName = fields?.modelName ?? this.modelName; - this.stripNewLines = fields?.stripNewLines ?? this.stripNewLines; - this.timeout = fields?.timeout; - } - - async embedDocuments(texts: string[]): Promise { - const batches = chunkArray( - this.stripNewLines ? texts.map((t) => t.replace(/\n/g, " ")) : texts, - this.batchSize - ); - - const batchRequests = batches.map((batch) => this.runEmbedding(batch)); - const batchResponses = await Promise.all(batchRequests); - const embeddings: number[][] = []; - - for (let i = 0; i < batchResponses.length; i += 1) { - const batchResponse = batchResponses[i]; - for (let j = 0; j < batchResponse.length; j += 1) { - embeddings.push(batchResponse[j]); - } - } - - return embeddings; - } - - async embedQuery(text: string): Promise { - const data = await this.runEmbedding([ - this.stripNewLines ? 
text.replace(/\n/g, " ") : text, - ]); - return data[0]; - } - - private async runEmbedding(texts: string[]) { - const pipe = await (this.pipelinePromise ??= pipeline( - "feature-extraction", - this.modelName - )); - - return this.caller.call(async () => { - const output = await pipe(texts, { pooling: "mean", normalize: true }); - return output.tolist(); - }); - } -} +export * from "@langchain/community/embeddings/hf_transformers"; \ No newline at end of file diff --git a/langchain/src/embeddings/llama_cpp.ts b/langchain/src/embeddings/llama_cpp.ts index 51b9b3c3e007..f8d30024a0d9 100644 --- a/langchain/src/embeddings/llama_cpp.ts +++ b/langchain/src/embeddings/llama_cpp.ts @@ -1,103 +1 @@ -import { LlamaModel, LlamaContext } from "node-llama-cpp"; -import { - LlamaBaseCppInputs, - createLlamaModel, - createLlamaContext, -} from "../util/llama_cpp.js"; -import { Embeddings, EmbeddingsParams } from "./base.js"; - -/** - * Note that the modelPath is the only required parameter. For testing you - * can set this in the environment variable `LLAMA_PATH`. - */ -export interface LlamaCppEmbeddingsParams - extends LlamaBaseCppInputs, - EmbeddingsParams {} - -/** - * @example - * ```typescript - * // Initialize LlamaCppEmbeddings with the path to the model file - * const embeddings = new LlamaCppEmbeddings({ - * modelPath: "/Replace/with/path/to/your/model/gguf-llama2-q4_0.bin", - * }); - * - * // Embed a query string using the Llama embeddings - * const res = embeddings.embedQuery("Hello Llama!"); - * - * // Output the resulting embeddings - * console.log(res); - * - * ``` - */ -export class LlamaCppEmbeddings extends Embeddings { - _model: LlamaModel; - - _context: LlamaContext; - - constructor(inputs: LlamaCppEmbeddingsParams) { - super(inputs); - const _inputs = inputs; - _inputs.embedding = true; - - this._model = createLlamaModel(_inputs); - this._context = createLlamaContext(this._model, _inputs); - } - - /** - * Generates embeddings for an array of texts. 
- * @param texts - An array of strings to generate embeddings for. - * @returns A Promise that resolves to an array of embeddings. - */ - async embedDocuments(texts: string[]): Promise { - const tokensArray = []; - - for (const text of texts) { - const encodings = await this.caller.call( - () => - new Promise((resolve) => { - resolve(this._context.encode(text)); - }) - ); - tokensArray.push(encodings); - } - - const embeddings: number[][] = []; - - for (const tokens of tokensArray) { - const embedArray: number[] = []; - - for (let i = 0; i < tokens.length; i += 1) { - const nToken: number = +tokens[i]; - embedArray.push(nToken); - } - - embeddings.push(embedArray); - } - - return embeddings; - } - - /** - * Generates an embedding for a single text. - * @param text - A string to generate an embedding for. - * @returns A Promise that resolves to an array of numbers representing the embedding. - */ - async embedQuery(text: string): Promise { - const tokens: number[] = []; - - const encodings = await this.caller.call( - () => - new Promise((resolve) => { - resolve(this._context.encode(text)); - }) - ); - - for (let i = 0; i < encodings.length; i += 1) { - const token: number = +encodings[i]; - tokens.push(token); - } - - return tokens; - } -} +export * from "@langchain/community/embeddings/llama_cpp"; \ No newline at end of file diff --git a/langchain/src/embeddings/minimax.ts b/langchain/src/embeddings/minimax.ts index 80697e30f60a..c2576a58b278 100644 --- a/langchain/src/embeddings/minimax.ts +++ b/langchain/src/embeddings/minimax.ts @@ -1,222 +1 @@ -import { getEnvironmentVariable } from "../util/env.js"; -import { chunkArray } from "../util/chunk.js"; -import { Embeddings, EmbeddingsParams } from "./base.js"; -import { ConfigurationParameters } from "../chat_models/minimax.js"; - -/** - * Interface for MinimaxEmbeddings parameters. Extends EmbeddingsParams and - * defines additional parameters specific to the MinimaxEmbeddings class. 
- */ -export interface MinimaxEmbeddingsParams extends EmbeddingsParams { - /** Model name to use */ - modelName: string; - - /** - * API key to use when making requests. Defaults to the value of - * `MINIMAX_GROUP_ID` environment variable. - */ - minimaxGroupId?: string; - - /** - * Secret key to use when making requests. Defaults to the value of - * `MINIMAX_API_KEY` environment variable. - */ - minimaxApiKey?: string; - - /** - * The maximum number of documents to embed in a single request. This is - * limited by the Minimax API to a maximum of 4096. - */ - batchSize?: number; - - /** - * Whether to strip new lines from the input text. This is recommended by - * Minimax, but may not be suitable for all use cases. - */ - stripNewLines?: boolean; - - /** - * The target use-case after generating the vector. - * When using embeddings, the vector of the target content is first generated through the db and stored in the vector database, - * and then the vector of the retrieval text is generated through the query. - * Note: For the parameters of the partial algorithm, we adopted a separate algorithm plan for query and db. - * Therefore, for a paragraph of text, if it is to be used as a retrieval text, it should use the db, - * and if it is used as a retrieval text, it should use the query. - */ - type?: "db" | "query"; -} - -export interface CreateMinimaxEmbeddingRequest { - /** - * @type {string} - * @memberof CreateMinimaxEmbeddingRequest - */ - model: string; - - /** - * Text to generate vector expectation - * @type {CreateEmbeddingRequestInput} - * @memberof CreateMinimaxEmbeddingRequest - */ - texts: string[]; - - /** - * The target use-case after generating the vector. When using embeddings, - * first generate the vector of the target content through the db and store it in the vector database, - * and then generate the vector of the retrieval text through the query. 
- * Note: For the parameter of the algorithm, we use the algorithm scheme of query and db separation, - * so a text, if it is to be retrieved as a text, should use the db, - * if it is used as a retrieval text, should use the query. - * @type {string} - * @memberof CreateMinimaxEmbeddingRequest - */ - type: "db" | "query"; -} - -/** - * Class for generating embeddings using the Minimax API. Extends the - * Embeddings class and implements MinimaxEmbeddingsParams - * @example - * ```typescript - * const embeddings = new MinimaxEmbeddings(); - * - * // Embed a single query - * const queryEmbedding = await embeddings.embedQuery("Hello world"); - * console.log(queryEmbedding); - * - * // Embed multiple documents - * const documentsEmbedding = await embeddings.embedDocuments([ - * "Hello world", - * "Bye bye", - * ]); - * console.log(documentsEmbedding); - * ``` - */ -export class MinimaxEmbeddings - extends Embeddings - implements MinimaxEmbeddingsParams -{ - modelName = "embo-01"; - - batchSize = 512; - - stripNewLines = true; - - minimaxGroupId?: string; - - minimaxApiKey?: string; - - type: "db" | "query" = "db"; - - apiUrl: string; - - basePath?: string = "https://api.minimax.chat/v1"; - - headers?: Record; - - constructor( - fields?: Partial & { - configuration?: ConfigurationParameters; - } - ) { - const fieldsWithDefaults = { maxConcurrency: 2, ...fields }; - super(fieldsWithDefaults); - - this.minimaxGroupId = - fields?.minimaxGroupId ?? getEnvironmentVariable("MINIMAX_GROUP_ID"); - if (!this.minimaxGroupId) { - throw new Error("Minimax GroupID not found"); - } - - this.minimaxApiKey = - fields?.minimaxApiKey ?? getEnvironmentVariable("MINIMAX_API_KEY"); - - if (!this.minimaxApiKey) { - throw new Error("Minimax ApiKey not found"); - } - - this.modelName = fieldsWithDefaults?.modelName ?? this.modelName; - this.batchSize = fieldsWithDefaults?.batchSize ?? this.batchSize; - this.type = fieldsWithDefaults?.type ?? 
this.type; - this.stripNewLines = - fieldsWithDefaults?.stripNewLines ?? this.stripNewLines; - this.basePath = fields?.configuration?.basePath ?? this.basePath; - this.apiUrl = `${this.basePath}/embeddings`; - this.headers = fields?.configuration?.headers ?? this.headers; - } - - /** - * Method to generate embeddings for an array of documents. Splits the - * documents into batches and makes requests to the Minimax API to generate - * embeddings. - * @param texts Array of documents to generate embeddings for. - * @returns Promise that resolves to a 2D array of embeddings for each document. - */ - async embedDocuments(texts: string[]): Promise { - const batches = chunkArray( - this.stripNewLines ? texts.map((t) => t.replace(/\n/g, " ")) : texts, - this.batchSize - ); - - const batchRequests = batches.map((batch) => - this.embeddingWithRetry({ - model: this.modelName, - texts: batch, - type: this.type, - }) - ); - const batchResponses = await Promise.all(batchRequests); - - const embeddings: number[][] = []; - for (let i = 0; i < batchResponses.length; i += 1) { - const batch = batches[i]; - const { vectors: batchResponse } = batchResponses[i]; - for (let j = 0; j < batch.length; j += 1) { - embeddings.push(batchResponse[j]); - } - } - return embeddings; - } - - /** - * Method to generate an embedding for a single document. Calls the - * embeddingWithRetry method with the document as the input. - * @param text Document to generate an embedding for. - * @returns Promise that resolves to an embedding for the document. - */ - async embedQuery(text: string): Promise { - const { vectors } = await this.embeddingWithRetry({ - model: this.modelName, - texts: [this.stripNewLines ? text.replace(/\n/g, " ") : text], - type: this.type, - }); - return vectors[0]; - } - - /** - * Private method to make a request to the Minimax API to generate - * embeddings. Handles the retry logic and returns the response from the - * API. - * @param request Request to send to the Minimax API. 
- * @returns Promise that resolves to the response from the API. - */ - private async embeddingWithRetry(request: CreateMinimaxEmbeddingRequest) { - const makeCompletionRequest = async () => { - const url = `${this.apiUrl}?GroupId=${this.minimaxGroupId}`; - const response = await fetch(url, { - method: "POST", - headers: { - "Content-Type": "application/json", - Authorization: `Bearer ${this.minimaxApiKey}`, - ...this.headers, - }, - body: JSON.stringify(request), - }); - - const json = await response.json(); - return json; - }; - - return this.caller.call(makeCompletionRequest); - } -} +export * from "@langchain/community/embeddings/minimax"; \ No newline at end of file diff --git a/langchain/src/embeddings/ollama.ts b/langchain/src/embeddings/ollama.ts index de9c77797dfe..2b6d5602fdf1 100644 --- a/langchain/src/embeddings/ollama.ts +++ b/langchain/src/embeddings/ollama.ts @@ -1,148 +1 @@ -import { OllamaInput, OllamaRequestParams } from "../util/ollama.js"; -import { Embeddings, EmbeddingsParams } from "./base.js"; - -type CamelCasedRequestOptions = Omit< - OllamaInput, - "baseUrl" | "model" | "format" ->; - -/** - * Interface for OllamaEmbeddings parameters. Extends EmbeddingsParams and - * defines additional parameters specific to the OllamaEmbeddings class. - */ -interface OllamaEmbeddingsParams extends EmbeddingsParams { - /** The Ollama model to use, e.g: "llama2:13b" */ - model?: string; - - /** Base URL of the Ollama server, defaults to "http://localhost:11434" */ - baseUrl?: string; - - /** Advanced Ollama API request parameters in camelCase, see - * https://github.com/jmorganca/ollama/blob/main/docs/modelfile.md#valid-parameters-and-values - * for details of the available parameters. 
- */ - requestOptions?: CamelCasedRequestOptions; -} - -export class OllamaEmbeddings extends Embeddings { - model = "llama2"; - - baseUrl = "http://localhost:11434"; - - requestOptions?: OllamaRequestParams["options"]; - - constructor(params?: OllamaEmbeddingsParams) { - super(params || {}); - - if (params?.model) { - this.model = params.model; - } - - if (params?.baseUrl) { - this.baseUrl = params.baseUrl; - } - - if (params?.requestOptions) { - this.requestOptions = this._convertOptions(params.requestOptions); - } - } - - /** convert camelCased Ollama request options like "useMMap" to - * the snake_cased equivalent which the ollama API actually uses. - * Used only for consistency with the llms/Ollama and chatModels/Ollama classes - */ - _convertOptions(requestOptions: CamelCasedRequestOptions) { - const snakeCasedOptions: Record = {}; - const mapping: Record = { - embeddingOnly: "embedding_only", - f16KV: "f16_kv", - frequencyPenalty: "frequency_penalty", - logitsAll: "logits_all", - lowVram: "low_vram", - mainGpu: "main_gpu", - mirostat: "mirostat", - mirostatEta: "mirostat_eta", - mirostatTau: "mirostat_tau", - numBatch: "num_batch", - numCtx: "num_ctx", - numGpu: "num_gpu", - numGqa: "num_gqa", - numKeep: "num_keep", - numThread: "num_thread", - penalizeNewline: "penalize_newline", - presencePenalty: "presence_penalty", - repeatLastN: "repeat_last_n", - repeatPenalty: "repeat_penalty", - ropeFrequencyBase: "rope_frequency_base", - ropeFrequencyScale: "rope_frequency_scale", - temperature: "temperature", - stop: "stop", - tfsZ: "tfs_z", - topK: "top_k", - topP: "top_p", - typicalP: "typical_p", - useMLock: "use_mlock", - useMMap: "use_mmap", - vocabOnly: "vocab_only", - }; - - for (const [key, value] of Object.entries(requestOptions)) { - const snakeCasedOption = mapping[key as keyof CamelCasedRequestOptions]; - if (snakeCasedOption) { - snakeCasedOptions[snakeCasedOption] = value; - } - } - return snakeCasedOptions; - } - - async _request(prompt: string): 
Promise { - const { model, baseUrl, requestOptions } = this; - - let formattedBaseUrl = baseUrl; - if (formattedBaseUrl.startsWith("http://localhost:")) { - // Node 18 has issues with resolving "localhost" - // See https://github.com/node-fetch/node-fetch/issues/1624 - formattedBaseUrl = formattedBaseUrl.replace( - "http://localhost:", - "http://127.0.0.1:" - ); - } - - const response = await fetch(`${formattedBaseUrl}/api/embeddings`, { - method: "POST", - headers: { "Content-Type": "application/json" }, - body: JSON.stringify({ - prompt, - model, - options: requestOptions, - }), - }); - if (!response.ok) { - throw new Error( - `Request to Ollama server failed: ${response.status} ${response.statusText}` - ); - } - - const json = await response.json(); - return json.embedding; - } - - async _embed(strings: string[]): Promise { - const embeddings: number[][] = []; - - for await (const prompt of strings) { - const embedding = await this.caller.call(() => this._request(prompt)); - embeddings.push(embedding); - } - - return embeddings; - } - - async embedDocuments(documents: string[]) { - return this._embed(documents); - } - - async embedQuery(document: string) { - return (await this.embedDocuments([document]))[0]; - } -} +export * from "@langchain/community/embeddings/ollama"; \ No newline at end of file diff --git a/langchain/src/embeddings/tensorflow.ts b/langchain/src/embeddings/tensorflow.ts index 1fc198ccdb8f..2f1919d08352 100644 --- a/langchain/src/embeddings/tensorflow.ts +++ b/langchain/src/embeddings/tensorflow.ts @@ -1,91 +1 @@ -import { load } from "@tensorflow-models/universal-sentence-encoder"; -import * as tf from "@tensorflow/tfjs-core"; - -import { Embeddings, EmbeddingsParams } from "./base.js"; - -/** - * Interface that extends EmbeddingsParams and defines additional - * parameters specific to the TensorFlowEmbeddings class. 
- */ -export interface TensorFlowEmbeddingsParams extends EmbeddingsParams {} - -/** - * Class that extends the Embeddings class and provides methods for - * generating embeddings using the Universal Sentence Encoder model from - * TensorFlow.js. - * @example - * ```typescript - * const embeddings = new TensorFlowEmbeddings(); - * const store = new MemoryVectorStore(embeddings); - * - * const documents = [ - * "A document", - * "Some other piece of text", - * "One more", - * "And another", - * ]; - * - * await store.addDocuments( - * documents.map((pageContent) => new Document({ pageContent })) - * ); - * ``` - */ -export class TensorFlowEmbeddings extends Embeddings { - constructor(fields?: TensorFlowEmbeddingsParams) { - super(fields ?? {}); - - try { - tf.backend(); - } catch (e) { - throw new Error("No TensorFlow backend found, see instructions at ..."); - } - } - - _cached: ReturnType; - - /** - * Private method that loads the Universal Sentence Encoder model if it - * hasn't been loaded already. It returns a promise that resolves to the - * loaded model. - * @returns Promise that resolves to the loaded Universal Sentence Encoder model. - */ - private async load() { - if (this._cached === undefined) { - this._cached = load(); - } - return this._cached; - } - - private _embed(texts: string[]) { - return this.caller.call(async () => { - const model = await this.load(); - return model.embed(texts); - }); - } - - /** - * Method that takes a document as input and returns a promise that - * resolves to an embedding for the document. It calls the _embed method - * with the document as the input and processes the result to return a - * single embedding. - * @param document Document to generate an embedding for. - * @returns Promise that resolves to an embedding for the input document. 
- */ - embedQuery(document: string): Promise { - return this._embed([document]) - .then((embeddings) => embeddings.array()) - .then((embeddings) => embeddings[0]); - } - - /** - * Method that takes an array of documents as input and returns a promise - * that resolves to a 2D array of embeddings for each document. It calls - * the _embed method with the documents as the input and processes the - * result to return the embeddings. - * @param documents Array of documents to generate embeddings for. - * @returns Promise that resolves to a 2D array of embeddings for each input document. - */ - embedDocuments(documents: string[]): Promise { - return this._embed(documents).then((embeddings) => embeddings.array()); - } -} +export * from "@langchain/community/embeddings/tensorflow"; \ No newline at end of file diff --git a/langchain/src/embeddings/voyage.ts b/langchain/src/embeddings/voyage.ts index 6b4d03c7e210..eb93200b313d 100644 --- a/langchain/src/embeddings/voyage.ts +++ b/langchain/src/embeddings/voyage.ts @@ -1,152 +1 @@ -import { chunkArray } from "../util/chunk.js"; -import { getEnvironmentVariable } from "../util/env.js"; -import { Embeddings, EmbeddingsParams } from "./base.js"; - -/** - * Interface that extends EmbeddingsParams and defines additional - * parameters specific to the VoyageEmbeddings class. - */ -export interface VoyageEmbeddingsParams extends EmbeddingsParams { - modelName: string; - - /** - * The maximum number of documents to embed in a single request. This is - * limited by the Voyage AI API to a maximum of 8. - */ - batchSize?: number; -} - -/** - * Interface for the request body to generate embeddings. 
- */ -export interface CreateVoyageEmbeddingRequest { - /** - * @type {string} - * @memberof CreateVoyageEmbeddingRequest - */ - model: string; - - /** - * Text to generate vector expectation - * @type {CreateEmbeddingRequestInput} - * @memberof CreateVoyageEmbeddingRequest - */ - input: string | string[]; -} - -/** - * A class for generating embeddings using the Voyage AI API. - */ -export class VoyageEmbeddings - extends Embeddings - implements VoyageEmbeddingsParams -{ - modelName = "voyage-01"; - - batchSize = 8; - - private apiKey: string; - - basePath?: string = "https://api.voyageai.com/v1"; - - apiUrl: string; - - headers?: Record; - - /** - * Constructor for the VoyageEmbeddings class. - * @param fields - An optional object with properties to configure the instance. - */ - constructor( - fields?: Partial & { - verbose?: boolean; - apiKey?: string; - } - ) { - const fieldsWithDefaults = { ...fields }; - - super(fieldsWithDefaults); - - const apiKey = - fieldsWithDefaults?.apiKey || getEnvironmentVariable("VOYAGEAI_API_KEY"); - - if (!apiKey) { - throw new Error("Voyage AI API key not found"); - } - - this.modelName = fieldsWithDefaults?.modelName ?? this.modelName; - this.batchSize = fieldsWithDefaults?.batchSize ?? this.batchSize; - this.apiKey = apiKey; - this.apiUrl = `${this.basePath}/embeddings`; - } - - /** - * Generates embeddings for an array of texts. - * @param texts - An array of strings to generate embeddings for. - * @returns A Promise that resolves to an array of embeddings. 
- */ - async embedDocuments(texts: string[]): Promise { - const batches = chunkArray(texts, this.batchSize); - - const batchRequests = batches.map((batch) => - this.embeddingWithRetry({ - model: this.modelName, - input: batch, - }) - ); - - const batchResponses = await Promise.all(batchRequests); - - const embeddings: number[][] = []; - - for (let i = 0; i < batchResponses.length; i += 1) { - const batch = batches[i]; - const { data: batchResponse } = batchResponses[i]; - for (let j = 0; j < batch.length; j += 1) { - embeddings.push(batchResponse[j].embedding); - } - } - - return embeddings; - } - - /** - * Generates an embedding for a single text. - * @param text - A string to generate an embedding for. - * @returns A Promise that resolves to an array of numbers representing the embedding. - */ - async embedQuery(text: string): Promise { - const { data } = await this.embeddingWithRetry({ - model: this.modelName, - input: text, - }); - - return data[0].embedding; - } - - /** - * Makes a request to the Voyage AI API to generate embeddings for an array of texts. - * @param request - An object with properties to configure the request. - * @returns A Promise that resolves to the response from the Voyage AI API. 
- */ - - private async embeddingWithRetry(request: CreateVoyageEmbeddingRequest) { - const makeCompletionRequest = async () => { - const url = `${this.apiUrl}`; - const response = await fetch(url, { - method: "POST", - headers: { - "Content-Type": "application/json", - Authorization: `Bearer ${this.apiKey}`, - ...this.headers, - }, - body: JSON.stringify(request), - }); - - const json = await response.json(); - return json; - }; - - return this.caller.call(makeCompletionRequest); - } -} +export * from "@langchain/community/embeddings/voyage"; \ No newline at end of file diff --git a/langchain/src/graphs/neo4j_graph.ts b/langchain/src/graphs/neo4j_graph.ts index c404e7e3b2ad..e1be4e2c49cf 100644 --- a/langchain/src/graphs/neo4j_graph.ts +++ b/langchain/src/graphs/neo4j_graph.ts @@ -1,286 +1 @@ -import neo4j, { Neo4jError } from "neo4j-driver"; - -interface Neo4jGraphConfig { - url: string; - username: string; - password: string; - database?: string; -} - -interface StructuredSchema { - nodeProps: { [key: NodeType["labels"]]: NodeType["properties"] }; - relProps: { [key: RelType["type"]]: RelType["properties"] }; - relationships: PathType[]; -} - -type NodeType = { - labels: string; - properties: { property: string; type: string }[]; -}; -type RelType = { - type: string; - properties: { property: string; type: string }[]; -}; -type PathType = { start: string; type: string; end: string }; - -/** - * @security *Security note*: Make sure that the database connection uses credentials - * that are narrowly-scoped to only include necessary permissions. - * Failure to do so may result in data corruption or loss, since the calling - * code may attempt commands that would result in deletion, mutation - * of data if appropriately prompted or reading sensitive data if such - * data is present in the database. - * The best way to guard against such negative outcomes is to (as appropriate) - * limit the permissions granted to the credentials used with this tool. 
- * For example, creating read only users for the database is a good way to - * ensure that the calling code cannot mutate or delete data. - * - * @link See https://js.langchain.com/docs/security for more information. - */ -export class Neo4jGraph { - private driver: neo4j.Driver; - - private database: string; - - private schema = ""; - - private structuredSchema: StructuredSchema = { - nodeProps: {}, - relProps: {}, - relationships: [], - }; - - constructor({ - url, - username, - password, - database = "neo4j", - }: Neo4jGraphConfig) { - try { - this.driver = neo4j.driver(url, neo4j.auth.basic(username, password)); - this.database = database; - } catch (error) { - throw new Error( - "Could not create a Neo4j driver instance. Please check the connection details." - ); - } - } - - static async initialize(config: Neo4jGraphConfig): Promise { - const graph = new Neo4jGraph(config); - - try { - await graph.verifyConnectivity(); - // eslint-disable-next-line @typescript-eslint/no-explicit-any - } catch (error: any) { - console.log("Failed to verify connection."); - } - - try { - await graph.refreshSchema(); - // eslint-disable-next-line @typescript-eslint/no-explicit-any - } catch (error: any) { - const message = [ - "Could not use APOC procedures.", - "Please ensure the APOC plugin is installed in Neo4j and that", - "'apoc.meta.data()' is allowed in Neo4j configuration", - ].join("\n"); - - throw new Error(message); - } finally { - console.log("Schema refreshed successfully."); - } - - return graph; - } - - getSchema(): string { - return this.schema; - } - - getStructuredSchema() { - return this.structuredSchema; - } - - // eslint-disable-next-line @typescript-eslint/no-explicit-any - async query(query: string, params: any = {}): Promise { - try { - const result = await this.driver.executeQuery(query, params, { - database: this.database, - }); - return toObjects(result.records); - // eslint-disable-next-line @typescript-eslint/no-explicit-any - } catch (error: any) { - 
if ( - // eslint-disable-next-line - error instanceof Neo4jError && - error.code === "Neo.ClientError.Procedure.ProcedureNotFound" - ) { - throw new Error("Procedure not found in Neo4j."); - } - } - return undefined; - } - - async verifyConnectivity() { - await this.driver.verifyAuthentication(); - } - - async refreshSchema() { - const nodePropertiesQuery = ` - CALL apoc.meta.data() - YIELD label, other, elementType, type, property - WHERE NOT type = "RELATIONSHIP" AND elementType = "node" - WITH label AS nodeLabels, collect({property:property, type:type}) AS properties - RETURN {labels: nodeLabels, properties: properties} AS output - `; - - const relPropertiesQuery = ` - CALL apoc.meta.data() - YIELD label, other, elementType, type, property - WHERE NOT type = "RELATIONSHIP" AND elementType = "relationship" - WITH label AS nodeLabels, collect({property:property, type:type}) AS properties - RETURN {type: nodeLabels, properties: properties} AS output - `; - - const relQuery = ` - CALL apoc.meta.data() - YIELD label, other, elementType, type, property - WHERE type = "RELATIONSHIP" AND elementType = "node" - UNWIND other AS other_node - RETURN {start: label, type: property, end: toString(other_node)} AS output - `; - - // Assuming query method is defined and returns a Promise - const nodeProperties: NodeType[] | undefined = ( - await this.query(nodePropertiesQuery) - )?.map((el: { output: NodeType }) => el.output); - - const relationshipsProperties: RelType[] | undefined = ( - await this.query(relPropertiesQuery) - )?.map((el: { output: RelType }) => el.output); - - const relationships: PathType[] | undefined = ( - await this.query(relQuery) - )?.map((el: { output: PathType }) => el.output); - - // Structured schema similar to Python's dictionary comprehension - this.structuredSchema = { - nodeProps: Object.fromEntries( - nodeProperties?.map((el) => [el.labels, el.properties]) || [] - ), - relProps: Object.fromEntries( - relationshipsProperties?.map((el) => [el.type, 
el.properties]) || [] - ), - relationships: relationships || [], - }; - - // Format node properties - const formattedNodeProps = nodeProperties?.map((el) => { - const propsStr = el.properties - .map((prop) => `${prop.property}: ${prop.type}`) - .join(", "); - return `${el.labels} {${propsStr}}`; - }); - - // Format relationship properties - const formattedRelProps = relationshipsProperties?.map((el) => { - const propsStr = el.properties - .map((prop) => `${prop.property}: ${prop.type}`) - .join(", "); - return `${el.type} {${propsStr}}`; - }); - - // Format relationships - const formattedRels = relationships?.map( - (el) => `(:${el.start})-[:${el.type}]->(:${el.end})` - ); - - // Combine all formatted elements into a single string - this.schema = [ - "Node properties are the following:", - formattedNodeProps?.join(", "), - "Relationship properties are the following:", - formattedRelProps?.join(", "), - "The relationships are the following:", - formattedRels?.join(", "), - ].join("\n"); - } - - async close() { - await this.driver.close(); - } -} - -function toObjects(records: neo4j.Record[]) { - // eslint-disable-next-line @typescript-eslint/no-explicit-any - const recordValues: Record[] = records.map((record) => { - const rObj = record.toObject(); - // eslint-disable-next-line @typescript-eslint/no-explicit-any - const out: { [key: string]: any } = {}; - Object.keys(rObj).forEach((key) => { - out[key] = itemIntToString(rObj[key]); - }); - return out; - }); - return recordValues; -} - -// eslint-disable-next-line @typescript-eslint/no-explicit-any -function itemIntToString(item: any): any { - if (neo4j.isInt(item)) return item.toString(); - if (Array.isArray(item)) return item.map((ii) => itemIntToString(ii)); - if (["number", "string", "boolean"].indexOf(typeof item) !== -1) return item; - if (item === null) return item; - if (typeof item === "object") return objIntToString(item); -} - -// eslint-disable-next-line @typescript-eslint/no-explicit-any -function 
objIntToString(obj: any) { - const entry = extractFromNeoObjects(obj); - // eslint-disable-next-line @typescript-eslint/no-explicit-any - let newObj: any = null; - if (Array.isArray(entry)) { - newObj = entry.map((item) => itemIntToString(item)); - } else if (entry !== null && typeof entry === "object") { - newObj = {}; - Object.keys(entry).forEach((key) => { - newObj[key] = itemIntToString(entry[key]); - }); - } - return newObj; -} - -// eslint-disable-next-line @typescript-eslint/no-explicit-any -function extractFromNeoObjects(obj: any) { - if ( - // eslint-disable-next-line - obj instanceof (neo4j.types.Node as any) || - // eslint-disable-next-line - obj instanceof (neo4j.types.Relationship as any) - ) { - return obj.properties; - // eslint-disable-next-line - } else if (obj instanceof (neo4j.types.Path as any)) { - // eslint-disable-next-line - return [].concat.apply([], extractPathForRows(obj)); - } - return obj; -} - -const extractPathForRows = (path: neo4j.Path) => { - let { segments } = path; - // Zero length path. 
No relationship, end === start - if (!Array.isArray(path.segments) || path.segments.length < 1) { - // eslint-disable-next-line @typescript-eslint/no-explicit-any - segments = [{ ...path, end: null } as any]; - } - // eslint-disable-next-line @typescript-eslint/no-explicit-any - return segments.map((segment: any) => - [ - objIntToString(segment.start), - objIntToString(segment.relationship), - objIntToString(segment.end), - ].filter((part) => part !== null) - ); -}; +export * from "@langchain/community/graphs/neo4j_graph"; \ No newline at end of file diff --git a/langchain/src/graphs/tests/neo4j_graph.int.test.ts b/langchain/src/graphs/tests/neo4j_graph.int.test.ts deleted file mode 100644 index 3b47800fc323..000000000000 --- a/langchain/src/graphs/tests/neo4j_graph.int.test.ts +++ /dev/null @@ -1,56 +0,0 @@ -/* eslint-disable no-process-env */ - -import { test } from "@jest/globals"; -import { Neo4jGraph } from "../neo4j_graph.js"; - -describe.skip("Neo4j Graph Tests", () => { - const url = process.env.NEO4J_URI as string; - const username = process.env.NEO4J_USERNAME as string; - const password = process.env.NEO4J_PASSWORD as string; - let graph: Neo4jGraph; - - beforeEach(async () => { - graph = await Neo4jGraph.initialize({ url, username, password }); - }); - afterEach(async () => { - await graph.close(); - }); - - test("Schema generation works correctly", async () => { - expect(url).toBeDefined(); - expect(username).toBeDefined(); - expect(password).toBeDefined(); - - // Clear the database - await graph.query("MATCH (n) DETACH DELETE n"); - - await graph.query( - "CREATE (a:Actor {name:'Bruce Willis'})" + - "-[:ACTED_IN {roles: ['Butch Coolidge']}]->(:Movie {title: 'Pulp Fiction'})" - ); - - await graph.refreshSchema(); - console.log(graph.getSchema()); - - // expect(graph.getSchema()).toMatchInlineSnapshot(` - // "Node properties are the following: - // Actor {name: STRING}, Movie {title: STRING} - // Relationship properties are the following: - // ACTED_IN 
{roles: LIST} - // The relationships are the following: - // (:Actor)-[:ACTED_IN]->(:Movie)" - // `); - }); - - test("Test that Neo4j database is correctly instantiated and connected", async () => { - expect(url).toBeDefined(); - expect(username).toBeDefined(); - expect(password).toBeDefined(); - - // Integers are casted to strings in the output - const expectedOutput = [{ output: { str: "test", int: "1" } }]; - const res = await graph.query('RETURN {str: "test", int: 1} AS output'); - await graph.close(); - expect(res).toEqual(expectedOutput); - }); -}); diff --git a/langchain/src/llms/ai21.ts b/langchain/src/llms/ai21.ts index 66d63b97abc3..da34e9c6da84 100644 --- a/langchain/src/llms/ai21.ts +++ b/langchain/src/llms/ai21.ts @@ -1,199 +1 @@ -import { LLM, BaseLLMParams } from "./base.js"; -import { getEnvironmentVariable } from "../util/env.js"; - -/** - * Type definition for AI21 penalty data. - */ -export type AI21PenaltyData = { - scale: number; - applyToWhitespaces: boolean; - applyToPunctuations: boolean; - applyToNumbers: boolean; - applyToStopwords: boolean; - applyToEmojis: boolean; -}; - -/** - * Interface for AI21 input parameters. - */ -export interface AI21Input extends BaseLLMParams { - ai21ApiKey?: string; - model?: string; - temperature?: number; - minTokens?: number; - maxTokens?: number; - topP?: number; - presencePenalty?: AI21PenaltyData; - countPenalty?: AI21PenaltyData; - frequencyPenalty?: AI21PenaltyData; - numResults?: number; - logitBias?: Record; - stop?: string[]; - baseUrl?: string; -} - -/** - * Class representing the AI21 language model. It extends the LLM (Large - * Language Model) class, providing a standard interface for interacting - * with the AI21 language model. 
- */ -export class AI21 extends LLM implements AI21Input { - model = "j2-jumbo-instruct"; - - temperature = 0.7; - - maxTokens = 1024; - - minTokens = 0; - - topP = 1; - - presencePenalty = AI21.getDefaultAI21PenaltyData(); - - countPenalty = AI21.getDefaultAI21PenaltyData(); - - frequencyPenalty = AI21.getDefaultAI21PenaltyData(); - - numResults = 1; - - logitBias?: Record; - - ai21ApiKey?: string; - - stop?: string[]; - - baseUrl?: string; - - constructor(fields?: AI21Input) { - super(fields ?? {}); - - this.model = fields?.model ?? this.model; - this.temperature = fields?.temperature ?? this.temperature; - this.maxTokens = fields?.maxTokens ?? this.maxTokens; - this.minTokens = fields?.minTokens ?? this.minTokens; - this.topP = fields?.topP ?? this.topP; - this.presencePenalty = fields?.presencePenalty ?? this.presencePenalty; - this.countPenalty = fields?.countPenalty ?? this.countPenalty; - this.frequencyPenalty = fields?.frequencyPenalty ?? this.frequencyPenalty; - this.numResults = fields?.numResults ?? this.numResults; - this.logitBias = fields?.logitBias; - this.ai21ApiKey = - fields?.ai21ApiKey ?? getEnvironmentVariable("AI21_API_KEY"); - this.stop = fields?.stop; - this.baseUrl = fields?.baseUrl; - } - - /** - * Method to validate the environment. It checks if the AI21 API key is - * set. If not, it throws an error. - */ - validateEnvironment() { - if (!this.ai21ApiKey) { - throw new Error( - `No AI21 API key found. Please set it as "AI21_API_KEY" in your environment variables.` - ); - } - } - - /** - * Static method to get the default penalty data for AI21. - * @returns AI21PenaltyData - */ - static getDefaultAI21PenaltyData(): AI21PenaltyData { - return { - scale: 0, - applyToWhitespaces: true, - applyToPunctuations: true, - applyToNumbers: true, - applyToStopwords: true, - applyToEmojis: true, - }; - } - - /** Get the type of LLM. */ - _llmType() { - return "ai21"; - } - - /** Get the default parameters for calling AI21 API. 
*/ - get defaultParams() { - return { - temperature: this.temperature, - maxTokens: this.maxTokens, - minTokens: this.minTokens, - topP: this.topP, - presencePenalty: this.presencePenalty, - countPenalty: this.countPenalty, - frequencyPenalty: this.frequencyPenalty, - numResults: this.numResults, - logitBias: this.logitBias, - }; - } - - /** Get the identifying parameters for this LLM. */ - get identifyingParams() { - return { ...this.defaultParams, model: this.model }; - } - - /** Call out to AI21's complete endpoint. - Args: - prompt: The prompt to pass into the model. - stop: Optional list of stop words to use when generating. - - Returns: - The string generated by the model. - - Example: - let response = ai21._call("Tell me a joke."); - */ - async _call( - prompt: string, - options: this["ParsedCallOptions"] - ): Promise { - let stop = options?.stop; - this.validateEnvironment(); - if (this.stop && stop && this.stop.length > 0 && stop.length > 0) { - throw new Error("`stop` found in both the input and default params."); - } - stop = this.stop ?? stop ?? []; - - const baseUrl = - this.baseUrl ?? this.model === "j1-grande-instruct" - ? 
"https://api.ai21.com/studio/v1/experimental" - : "https://api.ai21.com/studio/v1"; - - const url = `${baseUrl}/${this.model}/complete`; - const headers = { - Authorization: `Bearer ${this.ai21ApiKey}`, - "Content-Type": "application/json", - }; - const data = { prompt, stopSequences: stop, ...this.defaultParams }; - const responseData = await this.caller.callWithOptions({}, async () => { - const response = await fetch(url, { - method: "POST", - headers, - body: JSON.stringify(data), - signal: options.signal, - }); - if (!response.ok) { - const error = new Error( - `AI21 call failed with status code ${response.status}` - ); - // eslint-disable-next-line @typescript-eslint/no-explicit-any - (error as any).response = response; - throw error; - } - return response.json(); - }); - - if ( - !responseData.completions || - responseData.completions.length === 0 || - !responseData.completions[0].data - ) { - throw new Error("No completions found in response"); - } - - return responseData.completions[0].data.text ?? ""; - } -} +export * from "@langchain/community/llms/ai21"; \ No newline at end of file diff --git a/langchain/src/llms/aleph_alpha.ts b/langchain/src/llms/aleph_alpha.ts index 0e8968327073..d3aa0002c4b2 100644 --- a/langchain/src/llms/aleph_alpha.ts +++ b/langchain/src/llms/aleph_alpha.ts @@ -1,298 +1 @@ -import { LLM, BaseLLMParams } from "./base.js"; -import { getEnvironmentVariable } from "../util/env.js"; - -/** - * Interface for the input parameters specific to the Aleph Alpha LLM. 
- */ -export interface AlephAlphaInput extends BaseLLMParams { - model: string; - maximum_tokens: number; - minimum_tokens?: number; - echo?: boolean; - temperature?: number; - top_k?: number; - top_p?: number; - presence_penalty?: number; - frequency_penalty?: number; - sequence_penalty?: number; - sequence_penalty_min_length?: number; - repetition_penalties_include_prompt?: boolean; - repetition_penalties_include_completion?: boolean; - use_multiplicative_presence_penalty?: boolean; - use_multiplicative_frequency_penalty?: boolean; - use_multiplicative_sequence_penalty?: boolean; - penalty_bias?: string; - penalty_exceptions?: string[]; - penalty_exceptions_include_stop_sequences?: boolean; - best_of?: number; - n?: number; - logit_bias?: object; - log_probs?: number; - tokens?: boolean; - raw_completion: boolean; - disable_optimizations?: boolean; - completion_bias_inclusion?: string[]; - completion_bias_inclusion_first_token_only: boolean; - completion_bias_exclusion?: string[]; - completion_bias_exclusion_first_token_only: boolean; - contextual_control_threshold?: number; - control_log_additive: boolean; - stop?: string[]; - aleph_alpha_api_key?: string; - base_url: string; -} - -/** - * Specific implementation of a Large Language Model (LLM) designed to - * interact with the Aleph Alpha API. It extends the base LLM class and - * includes a variety of parameters for customizing the behavior of the - * Aleph Alpha model. 
- */ -export class AlephAlpha extends LLM implements AlephAlphaInput { - model = "luminous-base"; - - maximum_tokens = 64; - - minimum_tokens = 0; - - echo: boolean; - - temperature = 0.0; - - top_k: number; - - top_p = 0.0; - - presence_penalty?: number; - - frequency_penalty?: number; - - sequence_penalty?: number; - - sequence_penalty_min_length?: number; - - repetition_penalties_include_prompt?: boolean; - - repetition_penalties_include_completion?: boolean; - - use_multiplicative_presence_penalty?: boolean; - - use_multiplicative_frequency_penalty?: boolean; - - use_multiplicative_sequence_penalty?: boolean; - - penalty_bias?: string; - - penalty_exceptions?: string[]; - - penalty_exceptions_include_stop_sequences?: boolean; - - best_of?: number; - - n?: number; - - logit_bias?: object; - - log_probs?: number; - - tokens?: boolean; - - raw_completion: boolean; - - disable_optimizations?: boolean; - - completion_bias_inclusion?: string[]; - - completion_bias_inclusion_first_token_only: boolean; - - completion_bias_exclusion?: string[]; - - completion_bias_exclusion_first_token_only: boolean; - - contextual_control_threshold?: number; - - control_log_additive: boolean; - - aleph_alpha_api_key? = getEnvironmentVariable("ALEPH_ALPHA_API_KEY"); - - stop?: string[]; - - base_url = "https://api.aleph-alpha.com/complete"; - - constructor(fields: Partial) { - super(fields ?? {}); - this.model = fields?.model ?? this.model; - this.temperature = fields?.temperature ?? this.temperature; - this.maximum_tokens = fields?.maximum_tokens ?? this.maximum_tokens; - this.minimum_tokens = fields?.minimum_tokens ?? this.minimum_tokens; - this.top_k = fields?.top_k ?? this.top_k; - this.top_p = fields?.top_p ?? this.top_p; - this.presence_penalty = fields?.presence_penalty ?? this.presence_penalty; - this.frequency_penalty = - fields?.frequency_penalty ?? this.frequency_penalty; - this.sequence_penalty = fields?.sequence_penalty ?? 
this.sequence_penalty; - this.sequence_penalty_min_length = - fields?.sequence_penalty_min_length ?? this.sequence_penalty_min_length; - this.repetition_penalties_include_prompt = - fields?.repetition_penalties_include_prompt ?? - this.repetition_penalties_include_prompt; - this.repetition_penalties_include_completion = - fields?.repetition_penalties_include_completion ?? - this.repetition_penalties_include_completion; - this.use_multiplicative_presence_penalty = - fields?.use_multiplicative_presence_penalty ?? - this.use_multiplicative_presence_penalty; - this.use_multiplicative_frequency_penalty = - fields?.use_multiplicative_frequency_penalty ?? - this.use_multiplicative_frequency_penalty; - this.use_multiplicative_sequence_penalty = - fields?.use_multiplicative_sequence_penalty ?? - this.use_multiplicative_sequence_penalty; - this.penalty_bias = fields?.penalty_bias ?? this.penalty_bias; - this.penalty_exceptions = - fields?.penalty_exceptions ?? this.penalty_exceptions; - this.penalty_exceptions_include_stop_sequences = - fields?.penalty_exceptions_include_stop_sequences ?? - this.penalty_exceptions_include_stop_sequences; - this.best_of = fields?.best_of ?? this.best_of; - this.n = fields?.n ?? this.n; - this.logit_bias = fields?.logit_bias ?? this.logit_bias; - this.log_probs = fields?.log_probs ?? this.log_probs; - this.tokens = fields?.tokens ?? this.tokens; - this.raw_completion = fields?.raw_completion ?? this.raw_completion; - this.disable_optimizations = - fields?.disable_optimizations ?? this.disable_optimizations; - this.completion_bias_inclusion = - fields?.completion_bias_inclusion ?? this.completion_bias_inclusion; - this.completion_bias_inclusion_first_token_only = - fields?.completion_bias_inclusion_first_token_only ?? - this.completion_bias_inclusion_first_token_only; - this.completion_bias_exclusion = - fields?.completion_bias_exclusion ?? 
this.completion_bias_exclusion; - this.completion_bias_exclusion_first_token_only = - fields?.completion_bias_exclusion_first_token_only ?? - this.completion_bias_exclusion_first_token_only; - this.contextual_control_threshold = - fields?.contextual_control_threshold ?? this.contextual_control_threshold; - this.control_log_additive = - fields?.control_log_additive ?? this.control_log_additive; - this.aleph_alpha_api_key = - fields?.aleph_alpha_api_key ?? this.aleph_alpha_api_key; - this.stop = fields?.stop ?? this.stop; - } - - /** - * Validates the environment by ensuring the necessary Aleph Alpha API key - * is available. Throws an error if the API key is missing. - */ - validateEnvironment() { - if (!this.aleph_alpha_api_key) { - throw new Error( - "Aleph Alpha API Key is missing in environment variables." - ); - } - } - - /** Get the default parameters for calling Aleph Alpha API. */ - get defaultParams() { - return { - model: this.model, - temperature: this.temperature, - maximum_tokens: this.maximum_tokens, - minimum_tokens: this.minimum_tokens, - top_k: this.top_k, - top_p: this.top_p, - presence_penalty: this.presence_penalty, - frequency_penalty: this.frequency_penalty, - sequence_penalty: this.sequence_penalty, - sequence_penalty_min_length: this.sequence_penalty_min_length, - repetition_penalties_include_prompt: - this.repetition_penalties_include_prompt, - repetition_penalties_include_completion: - this.repetition_penalties_include_completion, - use_multiplicative_presence_penalty: - this.use_multiplicative_presence_penalty, - use_multiplicative_frequency_penalty: - this.use_multiplicative_frequency_penalty, - use_multiplicative_sequence_penalty: - this.use_multiplicative_sequence_penalty, - penalty_bias: this.penalty_bias, - penalty_exceptions: this.penalty_exceptions, - penalty_exceptions_include_stop_sequences: - this.penalty_exceptions_include_stop_sequences, - best_of: this.best_of, - n: this.n, - logit_bias: this.logit_bias, - log_probs: 
this.log_probs, - tokens: this.tokens, - raw_completion: this.raw_completion, - disable_optimizations: this.disable_optimizations, - completion_bias_inclusion: this.completion_bias_inclusion, - completion_bias_inclusion_first_token_only: - this.completion_bias_inclusion_first_token_only, - completion_bias_exclusion: this.completion_bias_exclusion, - completion_bias_exclusion_first_token_only: - this.completion_bias_exclusion_first_token_only, - contextual_control_threshold: this.contextual_control_threshold, - control_log_additive: this.control_log_additive, - }; - } - - /** Get the identifying parameters for this LLM. */ - get identifyingParams() { - return { ...this.defaultParams }; - } - - /** Get the type of LLM. */ - _llmType(): string { - return "aleph_alpha"; - } - - async _call( - prompt: string, - options: this["ParsedCallOptions"] - ): Promise { - let stop = options?.stop; - this.validateEnvironment(); - if (this.stop && stop && this.stop.length > 0 && stop.length > 0) { - throw new Error("`stop` found in both the input and default params."); - } - stop = this.stop ?? stop ?? 
[]; - const headers = { - Authorization: `Bearer ${this.aleph_alpha_api_key}`, - "Content-Type": "application/json", - Accept: "application/json", - }; - const data = { prompt, stop_sequences: stop, ...this.defaultParams }; - const responseData = await this.caller.call(async () => { - const response = await fetch(this.base_url, { - method: "POST", - headers, - body: JSON.stringify(data), - signal: options.signal, - }); - if (!response.ok) { - // consume the response body to release the connection - // https://undici.nodejs.org/#/?id=garbage-collection - const text = await response.text(); - const error = new Error( - `Aleph Alpha call failed with status ${response.status} and body ${text}` - ); - // eslint-disable-next-line @typescript-eslint/no-explicit-any - (error as any).response = response; - throw error; - } - return response.json(); - }); - - if ( - !responseData.completions || - responseData.completions.length === 0 || - !responseData.completions[0].completion - ) { - throw new Error("No completions found in response"); - } - - return responseData.completions[0].completion ?? 
""; - } -} +export * from "@langchain/community/llms/aleph_alpha"; \ No newline at end of file diff --git a/langchain/src/llms/bedrock/web.ts b/langchain/src/llms/bedrock/web.ts index f10660c9feab..578b2f90b7e2 100644 --- a/langchain/src/llms/bedrock/web.ts +++ b/langchain/src/llms/bedrock/web.ts @@ -1,356 +1 @@ -import { SignatureV4 } from "@smithy/signature-v4"; - -import { HttpRequest } from "@smithy/protocol-http"; -import { EventStreamCodec } from "@smithy/eventstream-codec"; -import { fromUtf8, toUtf8 } from "@smithy/util-utf8"; -import { Sha256 } from "@aws-crypto/sha256-js"; - -import { - BaseBedrockInput, - BedrockLLMInputOutputAdapter, - type CredentialType, -} from "../../util/bedrock.js"; -import { getEnvironmentVariable } from "../../util/env.js"; -import { LLM, BaseLLMParams } from "../base.js"; -import { CallbackManagerForLLMRun } from "../../callbacks/manager.js"; -import { GenerationChunk } from "../../schema/index.js"; -import type { SerializedFields } from "../../load/map_keys.js"; - -/** - * A type of Large Language Model (LLM) that interacts with the Bedrock - * service. It extends the base `LLM` class and implements the - * `BaseBedrockInput` interface. The class is designed to authenticate and - * interact with the Bedrock service, which is a part of Amazon Web - * Services (AWS). It uses AWS credentials for authentication and can be - * configured with various parameters such as the model to use, the AWS - * region, and the maximum number of tokens to generate. 
- */ -export class Bedrock extends LLM implements BaseBedrockInput { - model = "amazon.titan-tg1-large"; - - region: string; - - credentials: CredentialType; - - temperature?: number | undefined = undefined; - - maxTokens?: number | undefined = undefined; - - fetchFn: typeof fetch; - - endpointHost?: string; - - /** @deprecated */ - stopSequences?: string[]; - - modelKwargs?: Record; - - codec: EventStreamCodec = new EventStreamCodec(toUtf8, fromUtf8); - - streaming = false; - - lc_serializable = true; - - get lc_aliases(): Record { - return { - model: "model_id", - region: "region_name", - }; - } - - get lc_secrets(): { [key: string]: string } | undefined { - return { - "credentials.accessKeyId": "BEDROCK_AWS_ACCESS_KEY_ID", - "credentials.secretAccessKey": "BEDROCK_AWS_SECRET_ACCESS_KEY", - }; - } - - get lc_attributes(): SerializedFields | undefined { - return { region: this.region }; - } - - _llmType() { - return "bedrock"; - } - - static lc_name() { - return "Bedrock"; - } - - constructor(fields?: Partial & BaseLLMParams) { - super(fields ?? {}); - - this.model = fields?.model ?? this.model; - const allowedModels = ["ai21", "anthropic", "amazon", "cohere", "meta"]; - if (!allowedModels.includes(this.model.split(".")[0])) { - throw new Error( - `Unknown model: '${this.model}', only these are supported: ${allowedModels}` - ); - } - const region = - fields?.region ?? getEnvironmentVariable("AWS_DEFAULT_REGION"); - if (!region) { - throw new Error( - "Please set the AWS_DEFAULT_REGION environment variable or pass it to the constructor as the region field." - ); - } - this.region = region; - - const credentials = fields?.credentials; - if (!credentials) { - throw new Error( - "Please set the AWS credentials in the 'credentials' field." - ); - } - this.credentials = credentials; - - this.temperature = fields?.temperature ?? this.temperature; - this.maxTokens = fields?.maxTokens ?? this.maxTokens; - this.fetchFn = fields?.fetchFn ?? 
fetch.bind(globalThis); - this.endpointHost = fields?.endpointHost ?? fields?.endpointUrl; - this.stopSequences = fields?.stopSequences; - this.modelKwargs = fields?.modelKwargs; - this.streaming = fields?.streaming ?? this.streaming; - } - - /** Call out to Bedrock service model. - Arguments: - prompt: The prompt to pass into the model. - - Returns: - The string generated by the model. - - Example: - response = model.call("Tell me a joke.") - */ - async _call( - prompt: string, - options: this["ParsedCallOptions"], - runManager?: CallbackManagerForLLMRun - ): Promise { - const service = "bedrock-runtime"; - const endpointHost = - this.endpointHost ?? `${service}.${this.region}.amazonaws.com`; - const provider = this.model.split(".")[0]; - if (this.streaming) { - const stream = this._streamResponseChunks(prompt, options, runManager); - let finalResult: GenerationChunk | undefined; - for await (const chunk of stream) { - if (finalResult === undefined) { - finalResult = chunk; - } else { - finalResult = finalResult.concat(chunk); - } - } - return finalResult?.text ?? ""; - } - const response = await this._signedFetch(prompt, options, { - bedrockMethod: "invoke", - endpointHost, - provider, - }); - const json = await response.json(); - if (!response.ok) { - throw new Error( - `Error ${response.status}: ${json.message ?? JSON.stringify(json)}` - ); - } - const text = BedrockLLMInputOutputAdapter.prepareOutput(provider, json); - return text; - } - - async _signedFetch( - prompt: string, - options: this["ParsedCallOptions"], - fields: { - bedrockMethod: "invoke" | "invoke-with-response-stream"; - endpointHost: string; - provider: string; - } - ) { - const { bedrockMethod, endpointHost, provider } = fields; - const inputBody = BedrockLLMInputOutputAdapter.prepareInput( - provider, - prompt, - this.maxTokens, - this.temperature, - options.stop ?? 
this.stopSequences, - this.modelKwargs, - fields.bedrockMethod - ); - - const url = new URL( - `https://${endpointHost}/model/${this.model}/${bedrockMethod}` - ); - - const request = new HttpRequest({ - hostname: url.hostname, - path: url.pathname, - protocol: url.protocol, - method: "POST", // method must be uppercase - body: JSON.stringify(inputBody), - query: Object.fromEntries(url.searchParams.entries()), - headers: { - // host is required by AWS Signature V4: https://docs.aws.amazon.com/general/latest/gr/sigv4-create-canonical-request.html - host: url.host, - accept: "application/json", - "content-type": "application/json", - }, - }); - - const signer = new SignatureV4({ - credentials: this.credentials, - service: "bedrock", - region: this.region, - sha256: Sha256, - }); - - const signedRequest = await signer.sign(request); - - // Send request to AWS using the low-level fetch API - const response = await this.caller.callWithOptions( - { signal: options.signal }, - async () => - this.fetchFn(url, { - headers: signedRequest.headers, - body: signedRequest.body, - method: signedRequest.method, - }) - ); - return response; - } - - invocationParams(options?: this["ParsedCallOptions"]) { - return { - model: this.model, - region: this.region, - temperature: this.temperature, - maxTokens: this.maxTokens, - stop: options?.stop ?? this.stopSequences, - modelKwargs: this.modelKwargs, - }; - } - - async *_streamResponseChunks( - prompt: string, - options: this["ParsedCallOptions"], - runManager?: CallbackManagerForLLMRun - ): AsyncGenerator { - const provider = this.model.split(".")[0]; - const bedrockMethod = - provider === "anthropic" || provider === "cohere" || provider === "meta" - ? "invoke-with-response-stream" - : "invoke"; - - const service = "bedrock-runtime"; - const endpointHost = - this.endpointHost ?? 
`${service}.${this.region}.amazonaws.com`; - - // Send request to AWS using the low-level fetch API - const response = await this._signedFetch(prompt, options, { - bedrockMethod, - endpointHost, - provider, - }); - - if (response.status < 200 || response.status >= 300) { - throw Error( - `Failed to access underlying url '${endpointHost}': got ${ - response.status - } ${response.statusText}: ${await response.text()}` - ); - } - - if ( - provider === "anthropic" || - provider === "cohere" || - provider === "meta" - ) { - const reader = response.body?.getReader(); - const decoder = new TextDecoder(); - for await (const chunk of this._readChunks(reader)) { - const event = this.codec.decode(chunk); - if ( - (event.headers[":event-type"] !== undefined && - event.headers[":event-type"].value !== "chunk") || - event.headers[":content-type"].value !== "application/json" - ) { - throw Error(`Failed to get event chunk: got ${chunk}`); - } - const body = JSON.parse(decoder.decode(event.body)); - if (body.message) { - throw new Error(body.message); - } - if (body.bytes !== undefined) { - const chunkResult = JSON.parse( - decoder.decode( - Uint8Array.from(atob(body.bytes), (m) => m.codePointAt(0) ?? 
0) - ) - ); - const text = BedrockLLMInputOutputAdapter.prepareOutput( - provider, - chunkResult - ); - yield new GenerationChunk({ - text, - generationInfo: {}, - }); - // eslint-disable-next-line no-void - void runManager?.handleLLMNewToken(text); - } - } - } else { - const json = await response.json(); - const text = BedrockLLMInputOutputAdapter.prepareOutput(provider, json); - yield new GenerationChunk({ - text, - generationInfo: {}, - }); - // eslint-disable-next-line no-void - void runManager?.handleLLMNewToken(text); - } - } - - // eslint-disable-next-line @typescript-eslint/no-explicit-any - _readChunks(reader: any) { - function _concatChunks(a: Uint8Array, b: Uint8Array) { - const newBuffer = new Uint8Array(a.length + b.length); - newBuffer.set(a); - newBuffer.set(b, a.length); - return newBuffer; - } - - function getMessageLength(buffer: Uint8Array) { - if (buffer.byteLength === 0) return 0; - const view = new DataView( - buffer.buffer, - buffer.byteOffset, - buffer.byteLength - ); - - return view.getUint32(0, false); - } - - return { - async *[Symbol.asyncIterator]() { - let readResult = await reader.read(); - - let buffer: Uint8Array = new Uint8Array(0); - while (!readResult.done) { - const chunk: Uint8Array = readResult.value; - - buffer = _concatChunks(buffer, chunk); - let messageLength = getMessageLength(buffer); - - while (buffer.byteLength > 0 && buffer.byteLength >= messageLength) { - yield buffer.slice(0, messageLength); - buffer = buffer.slice(messageLength); - messageLength = getMessageLength(buffer); - } - - readResult = await reader.read(); - } - }, - }; - } -} +export * from "@langchain/community/llms/bedrock/web"; \ No newline at end of file diff --git a/langchain/src/llms/cloudflare_workersai.ts b/langchain/src/llms/cloudflare_workersai.ts index 25c6b5a16276..015cee52bb8e 100644 --- a/langchain/src/llms/cloudflare_workersai.ts +++ b/langchain/src/llms/cloudflare_workersai.ts @@ -1,189 +1 @@ -import { LLM, BaseLLMParams } from "./base.js"; 
-import { getEnvironmentVariable } from "../util/env.js"; -import { CallbackManagerForLLMRun } from "../callbacks/manager.js"; -import { GenerationChunk } from "../schema/index.js"; -import { convertEventStreamToIterableReadableDataStream } from "../util/event-source-parse.js"; - -/** - * Interface for CloudflareWorkersAI input parameters. - */ -export interface CloudflareWorkersAIInput { - cloudflareAccountId?: string; - cloudflareApiToken?: string; - model?: string; - baseUrl?: string; - streaming?: boolean; -} - -/** - * Class representing the CloudflareWorkersAI language model. It extends the LLM (Large - * Language Model) class, providing a standard interface for interacting - * with the CloudflareWorkersAI language model. - */ -export class CloudflareWorkersAI - extends LLM - implements CloudflareWorkersAIInput -{ - model = "@cf/meta/llama-2-7b-chat-int8"; - - cloudflareAccountId?: string; - - cloudflareApiToken?: string; - - baseUrl: string; - - streaming = false; - - static lc_name() { - return "CloudflareWorkersAI"; - } - - lc_serializable = true; - - constructor(fields?: CloudflareWorkersAIInput & BaseLLMParams) { - super(fields ?? {}); - - this.model = fields?.model ?? this.model; - this.streaming = fields?.streaming ?? this.streaming; - this.cloudflareAccountId = - fields?.cloudflareAccountId ?? - getEnvironmentVariable("CLOUDFLARE_ACCOUNT_ID"); - this.cloudflareApiToken = - fields?.cloudflareApiToken ?? - getEnvironmentVariable("CLOUDFLARE_API_TOKEN"); - this.baseUrl = - fields?.baseUrl ?? - `https://api.cloudflare.com/client/v4/accounts/${this.cloudflareAccountId}/ai/run`; - if (this.baseUrl.endsWith("/")) { - this.baseUrl = this.baseUrl.slice(0, -1); - } - } - - /** - * Method to validate the environment. - */ - validateEnvironment() { - if (this.baseUrl === undefined) { - if (!this.cloudflareAccountId) { - throw new Error( - `No Cloudflare account ID found. 
Please provide it when instantiating the CloudflareWorkersAI class, or set it as "CLOUDFLARE_ACCOUNT_ID" in your environment variables.` - ); - } - if (!this.cloudflareApiToken) { - throw new Error( - `No Cloudflare API key found. Please provide it when instantiating the CloudflareWorkersAI class, or set it as "CLOUDFLARE_API_KEY" in your environment variables.` - ); - } - } - } - - /** Get the identifying parameters for this LLM. */ - get identifyingParams() { - return { model: this.model }; - } - - /** - * Get the parameters used to invoke the model - */ - invocationParams() { - return { - model: this.model, - }; - } - - /** Get the type of LLM. */ - _llmType() { - return "cloudflare"; - } - - async _request( - prompt: string, - options: this["ParsedCallOptions"], - stream?: boolean - ) { - this.validateEnvironment(); - - const url = `${this.baseUrl}/${this.model}`; - const headers = { - Authorization: `Bearer ${this.cloudflareApiToken}`, - "Content-Type": "application/json", - }; - - const data = { prompt, stream }; - return this.caller.call(async () => { - const response = await fetch(url, { - method: "POST", - headers, - body: JSON.stringify(data), - signal: options.signal, - }); - if (!response.ok) { - const error = new Error( - `Cloudflare LLM call failed with status code ${response.status}` - ); - // eslint-disable-next-line @typescript-eslint/no-explicit-any - (error as any).response = response; - throw error; - } - return response; - }); - } - - async *_streamResponseChunks( - prompt: string, - options: this["ParsedCallOptions"], - runManager?: CallbackManagerForLLMRun - ): AsyncGenerator { - const response = await this._request(prompt, options, true); - if (!response.body) { - throw new Error("Empty response from Cloudflare. 
Please try again."); - } - const stream = convertEventStreamToIterableReadableDataStream( - response.body - ); - for await (const chunk of stream) { - if (chunk !== "[DONE]") { - const parsedChunk = JSON.parse(chunk); - const generationChunk = new GenerationChunk({ - text: parsedChunk.response, - }); - yield generationChunk; - // eslint-disable-next-line no-void - void runManager?.handleLLMNewToken(generationChunk.text ?? ""); - } - } - } - - /** Call out to CloudflareWorkersAI's complete endpoint. - Args: - prompt: The prompt to pass into the model. - Returns: - The string generated by the model. - Example: - let response = CloudflareWorkersAI.call("Tell me a joke."); - */ - async _call( - prompt: string, - options: this["ParsedCallOptions"], - runManager?: CallbackManagerForLLMRun - ): Promise { - if (!this.streaming) { - const response = await this._request(prompt, options); - - const responseData = await response.json(); - - return responseData.result.response; - } else { - const stream = this._streamResponseChunks(prompt, options, runManager); - let finalResult: GenerationChunk | undefined; - for await (const chunk of stream) { - if (finalResult === undefined) { - finalResult = chunk; - } else { - finalResult = finalResult.concat(chunk); - } - } - return finalResult?.text ?? ""; - } - } -} +export * from "@langchain/community/llms/cloudflare_workersai"; \ No newline at end of file diff --git a/langchain/src/llms/cohere.ts b/langchain/src/llms/cohere.ts index 393876efb939..0aa63c4ec6a4 100644 --- a/langchain/src/llms/cohere.ts +++ b/langchain/src/llms/cohere.ts @@ -1,129 +1 @@ -import { getEnvironmentVariable } from "../util/env.js"; -import { LLM, BaseLLMParams } from "./base.js"; - -/** - * Interface for the input parameters specific to the Cohere model. - */ -export interface CohereInput extends BaseLLMParams { - /** Sampling temperature to use */ - temperature?: number; - - /** - * Maximum number of tokens to generate in the completion. 
- */ - maxTokens?: number; - - /** Model to use */ - model?: string; - - apiKey?: string; -} - -/** - * Class representing a Cohere Large Language Model (LLM). It interacts - * with the Cohere API to generate text completions. - * @example - * ```typescript - * const model = new Cohere({ - * temperature: 0.7, - * maxTokens: 20, - * maxRetries: 5, - * }); - * - * const res = await model.call( - * "Question: What would be a good company name for a company that makes colorful socks?\nAnswer:" - * ); - * console.log({ res }); - * ``` - */ -export class Cohere extends LLM implements CohereInput { - static lc_name() { - return "Cohere"; - } - - get lc_secrets(): { [key: string]: string } | undefined { - return { - apiKey: "COHERE_API_KEY", - }; - } - - get lc_aliases(): { [key: string]: string } | undefined { - return { - apiKey: "cohere_api_key", - }; - } - - lc_serializable = true; - - temperature = 0; - - maxTokens = 250; - - model: string; - - apiKey: string; - - constructor(fields?: CohereInput) { - super(fields ?? {}); - - const apiKey = fields?.apiKey ?? getEnvironmentVariable("COHERE_API_KEY"); - - if (!apiKey) { - throw new Error( - "Please set the COHERE_API_KEY environment variable or pass it to the constructor as the apiKey field." - ); - } - - this.apiKey = apiKey; - this.maxTokens = fields?.maxTokens ?? this.maxTokens; - this.temperature = fields?.temperature ?? this.temperature; - this.model = fields?.model ?? 
this.model; - } - - _llmType() { - return "cohere"; - } - - /** @ignore */ - async _call( - prompt: string, - options: this["ParsedCallOptions"] - ): Promise { - const { cohere } = await Cohere.imports(); - - cohere.init(this.apiKey); - - // Hit the `generate` endpoint on the `large` model - const generateResponse = await this.caller.callWithOptions( - { signal: options.signal }, - cohere.generate.bind(cohere), - { - prompt, - model: this.model, - max_tokens: this.maxTokens, - temperature: this.temperature, - end_sequences: options.stop, - } - ); - try { - return generateResponse.body.generations[0].text; - } catch { - console.log(generateResponse); - throw new Error("Could not parse response."); - } - } - - /** @ignore */ - static async imports(): Promise<{ - cohere: typeof import("cohere-ai"); - }> { - try { - const { default: cohere } = await import("cohere-ai"); - return { cohere }; - } catch (e) { - throw new Error( - "Please install cohere-ai as a dependency with, e.g. `yarn add cohere-ai`" - ); - } - } -} +export * from "@langchain/community/llms/cohere"; \ No newline at end of file diff --git a/langchain/src/llms/fireworks.ts b/langchain/src/llms/fireworks.ts index 8dd5981aad69..b65fa31f7f93 100644 --- a/langchain/src/llms/fireworks.ts +++ b/langchain/src/llms/fireworks.ts @@ -1,140 +1 @@ -import type { OpenAI as OpenAIClient } from "openai"; - -import type { BaseLLMParams } from "./base.js"; -import type { OpenAICallOptions, OpenAIInput } from "./openai.js"; -import type { OpenAICoreRequestOptions } from "../types/openai-types.js"; -import { getEnvironmentVariable } from "../util/env.js"; -import { OpenAI } from "./openai.js"; - -type FireworksUnsupportedArgs = - | "frequencyPenalty" - | "presencePenalty" - | "bestOf" - | "logitBias"; - -type FireworksUnsupportedCallOptions = "functions" | "function_call" | "tools"; - -export type FireworksCallOptions = Partial< - Omit ->; - -/** - * Wrapper around Fireworks API for large language models - * - * Fireworks 
API is compatible to the OpenAI API with some limitations described in - * https://readme.fireworks.ai/docs/openai-compatibility. - * - * To use, you should have the `openai` package installed and - * the `FIREWORKS_API_KEY` environment variable set. - */ -export class Fireworks extends OpenAI { - static lc_name() { - return "Fireworks"; - } - - _llmType() { - return "fireworks"; - } - - get lc_secrets(): { [key: string]: string } | undefined { - return { - fireworksApiKey: "FIREWORKS_API_KEY", - }; - } - - lc_serializable = true; - - fireworksApiKey?: string; - - constructor( - fields?: Partial< - Omit - > & - BaseLLMParams & { fireworksApiKey?: string } - ) { - const fireworksApiKey = - fields?.fireworksApiKey || getEnvironmentVariable("FIREWORKS_API_KEY"); - - if (!fireworksApiKey) { - throw new Error( - `Fireworks API key not found. Please set the FIREWORKS_API_KEY environment variable or provide the key into "fireworksApiKey"` - ); - } - - super({ - ...fields, - openAIApiKey: fireworksApiKey, - modelName: fields?.modelName || "accounts/fireworks/models/llama-v2-13b", - configuration: { - baseURL: "https://api.fireworks.ai/inference/v1", - }, - }); - - this.fireworksApiKey = fireworksApiKey; - } - - toJSON() { - const result = super.toJSON(); - - if ( - "kwargs" in result && - typeof result.kwargs === "object" && - result.kwargs != null - ) { - delete result.kwargs.openai_api_key; - delete result.kwargs.configuration; - } - - return result; - } - - async completionWithRetry( - request: OpenAIClient.CompletionCreateParamsStreaming, - options?: OpenAICoreRequestOptions - ): Promise>; - - async completionWithRetry( - request: OpenAIClient.CompletionCreateParamsNonStreaming, - options?: OpenAICoreRequestOptions - ): Promise; - - /** - * Calls the Fireworks API with retry logic in case of failures. - * @param request The request to send to the Fireworks API. - * @param options Optional configuration for the API call. - * @returns The response from the Fireworks API. 
- */ - async completionWithRetry( - request: - | OpenAIClient.CompletionCreateParamsStreaming - | OpenAIClient.CompletionCreateParamsNonStreaming, - options?: OpenAICoreRequestOptions - ): Promise< - AsyncIterable | OpenAIClient.Completions.Completion - > { - // https://readme.fireworks.ai/docs/openai-compatibility#api-compatibility - if (Array.isArray(request.prompt)) { - if (request.prompt.length > 1) { - throw new Error("Multiple prompts are not supported by Fireworks"); - } - - const prompt = request.prompt[0]; - if (typeof prompt !== "string") { - throw new Error("Only string prompts are supported by Fireworks"); - } - - request.prompt = prompt; - } - - delete request.frequency_penalty; - delete request.presence_penalty; - delete request.best_of; - delete request.logit_bias; - - if (request.stream === true) { - return super.completionWithRetry(request, options); - } - - return super.completionWithRetry(request, options); - } -} +export * from "@langchain/community/llms/fireworks"; \ No newline at end of file diff --git a/langchain/src/llms/googlepalm.ts b/langchain/src/llms/googlepalm.ts index 2bd8d8d2fc49..44de2cf9d3f1 100644 --- a/langchain/src/llms/googlepalm.ts +++ b/langchain/src/llms/googlepalm.ts @@ -1,203 +1 @@ -import { TextServiceClient, protos } from "@google-ai/generativelanguage"; -import { GoogleAuth } from "google-auth-library"; -import { BaseLLMParams, LLM } from "./base.js"; -import { getEnvironmentVariable } from "../util/env.js"; - -/** - * Input for Text generation for Google Palm - */ -export interface GooglePaLMTextInput extends BaseLLMParams { - /** - * Model Name to use - * - * Note: The format must follow the pattern - `models/{model}` - */ - modelName?: string; - - /** - * Controls the randomness of the output. - * - * Values can range from [0.0,1.0], inclusive. 
A value closer to 1.0 - * will produce responses that are more varied and creative, while - * a value closer to 0.0 will typically result in more straightforward - * responses from the model. - * - * Note: The default value varies by model - */ - temperature?: number; - - /** - * Maximum number of tokens to generate in the completion. - */ - maxOutputTokens?: number; - - /** - * Top-p changes how the model selects tokens for output. - * - * Tokens are selected from most probable to least until the sum - * of their probabilities equals the top-p value. - * - * For example, if tokens A, B, and C have a probability of - * .3, .2, and .1 and the top-p value is .5, then the model will - * select either A or B as the next token (using temperature). - * - * Note: The default value varies by model - */ - topP?: number; - - /** - * Top-k changes how the model selects tokens for output. - * - * A top-k of 1 means the selected token is the most probable among - * all tokens in the model’s vocabulary (also called greedy decoding), - * while a top-k of 3 means that the next token is selected from - * among the 3 most probable tokens (using temperature). - * - * Note: The default value varies by model - */ - topK?: number; - - /** - * The set of character sequences (up to 5) that will stop output generation. - * If specified, the API will stop at the first appearance of a stop - * sequence. - * - * Note: The stop sequence will not be included as part of the response. - */ - stopSequences?: string[]; - - /** - * A list of unique `SafetySetting` instances for blocking unsafe content. The API will block - * any prompts and responses that fail to meet the thresholds set by these settings. If there - * is no `SafetySetting` for a given `SafetyCategory` provided in the list, the API will use - * the default safety setting for that category. 
- */ - safetySettings?: protos.google.ai.generativelanguage.v1beta2.ISafetySetting[]; - - /** - * Google Palm API key to use - */ - apiKey?: string; -} - -/** - * Google Palm 2 Language Model Wrapper to generate texts - */ -export class GooglePaLM extends LLM implements GooglePaLMTextInput { - get lc_secrets(): { [key: string]: string } | undefined { - return { - apiKey: "GOOGLE_PALM_API_KEY", - }; - } - - modelName = "models/text-bison-001"; - - temperature?: number; // default value chosen based on model - - maxOutputTokens?: number; // defaults to 64 - - topP?: number; // default value chosen based on model - - topK?: number; // default value chosen based on model - - stopSequences: string[] = []; - - safetySettings?: protos.google.ai.generativelanguage.v1beta2.ISafetySetting[]; // default safety setting for that category - - apiKey?: string; - - private client: TextServiceClient; - - constructor(fields?: GooglePaLMTextInput) { - super(fields ?? {}); - - this.modelName = fields?.modelName ?? this.modelName; - - this.temperature = fields?.temperature ?? this.temperature; - if (this.temperature && (this.temperature < 0 || this.temperature > 1)) { - throw new Error("`temperature` must be in the range of [0.0,1.0]"); - } - - this.maxOutputTokens = fields?.maxOutputTokens ?? this.maxOutputTokens; - if (this.maxOutputTokens && this.maxOutputTokens < 0) { - throw new Error("`maxOutputTokens` must be a positive integer"); - } - - this.topP = fields?.topP ?? this.topP; - if (this.topP && this.topP < 0) { - throw new Error("`topP` must be a positive integer"); - } - - if (this.topP && this.topP > 1) { - throw new Error("Google PaLM `topP` must in the range of [0,1]"); - } - - this.topK = fields?.topK ?? this.topK; - if (this.topK && this.topK < 0) { - throw new Error("`topK` must be a positive integer"); - } - - this.stopSequences = fields?.stopSequences ?? this.stopSequences; - - this.safetySettings = fields?.safetySettings ?? 
this.safetySettings; - if (this.safetySettings && this.safetySettings.length > 0) { - const safetySettingsSet = new Set( - this.safetySettings.map((s) => s.category) - ); - if (safetySettingsSet.size !== this.safetySettings.length) { - throw new Error( - "The categories in `safetySettings` array must be unique" - ); - } - } - - this.apiKey = - fields?.apiKey ?? getEnvironmentVariable("GOOGLE_PALM_API_KEY"); - if (!this.apiKey) { - throw new Error( - "Please set an API key for Google Palm 2 in the environment variable GOOGLE_PALM_API_KEY or in the `apiKey` field of the GooglePalm constructor" - ); - } - - this.client = new TextServiceClient({ - authClient: new GoogleAuth().fromAPIKey(this.apiKey), - }); - } - - _llmType(): string { - return "googlepalm"; - } - - async _call( - prompt: string, - options: this["ParsedCallOptions"] - ): Promise { - const res = await this.caller.callWithOptions( - { signal: options.signal }, - this._generateText.bind(this), - prompt - ); - return res ?? ""; - } - - protected async _generateText( - prompt: string - ): Promise { - const res = await this.client.generateText({ - model: this.modelName, - temperature: this.temperature, - candidateCount: 1, - topK: this.topK, - topP: this.topP, - maxOutputTokens: this.maxOutputTokens, - stopSequences: this.stopSequences, - safetySettings: this.safetySettings, - prompt: { - text: prompt, - }, - }); - return res[0].candidates && res[0].candidates.length > 0 - ? 
res[0].candidates[0].output - : undefined; - } -} +export * from "@langchain/community/llms/googlepalm"; \ No newline at end of file diff --git a/langchain/src/llms/googlevertexai/common.ts b/langchain/src/llms/googlevertexai/common.ts deleted file mode 100644 index ff1663d7db77..000000000000 --- a/langchain/src/llms/googlevertexai/common.ts +++ /dev/null @@ -1,231 +0,0 @@ -import { BaseLLM } from "../base.js"; -import { Generation, GenerationChunk, LLMResult } from "../../schema/index.js"; -import { - GoogleVertexAILLMConnection, - GoogleVertexAIStream, - GoogleVertexAILLMResponse, -} from "../../util/googlevertexai-connection.js"; -import { - GoogleVertexAIBaseLLMInput, - GoogleVertexAIBasePrediction, - GoogleVertexAILLMPredictions, - GoogleVertexAIModelParams, -} from "../../types/googlevertexai-types.js"; -import { BaseLanguageModelCallOptions } from "../../base_language/index.js"; -import { CallbackManagerForLLMRun } from "../../callbacks/index.js"; - -/** - * Interface representing the instance of text input to the Google Vertex - * AI model. - */ -interface GoogleVertexAILLMTextInstance { - content: string; -} - -/** - * Interface representing the instance of code input to the Google Vertex - * AI model. - */ -interface GoogleVertexAILLMCodeInstance { - prefix: string; -} - -/** - * Type representing an instance of either text or code input to the - * Google Vertex AI model. - */ -type GoogleVertexAILLMInstance = - | GoogleVertexAILLMTextInstance - | GoogleVertexAILLMCodeInstance; - -/** - * Models the data returned from the API call - */ -interface TextPrediction extends GoogleVertexAIBasePrediction { - content: string; -} - -/** - * Base class for Google Vertex AI LLMs. - * Implemented subclasses must provide a GoogleVertexAILLMConnection - * with an appropriate auth client. 
- */ -export class BaseGoogleVertexAI - extends BaseLLM - implements GoogleVertexAIBaseLLMInput -{ - lc_serializable = true; - - model = "text-bison"; - - temperature = 0.7; - - maxOutputTokens = 1024; - - topP = 0.8; - - topK = 40; - - protected connection: GoogleVertexAILLMConnection< - BaseLanguageModelCallOptions, - GoogleVertexAILLMInstance, - TextPrediction, - AuthOptions - >; - - protected streamedConnection: GoogleVertexAILLMConnection< - BaseLanguageModelCallOptions, - GoogleVertexAILLMInstance, - TextPrediction, - AuthOptions - >; - - get lc_aliases(): Record { - return { - model: "model_name", - }; - } - - constructor(fields?: GoogleVertexAIBaseLLMInput) { - super(fields ?? {}); - - this.model = fields?.model ?? this.model; - - // Change the defaults for code models - if (this.model.startsWith("code-gecko")) { - this.maxOutputTokens = 64; - } - if (this.model.startsWith("code-")) { - this.temperature = 0.2; - } - - this.temperature = fields?.temperature ?? this.temperature; - this.maxOutputTokens = fields?.maxOutputTokens ?? this.maxOutputTokens; - this.topP = fields?.topP ?? this.topP; - this.topK = fields?.topK ?? this.topK; - } - - _llmType(): string { - return "vertexai"; - } - - async *_streamResponseChunks( - _input: string, - _options: this["ParsedCallOptions"], - _runManager?: CallbackManagerForLLMRun - ): AsyncGenerator { - // Make the call as a streaming request - const instance = this.formatInstance(_input); - const parameters = this.formatParameters(); - const result = await this.streamedConnection.request( - [instance], - parameters, - _options - ); - - // Get the streaming parser of the response - const stream = result.data as GoogleVertexAIStream; - - // Loop until the end of the stream - // During the loop, yield each time we get a chunk from the streaming parser - // that is either available or added to the queue - while (!stream.streamDone) { - const output = await stream.nextChunk(); - const chunk = - output !== null - ? 
new GenerationChunk( - this.extractGenerationFromPrediction(output.outputs[0]) - ) - : new GenerationChunk({ - text: "", - generationInfo: { finishReason: "stop" }, - }); - yield chunk; - } - } - - async _generate( - prompts: string[], - options: this["ParsedCallOptions"] - ): Promise { - const generations: Generation[][] = await Promise.all( - prompts.map((prompt) => this._generatePrompt(prompt, options)) - ); - return { generations }; - } - - async _generatePrompt( - prompt: string, - options: this["ParsedCallOptions"] - ): Promise { - const instance = this.formatInstance(prompt); - const parameters = this.formatParameters(); - const result = await this.connection.request( - [instance], - parameters, - options - ); - const prediction = this.extractPredictionFromResponse(result); - return [this.extractGenerationFromPrediction(prediction)]; - } - - /** - * Formats the input instance as a text instance for the Google Vertex AI - * model. - * @param prompt Prompt to be formatted as a text instance. - * @returns A GoogleVertexAILLMInstance object representing the formatted text instance. - */ - formatInstanceText(prompt: string): GoogleVertexAILLMInstance { - return { content: prompt }; - } - - /** - * Formats the input instance as a code instance for the Google Vertex AI - * model. - * @param prompt Prompt to be formatted as a code instance. - * @returns A GoogleVertexAILLMInstance object representing the formatted code instance. - */ - formatInstanceCode(prompt: string): GoogleVertexAILLMInstance { - return { prefix: prompt }; - } - - /** - * Formats the input instance for the Google Vertex AI model based on the - * model type (text or code). - * @param prompt Prompt to be formatted as an instance. - * @returns A GoogleVertexAILLMInstance object representing the formatted instance. - */ - formatInstance(prompt: string): GoogleVertexAILLMInstance { - return this.model.startsWith("code-") - ? 
this.formatInstanceCode(prompt) - : this.formatInstanceText(prompt); - } - - formatParameters(): GoogleVertexAIModelParams { - return { - temperature: this.temperature, - topK: this.topK, - topP: this.topP, - maxOutputTokens: this.maxOutputTokens, - }; - } - - /** - * Extracts the prediction from the API response. - * @param result The API response from which to extract the prediction. - * @returns A TextPrediction object representing the extracted prediction. - */ - extractPredictionFromResponse( - result: GoogleVertexAILLMResponse - ): TextPrediction { - return (result?.data as GoogleVertexAILLMPredictions) - ?.predictions[0]; - } - - extractGenerationFromPrediction(prediction: TextPrediction): Generation { - return { - text: prediction.content, - generationInfo: prediction, - }; - } -} diff --git a/langchain/src/llms/googlevertexai/index.ts b/langchain/src/llms/googlevertexai/index.ts index c3c7cbd6127a..c4e23fa6a7af 100644 --- a/langchain/src/llms/googlevertexai/index.ts +++ b/langchain/src/llms/googlevertexai/index.ts @@ -1,66 +1 @@ -import { GoogleAuthOptions } from "google-auth-library"; -import { GoogleVertexAILLMConnection } from "../../util/googlevertexai-connection.js"; -import { GoogleVertexAIBaseLLMInput } from "../../types/googlevertexai-types.js"; -import { BaseGoogleVertexAI } from "./common.js"; -import { GAuthClient } from "../../util/googlevertexai-gauth.js"; - -/** - * Interface representing the input to the Google Vertex AI model. - */ -export interface GoogleVertexAITextInput - extends GoogleVertexAIBaseLLMInput {} - -/** - * Enables calls to the Google Cloud's Vertex AI API to access - * Large Language Models. - * - * To use, you will need to have one of the following authentication - * methods in place: - * - You are logged into an account permitted to the Google Cloud project - * using Vertex AI. - * - You are running this on a machine using a service account permitted to - * the Google Cloud project using Vertex AI. 
- * - The `GOOGLE_APPLICATION_CREDENTIALS` environment variable is set to the - * path of a credentials file for a service account permitted to the - * Google Cloud project using Vertex AI. - * @example - * ```typescript - * const model = new GoogleVertexAI({ - * temperature: 0.7, - * }); - * const stream = await model.stream( - * "What would be a good company name for a company that makes colorful socks?", - * ); - * for await (const chunk of stream) { - * console.log(chunk); - * } - * ``` - */ -export class GoogleVertexAI extends BaseGoogleVertexAI { - static lc_name() { - return "VertexAI"; - } - - constructor(fields?: GoogleVertexAITextInput) { - super(fields); - - const client = new GAuthClient({ - scopes: "https://www.googleapis.com/auth/cloud-platform", - ...fields?.authOptions, - }); - - this.connection = new GoogleVertexAILLMConnection( - { ...fields, ...this }, - this.caller, - client, - false - ); - - this.streamedConnection = new GoogleVertexAILLMConnection( - { ...fields, ...this }, - this.caller, - client, - true - ); - } -} +export * from "@langchain/community/llms/googlevertexai"; \ No newline at end of file diff --git a/langchain/src/llms/googlevertexai/web.ts b/langchain/src/llms/googlevertexai/web.ts index 0b656308d53b..30304d34c5c9 100644 --- a/langchain/src/llms/googlevertexai/web.ts +++ b/langchain/src/llms/googlevertexai/web.ts @@ -1,66 +1 @@ -import { - WebGoogleAuth, - WebGoogleAuthOptions, -} from "../../util/googlevertexai-webauth.js"; -import { GoogleVertexAILLMConnection } from "../../util/googlevertexai-connection.js"; -import { GoogleVertexAIBaseLLMInput } from "../../types/googlevertexai-types.js"; -import { BaseGoogleVertexAI } from "./common.js"; - -/** - * Interface representing the input to the Google Vertex AI model. - */ -export interface GoogleVertexAITextInput - extends GoogleVertexAIBaseLLMInput {} - -/** - * Enables calls to the Google Cloud's Vertex AI API to access - * Large Language Models. 
- * - * This entrypoint and class are intended to be used in web environments like Edge - * functions where you do not have access to the file system. It supports passing - * service account credentials directly as a "GOOGLE_VERTEX_AI_WEB_CREDENTIALS" - * environment variable or directly as "authOptions.credentials". - * @example - * ```typescript - * const model = new GoogleVertexAI({ - * temperature: 0.7, - * }); - * const stream = await model.stream( - * "What would be a good company name for a company that makes colorful socks?", - * ); - * for await (const chunk of stream) { - * console.log(chunk); - * } - * ``` - */ -export class GoogleVertexAI extends BaseGoogleVertexAI { - static lc_name() { - return "VertexAI"; - } - - get lc_secrets(): { [key: string]: string } { - return { - "authOptions.credentials": "GOOGLE_VERTEX_AI_WEB_CREDENTIALS", - }; - } - - constructor(fields?: GoogleVertexAITextInput) { - super(fields); - - const client = new WebGoogleAuth(fields?.authOptions); - - this.connection = new GoogleVertexAILLMConnection( - { ...fields, ...this }, - this.caller, - client, - false - ); - - this.streamedConnection = new GoogleVertexAILLMConnection( - { ...fields, ...this }, - this.caller, - client, - true - ); - } -} +export * from "@langchain/community/llms/googlevertexai/web"; \ No newline at end of file diff --git a/langchain/src/llms/gradient_ai.ts b/langchain/src/llms/gradient_ai.ts index 782bb8cd114e..c47dee6c5a30 100644 --- a/langchain/src/llms/gradient_ai.ts +++ b/langchain/src/llms/gradient_ai.ts @@ -1,136 +1 @@ -import { Gradient } from "@gradientai/nodejs-sdk"; -import { BaseLLMCallOptions, BaseLLMParams, LLM } from "./base.js"; -import { getEnvironmentVariable } from "../util/env.js"; - -/** - * The GradientLLMParams interface defines the input parameters for - * the GradientLLM class. - */ -export interface GradientLLMParams extends BaseLLMParams { - /** - * Gradient AI Access Token. 
- * Provide Access Token if you do not wish to automatically pull from env. - */ - gradientAccessKey?: string; - /** - * Gradient Workspace Id. - * Provide workspace id if you do not wish to automatically pull from env. - */ - workspaceId?: string; - /** - * Parameters accepted by the Gradient npm package. - */ - inferenceParameters?: Record; - /** - * Gradient AI Model Slug. - */ - modelSlug?: string; - /** - * Gradient Adapter ID for custom fine tuned models. - */ - adapterId?: string; -} - -/** - * The GradientLLM class is used to interact with Gradient AI inference Endpoint models. - * This requires your Gradient AI Access Token which is autoloaded if not specified. - */ -export class GradientLLM extends LLM { - static lc_name() { - return "GradientLLM"; - } - - get lc_secrets(): { [key: string]: string } | undefined { - return { - gradientAccessKey: "GRADIENT_ACCESS_TOKEN", - workspaceId: "GRADIENT_WORKSPACE_ID", - }; - } - - modelSlug = "llama2-7b-chat"; - - adapterId?: string; - - gradientAccessKey?: string; - - workspaceId?: string; - - inferenceParameters?: Record; - - // Gradient AI does not export the BaseModel type. Once it does, we can use it here. - // eslint-disable-next-line @typescript-eslint/no-explicit-any - model: any; - - constructor(fields: GradientLLMParams) { - super(fields); - - this.modelSlug = fields?.modelSlug ?? this.modelSlug; - this.adapterId = fields?.adapterId; - this.gradientAccessKey = - fields?.gradientAccessKey ?? - getEnvironmentVariable("GRADIENT_ACCESS_TOKEN"); - this.workspaceId = - fields?.workspaceId ?? getEnvironmentVariable("GRADIENT_WORKSPACE_ID"); - - this.inferenceParameters = fields.inferenceParameters; - - if (!this.gradientAccessKey) { - throw new Error("Missing Gradient AI Access Token"); - } - - if (!this.workspaceId) { - throw new Error("Missing Gradient AI Workspace ID"); - } - } - - _llmType() { - return "gradient_ai"; - } - - /** - * Calls the Gradient AI endpoint and retrieves the result. 
- * @param {string} prompt The input prompt. - * @returns {Promise} A promise that resolves to the generated string. - */ - /** @ignore */ - async _call( - prompt: string, - _options: this["ParsedCallOptions"] - ): Promise { - await this.setModel(); - - // GradientLLM does not export the CompleteResponse type. Once it does, we can use it here. - interface CompleteResponse { - finishReason: string; - generatedOutput: string; - } - - const response = (await this.caller.call(async () => - this.model.complete({ - query: prompt, - ...this.inferenceParameters, - }) - )) as CompleteResponse; - - return response.generatedOutput; - } - - async setModel() { - if (this.model) return; - - const gradient = new Gradient({ - accessToken: this.gradientAccessKey, - workspaceId: this.workspaceId, - }); - - if (this.adapterId) { - this.model = await gradient.getModelAdapter({ - modelAdapterId: this.adapterId, - }); - } else { - this.model = await gradient.getBaseModel({ - baseModelSlug: this.modelSlug, - }); - } - } -} +export * from "@langchain/community/llms/gradient_ai"; \ No newline at end of file diff --git a/langchain/src/llms/hf.ts b/langchain/src/llms/hf.ts index e849a7acb37e..b0dd59b3a5d6 100644 --- a/langchain/src/llms/hf.ts +++ b/langchain/src/llms/hf.ts @@ -1,155 +1 @@ -import { getEnvironmentVariable } from "../util/env.js"; -import { LLM, BaseLLMParams } from "./base.js"; - -/** - * Interface defining the parameters for configuring the Hugging Face - * model for text generation. - */ -export interface HFInput { - /** Model to use */ - model: string; - - /** Custom inference endpoint URL to use */ - endpointUrl?: string; - - /** Sampling temperature to use */ - temperature?: number; - - /** - * Maximum number of tokens to generate in the completion. - */ - maxTokens?: number; - - /** Total probability mass of tokens to consider at each step */ - topP?: number; - - /** Integer to define the top tokens considered within the sample operation to create new text. 
*/ - topK?: number; - - /** Penalizes repeated tokens according to frequency */ - frequencyPenalty?: number; - - /** API key to use. */ - apiKey?: string; - - /** - * Credentials to use for the request. If this is a string, it will be passed straight on. If it's a boolean, true will be "include" and false will not send credentials at all. - */ - includeCredentials?: string | boolean; -} - -/** - * Class implementing the Large Language Model (LLM) interface using the - * Hugging Face Inference API for text generation. - * @example - * ```typescript - * const model = new HuggingFaceInference({ - * model: "gpt2", - * temperature: 0.7, - * maxTokens: 50, - * }); - * - * const res = await model.call( - * "Question: What would be a good company name for a company that makes colorful socks?\nAnswer:" - * ); - * console.log({ res }); - * ``` - */ -export class HuggingFaceInference extends LLM implements HFInput { - get lc_secrets(): { [key: string]: string } | undefined { - return { - apiKey: "HUGGINGFACEHUB_API_KEY", - }; - } - - model = "gpt2"; - - temperature: number | undefined = undefined; - - maxTokens: number | undefined = undefined; - - topP: number | undefined = undefined; - - topK: number | undefined = undefined; - - frequencyPenalty: number | undefined = undefined; - - apiKey: string | undefined = undefined; - - endpointUrl: string | undefined = undefined; - - includeCredentials: string | boolean | undefined = undefined; - - constructor(fields?: Partial & BaseLLMParams) { - super(fields ?? {}); - - this.model = fields?.model ?? this.model; - this.temperature = fields?.temperature ?? this.temperature; - this.maxTokens = fields?.maxTokens ?? this.maxTokens; - this.topP = fields?.topP ?? this.topP; - this.topK = fields?.topK ?? this.topK; - this.frequencyPenalty = fields?.frequencyPenalty ?? this.frequencyPenalty; - this.apiKey = - fields?.apiKey ?? 
getEnvironmentVariable("HUGGINGFACEHUB_API_KEY"); - this.endpointUrl = fields?.endpointUrl; - this.includeCredentials = fields?.includeCredentials; - - if (!this.apiKey) { - throw new Error( - "Please set an API key for HuggingFace Hub in the environment variable HUGGINGFACEHUB_API_KEY or in the apiKey field of the HuggingFaceInference constructor." - ); - } - } - - _llmType() { - return "hf"; - } - - /** @ignore */ - async _call( - prompt: string, - options: this["ParsedCallOptions"] - ): Promise { - const { HfInference } = await HuggingFaceInference.imports(); - const hf = this.endpointUrl - ? new HfInference(this.apiKey, { - includeCredentials: this.includeCredentials, - }).endpoint(this.endpointUrl) - : new HfInference(this.apiKey, { - includeCredentials: this.includeCredentials, - }); - - const res = await this.caller.callWithOptions( - { signal: options.signal }, - hf.textGeneration.bind(hf), - { - model: this.model, - parameters: { - // make it behave similar to openai, returning only the generated text - return_full_text: false, - temperature: this.temperature, - max_new_tokens: this.maxTokens, - top_p: this.topP, - top_k: this.topK, - repetition_penalty: this.frequencyPenalty, - }, - inputs: prompt, - } - ); - return res.generated_text; - } - - /** @ignore */ - static async imports(): Promise<{ - HfInference: typeof import("@huggingface/inference").HfInference; - }> { - try { - const { HfInference } = await import("@huggingface/inference"); - return { HfInference }; - } catch (e) { - throw new Error( - "Please install huggingface as a dependency with, e.g. 
`yarn add @huggingface/inference`" - ); - } - } -} +export * from "@langchain/community/llms/hf"; \ No newline at end of file diff --git a/langchain/src/llms/llama_cpp.ts b/langchain/src/llms/llama_cpp.ts index f2d6518ffd2b..08380bd67f4d 100644 --- a/langchain/src/llms/llama_cpp.ts +++ b/langchain/src/llms/llama_cpp.ts @@ -1,116 +1 @@ -import { LlamaModel, LlamaContext, LlamaChatSession } from "node-llama-cpp"; -import { - LlamaBaseCppInputs, - createLlamaModel, - createLlamaContext, - createLlamaSession, -} from "../util/llama_cpp.js"; -import { LLM, BaseLLMCallOptions, BaseLLMParams } from "./base.js"; -import { CallbackManagerForLLMRun } from "../callbacks/manager.js"; -import { GenerationChunk } from "../schema/index.js"; - -/** - * Note that the modelPath is the only required parameter. For testing you - * can set this in the environment variable `LLAMA_PATH`. - */ -export interface LlamaCppInputs extends LlamaBaseCppInputs, BaseLLMParams {} - -export interface LlamaCppCallOptions extends BaseLLMCallOptions { - /** The maximum number of tokens the response should contain. */ - maxTokens?: number; - /** A function called when matching the provided token array */ - onToken?: (tokens: number[]) => void; -} - -/** - * To use this model you need to have the `node-llama-cpp` module installed. - * This can be installed using `npm install -S node-llama-cpp` and the minimum - * version supported in version 2.0.0. - * This also requires that have a locally built version of Llama2 installed. 
- */ -export class LlamaCpp extends LLM { - declare CallOptions: LlamaCppCallOptions; - - static inputs: LlamaCppInputs; - - maxTokens?: number; - - temperature?: number; - - topK?: number; - - topP?: number; - - trimWhitespaceSuffix?: boolean; - - _model: LlamaModel; - - _context: LlamaContext; - - _session: LlamaChatSession; - - static lc_name() { - return "LlamaCpp"; - } - - constructor(inputs: LlamaCppInputs) { - super(inputs); - this.maxTokens = inputs?.maxTokens; - this.temperature = inputs?.temperature; - this.topK = inputs?.topK; - this.topP = inputs?.topP; - this.trimWhitespaceSuffix = inputs?.trimWhitespaceSuffix; - this._model = createLlamaModel(inputs); - this._context = createLlamaContext(this._model, inputs); - this._session = createLlamaSession(this._context); - } - - _llmType() { - return "llama2_cpp"; - } - - /** @ignore */ - async _call( - prompt: string, - options?: this["ParsedCallOptions"] - ): Promise { - try { - const promptOptions = { - onToken: options?.onToken, - maxTokens: this?.maxTokens, - temperature: this?.temperature, - topK: this?.topK, - topP: this?.topP, - trimWhitespaceSuffix: this?.trimWhitespaceSuffix, - }; - const completion = await this._session.prompt(prompt, promptOptions); - return completion; - } catch (e) { - throw new Error("Error getting prompt completion."); - } - } - - async *_streamResponseChunks( - prompt: string, - _options: this["ParsedCallOptions"], - runManager?: CallbackManagerForLLMRun - ): AsyncGenerator { - const promptOptions = { - temperature: this?.temperature, - topK: this?.topK, - topP: this?.topP, - }; - - const stream = await this.caller.call(async () => - this._context.evaluate(this._context.encode(prompt), promptOptions) - ); - - for await (const chunk of stream) { - yield new GenerationChunk({ - text: this._context.decode([chunk]), - generationInfo: {}, - }); - await runManager?.handleLLMNewToken(this._context.decode([chunk]) ?? 
""); - } - } -} +export * from "@langchain/community/llms/llama_cpp"; \ No newline at end of file diff --git a/langchain/src/llms/ollama.ts b/langchain/src/llms/ollama.ts index 8b5c178cfb1f..b636acd69fa3 100644 --- a/langchain/src/llms/ollama.ts +++ b/langchain/src/llms/ollama.ts @@ -1,245 +1 @@ -import { LLM, BaseLLMParams } from "./base.js"; -import { - createOllamaStream, - OllamaInput, - OllamaCallOptions, -} from "../util/ollama.js"; -import { CallbackManagerForLLMRun } from "../callbacks/manager.js"; -import { GenerationChunk } from "../schema/index.js"; -import type { StringWithAutocomplete } from "../util/types.js"; - -/** - * Class that represents the Ollama language model. It extends the base - * LLM class and implements the OllamaInput interface. - * @example - * ```typescript - * const ollama = new Ollama({ - * baseUrl: "http://api.example.com", - * model: "llama2", - * }); - * - * // Streaming translation from English to German - * const stream = await ollama.stream( - * `Translate "I love programming" into German.` - * ); - * - * const chunks = []; - * for await (const chunk of stream) { - * chunks.push(chunk); - * } - * - * console.log(chunks.join("")); - * ``` - */ -export class Ollama extends LLM implements OllamaInput { - static lc_name() { - return "Ollama"; - } - - lc_serializable = true; - - model = "llama2"; - - baseUrl = "http://localhost:11434"; - - embeddingOnly?: boolean; - - f16KV?: boolean; - - frequencyPenalty?: number; - - logitsAll?: boolean; - - lowVram?: boolean; - - mainGpu?: number; - - mirostat?: number; - - mirostatEta?: number; - - mirostatTau?: number; - - numBatch?: number; - - numCtx?: number; - - numGpu?: number; - - numGqa?: number; - - numKeep?: number; - - numThread?: number; - - penalizeNewline?: boolean; - - presencePenalty?: number; - - repeatLastN?: number; - - repeatPenalty?: number; - - ropeFrequencyBase?: number; - - ropeFrequencyScale?: number; - - temperature?: number; - - stop?: string[]; - - tfsZ?: number; - - 
topK?: number; - - topP?: number; - - typicalP?: number; - - useMLock?: boolean; - - useMMap?: boolean; - - vocabOnly?: boolean; - - format?: StringWithAutocomplete<"json">; - - constructor(fields: OllamaInput & BaseLLMParams) { - super(fields); - this.model = fields.model ?? this.model; - this.baseUrl = fields.baseUrl?.endsWith("/") - ? fields.baseUrl.slice(0, -1) - : fields.baseUrl ?? this.baseUrl; - - this.embeddingOnly = fields.embeddingOnly; - this.f16KV = fields.f16KV; - this.frequencyPenalty = fields.frequencyPenalty; - this.logitsAll = fields.logitsAll; - this.lowVram = fields.lowVram; - this.mainGpu = fields.mainGpu; - this.mirostat = fields.mirostat; - this.mirostatEta = fields.mirostatEta; - this.mirostatTau = fields.mirostatTau; - this.numBatch = fields.numBatch; - this.numCtx = fields.numCtx; - this.numGpu = fields.numGpu; - this.numGqa = fields.numGqa; - this.numKeep = fields.numKeep; - this.numThread = fields.numThread; - this.penalizeNewline = fields.penalizeNewline; - this.presencePenalty = fields.presencePenalty; - this.repeatLastN = fields.repeatLastN; - this.repeatPenalty = fields.repeatPenalty; - this.ropeFrequencyBase = fields.ropeFrequencyBase; - this.ropeFrequencyScale = fields.ropeFrequencyScale; - this.temperature = fields.temperature; - this.stop = fields.stop; - this.tfsZ = fields.tfsZ; - this.topK = fields.topK; - this.topP = fields.topP; - this.typicalP = fields.typicalP; - this.useMLock = fields.useMLock; - this.useMMap = fields.useMMap; - this.vocabOnly = fields.vocabOnly; - this.format = fields.format; - } - - _llmType() { - return "ollama"; - } - - invocationParams(options?: this["ParsedCallOptions"]) { - return { - model: this.model, - format: this.format, - options: { - embedding_only: this.embeddingOnly, - f16_kv: this.f16KV, - frequency_penalty: this.frequencyPenalty, - logits_all: this.logitsAll, - low_vram: this.lowVram, - main_gpu: this.mainGpu, - mirostat: this.mirostat, - mirostat_eta: this.mirostatEta, - mirostat_tau: 
this.mirostatTau, - num_batch: this.numBatch, - num_ctx: this.numCtx, - num_gpu: this.numGpu, - num_gqa: this.numGqa, - num_keep: this.numKeep, - num_thread: this.numThread, - penalize_newline: this.penalizeNewline, - presence_penalty: this.presencePenalty, - repeat_last_n: this.repeatLastN, - repeat_penalty: this.repeatPenalty, - rope_frequency_base: this.ropeFrequencyBase, - rope_frequency_scale: this.ropeFrequencyScale, - temperature: this.temperature, - stop: options?.stop ?? this.stop, - tfs_z: this.tfsZ, - top_k: this.topK, - top_p: this.topP, - typical_p: this.typicalP, - use_mlock: this.useMLock, - use_mmap: this.useMMap, - vocab_only: this.vocabOnly, - }, - }; - } - - async *_streamResponseChunks( - prompt: string, - options: this["ParsedCallOptions"], - runManager?: CallbackManagerForLLMRun - ): AsyncGenerator { - const stream = await this.caller.call(async () => - createOllamaStream( - this.baseUrl, - { ...this.invocationParams(options), prompt }, - options - ) - ); - for await (const chunk of stream) { - if (!chunk.done) { - yield new GenerationChunk({ - text: chunk.response, - generationInfo: { - ...chunk, - response: undefined, - }, - }); - await runManager?.handleLLMNewToken(chunk.response ?? 
""); - } else { - yield new GenerationChunk({ - text: "", - generationInfo: { - model: chunk.model, - total_duration: chunk.total_duration, - load_duration: chunk.load_duration, - prompt_eval_count: chunk.prompt_eval_count, - prompt_eval_duration: chunk.prompt_eval_duration, - eval_count: chunk.eval_count, - eval_duration: chunk.eval_duration, - }, - }); - } - } - } - - /** @ignore */ - async _call( - prompt: string, - options: this["ParsedCallOptions"], - runManager?: CallbackManagerForLLMRun - ): Promise { - const chunks = []; - for await (const chunk of this._streamResponseChunks( - prompt, - options, - runManager - )) { - chunks.push(chunk.text); - } - return chunks.join(""); - } -} +export * from "@langchain/community/llms/ollama"; \ No newline at end of file diff --git a/langchain/src/llms/portkey.ts b/langchain/src/llms/portkey.ts index c6a2df313826..f915efb8c94a 100644 --- a/langchain/src/llms/portkey.ts +++ b/langchain/src/llms/portkey.ts @@ -1,179 +1 @@ -import _ from "lodash"; -import { LLMOptions, Portkey as _Portkey } from "portkey-ai"; -import { CallbackManagerForLLMRun } from "../callbacks/manager.js"; -import { GenerationChunk, LLMResult } from "../schema/index.js"; -import { getEnvironmentVariable } from "../util/env.js"; -import { BaseLLM } from "./base.js"; - -interface PortkeyOptions { - apiKey?: string; - baseURL?: string; - mode?: string; - llms?: [LLMOptions] | null; -} - -const readEnv = (env: string, default_val?: string): string | undefined => - getEnvironmentVariable(env) ?? 
default_val; - -export class PortkeySession { - portkey: _Portkey; - - constructor(options: PortkeyOptions = {}) { - if (!options.apiKey) { - /* eslint-disable no-param-reassign */ - options.apiKey = readEnv("PORTKEY_API_KEY"); - } - - if (!options.baseURL) { - /* eslint-disable no-param-reassign */ - options.baseURL = readEnv("PORTKEY_BASE_URL", "https://api.portkey.ai"); - } - - this.portkey = new _Portkey({}); - this.portkey.llms = [{}]; - if (!options.apiKey) { - throw new Error("Set Portkey ApiKey in PORTKEY_API_KEY env variable"); - } - - this.portkey = new _Portkey(options); - } -} - -const defaultPortkeySession: { - session: PortkeySession; - options: PortkeyOptions; -}[] = []; - -/** - * Get a session for the Portkey API. If one already exists with the same options, - * it will be returned. Otherwise, a new session will be created. - * @param options - * @returns - */ -export function getPortkeySession(options: PortkeyOptions = {}) { - let session = defaultPortkeySession.find((session) => - _.isEqual(session.options, options) - )?.session; - - if (!session) { - session = new PortkeySession(options); - defaultPortkeySession.push({ session, options }); - } - return session; -} - -/** - * @example - * ```typescript - * const model = new Portkey({ - * mode: "single", - * llms: [ - * { - * provider: "openai", - * virtual_key: "open-ai-key-1234", - * model: "text-davinci-003", - * max_tokens: 2000, - * }, - * ], - * }); - * - * // Stream the output of the model and process it - * const res = await model.stream( - * "Question: Write a story about a king\nAnswer:" - * ); - * for await (const i of res) { - * process.stdout.write(i); - * } - * ``` - */ -export class Portkey extends BaseLLM { - apiKey?: string = undefined; - - baseURL?: string = undefined; - - mode?: string = undefined; - - llms?: [LLMOptions] | null = undefined; - - session: PortkeySession; - - constructor(init?: Partial) { - super(init ?? 
{}); - this.apiKey = init?.apiKey; - - this.baseURL = init?.baseURL; - - this.mode = init?.mode; - - this.llms = init?.llms; - - this.session = getPortkeySession({ - apiKey: this.apiKey, - baseURL: this.baseURL, - llms: this.llms, - mode: this.mode, - }); - } - - _llmType() { - return "portkey"; - } - - async _generate( - prompts: string[], - options: this["ParsedCallOptions"], - _?: CallbackManagerForLLMRun - ): Promise { - const choices = []; - for (let i = 0; i < prompts.length; i += 1) { - const response = await this.session.portkey.completions.create({ - prompt: prompts[i], - ...options, - stream: false, - }); - choices.push(response.choices); - } - const generations = choices.map((promptChoices) => - promptChoices.map((choice) => ({ - text: choice.text ?? "", - generationInfo: { - finishReason: choice.finish_reason, - logprobs: choice.logprobs, - }, - })) - ); - - return { - generations, - }; - } - - async *_streamResponseChunks( - input: string, - options: this["ParsedCallOptions"], - runManager?: CallbackManagerForLLMRun - ): AsyncGenerator { - const response = await this.session.portkey.completions.create({ - prompt: input, - ...options, - stream: true, - }); - for await (const data of response) { - const choice = data?.choices[0]; - if (!choice) { - continue; - } - const chunk = new GenerationChunk({ - text: choice.text ?? "", - generationInfo: { - finishReason: choice.finish_reason, - }, - }); - yield chunk; - void runManager?.handleLLMNewToken(chunk.text ?? 
""); - } - if (options.signal?.aborted) { - throw new Error("AbortError"); - } - } -} +export * from "@langchain/community/llms/portkey"; \ No newline at end of file diff --git a/langchain/src/llms/raycast.ts b/langchain/src/llms/raycast.ts index 7901f0fde238..df36240c49a0 100644 --- a/langchain/src/llms/raycast.ts +++ b/langchain/src/llms/raycast.ts @@ -1,99 +1 @@ -import { AI, environment } from "@raycast/api"; -import { LLM, BaseLLMParams } from "./base.js"; - -/** - * The input parameters for the RaycastAI class, which extends the BaseLLMParams interface. - */ -export interface RaycastAIInput extends BaseLLMParams { - model?: AI.Model; - creativity?: number; - rateLimitPerMinute?: number; -} - -const wait = (ms: number) => - new Promise((resolve) => { - setTimeout(resolve, ms); - }); - -/** - * The RaycastAI class, which extends the LLM class and implements the RaycastAIInput interface. - */ -export class RaycastAI extends LLM implements RaycastAIInput { - /** - * The model to use for generating text. - */ - model: AI.Model; - - /** - * The creativity parameter, also known as the "temperature". - */ - creativity: number; - - /** - * The rate limit for API calls, in requests per minute. - */ - rateLimitPerMinute: number; - - /** - * The timestamp of the last API call, used to enforce the rate limit. - */ - private lastCallTimestamp = 0; - - /** - * Creates a new instance of the RaycastAI class. - * @param {RaycastAIInput} fields The input parameters for the RaycastAI class. - * @throws {Error} If the Raycast AI environment is not accessible. - */ - constructor(fields: RaycastAIInput) { - super(fields ?? {}); - - if (!environment.canAccess(AI)) { - throw new Error("Raycast AI environment is not accessible."); - } - - this.model = fields.model ?? "text-davinci-003"; - this.creativity = fields.creativity ?? 0.5; - this.rateLimitPerMinute = fields.rateLimitPerMinute ?? 10; - } - - /** - * Returns the type of the LLM, which is "raycast_ai". 
- * @return {string} The type of the LLM. - * @ignore - */ - _llmType() { - return "raycast_ai"; - } - - /** - * Calls AI.ask with the given prompt and returns the generated text. - * @param {string} prompt The prompt to generate text from. - * @return {Promise} A Promise that resolves to the generated text. - * @ignore - */ - async _call( - prompt: string, - options: this["ParsedCallOptions"] - ): Promise { - const response = await this.caller.call(async () => { - // Rate limit calls to Raycast AI - const now = Date.now(); - const timeSinceLastCall = now - this.lastCallTimestamp; - const timeToWait = - (60 / this.rateLimitPerMinute) * 1000 - timeSinceLastCall; - - if (timeToWait > 0) { - await wait(timeToWait); - } - - return await AI.ask(prompt, { - model: this.model, - creativity: this.creativity, - signal: options.signal, - }); - }); - - // Since Raycast AI returns the response directly, no need for output transformation - return response; - } -} +export * from "@langchain/community/llms/raycast"; \ No newline at end of file diff --git a/langchain/src/llms/replicate.ts b/langchain/src/llms/replicate.ts index 27fa66c5eb7c..a88ed8e5889c 100644 --- a/langchain/src/llms/replicate.ts +++ b/langchain/src/llms/replicate.ts @@ -1,158 +1 @@ -import { getEnvironmentVariable } from "../util/env.js"; -import { LLM, BaseLLMParams } from "./base.js"; - -/** - * Interface defining the structure of the input data for the Replicate - * class. It includes details about the model to be used, any additional - * input parameters, and the API key for the Replicate service. - */ -export interface ReplicateInput { - // owner/model_name:version - model: `${string}/${string}:${string}`; - - input?: { - // different models accept different inputs - [key: string]: string | number | boolean; - }; - - apiKey?: string; - - /** The key used to pass prompts to the model. */ - promptKey?: string; -} - -/** - * Class responsible for managing the interaction with the Replicate API. 
- * It handles the API key and model details, makes the actual API calls, - * and converts the API response into a format usable by the rest of the - * LangChain framework. - * @example - * ```typescript - * const model = new Replicate({ - * model: "replicate/flan-t5-xl:3ae0799123a1fe11f8c89fd99632f843fc5f7a761630160521c4253149754523", - * }); - * - * const res = await model.call( - * "Question: What would be a good company name for a company that makes colorful socks?\nAnswer:" - * ); - * console.log({ res }); - * ``` - */ -export class Replicate extends LLM implements ReplicateInput { - static lc_name() { - return "Replicate"; - } - - get lc_secrets(): { [key: string]: string } | undefined { - return { - apiKey: "REPLICATE_API_TOKEN", - }; - } - - lc_serializable = true; - - model: ReplicateInput["model"]; - - input: ReplicateInput["input"]; - - apiKey: string; - - promptKey?: string; - - constructor(fields: ReplicateInput & BaseLLMParams) { - super(fields); - - const apiKey = - fields?.apiKey ?? - getEnvironmentVariable("REPLICATE_API_KEY") ?? // previous environment variable for backwards compatibility - getEnvironmentVariable("REPLICATE_API_TOKEN"); // current environment variable, matching the Python library - - if (!apiKey) { - throw new Error( - "Please set the REPLICATE_API_TOKEN environment variable" - ); - } - - this.apiKey = apiKey; - this.model = fields.model; - this.input = fields.input ?? 
{}; - this.promptKey = fields.promptKey; - } - - _llmType() { - return "replicate"; - } - - /** @ignore */ - async _call( - prompt: string, - options: this["ParsedCallOptions"] - ): Promise { - const imports = await Replicate.imports(); - - const replicate = new imports.Replicate({ - userAgent: "langchain", - auth: this.apiKey, - }); - - if (this.promptKey === undefined) { - const [modelString, versionString] = this.model.split(":"); - const version = await replicate.models.versions.get( - modelString.split("/")[0], - modelString.split("/")[1], - versionString - ); - const openapiSchema = version.openapi_schema; - const inputProperties: { "x-order": number | undefined }[] = - // eslint-disable-next-line @typescript-eslint/no-explicit-any - (openapiSchema as any)?.components?.schemas?.Input?.properties; - if (inputProperties === undefined) { - this.promptKey = "prompt"; - } else { - const sortedInputProperties = Object.entries(inputProperties).sort( - ([_keyA, valueA], [_keyB, valueB]) => { - const orderA = valueA["x-order"] || 0; - const orderB = valueB["x-order"] || 0; - return orderA - orderB; - } - ); - this.promptKey = sortedInputProperties[0][0] ?? "prompt"; - } - } - const output = await this.caller.callWithOptions( - { signal: options.signal }, - () => - replicate.run(this.model, { - input: { - // eslint-disable-next-line @typescript-eslint/no-non-null-assertion - [this.promptKey!]: prompt, - ...this.input, - }, - }) - ); - - if (typeof output === "string") { - return output; - } else if (Array.isArray(output)) { - return output.join(""); - } else { - // Note this is a little odd, but the output format is not consistent - // across models, so it makes some amount of sense. 
- return String(output); - } - } - - /** @ignore */ - static async imports(): Promise<{ - Replicate: typeof import("replicate").default; - }> { - try { - const { default: Replicate } = await import("replicate"); - return { Replicate }; - } catch (e) { - throw new Error( - "Please install replicate as a dependency with, e.g. `yarn add replicate`" - ); - } - } -} +export * from "@langchain/community/llms/replicate"; \ No newline at end of file diff --git a/langchain/src/llms/sagemaker_endpoint.ts b/langchain/src/llms/sagemaker_endpoint.ts index 38706d608417..6de4672bc0cd 100644 --- a/langchain/src/llms/sagemaker_endpoint.ts +++ b/langchain/src/llms/sagemaker_endpoint.ts @@ -1,283 +1 @@ -import { - InvokeEndpointCommand, - InvokeEndpointWithResponseStreamCommand, - SageMakerRuntimeClient, - SageMakerRuntimeClientConfig, -} from "@aws-sdk/client-sagemaker-runtime"; -import { CallbackManagerForLLMRun } from "../callbacks/manager.js"; -import { GenerationChunk } from "../schema/index.js"; -import { BaseLLMCallOptions, BaseLLMParams, LLM } from "./base.js"; - -/** - * A handler class to transform input from LLM to a format that SageMaker - * endpoint expects. Similarily, the class also handles transforming output from - * the SageMaker endpoint to a format that LLM class expects. 
- * - * Example: - * ``` - * class ContentHandler implements ContentHandlerBase { - * contentType = "application/json" - * accepts = "application/json" - * - * transformInput(prompt: string, modelKwargs: Record) { - * const inputString = JSON.stringify({ - * prompt, - * ...modelKwargs - * }) - * return Buffer.from(inputString) - * } - * - * transformOutput(output: Uint8Array) { - * const responseJson = JSON.parse(Buffer.from(output).toString("utf-8")) - * return responseJson[0].generated_text - * } - * - * } - * ``` - */ -export abstract class BaseSageMakerContentHandler { - contentType = "text/plain"; - - accepts = "text/plain"; - - /** - * Transforms the prompt and model arguments into a specific format for sending to SageMaker. - * @param {InputType} prompt The prompt to be transformed. - * @param {Record} modelKwargs Additional arguments. - * @returns {Promise} A promise that resolves to the formatted data for sending. - */ - abstract transformInput( - prompt: InputType, - modelKwargs: Record - ): Promise; - - /** - * Transforms SageMaker output into a desired format. - * @param {Uint8Array} output The raw output from SageMaker. - * @returns {Promise} A promise that resolves to the transformed data. - */ - abstract transformOutput(output: Uint8Array): Promise; -} - -export type SageMakerLLMContentHandler = BaseSageMakerContentHandler< - string, - string ->; - -/** - * The SageMakerEndpointInput interface defines the input parameters for - * the SageMakerEndpoint class, which includes the endpoint name, client - * options for the SageMaker client, the content handler, and optional - * keyword arguments for the model and the endpoint. - */ -export interface SageMakerEndpointInput extends BaseLLMParams { - /** - * The name of the endpoint from the deployed SageMaker model. Must be unique - * within an AWS Region. - */ - endpointName: string; - /** - * Options passed to the SageMaker client. 
- */ - clientOptions: SageMakerRuntimeClientConfig; - /** - * Key word arguments to pass to the model. - */ - modelKwargs?: Record; - /** - * Optional attributes passed to the InvokeEndpointCommand - */ - endpointKwargs?: Record; - /** - * The content handler class that provides an input and output transform - * functions to handle formats between LLM and the endpoint. - */ - contentHandler: SageMakerLLMContentHandler; - streaming?: boolean; -} - -/** - * The SageMakerEndpoint class is used to interact with SageMaker - * Inference Endpoint models. It uses the AWS client for authentication, - * which automatically loads credentials. - * If a specific credential profile is to be used, the name of the profile - * from the ~/.aws/credentials file must be passed. The credentials or - * roles used should have the required policies to access the SageMaker - * endpoint. - */ -export class SageMakerEndpoint extends LLM { - static lc_name() { - return "SageMakerEndpoint"; - } - - get lc_secrets(): { [key: string]: string } | undefined { - return { - "clientOptions.credentials.accessKeyId": "AWS_ACCESS_KEY_ID", - "clientOptions.credentials.secretAccessKey": "AWS_SECRET_ACCESS_KEY", - "clientOptions.credentials.sessionToken": "AWS_SESSION_TOKEN", - }; - } - - endpointName: string; - - modelKwargs?: Record; - - endpointKwargs?: Record; - - client: SageMakerRuntimeClient; - - contentHandler: SageMakerLLMContentHandler; - - streaming: boolean; - - constructor(fields: SageMakerEndpointInput) { - super(fields); - - if (!fields.clientOptions.region) { - throw new Error( - `Please pass a "clientOptions" object with a "region" field to the constructor` - ); - } - - const endpointName = fields?.endpointName; - if (!endpointName) { - throw new Error(`Please pass an "endpointName" field to the constructor`); - } - - const contentHandler = fields?.contentHandler; - if (!contentHandler) { - throw new Error( - `Please pass a "contentHandler" field to the constructor` - ); - } - - 
this.endpointName = fields.endpointName; - this.contentHandler = fields.contentHandler; - this.endpointKwargs = fields.endpointKwargs; - this.modelKwargs = fields.modelKwargs; - this.streaming = fields.streaming ?? false; - this.client = new SageMakerRuntimeClient(fields.clientOptions); - } - - _llmType() { - return "sagemaker_endpoint"; - } - - /** - * Calls the SageMaker endpoint and retrieves the result. - * @param {string} prompt The input prompt. - * @param {this["ParsedCallOptions"]} options Parsed call options. - * @param {CallbackManagerForLLMRun} runManager Optional run manager. - * @returns {Promise} A promise that resolves to the generated string. - */ - /** @ignore */ - async _call( - prompt: string, - options: this["ParsedCallOptions"], - runManager?: CallbackManagerForLLMRun - ): Promise { - return this.streaming - ? await this.streamingCall(prompt, options, runManager) - : await this.noStreamingCall(prompt, options); - } - - private async streamingCall( - prompt: string, - options: this["ParsedCallOptions"], - runManager?: CallbackManagerForLLMRun - ): Promise { - const chunks = []; - for await (const chunk of this._streamResponseChunks( - prompt, - options, - runManager - )) { - chunks.push(chunk.text); - } - return chunks.join(""); - } - - private async noStreamingCall( - prompt: string, - options: this["ParsedCallOptions"] - ): Promise { - const body = await this.contentHandler.transformInput( - prompt, - this.modelKwargs ?? 
{} - ); - const { contentType, accepts } = this.contentHandler; - - const response = await this.caller.call(() => - this.client.send( - new InvokeEndpointCommand({ - EndpointName: this.endpointName, - Body: body, - ContentType: contentType, - Accept: accepts, - ...this.endpointKwargs, - }), - { abortSignal: options.signal } - ) - ); - - if (response.Body === undefined) { - throw new Error("Inference result missing Body"); - } - return this.contentHandler.transformOutput(response.Body); - } - - /** - * Streams response chunks from the SageMaker endpoint. - * @param {string} prompt The input prompt. - * @param {this["ParsedCallOptions"]} options Parsed call options. - * @returns {AsyncGenerator} An asynchronous generator yielding generation chunks. - */ - async *_streamResponseChunks( - prompt: string, - options: this["ParsedCallOptions"], - runManager?: CallbackManagerForLLMRun - ): AsyncGenerator { - const body = await this.contentHandler.transformInput( - prompt, - this.modelKwargs ?? {} - ); - const { contentType, accepts } = this.contentHandler; - - const stream = await this.caller.call(() => - this.client.send( - new InvokeEndpointWithResponseStreamCommand({ - EndpointName: this.endpointName, - Body: body, - ContentType: contentType, - Accept: accepts, - ...this.endpointKwargs, - }), - { abortSignal: options.signal } - ) - ); - - if (!stream.Body) { - throw new Error("Inference result missing Body"); - } - - for await (const chunk of stream.Body) { - if (chunk.PayloadPart && chunk.PayloadPart.Bytes) { - const text = await this.contentHandler.transformOutput( - chunk.PayloadPart.Bytes - ); - yield new GenerationChunk({ - text, - generationInfo: { - ...chunk, - response: undefined, - }, - }); - await runManager?.handleLLMNewToken(text); - } else if (chunk.InternalStreamFailure) { - throw new Error(chunk.InternalStreamFailure.message); - } else if (chunk.ModelStreamError) { - throw new Error(chunk.ModelStreamError.message); - } - } - } -} +export * from 
"@langchain/community/llms/sagemaker_endpoint"; \ No newline at end of file diff --git a/langchain/src/llms/watsonx_ai.ts b/langchain/src/llms/watsonx_ai.ts index dca510ba21c1..ccda3df79f20 100644 --- a/langchain/src/llms/watsonx_ai.ts +++ b/langchain/src/llms/watsonx_ai.ts @@ -1,194 +1 @@ -import { BaseLLMCallOptions, BaseLLMParams, LLM } from "./base.js"; -import { getEnvironmentVariable } from "../util/env.js"; - -/** - * The WatsonxAIParams interface defines the input parameters for - * the WatsonxAI class. - */ -export interface WatsonxAIParams extends BaseLLMParams { - /** - * WatsonX AI Complete Endpoint. - * Can be used if you want a fully custom endpoint. - */ - endpoint?: string; - /** - * IBM Cloud Compute Region. - * eg. us-south, us-east, etc. - */ - region?: string; - /** - * WatsonX AI Version. - * Date representing the WatsonX AI Version. - * eg. 2023-05-29 - */ - version?: string; - /** - * WatsonX AI Key. - * Provide API Key if you do not wish to automatically pull from env. - */ - ibmCloudApiKey?: string; - /** - * WatsonX AI Key. - * Provide API Key if you do not wish to automatically pull from env. - */ - projectId?: string; - /** - * Parameters accepted by the WatsonX AI Endpoint. - */ - modelParameters?: Record; - /** - * WatsonX AI Model ID. - */ - modelId?: string; -} - -const endpointConstructor = (region: string, version: string) => - `https://${region}.ml.cloud.ibm.com/ml/v1-beta/generation/text?version=${version}`; - -/** - * The WatsonxAI class is used to interact with Watsonx AI - * Inference Endpoint models. It uses IBM Cloud for authentication. - * This requires your IBM Cloud API Key which is autoloaded if not specified. 
- */ - -export class WatsonxAI extends LLM { - static lc_name() { - return "WatsonxAI"; - } - - get lc_secrets(): { [key: string]: string } | undefined { - return { - ibmCloudApiKey: "IBM_CLOUD_API_KEY", - projectId: "WATSONX_PROJECT_ID", - }; - } - - endpoint: string; - - region = "us-south"; - - version = "2023-05-29"; - - modelId = "meta-llama/llama-2-70b-chat"; - - modelKwargs?: Record; - - ibmCloudApiKey?: string; - - ibmCloudToken?: string; - - ibmCloudTokenExpiresAt?: number; - - projectId?: string; - - modelParameters?: Record; - - constructor(fields: WatsonxAIParams) { - super(fields); - - this.region = fields?.region ?? this.region; - this.version = fields?.version ?? this.version; - this.modelId = fields?.modelId ?? this.modelId; - this.ibmCloudApiKey = - fields?.ibmCloudApiKey ?? getEnvironmentVariable("IBM_CLOUD_API_KEY"); - this.projectId = - fields?.projectId ?? getEnvironmentVariable("WATSONX_PROJECT_ID"); - - this.endpoint = - fields?.endpoint ?? endpointConstructor(this.region, this.version); - this.modelParameters = fields.modelParameters; - - if (!this.ibmCloudApiKey) { - throw new Error("Missing IBM Cloud API Key"); - } - - if (!this.projectId) { - throw new Error("Missing WatsonX AI Project ID"); - } - } - - _llmType() { - return "watsonx_ai"; - } - - /** - * Calls the WatsonX AI endpoint and retrieves the result. - * @param {string} prompt The input prompt. - * @returns {Promise} A promise that resolves to the generated string. 
- */ - /** @ignore */ - async _call( - prompt: string, - _options: this["ParsedCallOptions"] - ): Promise { - interface WatsonxAIResponse { - results: { - generated_text: string; - generated_token_count: number; - input_token_count: number; - }[]; - errors: { - code: string; - message: string; - }[]; - } - const response = (await this.caller.call(async () => - fetch(this.endpoint, { - method: "POST", - headers: { - "Content-Type": "application/json", - Accept: "application/json", - Authorization: `Bearer ${await this.generateToken()}`, - }, - body: JSON.stringify({ - project_id: this.projectId, - model_id: this.modelId, - input: prompt, - parameters: this.modelParameters, - }), - }).then((res) => res.json()) - )) as WatsonxAIResponse; - - /** - * Handle Errors for invalid requests. - */ - if (response.errors) { - throw new Error(response.errors[0].message); - } - - return response.results[0].generated_text; - } - - async generateToken(): Promise { - if (this.ibmCloudToken && this.ibmCloudTokenExpiresAt) { - if (this.ibmCloudTokenExpiresAt > Date.now()) { - return this.ibmCloudToken; - } - } - - interface TokenResponse { - access_token: string; - expiration: number; - } - - const urlTokenParams = new URLSearchParams(); - urlTokenParams.append( - "grant_type", - "urn:ibm:params:oauth:grant-type:apikey" - ); - urlTokenParams.append("apikey", this.ibmCloudApiKey as string); - - const data = (await fetch("https://iam.cloud.ibm.com/identity/token", { - method: "POST", - headers: { - "Content-Type": "application/x-www-form-urlencoded", - }, - body: urlTokenParams, - }).then((res) => res.json())) as TokenResponse; - - this.ibmCloudTokenExpiresAt = data.expiration * 1000; - this.ibmCloudToken = data.access_token; - - return this.ibmCloudToken; - } -} +export * from "@langchain/community/llms/watsonx_ai"; \ No newline at end of file diff --git a/langchain/src/llms/writer.ts b/langchain/src/llms/writer.ts index 323167d41bdc..202b24f2783a 100644 --- 
a/langchain/src/llms/writer.ts +++ b/langchain/src/llms/writer.ts @@ -1,172 +1 @@ -import { Writer as WriterClient } from "@writerai/writer-sdk"; - -import { BaseLLMParams, LLM } from "./base.js"; -import { getEnvironmentVariable } from "../util/env.js"; - -/** - * Interface for the input parameters specific to the Writer model. - */ -export interface WriterInput extends BaseLLMParams { - /** Writer API key */ - apiKey?: string; - - /** Writer organization ID */ - orgId?: string | number; - - /** Model to use */ - model?: string; - - /** Sampling temperature to use */ - temperature?: number; - - /** Minimum number of tokens to generate. */ - minTokens?: number; - - /** Maximum number of tokens to generate in the completion. */ - maxTokens?: number; - - /** Generates this many completions server-side and returns the "best"." */ - bestOf?: number; - - /** Penalizes repeated tokens according to frequency. */ - frequencyPenalty?: number; - - /** Whether to return log probabilities. */ - logprobs?: number; - - /** Number of completions to generate. */ - n?: number; - - /** Penalizes repeated tokens regardless of frequency. */ - presencePenalty?: number; - - /** Total probability mass of tokens to consider at each step. */ - topP?: number; -} - -/** - * Class representing a Writer Large Language Model (LLM). It interacts - * with the Writer API to generate text completions. 
- */ -export class Writer extends LLM implements WriterInput { - static lc_name() { - return "Writer"; - } - - get lc_secrets(): { [key: string]: string } | undefined { - return { - apiKey: "WRITER_API_KEY", - orgId: "WRITER_ORG_ID", - }; - } - - get lc_aliases(): { [key: string]: string } | undefined { - return { - apiKey: "writer_api_key", - orgId: "writer_org_id", - }; - } - - lc_serializable = true; - - apiKey: string; - - orgId: number; - - model = "palmyra-instruct"; - - temperature?: number; - - minTokens?: number; - - maxTokens?: number; - - bestOf?: number; - - frequencyPenalty?: number; - - logprobs?: number; - - n?: number; - - presencePenalty?: number; - - topP?: number; - - constructor(fields?: WriterInput) { - super(fields ?? {}); - - const apiKey = fields?.apiKey ?? getEnvironmentVariable("WRITER_API_KEY"); - const orgId = fields?.orgId ?? getEnvironmentVariable("WRITER_ORG_ID"); - - if (!apiKey) { - throw new Error( - "Please set the WRITER_API_KEY environment variable or pass it to the constructor as the apiKey field." - ); - } - - if (!orgId) { - throw new Error( - "Please set the WRITER_ORG_ID environment variable or pass it to the constructor as the orgId field." - ); - } - - this.apiKey = apiKey; - this.orgId = typeof orgId === "string" ? parseInt(orgId, 10) : orgId; - this.model = fields?.model ?? this.model; - this.temperature = fields?.temperature ?? this.temperature; - this.minTokens = fields?.minTokens ?? this.minTokens; - this.maxTokens = fields?.maxTokens ?? this.maxTokens; - this.bestOf = fields?.bestOf ?? this.bestOf; - this.frequencyPenalty = fields?.frequencyPenalty ?? this.frequencyPenalty; - this.logprobs = fields?.logprobs ?? this.logprobs; - this.n = fields?.n ?? this.n; - this.presencePenalty = fields?.presencePenalty ?? this.presencePenalty; - this.topP = fields?.topP ?? 
this.topP; - } - - _llmType() { - return "writer"; - } - - /** @ignore */ - async _call( - prompt: string, - options: this["ParsedCallOptions"] - ): Promise { - const sdk = new WriterClient({ - security: { - apiKey: this.apiKey, - }, - organizationId: this.orgId, - }); - - return this.caller.callWithOptions({ signal: options.signal }, async () => { - try { - const res = await sdk.completions.create({ - completionRequest: { - prompt, - stop: options.stop, - temperature: this.temperature, - minTokens: this.minTokens, - maxTokens: this.maxTokens, - bestOf: this.bestOf, - n: this.n, - frequencyPenalty: this.frequencyPenalty, - logprobs: this.logprobs, - presencePenalty: this.presencePenalty, - topP: this.topP, - }, - modelId: this.model, - }); - return ( - res.completionResponse?.choices?.[0].text ?? "No completion found." - ); - } catch (e) { - // eslint-disable-next-line @typescript-eslint/no-explicit-any - (e as any).response = (e as any).rawResponse; - throw e; - } - }); - } -} +export * from "@langchain/community/llms/writer"; \ No newline at end of file diff --git a/langchain/src/llms/yandex.ts b/langchain/src/llms/yandex.ts index 96b70e7ced55..495c0dcf55a3 100644 --- a/langchain/src/llms/yandex.ts +++ b/langchain/src/llms/yandex.ts @@ -1,123 +1 @@ -import { getEnvironmentVariable } from "../util/env.js"; -import { LLM, BaseLLMParams } from "./base.js"; - -const apiUrl = "https://llm.api.cloud.yandex.net/llm/v1alpha/instruct"; - -export interface YandexGPTInputs extends BaseLLMParams { - /** - * What sampling temperature to use. - * Should be a double number between 0 (inclusive) and 1 (inclusive). - */ - temperature?: number; - - /** - * Maximum limit on the total number of tokens - * used for both the input prompt and the generated response. - */ - maxTokens?: number; - - /** Model name to use. */ - model?: string; - - /** - * Yandex Cloud Api Key for service account - * with the `ai.languageModels.user` role. 
- */ - apiKey?: string; - - /** - * Yandex Cloud IAM token for service account - * with the `ai.languageModels.user` role. - */ - iamToken?: string; -} - -export class YandexGPT extends LLM implements YandexGPTInputs { - static lc_name() { - return "Yandex GPT"; - } - - get lc_secrets(): { [key: string]: string } | undefined { - return { - apiKey: "YC_API_KEY", - iamToken: "YC_IAM_TOKEN", - }; - } - - temperature = 0.6; - - maxTokens = 1700; - - model = "general"; - - apiKey?: string; - - iamToken?: string; - - constructor(fields?: YandexGPTInputs) { - super(fields ?? {}); - - const apiKey = fields?.apiKey ?? getEnvironmentVariable("YC_API_KEY"); - - const iamToken = fields?.iamToken ?? getEnvironmentVariable("YC_IAM_TOKEN"); - - if (apiKey === undefined && iamToken === undefined) { - throw new Error( - "Please set the YC_API_KEY or YC_IAM_TOKEN environment variable or pass it to the constructor as the apiKey or iamToken field." - ); - } - - this.apiKey = apiKey; - this.iamToken = iamToken; - this.maxTokens = fields?.maxTokens ?? this.maxTokens; - this.temperature = fields?.temperature ?? this.temperature; - this.model = fields?.model ?? 
this.model; - } - - _llmType() { - return "yandexgpt"; - } - - /** @ignore */ - async _call( - prompt: string, - options: this["ParsedCallOptions"] - ): Promise { - // Hit the `generate` endpoint on the `large` model - return this.caller.callWithOptions({ signal: options.signal }, async () => { - const headers = { "Content-Type": "application/json", Authorization: "" }; - if (this.apiKey !== undefined) { - headers.Authorization = `Api-Key ${this.apiKey}`; - } else { - headers.Authorization = `Bearer ${this.iamToken}`; - } - const bodyData = { - model: this.model, - generationOptions: { - temperature: this.temperature, - maxTokens: this.maxTokens, - }, - - requestText: prompt, - }; - - try { - const response = await fetch(apiUrl, { - method: "POST", - headers, - body: JSON.stringify(bodyData), - }); - if (!response.ok) { - throw new Error( - `Failed to fetch ${apiUrl} from YandexGPT: ${response.status}` - ); - } - - const responseData = await response.json(); - return responseData.result.alternatives[0].text; - } catch (error) { - throw new Error(`Failed to fetch ${apiUrl} from YandexGPT ${error}`); - } - }); - } -} +export * from "@langchain/community/llms/yandex"; \ No newline at end of file diff --git a/langchain/src/retrievers/amazon_kendra.ts b/langchain/src/retrievers/amazon_kendra.ts index fb2ba2123b4f..234e3e745f47 100644 --- a/langchain/src/retrievers/amazon_kendra.ts +++ b/langchain/src/retrievers/amazon_kendra.ts @@ -1,317 +1 @@ -import { - AttributeFilter, - DocumentAttribute, - DocumentAttributeValue, - KendraClient, - KendraClientConfig, - QueryCommand, - QueryCommandOutput, - QueryResultItem, - RetrieveCommand, - RetrieveCommandOutput, - RetrieveResultItem, -} from "@aws-sdk/client-kendra"; - -import { BaseRetriever } from "../schema/retriever.js"; -import { Document } from "../document.js"; - -/** - * Interface for the arguments required to initialize an - * AmazonKendraRetriever instance. 
- */ -export interface AmazonKendraRetrieverArgs { - indexId: string; - topK: number; - region: string; - attributeFilter?: AttributeFilter; - clientOptions?: KendraClientConfig; -} - -/** - * Class for interacting with Amazon Kendra, an intelligent search service - * provided by AWS. Extends the BaseRetriever class. - * @example - * ```typescript - * const retriever = new AmazonKendraRetriever({ - * topK: 10, - * indexId: "YOUR_INDEX_ID", - * region: "us-east-2", - * clientOptions: { - * credentials: { - * accessKeyId: "YOUR_ACCESS_KEY_ID", - * secretAccessKey: "YOUR_SECRET_ACCESS_KEY", - * }, - * }, - * }); - * - * const docs = await retriever.getRelevantDocuments("How are clouds formed?"); - * ``` - */ -export class AmazonKendraRetriever extends BaseRetriever { - static lc_name() { - return "AmazonKendraRetriever"; - } - - lc_namespace = ["langchain", "retrievers", "amazon_kendra"]; - - indexId: string; - - topK: number; - - kendraClient: KendraClient; - - attributeFilter?: AttributeFilter; - - constructor({ - indexId, - topK = 10, - clientOptions, - attributeFilter, - region, - }: AmazonKendraRetrieverArgs) { - super(); - - if (!region) { - throw new Error("Please pass regionName field to the constructor!"); - } - - if (!indexId) { - throw new Error("Please pass Kendra Index Id to the constructor"); - } - - this.topK = topK; - this.kendraClient = new KendraClient({ - region, - ...clientOptions, - }); - this.attributeFilter = attributeFilter; - this.indexId = indexId; - } - - // A method to combine title and excerpt into a single string. - /** - * Combines title and excerpt into a single string. - * @param title The title of the document. - * @param excerpt An excerpt from the document. - * @returns A single string combining the title and excerpt. 
- */ - combineText(title?: string, excerpt?: string): string { - let text = ""; - if (title) { - text += `Document Title: ${title}\n`; - } - if (excerpt) { - text += `Document Excerpt: \n${excerpt}\n`; - } - return text; - } - - // A method to clean the result text by replacing sequences of whitespace with a single space and removing ellipses. - /** - * Cleans the result text by replacing sequences of whitespace with a - * single space and removing ellipses. - * @param resText The result text to clean. - * @returns The cleaned result text. - */ - cleanResult(resText: string) { - const res = resText.replace(/\s+/g, " ").replace(/\.\.\./g, ""); - return res; - } - - // A method to extract the attribute value from a DocumentAttributeValue object. - /** - * Extracts the attribute value from a DocumentAttributeValue object. - * @param value The DocumentAttributeValue object to extract the value from. - * @returns The extracted attribute value. - */ - getDocAttributeValue(value: DocumentAttributeValue) { - if (value.DateValue) { - return value.DateValue; - } - if (value.LongValue) { - return value.LongValue; - } - if (value.StringListValue) { - return value.StringListValue; - } - if (value.StringValue) { - return value.StringValue; - } - return ""; - } - - // A method to extract the attribute key-value pairs from an array of DocumentAttribute objects. - /** - * Extracts the attribute key-value pairs from an array of - * DocumentAttribute objects. - * @param documentAttributes The array of DocumentAttribute objects to extract the key-value pairs from. - * @returns An object containing the extracted attribute key-value pairs. 
- */ - getDocAttributes(documentAttributes?: DocumentAttribute[]): { - [key: string]: unknown; - } { - const attributes: { [key: string]: unknown } = {}; - if (documentAttributes) { - for (const attr of documentAttributes) { - if (attr.Key && attr.Value) { - attributes[attr.Key] = this.getDocAttributeValue(attr.Value); - } - } - } - return attributes; - } - - // A method to convert a RetrieveResultItem object into a Document object. - /** - * Converts a RetrieveResultItem object into a Document object. - * @param item The RetrieveResultItem object to convert. - * @returns A Document object. - */ - convertRetrieverItem(item: RetrieveResultItem) { - const title = item.DocumentTitle || ""; - const excerpt = item.Content ? this.cleanResult(item.Content) : ""; - const pageContent = this.combineText(title, excerpt); - const source = item.DocumentURI; - const attributes = this.getDocAttributes(item.DocumentAttributes); - const metadata = { - source, - title, - excerpt, - document_attributes: attributes, - }; - - return new Document({ pageContent, metadata }); - } - - // A method to extract the top-k documents from a RetrieveCommandOutput object. - /** - * Extracts the top-k documents from a RetrieveCommandOutput object. - * @param response The RetrieveCommandOutput object to extract the documents from. - * @param pageSize The number of documents to extract. - * @returns An array of Document objects. - */ - getRetrieverDocs( - response: RetrieveCommandOutput, - pageSize: number - ): Document[] { - if (!response.ResultItems) return []; - const { length } = response.ResultItems; - const count = length < pageSize ? length : pageSize; - - return response.ResultItems.slice(0, count).map((item) => - this.convertRetrieverItem(item) - ); - } - - // A method to extract the excerpt text from a QueryResultItem object. - /** - * Extracts the excerpt text from a QueryResultItem object. - * @param item The QueryResultItem object to extract the excerpt text from. 
- * @returns The extracted excerpt text. - */ - getQueryItemExcerpt(item: QueryResultItem) { - if ( - item.AdditionalAttributes && - item.AdditionalAttributes.length && - item.AdditionalAttributes[0].Key === "AnswerText" - ) { - if (!item.AdditionalAttributes) { - return ""; - } - if (!item.AdditionalAttributes[0]) { - return ""; - } - - return this.cleanResult( - item.AdditionalAttributes[0].Value?.TextWithHighlightsValue?.Text || "" - ); - } else if (item.DocumentExcerpt) { - return this.cleanResult(item.DocumentExcerpt.Text || ""); - } else { - return ""; - } - } - - // A method to convert a QueryResultItem object into a Document object. - /** - * Converts a QueryResultItem object into a Document object. - * @param item The QueryResultItem object to convert. - * @returns A Document object. - */ - convertQueryItem(item: QueryResultItem) { - const title = item.DocumentTitle?.Text || ""; - const excerpt = this.getQueryItemExcerpt(item); - const pageContent = this.combineText(title, excerpt); - const source = item.DocumentURI; - const attributes = this.getDocAttributes(item.DocumentAttributes); - const metadata = { - source, - title, - excerpt, - document_attributes: attributes, - }; - - return new Document({ pageContent, metadata }); - } - - // A method to extract the top-k documents from a QueryCommandOutput object. - /** - * Extracts the top-k documents from a QueryCommandOutput object. - * @param response The QueryCommandOutput object to extract the documents from. - * @param pageSize The number of documents to extract. - * @returns An array of Document objects. - */ - getQueryDocs(response: QueryCommandOutput, pageSize: number) { - if (!response.ResultItems) return []; - const { length } = response.ResultItems; - const count = length < pageSize ? length : pageSize; - return response.ResultItems.slice(0, count).map((item) => - this.convertQueryItem(item) - ); - } - - // A method to send a retrieve or query request to Kendra and return the top-k documents. 
- /** - * Sends a retrieve or query request to Kendra and returns the top-k - * documents. - * @param query The query to send to Kendra. - * @param topK The number of top documents to return. - * @param attributeFilter Optional filter to apply when retrieving documents. - * @returns A Promise that resolves to an array of Document objects. - */ - async queryKendra( - query: string, - topK: number, - attributeFilter?: AttributeFilter - ) { - const retrieveCommand = new RetrieveCommand({ - IndexId: this.indexId, - QueryText: query, - PageSize: topK, - AttributeFilter: attributeFilter, - }); - - const retrieveResponse = await this.kendraClient.send(retrieveCommand); - const retriveLength = retrieveResponse.ResultItems?.length; - - if (retriveLength === 0) { - // Retrieve API returned 0 results, call query API - const queryCommand = new QueryCommand({ - IndexId: this.indexId, - QueryText: query, - PageSize: topK, - AttributeFilter: attributeFilter, - }); - - const queryResponse = await this.kendraClient.send(queryCommand); - return this.getQueryDocs(queryResponse, this.topK); - } else { - return this.getRetrieverDocs(retrieveResponse, this.topK); - } - } - - async _getRelevantDocuments(query: string): Promise { - const docs = await this.queryKendra(query, this.topK, this.attributeFilter); - return docs; - } -} +export * from "@langchain/community/retrievers/amazon_kendra"; \ No newline at end of file diff --git a/langchain/src/retrievers/chaindesk.ts b/langchain/src/retrievers/chaindesk.ts index 26175ede4b98..c62294c251df 100644 --- a/langchain/src/retrievers/chaindesk.ts +++ b/langchain/src/retrievers/chaindesk.ts @@ -1,97 +1 @@ -import { BaseRetriever, type BaseRetrieverInput } from "../schema/retriever.js"; -import { Document } from "../document.js"; -import { AsyncCaller, type AsyncCallerParams } from "../util/async_caller.js"; - -export interface ChaindeskRetrieverArgs - extends AsyncCallerParams, - BaseRetrieverInput { - datastoreId: string; - topK?: number; - 
filter?: Record; - apiKey?: string; -} - -interface Berry { - text: string; - score: number; - source?: string; - [key: string]: unknown; -} - -/** - * @example - * ```typescript - * const retriever = new ChaindeskRetriever({ - * datastoreId: "DATASTORE_ID", - * apiKey: "CHAINDESK_API_KEY", - * topK: 8, - * }); - * const docs = await retriever.getRelevantDocuments("hello"); - * ``` - */ -export class ChaindeskRetriever extends BaseRetriever { - static lc_name() { - return "ChaindeskRetriever"; - } - - lc_namespace = ["langchain", "retrievers", "chaindesk"]; - - caller: AsyncCaller; - - datastoreId: string; - - topK?: number; - - filter?: Record; - - apiKey?: string; - - constructor({ - datastoreId, - apiKey, - topK, - filter, - ...rest - }: ChaindeskRetrieverArgs) { - super(); - - this.caller = new AsyncCaller(rest); - this.datastoreId = datastoreId; - this.apiKey = apiKey; - this.topK = topK; - this.filter = filter; - } - - async getRelevantDocuments(query: string): Promise { - const r = await this.caller.call( - fetch, - `https://app.chaindesk.ai/api/datastores/${this.datastoreId}/query`, - { - method: "POST", - body: JSON.stringify({ - query, - ...(this.topK ? { topK: this.topK } : {}), - ...(this.filter ? { filters: this.filter } : {}), - }), - headers: { - "Content-Type": "application/json", - ...(this.apiKey ? 
{ Authorization: `Bearer ${this.apiKey}` } : {}), - }, - } - ); - - const { results } = (await r.json()) as { results: Berry[] }; - - return results.map( - ({ text, score, source, ...rest }) => - new Document({ - pageContent: text, - metadata: { - score, - source, - ...rest, - }, - }) - ); - } -} +export * from "@langchain/community/retrievers/chaindesk"; \ No newline at end of file diff --git a/langchain/src/retrievers/databerry.ts b/langchain/src/retrievers/databerry.ts index 3a8358d5b82a..49932d11a3dd 100644 --- a/langchain/src/retrievers/databerry.ts +++ b/langchain/src/retrievers/databerry.ts @@ -1,94 +1 @@ -import { BaseRetriever, BaseRetrieverInput } from "../schema/retriever.js"; -import { Document } from "../document.js"; -import { AsyncCaller, AsyncCallerParams } from "../util/async_caller.js"; - -/** - * Interface for the arguments required to create a new instance of - * DataberryRetriever. - */ -export interface DataberryRetrieverArgs - extends AsyncCallerParams, - BaseRetrieverInput { - datastoreUrl: string; - topK?: number; - apiKey?: string; -} - -/** - * Interface for the structure of a Berry object returned by the Databerry - * API. - */ -interface Berry { - text: string; - score: number; - source?: string; - [key: string]: unknown; -} - -/** - * A specific implementation of a document retriever for the Databerry - * API. It extends the BaseRetriever class, which is an abstract base - * class for a document retrieval system in LangChain. 
- */ -/** @deprecated Use "langchain/retrievers/chaindesk" instead */ -export class DataberryRetriever extends BaseRetriever { - static lc_name() { - return "DataberryRetriever"; - } - - lc_namespace = ["langchain", "retrievers", "databerry"]; - - get lc_secrets() { - return { apiKey: "DATABERRY_API_KEY" }; - } - - get lc_aliases() { - return { apiKey: "api_key" }; - } - - caller: AsyncCaller; - - datastoreUrl: string; - - topK?: number; - - apiKey?: string; - - constructor(fields: DataberryRetrieverArgs) { - super(fields); - const { datastoreUrl, apiKey, topK, ...rest } = fields; - - this.caller = new AsyncCaller(rest); - this.datastoreUrl = datastoreUrl; - this.apiKey = apiKey; - this.topK = topK; - } - - async _getRelevantDocuments(query: string): Promise { - const r = await this.caller.call(fetch, this.datastoreUrl, { - method: "POST", - body: JSON.stringify({ - query, - ...(this.topK ? { topK: this.topK } : {}), - }), - headers: { - "Content-Type": "application/json", - ...(this.apiKey ? { Authorization: `Bearer ${this.apiKey}` } : {}), - }, - }); - - const { results } = (await r.json()) as { results: Berry[] }; - - return results.map( - ({ text, score, source, ...rest }) => - new Document({ - pageContent: text, - metadata: { - score, - source, - ...rest, - }, - }) - ); - } -} +export * from "@langchain/community/retrievers/databerry"; \ No newline at end of file diff --git a/langchain/src/retrievers/metal.ts b/langchain/src/retrievers/metal.ts index 2632e03826eb..92a8510d11b0 100644 --- a/langchain/src/retrievers/metal.ts +++ b/langchain/src/retrievers/metal.ts @@ -1,70 +1 @@ -import Metal from "@getmetal/metal-sdk"; - -import { BaseRetriever, BaseRetrieverInput } from "../schema/retriever.js"; -import { Document } from "../document.js"; - -/** - * Interface for the fields required during the initialization of a - * `MetalRetriever` instance. It extends the `BaseRetrieverInput` - * interface and adds a `client` field of type `Metal`. 
- */ -export interface MetalRetrieverFields extends BaseRetrieverInput { - client: Metal; -} - -/** - * Interface to represent a response item from the Metal service. It - * contains a `text` field and an index signature to allow for additional - * unknown properties. - */ -interface ResponseItem { - text: string; - [key: string]: unknown; -} - -/** - * Class used to interact with the Metal service, a managed retrieval & - * memory platform. It allows you to index your data into Metal and run - * semantic search and retrieval on it. It extends the `BaseRetriever` - * class and requires a `Metal` instance and a dictionary of parameters to - * pass to the Metal API during its initialization. - * @example - * ```typescript - * const retriever = new MetalRetriever({ - * client: new Metal( - * process.env.METAL_API_KEY, - * process.env.METAL_CLIENT_ID, - * process.env.METAL_INDEX_ID, - * ), - * }); - * const docs = await retriever.getRelevantDocuments("hello"); - * ``` - */ -export class MetalRetriever extends BaseRetriever { - static lc_name() { - return "MetalRetriever"; - } - - lc_namespace = ["langchain", "retrievers", "metal"]; - - private client: Metal; - - constructor(fields: MetalRetrieverFields) { - super(fields); - - this.client = fields.client; - } - - async _getRelevantDocuments(query: string): Promise { - const res = await this.client.search({ text: query }); - - const items = ("data" in res ? 
res.data : res) as ResponseItem[]; - return items.map( - ({ text, metadata }) => - new Document({ - pageContent: text, - metadata: metadata as Record, - }) - ); - } -} +export * from "@langchain/community/retrievers/metal"; \ No newline at end of file diff --git a/langchain/src/retrievers/supabase.ts b/langchain/src/retrievers/supabase.ts index ec906c42a8d7..9c18d35bf9ff 100644 --- a/langchain/src/retrievers/supabase.ts +++ b/langchain/src/retrievers/supabase.ts @@ -1,238 +1 @@ -import type { SupabaseClient } from "@supabase/supabase-js"; -import { Embeddings } from "../embeddings/base.js"; -import { Document } from "../document.js"; -import { BaseRetriever, BaseRetrieverInput } from "../schema/retriever.js"; -import { - CallbackManagerForRetrieverRun, - Callbacks, -} from "../callbacks/manager.js"; - -interface SearchEmbeddingsParams { - query_embedding: number[]; - match_count: number; // int - filter?: Record; // jsonb -} - -interface SearchKeywordParams { - query_text: string; - match_count: number; // int -} - -interface SearchResponseRow { - id: number; - content: string; - metadata: object; - similarity: number; -} - -type SearchResult = [Document, number, number]; - -export interface SupabaseLibArgs extends BaseRetrieverInput { - client: SupabaseClient; - /** - * The table name on Supabase. Defaults to "documents". - */ - tableName?: string; - /** - * The name of the Similarity search function on Supabase. Defaults to "match_documents". - */ - similarityQueryName?: string; - /** - * The name of the Keyword search function on Supabase. Defaults to "kw_match_documents". - */ - keywordQueryName?: string; - /** - * The number of documents to return from the similarity search. Defaults to 2. - */ - similarityK?: number; - /** - * The number of documents to return from the keyword search. Defaults to 2. 
- */ - keywordK?: number; -} - -export interface SupabaseHybridSearchParams { - query: string; - similarityK: number; - keywordK: number; -} - -/** - * Class for performing hybrid search operations on a Supabase database. - * It extends the `BaseRetriever` class and implements methods for - * similarity search, keyword search, and hybrid search. - */ -export class SupabaseHybridSearch extends BaseRetriever { - static lc_name() { - return "SupabaseHybridSearch"; - } - - lc_namespace = ["langchain", "retrievers", "supabase"]; - - similarityK: number; - - query: string; - - keywordK: number; - - similarityQueryName: string; - - client: SupabaseClient; - - tableName: string; - - keywordQueryName: string; - - embeddings: Embeddings; - - constructor(embeddings: Embeddings, args: SupabaseLibArgs) { - super(args); - this.embeddings = embeddings; - this.client = args.client; - this.tableName = args.tableName || "documents"; - this.similarityQueryName = args.similarityQueryName || "match_documents"; - this.keywordQueryName = args.keywordQueryName || "kw_match_documents"; - this.similarityK = args.similarityK || 2; - this.keywordK = args.keywordK || 2; - } - - /** - * Performs a similarity search on the Supabase database using the - * provided query and returns the top 'k' similar documents. - * @param query The query to use for the similarity search. - * @param k The number of top similar documents to return. - * @param _callbacks Optional callbacks to pass to the embedQuery method. - * @returns A promise that resolves to an array of search results. Each result is a tuple containing a Document, its similarity score, and its ID. 
- */ - protected async similaritySearch( - query: string, - k: number, - _callbacks?: Callbacks // implement passing to embedQuery later - ): Promise { - const embeddedQuery = await this.embeddings.embedQuery(query); - - const matchDocumentsParams: SearchEmbeddingsParams = { - query_embedding: embeddedQuery, - match_count: k, - }; - - if (Object.keys(this.metadata ?? {}).length > 0) { - matchDocumentsParams.filter = this.metadata; - } - - const { data: searches, error } = await this.client.rpc( - this.similarityQueryName, - matchDocumentsParams - ); - - if (error) { - throw new Error( - `Error searching for documents: ${error.code} ${error.message} ${error.details}` - ); - } - - return (searches as SearchResponseRow[]).map((resp) => [ - new Document({ - metadata: resp.metadata, - pageContent: resp.content, - }), - resp.similarity, - resp.id, - ]); - } - - /** - * Performs a keyword search on the Supabase database using the provided - * query and returns the top 'k' documents that match the keywords. - * @param query The query to use for the keyword search. - * @param k The number of top documents to return that match the keywords. - * @returns A promise that resolves to an array of search results. Each result is a tuple containing a Document, its similarity score multiplied by 10, and its ID. 
- */ - protected async keywordSearch( - query: string, - k: number - ): Promise { - const kwMatchDocumentsParams: SearchKeywordParams = { - query_text: query, - match_count: k, - }; - - const { data: searches, error } = await this.client.rpc( - this.keywordQueryName, - kwMatchDocumentsParams - ); - - if (error) { - throw new Error( - `Error searching for documents: ${error.code} ${error.message} ${error.details}` - ); - } - - return (searches as SearchResponseRow[]).map((resp) => [ - new Document({ - metadata: resp.metadata, - pageContent: resp.content, - }), - resp.similarity * 10, - resp.id, - ]); - } - - /** - * Combines the results of the `similaritySearch` and `keywordSearch` - * methods and returns the top 'k' documents based on a combination of - * similarity and keyword matching. - * @param query The query to use for the hybrid search. - * @param similarityK The number of top similar documents to return. - * @param keywordK The number of top documents to return that match the keywords. - * @param callbacks Optional callbacks to pass to the similaritySearch method. - * @returns A promise that resolves to an array of search results. Each result is a tuple containing a Document, its combined score, and its ID. 
- */ - protected async hybridSearch( - query: string, - similarityK: number, - keywordK: number, - callbacks?: Callbacks - ): Promise { - const similarity_search = this.similaritySearch( - query, - similarityK, - callbacks - ); - - const keyword_search = this.keywordSearch(query, keywordK); - - return Promise.all([similarity_search, keyword_search]) - .then((results) => results.flat()) - .then((results) => { - const picks = new Map(); - - results.forEach((result) => { - const id = result[2]; - const nextScore = result[1]; - const prevScore = picks.get(id)?.[1]; - - if (prevScore === undefined || nextScore > prevScore) { - picks.set(id, result); - } - }); - - return Array.from(picks.values()); - }) - .then((results) => results.sort((a, b) => b[1] - a[1])); - } - - async _getRelevantDocuments( - query: string, - runManager?: CallbackManagerForRetrieverRun - ): Promise { - const searchResults = await this.hybridSearch( - query, - this.similarityK, - this.keywordK, - runManager?.getChild("hybrid_search") - ); - - return searchResults.map(([doc]) => doc); - } -} +export * from "@langchain/community/retrievers/supabase"; \ No newline at end of file diff --git a/langchain/src/retrievers/tavily_search_api.ts b/langchain/src/retrievers/tavily_search_api.ts index 7b65e36a6b89..1f906eeb638f 100644 --- a/langchain/src/retrievers/tavily_search_api.ts +++ b/langchain/src/retrievers/tavily_search_api.ts @@ -1,140 +1 @@ -import { Document } from "../document.js"; -import { CallbackManagerForRetrieverRun } from "../callbacks/manager.js"; -import { BaseRetriever, type BaseRetrieverInput } from "../schema/retriever.js"; -import { getEnvironmentVariable } from "../util/env.js"; - -/** - * Options for the HydeRetriever class, which includes a BaseLanguageModel - * instance, a VectorStore instance, and an optional promptTemplate which - * can either be a BasePromptTemplate instance or a PromptKey. 
- */ -export type TavilySearchAPIRetrieverFields = BaseRetrieverInput & { - k?: number; - includeGeneratedAnswer?: boolean; - includeRawContent?: boolean; - includeImages?: boolean; - searchDepth?: "basic" | "advanced"; - includeDomains?: string[]; - excludeDomains?: string[]; - kwargs?: Record; - apiKey?: string; -}; - -/** - * A class for retrieving documents related to a given search term - * using the Tavily Search API. - */ -export class TavilySearchAPIRetriever extends BaseRetriever { - static lc_name() { - return "TavilySearchAPIRetriever"; - } - - get lc_namespace(): string[] { - return ["langchain", "retrievers", "tavily_search_api"]; - } - - k = 10; - - includeGeneratedAnswer = false; - - includeRawContent = false; - - includeImages = false; - - searchDepth = "basic"; - - includeDomains?: string[]; - - excludeDomains?: string[]; - - kwargs: Record = {}; - - apiKey?: string; - - constructor(fields?: TavilySearchAPIRetrieverFields) { - super(fields); - this.k = fields?.k ?? this.k; - this.includeGeneratedAnswer = - fields?.includeGeneratedAnswer ?? this.includeGeneratedAnswer; - this.includeRawContent = - fields?.includeRawContent ?? this.includeRawContent; - this.includeImages = fields?.includeImages ?? this.includeImages; - this.searchDepth = fields?.searchDepth ?? this.searchDepth; - this.includeDomains = fields?.includeDomains ?? this.includeDomains; - this.excludeDomains = fields?.excludeDomains ?? this.excludeDomains; - this.kwargs = fields?.kwargs ?? this.kwargs; - this.apiKey = fields?.apiKey ?? getEnvironmentVariable("TAVILY_API_KEY"); - if (this.apiKey === undefined) { - throw new Error( - `No Tavily API key found. 
Either set an environment variable named "TAVILY_API_KEY" or pass an API key as "apiKey".` - ); - } - } - - async _getRelevantDocuments( - query: string, - _runManager?: CallbackManagerForRetrieverRun - ): Promise { - const body: Record = { - query, - include_answer: this.includeGeneratedAnswer, - include_raw_content: this.includeRawContent, - include_images: this.includeImages, - max_results: this.k, - search_depth: this.searchDepth, - api_key: this.apiKey, - }; - if (this.includeDomains) { - body.include_domains = this.includeDomains; - } - if (this.excludeDomains) { - body.exclude_domains = this.excludeDomains; - } - - const response = await fetch("https://api.tavily.com/search", { - method: "POST", - headers: { - "content-type": "application/json", - }, - body: JSON.stringify({ ...body, ...this.kwargs }), - }); - const json = await response.json(); - if (!response.ok) { - throw new Error( - `Request failed with status code ${response.status}: ${json.error}` - ); - } - if (!Array.isArray(json.results)) { - throw new Error(`Could not parse Tavily results. Please try again.`); - } - // eslint-disable-next-line @typescript-eslint/no-explicit-any - const docs: Document[] = json.results.map((result: any) => { - const pageContent = this.includeRawContent - ? 
result.raw_content - : result.content; - const metadata = { - title: result.title, - source: result.url, - ...Object.fromEntries( - Object.entries(result).filter( - ([k]) => !["content", "title", "url", "raw_content"].includes(k) - ) - ), - images: json.images, - }; - return new Document({ pageContent, metadata }); - }); - if (this.includeGeneratedAnswer) { - docs.push( - new Document({ - pageContent: json.answer, - metadata: { - title: "Suggested Answer", - source: "https://tavily.com/", - }, - }) - ); - } - return docs; - } -} +export * from "@langchain/community/retrievers/tavily_search_api"; \ No newline at end of file diff --git a/langchain/src/retrievers/vespa.ts b/langchain/src/retrievers/vespa.ts index 09b3b3e46763..234e3e745f47 100644 --- a/langchain/src/retrievers/vespa.ts +++ b/langchain/src/retrievers/vespa.ts @@ -1,92 +1 @@ -import { Document } from "../document.js"; -import { - RemoteRetriever, - RemoteRetrieverValues, - RemoteRetrieverParams, -} from "./remote/base.js"; - -export interface VespaRetrieverParams extends RemoteRetrieverParams { - /** - * The body of the query to send to Vespa - */ - query_body: object; - /** - * The name of the field the content resides in - */ - content_field: string; -} - -/** - * Class responsible for retrieving data from Vespa. It extends the - * `RemoteRetriever` class and includes methods for creating the JSON body - * for a query and processing the JSON response from Vespa. 
- * @example - * ```typescript - * const retriever = new VespaRetriever({ - * url: "https: - * auth: false, - * query_body: { - * yql: "select content from paragraph where userQuery()", - * hits: 5, - * ranking: "documentation", - * locale: "en-us", - * }, - * content_field: "content", - * }); - * const result = await retriever.getRelevantDocuments("what is vespa?"); - * ``` - */ -export class VespaRetriever extends RemoteRetriever { - static lc_name() { - return "VespaRetriever"; - } - - lc_namespace = ["langchain", "retrievers", "vespa"]; - - query_body: object; - - content_field: string; - - constructor(fields: VespaRetrieverParams) { - super(fields); - this.query_body = fields.query_body; - this.content_field = fields.content_field; - - this.url = `${this.url}/search/?`; - } - - /** - * Method that takes a query string as input and returns a JSON object - * that includes the query and the original `query_body`. - * @param query The query string to be sent to Vespa. - * @returns A JSON object that includes the query and the original `query_body`. - */ - createJsonBody(query: string): RemoteRetrieverValues { - return { - ...this.query_body, - query, - }; - } - - /** - * Method that processes the JSON response from Vespa into an array of - * `Document` instances. Each `Document` instance includes the content - * from the specified `content_field` and the document's ID. - * @param json The JSON response from Vespa. - * @returns An array of `Document` instances. 
- */ -  processJsonResponse(json: RemoteRetrieverValues): Document[] { -    return json.root.children.map( -      (doc: { -        id: string; -        relevance: number; -        source: string; -        fields: Record; -      }) => -        new Document({ -          pageContent: doc.fields[this.content_field] as string, -          metadata: { id: doc.id }, -        }) -    ); -  } -} +export * from "@langchain/community/retrievers/vespa"; \ No newline at end of file diff --git a/langchain/src/retrievers/zep.ts b/langchain/src/retrievers/zep.ts index f87d38ca79d8..6519b842da71 100644 --- a/langchain/src/retrievers/zep.ts +++ b/langchain/src/retrievers/zep.ts @@ -1,169 +1 @@ -import { -  MemorySearchPayload, -  MemorySearchResult, -  NotFoundError, -  ZepClient, -} from "@getzep/zep-js"; -import { BaseRetriever, BaseRetrieverInput } from "../schema/retriever.js"; -import { Document } from "../document.js"; - -/** - * Configuration interface for the ZepRetriever class. Extends the - * BaseRetrieverInput interface. - * - * @argument {string} sessionId - The ID of the Zep session. - * @argument {string} url - The URL of the Zep API. - * @argument {number} [topK] - The number of results to return. - * @argument {string} [apiKey] - The API key for the Zep API. - * @argument [searchScope] [searchScope] - The scope of the search: "messages" or "summary". - * @argument [searchType] [searchType] - The type of search to perform: "similarity" or "mmr". - * @argument {number} [mmrLambda] - The lambda value for the MMR search. - * @argument {Record} [filter] - The metadata filter to apply to the search. - */ -export interface ZepRetrieverConfig extends BaseRetrieverInput { -  sessionId: string; -  url: string; -  topK?: number; -  apiKey?: string; -  searchScope?: "messages" | "summary"; -  searchType?: "similarity" | "mmr"; -  mmrLambda?: number; -  filter?: Record; -} - -/** - * Class for retrieving information from a Zep long-term memory store. - * Extends the BaseRetriever class. 
- * @example - * ```typescript - * const retriever = new ZepRetriever({ - * url: "http: - * sessionId: "session_exampleUUID", - * topK: 3, - * }); - * const query = "Can I drive red cars in France?"; - * const docs = await retriever.getRelevantDocuments(query); - * ``` - */ -export class ZepRetriever extends BaseRetriever { - static lc_name() { - return "ZepRetriever"; - } - - lc_namespace = ["langchain", "retrievers", "zep"]; - - get lc_secrets(): { [key: string]: string } | undefined { - return { - apiKey: "ZEP_API_KEY", - url: "ZEP_API_URL", - }; - } - - get lc_aliases(): { [key: string]: string } | undefined { - return { apiKey: "api_key" }; - } - - zepClientPromise: Promise; - - private sessionId: string; - - private topK?: number; - - private searchScope?: "messages" | "summary"; - - private searchType?: "similarity" | "mmr"; - - private mmrLambda?: number; - - private filter?: Record; - - constructor(config: ZepRetrieverConfig) { - super(config); - this.sessionId = config.sessionId; - this.topK = config.topK; - this.searchScope = config.searchScope; - this.searchType = config.searchType; - this.mmrLambda = config.mmrLambda; - this.filter = config.filter; - this.zepClientPromise = ZepClient.init(config.url, config.apiKey); - } - - /** - * Converts an array of message search results to an array of Document objects. - * @param {MemorySearchResult[]} results - The array of search results. - * @returns {Document[]} An array of Document objects representing the search results. - */ - private searchMessageResultToDoc(results: MemorySearchResult[]): Document[] { - return results - .filter((r) => r.message) - .map( - ({ - message: { content, metadata: messageMetadata } = {}, - dist, - ...rest - }) => - new Document({ - pageContent: content ?? "", - metadata: { score: dist, ...messageMetadata, ...rest }, - }) - ); - } - - /** - * Converts an array of summary search results to an array of Document objects. 
- * @param {MemorySearchResult[]} results - The array of search results. - * @returns {Document[]} An array of Document objects representing the search results. - */ - private searchSummaryResultToDoc(results: MemorySearchResult[]): Document[] { - return results - .filter((r) => r.summary) - .map( - ({ - summary: { content, metadata: summaryMetadata } = {}, - dist, - ...rest - }) => - new Document({ - pageContent: content ?? "", - metadata: { score: dist, ...summaryMetadata, ...rest }, - }) - ); - } - - /** - * Retrieves the relevant documents based on the given query. - * @param {string} query - The query string. - * @returns {Promise} A promise that resolves to an array of relevant Document objects. - */ - async _getRelevantDocuments(query: string): Promise { - const payload: MemorySearchPayload = { - text: query, - metadata: this.filter, - search_scope: this.searchScope, - search_type: this.searchType, - mmr_lambda: this.mmrLambda, - }; - // Wait for ZepClient to be initialized - const zepClient = await this.zepClientPromise; - if (!zepClient) { - throw new Error("ZepClient is not initialized"); - } - try { - const results: MemorySearchResult[] = await zepClient.memory.searchMemory( - this.sessionId, - payload, - this.topK - ); - return this.searchScope === "summary" - ? 
this.searchSummaryResultToDoc(results) - : this.searchMessageResultToDoc(results); - } catch (error) { - // eslint-disable-next-line no-instanceof/no-instanceof - if (error instanceof NotFoundError) { - return Promise.resolve([]); // Return an empty Document array - } - // If it's not a NotFoundError, throw the error again - throw error; - } - } -} +export * from "@langchain/community/retrievers/zep"; \ No newline at end of file diff --git a/langchain/src/tools/IFTTTWebhook.ts b/langchain/src/tools/IFTTTWebhook.ts index 842dbc9d1aa1..f5f2f2afaae3 100644 --- a/langchain/src/tools/IFTTTWebhook.ts +++ b/langchain/src/tools/IFTTTWebhook.ts @@ -1,79 +1 @@ -/** From https://github.com/SidU/teams-langchain-js/wiki/Connecting-IFTTT-Services. - -# Creating a webhook -- Go to https://ifttt.com/create - -# Configuring the "If This" -- Click on the "If This" button in the IFTTT interface. -- Search for "Webhooks" in the search bar. -- Choose the first option for "Receive a web request with a JSON payload." -- Choose an Event Name that is specific to the service you plan to connect to. -This will make it easier for you to manage the webhook URL. -For example, if you're connecting to Spotify, you could use "Spotify" as your -Event Name. -- Click the "Create Trigger" button to save your settings and create your webhook. - -# Configuring the "Then That" -- Tap on the "Then That" button in the IFTTT interface. -- Search for the service you want to connect, such as Spotify. -- Choose an action from the service, such as "Add track to a playlist". -- Configure the action by specifying the necessary details, such as the playlist name, -e.g., "Songs from AI". -- Reference the JSON Payload received by the Webhook in your action. For the Spotify -scenario, choose "{{JsonPayload}}" as your search query. -- Tap the "Create Action" button to save your action settings. -- Once you have finished configuring your action, click the "Finish" button to -complete the setup. -- Congratulations! 
You have successfully connected the Webhook to the desired -service, and you're ready to start receiving data and triggering actions 🎉 - -# Finishing up -- To get your webhook URL go to https://ifttt.com/maker_webhooks/settings -- Copy the IFTTT key value from there. The URL is of the form -https://maker.ifttt.com/use/YOUR_IFTTT_KEY. Grab the YOUR_IFTTT_KEY value. - */ -import { Tool } from "./base.js"; - -/** - * Represents a tool for creating and managing webhooks with the IFTTT (If - * This Then That) service. The IFTTT service allows users to create - * chains of simple conditional statements, called applets, which are - * triggered based on changes to other web services. - */ -export class IFTTTWebhook extends Tool { - static lc_name() { - return "IFTTTWebhook"; - } - - private url: string; - - name: string; - - description: string; - - constructor(url: string, name: string, description: string) { - super(...arguments); - this.url = url; - this.name = name; - this.description = description; - } - - /** @ignore */ - async _call(input: string): Promise { - const headers = { "Content-Type": "application/json" }; - const body = JSON.stringify({ this: input }); - - const response = await fetch(this.url, { - method: "POST", - headers, - body, - }); - - if (!response.ok) { - throw new Error(`HTTP error ${response.status}`); - } - - const result = await response.text(); - return result; - } -} +export * from "@langchain/community/tools/ifttt"; \ No newline at end of file diff --git a/langchain/src/tools/aiplugin.ts b/langchain/src/tools/aiplugin.ts index c08720f7559e..dd4e140e4892 100644 --- a/langchain/src/tools/aiplugin.ts +++ b/langchain/src/tools/aiplugin.ts @@ -1,81 +1 @@ -import { Tool, ToolParams } from "./base.js"; - -/** - * Interface for parameters required to create an instance of - * AIPluginTool. 
- */ -export interface AIPluginToolParams extends ToolParams { - name: string; - description: string; - apiSpec: string; -} - -/** - * Class for creating instances of AI tools from plugins. It extends the - * Tool class and implements the AIPluginToolParams interface. - */ -export class AIPluginTool extends Tool implements AIPluginToolParams { - static lc_name() { - return "AIPluginTool"; - } - - private _name: string; - - private _description: string; - - apiSpec: string; - - get name() { - return this._name; - } - - get description() { - return this._description; - } - - constructor(params: AIPluginToolParams) { - super(params); - this._name = params.name; - this._description = params.description; - this.apiSpec = params.apiSpec; - } - - /** @ignore */ - async _call(_input: string) { - return this.apiSpec; - } - - /** - * Static method that creates an instance of AIPluginTool from a given - * plugin URL. It fetches the plugin and its API specification from the - * provided URL and returns a new instance of AIPluginTool with the - * fetched data. - * @param url The URL of the AI plugin. - * @returns A new instance of AIPluginTool. - */ - static async fromPluginUrl(url: string) { - const aiPluginRes = await fetch(url); - if (!aiPluginRes.ok) { - throw new Error( - `Failed to fetch plugin from ${url} with status ${aiPluginRes.status}` - ); - } - const aiPluginJson = await aiPluginRes.json(); - - const apiUrlRes = await fetch(aiPluginJson.api.url); - if (!apiUrlRes.ok) { - throw new Error( - `Failed to fetch API spec from ${aiPluginJson.api.url} with status ${apiUrlRes.status}` - ); - } - const apiUrlJson = await apiUrlRes.text(); - - return new AIPluginTool({ - name: aiPluginJson.name_for_model, - description: `Call this tool to get the OpenAPI spec (and usage guide) for interacting with the ${aiPluginJson.name_for_human} API. You should only call this ONCE! What is the ${aiPluginJson.name_for_human} API useful for? 
${aiPluginJson.description_for_human}`, - apiSpec: `Usage Guide: ${aiPluginJson.description_for_model} - -OpenAPI Spec in JSON or YAML format:\n${apiUrlJson}`, - }); - } -} +export * from "@langchain/community/tools/aiplugin"; \ No newline at end of file diff --git a/langchain/src/tools/aws_sfn.ts b/langchain/src/tools/aws_sfn.ts index 8d375f88f540..f9cc54a9d737 100644 --- a/langchain/src/tools/aws_sfn.ts +++ b/langchain/src/tools/aws_sfn.ts @@ -1,225 +1 @@ -import { - SFNClient as Client, - StartExecutionCommand as Invoker, - DescribeExecutionCommand as Describer, - SendTaskSuccessCommand as TaskSuccessSender, -} from "@aws-sdk/client-sfn"; - -import { Tool, ToolParams } from "./base.js"; - -/** - * Interface for AWS Step Functions configuration. - */ -export interface SfnConfig { - stateMachineArn: string; - region?: string; - accessKeyId?: string; - secretAccessKey?: string; -} - -/** - * Interface for AWS Step Functions client constructor arguments. - */ -interface SfnClientConstructorArgs { - region?: string; - credentials?: { - accessKeyId: string; - secretAccessKey: string; - }; -} - -/** - * Class for starting the execution of an AWS Step Function. - */ -export class StartExecutionAWSSfnTool extends Tool { - static lc_name() { - return "StartExecutionAWSSfnTool"; - } - - private sfnConfig: SfnConfig; - - public name: string; - - public description: string; - - constructor({ - name, - description, - ...rest - }: SfnConfig & { name: string; description: string }) { - super(); - this.name = name; - this.description = description; - this.sfnConfig = rest; - } - - /** - * Generates a formatted description for the StartExecutionAWSSfnTool. - * @param name Name of the state machine. - * @param description Description of the state machine. - * @returns A formatted description string. - */ - static formatDescription(name: string, description: string): string { - return `Use to start executing the ${name} state machine. Use to run ${name} workflows. 
Whenever you need to start (or execute) an asynchronous workflow (or state machine) about ${description} you should ALWAYS use this. Input should be a valid JSON string.`; - } - - /** @ignore */ - async _call(input: string): Promise { - const clientConstructorArgs: SfnClientConstructorArgs = - getClientConstructorArgs(this.sfnConfig); - const sfnClient = new Client(clientConstructorArgs); - - return new Promise((resolve) => { - let payload; - try { - payload = JSON.parse(input); - } catch (e) { - console.error("Error starting state machine execution:", e); - resolve("failed to complete request"); - } - - const command = new Invoker({ - stateMachineArn: this.sfnConfig.stateMachineArn, - input: JSON.stringify(payload), - }); - - sfnClient - .send(command) - .then((response) => - resolve( - response.executionArn ? response.executionArn : "request completed." - ) - ) - .catch((error: Error) => { - console.error("Error starting state machine execution:", error); - resolve("failed to complete request"); - }); - }); - } -} - -/** - * Class for checking the status of an AWS Step Function execution. - */ -export class DescribeExecutionAWSSfnTool extends Tool { - static lc_name() { - return "DescribeExecutionAWSSfnTool"; - } - - name = "describe-execution-aws-sfn"; - - description = - "This tool should ALWAYS be used for checking the status of any AWS Step Function execution (aka. state machine execution). Input to this tool is a properly formatted AWS Step Function Execution ARN (executionArn). 
The output is a stringified JSON object containing the executionArn, name, status, startDate, stopDate, input, output, error, and cause of the execution."; - - sfnConfig: Omit; - - constructor(config: Omit & ToolParams) { - super(config); - this.sfnConfig = config; - } - - /** @ignore */ - async _call(input: string) { - const clientConstructorArgs: SfnClientConstructorArgs = - getClientConstructorArgs(this.sfnConfig); - const sfnClient = new Client(clientConstructorArgs); - - const command = new Describer({ - executionArn: input, - }); - return await sfnClient - .send(command) - .then((response) => - response.executionArn - ? JSON.stringify({ - executionArn: response.executionArn, - name: response.name, - status: response.status, - startDate: response.startDate, - stopDate: response.stopDate, - input: response.input, - output: response.output, - error: response.error, - cause: response.cause, - }) - : "{}" - ) - .catch((error: Error) => { - console.error("Error describing state machine execution:", error); - return "failed to complete request"; - }); - } -} - -/** - * Class for sending a task success signal to an AWS Step Function - * execution. - */ -export class SendTaskSuccessAWSSfnTool extends Tool { - static lc_name() { - return "SendTaskSuccessAWSSfnTool"; - } - - name = "send-task-success-aws-sfn"; - - description = - "This tool should ALWAYS be used for sending task success to an AWS Step Function execution (aka. statemachine exeuction). 
Input to this tool is a stringify JSON object containing the taskToken and output."; - - sfnConfig: Omit; - - constructor(config: Omit & ToolParams) { - super(config); - this.sfnConfig = config; - } - - /** @ignore */ - async _call(input: string) { - const clientConstructorArgs: SfnClientConstructorArgs = - getClientConstructorArgs(this.sfnConfig); - const sfnClient = new Client(clientConstructorArgs); - - let payload; - try { - payload = JSON.parse(input); - } catch (e) { - console.error("Error starting state machine execution:", e); - return "failed to complete request"; - } - - const command = new TaskSuccessSender({ - taskToken: payload.taskToken, - output: JSON.stringify(payload.output), - }); - - return await sfnClient - .send(command) - .then(() => "request completed.") - .catch((error: Error) => { - console.error( - "Error sending task success to state machine execution:", - error - ); - return "failed to complete request"; - }); - } -} - -/** - * Helper function to construct the AWS SFN client. - */ -function getClientConstructorArgs(config: Partial) { - const clientConstructorArgs: SfnClientConstructorArgs = {}; - - if (config.region) { - clientConstructorArgs.region = config.region; - } - - if (config.accessKeyId && config.secretAccessKey) { - clientConstructorArgs.credentials = { - accessKeyId: config.accessKeyId, - secretAccessKey: config.secretAccessKey, - }; - } - - return clientConstructorArgs; -} +export * from "@langchain/community/tools/aws_sfn"; \ No newline at end of file diff --git a/langchain/src/tools/bingserpapi.ts b/langchain/src/tools/bingserpapi.ts index 0a3010f30034..28d359da9662 100644 --- a/langchain/src/tools/bingserpapi.ts +++ b/langchain/src/tools/bingserpapi.ts @@ -1,78 +1 @@ -import { getEnvironmentVariable } from "../util/env.js"; -import { Tool } from "./base.js"; - -/** - * A tool for web search functionality using Bing's search engine. 
It - * extends the base `Tool` class and implements the `_call` method to - * perform the search operation. Requires an API key for Bing's search - * engine, which can be set in the environment variables. Also accepts - * additional parameters for the search query. - */ -class BingSerpAPI extends Tool { - static lc_name() { - return "BingSerpAPI"; - } - - /** - * Not implemented. Will throw an error if called. - */ - toJSON() { - return this.toJSONNotImplemented(); - } - - name = "bing-search"; - - description = - "a search engine. useful for when you need to answer questions about current events. input should be a search query."; - - key: string; - - params: Record; - - constructor( - apiKey: string | undefined = getEnvironmentVariable("BingApiKey"), - params: Record = {} - ) { - super(...arguments); - - if (!apiKey) { - throw new Error( - "BingSerpAPI API key not set. You can set it as BingApiKey in your .env file." - ); - } - - this.key = apiKey; - this.params = params; - } - - /** @ignore */ - async _call(input: string): Promise { - const headers = { "Ocp-Apim-Subscription-Key": this.key }; - const params = { q: input, textDecorations: "true", textFormat: "HTML" }; - const searchUrl = new URL("https://api.bing.microsoft.com/v7.0/search"); - - Object.entries(params).forEach(([key, value]) => { - searchUrl.searchParams.append(key, value); - }); - - const response = await fetch(searchUrl, { headers }); - - if (!response.ok) { - throw new Error(`HTTP error ${response.status}`); - } - - const res = await response.json(); - const results: [] = res.webPages.value; - - if (results.length === 0) { - return "No good results found."; - } - const snippets = results - .map((result: { snippet: string }) => result.snippet) - .join(" "); - - return snippets; - } -} - -export { BingSerpAPI }; +export * from "@langchain/community/tools/bingserpapi"; \ No newline at end of file diff --git a/langchain/src/tools/brave_search.ts b/langchain/src/tools/brave_search.ts index 
7d5498f2b95b..d26347aed42b 100644 --- a/langchain/src/tools/brave_search.ts +++ b/langchain/src/tools/brave_search.ts @@ -1,77 +1 @@ -import { getEnvironmentVariable } from "../util/env.js"; -import { Tool } from "./base.js"; - -/** - * Interface for the parameters required to instantiate a BraveSearch - * instance. - */ -export interface BraveSearchParams { - apiKey?: string; -} - -/** - * Class for interacting with the Brave Search engine. It extends the Tool - * class and requires an API key to function. The API key can be passed in - * during instantiation or set as an environment variable named - * 'BRAVE_SEARCH_API_KEY'. - */ -export class BraveSearch extends Tool { - static lc_name() { - return "BraveSearch"; - } - - name = "brave-search"; - - description = - "a search engine. useful for when you need to answer questions about current events. input should be a search query."; - - apiKey: string; - - constructor( - fields: BraveSearchParams = { - apiKey: getEnvironmentVariable("BRAVE_SEARCH_API_KEY"), - } - ) { - super(); - - if (!fields.apiKey) { - throw new Error( - `Brave API key not set. Please pass it in or set it as an environment variable named "BRAVE_SEARCH_API_KEY".` - ); - } - - this.apiKey = fields.apiKey; - } - - /** @ignore */ - async _call(input: string): Promise { - const headers = { - "X-Subscription-Token": this.apiKey, - Accept: "application/json", - }; - const searchUrl = new URL( - `https://api.search.brave.com/res/v1/web/search?q=${encodeURIComponent( - input - )}` - ); - - const response = await fetch(searchUrl, { headers }); - - if (!response.ok) { - throw new Error(`HTTP error ${response.status}`); - } - - const parsedResponse = await response.json(); - const webSearchResults = parsedResponse.web?.results; - const finalResults = Array.isArray(webSearchResults) - ? 
webSearchResults.map( - (item: { title?: string; url?: string; description?: string }) => ({ - title: item.title, - link: item.url, - snippet: item.description, - }) - ) - : []; - return JSON.stringify(finalResults); - } -} +export * from "@langchain/community/tools/brave_search"; \ No newline at end of file diff --git a/langchain/src/tools/connery.ts b/langchain/src/tools/connery.ts index 5ee594ba85a9..8b7542db9940 100644 --- a/langchain/src/tools/connery.ts +++ b/langchain/src/tools/connery.ts @@ -1,353 +1 @@ -import { AsyncCaller, AsyncCallerParams } from "../util/async_caller.js"; -import { getEnvironmentVariable } from "../util/env.js"; -import { Tool } from "./base.js"; - -/** - * An object containing configuration parameters for the ConneryService class. - * @extends AsyncCallerParams - */ -export interface ConneryServiceParams extends AsyncCallerParams { - runnerUrl: string; - apiKey: string; -} - -type ApiResponse = { - status: "success"; - data: T; -}; - -type ApiErrorResponse = { - status: "error"; - error: { - message: string; - }; -}; - -type Parameter = { - key: string; - title: string; - description: string; - type: string; - validation?: { - required?: boolean; - }; -}; - -type Action = { - id: string; - key: string; - title: string; - description: string; - type: string; - inputParameters: Parameter[]; - outputParameters: Parameter[]; - pluginId: string; -}; - -type Input = { - [key: string]: string; -}; - -type Output = { - [key: string]: string; -}; - -type RunActionResult = { - output: Output; - used: { - actionId: string; - input: Input; - }; -}; - -/** - * A LangChain Tool object wrapping a Connery action. - * @extends Tool - */ -export class ConneryAction extends Tool { - name: string; - - description: string; - - /** - * Creates a ConneryAction instance based on the provided Connery action. - * @param _action The Connery action. - * @param _service The ConneryService instance. - * @returns A ConneryAction instance. 
- */ - constructor(protected _action: Action, protected _service: ConneryService) { - super(); - - this.name = this._action.title; - this.description = this.getDescription(); - } - - /** - * Runs the Connery action. - * @param prompt This is a plain English prompt with all the information needed to run the action. - * @returns A promise that resolves to a JSON string containing the output of the action. - */ - protected _call(prompt: string): Promise { - return this._service.runAction(this._action.id, prompt); - } - - /** - * Returns the description of the Connery action. - * @returns A string containing the description of the Connery action together with the instructions on how to use it. - */ - protected getDescription(): string { - const { title, description } = this._action; - const inputParameters = this.prepareJsonForTemplate( - this._action.inputParameters - ); - const example1InputParametersSchema = this.prepareJsonForTemplate([ - { - key: "recipient", - title: "Email Recipient", - description: "Email address of the email recipient.", - type: "string", - validation: { - required: true, - }, - }, - { - key: "subject", - title: "Email Subject", - description: "Subject of the email.", - type: "string", - validation: { - required: true, - }, - }, - { - key: "body", - title: "Email Body", - description: "Body of the email.", - type: "string", - validation: { - required: true, - }, - }, - ]); - - const descriptionTemplate = - "# Instructions about tool input:\n" + - "The input to this tool is a plain English prompt with all the input parameters needed to call it. " + - "The input parameters schema of this tool is provided below. " + - "Use the input parameters schema to construct the prompt for the tool. " + - "If the input parameter is required in the schema, it must be provided in the prompt. " + - "Do not come up with the values for the input parameters yourself. 
" + - "If you do not have enough information to fill in the input parameter, ask the user to provide it. " + - "See examples below on how to construct the prompt based on the provided tool information. " + - "\n\n" + - "# Instructions about tool output:\n" + - "The output of this tool is a JSON string. " + - "Retrieve the output parameters from the JSON string and use them in the next tool. " + - "Do not return the JSON string as the output of the tool. " + - "\n\n" + - "# Example:\n" + - "Tool information:\n" + - "- Title: Send email\n" + - "- Description: Send an email to a recipient.\n" + - `- Input parameters schema in JSON fromat: ${example1InputParametersSchema}\n` + - "The tool input prompt:\n" + - "recipient: test@example.com, subject: 'Test email', body: 'This is a test email sent from Langchain Connery tool.'\n" + - "\n\n" + - "# The tool information\n" + - `- Title: ${title}\n` + - `- Description: ${description}\n` + - `- Input parameters schema in JSON fromat: ${inputParameters}\n`; - - return descriptionTemplate; - } - - /** - * Converts the provided object to a JSON string and escapes '{' and '}' characters. - * @param obj The object to convert to a JSON string. - * @returns A string containing the JSON representation of the provided object with '{' and '}' characters escaped. - */ - // eslint-disable-next-line @typescript-eslint/no-explicit-any - protected prepareJsonForTemplate(obj: any): string { - // Convert the object to a JSON string - const jsonString = JSON.stringify(obj); - - // Replace '{' with '{{' and '}' with '}}' - const escapedJSON = jsonString.replace(/{/g, "{{").replace(/}/g, "}}"); - - return escapedJSON; - } -} - -/** - * A service for working with Connery actions. - * - * Connery is an open-source plugin infrastructure for AI. 
- * Source code: https://github.com/connery-io/connery-platform - */ -export class ConneryService { - protected runnerUrl: string; - - protected apiKey: string; - - protected asyncCaller: AsyncCaller; - - /** - * Creates a ConneryService instance. - * @param params A ConneryServiceParams object. - * If not provided, the values are retrieved from the CONNERY_RUNNER_URL - * and CONNERY_RUNNER_API_KEY environment variables. - * @returns A ConneryService instance. - */ - constructor(params?: ConneryServiceParams) { - const runnerUrl = - params?.runnerUrl ?? getEnvironmentVariable("CONNERY_RUNNER_URL"); - const apiKey = - params?.apiKey ?? getEnvironmentVariable("CONNERY_RUNNER_API_KEY"); - - if (!runnerUrl || !apiKey) { - throw new Error( - "CONNERY_RUNNER_URL and CONNERY_RUNNER_API_KEY environment variables must be set." - ); - } - - this.runnerUrl = runnerUrl; - this.apiKey = apiKey; - - this.asyncCaller = new AsyncCaller(params ?? {}); - } - - /** - * Returns the list of Connery actions wrapped as a LangChain Tool objects. - * @returns A promise that resolves to an array of ConneryAction objects. - */ - async listActions(): Promise { - const actions = await this._listActions(); - return actions.map((action) => new ConneryAction(action, this)); - } - - /** - * Returns the specified Connery action wrapped as a LangChain Tool object. - * @param actionId The ID of the action to return. - * @returns A promise that resolves to a ConneryAction object. - */ - async getAction(actionId: string): Promise { - const action = await this._getAction(actionId); - return new ConneryAction(action, this); - } - - /** - * Runs the specified Connery action with the provided input. - * @param actionId The ID of the action to run. - * @param prompt This is a plain English prompt with all the information needed to run the action. - * @param input The input expected by the action. - * If provided together with the prompt, the input takes precedence over the input specified in the prompt. 
- * @returns A promise that resolves to a JSON string containing the output of the action. - */ - async runAction( - actionId: string, - prompt?: string, - input?: Input - ): Promise { - const result = await this._runAction(actionId, prompt, input); - return JSON.stringify(result); - } - - /** - * Returns the list of actions available in the Connery runner. - * @returns A promise that resolves to an array of Action objects. - */ - protected async _listActions(): Promise { - const response = await this.asyncCaller.call( - fetch, - `${this.runnerUrl}/v1/actions`, - { - method: "GET", - headers: this._getHeaders(), - } - ); - await this._handleError(response, "Failed to list actions"); - - const apiResponse: ApiResponse = await response.json(); - return apiResponse.data; - } - - /** - * Returns the specified action available in the Connery runner. - * @param actionId The ID of the action to return. - * @returns A promise that resolves to an Action object. - * @throws An error if the action with the specified ID is not found. - */ - protected async _getAction(actionId: string): Promise { - const actions = await this._listActions(); - const action = actions.find((a) => a.id === actionId); - if (!action) { - throw new Error( - `The action with ID "${actionId}" was not found in the list of available actions in the Connery runner.` - ); - } - return action; - } - - /** - * Runs the specified Connery action with the provided input. - * @param actionId The ID of the action to run. - * @param prompt This is a plain English prompt with all the information needed to run the action. - * @param input The input object expected by the action. - * If provided together with the prompt, the input takes precedence over the input specified in the prompt. - * @returns A promise that resolves to a RunActionResult object. 
- */ - protected async _runAction( - actionId: string, - prompt?: string, - input?: Input - ): Promise { - const response = await this.asyncCaller.call( - fetch, - `${this.runnerUrl}/v1/actions/${actionId}/run`, - { - method: "POST", - headers: this._getHeaders(), - body: JSON.stringify({ - prompt, - input, - }), - } - ); - await this._handleError(response, "Failed to run action"); - - const apiResponse: ApiResponse = await response.json(); - return apiResponse.data.output; - } - - /** - * Returns a standard set of HTTP headers to be used in API calls to the Connery runner. - * @returns An object containing the standard set of HTTP headers. - */ - protected _getHeaders(): Record { - return { - "Content-Type": "application/json", - "x-api-key": this.apiKey, - }; - } - - /** - * Shared error handler for API calls to the Connery runner. - * If the response is not ok, an error is thrown containing the error message returned by the Connery runner. - * Otherwise, the promise resolves to void. - * @param response The response object returned by the Connery runner. - * @param errorMessage The error message to be used in the error thrown if the response is not ok. - * @returns A promise that resolves to void. - * @throws An error containing the error message returned by the Connery runner. - */ - protected async _handleError( - response: Response, - errorMessage: string - ): Promise { - if (response.ok) return; - - const apiErrorResponse: ApiErrorResponse = await response.json(); - throw new Error( - `${errorMessage}. Status code: ${response.status}. 
Error message: ${apiErrorResponse.error.message}` - ); - } -} +export * from "@langchain/community/tools/connery"; \ No newline at end of file diff --git a/langchain/src/tools/dadjokeapi.ts b/langchain/src/tools/dadjokeapi.ts index 856b7a519155..c7ec315bdc27 100644 --- a/langchain/src/tools/dadjokeapi.ts +++ b/langchain/src/tools/dadjokeapi.ts @@ -1,44 +1 @@ -import { Tool } from "./base.js"; - -/** - * The DadJokeAPI class is a tool for generating dad jokes based on a - * specific topic. It fetches jokes from an external API and returns a - * random joke from the results. If no jokes are found for the given - * search term, it returns a message indicating that no jokes were found. - */ -class DadJokeAPI extends Tool { - static lc_name() { - return "DadJokeAPI"; - } - - name = "dadjoke"; - - description = - "a dad joke generator. get a dad joke about a specific topic. input should be a search term."; - - /** @ignore */ - async _call(input: string): Promise { - const headers = { Accept: "application/json" }; - const searchUrl = `https://icanhazdadjoke.com/search?term=${input}`; - - const response = await fetch(searchUrl, { headers }); - - if (!response.ok) { - throw new Error(`HTTP error ${response.status}`); - } - - const data = await response.json(); - const jokes = data.results; - - if (jokes.length === 0) { - return `No dad jokes found about ${input}`; - } - - const randomIndex = Math.floor(Math.random() * jokes.length); - const randomJoke = jokes[randomIndex].joke; - - return randomJoke; - } -} - -export { DadJokeAPI }; +export * from "@langchain/community/tools/dadjokeapi"; \ No newline at end of file diff --git a/langchain/src/tools/dataforseo_api_search.ts b/langchain/src/tools/dataforseo_api_search.ts index 6f6b246351b1..344f916c14f3 100644 --- a/langchain/src/tools/dataforseo_api_search.ts +++ b/langchain/src/tools/dataforseo_api_search.ts @@ -1,378 +1 @@ -import { getEnvironmentVariable } from "../util/env.js"; -import { Tool } from "./base.js"; - -/** - 
* @interface DataForSeoApiConfig - * @description Represents the configuration object used to set up a DataForSeoAPISearch instance. - */ -export interface DataForSeoApiConfig { - /** - * @property apiLogin - * @type {string} - * @description The API login credential for DataForSEO. If not provided, it will be fetched from environment variables. - */ - apiLogin?: string; - - /** - * @property apiPassword - * @type {string} - * @description The API password credential for DataForSEO. If not provided, it will be fetched from environment variables. - */ - apiPassword?: string; - - /** - * @property params - * @type {Record} - * @description Additional parameters to customize the API request. - */ - params?: Record; - - /** - * @property useJsonOutput - * @type {boolean} - * @description Determines if the output should be in JSON format. - */ - useJsonOutput?: boolean; - - /** - * @property jsonResultTypes - * @type {Array} - * @description Specifies the types of results to include in the output. - */ - jsonResultTypes?: Array; - - /** - * @property jsonResultFields - * @type {Array} - * @description Specifies the fields to include in each result object. - */ - jsonResultFields?: Array; - - /** - * @property topCount - * @type {number} - * @description Specifies the maximum number of results to return. - */ - topCount?: number; -} - -/** - * Represents a task in the API response. - */ -type Task = { - id: string; - status_code: number; - status_message: string; - time: string; - result: Result[]; -}; - -/** - * Represents a result in the API response. - */ -type Result = { - keyword: string; - check_url: string; - datetime: string; - spell?: string; - item_types: string[]; - se_results_count: number; - items_count: number; - // eslint-disable-next-line @typescript-eslint/no-explicit-any - items: any[]; -}; - -/** - * Represents the API response. 
- */ -type ApiResponse = { - status_code: number; - status_message: string; - tasks: Task[]; -}; - -/** - * @class DataForSeoAPISearch - * @extends {Tool} - * @description Represents a wrapper class to work with DataForSEO SERP API. - */ -export class DataForSeoAPISearch extends Tool { - static lc_name() { - return "DataForSeoAPISearch"; - } - - name = "dataforseo-api-wrapper"; - - description = - "A robust Google Search API provided by DataForSeo. This tool is handy when you need information about trending topics or current events."; - - protected apiLogin: string; - - protected apiPassword: string; - - /** - * @property defaultParams - * @type {Record} - * @description These are the default parameters to be used when making an API request. - */ - protected defaultParams: Record = { - location_name: "United States", - language_code: "en", - depth: 10, - se_name: "google", - se_type: "organic", - }; - - protected params: Record = {}; - - protected jsonResultTypes: Array | undefined; - - protected jsonResultFields: Array | undefined; - - protected topCount: number | undefined; - - protected useJsonOutput = false; - - /** - * @constructor - * @param {DataForSeoApiConfig} config - * @description Sets up the class, throws an error if the API login/password isn't provided. - */ - constructor(config: DataForSeoApiConfig = {}) { - super(); - const apiLogin = - config.apiLogin ?? getEnvironmentVariable("DATAFORSEO_LOGIN"); - const apiPassword = - config.apiPassword ?? getEnvironmentVariable("DATAFORSEO_PASSWORD"); - const params = config.params ?? {}; - if (!apiLogin || !apiPassword) { - throw new Error( - "DataForSEO login or password not set. You can set it as DATAFORSEO_LOGIN and DATAFORSEO_PASSWORD in your .env file, or pass it to DataForSeoAPISearch." 
- ); - } - this.params = { ...this.defaultParams, ...params }; - this.apiLogin = apiLogin; - this.apiPassword = apiPassword; - this.jsonResultTypes = config.jsonResultTypes; - this.jsonResultFields = config.jsonResultFields; - this.useJsonOutput = config.useJsonOutput ?? false; - this.topCount = config.topCount; - } - - /** - * @method _call - * @param {string} keyword - * @returns {Promise} - * @description Initiates a call to the API and processes the response. - */ - async _call(keyword: string): Promise { - return this.useJsonOutput - ? JSON.stringify(await this.results(keyword)) - : this.processResponse(await this.getResponseJson(keyword)); - } - - /** - * @method results - * @param {string} keyword - * @returns {Promise>} - * @description Fetches the results from the API for the given keyword. - */ - // eslint-disable-next-line @typescript-eslint/no-explicit-any - async results(keyword: string): Promise> { - const res = await this.getResponseJson(keyword); - return this.filterResults(res, this.jsonResultTypes); - } - - /** - * @method prepareRequest - * @param {string} keyword - * @returns {{url: string; headers: HeadersInit; data: BodyInit}} - * @description Prepares the request details for the API call. 
- */ - protected prepareRequest(keyword: string): { - url: string; - headers: HeadersInit; - data: BodyInit; - } { - if (this.apiLogin === undefined || this.apiPassword === undefined) { - throw new Error("api_login or api_password is not provided"); - } - - const credentials = Buffer.from( - `${this.apiLogin}:${this.apiPassword}`, - "utf-8" - ).toString("base64"); - const headers = { - Authorization: `Basic ${credentials}`, - "Content-Type": "application/json", - }; - - const params = { ...this.params }; - params.keyword ??= keyword; - const data = [params]; - - return { - url: `https://api.dataforseo.com/v3/serp/${params.se_name}/${params.se_type}/live/advanced`, - headers, - data: JSON.stringify(data), - }; - } - - /** - * @method getResponseJson - * @param {string} keyword - * @returns {Promise} - * @description Executes a POST request to the provided URL and returns a parsed JSON response. - */ - protected async getResponseJson(keyword: string): Promise { - const requestDetails = this.prepareRequest(keyword); - const response = await fetch(requestDetails.url, { - method: "POST", - headers: requestDetails.headers, - body: requestDetails.data, - }); - - if (!response.ok) { - throw new Error( - `Got ${response.status} error from DataForSEO: ${response.statusText}` - ); - } - - const result: ApiResponse = await response.json(); - return this.checkResponse(result); - } - - /** - * @method checkResponse - * @param {ApiResponse} response - * @returns {ApiResponse} - * @description Checks the response status code. 
- */ - private checkResponse(response: ApiResponse): ApiResponse { - if (response.status_code !== 20000) { - throw new Error( - `Got error from DataForSEO SERP API: ${response.status_message}` - ); - } - for (const task of response.tasks) { - if (task.status_code !== 20000) { - throw new Error( - `Got error from DataForSEO SERP API: ${task.status_message}` - ); - } - } - return response; - } - - /* eslint-disable @typescript-eslint/no-explicit-any */ - /** - * @method filterResults - * @param {ApiResponse} res - * @param {Array | undefined} types - * @returns {Array} - * @description Filters the results based on the specified result types. - */ - private filterResults( - res: ApiResponse, - types: Array | undefined - ): Array { - const output: Array = []; - for (const task of res.tasks || []) { - for (const result of task.result || []) { - for (const item of result.items || []) { - if ( - types === undefined || - types.length === 0 || - types.includes(item.type) - ) { - const newItem = this.cleanupUnnecessaryItems(item); - if (Object.keys(newItem).length !== 0) { - output.push(newItem); - } - } - if (this.topCount !== undefined && output.length >= this.topCount) { - break; - } - } - } - } - return output; - } - - /* eslint-disable @typescript-eslint/no-explicit-any */ - /* eslint-disable no-param-reassign */ - /** - * @method cleanupUnnecessaryItems - * @param {any} d - * @description Removes unnecessary items from the response. 
- */ - private cleanupUnnecessaryItems(d: any): any { - if (Array.isArray(d)) { - return d.map((item) => this.cleanupUnnecessaryItems(item)); - } - - const toRemove = ["xpath", "position", "rectangle"]; - if (typeof d === "object" && d !== null) { - return Object.keys(d).reduce((newObj: any, key: string) => { - if ( - (this.jsonResultFields === undefined || - this.jsonResultFields.includes(key)) && - !toRemove.includes(key) - ) { - if (typeof d[key] === "object" && d[key] !== null) { - newObj[key] = this.cleanupUnnecessaryItems(d[key]); - } else { - newObj[key] = d[key]; - } - } - return newObj; - }, {}); - } - - return d; - } - - /** - * @method processResponse - * @param {ApiResponse} res - * @returns {string} - * @description Processes the response to extract meaningful data. - */ - protected processResponse(res: ApiResponse): string { - let returnValue = "No good search result found"; - for (const task of res.tasks || []) { - for (const result of task.result || []) { - const { item_types } = result; - const items = result.items || []; - if (item_types.includes("answer_box")) { - returnValue = items.find( - (item: { type: string; text: string }) => item.type === "answer_box" - ).text; - } else if (item_types.includes("knowledge_graph")) { - returnValue = items.find( - (item: { type: string; description: string }) => - item.type === "knowledge_graph" - ).description; - } else if (item_types.includes("featured_snippet")) { - returnValue = items.find( - (item: { type: string; description: string }) => - item.type === "featured_snippet" - ).description; - } else if (item_types.includes("shopping")) { - returnValue = items.find( - (item: { type: string; price: string }) => item.type === "shopping" - ).price; - } else if (item_types.includes("organic")) { - returnValue = items.find( - (item: { type: string; description: string }) => - item.type === "organic" - ).description; - } - if (returnValue) { - break; - } - } - } - return returnValue; - } -} +export * from 
"@langchain/community/tools/dataforseo_api_search"; \ No newline at end of file diff --git a/langchain/src/tools/gmail/base.ts b/langchain/src/tools/gmail/base.ts deleted file mode 100644 index 7977f53387d3..000000000000 --- a/langchain/src/tools/gmail/base.ts +++ /dev/null @@ -1,75 +0,0 @@ -import { gmail_v1, google } from "googleapis"; -import { z } from "zod"; -import { StructuredTool } from "../base.js"; -import { getEnvironmentVariable } from "../../util/env.js"; - -export interface GmailBaseToolParams { - credentials?: { - clientEmail?: string; - privateKey?: string; - keyfile?: string; - }; - scopes?: string[]; -} - -export abstract class GmailBaseTool extends StructuredTool { - private CredentialsSchema = z - .object({ - clientEmail: z - .string() - .min(1) - .default(getEnvironmentVariable("GMAIL_CLIENT_EMAIL") ?? ""), - privateKey: z - .string() - .default(getEnvironmentVariable("GMAIL_PRIVATE_KEY") ?? ""), - keyfile: z - .string() - .default(getEnvironmentVariable("GMAIL_KEYFILE") ?? 
""), - }) - .refine( - (credentials) => - credentials.privateKey !== "" || credentials.keyfile !== "", - { - message: - "Missing GMAIL_PRIVATE_KEY or GMAIL_KEYFILE to interact with Gmail", - } - ); - - private GmailBaseToolParamsSchema = z - .object({ - credentials: this.CredentialsSchema.default({}), - scopes: z.array(z.string()).default(["https://mail.google.com/"]), - }) - .default({}); - - name = "Gmail"; - - description = "A tool to send and view emails through Gmail"; - - protected gmail: gmail_v1.Gmail; - - constructor(fields?: Partial) { - super(...arguments); - - const { credentials, scopes } = - this.GmailBaseToolParamsSchema.parse(fields); - - this.gmail = this.getGmail( - scopes, - credentials.clientEmail, - credentials.privateKey, - credentials.keyfile - ); - } - - private getGmail( - scopes: string[], - email: string, - key?: string, - keyfile?: string - ) { - const auth = new google.auth.JWT(email, keyfile, key, scopes); - - return google.gmail({ version: "v1", auth }); - } -} diff --git a/langchain/src/tools/gmail/create_draft.ts b/langchain/src/tools/gmail/create_draft.ts deleted file mode 100644 index b2d4b56e89c3..000000000000 --- a/langchain/src/tools/gmail/create_draft.ts +++ /dev/null @@ -1,74 +0,0 @@ -import { z } from "zod"; -import { GmailBaseTool, GmailBaseToolParams } from "./base.js"; -import { CREATE_DRAFT_DESCRIPTION } from "./descriptions.js"; - -export class GmailCreateDraft extends GmailBaseTool { - name = "create_gmail_draft"; - - schema = z.object({ - message: z.string(), - to: z.array(z.string()), - subject: z.string(), - cc: z.array(z.string()).optional(), - bcc: z.array(z.string()).optional(), - }); - - description = CREATE_DRAFT_DESCRIPTION; - - constructor(fields?: GmailBaseToolParams) { - super(fields); - } - - private prepareDraftMessage( - message: string, - to: string[], - subject: string, - cc?: string[], - bcc?: string[] - ) { - const draftMessage = { - message: { - raw: "", - }, - }; - - const email = [ - `To: 
${to.join(", ")}`, - `Subject: ${subject}`, - cc ? `Cc: ${cc.join(", ")}` : "", - bcc ? `Bcc: ${bcc.join(", ")}` : "", - "", - message, - ].join("\n"); - - draftMessage.message.raw = Buffer.from(email).toString("base64url"); - - return draftMessage; - } - - async _call(arg: z.output) { - const { message, to, subject, cc, bcc } = arg; - const create_message = this.prepareDraftMessage( - message, - to, - subject, - cc, - bcc - ); - - const response = await this.gmail.users.drafts.create({ - userId: "me", - requestBody: create_message, - }); - - return `Draft created. Draft Id: ${response.data.id}`; - } -} - -export type CreateDraftSchema = { - message: string; - to: string[]; - subject: string; - cc?: string[]; - bcc?: string[]; -}; diff --git a/langchain/src/tools/gmail/descriptions.ts b/langchain/src/tools/gmail/descriptions.ts deleted file mode 100644 index 15193966b232..000000000000 --- a/langchain/src/tools/gmail/descriptions.ts +++ /dev/null @@ -1,119 +0,0 @@ -export const CREATE_DRAFT_DESCRIPTION = `A tool for creating draft emails in Gmail. - -INPUT example: -{ - "message": "Hello, this is a test draft", - "to": ["example1@email.com", "example2@email.com"], - "subject": "Test Draft", - "cc": ["cc1@email.com"], - "bcc": ["bcc1@email.com"] -} - -OUTPUT: -The output is a confirmation message with the draft ID. -`; - -export const GET_MESSAGE_DESCRIPTION = `A tool for retrieving a specific email message from Gmail using its message ID. - -INPUT example: -{ - "messageId": "unique_message_id_string" -} - -OUTPUT: -The output includes detailed information about the retrieved email message. This includes the subject, body, sender (from), recipients (to), date of the email, and the message ID. If any of these details are not available in the email, the tool will throw an error indicating the missing information. 
- -Example Output: -"Result for the prompt unique_message_id_string -{ - 'subject': 'Email Subject', - 'body': 'Email Body Content', - 'from': 'sender@email.com', - 'to': 'recipient@email.com', - 'date': 'Email Date', - 'messageId': 'unique_message_id_string' -}" -`; - -export const GET_THREAD_DESCRIPTION = `A tool for retrieving an entire email thread from Gmail using the thread ID. - -INPUT example: -{ - "threadId": "unique_thread_id_string" -} - -OUTPUT: -The output includes an array of all the messages in the specified thread. Each message in the array contains detailed information including the subject, body, sender (from), recipients (to), date of the email, and the message ID. If any of these details are not available in a message, the tool will throw an error indicating the missing information. - -Example Output: -"Result for the prompt unique_thread_id_string -[ - { - 'subject': 'Email Subject', - 'body': 'Email Body Content', - 'from': 'sender@email.com', - 'to': 'recipient@email.com', - 'date': 'Email Date', - 'messageId': 'unique_message_id_string' - }, - ... (other messages in the thread) -]" -`; - -export const SEND_MESSAGE_DESCRIPTION = `A tool for sending an email message using Gmail. It allows users to specify recipients, subject, and the content of the message, along with optional cc and bcc fields. - -INPUT example: -{ - "message": "Hello, this is a test email", - "to": ["recipient1@email.com", "recipient2@email.com"], - "subject": "Test Email", - "cc": ["cc1@email.com"], - "bcc": ["bcc1@email.com"] -} - -OUTPUT: -The output is a confirmation message with the ID of the sent email. If there is an error during the sending process, the tool will throw an error with a description of the problem. - -Example Output: -"Message sent. Message Id: unique_message_id_string" -`; - -export const SEARCH_DESCRIPTION = `A tool for searching email messages or threads in Gmail using a specific query. 
It offers the flexibility to choose between messages and threads as the search resource. - -INPUT example: -{ - "query": "specific search query", - "maxResults": 10, // Optional: number of results to return - "resource": "messages" // Optional: can be "messages" or "threads" -} - -OUTPUT: -The output is a JSON list of either email messages or threads, depending on the specified resource, that matches the search query. For 'messages', the output includes details like the message ID, thread ID, snippet, body, subject, and sender of each message. For 'threads', it includes the thread ID, snippet, body, subject, and sender of the first message in each thread. If no data is returned, or if the specified resource is invalid, the tool throws an error with a relevant message. - -Example Output for 'messages': -"Result for the query 'specific search query': -[ - { - 'id': 'message_id', - 'threadId': 'thread_id', - 'snippet': 'message snippet', - 'body': 'message body', - 'subject': 'message subject', - 'sender': 'sender's email' - }, - ... (other messages matching the query) -]" - -Example Output for 'threads': -"Result for the query 'specific search query': -[ - { - 'id': 'thread_id', - 'snippet': 'thread snippet', - 'body': 'first message body', - 'subject': 'first message subject', - 'sender': 'first message sender' - }, - ... 
(other threads matching the query) -]" -`; diff --git a/langchain/src/tools/gmail/get_message.ts b/langchain/src/tools/gmail/get_message.ts deleted file mode 100644 index 5864c427d7ce..000000000000 --- a/langchain/src/tools/gmail/get_message.ts +++ /dev/null @@ -1,95 +0,0 @@ -import { z } from "zod"; -import { GmailBaseToolParams, GmailBaseTool } from "./base.js"; -import { GET_MESSAGE_DESCRIPTION } from "./descriptions.js"; - -export class GmailGetMessage extends GmailBaseTool { - name = "gmail_get_message"; - - schema = z.object({ - messageId: z.string(), - }); - - description = GET_MESSAGE_DESCRIPTION; - - constructor(fields?: GmailBaseToolParams) { - super(fields); - } - - async _call(arg: z.output) { - const { messageId } = arg; - - const message = await this.gmail.users.messages.get({ - userId: "me", - id: messageId, - }); - - const { data } = message; - - if (!data) { - throw new Error("No data returned from Gmail"); - } - - const { payload } = data; - - if (!payload) { - throw new Error("No payload returned from Gmail"); - } - - const { headers } = payload; - - if (!headers) { - throw new Error("No headers returned from Gmail"); - } - - const subject = headers.find((header) => header.name === "Subject"); - - if (!subject) { - throw new Error("No subject returned from Gmail"); - } - - const body = headers.find((header) => header.name === "Body"); - - if (!body) { - throw new Error("No body returned from Gmail"); - } - - const from = headers.find((header) => header.name === "From"); - - if (!from) { - throw new Error("No from returned from Gmail"); - } - - const to = headers.find((header) => header.name === "To"); - - if (!to) { - throw new Error("No to returned from Gmail"); - } - - const date = headers.find((header) => header.name === "Date"); - - if (!date) { - throw new Error("No date returned from Gmail"); - } - - const messageIdHeader = headers.find( - (header) => header.name === "Message-ID" - ); - - if (!messageIdHeader) { - throw new Error("No 
message id returned from Gmail"); - } - - return `Result for the prompt ${messageId} \n${JSON.stringify({ - subject: subject.value, - body: body.value, - from: from.value, - to: to.value, - date: date.value, - messageId: messageIdHeader.value, - })}`; - } -} - -export type GetMessageSchema = { - messageId: string; -}; diff --git a/langchain/src/tools/gmail/get_thread.ts b/langchain/src/tools/gmail/get_thread.ts deleted file mode 100644 index 0310bf053d0c..000000000000 --- a/langchain/src/tools/gmail/get_thread.ts +++ /dev/null @@ -1,105 +0,0 @@ -import { z } from "zod"; -import { GmailBaseTool, GmailBaseToolParams } from "./base.js"; -import { GET_THREAD_DESCRIPTION } from "./descriptions.js"; - -export class GmailGetThread extends GmailBaseTool { - name = "gmail_get_thread"; - - schema = z.object({ - threadId: z.string(), - }); - - description = GET_THREAD_DESCRIPTION; - - constructor(fields?: GmailBaseToolParams) { - super(fields); - } - - async _call(arg: z.output) { - const { threadId } = arg; - - const thread = await this.gmail.users.threads.get({ - userId: "me", - id: threadId, - }); - - const { data } = thread; - - if (!data) { - throw new Error("No data returned from Gmail"); - } - - const { messages } = data; - - if (!messages) { - throw new Error("No messages returned from Gmail"); - } - - return `Result for the prompt ${threadId} \n${JSON.stringify( - messages.map((message) => { - const { payload } = message; - - if (!payload) { - throw new Error("No payload returned from Gmail"); - } - - const { headers } = payload; - - if (!headers) { - throw new Error("No headers returned from Gmail"); - } - - const subject = headers.find((header) => header.name === "Subject"); - - if (!subject) { - throw new Error("No subject returned from Gmail"); - } - - const body = headers.find((header) => header.name === "Body"); - - if (!body) { - throw new Error("No body returned from Gmail"); - } - - const from = headers.find((header) => header.name === "From"); - - if 
(!from) { - throw new Error("No from returned from Gmail"); - } - - const to = headers.find((header) => header.name === "To"); - - if (!to) { - throw new Error("No to returned from Gmail"); - } - - const date = headers.find((header) => header.name === "Date"); - - if (!date) { - throw new Error("No date returned from Gmail"); - } - - const messageIdHeader = headers.find( - (header) => header.name === "Message-ID" - ); - - if (!messageIdHeader) { - throw new Error("No message id returned from Gmail"); - } - - return { - subject: subject.value, - body: body.value, - from: from.value, - to: to.value, - date: date.value, - messageId: messageIdHeader.value, - }; - }) - )}`; - } -} - -export type GetThreadSchema = { - threadId: string; -}; diff --git a/langchain/src/tools/gmail/index.ts b/langchain/src/tools/gmail/index.ts index d2f854da54a4..1756394505f0 100644 --- a/langchain/src/tools/gmail/index.ts +++ b/langchain/src/tools/gmail/index.ts @@ -1,12 +1 @@ -export { GmailCreateDraft } from "./create_draft.js"; -export { GmailGetMessage } from "./get_message.js"; -export { GmailGetThread } from "./get_thread.js"; -export { GmailSearch } from "./search.js"; -export { GmailSendMessage } from "./send_message.js"; - -export type { GmailBaseToolParams } from "./base.js"; -export type { CreateDraftSchema } from "./create_draft.js"; -export type { GetMessageSchema } from "./get_message.js"; -export type { GetThreadSchema } from "./get_thread.js"; -export type { SearchSchema } from "./search.js"; -export type { SendMessageSchema } from "./send_message.js"; +export * from "@langchain/community/tools/gmail"; \ No newline at end of file diff --git a/langchain/src/tools/gmail/search.ts b/langchain/src/tools/gmail/search.ts deleted file mode 100644 index 9957a11c8c3b..000000000000 --- a/langchain/src/tools/gmail/search.ts +++ /dev/null @@ -1,135 +0,0 @@ -import { gmail_v1 } from "googleapis"; -import { z } from "zod"; -import { GmailBaseTool, GmailBaseToolParams } from "./base.js"; 
-import { SEARCH_DESCRIPTION } from "./descriptions.js"; - -export class GmailSearch extends GmailBaseTool { - name = "search_gmail"; - - schema = z.object({ - query: z.string(), - maxResults: z.number().optional(), - resource: z.enum(["messages", "threads"]).optional(), - }); - - description = SEARCH_DESCRIPTION; - - constructor(fields?: GmailBaseToolParams) { - super(fields); - } - - async _call(arg: z.output) { - const { query, maxResults = 10, resource = "messages" } = arg; - - const response = await this.gmail.users.messages.list({ - userId: "me", - q: query, - maxResults, - }); - - const { data } = response; - - if (!data) { - throw new Error("No data returned from Gmail"); - } - - const { messages } = data; - - if (!messages) { - throw new Error("No messages returned from Gmail"); - } - - if (resource === "messages") { - const parsedMessages = await this.parseMessages(messages); - return `Result for the query ${query}:\n${JSON.stringify( - parsedMessages - )}`; - } else if (resource === "threads") { - const parsedThreads = await this.parseThreads(messages); - return `Result for the query ${query}:\n${JSON.stringify(parsedThreads)}`; - } - - throw new Error(`Invalid resource: ${resource}`); - } - - async parseMessages( - messages: gmail_v1.Schema$Message[] - ): Promise { - const parsedMessages = await Promise.all( - messages.map(async (message) => { - const messageData = await this.gmail.users.messages.get({ - userId: "me", - format: "raw", - id: message.id ?? "", - }); - - const headers = messageData.data.payload?.headers || []; - - const subject = headers.find((header) => header.name === "Subject"); - const sender = headers.find((header) => header.name === "From"); - - let body = ""; - if (messageData.data.payload?.parts) { - body = messageData.data.payload.parts - .map((part) => part.body?.data ?? 
"") - .join(""); - } else if (messageData.data.payload?.body?.data) { - body = messageData.data.payload.body.data; - } - - return { - id: message.id, - threadId: message.threadId, - snippet: message.snippet, - body, - subject, - sender, - }; - }) - ); - return parsedMessages; - } - - async parseThreads( - threads: gmail_v1.Schema$Thread[] - ): Promise { - const parsedThreads = await Promise.all( - threads.map(async (thread) => { - const threadData = await this.gmail.users.threads.get({ - userId: "me", - format: "raw", - id: thread.id ?? "", - }); - - const headers = threadData.data.messages?.[0]?.payload?.headers || []; - - const subject = headers.find((header) => header.name === "Subject"); - const sender = headers.find((header) => header.name === "From"); - - let body = ""; - if (threadData.data.messages?.[0]?.payload?.parts) { - body = threadData.data.messages[0].payload.parts - .map((part) => part.body?.data ?? "") - .join(""); - } else if (threadData.data.messages?.[0]?.payload?.body?.data) { - body = threadData.data.messages[0].payload.body.data; - } - - return { - id: thread.id, - snippet: thread.snippet, - body, - subject, - sender, - }; - }) - ); - return parsedThreads; - } -} - -export type SearchSchema = { - query: string; - maxResults?: number; - resource?: "messages" | "threads"; -}; diff --git a/langchain/src/tools/gmail/send_message.ts b/langchain/src/tools/gmail/send_message.ts deleted file mode 100644 index b995dfbff0b2..000000000000 --- a/langchain/src/tools/gmail/send_message.ts +++ /dev/null @@ -1,84 +0,0 @@ -import { z } from "zod"; -import { GmailBaseTool, GmailBaseToolParams } from "./base.js"; -import { GET_MESSAGE_DESCRIPTION } from "./descriptions.js"; - -export class GmailSendMessage extends GmailBaseTool { - name = "gmail_send_message"; - - schema = z.object({ - message: z.string(), - to: z.array(z.string()), - subject: z.string(), - cc: z.array(z.string()).optional(), - bcc: z.array(z.string()).optional(), - }); - - description = 
GET_MESSAGE_DESCRIPTION; - - constructor(fields?: GmailBaseToolParams) { - super(fields); - } - - private createEmailMessage({ - message, - to, - subject, - cc, - bcc, - }: z.infer): string { - const emailLines: string[] = []; - - // Format the recipient(s) - const formatEmailList = (emails: string | string[]): string => - Array.isArray(emails) ? emails.join(",") : emails; - - emailLines.push(`To: ${formatEmailList(to)}`); - if (cc) emailLines.push(`Cc: ${formatEmailList(cc)}`); - if (bcc) emailLines.push(`Bcc: ${formatEmailList(bcc)}`); - emailLines.push(`Subject: ${subject}`); - emailLines.push(""); - emailLines.push(message); - - // Convert the email message to base64url string - const email = emailLines.join("\r\n").trim(); - // this encode may be an issue - return Buffer.from(email).toString("base64url"); - } - - async _call({ - message, - to, - subject, - cc, - bcc, - }: z.output): Promise { - const rawMessage = this.createEmailMessage({ - message, - to, - subject, - cc, - bcc, - }); - - try { - const response = await this.gmail.users.messages.send({ - userId: "me", - requestBody: { - raw: rawMessage, - }, - }); - - return `Message sent. Message Id: ${response.data.id}`; - } catch (error) { - throw new Error(`An error occurred while sending the message: ${error}`); - } - } -} - -export type SendMessageSchema = { - message: string; - to: string[]; - subject: string; - cc?: string[]; - bcc?: string[]; -}; diff --git a/langchain/src/tools/google_custom_search.ts b/langchain/src/tools/google_custom_search.ts index 003353d2419a..7fc55bf7e3b7 100644 --- a/langchain/src/tools/google_custom_search.ts +++ b/langchain/src/tools/google_custom_search.ts @@ -1,83 +1 @@ -import { getEnvironmentVariable } from "../util/env.js"; -import { Tool } from "./base.js"; - -/** - * Interface for parameters required by GoogleCustomSearch class. 
- */ -export interface GoogleCustomSearchParams { - apiKey?: string; - googleCSEId?: string; -} - -/** - * Class that uses the Google Search API to perform custom searches. - * Requires environment variables `GOOGLE_API_KEY` and `GOOGLE_CSE_ID` to - * be set. - */ -export class GoogleCustomSearch extends Tool { - static lc_name() { - return "GoogleCustomSearch"; - } - - get lc_secrets(): { [key: string]: string } | undefined { - return { - apiKey: "GOOGLE_API_KEY", - }; - } - - name = "google-custom-search"; - - protected apiKey: string; - - protected googleCSEId: string; - - description = - "a custom search engine. useful for when you need to answer questions about current events. input should be a search query. outputs a JSON array of results."; - - constructor( - fields: GoogleCustomSearchParams = { - apiKey: getEnvironmentVariable("GOOGLE_API_KEY"), - googleCSEId: getEnvironmentVariable("GOOGLE_CSE_ID"), - } - ) { - super(...arguments); - if (!fields.apiKey) { - throw new Error( - `Google API key not set. You can set it as "GOOGLE_API_KEY" in your environment variables.` - ); - } - if (!fields.googleCSEId) { - throw new Error( - `Google custom search engine id not set. You can set it as "GOOGLE_CSE_ID" in your environment variables.` - ); - } - this.apiKey = fields.apiKey; - this.googleCSEId = fields.googleCSEId; - } - - async _call(input: string) { - const res = await fetch( - `https://www.googleapis.com/customsearch/v1?key=${this.apiKey}&cx=${ - this.googleCSEId - }&q=${encodeURIComponent(input)}` - ); - - if (!res.ok) { - throw new Error( - `Got ${res.status} error from Google custom search: ${res.statusText}` - ); - } - - const json = await res.json(); - - const results = - json?.items?.map( - (item: { title?: string; link?: string; snippet?: string }) => ({ - title: item.title, - link: item.link, - snippet: item.snippet, - }) - ) ?? 
[]; - return JSON.stringify(results); - } -} +export * from "@langchain/community/tools/google_custom_search"; \ No newline at end of file diff --git a/langchain/src/tools/google_places.ts b/langchain/src/tools/google_places.ts index 826a33e22c74..518b22160db9 100644 --- a/langchain/src/tools/google_places.ts +++ b/langchain/src/tools/google_places.ts @@ -1,96 +1 @@ -import { getEnvironmentVariable } from "../util/env.js"; -import { Tool } from "./base.js"; - -/** - * Interface for parameters required by GooglePlacesAPI class. - */ -export interface GooglePlacesAPIParams { - apiKey?: string; -} - -/** - * Tool that queries the Google Places API - */ -export class GooglePlacesAPI extends Tool { - static lc_name() { - return "GooglePlacesAPI"; - } - - get lc_secrets(): { [key: string]: string } | undefined { - return { - apiKey: "GOOGLE_PLACES_API_KEY", - }; - } - - name = "google_places"; - - protected apiKey: string; - - description = `A wrapper around Google Places API. Useful for when you need to validate or - discover addresses from ambiguous text. Input should be a search query.`; - - constructor(fields?: GooglePlacesAPIParams) { - super(...arguments); - const apiKey = - fields?.apiKey ?? getEnvironmentVariable("GOOGLE_PLACES_API_KEY"); - if (apiKey === undefined) { - throw new Error( - `Google Places API key not set. 
You can set it as "GOOGLE_PLACES_API_KEY" in your environment variables.` - ); - } - this.apiKey = apiKey; - } - - async _call(input: string) { - const res = await fetch( - `https://places.googleapis.com/v1/places:searchText`, - { - method: "POST", - body: JSON.stringify({ - textQuery: input, - languageCode: "en", - }), - headers: { - "X-Goog-Api-Key": this.apiKey, - "X-Goog-FieldMask": - "places.displayName,places.formattedAddress,places.id,places.internationalPhoneNumber,places.websiteUri", - "Content-Type": "application/json", - }, - } - ); - - if (!res.ok) { - let message; - try { - const json = await res.json(); - message = json.error.message; - } catch (e) { - message = - "Unable to parse error message: Google did not return a JSON response."; - } - throw new Error( - `Got ${res.status}: ${res.statusText} error from Google Places API: ${message}` - ); - } - - const json = await res.json(); - - const results = - json?.places?.map( - (place: { - id?: string; - internationalPhoneNumber?: string; - formattedAddress?: string; - websiteUri?: string; - displayName?: { text?: string }; - }) => ({ - name: place.displayName?.text, - id: place.id, - address: place.formattedAddress, - phoneNumber: place.internationalPhoneNumber, - website: place.websiteUri, - }) - ) ?? 
[]; - return JSON.stringify(results); - } -} +export * from "@langchain/community/tools/google_places"; \ No newline at end of file diff --git a/langchain/src/tools/searchapi.ts b/langchain/src/tools/searchapi.ts index fb48c34581db..98eec816d9fb 100644 --- a/langchain/src/tools/searchapi.ts +++ b/langchain/src/tools/searchapi.ts @@ -1,204 +1 @@ -import { getEnvironmentVariable } from "../util/env.js"; -import { Tool } from "./base.js"; - -type JSONPrimitive = string | number | boolean | null; -type JSONValue = JSONPrimitive | JSONObject | JSONArray; -interface JSONObject { - [key: string]: JSONValue; -} -interface JSONArray extends Array {} - -function isJSONObject(value: JSONValue): value is JSONObject { - return value !== null && typeof value === "object" && !Array.isArray(value); -} - -/** - * SearchApiParameters Type Definition. - * - * For more parameters and supported search engines, refer specific engine documentation: - * Google - https://www.searchapi.io/docs/google - * Google News - https://www.searchapi.io/docs/google-news - * Google Scholar - https://www.searchapi.io/docs/google-scholar - * YouTube Transcripts - https://www.searchapi.io/docs/youtube-transcripts - * and others. - * - */ -export type SearchApiParameters = { - [key: string]: JSONValue; -}; - -/** - * SearchApi Class Definition. - * - * Provides a wrapper around the SearchApi. - * - * Ensure you've set the SEARCHAPI_API_KEY environment variable for authentication. - * You can obtain a free API key from https://www.searchapi.io/. 
- * @example - * ```typescript - * const searchApi = new SearchApi("your-api-key", { - * engine: "google_news", - * }); - * const agent = RunnableSequence.from([ - * ChatPromptTemplate.fromMessages([ - * ["ai", "Answer the following questions using a bulleted list markdown format.""], - * ["human", "{input}"], - * ]), - * new ChatOpenAI({ temperature: 0 }), - * (input: BaseMessageChunk) => ({ - * log: "test", - * returnValues: { - * output: input, - * }, - * }), - * ]); - * const executor = AgentExecutor.fromAgentAndTools({ - * agent, - * tools: [searchApi], - * }); - * const res = await executor.invoke({ - * input: "What's happening in Ukraine today?"", - * }); - * console.log(res); - * ``` - */ -export class SearchApi extends Tool { - static lc_name() { - return "SearchApi"; - } - - /** - * Converts the SearchApi instance to JSON. This method is not implemented - * and will throw an error if called. - * @returns Throws an error. - */ - toJSON() { - return this.toJSONNotImplemented(); - } - - protected apiKey: string; - - protected params: Partial; - - constructor( - apiKey: string | undefined = getEnvironmentVariable("SEARCHAPI_API_KEY"), - params: Partial = {} - ) { - super(...arguments); - - if (!apiKey) { - throw new Error( - "SearchApi requires an API key. Please set it as SEARCHAPI_API_KEY in your .env file, or pass it as a parameter to the SearchApi constructor." - ); - } - - this.apiKey = apiKey; - this.params = params; - } - - name = "search"; - - /** - * Builds a URL for the SearchApi request. - * @param parameters The parameters for the request. - * @returns A string representing the built URL. 
- */ - protected buildUrl(searchQuery: string): string { - const preparedParams: [string, string][] = Object.entries({ - engine: "google", - api_key: this.apiKey, - ...this.params, - q: searchQuery, - }) - .filter( - ([key, value]) => - value !== undefined && value !== null && key !== "apiKey" - ) - .map(([key, value]) => [key, `${value}`]); - - const searchParams = new URLSearchParams(preparedParams); - return `https://www.searchapi.io/api/v1/search?${searchParams}`; - } - - /** @ignore */ - /** - * Calls the SearchAPI. - * - * Accepts an input query and fetches the result from SearchApi. - * - * @param {string} input - Search query. - * @returns {string} - Formatted search results or an error message. - * - * NOTE: This method is the core search handler and processes various types - * of search results including Google organic results, videos, jobs, and images. - */ - async _call(input: string) { - const resp = await fetch(this.buildUrl(input)); - - const json = await resp.json(); - - if (json.error) { - throw new Error( - `Failed to load search results from SearchApi due to: ${json.error}` - ); - } - - // Google Search results - if (json.answer_box?.answer) { - return json.answer_box.answer; - } - - if (json.answer_box?.snippet) { - return json.answer_box.snippet; - } - - if (json.knowledge_graph?.description) { - return json.knowledge_graph.description; - } - - // Organic results (Google, Google News) - if (json.organic_results) { - const snippets = json.organic_results - .filter((r: JSONObject) => r.snippet) - .map((r: JSONObject) => r.snippet); - return snippets.join("\n"); - } - - // Google Jobs results - if (json.jobs) { - const jobDescriptions = json.jobs - .slice(0, 1) - .filter((r: JSONObject) => r.description) - .map((r: JSONObject) => r.description); - return jobDescriptions.join("\n"); - } - - // Google Videos results - if (json.videos) { - const videoInfo = json.videos - .filter((r: JSONObject) => r.title && r.link) - .map((r: JSONObject) => `Title: 
"${r.title}" Link: ${r.link}`); - return videoInfo.join("\n"); - } - - // Google Images results - if (json.images) { - const image_results = json.images.slice(0, 15); - const imageInfo = image_results - .filter( - (r: JSONObject) => - r.title && r.original && isJSONObject(r.original) && r.original.link - ) - .map( - (r: JSONObject) => - `Title: "${r.title}" Link: ${(r.original as JSONObject).link}` - ); - return imageInfo.join("\n"); - } - - return "No good search result found"; - } - - description = - "a search engine. useful for when you need to answer questions about current events. input should be a search query."; -} +export * from "@langchain/community/tools/searchapi"; \ No newline at end of file diff --git a/langchain/src/tools/searxng_search.ts b/langchain/src/tools/searxng_search.ts index 8b1c353e949d..6fe25a45e714 100644 --- a/langchain/src/tools/searxng_search.ts +++ b/langchain/src/tools/searxng_search.ts @@ -1,258 +1 @@ -import { getEnvironmentVariable } from "../util/env.js"; -import { Tool } from "./base.js"; - -/** - * Interface for the results returned by the Searxng search. - */ -interface SearxngResults { - query: string; - number_of_results: number; - results: Array<{ - url: string; - title: string; - content: string; - img_src: string; - engine: string; - parsed_url: Array; - template: string; - engines: Array; - positions: Array; - score: number; - category: string; - pretty_url: string; - open_group?: boolean; - close_group?: boolean; - }>; - answers: Array; - corrections: Array; - infoboxes: Array<{ - infobox: string; - content: string; - engine: string; - engines: Array; - }>; - suggestions: Array; - unresponsive_engines: Array; -} - -/** - * Interface for custom headers used in the Searxng search. 
- */ -interface SearxngCustomHeaders { - [key: string]: string; -} - -interface SearxngSearchParams { - /** - * @default 10 - * Number of results included in results - */ - numResults?: number; - /** Comma separated list, specifies the active search categories - * https://docs.searxng.org/user/configured_engines.html#configured-engines - */ - categories?: string; - - /** Comma separated list, specifies the active search engines - * https://docs.searxng.org/user/configured_engines.html#configured-engines - */ - engines?: string; - - /** Code of the language. */ - language?: string; - /** Search page number. */ - pageNumber?: number; - /** - * day / month / year - * - * Time range of search for engines which support it. See if an engine supports time range search in the preferences page of an instance. - */ - timeRange?: number; - - /** - * Throws Error if format is set anything other than "json" - * Output format of results. Format needs to be activated in search: - */ - format?: "json"; - /** Open search results on new tab. */ - resultsOnNewTab?: 0 | 1; - /** Proxy image results through SearXNG. */ - imageProxy?: boolean; - autocomplete?: string; - /** - * Filter search results of engines which support safe search. See if an engine supports safe search in the preferences page of an instance. - */ - safesearch?: 0 | 1 | 2; -} - -/** - * SearxngSearch class represents a meta search engine tool. - * Use this class when you need to answer questions about current events. - * The input should be a search query, and the output is a JSON array of the query results. 
- * - * note: works best with *agentType*: `structured-chat-zero-shot-react-description` - * https://github.com/searxng/searxng - * @example - * ```typescript - * const executor = AgentExecutor.fromAgentAndTools({ - * agent, - * tools: [ - * new SearxngSearch({ - * params: { - * format: "json", - * engines: "google", - * }, - * headers: {}, - * }), - * ], - * }); - * const result = await executor.invoke({ - * input: `What is Langchain? Describe in 50 words`, - * }); - * ``` - */ -export class SearxngSearch extends Tool { - static lc_name() { - return "SearxngSearch"; - } - - name = "searxng-search"; - - description = - "A meta search engine. Useful for when you need to answer questions about current events. Input should be a search query. Output is a JSON array of the query results"; - - protected apiBase?: string; - - protected params?: SearxngSearchParams = { - numResults: 10, - pageNumber: 1, - format: "json", - imageProxy: true, - safesearch: 0, - }; - - protected headers?: SearxngCustomHeaders; - - get lc_secrets(): { [key: string]: string } | undefined { - return { - apiBase: "SEARXNG_API_BASE", - }; - } - - /** - * Constructor for the SearxngSearch class - * @param apiBase Base URL of the Searxng instance - * @param params SearxNG parameters - * @param headers Custom headers - */ - constructor({ - apiBase, - params, - headers, - }: { - /** Base URL of Searxng instance */ - apiBase?: string; - - /** SearxNG Paramerters - * - * https://docs.searxng.org/dev/search_api.html check here for more details - */ - params?: SearxngSearchParams; - - /** - * Custom headers - * Set custom headers if you're using a api from RapidAPI (https://rapidapi.com/iamrony777/api/searxng) - * No headers needed for a locally self-hosted instance - */ - headers?: SearxngCustomHeaders; - }) { - super(...arguments); - - this.apiBase = getEnvironmentVariable("SEARXNG_API_BASE") || apiBase; - this.headers = { "content-type": "application/json", ...headers }; - - if (!this.apiBase) { - 
throw new Error( - `SEARXNG_API_BASE not set. You can set it as "SEARXNG_API_BASE" in your environment variables.` - ); - } - - if (params) { - this.params = { ...this.params, ...params }; - } - } - - /** - * Builds the URL for the Searxng search. - * @param path The path for the URL. - * @param parameters The parameters for the URL. - * @param baseUrl The base URL. - * @returns The complete URL as a string. - */ - protected buildUrl

( - path: string, - parameters: P, - baseUrl: string - ): string { - const nonUndefinedParams: [string, string][] = Object.entries(parameters) - .filter(([_, value]) => value !== undefined) - .map(([key, value]) => [key, value.toString()]); // Avoid string conversion - const searchParams = new URLSearchParams(nonUndefinedParams); - return `${baseUrl}/${path}?${searchParams}`; - } - - async _call(input: string): Promise { - const queryParams = { - q: input, - ...this.params, - }; - const url = this.buildUrl("search", queryParams, this.apiBase as string); - - const resp = await fetch(url, { - method: "POST", - headers: this.headers, - signal: AbortSignal.timeout(5 * 1000), // 5 seconds - }); - - if (!resp.ok) { - throw new Error(resp.statusText); - } - - const res: SearxngResults = await resp.json(); - - if ( - !res.results.length && - !res.answers.length && - !res.infoboxes.length && - !res.suggestions.length - ) { - return "No good results found."; - } else if (res.results.length) { - const response: string[] = []; - - res.results.forEach((r) => { - response.push( - JSON.stringify({ - title: r.title || "", - link: r.url || "", - snippet: r.content || "", - }) - ); - }); - - return response.slice(0, this.params?.numResults).toString(); - } else if (res.answers.length) { - return res.answers[0]; - } else if (res.infoboxes.length) { - return res.infoboxes[0]?.content.replaceAll(/<[^>]+>/gi, ""); - } else if (res.suggestions.length) { - let suggestions = "Suggestions: "; - res.suggestions.forEach((s) => { - suggestions += `${s}, `; - }); - return suggestions; - } else { - return "No good results found."; - } - } -} +export * from "@langchain/community/tools/searxng_search"; \ No newline at end of file diff --git a/langchain/src/tools/serpapi.ts b/langchain/src/tools/serpapi.ts index fc3044e9d658..b4b4c57b92e8 100644 --- a/langchain/src/tools/serpapi.ts +++ b/langchain/src/tools/serpapi.ts @@ -1,505 +1 @@ -import { getEnvironmentVariable } from "../util/env.js"; -import 
{ Tool } from "./base.js"; - -/** - * This does not use the `serpapi` package because it appears to cause issues - * when used in `jest` tests. Part of the issue seems to be that the `serpapi` - * package imports a wasm module to use instead of native `fetch`, which we - * don't want anyway. - * - * NOTE: you must provide location, gl and hl or your region and language will - * may not match your location, and will not be deterministic. - */ - -// Copied over from `serpapi` package -interface BaseParameters { - /** - * Parameter defines the device to use to get the results. It can be set to - * `desktop` (default) to use a regular browser, `tablet` to use a tablet browser - * (currently using iPads), or `mobile` to use a mobile browser (currently - * using iPhones). - */ - device?: "desktop" | "tablet" | "mobile"; - /** - * Parameter will force SerpApi to fetch the Google results even if a cached - * version is already present. A cache is served only if the query and all - * parameters are exactly the same. Cache expires after 1h. Cached searches - * are free, and are not counted towards your searches per month. It can be set - * to `false` (default) to allow results from the cache, or `true` to disallow - * results from the cache. `no_cache` and `async` parameters should not be used together. - */ - no_cache?: boolean; - /** - * Specify the client-side timeout of the request. In milliseconds. - */ - timeout?: number; -} - -export interface SerpAPIParameters extends BaseParameters { - /** - * Search Query - * Parameter defines the query you want to search. You can use anything that you - * would use in a regular Google search. e.g. `inurl:`, `site:`, `intitle:`. We - * also support advanced search query parameters such as as_dt and as_eq. See the - * [full list](https://serpapi.com/advanced-google-query-parameters) of supported - * advanced search query parameters. 
- */ - q: string; - /** - * Location - * Parameter defines from where you want the search to originate. If several - * locations match the location requested, we'll pick the most popular one. Head to - * [/locations.json API](https://serpapi.com/locations-api) if you need more - * precise control. location and uule parameters can't be used together. Avoid - * utilizing location when setting the location outside the U.S. when using Google - * Shopping and/or Google Product API. - */ - location?: string; - /** - * Encoded Location - * Parameter is the Google encoded location you want to use for the search. uule - * and location parameters can't be used together. - */ - uule?: string; - /** - * Google Place ID - * Parameter defines the id (`CID`) of the Google My Business listing you want to - * scrape. Also known as Google Place ID. - */ - ludocid?: string; - /** - * Additional Google Place ID - * Parameter that you might have to use to force the knowledge graph map view to - * show up. You can find the lsig ID by using our [Local Pack - * API](https://serpapi.com/local-pack) or [Local Places Results - * API](https://serpapi.com/local-results). - * lsig ID is also available via a redirect Google uses within [Google My - * Business](https://www.google.com/business/). - */ - lsig?: string; - /** - * Google Knowledge Graph ID - * Parameter defines the id (`KGMID`) of the Google Knowledge Graph listing you - * want to scrape. Also known as Google Knowledge Graph ID. Searches with kgmid - * parameter will return results for the originally encrypted search parameters. - * For some searches, kgmid may override all other parameters except start, and num - * parameters. - */ - kgmid?: string; - /** - * Google Cached Search Parameters ID - * Parameter defines the cached search parameters of the Google Search you want to - * scrape. Searches with si parameter will return results for the originally - * encrypted search parameters. 
For some searches, si may override all other - * parameters except start, and num parameters. si can be used to scrape Google - * Knowledge Graph Tabs. - */ - si?: string; - /** - * Domain - * Parameter defines the Google domain to use. It defaults to `google.com`. Head to - * the [Google domains page](https://serpapi.com/google-domains) for a full list of - * supported Google domains. - */ - google_domain?: string; - /** - * Country - * Parameter defines the country to use for the Google search. It's a two-letter - * country code. (e.g., `us` for the United States, `uk` for United Kingdom, or - * `fr` for France). Head to the [Google countries - * page](https://serpapi.com/google-countries) for a full list of supported Google - * countries. - */ - gl?: string; - /** - * Language - * Parameter defines the language to use for the Google search. It's a two-letter - * language code. (e.g., `en` for English, `es` for Spanish, or `fr` for French). - * Head to the [Google languages page](https://serpapi.com/google-languages) for a - * full list of supported Google languages. - */ - hl?: string; - /** - * Set Multiple Languages - * Parameter defines one or multiple languages to limit the search to. It uses - * `lang_{two-letter language code}` to specify languages and `|` as a delimiter. - * (e.g., `lang_fr|lang_de` will only search French and German pages). Head to the - * [Google lr languages page](https://serpapi.com/google-lr-languages) for a full - * list of supported languages. - */ - lr?: string; - /** - * as_dt - * Parameter controls whether to include or exclude results from the site named in - * the as_sitesearch parameter. - */ - as_dt?: string; - /** - * as_epq - * Parameter identifies a phrase that all documents in the search results must - * contain. You can also use the [phrase - * search](https://developers.google.com/custom-search/docs/xml_results#PhraseSearchqt) - * query term to search for a phrase. 
- */ - as_epq?: string; - /** - * as_eq - * Parameter identifies a word or phrase that should not appear in any documents in - * the search results. You can also use the [exclude - * query](https://developers.google.com/custom-search/docs/xml_results#Excludeqt) - * term to ensure that a particular word or phrase will not appear in the documents - * in a set of search results. - */ - as_eq?: string; - /** - * as_lq - * Parameter specifies that all search results should contain a link to a - * particular URL. You can also use the - * [link:](https://developers.google.com/custom-search/docs/xml_results#BackLinksqt) - * query term for this type of query. - */ - as_lq?: string; - /** - * as_nlo - * Parameter specifies the starting value for a search range. Use as_nlo and as_nhi - * to append an inclusive search range. - */ - as_nlo?: string; - /** - * as_nhi - * Parameter specifies the ending value for a search range. Use as_nlo and as_nhi - * to append an inclusive search range. - */ - as_nhi?: string; - /** - * as_oq - * Parameter provides additional search terms to check for in a document, where - * each document in the search results must contain at least one of the additional - * search terms. You can also use the [Boolean - * OR](https://developers.google.com/custom-search/docs/xml_results#BooleanOrqt) - * query term for this type of query. - */ - as_oq?: string; - /** - * as_q - * Parameter provides search terms to check for in a document. This parameter is - * also commonly used to allow users to specify additional terms to search for - * within a set of search results. - */ - as_q?: string; - /** - * as_qdr - * Parameter requests search results from a specified time period (quick date - * range). The following values are supported: - * `d[number]`: requests results from the specified number of past days. Example - * for the past 10 days: `as_qdr=d10` - * `w[number]`: requests results from the specified number of past weeks. 
- * `m[number]`: requests results from the specified number of past months. - * `y[number]`: requests results from the specified number of past years. Example - * for the past year: `as_qdr=y` - */ - as_qdr?: string; - /** - * as_rq - * Parameter specifies that all search results should be pages that are related to - * the specified URL. The parameter value should be a URL. You can also use the - * [related:](https://developers.google.com/custom-search/docs/xml_results#RelatedLinksqt) - * query term for this type of query. - */ - as_rq?: string; - /** - * as_sitesearch - * Parameter allows you to specify that all search results should be pages from a - * given site. By setting the as_dt parameter, you can also use it to exclude pages - * from a given site from your search resutls. - */ - as_sitesearch?: string; - /** - * Advanced Search Parameters - * (to be searched) parameter defines advanced search parameters that aren't - * possible in the regular query field. (e.g., advanced search for patents, dates, - * news, videos, images, apps, or text contents). - */ - tbs?: string; - /** - * Adult Content Filtering - * Parameter defines the level of filtering for adult content. It can be set to - * `active`, or `off` (default). - */ - safe?: string; - /** - * Exclude Auto-corrected Results - * Parameter defines the exclusion of results from an auto-corrected query that is - * spelled wrong. It can be set to `1` to exclude these results, or `0` to include - * them (default). - */ - nfpr?: string; - /** - * Results Filtering - * Parameter defines if the filters for 'Similar Results' and 'Omitted Results' are - * on or off. It can be set to `1` (default) to enable these filters, or `0` to - * disable these filters. - */ - filter?: string; - /** - * Search Type - * (to be matched) parameter defines the type of search you want to do. 
- * It can be set to: - * `(no tbm parameter)`: regular Google Search, - * `isch`: [Google Images API](https://serpapi.com/images-results), - * `lcl` - [Google Local API](https://serpapi.com/local-results) - * `vid`: [Google Videos API](https://serpapi.com/videos-results), - * `nws`: [Google News API](https://serpapi.com/news-results), - * `shop`: [Google Shopping API](https://serpapi.com/shopping-results), - * or any other Google service. - */ - tbm?: string; - /** - * Result Offset - * Parameter defines the result offset. It skips the given number of results. It's - * used for pagination. (e.g., `0` (default) is the first page of results, `10` is - * the 2nd page of results, `20` is the 3rd page of results, etc.). - * Google Local Results only accepts multiples of `20`(e.g. `20` for the second - * page results, `40` for the third page results, etc.) as the start value. - */ - start?: number; - /** - * Number of Results - * Parameter defines the maximum number of results to return. (e.g., `10` (default) - * returns 10 results, `40` returns 40 results, and `100` returns 100 results). - */ - num?: string; - /** - * Page Number (images) - * Parameter defines the page number for [Google - * Images](https://serpapi.com/images-results). There are 100 images per page. This - * parameter is equivalent to start (offset) = ijn * 100. This parameter works only - * for [Google Images](https://serpapi.com/images-results) (set tbm to `isch`). - */ - ijn?: string; -} - -type UrlParameters = Record< - string, - string | number | boolean | undefined | null ->; - -/** - * Wrapper around SerpAPI. - * - * To use, you should have the `serpapi` package installed and the SERPAPI_API_KEY environment variable set. 
- */ -export class SerpAPI extends Tool { - static lc_name() { - return "SerpAPI"; - } - - toJSON() { - return this.toJSONNotImplemented(); - } - - protected key: string; - - protected params: Partial; - - protected baseUrl: string; - - constructor( - apiKey: string | undefined = getEnvironmentVariable("SERPAPI_API_KEY"), - params: Partial = {}, - baseUrl = "https://serpapi.com" - ) { - super(...arguments); - - if (!apiKey) { - throw new Error( - "SerpAPI API key not set. You can set it as SERPAPI_API_KEY in your .env file, or pass it to SerpAPI." - ); - } - - this.key = apiKey; - this.params = params; - this.baseUrl = baseUrl; - } - - name = "search"; - - /** - * Builds a URL for the SerpAPI request. - * @param path The path for the request. - * @param parameters The parameters for the request. - * @param baseUrl The base URL for the request. - * @returns A string representing the built URL. - */ - protected buildUrl

( - path: string, - parameters: P, - baseUrl: string - ): string { - const nonUndefinedParams: [string, string][] = Object.entries(parameters) - .filter(([_, value]) => value !== undefined) - .map(([key, value]) => [key, `${value}`]); - const searchParams = new URLSearchParams(nonUndefinedParams); - return `${baseUrl}/${path}?${searchParams}`; - } - - /** @ignore */ - async _call(input: string) { - const { timeout, ...params } = this.params; - const resp = await fetch( - this.buildUrl( - "search", - { - ...params, - api_key: this.key, - q: input, - }, - this.baseUrl - ), - { - signal: timeout ? AbortSignal.timeout(timeout) : undefined, - } - ); - - const res = await resp.json(); - - if (res.error) { - throw new Error(`Got error from serpAPI: ${res.error}`); - } - - const answer_box = res.answer_box_list - ? res.answer_box_list[0] - : res.answer_box; - if (answer_box) { - if (answer_box.result) { - return answer_box.result; - } else if (answer_box.answer) { - return answer_box.answer; - } else if (answer_box.snippet) { - return answer_box.snippet; - } else if (answer_box.snippet_highlighted_words) { - return answer_box.snippet_highlighted_words.toString(); - } else { - const answer: { [key: string]: string } = {}; - Object.keys(answer_box) - .filter( - (k) => - !Array.isArray(answer_box[k]) && - typeof answer_box[k] !== "object" && - !( - typeof answer_box[k] === "string" && - answer_box[k].startsWith("http") - ) - ) - .forEach((k) => { - answer[k] = answer_box[k]; - }); - return JSON.stringify(answer); - } - } - - if (res.events_results) { - return JSON.stringify(res.events_results); - } - - if (res.sports_results) { - return JSON.stringify(res.sports_results); - } - - if (res.top_stories) { - return JSON.stringify(res.top_stories); - } - - if (res.news_results) { - return JSON.stringify(res.news_results); - } - - if (res.jobs_results?.jobs) { - return JSON.stringify(res.jobs_results.jobs); - } - - if (res.questions_and_answers) { - return 
JSON.stringify(res.questions_and_answers); - } - - if (res.popular_destinations?.destinations) { - return JSON.stringify(res.popular_destinations.destinations); - } - - if (res.top_sights?.sights) { - const sights: Array<{ [key: string]: string }> = res.top_sights.sights - .map((s: { [key: string]: string }) => ({ - title: s.title, - description: s.description, - price: s.price, - })) - .slice(0, 8); - return JSON.stringify(sights); - } - - if (res.shopping_results && res.shopping_results[0]?.title) { - return JSON.stringify(res.shopping_results.slice(0, 3)); - } - - if (res.images_results && res.images_results[0]?.thumbnail) { - return res.images_results - .map((ir: { thumbnail: string }) => ir.thumbnail) - .slice(0, 10) - .toString(); - } - - const snippets = []; - if (res.knowledge_graph) { - if (res.knowledge_graph.description) { - snippets.push(res.knowledge_graph.description); - } - - const title = res.knowledge_graph.title || ""; - Object.keys(res.knowledge_graph) - .filter( - (k) => - typeof res.knowledge_graph[k] === "string" && - k !== "title" && - k !== "description" && - !k.endsWith("_stick") && - !k.endsWith("_link") && - !k.startsWith("http") - ) - .forEach((k) => - snippets.push(`${title} ${k}: ${res.knowledge_graph[k]}`) - ); - } - - const first_organic_result = res.organic_results?.[0]; - if (first_organic_result) { - if (first_organic_result.snippet) { - snippets.push(first_organic_result.snippet); - } else if (first_organic_result.snippet_highlighted_words) { - snippets.push(first_organic_result.snippet_highlighted_words); - } else if (first_organic_result.rich_snippet) { - snippets.push(first_organic_result.rich_snippet); - } else if (first_organic_result.rich_snippet_table) { - snippets.push(first_organic_result.rich_snippet_table); - } else if (first_organic_result.link) { - snippets.push(first_organic_result.link); - } - } - - if (res.buying_guide) { - snippets.push(res.buying_guide); - } - - if (res.local_results?.places) { - 
snippets.push(res.local_results.places); - } - - if (snippets.length > 0) { - return JSON.stringify(snippets); - } else { - return "No good search result found"; - } - } - - description = - "a search engine. useful for when you need to answer questions about current events. input should be a search query."; -} +export * from "@langchain/community/tools/serpapi"; \ No newline at end of file diff --git a/langchain/src/tools/serper.ts b/langchain/src/tools/serper.ts index 622dd6398aa9..e3b237d66e84 100644 --- a/langchain/src/tools/serper.ts +++ b/langchain/src/tools/serper.ts @@ -1,107 +1 @@ -import { getEnvironmentVariable } from "../util/env.js"; -import { Tool } from "./base.js"; - -/** - * Defines the parameters that can be passed to the Serper class during - * instantiation. It includes `gl` and `hl` which are optional. - */ -export type SerperParameters = { - gl?: string; - hl?: string; -}; - -/** - * Wrapper around serper. - * - * You can create a free API key at https://serper.dev. - * - * To use, you should have the SERPER_API_KEY environment variable set. - */ -export class Serper extends Tool { - static lc_name() { - return "Serper"; - } - - /** - * Converts the Serper instance to JSON. This method is not implemented - * and will throw an error if called. - * @returns Throws an error. - */ - toJSON() { - return this.toJSONNotImplemented(); - } - - protected key: string; - - protected params: Partial; - - constructor( - apiKey: string | undefined = getEnvironmentVariable("SERPER_API_KEY"), - params: Partial = {} - ) { - super(); - - if (!apiKey) { - throw new Error( - "Serper API key not set. You can set it as SERPER_API_KEY in your .env file, or pass it to Serper." 
- ); - } - - this.key = apiKey; - this.params = params; - } - - name = "search"; - - /** @ignore */ - async _call(input: string) { - const options = { - method: "POST", - headers: { - "X-API-KEY": this.key, - "Content-Type": "application/json", - }, - body: JSON.stringify({ - q: input, - ...this.params, - }), - }; - - const res = await fetch("https://google.serper.dev/search", options); - - if (!res.ok) { - throw new Error(`Got ${res.status} error from serper: ${res.statusText}`); - } - - const json = await res.json(); - - if (json.answerBox?.answer) { - return json.answerBox.answer; - } - - if (json.answerBox?.snippet) { - return json.answerBox.snippet; - } - - if (json.answerBox?.snippet_highlighted_words) { - return json.answerBox.snippet_highlighted_words[0]; - } - - if (json.sportsResults?.game_spotlight) { - return json.sportsResults.game_spotlight; - } - - if (json.knowledgeGraph?.description) { - return json.knowledgeGraph.description; - } - - if (json.organic?.[0]?.snippet) { - return json.organic[0].snippet; - } - - return "No good search result found"; - } - - description = - "a search engine. useful for when you need to answer questions about current events. input should be a search query."; -} +export * from "@langchain/community/tools/serper"; \ No newline at end of file diff --git a/langchain/src/tools/wikipedia_query_run.ts b/langchain/src/tools/wikipedia_query_run.ts index 127010b46cf3..50a73f33b228 100644 --- a/langchain/src/tools/wikipedia_query_run.ts +++ b/langchain/src/tools/wikipedia_query_run.ts @@ -1,181 +1 @@ -import { Tool } from "./base.js"; - -/** - * Interface for the parameters that can be passed to the - * WikipediaQueryRun constructor. - */ -export interface WikipediaQueryRunParams { - topKResults?: number; - maxDocContentLength?: number; - baseUrl?: string; -} - -/** - * Type alias for URL parameters. Represents a record where keys are - * strings and values can be string, number, boolean, undefined, or null. 
- */ -type UrlParameters = Record< - string, - string | number | boolean | undefined | null ->; - -/** - * Interface for the structure of search results returned by the Wikipedia - * API. - */ -interface SearchResults { - query: { - search: Array<{ - title: string; - }>; - }; -} - -/** - * Interface for the structure of a page returned by the Wikipedia API. - */ -interface Page { - pageid: number; - ns: number; - title: string; - extract: string; -} - -/** - * Interface for the structure of a page result returned by the Wikipedia - * API. - */ -interface PageResult { - batchcomplete: string; - query: { - pages: Record; - }; -} - -/** - * Class for interacting with and fetching data from the Wikipedia API. It - * extends the Tool class. - * @example - * ```typescript - * const wikipediaQuery = new WikipediaQueryRun({ - * topKResults: 3, - * maxDocContentLength: 4000, - * }); - * const result = await wikipediaQuery.call("Langchain"); - * ``` - */ -export class WikipediaQueryRun extends Tool { - static lc_name() { - return "WikipediaQueryRun"; - } - - name = "wikipedia-api"; - - description = - "A tool for interacting with and fetching data from the Wikipedia API."; - - protected topKResults = 3; - - protected maxDocContentLength = 4000; - - protected baseUrl = "https://en.wikipedia.org/w/api.php"; - - constructor(params: WikipediaQueryRunParams = {}) { - super(); - - this.topKResults = params.topKResults ?? this.topKResults; - this.maxDocContentLength = - params.maxDocContentLength ?? this.maxDocContentLength; - this.baseUrl = params.baseUrl ?? 
this.baseUrl; - } - - async _call(query: string): Promise { - const searchResults = await this._fetchSearchResults(query); - const summaries: string[] = []; - - for ( - let i = 0; - i < Math.min(this.topKResults, searchResults.query.search.length); - i += 1 - ) { - const page = searchResults.query.search[i].title; - const pageDetails = await this._fetchPage(page, true); - - if (pageDetails) { - const summary = `Page: ${page}\nSummary: ${pageDetails.extract}`; - summaries.push(summary); - } - } - - if (summaries.length === 0) { - return "No good Wikipedia Search Result was found"; - } else { - return summaries.join("\n\n").slice(0, this.maxDocContentLength); - } - } - - /** - * Fetches the content of a specific Wikipedia page. It returns the - * extracted content as a string. - * @param page The specific Wikipedia page to fetch its content. - * @param redirect A boolean value to indicate whether to redirect or not. - * @returns The extracted content of the specific Wikipedia page as a string. - */ - public async content(page: string, redirect = true): Promise { - try { - const result = await this._fetchPage(page, redirect); - return result.extract; - } catch (error) { - throw new Error(`Failed to fetch content for page "${page}": ${error}`); - } - } - - /** - * Builds a URL for the Wikipedia API using the provided parameters. - * @param parameters The parameters to be used in building the URL. - * @returns A string representing the built URL. - */ - protected buildUrl

(parameters: P): string { - const nonUndefinedParams: [string, string][] = Object.entries(parameters) - .filter(([_, value]) => value !== undefined) - .map(([key, value]) => [key, `${value}`]); - const searchParams = new URLSearchParams(nonUndefinedParams); - return `${this.baseUrl}?${searchParams}`; - } - - private async _fetchSearchResults(query: string): Promise { - const searchParams = new URLSearchParams({ - action: "query", - list: "search", - srsearch: query, - format: "json", - }); - - const response = await fetch(`${this.baseUrl}?${searchParams.toString()}`); - if (!response.ok) throw new Error("Network response was not ok"); - - const data: SearchResults = await response.json(); - - return data; - } - - private async _fetchPage(page: string, redirect: boolean): Promise { - const params = new URLSearchParams({ - action: "query", - prop: "extracts", - explaintext: "true", - redirects: redirect ? "1" : "0", - format: "json", - titles: page, - }); - - const response = await fetch(`${this.baseUrl}?${params.toString()}`); - if (!response.ok) throw new Error("Network response was not ok"); - - const data: PageResult = await response.json(); - const { pages } = data.query; - const pageId = Object.keys(pages)[0]; - - return pages[pageId]; - } -} +export * from "@langchain/community/tools/wikipedia_query_run"; \ No newline at end of file diff --git a/langchain/src/tools/wolframalpha.ts b/langchain/src/tools/wolframalpha.ts index 76658428a734..fd5efb48da91 100644 --- a/langchain/src/tools/wolframalpha.ts +++ b/langchain/src/tools/wolframalpha.ts @@ -1,41 +1 @@ -import { Tool, ToolParams } from "./base.js"; - -/** - * @example - * ```typescript - * const tool = new WolframAlphaTool({ - * appid: "YOUR_APP_ID", - * }); - * const res = await tool.invoke("What is 2 * 2?"); - * ``` - */ -export class WolframAlphaTool extends Tool { - appid: string; - - name = "wolfram_alpha"; - - description = `A wrapper around Wolfram Alpha. 
Useful for when you need to answer questions about Math, Science, Technology, Culture, Society and Everyday Life. Input should be a search query.`; - - constructor(fields: ToolParams & { appid: string }) { - super(fields); - - this.appid = fields.appid; - } - - get lc_namespace() { - return [...super.lc_namespace, "wolframalpha"]; - } - - static lc_name() { - return "WolframAlphaTool"; - } - - async _call(query: string): Promise { - const url = `https://www.wolframalpha.com/api/v1/llm-api?appid=${ - this.appid - }&input=${encodeURIComponent(query)}`; - const res = await fetch(url); - - return res.text(); - } -} +export * from "@langchain/community/tools/wolframalpha"; \ No newline at end of file diff --git a/libs/langchain-community/.gitignore b/libs/langchain-community/.gitignore index ac4231d97dad..df5ca14400ed 100644 --- a/libs/langchain-community/.gitignore +++ b/libs/langchain-community/.gitignore @@ -298,15 +298,15 @@ retrievers/tavily_search_api.d.ts retrievers/zep.cjs retrievers/zep.js retrievers/zep.d.ts -cache/cloudflare_kv.cjs -cache/cloudflare_kv.js -cache/cloudflare_kv.d.ts -cache/momento.cjs -cache/momento.js -cache/momento.d.ts -cache/upstash_redis.cjs -cache/upstash_redis.js -cache/upstash_redis.d.ts +caches/cloudflare_kv.cjs +caches/cloudflare_kv.js +caches/cloudflare_kv.d.ts +caches/momento.cjs +caches/momento.js +caches/momento.d.ts +caches/upstash_redis.cjs +caches/upstash_redis.js +caches/upstash_redis.d.ts graphs/neo4j_graph.cjs graphs/neo4j_graph.js graphs/neo4j_graph.d.ts diff --git a/libs/langchain-community/cache/cloudflare_kv.cjs b/libs/langchain-community/cache/cloudflare_kv.cjs new file mode 100644 index 000000000000..b2c7c86e3052 --- /dev/null +++ b/libs/langchain-community/cache/cloudflare_kv.cjs @@ -0,0 +1 @@ +module.exports = require('../dist/cache/cloudflare_kv.cjs'); \ No newline at end of file diff --git a/libs/langchain-community/cache/cloudflare_kv.d.ts b/libs/langchain-community/cache/cloudflare_kv.d.ts new file mode 100644 
index 000000000000..694d0a8efdb2 --- /dev/null +++ b/libs/langchain-community/cache/cloudflare_kv.d.ts @@ -0,0 +1 @@ +export * from '../dist/cache/cloudflare_kv.js' \ No newline at end of file diff --git a/libs/langchain-community/cache/cloudflare_kv.js b/libs/langchain-community/cache/cloudflare_kv.js new file mode 100644 index 000000000000..694d0a8efdb2 --- /dev/null +++ b/libs/langchain-community/cache/cloudflare_kv.js @@ -0,0 +1 @@ +export * from '../dist/cache/cloudflare_kv.js' \ No newline at end of file diff --git a/libs/langchain-community/cache/momento.cjs b/libs/langchain-community/cache/momento.cjs new file mode 100644 index 000000000000..8720815d3117 --- /dev/null +++ b/libs/langchain-community/cache/momento.cjs @@ -0,0 +1 @@ +module.exports = require('../dist/cache/momento.cjs'); \ No newline at end of file diff --git a/libs/langchain-community/cache/momento.d.ts b/libs/langchain-community/cache/momento.d.ts new file mode 100644 index 000000000000..4dc60e32d9e8 --- /dev/null +++ b/libs/langchain-community/cache/momento.d.ts @@ -0,0 +1 @@ +export * from '../dist/cache/momento.js' \ No newline at end of file diff --git a/libs/langchain-community/cache/momento.js b/libs/langchain-community/cache/momento.js new file mode 100644 index 000000000000..4dc60e32d9e8 --- /dev/null +++ b/libs/langchain-community/cache/momento.js @@ -0,0 +1 @@ +export * from '../dist/cache/momento.js' \ No newline at end of file diff --git a/libs/langchain-community/cache/upstash_redis.cjs b/libs/langchain-community/cache/upstash_redis.cjs new file mode 100644 index 000000000000..ac5c1eb58ab0 --- /dev/null +++ b/libs/langchain-community/cache/upstash_redis.cjs @@ -0,0 +1 @@ +module.exports = require('../dist/cache/upstash_redis.cjs'); \ No newline at end of file diff --git a/libs/langchain-community/cache/upstash_redis.d.ts b/libs/langchain-community/cache/upstash_redis.d.ts new file mode 100644 index 000000000000..7bc892103ff9 --- /dev/null +++ 
b/libs/langchain-community/cache/upstash_redis.d.ts @@ -0,0 +1 @@ +export * from '../dist/cache/upstash_redis.js' \ No newline at end of file diff --git a/libs/langchain-community/cache/upstash_redis.js b/libs/langchain-community/cache/upstash_redis.js new file mode 100644 index 000000000000..7bc892103ff9 --- /dev/null +++ b/libs/langchain-community/cache/upstash_redis.js @@ -0,0 +1 @@ +export * from '../dist/cache/upstash_redis.js' \ No newline at end of file diff --git a/libs/langchain-community/package.json b/libs/langchain-community/package.json index 1fbb67b17711..0666be1d853c 100644 --- a/libs/langchain-community/package.json +++ b/libs/langchain-community/package.json @@ -1095,20 +1095,20 @@ "import": "./retrievers/zep.js", "require": "./retrievers/zep.cjs" }, - "./cache/cloudflare_kv": { - "types": "./cache/cloudflare_kv.d.ts", - "import": "./cache/cloudflare_kv.js", - "require": "./cache/cloudflare_kv.cjs" + "./caches/cloudflare_kv": { + "types": "./caches/cloudflare_kv.d.ts", + "import": "./caches/cloudflare_kv.js", + "require": "./caches/cloudflare_kv.cjs" }, - "./cache/momento": { - "types": "./cache/momento.d.ts", - "import": "./cache/momento.js", - "require": "./cache/momento.cjs" + "./caches/momento": { + "types": "./caches/momento.d.ts", + "import": "./caches/momento.js", + "require": "./caches/momento.cjs" }, - "./cache/upstash_redis": { - "types": "./cache/upstash_redis.d.ts", - "import": "./cache/upstash_redis.js", - "require": "./cache/upstash_redis.cjs" + "./caches/upstash_redis": { + "types": "./caches/upstash_redis.d.ts", + "import": "./caches/upstash_redis.js", + "require": "./caches/upstash_redis.cjs" }, "./graphs/neo4j_graph": { "types": "./graphs/neo4j_graph.d.ts", @@ -1419,15 +1419,15 @@ "retrievers/zep.cjs", "retrievers/zep.js", "retrievers/zep.d.ts", - "cache/cloudflare_kv.cjs", - "cache/cloudflare_kv.js", - "cache/cloudflare_kv.d.ts", - "cache/momento.cjs", - "cache/momento.js", - "cache/momento.d.ts", - "cache/upstash_redis.cjs", - 
"cache/upstash_redis.js", - "cache/upstash_redis.d.ts", + "caches/cloudflare_kv.cjs", + "caches/cloudflare_kv.js", + "caches/cloudflare_kv.d.ts", + "caches/momento.cjs", + "caches/momento.js", + "caches/momento.d.ts", + "caches/upstash_redis.cjs", + "caches/upstash_redis.js", + "caches/upstash_redis.d.ts", "graphs/neo4j_graph.cjs", "graphs/neo4j_graph.js", "graphs/neo4j_graph.d.ts" diff --git a/libs/langchain-community/scripts/create-entrypoints.js b/libs/langchain-community/scripts/create-entrypoints.js index 64e2c738a76d..507e8b4f6e8a 100644 --- a/libs/langchain-community/scripts/create-entrypoints.js +++ b/libs/langchain-community/scripts/create-entrypoints.js @@ -115,9 +115,9 @@ const entrypoints = { "retrievers/tavily_search_api": "retrievers/tavily_search_api", "retrievers/zep": "retrievers/zep", // cache - "cache/cloudflare_kv": "cache/cloudflare_kv", - "cache/momento": "cache/momento", - "cache/upstash_redis": "cache/upstash_redis", + "caches/cloudflare_kv": "caches/cloudflare_kv", + "caches/momento": "caches/momento", + "caches/upstash_redis": "caches/upstash_redis", // graphs "graphs/neo4j_graph": "graphs/neo4j_graph", }; diff --git a/libs/langchain-community/src/cache/cloudflare_kv.ts b/libs/langchain-community/src/caches/cloudflare_kv.ts similarity index 100% rename from libs/langchain-community/src/cache/cloudflare_kv.ts rename to libs/langchain-community/src/caches/cloudflare_kv.ts diff --git a/libs/langchain-community/src/cache/momento.ts b/libs/langchain-community/src/caches/momento.ts similarity index 98% rename from libs/langchain-community/src/cache/momento.ts rename to libs/langchain-community/src/caches/momento.ts index 9243d5efbd34..565286037518 100644 --- a/libs/langchain-community/src/cache/momento.ts +++ b/libs/langchain-community/src/caches/momento.ts @@ -14,7 +14,7 @@ import { } from "@langchain/core/caches"; import { Generation } from "@langchain/core/outputs"; -import { ensureCacheExists } from "../util/momento.js"; +import { 
ensureCacheExists } from "../utils/momento.js"; /** * The settings to instantiate the Momento standard cache. diff --git a/libs/langchain-community/src/cache/tests/momento.test.ts b/libs/langchain-community/src/caches/tests/momento.test.ts similarity index 100% rename from libs/langchain-community/src/cache/tests/momento.test.ts rename to libs/langchain-community/src/caches/tests/momento.test.ts diff --git a/libs/langchain-community/src/cache/tests/upstash_redis.int.test.ts b/libs/langchain-community/src/caches/tests/upstash_redis.int.test.ts similarity index 100% rename from libs/langchain-community/src/cache/tests/upstash_redis.int.test.ts rename to libs/langchain-community/src/caches/tests/upstash_redis.int.test.ts diff --git a/libs/langchain-community/src/cache/tests/upstash_redis.test.ts b/libs/langchain-community/src/caches/tests/upstash_redis.test.ts similarity index 100% rename from libs/langchain-community/src/cache/tests/upstash_redis.test.ts rename to libs/langchain-community/src/caches/tests/upstash_redis.test.ts diff --git a/libs/langchain-community/src/cache/upstash_redis.ts b/libs/langchain-community/src/caches/upstash_redis.ts similarity index 100% rename from libs/langchain-community/src/cache/upstash_redis.ts rename to libs/langchain-community/src/caches/upstash_redis.ts diff --git a/libs/langchain-community/src/chat_models/bedrock/index.ts b/libs/langchain-community/src/chat_models/bedrock/index.ts index 07b5ab549ace..a7c2cd0d116e 100644 --- a/libs/langchain-community/src/chat_models/bedrock/index.ts +++ b/libs/langchain-community/src/chat_models/bedrock/index.ts @@ -2,7 +2,7 @@ import { defaultProvider } from "@aws-sdk/credential-provider-node"; import type { BaseChatModelParams } from "@langchain/core/language_models/chat_models"; -import { BaseBedrockInput } from "../../util/bedrock.js"; +import { BaseBedrockInput } from "../../utils/bedrock.js"; import { BedrockChat as BaseBedrockChat } from "./web.js"; /** diff --git 
a/libs/langchain-community/src/chat_models/bedrock/web.ts b/libs/langchain-community/src/chat_models/bedrock/web.ts index 35581ed68483..a6a8b224c469 100644 --- a/libs/langchain-community/src/chat_models/bedrock/web.ts +++ b/libs/langchain-community/src/chat_models/bedrock/web.ts @@ -22,7 +22,7 @@ import { BaseBedrockInput, BedrockLLMInputOutputAdapter, type CredentialType, -} from "../../util/bedrock.js"; +} from "../../utils/bedrock.js"; import type { SerializedFields } from "../../load/map_keys.js"; function convertOneMessageToText( diff --git a/libs/langchain-community/src/chat_models/cloudflare_workersai.ts b/libs/langchain-community/src/chat_models/cloudflare_workersai.ts index 72d793d11a8c..2c76df6fb55c 100644 --- a/libs/langchain-community/src/chat_models/cloudflare_workersai.ts +++ b/libs/langchain-community/src/chat_models/cloudflare_workersai.ts @@ -13,7 +13,7 @@ import { getEnvironmentVariable } from "@langchain/core/utils/env"; import { CallbackManagerForLLMRun } from "@langchain/core/callbacks/manager"; import type { CloudflareWorkersAIInput } from "../llms/cloudflare_workersai.js"; -import { convertEventStreamToIterableReadableDataStream } from "../util/event-source-parse.js"; +import { convertEventStreamToIterableReadableDataStream } from "../utils/event-source-parse.js"; /** * An interface defining the options for a Cloudflare Workers AI call. 
It extends diff --git a/libs/langchain-community/src/chat_models/googlevertexai/common.ts b/libs/langchain-community/src/chat_models/googlevertexai/common.ts index 7d55deb143d4..82bf1b3f99e4 100644 --- a/libs/langchain-community/src/chat_models/googlevertexai/common.ts +++ b/libs/langchain-community/src/chat_models/googlevertexai/common.ts @@ -17,7 +17,7 @@ import { import { GoogleVertexAILLMConnection, GoogleVertexAIStream, -} from "../../util/googlevertexai-connection.js"; +} from "../../utils/googlevertexai-connection.js"; import { GoogleVertexAIBaseLLMInput, GoogleVertexAIBasePrediction, diff --git a/libs/langchain-community/src/chat_models/googlevertexai/index.ts b/libs/langchain-community/src/chat_models/googlevertexai/index.ts index e8a3a07da320..d93693e8fcc0 100644 --- a/libs/langchain-community/src/chat_models/googlevertexai/index.ts +++ b/libs/langchain-community/src/chat_models/googlevertexai/index.ts @@ -1,7 +1,7 @@ import { GoogleAuthOptions } from "google-auth-library"; import { BaseChatGoogleVertexAI, GoogleVertexAIChatInput } from "./common.js"; -import { GoogleVertexAILLMConnection } from "../../util/googlevertexai-connection.js"; -import { GAuthClient } from "../../util/googlevertexai-gauth.js"; +import { GoogleVertexAILLMConnection } from "../../utils/googlevertexai-connection.js"; +import { GAuthClient } from "../../utils/googlevertexai-gauth.js"; /** * Enables calls to the Google Cloud's Vertex AI API to access diff --git a/libs/langchain-community/src/chat_models/googlevertexai/web.ts b/libs/langchain-community/src/chat_models/googlevertexai/web.ts index acbaa9144f4c..503058fc0e21 100644 --- a/libs/langchain-community/src/chat_models/googlevertexai/web.ts +++ b/libs/langchain-community/src/chat_models/googlevertexai/web.ts @@ -1,8 +1,8 @@ -import { GoogleVertexAILLMConnection } from "../../util/googlevertexai-connection.js"; +import { GoogleVertexAILLMConnection } from "../../utils/googlevertexai-connection.js"; import { WebGoogleAuthOptions, 
WebGoogleAuth, -} from "../../util/googlevertexai-webauth.js"; +} from "../../utils/googlevertexai-webauth.js"; import { BaseChatGoogleVertexAI, GoogleVertexAIChatInput } from "./common.js"; /** diff --git a/libs/langchain-community/src/chat_models/iflytek_xinghuo/common.ts b/libs/langchain-community/src/chat_models/iflytek_xinghuo/common.ts index 9f736e1916d0..d88d1ad696b1 100644 --- a/libs/langchain-community/src/chat_models/iflytek_xinghuo/common.ts +++ b/libs/langchain-community/src/chat_models/iflytek_xinghuo/common.ts @@ -10,7 +10,7 @@ import { import { BaseWebSocketStream, WebSocketStreamOptions, -} from "../../util/iflytek_websocket_stream.js"; +} from "../../utils/iflytek_websocket_stream.js"; /** * Type representing the role of a message in the Xinghuo chat model. diff --git a/libs/langchain-community/src/chat_models/iflytek_xinghuo/index.ts b/libs/langchain-community/src/chat_models/iflytek_xinghuo/index.ts index ac54461be18a..681d6bdbc299 100644 --- a/libs/langchain-community/src/chat_models/iflytek_xinghuo/index.ts +++ b/libs/langchain-community/src/chat_models/iflytek_xinghuo/index.ts @@ -3,7 +3,7 @@ import { BaseChatIflytekXinghuo } from "./common.js"; import { BaseWebSocketStream, WebSocketStreamOptions, -} from "../../util/iflytek_websocket_stream.js"; +} from "../../utils/iflytek_websocket_stream.js"; class WebSocketStream extends BaseWebSocketStream { // eslint-disable-next-line @typescript-eslint/ban-ts-comment diff --git a/libs/langchain-community/src/chat_models/iflytek_xinghuo/web.ts b/libs/langchain-community/src/chat_models/iflytek_xinghuo/web.ts index 87b372b802ad..df0db076085a 100644 --- a/libs/langchain-community/src/chat_models/iflytek_xinghuo/web.ts +++ b/libs/langchain-community/src/chat_models/iflytek_xinghuo/web.ts @@ -2,7 +2,7 @@ import { BaseChatIflytekXinghuo } from "./common.js"; import { WebSocketStreamOptions, BaseWebSocketStream, -} from "../../util/iflytek_websocket_stream.js"; +} from 
"../../utils/iflytek_websocket_stream.js"; class WebSocketStream extends BaseWebSocketStream { openWebSocket(url: string, options: WebSocketStreamOptions): WebSocket { diff --git a/libs/langchain-community/src/chat_models/llama_cpp.ts b/libs/langchain-community/src/chat_models/llama_cpp.ts index f17b5be3ea39..6ecd35200cbf 100644 --- a/libs/langchain-community/src/chat_models/llama_cpp.ts +++ b/libs/langchain-community/src/chat_models/llama_cpp.ts @@ -21,7 +21,7 @@ import { LlamaBaseCppInputs, createLlamaModel, createLlamaContext, -} from "../util/llama_cpp.js"; +} from "../utils/llama_cpp.js"; /** * Note that the modelPath is the only required parameter. For testing you diff --git a/libs/langchain-community/src/chat_models/ollama.ts b/libs/langchain-community/src/chat_models/ollama.ts index aaab895ea998..0eb695075354 100644 --- a/libs/langchain-community/src/chat_models/ollama.ts +++ b/libs/langchain-community/src/chat_models/ollama.ts @@ -12,7 +12,7 @@ import { import { ChatGenerationChunk } from "@langchain/core/outputs"; import type { StringWithAutocomplete } from "@langchain/core/utils/types"; -import { createOllamaStream, OllamaInput } from "../util/ollama.js"; +import { createOllamaStream, OllamaInput } from "../utils/ollama.js"; /** * An interface defining the options for an Ollama API call. 
It extends diff --git a/libs/langchain-community/src/embeddings/bedrock.ts b/libs/langchain-community/src/embeddings/bedrock.ts index 7fedd2f9a8f4..305387007cd0 100644 --- a/libs/langchain-community/src/embeddings/bedrock.ts +++ b/libs/langchain-community/src/embeddings/bedrock.ts @@ -3,7 +3,7 @@ import { InvokeModelCommand, } from "@aws-sdk/client-bedrock-runtime"; import { Embeddings, EmbeddingsParams } from "@langchain/core/embeddings"; -import type { CredentialType } from "../util/bedrock.js"; +import type { CredentialType } from "../utils/bedrock.js"; /** * Interface that extends EmbeddingsParams and defines additional diff --git a/libs/langchain-community/src/embeddings/cloudflare_workersai.ts b/libs/langchain-community/src/embeddings/cloudflare_workersai.ts index a6501e47b5af..20ff93e070c7 100644 --- a/libs/langchain-community/src/embeddings/cloudflare_workersai.ts +++ b/libs/langchain-community/src/embeddings/cloudflare_workersai.ts @@ -1,7 +1,7 @@ import { Ai } from "@cloudflare/ai"; import { Fetcher } from "@cloudflare/workers-types"; import { Embeddings, EmbeddingsParams } from "@langchain/core/embeddings"; -import { chunkArray } from "../util/chunk.js"; +import { chunkArray } from "../utils/chunk.js"; type AiTextEmbeddingsInput = { text: string | string[]; diff --git a/libs/langchain-community/src/embeddings/cohere.ts b/libs/langchain-community/src/embeddings/cohere.ts index 7d6f102ae7ce..86e151ab7653 100644 --- a/libs/langchain-community/src/embeddings/cohere.ts +++ b/libs/langchain-community/src/embeddings/cohere.ts @@ -1,6 +1,6 @@ import { getEnvironmentVariable } from "@langchain/core/utils/env"; import { Embeddings, EmbeddingsParams } from "@langchain/core/embeddings"; -import { chunkArray } from "../util/chunk.js"; +import { chunkArray } from "../utils/chunk.js"; /** * Interface that extends EmbeddingsParams and defines additional diff --git a/libs/langchain-community/src/embeddings/googlevertexai.ts 
b/libs/langchain-community/src/embeddings/googlevertexai.ts index 9e5aa25e741c..54b9e3d4ac89 100644 --- a/libs/langchain-community/src/embeddings/googlevertexai.ts +++ b/libs/langchain-community/src/embeddings/googlevertexai.ts @@ -6,8 +6,8 @@ import { GoogleVertexAIBaseLLMInput, GoogleVertexAILLMPredictions, } from "../types/googlevertexai-types.js"; -import { GoogleVertexAILLMConnection } from "../util/googlevertexai-connection.js"; -import { chunkArray } from "../util/chunk.js"; +import { GoogleVertexAILLMConnection } from "../utils/googlevertexai-connection.js"; +import { chunkArray } from "../utils/chunk.js"; /** * Defines the parameters required to initialize a diff --git a/libs/langchain-community/src/embeddings/gradient_ai.ts b/libs/langchain-community/src/embeddings/gradient_ai.ts index 0dbc3e932e70..f64f38483475 100644 --- a/libs/langchain-community/src/embeddings/gradient_ai.ts +++ b/libs/langchain-community/src/embeddings/gradient_ai.ts @@ -1,7 +1,7 @@ import { Gradient } from "@gradientai/nodejs-sdk"; import { getEnvironmentVariable } from "@langchain/core/utils/env"; import { Embeddings, EmbeddingsParams } from "@langchain/core/embeddings"; -import { chunkArray } from "../util/chunk.js"; +import { chunkArray } from "../utils/chunk.js"; /** * Interface for GradientEmbeddings parameters. 
Extends EmbeddingsParams and diff --git a/libs/langchain-community/src/embeddings/hf_transformers.ts b/libs/langchain-community/src/embeddings/hf_transformers.ts index 92edeae1d4b0..b70b9fe4c646 100644 --- a/libs/langchain-community/src/embeddings/hf_transformers.ts +++ b/libs/langchain-community/src/embeddings/hf_transformers.ts @@ -1,6 +1,6 @@ import { Pipeline, pipeline } from "@xenova/transformers"; import { Embeddings, type EmbeddingsParams } from "@langchain/core/embeddings"; -import { chunkArray } from "../util/chunk.js"; +import { chunkArray } from "../utils/chunk.js"; export interface HuggingFaceTransformersEmbeddingsParams extends EmbeddingsParams { diff --git a/libs/langchain-community/src/embeddings/llama_cpp.ts b/libs/langchain-community/src/embeddings/llama_cpp.ts index 266f4b610948..aad4163ac499 100644 --- a/libs/langchain-community/src/embeddings/llama_cpp.ts +++ b/libs/langchain-community/src/embeddings/llama_cpp.ts @@ -4,7 +4,7 @@ import { LlamaBaseCppInputs, createLlamaModel, createLlamaContext, -} from "../util/llama_cpp.js"; +} from "../utils/llama_cpp.js"; /** * Note that the modelPath is the only required parameter. 
For testing you diff --git a/libs/langchain-community/src/embeddings/minimax.ts b/libs/langchain-community/src/embeddings/minimax.ts index 110116abd7c2..bf4594f9b5d3 100644 --- a/libs/langchain-community/src/embeddings/minimax.ts +++ b/libs/langchain-community/src/embeddings/minimax.ts @@ -1,6 +1,6 @@ import { getEnvironmentVariable } from "@langchain/core/utils/env"; import { Embeddings, EmbeddingsParams } from "@langchain/core/embeddings"; -import { chunkArray } from "../util/chunk.js"; +import { chunkArray } from "../utils/chunk.js"; import { ConfigurationParameters } from "../chat_models/minimax.js"; /** diff --git a/libs/langchain-community/src/embeddings/ollama.ts b/libs/langchain-community/src/embeddings/ollama.ts index 39f8da6b0aa6..b1e63b8d7005 100644 --- a/libs/langchain-community/src/embeddings/ollama.ts +++ b/libs/langchain-community/src/embeddings/ollama.ts @@ -1,5 +1,5 @@ import { Embeddings, EmbeddingsParams } from "@langchain/core/embeddings"; -import { OllamaInput, OllamaRequestParams } from "../util/ollama.js"; +import { OllamaInput, OllamaRequestParams } from "../utils/ollama.js"; type CamelCasedRequestOptions = Omit< OllamaInput, diff --git a/libs/langchain-community/src/embeddings/voyage.ts b/libs/langchain-community/src/embeddings/voyage.ts index 30aa556478fe..8a023d5d7e28 100644 --- a/libs/langchain-community/src/embeddings/voyage.ts +++ b/libs/langchain-community/src/embeddings/voyage.ts @@ -1,6 +1,6 @@ import { getEnvironmentVariable } from "@langchain/core/utils/env"; import { Embeddings, type EmbeddingsParams } from "@langchain/core/embeddings"; -import { chunkArray } from "../util/chunk.js"; +import { chunkArray } from "../utils/chunk.js"; /** * Interface that extends EmbeddingsParams and defines additional diff --git a/libs/langchain-community/src/llms/bedrock/index.ts b/libs/langchain-community/src/llms/bedrock/index.ts index d87440d0641f..64f39a279671 100644 --- a/libs/langchain-community/src/llms/bedrock/index.ts +++ 
b/libs/langchain-community/src/llms/bedrock/index.ts @@ -1,6 +1,6 @@ import { defaultProvider } from "@aws-sdk/credential-provider-node"; import type { BaseLLMParams } from "@langchain/core/language_models/llms"; -import { BaseBedrockInput } from "../../util/bedrock.js"; +import { BaseBedrockInput } from "../../utils/bedrock.js"; import { Bedrock as BaseBedrock } from "./web.js"; export class Bedrock extends BaseBedrock { diff --git a/libs/langchain-community/src/llms/bedrock/web.ts b/libs/langchain-community/src/llms/bedrock/web.ts index a5d56edd36e0..638fe4ce91fe 100644 --- a/libs/langchain-community/src/llms/bedrock/web.ts +++ b/libs/langchain-community/src/llms/bedrock/web.ts @@ -14,7 +14,7 @@ import { BaseBedrockInput, BedrockLLMInputOutputAdapter, type CredentialType, -} from "../../util/bedrock.js"; +} from "../../utils/bedrock.js"; import type { SerializedFields } from "../../load/map_keys.js"; /** diff --git a/libs/langchain-community/src/llms/cloudflare_workersai.ts b/libs/langchain-community/src/llms/cloudflare_workersai.ts index fe9b878ef26f..fd6f0135f1f6 100644 --- a/libs/langchain-community/src/llms/cloudflare_workersai.ts +++ b/libs/langchain-community/src/llms/cloudflare_workersai.ts @@ -3,7 +3,7 @@ import { getEnvironmentVariable } from "@langchain/core/utils/env"; import { CallbackManagerForLLMRun } from "@langchain/core/callbacks/manager"; import { GenerationChunk } from "@langchain/core/outputs"; -import { convertEventStreamToIterableReadableDataStream } from "../util/event-source-parse.js"; +import { convertEventStreamToIterableReadableDataStream } from "../utils/event-source-parse.js"; /** * Interface for CloudflareWorkersAI input parameters. 
diff --git a/libs/langchain-community/src/llms/googlevertexai/common.ts b/libs/langchain-community/src/llms/googlevertexai/common.ts index 58ab3007ac94..90660fb2dba3 100644 --- a/libs/langchain-community/src/llms/googlevertexai/common.ts +++ b/libs/langchain-community/src/llms/googlevertexai/common.ts @@ -11,7 +11,7 @@ import { GoogleVertexAILLMConnection, GoogleVertexAIStream, GoogleVertexAILLMResponse, -} from "../../util/googlevertexai-connection.js"; +} from "../../utils/googlevertexai-connection.js"; import { GoogleVertexAIBaseLLMInput, GoogleVertexAIBasePrediction, diff --git a/libs/langchain-community/src/llms/googlevertexai/index.ts b/libs/langchain-community/src/llms/googlevertexai/index.ts index c3c7cbd6127a..9406c3e01013 100644 --- a/libs/langchain-community/src/llms/googlevertexai/index.ts +++ b/libs/langchain-community/src/llms/googlevertexai/index.ts @@ -1,8 +1,8 @@ import { GoogleAuthOptions } from "google-auth-library"; -import { GoogleVertexAILLMConnection } from "../../util/googlevertexai-connection.js"; +import { GoogleVertexAILLMConnection } from "../../utils/googlevertexai-connection.js"; import { GoogleVertexAIBaseLLMInput } from "../../types/googlevertexai-types.js"; import { BaseGoogleVertexAI } from "./common.js"; -import { GAuthClient } from "../../util/googlevertexai-gauth.js"; +import { GAuthClient } from "../../utils/googlevertexai-gauth.js"; /** * Interface representing the input to the Google Vertex AI model. 
diff --git a/libs/langchain-community/src/llms/googlevertexai/web.ts b/libs/langchain-community/src/llms/googlevertexai/web.ts index 0b656308d53b..9ceb3cbf2285 100644 --- a/libs/langchain-community/src/llms/googlevertexai/web.ts +++ b/libs/langchain-community/src/llms/googlevertexai/web.ts @@ -1,8 +1,8 @@ import { WebGoogleAuth, WebGoogleAuthOptions, -} from "../../util/googlevertexai-webauth.js"; -import { GoogleVertexAILLMConnection } from "../../util/googlevertexai-connection.js"; +} from "../../utils/googlevertexai-webauth.js"; +import { GoogleVertexAILLMConnection } from "../../utils/googlevertexai-connection.js"; import { GoogleVertexAIBaseLLMInput } from "../../types/googlevertexai-types.js"; import { BaseGoogleVertexAI } from "./common.js"; diff --git a/libs/langchain-community/src/llms/llama_cpp.ts b/libs/langchain-community/src/llms/llama_cpp.ts index d5877fd34588..74c3397a408d 100644 --- a/libs/langchain-community/src/llms/llama_cpp.ts +++ b/libs/langchain-community/src/llms/llama_cpp.ts @@ -12,7 +12,7 @@ import { createLlamaModel, createLlamaContext, createLlamaSession, -} from "../util/llama_cpp.js"; +} from "../utils/llama_cpp.js"; /** * Note that the modelPath is the only required parameter. For testing you diff --git a/libs/langchain-community/src/llms/ollama.ts b/libs/langchain-community/src/llms/ollama.ts index 31fb52e6b100..9b769e18b35c 100644 --- a/libs/langchain-community/src/llms/ollama.ts +++ b/libs/langchain-community/src/llms/ollama.ts @@ -7,7 +7,7 @@ import { createOllamaStream, OllamaInput, OllamaCallOptions, -} from "../util/ollama.js"; +} from "../utils/ollama.js"; /** * Class that represents the Ollama language model. 
It extends the base diff --git a/libs/langchain-community/src/load/import_constants.ts b/libs/langchain-community/src/load/import_constants.ts index 0b2952c46419..079aebba8ff5 100644 --- a/libs/langchain-community/src/load/import_constants.ts +++ b/libs/langchain-community/src/load/import_constants.ts @@ -63,8 +63,5 @@ export const optionalImportEntrypoints = [ "@langchain/community/retrievers/metal", "@langchain/community/retrievers/supabase", "@langchain/community/retrievers/zep", - "@langchain/community/cache/cloudflare_kv", - "@langchain/community/cache/momento", - "@langchain/community/cache/upstash_redis", "@langchain/community/graphs/neo4j_graph", ]; diff --git a/libs/langchain-community/src/load/import_map.ts b/libs/langchain-community/src/load/import_map.ts index d62b3d21484e..67274f55160f 100644 --- a/libs/langchain-community/src/load/import_map.ts +++ b/libs/langchain-community/src/load/import_map.ts @@ -37,3 +37,6 @@ export * as chat_models__yandex from "../chat_models/yandex.js"; export * as retrievers__chaindesk from "../retrievers/chaindesk.js"; export * as retrievers__databerry from "../retrievers/databerry.js"; export * as retrievers__tavily_search_api from "../retrievers/tavily_search_api.js"; +export * as caches__cloudflare_kv from "../caches/cloudflare_kv.js"; +export * as caches__momento from "../caches/momento.js"; +export * as caches__upstash_redis from "../caches/upstash_redis.js"; diff --git a/libs/langchain-community/src/load/import_type.d.ts b/libs/langchain-community/src/load/import_type.d.ts index 81cdc29e7d71..b30dc747f249 100644 --- a/libs/langchain-community/src/load/import_type.d.ts +++ b/libs/langchain-community/src/load/import_type.d.ts @@ -187,15 +187,6 @@ export interface OptionalImportMap { "@langchain/community/retrievers/zep"?: | typeof import("../retrievers/zep.js") | Promise; - "@langchain/community/cache/cloudflare_kv"?: - | typeof import("../cache/cloudflare_kv.js") - | Promise; - "@langchain/community/cache/momento"?: - 
| typeof import("../cache/momento.js") - | Promise; - "@langchain/community/cache/upstash_redis"?: - | typeof import("../cache/upstash_redis.js") - | Promise; "@langchain/community/graphs/neo4j_graph"?: | typeof import("../graphs/neo4j_graph.js") | Promise; diff --git a/libs/langchain-community/src/util/bedrock.ts b/libs/langchain-community/src/utils/bedrock.ts similarity index 100% rename from libs/langchain-community/src/util/bedrock.ts rename to libs/langchain-community/src/utils/bedrock.ts diff --git a/libs/langchain-community/src/util/chunk.ts b/libs/langchain-community/src/utils/chunk.ts similarity index 100% rename from libs/langchain-community/src/util/chunk.ts rename to libs/langchain-community/src/utils/chunk.ts diff --git a/libs/langchain-community/src/util/convex.ts b/libs/langchain-community/src/utils/convex.ts similarity index 100% rename from libs/langchain-community/src/util/convex.ts rename to libs/langchain-community/src/utils/convex.ts diff --git a/libs/langchain-community/src/util/event-source-parse.ts b/libs/langchain-community/src/utils/event-source-parse.ts similarity index 100% rename from libs/langchain-community/src/util/event-source-parse.ts rename to libs/langchain-community/src/utils/event-source-parse.ts diff --git a/libs/langchain-community/src/util/googlevertexai-connection.ts b/libs/langchain-community/src/utils/googlevertexai-connection.ts similarity index 100% rename from libs/langchain-community/src/util/googlevertexai-connection.ts rename to libs/langchain-community/src/utils/googlevertexai-connection.ts diff --git a/libs/langchain-community/src/util/googlevertexai-gauth.ts b/libs/langchain-community/src/utils/googlevertexai-gauth.ts similarity index 100% rename from libs/langchain-community/src/util/googlevertexai-gauth.ts rename to libs/langchain-community/src/utils/googlevertexai-gauth.ts diff --git a/libs/langchain-community/src/util/googlevertexai-webauth.ts b/libs/langchain-community/src/utils/googlevertexai-webauth.ts 
similarity index 100% rename from libs/langchain-community/src/util/googlevertexai-webauth.ts rename to libs/langchain-community/src/utils/googlevertexai-webauth.ts diff --git a/libs/langchain-community/src/util/iflytek_websocket_stream.ts b/libs/langchain-community/src/utils/iflytek_websocket_stream.ts similarity index 100% rename from libs/langchain-community/src/util/iflytek_websocket_stream.ts rename to libs/langchain-community/src/utils/iflytek_websocket_stream.ts diff --git a/libs/langchain-community/src/util/llama_cpp.ts b/libs/langchain-community/src/utils/llama_cpp.ts similarity index 100% rename from libs/langchain-community/src/util/llama_cpp.ts rename to libs/langchain-community/src/utils/llama_cpp.ts diff --git a/libs/langchain-community/src/util/momento.ts b/libs/langchain-community/src/utils/momento.ts similarity index 100% rename from libs/langchain-community/src/util/momento.ts rename to libs/langchain-community/src/utils/momento.ts diff --git a/libs/langchain-community/src/util/ollama.ts b/libs/langchain-community/src/utils/ollama.ts similarity index 100% rename from libs/langchain-community/src/util/ollama.ts rename to libs/langchain-community/src/utils/ollama.ts diff --git a/libs/langchain-community/src/util/testing.ts b/libs/langchain-community/src/utils/testing.ts similarity index 100% rename from libs/langchain-community/src/util/testing.ts rename to libs/langchain-community/src/utils/testing.ts diff --git a/libs/langchain-community/src/util/time.ts b/libs/langchain-community/src/utils/time.ts similarity index 100% rename from libs/langchain-community/src/util/time.ts rename to libs/langchain-community/src/utils/time.ts diff --git a/libs/langchain-community/src/vectorstores/cloudflare_vectorize.ts b/libs/langchain-community/src/vectorstores/cloudflare_vectorize.ts index b6204f916119..605aa7ab44ec 100644 --- a/libs/langchain-community/src/vectorstores/cloudflare_vectorize.ts +++ 
b/libs/langchain-community/src/vectorstores/cloudflare_vectorize.ts @@ -11,7 +11,7 @@ import { AsyncCaller, type AsyncCallerParams, } from "@langchain/core/utils/async_caller"; -import { chunkArray } from "../util/chunk.js"; +import { chunkArray } from "../utils/chunk.js"; export interface VectorizeLibArgs extends AsyncCallerParams { index: VectorizeIndex; diff --git a/libs/langchain-community/src/vectorstores/tests/analyticdb.int.test.ts b/libs/langchain-community/src/vectorstores/tests/analyticdb.int.test.ts deleted file mode 100644 index 46abb53472f5..000000000000 --- a/libs/langchain-community/src/vectorstores/tests/analyticdb.int.test.ts +++ /dev/null @@ -1,169 +0,0 @@ -/* eslint-disable no-process-env */ -/* eslint-disable import/no-extraneous-dependencies */ -import { test } from "@jest/globals"; -import { Document } from "@langchain/core/documents"; -import { OpenAIEmbeddings } from "@langchain/openai"; - -import { AnalyticDBVectorStore } from "../analyticdb.js"; - -const connectionOptions = { - host: process.env.ANALYTICDB_HOST || "localhost", - port: Number(process.env.ANALYTICDB_PORT) || 5432, - database: process.env.ANALYTICDB_DATABASE || "your_database", - user: process.env.ANALYTICDB_USERNAME || "username", - password: process.env.ANALYTICDB_PASSWORD || "password", -}; - -const embeddings = new OpenAIEmbeddings(); -const _LANGCHAIN_DEFAULT_EMBEDDING_DIM = 1536; - -beforeAll(async () => { - expect(process.env.ANALYTICDB_HOST).toBeDefined(); - expect(process.env.ANALYTICDB_PORT).toBeDefined(); - expect(process.env.ANALYTICDB_DATABASE).toBeDefined(); - expect(process.env.ANALYTICDB_USERNAME).toBeDefined(); - expect(process.env.ANALYTICDB_USERNAME).toBeDefined(); -}); - -test.skip("test analyticdb", async () => { - const vectorStore = new AnalyticDBVectorStore(embeddings, { - connectionOptions, - collectionName: "test_collection", - preDeleteCollection: true, - }); - expect(vectorStore).toBeDefined(); - - const createdAt = new Date().getTime(); - await 
vectorStore.addDocuments([ - { pageContent: "hi", metadata: { a: createdAt } }, - { pageContent: "bye", metadata: { a: createdAt } }, - { pageContent: "what's this", metadata: { a: createdAt } }, - { pageContent: createdAt.toString(), metadata: { a: createdAt } }, - ]); - - const results = await vectorStore.similaritySearch("what's this", 1); - - expect(results).toHaveLength(1); - expect(results).toEqual([ - new Document({ - pageContent: "what's this", - metadata: { a: createdAt }, - }), - ]); - - await vectorStore.end(); -}); - -test.skip("test analyticdb using filter", async () => { - const vectorStore = new AnalyticDBVectorStore(embeddings, { - connectionOptions, - collectionName: "test_collection", - embeddingDimension: _LANGCHAIN_DEFAULT_EMBEDDING_DIM, - preDeleteCollection: true, - }); - expect(vectorStore).toBeDefined(); - - const createdAt = new Date().getTime(); - await vectorStore.addDocuments([ - { pageContent: "foo", metadata: { a: createdAt, b: createdAt + 6 } }, - { pageContent: "bar", metadata: { a: createdAt + 1, b: createdAt + 7 } }, - { pageContent: "baz", metadata: { a: createdAt + 2, b: createdAt + 8 } }, - { pageContent: "foo", metadata: { a: createdAt + 3, b: createdAt + 9 } }, - { pageContent: "bar", metadata: { a: createdAt + 4, b: createdAt + 10 } }, - { pageContent: "baz", metadata: { a: createdAt + 5, b: createdAt + 11 } }, - ]); - - const results = await vectorStore.similaritySearch("bar", 1, { - a: createdAt + 4, - b: createdAt + 10, - }); - - expect(results).toHaveLength(1); - expect(results).toEqual([ - new Document({ - pageContent: "bar", - metadata: { a: createdAt + 4, b: createdAt + 10 }, - }), - ]); - - await vectorStore.end(); -}); - -test.skip("test analyticdb from texts", async () => { - const vectorStore = await AnalyticDBVectorStore.fromTexts( - ["Bye bye", "Hello world", "hello nice world"], - [ - { id: 2, name: "2" }, - { id: 1, name: "1" }, - { id: 3, name: "3" }, - ], - embeddings, - { - connectionOptions, - 
collectionName: "test_collection", - embeddingDimension: _LANGCHAIN_DEFAULT_EMBEDDING_DIM, - preDeleteCollection: true, - } - ); - expect(vectorStore).toBeDefined(); - - const results = await vectorStore.similaritySearch("hello world", 1); - - expect(results).toHaveLength(1); - expect(results).toEqual([ - new Document({ - pageContent: "Hello world", - metadata: { id: 1, name: "1" }, - }), - ]); - - await vectorStore.end(); -}); - -test.skip("test analyticdb from existing index", async () => { - await AnalyticDBVectorStore.fromTexts( - ["Bye bye", "Hello world", "hello nice world"], - [ - { id: 2, name: "2" }, - { id: 1, name: "1" }, - { id: 3, name: "3" }, - ], - embeddings, - { - connectionOptions, - collectionName: "test_collection", - embeddingDimension: _LANGCHAIN_DEFAULT_EMBEDDING_DIM, - preDeleteCollection: true, - } - ); - - const vectorStore = await AnalyticDBVectorStore.fromExistingIndex( - embeddings, - { - connectionOptions, - collectionName: "test_collection", - embeddingDimension: _LANGCHAIN_DEFAULT_EMBEDDING_DIM, - preDeleteCollection: false, - } - ); - - const result1 = await vectorStore.similaritySearch("hello world", 1); - expect(result1).toHaveLength(1); - expect(result1).toEqual([ - { pageContent: "Hello world", metadata: { id: 1, name: "1" } }, - ]); - - await vectorStore.addDocuments([ - { pageContent: "bar", metadata: { id: 4, name: "4" } }, - { pageContent: "baz", metadata: { id: 5, name: "5" } }, - ]); - - const result2 = await vectorStore.similaritySearch("bar", 2); - expect(result2).toHaveLength(2); - expect(result2).toEqual([ - { pageContent: "bar", metadata: { id: 4, name: "4" } }, - { pageContent: "baz", metadata: { id: 5, name: "5" } }, - ]); - - await vectorStore.end(); -}); diff --git a/libs/langchain-community/src/vectorstores/tests/cassandra.int.test.ts b/libs/langchain-community/src/vectorstores/tests/cassandra.int.test.ts deleted file mode 100644 index d9d4df2d3a3d..000000000000 --- 
a/libs/langchain-community/src/vectorstores/tests/cassandra.int.test.ts +++ /dev/null @@ -1,363 +0,0 @@ -/* eslint-disable no-process-env */ -import { test, expect, describe } from "@jest/globals"; - -import { Client } from "cassandra-driver"; -import { OpenAIEmbeddings } from "@langchain/openai"; -import { Document } from "@langchain/core/documents"; -import { CassandraStore } from "../cassandra.js"; - -const cassandraConfig = { - cloud: { - secureConnectBundle: process.env.CASSANDRA_SCB as string, - }, - credentials: { - username: "token", - password: process.env.CASSANDRA_TOKEN as string, - }, - keyspace: "test", - table: "test", -}; -const client = new Client(cassandraConfig); - -const noPartitionConfig = { - ...cassandraConfig, - dimensions: 1536, - primaryKey: { - name: "id", - type: "int", - }, - metadataColumns: [ - { - name: "name", - type: "text", - }, - { - name: "seq", - type: "int", - }, - ], -}; - -// yarn test:single /langchain/src/vectorstores/tests/cassandra.int.test.ts -// Note there are multiple describe functions that need to be un-skipped for internal testing -describe.skip("CassandraStore - no explicit partition key", () => { - beforeAll(async () => { - await client.execute("DROP TABLE IF EXISTS test.test;"); - }); - - test("CassandraStore.fromText", async () => { - const vectorStore = await CassandraStore.fromTexts( - ["I am blue", "Green yellow purple", "Hello there hello"], - [ - { id: 2, name: "Alex" }, - { id: 1, name: "Scott" }, - { id: 3, name: "Bubba" }, - ], - new OpenAIEmbeddings(), - noPartitionConfig - ); - - const results = await vectorStore.similaritySearch( - "Green yellow purple", - 1 - ); - expect(results).toEqual([ - new Document({ - pageContent: "Green yellow purple", - metadata: { id: 1, name: "Scott" }, - }), - ]); - }); - - test("CassandraStore.fromExistingIndex", async () => { - await CassandraStore.fromTexts( - ["Hey", "Whats up", "Hello"], - [ - { id: 2, name: "Alex" }, - { id: 1, name: "Scott" }, - { id: 3, name: 
"Bubba" }, - ], - new OpenAIEmbeddings(), - noPartitionConfig - ); - - const vectorStore = await CassandraStore.fromExistingIndex( - new OpenAIEmbeddings(), - noPartitionConfig - ); - - const results = await vectorStore.similaritySearch("Whats up", 1); - expect(results).toEqual([ - new Document({ - pageContent: "Whats up", - metadata: { id: 1, name: "Scott" }, - }), - ]); - }); - - test("CassandraStore.fromExistingIndex (with filter)", async () => { - const testConfig = { - ...noPartitionConfig, - indices: [ - { - name: "name", - value: "(name)", - }, - ], - }; - - await CassandraStore.fromTexts( - ["Hey", "Whats up", "Hello"], - [ - { id: 2, name: "Alex" }, - { id: 1, name: "Scott" }, - { id: 3, name: "Bubba" }, - ], - new OpenAIEmbeddings(), - testConfig - ); - - const vectorStore = await CassandraStore.fromExistingIndex( - new OpenAIEmbeddings(), - testConfig - ); - - const results = await vectorStore.similaritySearch("Hey", 1, { - name: "Bubba", - }); - expect(results).toEqual([ - new Document({ - pageContent: "Hello", - metadata: { id: 3, name: "Bubba" }, - }), - ]); - }); - - test("CassandraStore.fromExistingIndex (with inequality filter)", async () => { - const testConfig = { - ...noPartitionConfig, - indices: [ - { - name: "seq", - value: "(seq)", - }, - ], - }; - - await CassandraStore.fromTexts( - ["Hey", "Whats up", "Hello"], - [ - { id: 2, name: "Alex", seq: 99 }, - { id: 1, name: "Scott", seq: 88 }, - { id: 3, name: "Bubba", seq: 77 }, - ], - new OpenAIEmbeddings(), - testConfig - ); - - const vectorStore = await CassandraStore.fromExistingIndex( - new OpenAIEmbeddings(), - testConfig - ); - - // With out the filter this would match on Scott, but we are using > filter - const results = await vectorStore.similaritySearch("Whats up", 1, [ - { name: "seq", operator: ">", value: "88" }, - ]); - expect(results).toEqual([ - new Document({ - pageContent: "Hey", - metadata: { id: 2, name: "Alex", seq: 99 }, - }), - ]); - }); - - 
test("CassandraStore.addDocuments (with batch))", async () => { - const testConfig = { - ...noPartitionConfig, - maxConcurrency: 1, - batchSize: 5, - }; - - const docs: Document[] = []; - docs.push( - new Document({ - pageContent: "Hello Muddah, hello Faddah", - metadata: { id: 1, name: "Alex" }, - }) - ); - docs.push( - new Document({ - pageContent: "Here I am at Camp Granada", - metadata: { id: 2, name: "Blair" }, - }) - ); - docs.push( - new Document({ - pageContent: "Camp is very entertaining", - metadata: { id: 3, name: "Casey" }, - }) - ); - docs.push( - new Document({ - pageContent: "And they say we'll have some fun if it stops raining", - metadata: { id: 4, name: "Dana" }, - }) - ); - - docs.push( - new Document({ - pageContent: "I went hiking with Joe Spivey", - metadata: { id: 5, name: "Amber" }, - }) - ); - docs.push( - new Document({ - pageContent: "He developed poison ivy", - metadata: { id: 6, name: "Blair" }, - }) - ); - docs.push( - new Document({ - pageContent: "You remember Leonard Skinner", - metadata: { id: 7, name: "Casey" }, - }) - ); - docs.push( - new Document({ - pageContent: "He got Ptomaine poisoning last night after dinner", - metadata: { id: 8, name: "Dana" }, - }) - ); - - docs.push( - new Document({ - pageContent: "All the counsellors hate the waiters", - metadata: { id: 9, name: "Amber" }, - }) - ); - docs.push( - new Document({ - pageContent: "And the lake has alligators", - metadata: { id: 10, name: "Blair" }, - }) - ); - docs.push( - new Document({ - pageContent: "And the head coach wants no sissies", - metadata: { id: 11, name: "Casey" }, - }) - ); - docs.push( - new Document({ - pageContent: "So he reads to us from something called Ulysses", - metadata: { id: 12, name: "Dana" }, - }) - ); - - const vectorStore = await CassandraStore.fromExistingIndex( - new OpenAIEmbeddings(), - testConfig - ); - - await vectorStore.addDocuments(docs); - - const results = await vectorStore.similaritySearch( - "something called Ulysses", - 1 - ); 
- expect(results).toEqual([ - new Document({ - pageContent: "So he reads to us from something called Ulysses", - metadata: { id: 12, name: "Dana" }, - }), - ]); - }); -}); - -const partitionConfig = { - ...noPartitionConfig, - primaryKey: [ - { - name: "group", - type: "int", - partition: true, - }, - { - name: "ts", - type: "timestamp", - }, - { - name: "id", - type: "int", - }, - ], - withClause: "CLUSTERING ORDER BY (ts DESC)", -}; - -describe.skip("CassandraStore - with explicit partition key", () => { - beforeAll(async () => { - await client.execute("DROP TABLE IF EXISTS test.test;"); - }); - - test("CassandraStore.partitionKey", async () => { - const vectorStore = await CassandraStore.fromTexts( - ["Hey", "Hey"], - [ - { group: 1, ts: new Date(1655377200000), id: 1, name: "Alex" }, - { group: 2, ts: new Date(1655377200000), id: 1, name: "Alice" }, - ], - new OpenAIEmbeddings(), - partitionConfig - ); - - const results = await vectorStore.similaritySearch("Hey", 1, { - group: 2, - }); - - console.debug(`results: ${JSON.stringify(results)}`); - - expect(results).toEqual([ - new Document({ - pageContent: "Hey", - metadata: { - group: 2, - ts: new Date(1655377200000), - id: 1, - name: "Alice", - }, - }), - ]); - }); - - // Test needs to be skipped until https://github.com/datastax/cassandra/pull/839 - test.skip("CassandraStore.partition with cluster filter", async () => { - const vectorStore = await CassandraStore.fromTexts( - ["Apple", "Banana", "Cherry", "Date", "Elderberry"], - [ - { group: 3, ts: new Date(1655377200000), id: 1, name: "Alex" }, - { group: 3, ts: new Date(1655377201000), id: 2, name: "Alex" }, - { group: 3, ts: new Date(1655377202000), id: 3, name: "Alex" }, - { group: 3, ts: new Date(1655377203000), id: 4, name: "Alex" }, - { group: 3, ts: new Date(1655377204000), id: 5, name: "Alex" }, - ], - new OpenAIEmbeddings(), - partitionConfig - ); - - await expect( - vectorStore.similaritySearch("Banana", 1, [ - { name: "group", value: 1 }, - { name: 
"ts", value: new Date(1655377202000), operator: ">" }, - ]) - ).rejects.toThrow(); - - // Once Cassandra supports filtering against cluster columns, the following should work - // expect(results).toEqual([ - // new Document({ - // pageContent: "Elderberry", - // metadata: { group: 1, ts: new Date(1655377204000), id: 5, name: "Alex", seq: null} - // }), - // ]); - }); -}); diff --git a/libs/langchain-community/src/vectorstores/tests/chroma.int.test.ts b/libs/langchain-community/src/vectorstores/tests/chroma.int.test.ts deleted file mode 100644 index af9da7661dc6..000000000000 --- a/libs/langchain-community/src/vectorstores/tests/chroma.int.test.ts +++ /dev/null @@ -1,163 +0,0 @@ -/* eslint-disable no-process-env */ -/* eslint-disable @typescript-eslint/no-non-null-assertion */ -import { beforeEach, describe, expect, test } from "@jest/globals"; -import { ChromaClient } from "chromadb"; -import { faker } from "@faker-js/faker"; -import * as uuid from "uuid"; -import { Document } from "@langchain/core/documents"; -import { OpenAIEmbeddings } from "@langchain/openai"; -import { Chroma } from "../chroma.js"; - -describe.skip("Chroma", () => { - let chromaStore: Chroma; - - beforeEach(async () => { - const embeddings = new OpenAIEmbeddings(); - chromaStore = new Chroma(embeddings, { - url: "http://localhost:8000", - collectionName: "test-collection", - }); - }); - - test.skip("auto-generated ids", async () => { - const pageContent = faker.lorem.sentence(5); - - await chromaStore.addDocuments([{ pageContent, metadata: { foo: "bar" } }]); - - const results = await chromaStore.similaritySearch(pageContent, 1); - - expect(results).toEqual([ - new Document({ metadata: { foo: "bar" }, pageContent }), - ]); - }); - - test.skip("metadata filtering", async () => { - const pageContent = faker.lorem.sentence(5); - const id = uuid.v4(); - - await chromaStore.addDocuments([ - { pageContent, metadata: { foo: "bar" } }, - { pageContent, metadata: { foo: id } }, - { pageContent, 
metadata: { foo: "qux" } }, - ]); - - // If the filter wasn't working, we'd get all 3 documents back - const results = await chromaStore.similaritySearch(pageContent, 3, { - foo: id, - }); - - expect(results).toEqual([ - new Document({ metadata: { foo: id }, pageContent }), - ]); - }); - - test.skip("upsert", async () => { - const pageContent = faker.lorem.sentence(5); - const id = uuid.v4(); - - const ids = await chromaStore.addDocuments([ - { pageContent, metadata: { foo: id } }, - { pageContent, metadata: { foo: id } }, - ]); - - const results = await chromaStore.similaritySearch(pageContent, 4, { - foo: id, - }); - - expect(results.length).toEqual(2); - - const ids2 = await chromaStore.addDocuments( - [ - { pageContent, metadata: { foo: id } }, - { pageContent, metadata: { foo: id } }, - ], - { ids } - ); - - expect(ids).toEqual(ids2); - - const newResults = await chromaStore.similaritySearch(pageContent, 4, { - foo: id, - }); - - expect(newResults.length).toEqual(2); - }); - - test.skip("delete by ids", async () => { - const pageContent = faker.lorem.sentence(5); - const id = uuid.v4(); - - const ids = await chromaStore.addDocuments([ - { pageContent, metadata: { foo: id } }, - { pageContent, metadata: { foo: id } }, - ]); - - const results = await chromaStore.similaritySearch(pageContent, 2, { - foo: id, - }); - - expect(results.length).toEqual(2); - - await chromaStore.delete({ ids: ids.slice(0, 1) }); - - const newResults = await chromaStore.similaritySearch(pageContent, 2, { - foo: id, - }); - - expect(newResults.length).toEqual(1); - }); - - test.skip("delete by filter", async () => { - const pageContent = faker.lorem.sentence(5); - const id = uuid.v4(); - const id2 = uuid.v4(); - - await chromaStore.addDocuments([ - { pageContent, metadata: { foo: id } }, - { pageContent, metadata: { foo: id, bar: id2 } }, - ]); - - const results = await chromaStore.similaritySearch(pageContent, 2, { - foo: id, - }); - - expect(results.length).toEqual(2); - - await 
chromaStore.delete({ - filter: { - bar: id2, - }, - }); - - const newResults = await chromaStore.similaritySearch(pageContent, 2, { - foo: id, - }); - - expect(newResults.length).toEqual(1); - }); - - test.skip("load from client instance", async () => { - const pageContent = faker.lorem.sentence(5); - const id = uuid.v4(); - - const chromaStoreFromClient = new Chroma(new OpenAIEmbeddings(), { - index: new ChromaClient({ - path: "http://localhost:8000", - }), - collectionName: "test-collection", - }); - - await chromaStoreFromClient.addDocuments([ - { pageContent, metadata: { foo: "bar" } }, - { pageContent, metadata: { foo: id } }, - { pageContent, metadata: { foo: "qux" } }, - ]); - - const results = await chromaStoreFromClient.similaritySearch( - pageContent, - 3 - ); - - expect(results.length).toEqual(3); - }); -}); diff --git a/libs/langchain-community/src/vectorstores/tests/chroma.test.ts b/libs/langchain-community/src/vectorstores/tests/chroma.test.ts deleted file mode 100644 index 5ba32646645e..000000000000 --- a/libs/langchain-community/src/vectorstores/tests/chroma.test.ts +++ /dev/null @@ -1,135 +0,0 @@ -/* eslint-disable @typescript-eslint/no-explicit-any */ -import { jest, test, expect } from "@jest/globals"; - -import { type Collection } from "chromadb"; -import { Chroma } from "../chroma.js"; -import { FakeEmbeddings } from "../../util/testing.js"; - -const mockCollection = { - count: jest.fn().mockResolvedValue(5), - upsert: jest.fn().mockResolvedValue(undefined as any), - delete: jest.fn().mockResolvedValue(undefined as any), - // add: jest.fn().mockResolvedValue(undefined as any), - // modify: jest.fn().mockResolvedValue(undefined as any), - // get: jest.fn().mockResolvedValue(undefined as any), - // update: jest.fn().mockResolvedValue({ success: true }), - // query: jest.fn().mockResolvedValue(undefined as any), - // peek: jest.fn().mockResolvedValue(undefined as any), -} as any; - -const mockClient = { - getOrCreateCollection: 
jest.fn().mockResolvedValue(mockCollection), -} as any; - -describe("Chroma", () => { - beforeEach(() => { - jest.clearAllMocks(); - }); - test("imports correctly", async () => { - const { ChromaClient } = await Chroma.imports(); - - expect(ChromaClient).toBeDefined(); - }); - - test("constructor works", async () => { - const chromaStore = new Chroma(new FakeEmbeddings(), { - index: mockClient, - collectionName: "test-collection", - }); - - expect(chromaStore).toBeDefined(); - }); - - test("should add vectors to the collection", async () => { - const expectedPageContents = ["Document 1", "Document 2"]; - const embeddings = new FakeEmbeddings(); - jest.spyOn(embeddings, "embedDocuments"); - const args = { - collectionName: "testCollection", - index: mockClient, - collectionMetadata: { "hnsw:space": "cosine" }, - }; - const documents = expectedPageContents.map((pc) => ({ pageContent: pc })); - - const chroma = new Chroma(embeddings, args); - await chroma.addDocuments(documents as any); - - expect(mockClient.getOrCreateCollection).toHaveBeenCalled(); - expect(embeddings.embedDocuments).toHaveBeenCalledWith( - expectedPageContents - ); - expect(mockCollection.upsert).toHaveBeenCalled(); - - const { metadatas } = mockCollection.upsert.mock.calls[0][0]; - expect(metadatas).toEqual([{}, {}]); - }); - - test("should override loc.lines with locFrom/locTo", async () => { - const expectedPageContents = ["Document 1"]; - const embeddings = new FakeEmbeddings(); - jest.spyOn(embeddings, "embedDocuments"); - - const args = { collectionName: "testCollection", index: mockClient }; - const documents = expectedPageContents.map((pc) => ({ - pageContent: pc, - metadata: { source: "source.html", loc: { lines: { from: 0, to: 4 } } }, - })); - - const chroma = new Chroma(embeddings, args); - await chroma.addDocuments(documents as any); - - const { metadatas } = mockCollection.upsert.mock.calls[0][0]; - - expect(metadatas[0]).toEqual({ - source: "source.html", - locFrom: 0, - locTo: 4, - 
}); - }); - - test("should throw an error for mismatched vector lengths", async () => { - const args = { collectionName: "testCollection" }; - const vectors = [ - [1, 2], - [3, 4], - ]; - const documents = [ - { metadata: { id: 1 }, pageContent: "Document 1" }, - { metadata: { id: 2 }, pageContent: "Document 2" }, - ]; - - const chroma = new Chroma(new FakeEmbeddings(), args); - chroma.numDimensions = 3; // Mismatched numDimensions - - await expect(chroma.addVectors(vectors, documents)).rejects.toThrowError(); - }); - - test("should perform similarity search and return results", async () => { - const args = { collectionName: "testCollection" }; - const query = [1, 2]; - const expectedResultCount = 5; - mockCollection.query = jest.fn().mockResolvedValue({ - ids: [["0", "1", "2", "3", "4"]], - distances: [[0.1, 0.2, 0.3, 0.4, 0.5]], - documents: [ - ["Document 1", "Document 2", "Document 3", "Document 4", "Document 5"], - ], - metadatas: [[{ id: 1 }, { id: 2 }, { id: 3 }, { id: 4 }, { id: 5 }]], - } as any); - - const chroma = new Chroma(new FakeEmbeddings(), args); - chroma.collection = mockCollection; - - const results = await chroma.similaritySearchVectorWithScore( - query, - expectedResultCount - ); - - expect(mockCollection.query).toHaveBeenCalledWith({ - queryEmbeddings: query, - nResults: expectedResultCount, - where: {}, - }); - expect(results).toHaveLength(5); - }); -}); diff --git a/libs/langchain-community/src/vectorstores/tests/clickhouse.int.test.ts b/libs/langchain-community/src/vectorstores/tests/clickhouse.int.test.ts deleted file mode 100644 index 1c4d78c3022f..000000000000 --- a/libs/langchain-community/src/vectorstores/tests/clickhouse.int.test.ts +++ /dev/null @@ -1,99 +0,0 @@ -/* eslint-disable no-process-env */ -import { test, expect } from "@jest/globals"; - -import { Document } from "@langchain/core/documents"; -import { ClickHouseStore } from "../clickhouse.js"; -// Import OpenAIEmbeddings if you have a valid OpenAI API key -import { 
HuggingFaceInferenceEmbeddings } from "../../embeddings/hf.js"; - -test.skip("ClickHouseStore.fromText", async () => { - const vectorStore = await ClickHouseStore.fromTexts( - ["Hello world", "Bye bye", "hello nice world"], - [ - { id: 2, name: "2" }, - { id: 1, name: "1" }, - { id: 3, name: "3" }, - ], - new HuggingFaceInferenceEmbeddings(), - { - host: process.env.CLICKHOUSE_HOST || "localhost", - port: process.env.CLICKHOUSE_PORT || "8443", - username: process.env.CLICKHOUSE_USERNAME || "username", - password: process.env.CLICKHOUSE_PASSWORD || "password", - } - ); - - // Sleep 1 second to ensure that the search occurs after the successful insertion of data. - // eslint-disable-next-line no-promise-executor-return - await new Promise((resolve) => setTimeout(resolve, 1000)); - - const results = await vectorStore.similaritySearch("hello world", 1); - expect(results).toEqual([ - new Document({ - pageContent: "Hello world", - metadata: { id: 2, name: "2" }, - }), - ]); - - const filteredResults = await vectorStore.similaritySearch("hello world", 1, { - whereStr: "metadata.name = '1'", - }); - expect(filteredResults).toEqual([ - new Document({ - pageContent: "Bye bye", - metadata: { id: 1, name: "1" }, - }), - ]); -}); - -test.skip("ClickHouseStore.fromExistingIndex", async () => { - await ClickHouseStore.fromTexts( - ["Hello world", "Bye bye", "hello nice world"], - [ - { id: 2, name: "2" }, - { id: 1, name: "1" }, - { id: 3, name: "3" }, - ], - new HuggingFaceInferenceEmbeddings(), - { - host: process.env.CLICKHOUSE_HOST || "localhost", - port: process.env.CLICKHOUSE_PORT || "8443", - username: process.env.CLICKHOUSE_USERNAME || "username", - password: process.env.CLICKHOUSE_PASSWORD || "password", - table: "test_table", - } - ); - - const vectorStore = await ClickHouseStore.fromExistingIndex( - new HuggingFaceInferenceEmbeddings(), - { - host: process.env.CLICKHOUSE_HOST || "localhost", - port: process.env.CLICKHOUSE_PORT || "8443", - username: 
process.env.CLICKHOUSE_USERNAME || "username", - password: process.env.CLICKHOUSE_PASSWORD || "password", - table: "test_table", - } - ); - - // Sleep 1 second to ensure that the search occurs after the successful insertion of data. - // eslint-disable-next-line no-promise-executor-return - await new Promise((resolve) => setTimeout(resolve, 1000)); - - const results = await vectorStore.similaritySearch("hello world", 1); - expect(results).toEqual([ - new Document({ - pageContent: "Hello world", - metadata: { id: 2, name: "2" }, - }), - ]); - - const filteredResults = await vectorStore.similaritySearch("hello world", 1, { - whereStr: "metadata.name = '1'", - }); - expect(filteredResults).toEqual([ - new Document({ - pageContent: "Bye bye", - metadata: { id: 1, name: "1" }, - }), - ]); -}); diff --git a/libs/langchain-community/src/vectorstores/tests/elasticsearch.int.test.ts b/libs/langchain-community/src/vectorstores/tests/elasticsearch.int.test.ts deleted file mode 100644 index 4a22d5ec3b94..000000000000 --- a/libs/langchain-community/src/vectorstores/tests/elasticsearch.int.test.ts +++ /dev/null @@ -1,111 +0,0 @@ -/* eslint-disable no-process-env */ -import { test, expect } from "@jest/globals"; -import { Client, ClientOptions } from "@elastic/elasticsearch"; -import { Document } from "@langchain/core/documents"; -import { OpenAIEmbeddings } from "@langchain/openai"; - -import { ElasticVectorSearch } from "../elasticsearch.js"; - -describe("ElasticVectorSearch", () => { - let store: ElasticVectorSearch; - - beforeEach(async () => { - if (!process.env.ELASTIC_URL) { - throw new Error("ELASTIC_URL not set"); - } - - const config: ClientOptions = { - node: process.env.ELASTIC_URL, - }; - if (process.env.ELASTIC_API_KEY) { - config.auth = { - apiKey: process.env.ELASTIC_API_KEY, - }; - } else if (process.env.ELASTIC_USERNAME && process.env.ELASTIC_PASSWORD) { - config.auth = { - username: process.env.ELASTIC_USERNAME, - password: process.env.ELASTIC_PASSWORD, - }; - 
} - const client = new Client(config); - - const indexName = "test_index"; - - const embeddings = new OpenAIEmbeddings(); - store = new ElasticVectorSearch(embeddings, { client, indexName }); - await store.deleteIfExists(); - - expect(store).toBeDefined(); - }); - - test.skip("ElasticVectorSearch integration", async () => { - const createdAt = new Date().getTime(); - - const ids = await store.addDocuments([ - { pageContent: "hello", metadata: { a: createdAt + 1 } }, - { pageContent: "car", metadata: { a: createdAt } }, - { pageContent: "adjective", metadata: { a: createdAt } }, - { pageContent: "hi", metadata: { a: createdAt } }, - ]); - - const results1 = await store.similaritySearch("hello!", 1); - - expect(results1).toHaveLength(1); - expect(results1).toEqual([ - new Document({ metadata: { a: createdAt + 1 }, pageContent: "hello" }), - ]); - - const results2 = await store.similaritySearchWithScore("testing!", 6, { - a: createdAt, - }); - - expect(results2).toHaveLength(3); - - const ids2 = await store.addDocuments( - [ - { pageContent: "hello upserted", metadata: { a: createdAt + 1 } }, - { pageContent: "car upserted", metadata: { a: createdAt } }, - { pageContent: "adjective upserted", metadata: { a: createdAt } }, - { pageContent: "hi upserted", metadata: { a: createdAt } }, - ], - { ids } - ); - - expect(ids).toEqual(ids2); - - const results3 = await store.similaritySearchWithScore("testing!", 6, { - a: createdAt, - }); - - expect(results3).toHaveLength(3); - - console.log(`Upserted:`, results3); - - await store.delete({ ids: ids.slice(2) }); - - const results4 = await store.similaritySearchWithScore("testing!", 3, { - a: createdAt, - }); - - expect(results4).toHaveLength(1); - }); - - test.skip("ElasticVectorSearch integration with more than 10 documents", async () => { - const createdAt = new Date().getTime(); - await store.addDocuments([ - { pageContent: "pretty", metadata: { a: createdAt + 1 } }, - { pageContent: "intelligent", metadata: { a: createdAt } 
}, - { pageContent: "creative", metadata: { a: createdAt } }, - { pageContent: "courageous", metadata: { a: createdAt } }, - { pageContent: "energetic", metadata: { a: createdAt } }, - { pageContent: "patient", metadata: { a: createdAt } }, - { pageContent: "responsible", metadata: { a: createdAt } }, - { pageContent: "friendly", metadata: { a: createdAt } }, - { pageContent: "confident", metadata: { a: createdAt } }, - { pageContent: "generous", metadata: { a: createdAt } }, - { pageContent: "compassionate", metadata: { a: createdAt } }, - ]); - const results = await store.similaritySearch("*", 11); - expect(results).toHaveLength(11); - }); -}); diff --git a/libs/langchain-community/src/vectorstores/tests/lancedb.int.test.ts b/libs/langchain-community/src/vectorstores/tests/lancedb.int.test.ts deleted file mode 100644 index 06797b047062..000000000000 --- a/libs/langchain-community/src/vectorstores/tests/lancedb.int.test.ts +++ /dev/null @@ -1,45 +0,0 @@ -import { beforeEach, describe, expect, test } from "@jest/globals"; -import * as fs from "node:fs/promises"; -import * as path from "node:path"; -import * as os from "node:os"; -import { connect, Table } from "vectordb"; -import { OpenAIEmbeddings } from "@langchain/openai"; -import { Document } from "@langchain/core/documents"; -import { LanceDB } from "../lancedb.js"; - -describe("LanceDB", () => { - let lanceDBTable: Table; - - beforeEach(async () => { - const dir = await fs.mkdtemp(path.join(os.tmpdir(), "lcjs-lancedb-")); - const db = await connect(dir); - lanceDBTable = await db.createTable("vectors", [ - { vector: Array(1536), text: "sample", id: 1 }, - ]); - }); - - test("Test fromTexts + addDocuments", async () => { - const embeddings = new OpenAIEmbeddings(); - const vectorStore = await LanceDB.fromTexts( - ["hello bye", "hello world", "bye bye"], - [{ id: 1 }, { id: 2 }, { id: 3 }], - embeddings, - { - table: lanceDBTable, - } - ); - - const results = await vectorStore.similaritySearch("hello bye", 10); 
- expect(results.length).toBe(4); - - await vectorStore.addDocuments([ - new Document({ - pageContent: "a new world", - metadata: { id: 4 }, - }), - ]); - - const resultsTwo = await vectorStore.similaritySearch("hello bye", 10); - expect(resultsTwo.length).toBe(5); - }); -}); diff --git a/libs/langchain-community/src/vectorstores/tests/milvus.int.test.ts b/libs/langchain-community/src/vectorstores/tests/milvus.int.test.ts deleted file mode 100644 index bc328304ddbf..000000000000 --- a/libs/langchain-community/src/vectorstores/tests/milvus.int.test.ts +++ /dev/null @@ -1,169 +0,0 @@ -import { test, expect, afterAll, beforeAll } from "@jest/globals"; -import { ErrorCode, MilvusClient } from "@zilliz/milvus2-sdk-node"; -import { OpenAIEmbeddings } from "@langchain/openai"; -import { Milvus } from "../milvus.js"; - -let collectionName: string; -let embeddings: OpenAIEmbeddings; -// https://docs.zilliz.com/docs/quick-start-1#create-a-collection -const MILVUS_ADDRESS = ""; -const MILVUS_TOKEN = ""; - -const OPEN_AI_API_KEY = ""; - -beforeAll(async () => { - embeddings = new OpenAIEmbeddings({ - openAIApiKey: OPEN_AI_API_KEY, - }); - collectionName = `test_collection_${Math.random().toString(36).substring(7)}`; -}); - -test.skip("Test Milvus.fromtext with token", async () => { - const texts = [ - `Tortoise: Labyrinth? Labyrinth? Could it Are we in the notorious Little -Harmonic Labyrinth of the dreaded Majotaur?`, - "Achilles: Yiikes! What is that?", - `Tortoise: They say-although I person never believed it myself-that an I - Majotaur has created a tiny labyrinth sits in a pit in the middle of - it, waiting innocent victims to get lost in its fears complexity. - Then, when they wander and dazed into the center, he laughs and - laughs at them-so hard, that he laughs them to death!`, - "Achilles: Oh, no!", - "Tortoise: But it's only a myth. 
Courage, Achilles.", - ]; - const objA = { A: { B: "some string" } }; - const objB = { A: { B: "some other string" } }; - const metadatas: object[] = [ - { id: 2, other: objA }, - { id: 1, other: objB }, - { id: 3, other: objA }, - { id: 4, other: objB }, - { id: 5, other: objA }, - ]; - const milvus = await Milvus.fromTexts(texts, metadatas, embeddings, { - collectionName, - autoId: false, - primaryField: "id", - clientConfig: { - address: MILVUS_ADDRESS, - token: MILVUS_TOKEN, - }, - }); - const query = "who is achilles?"; - const result = await milvus.similaritySearch(query, 1); - - const resultMetadatas = result.map(({ metadata }) => metadata); - expect(resultMetadatas).toEqual([{ id: 1, other: objB }]); - - const resultTwo = await milvus.similaritySearch(query, 3); - const resultTwoMetadatas = resultTwo.map(({ metadata }) => metadata); - expect(resultTwoMetadatas).toEqual([ - { id: 1, other: objB }, - { id: 4, other: objB }, - { id: 5, other: objA }, - ]); - - const resultThree = await milvus.similaritySearch(query, 1, "id == 1"); - const resultThreeMetadatas = resultThree.map(({ metadata }) => metadata); - expect(resultThreeMetadatas).toEqual([{ id: 1, other: objB }]); -}); - -test.skip("Test Milvus.fromtext", async () => { - const texts = [ - `Tortoise: Labyrinth? Labyrinth? Could it Are we in the notorious Little -Harmonic Labyrinth of the dreaded Majotaur?`, - "Achilles: Yiikes! What is that?", - `Tortoise: They say-although I person never believed it myself-that an I - Majotaur has created a tiny labyrinth sits in a pit in the middle of - it, waiting innocent victims to get lost in its fears complexity. - Then, when they wander and dazed into the center, he laughs and - laughs at them-so hard, that he laughs them to death!`, - "Achilles: Oh, no!", - "Tortoise: But it's only a myth. 
Courage, Achilles.", - ]; - const objA = { A: { B: "some string" } }; - const objB = { A: { B: "some other string" } }; - const metadatas: object[] = [ - { id: 2, other: objA }, - { id: 1, other: objB }, - { id: 3, other: objA }, - { id: 4, other: objB }, - { id: 5, other: objA }, - ]; - const milvus = await Milvus.fromTexts(texts, metadatas, embeddings, { - collectionName, - url: MILVUS_ADDRESS, - }); - - const query = "who is achilles?"; - const result = await milvus.similaritySearch(query, 1); - const resultMetadatas = result.map(({ metadata }) => metadata); - expect(resultMetadatas).toEqual([{ id: 1, other: objB }]); - - const resultTwo = await milvus.similaritySearch(query, 3); - const resultTwoMetadatas = resultTwo.map(({ metadata }) => metadata); - expect(resultTwoMetadatas).toEqual([ - { id: 1, other: objB }, - { id: 4, other: objB }, - { id: 5, other: objA }, - ]); - - const resultThree = await milvus.similaritySearch(query, 1, "id == 1"); - const resultThreeMetadatas = resultThree.map(({ metadata }) => metadata); - expect(resultThreeMetadatas).toEqual([{ id: 1, other: objB }]); -}); - -test.skip("Test Milvus.fromExistingCollection", async () => { - const milvus = await Milvus.fromExistingCollection(embeddings, { - collectionName, - }); - - const query = "who is achilles?"; - const result = await milvus.similaritySearch(query, 1); - const resultMetadatas = result.map(({ metadata }) => metadata); - expect(resultMetadatas.length).toBe(1); - expect(resultMetadatas[0].id).toEqual(1); - - const resultTwo = await milvus.similaritySearch(query, 3); - const resultTwoMetadatas = resultTwo.map(({ metadata }) => metadata); - expect(resultTwoMetadatas.length).toBe(3); - expect(resultTwoMetadatas[0].id).toEqual(1); - expect(resultTwoMetadatas[1].id).toEqual(4); - expect(resultTwoMetadatas[2].id).toEqual(5); - - const resultThree = await milvus.similaritySearch(query, 1, "id == 1"); - const resultThreeMetadatas = resultThree.map(({ metadata }) => metadata); - 
expect(resultThreeMetadatas.length).toBe(1); - expect(resultThreeMetadatas[0].id).toEqual(1); -}); - -test.skip("Test Milvus.deleteData", async () => { - const milvus = await Milvus.fromExistingCollection(embeddings, { - collectionName, - }); - - const query = "who is achilles?"; - const result = await milvus.similaritySearch(query, 1); - const resultMetadatas = result.map(({ metadata }) => metadata); - const primaryId = resultMetadatas[0].langchain_primaryid; - expect(resultMetadatas.length).toBe(1); - expect(resultMetadatas[0].id).toEqual(1); - - await milvus.delete({ filter: `langchain_primaryid in [${primaryId}]` }); - - const resultTwo = await milvus.similaritySearch(query, 1); - const resultTwoMetadatas = resultTwo.map(({ metadata }) => metadata); - expect(resultTwoMetadatas[0].id).not.toEqual(1); -}); - -afterAll(async () => { - // eslint-disable-next-line no-process-env - if (!process.env.MILVUS_URL) return; - // eslint-disable-next-line no-process-env - const client = new MilvusClient(process.env.MILVUS_URL as string); - const dropRes = await client.dropCollection({ - collection_name: collectionName, - }); - // console.log("Drop collection response: ", dropRes) - expect(dropRes.error_code).toBe(ErrorCode.SUCCESS); -}); diff --git a/libs/langchain-community/src/vectorstores/tests/myscale.int.test.ts b/libs/langchain-community/src/vectorstores/tests/myscale.int.test.ts deleted file mode 100644 index 77db1acec374..000000000000 --- a/libs/langchain-community/src/vectorstores/tests/myscale.int.test.ts +++ /dev/null @@ -1,90 +0,0 @@ -/* eslint-disable no-process-env */ -import { test, expect } from "@jest/globals"; - -import { OpenAIEmbeddings } from "@langchain/openai"; -import { Document } from "@langchain/core/documents"; -import { MyScaleStore } from "../myscale.js"; - -test.skip("MyScaleStore.fromText", async () => { - const vectorStore = await MyScaleStore.fromTexts( - ["Hello world", "Bye bye", "hello nice world"], - [ - { id: 2, name: "2" }, - { id: 1, 
name: "1" }, - { id: 3, name: "3" }, - ], - new OpenAIEmbeddings(), - { - host: process.env.MYSCALE_HOST || "localhost", - port: process.env.MYSCALE_PORT || "8443", - username: process.env.MYSCALE_USERNAME || "username", - password: process.env.MYSCALE_PASSWORD || "password", - } - ); - - const results = await vectorStore.similaritySearch("hello world", 1); - expect(results).toEqual([ - new Document({ - pageContent: "Hello world", - metadata: { id: 2, name: "2" }, - }), - ]); - - const filteredResults = await vectorStore.similaritySearch("hello world", 1, { - whereStr: "metadata.name = '1'", - }); - expect(filteredResults).toEqual([ - new Document({ - pageContent: "Bye bye", - metadata: { id: 1, name: "1" }, - }), - ]); -}); - -test.skip("MyScaleStore.fromExistingIndex", async () => { - await MyScaleStore.fromTexts( - ["Hello world", "Bye bye", "hello nice world"], - [ - { id: 2, name: "2" }, - { id: 1, name: "1" }, - { id: 3, name: "3" }, - ], - new OpenAIEmbeddings(), - { - host: process.env.MYSCALE_HOST || "localhost", - port: process.env.MYSCALE_PORT || "8443", - username: process.env.MYSCALE_USERNAME || "username", - password: process.env.MYSCALE_PASSWORD || "password", - table: "test_table", - } - ); - - const vectorStore = await MyScaleStore.fromExistingIndex( - new OpenAIEmbeddings(), - { - host: process.env.MYSCALE_HOST || "localhost", - port: process.env.MYSCALE_PORT || "8443", - username: process.env.MYSCALE_USERNAME || "username", - password: process.env.MYSCALE_PASSWORD || "password", - table: "test_table", - } - ); - - const results = await vectorStore.similaritySearch("hello world", 1); - expect(results).toEqual([ - new Document({ - pageContent: "Hello world", - metadata: { id: 2, name: "2" }, - }), - ]); - - const filteredResults = await vectorStore.similaritySearch("hello world", 1, { - whereStr: "metadata.name = '1'", - }); - expect(filteredResults).toEqual([ - new Document({ - pageContent: "Bye bye", - metadata: { id: 1, name: "1" }, - }), - ]); 
-}); diff --git a/libs/langchain-community/src/vectorstores/tests/neo4j_vector.int.test.ts b/libs/langchain-community/src/vectorstores/tests/neo4j_vector.int.test.ts deleted file mode 100644 index aae909421e7e..000000000000 --- a/libs/langchain-community/src/vectorstores/tests/neo4j_vector.int.test.ts +++ /dev/null @@ -1,471 +0,0 @@ -/* eslint-disable no-process-env */ -import { Document } from "@langchain/core/documents"; -import { FakeEmbeddings } from "../../util/testing.js"; -import { Neo4jVectorStore } from "../neo4j_vector.js"; - -const OS_TOKEN_COUNT = 1536; - -const texts = ["foo", "bar", "baz"]; - -class FakeEmbeddingsWithOsDimension extends FakeEmbeddings { - async embedDocuments(documents: string[]): Promise { - return Promise.resolve( - documents.map((_, i) => - Array(OS_TOKEN_COUNT - 1) - .fill(1.0) - .concat([i + 1.0]) - ) - ); - } - - async embedQuery(text: string): Promise { - const index = texts.indexOf(text); - - if (index !== -1) { - return Array(OS_TOKEN_COUNT - 1) - .fill(1.0) - .concat([index + 1]); - } else { - throw new Error(`Text '${text}' not found in the 'texts' array.`); - } - } -} - -async function dropVectorIndexes(store: Neo4jVectorStore) { - const allIndexes = await store.query(` - SHOW INDEXES YIELD name, type - WHERE type = "VECTOR" - RETURN name - `); - - if (allIndexes) { - for (const index of allIndexes) { - await store.query(`DROP INDEX ${index.name}`); - } - } -} - -test.skip("Test fromTexts", async () => { - const url = process.env.NEO4J_URI as string; - const username = process.env.NEO4J_USERNAME as string; - const password = process.env.NEO4J_PASSWORD as string; - - expect(url).toBeDefined(); - expect(username).toBeDefined(); - expect(password).toBeDefined(); - - const embeddings = new FakeEmbeddingsWithOsDimension(); - // eslint-disable-next-line @typescript-eslint/no-explicit-any - const metadatas: any[] = []; - - const neo4jVectorStore = await Neo4jVectorStore.fromTexts( - texts, - metadatas, - embeddings, - { - url, - 
username, - password, - preDeleteCollection: true, - } - ); - - const output = await neo4jVectorStore.similaritySearch("foo", 2); - - const expectedResult = [ - new Document({ - pageContent: "foo", - metadata: {}, - }), - new Document({ - pageContent: "bar", - metadata: {}, - }), - ]; - - expect(output).toStrictEqual(expectedResult); - await dropVectorIndexes(neo4jVectorStore); - await neo4jVectorStore.close(); -}); - -test.skip("Test fromTexts Hybrid", async () => { - const url = process.env.NEO4J_URI as string; - const username = process.env.NEO4J_USERNAME as string; - const password = process.env.NEO4J_PASSWORD as string; - - expect(url).toBeDefined(); - expect(username).toBeDefined(); - expect(password).toBeDefined(); - - const embeddings = new FakeEmbeddingsWithOsDimension(); - // eslint-disable-next-line @typescript-eslint/no-explicit-any - const metadatas: any[] = []; - - const neo4jVectorStore = await Neo4jVectorStore.fromTexts( - texts, - metadatas, - embeddings, - { - url, - username, - password, - preDeleteCollection: true, - searchType: "hybrid", - } - ); - - const output = await neo4jVectorStore.similaritySearch("foo", 2); - - const expectedResult = [ - new Document({ - pageContent: "foo", - metadata: {}, - }), - new Document({ - pageContent: "bar", - metadata: {}, - }), - ]; - - expect(output).toStrictEqual(expectedResult); - await dropVectorIndexes(neo4jVectorStore); - await neo4jVectorStore.close(); -}); - -test.skip("Test fromExistingIndex", async () => { - const url = process.env.NEO4J_URI as string; - const username = process.env.NEO4J_USERNAME as string; - const password = process.env.NEO4J_PASSWORD as string; - - expect(url).toBeDefined(); - expect(username).toBeDefined(); - expect(password).toBeDefined(); - - const embeddings = new FakeEmbeddingsWithOsDimension(); - // eslint-disable-next-line @typescript-eslint/no-explicit-any - const metadatas: any[] = []; - - const neo4jVectorStore = await Neo4jVectorStore.fromTexts( - texts, - metadatas, - 
embeddings, - { - url, - username, - password, - indexName: "vector", - preDeleteCollection: true, - } - ); - - const existingIndex = await Neo4jVectorStore.fromExistingIndex(embeddings, { - url, - username, - password, - indexName: "vector", - }); - - const output = await existingIndex.similaritySearch("foo", 2); - - const expectedResult = [ - new Document({ - pageContent: "foo", - metadata: {}, - }), - new Document({ - pageContent: "bar", - metadata: {}, - }), - ]; - - expect(output).toStrictEqual(expectedResult); - await dropVectorIndexes(neo4jVectorStore); - await neo4jVectorStore.close(); - await existingIndex.close(); -}); - -test.skip("Test fromExistingIndex Hybrid", async () => { - const url = process.env.NEO4J_URI as string; - const username = process.env.NEO4J_USERNAME as string; - const password = process.env.NEO4J_PASSWORD as string; - - expect(url).toBeDefined(); - expect(username).toBeDefined(); - expect(password).toBeDefined(); - - const embeddings = new FakeEmbeddingsWithOsDimension(); - // eslint-disable-next-line @typescript-eslint/no-explicit-any - const metadatas: any[] = []; - - const neo4jVectorStore = await Neo4jVectorStore.fromTexts( - texts, - metadatas, - embeddings, - { - url, - username, - password, - indexName: "vector", - keywordIndexName: "keyword", - searchType: "hybrid", - preDeleteCollection: true, - } - ); - - const existingIndex = await Neo4jVectorStore.fromExistingIndex(embeddings, { - url, - username, - password, - indexName: "vector", - keywordIndexName: "keyword", - searchType: "hybrid", - }); - - const output = await existingIndex.similaritySearch("foo", 2); - - const expectedResult = [ - new Document({ - pageContent: "foo", - metadata: {}, - }), - new Document({ - pageContent: "bar", - metadata: {}, - }), - ]; - - expect(output).toStrictEqual(expectedResult); - await dropVectorIndexes(neo4jVectorStore); - await neo4jVectorStore.close(); - await existingIndex.close(); -}); - -test.skip("Test retrievalQuery", async () => { - 
const url = process.env.NEO4J_URI as string; - const username = process.env.NEO4J_USERNAME as string; - const password = process.env.NEO4J_PASSWORD as string; - - expect(url).toBeDefined(); - expect(username).toBeDefined(); - expect(password).toBeDefined(); - - const embeddings = new FakeEmbeddingsWithOsDimension(); - // eslint-disable-next-line @typescript-eslint/no-explicit-any - const metadatas: any[] = []; - - const neo4jVectorStore = await Neo4jVectorStore.fromTexts( - texts, - metadatas, - embeddings, - { - url, - username, - password, - indexName: "vector", - preDeleteCollection: true, - retrievalQuery: - "RETURN node.text AS text, score, {foo:'bar'} AS metadata", - } - ); - - const output = await neo4jVectorStore.similaritySearch("foo", 2); - - const expectedResult = [ - new Document({ - pageContent: "foo", - metadata: { foo: "bar" }, - }), - new Document({ - pageContent: "bar", - metadata: { foo: "bar" }, - }), - ]; - - expect(output).toStrictEqual(expectedResult); - await dropVectorIndexes(neo4jVectorStore); - await neo4jVectorStore.close(); -}); - -test.skip("Test fromExistingGraph", async () => { - const url = process.env.NEO4J_URI as string; - const username = process.env.NEO4J_USERNAME as string; - const password = process.env.NEO4J_PASSWORD as string; - - expect(url).toBeDefined(); - expect(username).toBeDefined(); - expect(password).toBeDefined(); - - const embeddings = new FakeEmbeddingsWithOsDimension(); - // eslint-disable-next-line @typescript-eslint/no-explicit-any - const metadatas: any[] = []; - - const neo4jVectorStore = await Neo4jVectorStore.fromTexts( - texts, - metadatas, - embeddings, - { - url, - username, - password, - indexName: "vector", - preDeleteCollection: true, - } - ); - - await neo4jVectorStore.query("MATCH (n) DETACH DELETE n"); - - await neo4jVectorStore.query( - "CREATE (:Test {name:'Foo'}), (:Test {name:'Bar', foo:'bar'})" - ); - - const existingGraph = await Neo4jVectorStore.fromExistingGraph(embeddings, { - url, - 
username, - password, - indexName: "vector1", - nodeLabel: "Test", - textNodeProperties: ["name"], - embeddingNodeProperty: "embedding", - }); - - const output = await existingGraph.similaritySearch("foo", 2); - - const expectedResult = [ - new Document({ - pageContent: "\nname: Foo", - metadata: {}, - }), - new Document({ - pageContent: "\nname: Bar", - metadata: { foo: "bar" }, - }), - ]; - - expect(output).toStrictEqual(expectedResult); - await dropVectorIndexes(neo4jVectorStore); - await neo4jVectorStore.close(); - await existingGraph.close(); -}); - -test.skip("Test fromExistingGraph multiple properties", async () => { - const url = process.env.NEO4J_URI as string; - const username = process.env.NEO4J_USERNAME as string; - const password = process.env.NEO4J_PASSWORD as string; - - expect(url).toBeDefined(); - expect(username).toBeDefined(); - expect(password).toBeDefined(); - - const embeddings = new FakeEmbeddingsWithOsDimension(); - // eslint-disable-next-line @typescript-eslint/no-explicit-any - const metadatas: any[] = []; - - const neo4jVectorStore = await Neo4jVectorStore.fromTexts( - texts, - metadatas, - embeddings, - { - url, - username, - password, - indexName: "vector", - preDeleteCollection: true, - } - ); - - await neo4jVectorStore.query("MATCH (n) DETACH DELETE n"); - - await neo4jVectorStore.query( - "CREATE (:Test {name:'Foo', name2:'Fooz'}), (:Test {name:'Bar', foo:'bar'})" - ); - - const existingGraph = await Neo4jVectorStore.fromExistingGraph(embeddings, { - url, - username, - password, - indexName: "vector1", - nodeLabel: "Test", - textNodeProperties: ["name", "name2"], - embeddingNodeProperty: "embedding", - }); - - const output = await existingGraph.similaritySearch("foo", 2); - - const expectedResult = [ - new Document({ - pageContent: "\nname: Foo\nname2: Fooz", - metadata: {}, - }), - new Document({ - pageContent: "\nname: Bar\nname2: ", - metadata: { foo: "bar" }, - }), - ]; - - expect(output).toStrictEqual(expectedResult); - await 
dropVectorIndexes(neo4jVectorStore); - await neo4jVectorStore.close(); - await existingGraph.close(); -}); - -test.skip("Test fromExistingGraph multiple properties hybrid", async () => { - const url = process.env.NEO4J_URI as string; - const username = process.env.NEO4J_USERNAME as string; - const password = process.env.NEO4J_PASSWORD as string; - - expect(url).toBeDefined(); - expect(username).toBeDefined(); - expect(password).toBeDefined(); - - const embeddings = new FakeEmbeddingsWithOsDimension(); - // eslint-disable-next-line @typescript-eslint/no-explicit-any - const metadatas: any[] = []; - - const neo4jVectorStore = await Neo4jVectorStore.fromTexts( - texts, - metadatas, - embeddings, - { - url, - username, - password, - indexName: "vector", - preDeleteCollection: true, - } - ); - - await neo4jVectorStore.query("MATCH (n) DETACH DELETE n"); - - await neo4jVectorStore.query( - "CREATE (:Test {name:'Foo', name2:'Fooz'}), (:Test {name:'Bar', foo:'bar'})" - ); - - const existingGraph = await Neo4jVectorStore.fromExistingGraph(embeddings, { - url, - username, - password, - indexName: "vector1", - nodeLabel: "Test", - textNodeProperties: ["name", "name2"], - embeddingNodeProperty: "embedding", - searchType: "hybrid", - }); - - const output = await existingGraph.similaritySearch("foo", 2); - - const expectedResult = [ - new Document({ - pageContent: "\nname: Foo\nname2: Fooz", - metadata: {}, - }), - new Document({ - pageContent: "\nname: Bar\nname2: ", - metadata: { foo: "bar" }, - }), - ]; - - expect(output).toStrictEqual(expectedResult); - await dropVectorIndexes(neo4jVectorStore); - await neo4jVectorStore.close(); - await existingGraph.close(); -}); diff --git a/libs/langchain-community/src/vectorstores/tests/opensearch.int.test.ts b/libs/langchain-community/src/vectorstores/tests/opensearch.int.test.ts deleted file mode 100644 index f3a497dc5b9d..000000000000 --- a/libs/langchain-community/src/vectorstores/tests/opensearch.int.test.ts +++ /dev/null @@ -1,44 
+0,0 @@ -/* eslint-disable no-process-env */ -import { test, expect } from "@jest/globals"; -import { Client } from "@opensearch-project/opensearch"; -import { OpenAIEmbeddings } from "@langchain/openai"; -import { Document } from "@langchain/core/documents"; -import { OpenSearchVectorStore } from "../opensearch.js"; - -test.skip("OpenSearchVectorStore integration", async () => { - if (!process.env.OPENSEARCH_URL) { - throw new Error("OPENSEARCH_URL not set"); - } - - const client = new Client({ - nodes: [process.env.OPENSEARCH_URL], - }); - - const indexName = "test_index"; - - const embeddings = new OpenAIEmbeddings(); - const store = new OpenSearchVectorStore(embeddings, { client, indexName }); - await store.deleteIfExists(); - - expect(store).toBeDefined(); - - await store.addDocuments([ - { pageContent: "hello", metadata: { a: 2 } }, - { pageContent: "car", metadata: { a: 1 } }, - { pageContent: "adjective", metadata: { a: 1 } }, - { pageContent: "hi", metadata: { a: 1 } }, - ]); - - const results1 = await store.similaritySearch("hello!", 1); - - expect(results1).toHaveLength(1); - expect(results1).toEqual([ - new Document({ metadata: { a: 2 }, pageContent: "hello" }), - ]); - - const results2 = await store.similaritySearchWithScore("hello!", 1, { - a: 1, - }); - - expect(results2).toHaveLength(1); -}); diff --git a/libs/langchain-community/src/vectorstores/tests/qdrant.int.test.ts b/libs/langchain-community/src/vectorstores/tests/qdrant.int.test.ts deleted file mode 100644 index 70d862f96dc4..000000000000 --- a/libs/langchain-community/src/vectorstores/tests/qdrant.int.test.ts +++ /dev/null @@ -1,50 +0,0 @@ -/* eslint-disable no-process-env */ -/* eslint-disable @typescript-eslint/no-non-null-assertion */ -import { describe, expect, test } from "@jest/globals"; -import { QdrantClient } from "@qdrant/js-client-rest"; -import { faker } from "@faker-js/faker"; -import { Document } from "@langchain/core/documents"; -import { OpenAIEmbeddings } from 
"@langchain/openai"; -import { QdrantVectorStore } from "../qdrant.js"; -import { OllamaEmbeddings } from "../../embeddings/ollama.js"; - -describe.skip("QdrantVectorStore testcase", () => { - test("base usage", async () => { - const embeddings = new OpenAIEmbeddings({}); - - const qdrantVectorStore = new QdrantVectorStore(embeddings, { - url: process.env.QDRANT_URL || "http://localhost:6333", - collectionName: process.env.QDRANT_COLLECTION || "documents", - }); - - const pageContent = faker.lorem.sentence(5); - - await qdrantVectorStore.addDocuments([{ pageContent, metadata: {} }]); - - const results = await qdrantVectorStore.similaritySearch(pageContent, 1); - - expect(results[0]).toEqual(new Document({ metadata: {}, pageContent })); - }); - - test("passing client directly with a local model that creates embeddings with a different number of dimensions", async () => { - const embeddings = new OllamaEmbeddings({}); - - const pageContent = faker.lorem.sentence(5); - - const qdrantVectorStore = await QdrantVectorStore.fromDocuments( - [{ pageContent, metadata: {} }], - embeddings, - { - collectionName: "different_dimensions", - client: new QdrantClient({ - url: process.env.QDRANT_URL, - apiKey: process.env.QDRANT_API_KEY, - }), - } - ); - - const results = await qdrantVectorStore.similaritySearch(pageContent, 1); - - expect(results[0]).toEqual(new Document({ metadata: {}, pageContent })); - }); -}); diff --git a/libs/langchain-community/src/vectorstores/tests/qdrant.test.ts b/libs/langchain-community/src/vectorstores/tests/qdrant.test.ts deleted file mode 100644 index a7402e023051..000000000000 --- a/libs/langchain-community/src/vectorstores/tests/qdrant.test.ts +++ /dev/null @@ -1,33 +0,0 @@ -/* eslint-disable @typescript-eslint/no-explicit-any */ -import { jest, test, expect } from "@jest/globals"; -import { FakeEmbeddings } from "../../util/testing.js"; - -import { QdrantVectorStore } from "../qdrant.js"; - -test("QdrantVectorStore works", async () => { - const 
client = { - upsert: jest.fn(), - search: jest.fn().mockResolvedValue([]), - getCollections: jest.fn().mockResolvedValue({ collections: [] }), - createCollection: jest.fn(), - }; - - const embeddings = new FakeEmbeddings(); - - const store = new QdrantVectorStore(embeddings, { client: client as any }); - - expect(store).toBeDefined(); - - await store.addDocuments([ - { - pageContent: "hello", - metadata: {}, - }, - ]); - - expect(client.upsert).toHaveBeenCalledTimes(1); - - const results = await store.similaritySearch("hello", 1); - - expect(results).toHaveLength(0); -}); diff --git a/libs/langchain-community/src/vectorstores/tests/redis.int.test.ts b/libs/langchain-community/src/vectorstores/tests/redis.int.test.ts deleted file mode 100644 index f44f52b8d273..000000000000 --- a/libs/langchain-community/src/vectorstores/tests/redis.int.test.ts +++ /dev/null @@ -1,71 +0,0 @@ -/* eslint-disable no-process-env */ -/* eslint-disable no-promise-executor-return */ - -import { RedisClientType, createClient } from "redis"; -import { v4 as uuidv4 } from "uuid"; -import { test, expect } from "@jest/globals"; -import { faker } from "@faker-js/faker"; -import { OpenAIEmbeddings } from "@langchain/openai"; -import { Document } from "@langchain/core/documents"; -import { RedisVectorStore } from "../redis.js"; - -describe("RedisVectorStore", () => { - let vectorStore: RedisVectorStore; - - beforeEach(async () => { - const client = createClient({ url: process.env.REDIS_URL }); - await client.connect(); - - vectorStore = new RedisVectorStore(new OpenAIEmbeddings(), { - redisClient: client as RedisClientType, - indexName: "test-index", - keyPrefix: "test:", - }); - }); - - test.skip("auto-generated ids", async () => { - const pageContent = faker.lorem.sentence(5); - - await vectorStore.addDocuments([{ pageContent, metadata: { foo: "bar" } }]); - - const results = await vectorStore.similaritySearch(pageContent, 1); - - expect(results).toEqual([ - new Document({ metadata: { foo: "bar" 
}, pageContent }), - ]); - }); - - test.skip("user-provided keys", async () => { - const documentKey = `test:${uuidv4()}`; - const pageContent = faker.lorem.sentence(5); - - await vectorStore.addDocuments([{ pageContent, metadata: {} }], { - keys: [documentKey], - }); - - const results = await vectorStore.similaritySearch(pageContent, 1); - - expect(results).toEqual([new Document({ metadata: {}, pageContent })]); - }); - - test.skip("metadata filtering", async () => { - await vectorStore.dropIndex(); - const pageContent = faker.lorem.sentence(5); - const uuid = uuidv4(); - - await vectorStore.addDocuments([ - { pageContent, metadata: { foo: "bar" } }, - { pageContent, metadata: { foo: uuid } }, - { pageContent, metadata: { foo: "qux" } }, - ]); - - // If the filter wasn't working, we'd get all 3 documents back - const results = await vectorStore.similaritySearch(pageContent, 3, [ - `${uuid}`, - ]); - - expect(results).toEqual([ - new Document({ metadata: { foo: uuid }, pageContent }), - ]); - }); -}); diff --git a/libs/langchain-community/src/vectorstores/tests/redis.test.ts b/libs/langchain-community/src/vectorstores/tests/redis.test.ts deleted file mode 100644 index 4378dc9c2293..000000000000 --- a/libs/langchain-community/src/vectorstores/tests/redis.test.ts +++ /dev/null @@ -1,222 +0,0 @@ -/* eslint-disable @typescript-eslint/no-explicit-any */ -import { jest, test, expect, describe } from "@jest/globals"; -import { FakeEmbeddings } from "../../util/testing.js"; - -import { RedisVectorStore } from "../redis.js"; - -const createRedisClientMockup = () => { - const hSetMock = jest.fn(); - - return { - ft: { - info: jest.fn().mockResolvedValue({ - numDocs: 0, - }), - create: jest.fn(), - search: jest.fn().mockResolvedValue({ - total: 0, - documents: [], - }), - dropIndex: jest.fn(), - }, - hSet: hSetMock, - multi: jest.fn().mockImplementation(() => ({ - exec: jest.fn(), - hSet: hSetMock, - })), - }; -}; - -test("RedisVectorStore with external keys", async () => { - 
const client = createRedisClientMockup(); - const embeddings = new FakeEmbeddings(); - - const store = new RedisVectorStore(embeddings, { - redisClient: client as any, - indexName: "documents", - }); - - expect(store).toBeDefined(); - - await store.addDocuments( - [ - { - pageContent: "hello", - metadata: { - a: 1, - b: { nested: [1, { a: 4 }] }, - }, - }, - ], - { keys: ["id1"] } - ); - - expect(client.hSet).toHaveBeenCalledTimes(1); - expect(client.hSet).toHaveBeenCalledWith("id1", { - content_vector: Buffer.from(new Float32Array([0.1, 0.2, 0.3, 0.4]).buffer), - content: "hello", - metadata: JSON.stringify({ a: 1, b: { nested: [1, { a: 4 }] } }), - }); - - const results = await store.similaritySearch("goodbye", 1); - - expect(results).toHaveLength(0); -}); - -test("RedisVectorStore with generated keys", async () => { - const client = createRedisClientMockup(); - const embeddings = new FakeEmbeddings(); - - const store = new RedisVectorStore(embeddings, { - redisClient: client as any, - indexName: "documents", - }); - - expect(store).toBeDefined(); - - await store.addDocuments([{ pageContent: "hello", metadata: { a: 1 } }]); - - expect(client.hSet).toHaveBeenCalledTimes(1); - - const results = await store.similaritySearch("goodbye", 1); - - expect(results).toHaveLength(0); -}); - -test("RedisVectorStore with filters", async () => { - const client = createRedisClientMockup(); - const embeddings = new FakeEmbeddings(); - - const store = new RedisVectorStore(embeddings, { - redisClient: client as any, - indexName: "documents", - }); - - expect(store).toBeDefined(); - - await store.similaritySearch("hello", 1, ["a"]); - - expect(client.ft.search).toHaveBeenCalledWith( - "documents", - "@metadata:(a) => [KNN 1 @content_vector $vector AS vector_score]", - { - PARAMS: { - vector: Buffer.from(new Float32Array([0.1, 0.2, 0.3, 0.4]).buffer), - }, - RETURN: ["metadata", "content", "vector_score"], - SORTBY: "vector_score", - DIALECT: 2, - LIMIT: { - from: 0, - size: 1, - }, 
- } - ); -}); - -describe("RedisVectorStore dropIndex", () => { - const client = createRedisClientMockup(); - const embeddings = new FakeEmbeddings(); - - const store = new RedisVectorStore(embeddings, { - redisClient: client as any, - indexName: "documents", - }); - - test("without deleteDocuments param provided", async () => { - await store.dropIndex(); - - expect(client.ft.dropIndex).toHaveBeenCalledWith("documents", undefined); - }); - - test("with deleteDocuments as false", async () => { - await store.dropIndex(false); - - expect(client.ft.dropIndex).toHaveBeenCalledWith("documents", undefined); - }); - - test("with deleteDocument as true", async () => { - await store.dropIndex(true); - - expect(client.ft.dropIndex).toHaveBeenCalledWith("documents", { - DD: true, - }); - }); - - test("through delete convenience method", async () => { - await store.delete({ deleteAll: true }); - - expect(client.ft.dropIndex).toHaveBeenCalledWith("documents", { - DD: true, - }); - }); -}); - -describe("RedisVectorStore createIndex when index does not exist", () => { - test("calls ft.create with default create options", async () => { - const client = createRedisClientMockup(); - const embeddings = new FakeEmbeddings(); - const store = new RedisVectorStore(embeddings, { - redisClient: client as any, - indexName: "documents", - }); - store.checkIndexExists = jest.fn().mockResolvedValue(false); - - await store.createIndex(); - - expect(client.ft.create).toHaveBeenCalledWith( - "documents", - expect.any(Object), - { - ON: "HASH", - PREFIX: "doc:documents:", - } - ); - }); - - test("calls ft.create with custom options", async () => { - const client = createRedisClientMockup(); - const embeddings = new FakeEmbeddings(); - const store = new RedisVectorStore(embeddings, { - redisClient: client as any, - indexName: "documents", - createIndexOptions: { - ON: "JSON", - FILTER: '@indexName == "documents"', - SCORE: 0.5, - MAXTEXTFIELDS: true, - TEMPORARY: 1000, - NOOFFSETS: true, - NOHL: 
true, - NOFIELDS: true, - NOFREQS: true, - SKIPINITIALSCAN: true, - STOPWORDS: ["a", "b"], - LANGUAGE: "German", - }, - }); - store.checkIndexExists = jest.fn().mockResolvedValue(false); - - await store.createIndex(); - - expect(client.ft.create).toHaveBeenCalledWith( - "documents", - expect.any(Object), - { - ON: "JSON", - PREFIX: "doc:documents:", - FILTER: '@indexName == "documents"', - SCORE: 0.5, - MAXTEXTFIELDS: true, - TEMPORARY: 1000, - NOOFFSETS: true, - NOHL: true, - NOFIELDS: true, - NOFREQS: true, - SKIPINITIALSCAN: true, - STOPWORDS: ["a", "b"], - LANGUAGE: "German", - } - ); - }); -}); diff --git a/libs/langchain-community/src/vectorstores/tests/singlestore.int.test.ts b/libs/langchain-community/src/vectorstores/tests/singlestore.int.test.ts deleted file mode 100644 index aacfd5dd71b4..000000000000 --- a/libs/langchain-community/src/vectorstores/tests/singlestore.int.test.ts +++ /dev/null @@ -1,175 +0,0 @@ -/* eslint-disable no-process-env */ -/* eslint-disable import/no-extraneous-dependencies */ -import { test, expect } from "@jest/globals"; -import { OpenAIEmbeddings } from "@langchain/openai"; -import { Document } from "@langchain/core/documents"; -import { SingleStoreVectorStore } from "../singlestore.js"; - -test.skip("SingleStoreVectorStore", async () => { - expect(process.env.SINGLESTORE_HOST).toBeDefined(); - expect(process.env.SINGLESTORE_PORT).toBeDefined(); - expect(process.env.SINGLESTORE_USERNAME).toBeDefined(); - expect(process.env.SINGLESTORE_PASSWORD).toBeDefined(); - expect(process.env.SINGLESTORE_DATABASE).toBeDefined(); - - const vectorStore = await SingleStoreVectorStore.fromTexts( - ["Hello world", "Bye bye", "hello nice world"], - [ - { id: 2, name: "2" }, - { id: 1, name: "1" }, - { id: 3, name: "3" }, - ], - new OpenAIEmbeddings(), - { - connectionOptions: { - host: process.env.SINGLESTORE_HOST, - port: Number(process.env.SINGLESTORE_PORT), - user: process.env.SINGLESTORE_USERNAME, - password: process.env.SINGLESTORE_PASSWORD, 
- database: process.env.SINGLESTORE_DATABASE, - }, - contentColumnName: "cont", - metadataColumnName: "met", - vectorColumnName: "vec", - } - ); - expect(vectorStore).toBeDefined(); - - const results = await vectorStore.similaritySearch("hello world", 1); - - expect(results).toEqual([ - new Document({ - pageContent: "Hello world", - metadata: { id: 2, name: "2" }, - }), - ]); - - await vectorStore.addDocuments([ - new Document({ - pageContent: "Green forest", - metadata: { id: 4, name: "4" }, - }), - new Document({ - pageContent: "Green field", - metadata: { id: 5, name: "5" }, - }), - ]); - - const results2 = await vectorStore.similaritySearch("forest", 1); - - expect(results2).toEqual([ - new Document({ - pageContent: "Green forest", - metadata: { id: 4, name: "4" }, - }), - ]); - - await vectorStore.end(); -}); - -test.skip("SingleStoreVectorStore euclidean_distance", async () => { - expect(process.env.SINGLESTORE_HOST).toBeDefined(); - expect(process.env.SINGLESTORE_PORT).toBeDefined(); - expect(process.env.SINGLESTORE_USERNAME).toBeDefined(); - expect(process.env.SINGLESTORE_PASSWORD).toBeDefined(); - expect(process.env.SINGLESTORE_DATABASE).toBeDefined(); - - const vectorStore = await SingleStoreVectorStore.fromTexts( - ["Hello world", "Bye bye", "hello nice world"], - [ - { id: 2, name: "2" }, - { id: 1, name: "1" }, - { id: 3, name: "3" }, - ], - new OpenAIEmbeddings(), - { - connectionURI: `http://${process.env.SINGLESTORE_USERNAME}:${process.env.SINGLESTORE_PASSWORD}@${process.env.SINGLESTORE_HOST}:${process.env.SINGLESTORE_PORT}/${process.env.SINGLESTORE_DATABASE}`, - tableName: "euclidean_distance_test", - distanceMetric: "EUCLIDEAN_DISTANCE", - } - ); - expect(vectorStore).toBeDefined(); - - const results = await vectorStore.similaritySearch("hello world", 1); - - expect(results).toEqual([ - new Document({ - pageContent: "Hello world", - metadata: { id: 2, name: "2" }, - }), - ]); - - await vectorStore.end(); -}); - -test.skip("SingleStoreVectorStore 
filtering", async () => { - expect(process.env.SINGLESTORE_HOST).toBeDefined(); - expect(process.env.SINGLESTORE_PORT).toBeDefined(); - expect(process.env.SINGLESTORE_USERNAME).toBeDefined(); - expect(process.env.SINGLESTORE_PASSWORD).toBeDefined(); - expect(process.env.SINGLESTORE_DATABASE).toBeDefined(); - - const vectorStore = await SingleStoreVectorStore.fromTexts( - ["Hello world", "Bye bye", "hello nice world"], - [ - { id: 2, name: "2", sub: { sub2: { idx: 1 } } }, - { id: 1, name: "1" }, - { id: 3, name: "3" }, - ], - new OpenAIEmbeddings(), - { - connectionURI: `http://${process.env.SINGLESTORE_USERNAME}:${process.env.SINGLESTORE_PASSWORD}@${process.env.SINGLESTORE_HOST}:${process.env.SINGLESTORE_PORT}/${process.env.SINGLESTORE_DATABASE}`, - tableName: "filtering_test", - } - ); - expect(vectorStore).toBeDefined(); - - const results1 = await vectorStore.similaritySearch("hello world", 1, { - id: 3, - }); - - expect(results1).toEqual([ - new Document({ - pageContent: "hello nice world", - metadata: { id: 3, name: "3" }, - }), - ]); - - const results2 = await vectorStore.similaritySearch("hello nice world", 1, { - name: "2", - }); - expect(results2).toEqual([ - new Document({ - pageContent: "Hello world", - metadata: { id: 2, name: "2", sub: { sub2: { idx: 1 } } }, - }), - ]); - - const results3 = await vectorStore.similaritySearch("hello nice world", 1, { - sub: { sub2: { idx: 1 } }, - }); - expect(results3).toEqual([ - new Document({ - pageContent: "Hello world", - metadata: { id: 2, name: "2", sub: { sub2: { idx: 1 } } }, - }), - ]); - - const results4 = await vectorStore.similaritySearch("hello nice world", 1, { - name: "2", - id: 2, - }); - expect(results4).toEqual([ - new Document({ - pageContent: "Hello world", - metadata: { id: 2, name: "2", sub: { sub2: { idx: 1 } } }, - }), - ]); - - const results5 = await vectorStore.similaritySearch("hello nice world", 1, { - name: "3", - sub: { sub2: { idx: 1 } }, - }); - expect(results5).toEqual([]); - await 
vectorStore.end(); -}); diff --git a/libs/langchain-community/src/vectorstores/tests/tigris.test.ts b/libs/langchain-community/src/vectorstores/tests/tigris.test.ts deleted file mode 100644 index e03943d52312..000000000000 --- a/libs/langchain-community/src/vectorstores/tests/tigris.test.ts +++ /dev/null @@ -1,76 +0,0 @@ -/* eslint-disable @typescript-eslint/no-explicit-any */ -import { jest, test, expect } from "@jest/globals"; -import { FakeEmbeddings } from "../../util/testing.js"; - -import { TigrisVectorStore } from "../tigris.js"; - -test("TigrisVectorStore with external ids", async () => { - const client = { - addDocumentsWithVectors: jest.fn(), - similaritySearchVectorWithScore: jest - .fn() - .mockImplementation(async () => []), - }; - const embeddings = new FakeEmbeddings(); - - const store = new TigrisVectorStore(embeddings, { - index: client as any, - }); - - expect(store).toBeDefined(); - - await store.addDocuments( - [ - { - pageContent: "hello", - metadata: { - a: 1, - b: { nested: [1, { a: 4 }] }, - }, - }, - ], - ["id1"] - ); - - expect(client.addDocumentsWithVectors).toHaveBeenCalledTimes(1); - - expect(client.addDocumentsWithVectors).toHaveBeenCalledWith({ - ids: ["id1"], - embeddings: [[0.1, 0.2, 0.3, 0.4]], - documents: [ - { - content: "hello", - metadata: { - a: 1, - b: { nested: [1, { a: 4 }] }, - }, - }, - ], - }); - - const results = await store.similaritySearch("hello", 1); - - expect(results).toHaveLength(0); -}); - -test("TigrisVectorStore with generated ids", async () => { - const client = { - addDocumentsWithVectors: jest.fn(), - similaritySearchVectorWithScore: jest - .fn() - .mockImplementation(async () => []), - }; - const embeddings = new FakeEmbeddings(); - - const store = new TigrisVectorStore(embeddings, { index: client as any }); - - expect(store).toBeDefined(); - - await store.addDocuments([{ pageContent: "hello", metadata: { a: 1 } }]); - - expect(client.addDocumentsWithVectors).toHaveBeenCalledTimes(1); - - const results = 
await store.similaritySearch("hello", 1); - - expect(results).toHaveLength(0); -}); diff --git a/libs/langchain-community/src/vectorstores/tests/typeorm.int.test.ts b/libs/langchain-community/src/vectorstores/tests/typeorm.int.test.ts deleted file mode 100644 index 1a59fde00409..000000000000 --- a/libs/langchain-community/src/vectorstores/tests/typeorm.int.test.ts +++ /dev/null @@ -1,51 +0,0 @@ -import { expect, test } from "@jest/globals"; -import { DataSourceOptions } from "typeorm"; -import { OpenAIEmbeddings } from "@langchain/openai"; -import { TypeORMVectorStore } from "../typeorm.js"; - -test.skip("Test embeddings creation", async () => { - const args = { - postgresConnectionOptions: { - type: "postgres", - host: "localhost", - port: 5432, - username: "myuser", - password: "ChangeMe", - database: "api", - } as DataSourceOptions, - tableName: "testlangchain", - }; - - const typeormVectorStore = await TypeORMVectorStore.fromDataSource( - new OpenAIEmbeddings(), - args - ); - - expect(typeormVectorStore).toBeDefined(); - - const docHello = { - pageContent: "hello", - metadata: { a: 1 }, - }; - const docCat = { - pageContent: "Cat drinks milk", - metadata: { a: 2 }, - }; - const docHi = { pageContent: "hi", metadata: { a: 1 } }; - - await typeormVectorStore.addDocuments([docHello, docHi, docCat]); - - const results = await typeormVectorStore.similaritySearch("hello", 2, { - a: 2, - }); - - expect(results).toHaveLength(1); - - expect(results[0].pageContent).toEqual(docCat.pageContent); - - await typeormVectorStore.appDataSource.query( - 'TRUNCATE TABLE "testlangchain"' - ); - - await typeormVectorStore.appDataSource.destroy(); -}); diff --git a/libs/langchain-community/src/vectorstores/tests/typesense.test.ts b/libs/langchain-community/src/vectorstores/tests/typesense.test.ts deleted file mode 100644 index c3eba14f7d49..000000000000 --- a/libs/langchain-community/src/vectorstores/tests/typesense.test.ts +++ /dev/null @@ -1,127 +0,0 @@ -import { Client } from 
"typesense"; -import { Document } from "@langchain/core/documents"; -import { FakeEmbeddings } from "../../util/testing.js"; -import { Typesense } from "../typesense.js"; - -test("documentsToTypesenseRecords should return the correct typesense records", async () => { - const embeddings = new FakeEmbeddings(); - const vectorstore = new Typesense(embeddings, { - schemaName: "test", - typesenseClient: {} as unknown as Client, - columnNames: { - vector: "vec", - pageContent: "text", - metadataColumnNames: ["foo", "bar", "baz"], - }, - }); - - const documents: Document[] = [ - { - metadata: { - id: "1", - foo: "fooo", - bar: "barr", - baz: "bazz", - }, - pageContent: "hello world", - }, - { - metadata: { - id: "2", - foo: "foooo", - bar: "barrr", - baz: "bazzz", - }, - pageContent: "hello world 2", - }, - ]; - - const expected = [ - { - text: "hello world", - foo: "fooo", - bar: "barr", - baz: "bazz", - vec: await embeddings.embedQuery("hello world"), - }, - { - text: "hello world 2", - foo: "foooo", - bar: "barrr", - baz: "bazzz", - vec: await embeddings.embedQuery("hello world 2"), - }, - ]; - - expect( - await vectorstore._documentsToTypesenseRecords( - documents, - await embeddings.embedDocuments(["hello world", "hello world 2"]) - ) - ).toEqual(expected); -}); - -test("typesenseRecordsToDocuments should return the correct langchain documents", async () => { - const embeddings = new FakeEmbeddings(); - const vectorstore = new Typesense(embeddings, { - schemaName: "test", - typesenseClient: {} as unknown as Client, - columnNames: { - vector: "vec", - pageContent: "text", - metadataColumnNames: ["foo", "bar", "baz"], - }, - }); - - const typesenseRecords = [ - { - document: { - text: "hello world", - foo: "fooo", - bar: "barr", - baz: "bazz", - vec: await embeddings.embedQuery("hello world"), - }, - vector_distance: 0.2342145, - }, - { - document: { - text: "hello world 2", - foo: "foooo", - bar: "barrr", - baz: "bazzz", - vec: await embeddings.embedQuery("hello world 
2"), - }, - vector_distance: 0.4521355, - }, - ]; - - const expected = [ - [ - { - metadata: { - foo: "fooo", - bar: "barr", - baz: "bazz", - }, - pageContent: "hello world", - }, - 0.2342145, - ], - [ - { - metadata: { - foo: "foooo", - bar: "barrr", - baz: "bazzz", - }, - pageContent: "hello world 2", - }, - 0.4521355, - ], - ]; - - expect(vectorstore._typesenseRecordsToDocuments(typesenseRecords)).toEqual( - expected - ); -}); diff --git a/libs/langchain-community/src/vectorstores/tests/vectara.int.test.ts b/libs/langchain-community/src/vectorstores/tests/vectara.int.test.ts deleted file mode 100644 index 7ca5c11ac67d..000000000000 --- a/libs/langchain-community/src/vectorstores/tests/vectara.int.test.ts +++ /dev/null @@ -1,225 +0,0 @@ -/* eslint-disable @typescript-eslint/no-unused-vars */ -/* eslint-disable no-process-env */ -import fs from "fs"; -import { expect, beforeAll } from "@jest/globals"; -import { insecureHash } from "@langchain/core/utils/hash"; -import { Document } from "@langchain/core/documents"; -import { FakeEmbeddings } from "../../util/testing.js"; -import { VectaraFile, VectaraLibArgs, VectaraStore } from "../vectara.js"; - -const getDocs = (): Document[] => { - // Some text from Lord of the Rings - const englishOne = `It all depends on what you want. You can trust us to stick to you through thick and thin to the - bitter end. And you can trust us to keep any secret of yours - closer than you keep it yourself. - But you cannot trust us to let you face trouble alone, and go off without a word. We are your - friends, Frodo. Anyway: there it is. We know most of what Gandalf has told you. We know a good - deal about the Ring. We are horribly afraid - but we are coming with you; or following you - like hounds.`; - const englishTwo = `Sam lay back, and stared with open mouth, and for a moment, between bewilderment and great joy, - he could not answer. At last he gasped: “Gandalf! I thought you were dead! But then I thought I - was dead myself. 
Is everything sad going to come untrue? What's happened to the world?`; - const frenchOne = `Par exemple, sur la planète Terre, l'homme a toujours supposé qu'il était plus intelligent que les dauphins - parce qu'il avait accompli tant de choses - la roue, New York, les guerres, etc. passer du - bon temps. Mais à l'inverse, les dauphins ont toujours cru qu'ils étaient bien plus - intelligents que l'homme, pour les mêmes raisons précisément.`; - - const documents = [ - new Document({ - pageContent: englishOne, - metadata: { - document_id: insecureHash(englishOne), // Generate a hashcode for document id based on the text - title: "Lord of the Rings", - author: "Tolkien", - genre: "fiction", - lang: "eng", - }, - }), - new Document({ - pageContent: englishTwo, - metadata: { - document_id: insecureHash(englishTwo), // Generate a hashcode for document id based on the text - title: "Lord of the Rings", - author: "Tolkien", - genre: "fiction", - lang: "eng", - }, - }), - new Document({ - pageContent: frenchOne, - metadata: { - document_id: insecureHash(frenchOne), // Generate a hashcode for document id based on the text - title: "The hitchhiker's guide to the galaxy", - author: "Douglas Adams", - genre: "fiction", - lang: "fra", - }, - }), - ]; - return documents; -}; - -let corpusId: number[] = []; -const envValue = process.env.VECTARA_CORPUS_ID; -if (envValue) { - corpusId = envValue.split(",").map((id) => { - const num = Number(id); - if (Number.isNaN(num)) corpusId = [0]; - return num; - }); - - if (corpusId.length === 0) corpusId = [0]; -} else { - corpusId = [0]; -} - -describe("VectaraStore", () => { - ["VECTARA_CUSTOMER_ID", "VECTARA_CORPUS_ID", "VECTARA_API_KEY"].forEach( - (envVar) => { - if (!process.env[envVar]) { - throw new Error(`${envVar} not set`); - } - } - ); - - describe("fromTexts", () => { - const args: VectaraLibArgs = { - customerId: Number(process.env.VECTARA_CUSTOMER_ID) || 0, - corpusId, - apiKey: process.env.VECTARA_API_KEY || "", - }; - - 
test.skip("with fakeEmbeddings doesn't throw error", () => { - expect(() => - VectaraStore.fromTexts([], [], new FakeEmbeddings(), args) - ).not.toThrow(); - }); - }); - - describe("fromDocuments", () => { - const args: VectaraLibArgs = { - customerId: Number(process.env.VECTARA_CUSTOMER_ID) || 0, - corpusId, - apiKey: process.env.VECTARA_API_KEY || "", - }; - - test.skip("with fakeEmbeddings doesn't throw error", async () => { - await expect( - VectaraStore.fromDocuments(getDocs(), new FakeEmbeddings(), args) - ).resolves.toBeDefined(); - }); - }); - - describe("access operations", () => { - let store: VectaraStore; - let doc_ids: string[] = []; - - beforeAll(async () => { - store = new VectaraStore({ - customerId: Number(process.env.VECTARA_CUSTOMER_ID) || 0, - corpusId, - apiKey: process.env.VECTARA_API_KEY || "", - }); - doc_ids = await store.addDocuments(getDocs()); - }); - - test.skip("similaritySearchWithScore", async () => { - const resultsWithScore = await store.similaritySearchWithScore( - "What did Sam do?", - 10, // Number of results needed - { lambda: 0.025 } - ); - expect(resultsWithScore.length).toBeGreaterThan(0); - expect(resultsWithScore[0][0].pageContent.length).toBeGreaterThan(0); - expect(resultsWithScore[0][0].metadata.title).toBe("Lord of the Rings"); - expect(resultsWithScore[0][1]).toBeGreaterThan(0); - }); - - test.skip("similaritySearch", async () => { - const results = await store.similaritySearch( - "Was Gandalf dead?", - 10, // Number of results needed - { - lambda: 0.025, - contextConfig: { - sentencesAfter: 1, - sentencesBefore: 1, - }, - } - ); - expect(results.length).toBeGreaterThan(0); - expect(results[0].pageContent.length).toBeGreaterThan(0); - expect(results[0].metadata.title).toBe("Lord of the Rings"); - }); - - test.skip("similaritySearch with filter", async () => { - const results = await store.similaritySearch( - "Was Gandalf dead?", - 10, // Number of results needed - { filter: "part.lang = 'fra'", lambda: 0.025 } // 
Filter on the language of the document - ); - expect(results.length).toBeGreaterThan(0); - expect(results[0].pageContent.length).toBeGreaterThan(0); - // Query filtered on French, so we expect only French results - const hasEnglish = results.some( - (result) => - // eslint-disable-next-line @typescript-eslint/no-explicit-any - result.metadata.lang === "eng" - ); - expect(hasEnglish).toBe(false); - }); - - test.skip("addFiles", async () => { - const docs = getDocs(); - const englishOneContent = docs[0].pageContent; - const frenchOneContent = docs[2].pageContent; - - const files = [ - { filename: "englishOne.txt", content: englishOneContent }, - { filename: "frenchOne.txt", content: frenchOneContent }, - ]; - - const vectaraFiles: VectaraFile[] = []; - for (const file of files) { - fs.writeFileSync(file.filename, file.content); - - const buffer = fs.readFileSync(file.filename); - vectaraFiles.push({ - blob: new Blob([buffer], { type: "text/plain" }), - fileName: file.filename, - }); - } - - const bitcoinBuffer = fs.readFileSync( - "../examples/src/document_loaders/example_data/bitcoin.pdf" - ); - vectaraFiles.push({ - blob: new Blob([bitcoinBuffer], { type: "application/pdf" }), - fileName: "bitcoin.pdf", - }); - - const file_doc_ids = await store.addFiles(vectaraFiles); - doc_ids = [...doc_ids, ...file_doc_ids]; - - for (const file of files) { - fs.unlinkSync(file.filename); - } - - expect(file_doc_ids.length).toEqual(3); - const searchResults = await store.similaritySearch("What is bitcoin"); - expect(searchResults.length).toBeGreaterThan(0); - expect(searchResults[0].pageContent).toContain( - "A Peer-to-Peer Electronic Cash System" - ); - }); - - // delete documents added in the test - afterAll(async () => { - store = new VectaraStore({ - customerId: Number(process.env.VECTARA_CUSTOMER_ID) || 0, - corpusId, - apiKey: process.env.VECTARA_API_KEY || "", - }); - await store.deleteDocuments(doc_ids); - }); - }); -}); diff --git 
a/libs/langchain-community/src/vectorstores/tests/vercel_postgres.int.test.ts b/libs/langchain-community/src/vectorstores/tests/vercel_postgres.int.test.ts deleted file mode 100644 index 5e572a012f81..000000000000 --- a/libs/langchain-community/src/vectorstores/tests/vercel_postgres.int.test.ts +++ /dev/null @@ -1,129 +0,0 @@ -import { expect, test } from "@jest/globals"; -import { OpenAIEmbeddings } from "@langchain/openai"; - -import { VercelPostgres } from "../vercel_postgres.js"; - -let vercelPostgresStore: VercelPostgres; - -const config = { - tableName: "testvercelvectorstorelangchain2", - columns: { - idColumnName: "id", - vectorColumnName: "vector", - contentColumnName: "content", - metadataColumnName: "metadata", - }, -}; - -describe("Test VercelPostgres store", () => { - afterEach(async () => { - await vercelPostgresStore?.delete({ deleteAll: true }); - await vercelPostgresStore?.end(); - }); - - test("Test embeddings creation", async () => { - vercelPostgresStore = await VercelPostgres.initialize( - new OpenAIEmbeddings(), - config - ); - - expect(vercelPostgresStore).toBeDefined(); - - const docHello = { - pageContent: "hello", - metadata: { a: 1 }, - }; - const docCat = { - pageContent: "Cat drinks milk", - metadata: { a: 2 }, - }; - const docHi = { pageContent: "hi", metadata: { a: 1 } }; - - const ids = await vercelPostgresStore.addDocuments([ - docHello, - docHi, - docCat, - ]); - - const results = await vercelPostgresStore.similaritySearch("hello", 2, { - a: 2, - }); - - expect(results).toHaveLength(1); - - expect(results[0].pageContent).toEqual(docCat.pageContent); - - await vercelPostgresStore.addDocuments( - [{ pageContent: "Dog drinks milk", metadata: { a: 2 } }], - { ids: [ids[2]] } - ); - - const results2 = await vercelPostgresStore.similaritySearch("hello", 2, { - a: 2, - }); - - expect(results2).toHaveLength(1); - expect(results2[0].pageContent).toEqual("Dog drinks milk"); - - await vercelPostgresStore.delete({ ids: [ids[2]] }); - - const 
results3 = await vercelPostgresStore.similaritySearch("hello", 2, { - a: 2, - }); - - expect(results3).toHaveLength(0); - }); - - test("Test metadata filtering", async () => { - vercelPostgresStore = await VercelPostgres.initialize( - new OpenAIEmbeddings(), - config - ); - - const docGreen = { - pageContent: "Hi, I am the color green.", - metadata: { color: "green" }, - }; - const docBlue = { - pageContent: "Hi, I am the color blue.", - metadata: { color: "blue" }, - }; - const docYellow = { - pageContent: "Hi, I am the color yellow.", - metadata: { color: "yellow" }, - }; - const docIrrelevant = { - pageContent: "Hi, I am an irrelevant doc without metadata.", - metadata: {}, - }; - - await vercelPostgresStore.addDocuments([ - docGreen, - docBlue, - docYellow, - docIrrelevant, - ]); - - const results1 = await vercelPostgresStore.similaritySearch("color", 5, { - color: "blue", - }); - - expect(results1).toHaveLength(1); - - const results2 = await vercelPostgresStore.similaritySearch( - "irrelevant query", - 5, - { - color: { in: ["blue", "yellow"] }, - } - ); - - expect(results2).toHaveLength(2); - - const results2WithColorGreen = results2.filter( - (result) => result.metadata.color === "green" - ); - - expect(results2WithColorGreen).toHaveLength(0); - }); -}); diff --git a/libs/langchain-community/src/vectorstores/tests/voy.int.test.ts b/libs/langchain-community/src/vectorstores/tests/voy.int.test.ts deleted file mode 100644 index 27af3a74bab0..000000000000 --- a/libs/langchain-community/src/vectorstores/tests/voy.int.test.ts +++ /dev/null @@ -1,49 +0,0 @@ -import { expect, test } from "@jest/globals"; -import { Voy as VoyOriginClient } from "voy-search"; -import { Document } from "@langchain/core/documents"; -import { OpenAIEmbeddings } from "@langchain/openai"; -import { VoyVectorStore } from "../voy.js"; - -const client = new VoyOriginClient(); - -test("it can create index using Voy.from text, add new elements to the index and get queried documents", async () 
=> { - const vectorStore = await VoyVectorStore.fromTexts( - ["initial first page", "initial second page"], - [{ id: 1 }, { id: 2 }], - new OpenAIEmbeddings(), - client - ); - // the number of dimensions is produced by OpenAI - expect(vectorStore.numDimensions).toBe(1536); - await vectorStore.addDocuments([ - new Document({ - pageContent: "added first page", - metadata: { id: 5 }, - }), - new Document({ - pageContent: "added second page", - metadata: { id: 4 }, - }), - new Document({ - pageContent: "added third page", - metadata: { id: 6 }, - }), - ]); - expect(vectorStore.docstore.length).toBe(5); - await vectorStore.addDocuments([ - new Document({ - pageContent: "added another first page", - metadata: { id: 7 }, - }), - ]); - const results = await vectorStore.similaritySearchWithScore("added first", 6); - expect(results.length).toBe(6); - await vectorStore.delete({ - deleteAll: true, - }); - const results2 = await vectorStore.similaritySearchWithScore( - "added first", - 6 - ); - expect(results2.length).toBe(0); -}); diff --git a/libs/langchain-community/src/vectorstores/tests/voy.test.ts b/libs/langchain-community/src/vectorstores/tests/voy.test.ts deleted file mode 100644 index b941e94ca72b..000000000000 --- a/libs/langchain-community/src/vectorstores/tests/voy.test.ts +++ /dev/null @@ -1,56 +0,0 @@ -import { test, expect } from "@jest/globals"; -import { Document } from "@langchain/core/documents"; -import { FakeEmbeddings } from "../../util/testing.js"; -import { VoyVectorStore, VoyClient } from "../voy.js"; - -const fakeClient: VoyClient = { - index: ({ embeddings }) => embeddings.map((i) => i.id).join(","), - add: (_) => {}, - search: () => ({ - neighbors: [ - { id: "0", title: "", url: "" }, - { id: "1", title: "", url: "" }, - ], - }), - clear: () => {}, -}; - -test("it can create index using Voy.from text, add new elements to the index and get queried documents", async () => { - const vectorStore = await VoyVectorStore.fromTexts( - ["initial first page", 
"initial second page"], - [{ id: 1 }, { id: 2 }], - new FakeEmbeddings(), - fakeClient - ); - - // the number of dimensions is produced by fake embeddings - expect(vectorStore.numDimensions).toBe(4); - await vectorStore.addVectors( - [ - [0, 1, 0, 0], - [1, 0, 0, 0], - [0.5, 0.5, 0.5, 0.5], - ], - [ - new Document({ - pageContent: "added first page", - metadata: { id: 5 }, - }), - new Document({ - pageContent: "added second page", - metadata: { id: 4 }, - }), - new Document({ - pageContent: "added third page", - metadata: { id: 6 }, - }), - ] - ); - expect(vectorStore.docstore.length).toBe(5); - const results = await vectorStore.similaritySearchVectorWithScore( - [1, 0, 0, 0], - 3 - ); - expect(results[0][0].metadata.id).toBe(1); - expect(results[1][0].metadata.id).toBe(2); -}); diff --git a/libs/langchain-community/src/vectorstores/tests/xata.int.test.ts b/libs/langchain-community/src/vectorstores/tests/xata.int.test.ts deleted file mode 100644 index a0d03964a893..000000000000 --- a/libs/langchain-community/src/vectorstores/tests/xata.int.test.ts +++ /dev/null @@ -1,166 +0,0 @@ -/* eslint-disable no-process-env */ -// eslint-disable-next-line import/no-extraneous-dependencies -import { BaseClient } from "@xata.io/client"; - -import { OpenAIEmbeddings } from "@langchain/openai"; -import { Document } from "@langchain/core/documents"; -import { XataVectorSearch } from "../xata.js"; - -// Tests require a DB with a table called "docs" with: -// * a column name content of type Text -// * a column named embedding of type Vector -// * a column named a of type Integer - -test.skip("XataVectorSearch integration", async () => { - if (!process.env.XATA_API_KEY) { - throw new Error("XATA_API_KEY not set"); - } - - if (!process.env.XATA_DB_URL) { - throw new Error("XATA_DB_URL not set"); - } - const xata = new BaseClient({ - databaseURL: process.env.XATA_DB_URL, - apiKey: process.env.XATA_API_KEY, - branch: process.env.XATA_BRANCH || "main", - }); - - const table = "docs"; - 
const embeddings = new OpenAIEmbeddings(); - - const store = new XataVectorSearch(embeddings, { client: xata, table }); - expect(store).toBeDefined(); - - const createdAt = new Date().getTime(); - - const ids1 = await store.addDocuments([ - { pageContent: "hello", metadata: { a: createdAt + 1 } }, - { pageContent: "car", metadata: { a: createdAt } }, - { pageContent: "adjective", metadata: { a: createdAt } }, - { pageContent: "hi", metadata: { a: createdAt } }, - ]); - - let results1 = await store.similaritySearch("hello!", 1); - - // search store is eventually consistent so we need to retry if nothing is - // returned - for (let i = 0; i < 5 && results1.length === 0; i += 1) { - results1 = await store.similaritySearch("hello!", 1); - // eslint-disable-next-line no-promise-executor-return - await new Promise((r) => setTimeout(r, 1000)); - } - - expect(results1).toHaveLength(1); - expect(results1).toEqual([ - new Document({ metadata: { a: createdAt + 1 }, pageContent: "hello" }), - ]); - - const results2 = await store.similaritySearchWithScore("testing!", 6, { - a: createdAt, - }); - expect(results2).toHaveLength(3); - - const ids2 = await store.addDocuments( - [ - { pageContent: "hello upserted", metadata: { a: createdAt + 1 } }, - { pageContent: "car upserted", metadata: { a: createdAt } }, - { pageContent: "adjective upserted", metadata: { a: createdAt } }, - { pageContent: "hi upserted", metadata: { a: createdAt } }, - ], - { ids: ids1 } - ); - - expect(ids1).toEqual(ids2); - - const results3 = await store.similaritySearchWithScore("testing!", 6, { - a: createdAt, - }); - - expect(results3).toHaveLength(3); - - await store.delete({ ids: ids1.slice(2) }); - - let results4 = await store.similaritySearchWithScore("testing!", 3, { - a: createdAt, - }); - for (let i = 0; i < 5 && results4.length > 1; i += 1) { - results4 = await store.similaritySearchWithScore("testing!", 3, { - a: createdAt, - }); - // eslint-disable-next-line no-promise-executor-return - await new 
Promise((r) => setTimeout(r, 1000)); - } - - expect(results4).toHaveLength(1); - - await store.delete({ ids: ids1 }); - let results5 = await store.similaritySearch("hello!", 1); - for (let i = 0; i < 5 && results1.length > 0; i += 1) { - results5 = await store.similaritySearch("hello!", 1); - // eslint-disable-next-line no-promise-executor-return - await new Promise((r) => setTimeout(r, 1000)); - } - expect(results5).toHaveLength(0); -}); - -test.skip("Search a XataVectorSearch using a metadata filter", async () => { - if (!process.env.XATA_API_KEY) { - throw new Error("XATA_API_KEY not set"); - } - - if (!process.env.XATA_DB_URL) { - throw new Error("XATA_DB_URL not set"); - } - const xata = new BaseClient({ - databaseURL: process.env.XATA_DB_URL, - apiKey: process.env.XATA_API_KEY, - branch: process.env.XATA_BRANCH || "main", - }); - - const table = "docs"; - const embeddings = new OpenAIEmbeddings(); - - const store = new XataVectorSearch(embeddings, { client: xata, table }); - expect(store).toBeDefined(); - - const createdAt = new Date().getTime(); - - const ids = await store.addDocuments([ - { pageContent: "hello 0", metadata: { a: createdAt } }, - { pageContent: "hello 1", metadata: { a: createdAt + 1 } }, - { pageContent: "hello 2", metadata: { a: createdAt + 2 } }, - { pageContent: "hello 3", metadata: { a: createdAt + 3 } }, - ]); - - // search store is eventually consistent so we need to retry if nothing is - // returned - let results1 = await store.similaritySearch("hello!", 1); - for (let i = 0; i < 5 && results1.length < 4; i += 1) { - results1 = await store.similaritySearch("hello", 6); - // eslint-disable-next-line no-promise-executor-return - await new Promise((r) => setTimeout(r, 1000)); - } - - expect(results1).toHaveLength(4); - - const results = await store.similaritySearch("hello", 1, { - a: createdAt + 2, - }); - expect(results).toHaveLength(1); - - expect(results).toEqual([ - new Document({ - metadata: { a: createdAt + 2 }, - pageContent: 
"hello 2", - }), - ]); - - await store.delete({ ids }); - let results5 = await store.similaritySearch("hello!", 1); - for (let i = 0; i < 5 && results1.length > 0; i += 1) { - results5 = await store.similaritySearch("hello", 1); - // eslint-disable-next-line no-promise-executor-return - await new Promise((r) => setTimeout(r, 1000)); - } - expect(results5).toHaveLength(0); -}); diff --git a/libs/langchain-community/src/vectorstores/vectara.ts b/libs/langchain-community/src/vectorstores/vectara.ts index 34c8ed5a9579..d3afa22fadbe 100644 --- a/libs/langchain-community/src/vectorstores/vectara.ts +++ b/libs/langchain-community/src/vectorstores/vectara.ts @@ -4,7 +4,7 @@ import { Embeddings } from "@langchain/core/embeddings"; import { VectorStore } from "@langchain/core/vectorstores"; import { Document } from "@langchain/core/documents"; import { getEnvironmentVariable } from "@langchain/core/utils/env"; -import { FakeEmbeddings } from "../util/testing.js"; +import { FakeEmbeddings } from "../utils/testing.js"; /** * Interface for the arguments required to initialize a VectaraStore From 0b1624f7c47560321b803716f2ed8f58fbef213c Mon Sep 17 00:00:00 2001 From: jacoblee93 Date: Thu, 7 Dec 2023 02:28:40 -0800 Subject: [PATCH 07/22] Build fixes --- langchain/scripts/check-tree-shaking.js | 1 + langchain/src/load/import_type.d.ts | 29 ------ langchain/src/retrievers/vespa.ts | 93 ++++++++++++++++++- langchain/src/tools/tests/gmail.test.ts | 2 +- langchain/src/util/event-source-parse.ts | 2 +- libs/langchain-community/.gitignore | 3 + libs/langchain-community/package.json | 10 +- .../scripts/create-entrypoints.js | 1 + .../src/chat_models/cloudflare_workersai.ts | 2 +- .../src/llms/cloudflare_workersai.ts | 2 +- .../src/load/import_map.ts | 1 + ...-source-parse.ts => event_source_parse.ts} | 0 package.json | 2 +- turbo.json | 9 +- 14 files changed, 118 insertions(+), 39 deletions(-) rename libs/langchain-community/src/utils/{event-source-parse.ts => event_source_parse.ts} 
(100%) diff --git a/langchain/scripts/check-tree-shaking.js b/langchain/scripts/check-tree-shaking.js index 66a1f194199f..bae5213269d7 100644 --- a/langchain/scripts/check-tree-shaking.js +++ b/langchain/scripts/check-tree-shaking.js @@ -28,6 +28,7 @@ export function listExternals() { /node\:/, /js-tiktoken/, /@langchain\/core/, + /@langchain\/community/, "axios", // axios is a dependency of openai "convex", "convex/server", diff --git a/langchain/src/load/import_type.d.ts b/langchain/src/load/import_type.d.ts index 9b2b3abbde95..68515bbf4995 100644 --- a/langchain/src/load/import_type.d.ts +++ b/langchain/src/load/import_type.d.ts @@ -526,29 +526,9 @@ export interface SecretMap { AWS_SECRET_ACCESS_KEY?: string; AWS_SESSION_TOKEN?: string; AZURE_OPENAI_API_KEY?: string; - BAIDU_API_KEY?: string; - BAIDU_SECRET_KEY?: string; - BEDROCK_AWS_ACCESS_KEY_ID?: string; - BEDROCK_AWS_SECRET_ACCESS_KEY?: string; - CLOUDFLARE_API_TOKEN?: string; - COHERE_API_KEY?: string; - DATABERRY_API_KEY?: string; - FIREWORKS_API_KEY?: string; - GOOGLE_API_KEY?: string; - GOOGLE_PALM_API_KEY?: string; - GOOGLE_PLACES_API_KEY?: string; - GOOGLE_VERTEX_AI_WEB_CREDENTIALS?: string; - GRADIENT_ACCESS_TOKEN?: string; - GRADIENT_WORKSPACE_ID?: string; - HUGGINGFACEHUB_API_KEY?: string; - IBM_CLOUD_API_KEY?: string; - IFLYTEK_API_KEY?: string; - IFLYTEK_API_SECRET?: string; MILVUS_PASSWORD?: string; MILVUS_SSL?: string; MILVUS_USERNAME?: string; - MINIMAX_API_KEY?: string; - MINIMAX_GROUP_ID?: string; OPENAI_API_KEY?: string; OPENAI_ORGANIZATION?: string; PLANETSCALE_DATABASE_URL?: string; @@ -562,19 +542,10 @@ export interface SecretMap { REDIS_URL?: string; REDIS_USERNAME?: string; REMOTE_RETRIEVER_AUTH_BEARER?: string; - REPLICATE_API_TOKEN?: string; - SEARXNG_API_BASE?: string; UPSTASH_REDIS_REST_TOKEN?: string; UPSTASH_REDIS_REST_URL?: string; VECTARA_API_KEY?: string; VECTARA_CORPUS_ID?: string; VECTARA_CUSTOMER_ID?: string; - WATSONX_PROJECT_ID?: string; - WRITER_API_KEY?: string; - 
WRITER_ORG_ID?: string; - YC_API_KEY?: string; - YC_IAM_TOKEN?: string; ZAPIER_NLA_API_KEY?: string; - ZEP_API_KEY?: string; - ZEP_API_URL?: string; } diff --git a/langchain/src/retrievers/vespa.ts b/langchain/src/retrievers/vespa.ts index 234e3e745f47..09b3b3e46763 100644 --- a/langchain/src/retrievers/vespa.ts +++ b/langchain/src/retrievers/vespa.ts @@ -1 +1,92 @@ -export * from "@langchain/community/retrievers/amazon_kendra"; \ No newline at end of file +import { Document } from "../document.js"; +import { + RemoteRetriever, + RemoteRetrieverValues, + RemoteRetrieverParams, +} from "./remote/base.js"; + +export interface VespaRetrieverParams extends RemoteRetrieverParams { + /** + * The body of the query to send to Vespa + */ + query_body: object; + /** + * The name of the field the content resides in + */ + content_field: string; +} + +/** + * Class responsible for retrieving data from Vespa. It extends the + * `RemoteRetriever` class and includes methods for creating the JSON body + * for a query and processing the JSON response from Vespa. + * @example + * ```typescript + * const retriever = new VespaRetriever({ + * url: "https: + * auth: false, + * query_body: { + * yql: "select content from paragraph where userQuery()", + * hits: 5, + * ranking: "documentation", + * locale: "en-us", + * }, + * content_field: "content", + * }); + * const result = await retriever.getRelevantDocuments("what is vespa?"); + * ``` + */ +export class VespaRetriever extends RemoteRetriever { + static lc_name() { + return "VespaRetriever"; + } + + lc_namespace = ["langchain", "retrievers", "vespa"]; + + query_body: object; + + content_field: string; + + constructor(fields: VespaRetrieverParams) { + super(fields); + this.query_body = fields.query_body; + this.content_field = fields.content_field; + + this.url = `${this.url}/search/?`; + } + + /** + * Method that takes a query string as input and returns a JSON object + * that includes the query and the original `query_body`. 
+ * @param query The query string to be sent to Vespa. + * @returns A JSON object that includes the query and the original `query_body`. + */ + createJsonBody(query: string): RemoteRetrieverValues { + return { + ...this.query_body, + query, + }; + } + + /** + * Method that processes the JSON response from Vespa into an array of + * `Document` instances. Each `Document` instance includes the content + * from the specified `content_field` and the document's ID. + * @param json The JSON response from Vespa. + * @returns An array of `Document` instances. + */ + processJsonResponse(json: RemoteRetrieverValues): Document[] { + return json.root.children.map( + (doc: { + id: string; + relevance: number; + source: string; + fields: Record; + }) => + new Document({ + pageContent: doc.fields[this.content_field] as string, + metadata: { id: doc.id }, + }) + ); + } +} diff --git a/langchain/src/tools/tests/gmail.test.ts b/langchain/src/tools/tests/gmail.test.ts index e44b6f7fef36..8ab6743d0071 100644 --- a/langchain/src/tools/tests/gmail.test.ts +++ b/langchain/src/tools/tests/gmail.test.ts @@ -1,5 +1,5 @@ import { jest, expect, describe } from "@jest/globals"; -import { GmailGetMessage } from "../gmail/get_message.js"; +import { GmailGetMessage } from "../gmail/index.js"; jest.mock("googleapis", () => ({ google: { diff --git a/langchain/src/util/event-source-parse.ts b/langchain/src/util/event-source-parse.ts index 93e538d555a9..5aaedc4c9164 100644 --- a/langchain/src/util/event-source-parse.ts +++ b/langchain/src/util/event-source-parse.ts @@ -1 +1 @@ -export * from "@langchain/community/utils/event-source-parse"; \ No newline at end of file +export * from "@langchain/community/utils/event_source_parse"; \ No newline at end of file diff --git a/libs/langchain-community/.gitignore b/libs/langchain-community/.gitignore index df5ca14400ed..fb99a6ef7063 100644 --- a/libs/langchain-community/.gitignore +++ b/libs/langchain-community/.gitignore @@ -310,3 +310,6 @@ 
caches/upstash_redis.d.ts graphs/neo4j_graph.cjs graphs/neo4j_graph.js graphs/neo4j_graph.d.ts +utils/event_source_parse.cjs +utils/event_source_parse.js +utils/event_source_parse.d.ts diff --git a/libs/langchain-community/package.json b/libs/langchain-community/package.json index 0666be1d853c..c917eb6600e5 100644 --- a/libs/langchain-community/package.json +++ b/libs/langchain-community/package.json @@ -1115,6 +1115,11 @@ "import": "./graphs/neo4j_graph.js", "require": "./graphs/neo4j_graph.cjs" }, + "./utils/event_source_parse": { + "types": "./utils/event_source_parse.d.ts", + "import": "./utils/event_source_parse.js", + "require": "./utils/event_source_parse.cjs" + }, "./package.json": "./package.json" }, "files": [ @@ -1430,6 +1435,9 @@ "caches/upstash_redis.d.ts", "graphs/neo4j_graph.cjs", "graphs/neo4j_graph.js", - "graphs/neo4j_graph.d.ts" + "graphs/neo4j_graph.d.ts", + "utils/event_source_parse.cjs", + "utils/event_source_parse.js", + "utils/event_source_parse.d.ts" ] } diff --git a/libs/langchain-community/scripts/create-entrypoints.js b/libs/langchain-community/scripts/create-entrypoints.js index 507e8b4f6e8a..b01f87821f3c 100644 --- a/libs/langchain-community/scripts/create-entrypoints.js +++ b/libs/langchain-community/scripts/create-entrypoints.js @@ -120,6 +120,7 @@ const entrypoints = { "caches/upstash_redis": "caches/upstash_redis", // graphs "graphs/neo4j_graph": "graphs/neo4j_graph", + "utils/event_source_parse": "utils/event_source_parse", }; // Entrypoints in this list will diff --git a/libs/langchain-community/src/chat_models/cloudflare_workersai.ts b/libs/langchain-community/src/chat_models/cloudflare_workersai.ts index 2c76df6fb55c..c85b14cc27bc 100644 --- a/libs/langchain-community/src/chat_models/cloudflare_workersai.ts +++ b/libs/langchain-community/src/chat_models/cloudflare_workersai.ts @@ -13,7 +13,7 @@ import { getEnvironmentVariable } from "@langchain/core/utils/env"; import { CallbackManagerForLLMRun } from 
"@langchain/core/callbacks/manager"; import type { CloudflareWorkersAIInput } from "../llms/cloudflare_workersai.js"; -import { convertEventStreamToIterableReadableDataStream } from "../utils/event-source-parse.js"; +import { convertEventStreamToIterableReadableDataStream } from "../utils/event_source_parse.js"; /** * An interface defining the options for a Cloudflare Workers AI call. It extends diff --git a/libs/langchain-community/src/llms/cloudflare_workersai.ts b/libs/langchain-community/src/llms/cloudflare_workersai.ts index fd6f0135f1f6..66a053e482cf 100644 --- a/libs/langchain-community/src/llms/cloudflare_workersai.ts +++ b/libs/langchain-community/src/llms/cloudflare_workersai.ts @@ -3,7 +3,7 @@ import { getEnvironmentVariable } from "@langchain/core/utils/env"; import { CallbackManagerForLLMRun } from "@langchain/core/callbacks/manager"; import { GenerationChunk } from "@langchain/core/outputs"; -import { convertEventStreamToIterableReadableDataStream } from "../utils/event-source-parse.js"; +import { convertEventStreamToIterableReadableDataStream } from "../utils/event_source_parse.js"; /** * Interface for CloudflareWorkersAI input parameters. diff --git a/libs/langchain-community/src/load/import_map.ts b/libs/langchain-community/src/load/import_map.ts index 67274f55160f..67f965138d37 100644 --- a/libs/langchain-community/src/load/import_map.ts +++ b/libs/langchain-community/src/load/import_map.ts @@ -40,3 +40,4 @@ export * as retrievers__tavily_search_api from "../retrievers/tavily_search_api. 
export * as caches__cloudflare_kv from "../caches/cloudflare_kv.js"; export * as caches__momento from "../caches/momento.js"; export * as caches__upstash_redis from "../caches/upstash_redis.js"; +export * as utils__event_source_parse from "../utils/event_source_parse.js"; diff --git a/libs/langchain-community/src/utils/event-source-parse.ts b/libs/langchain-community/src/utils/event_source_parse.ts similarity index 100% rename from libs/langchain-community/src/utils/event-source-parse.ts rename to libs/langchain-community/src/utils/event_source_parse.ts diff --git a/package.json b/package.json index 44b3f60cc84a..c34923c8ed19 100644 --- a/package.json +++ b/package.json @@ -19,7 +19,7 @@ "packageManager": "yarn@3.4.1", "scripts": { "build": "turbo run build --filter=\"!test-exports-*\" --concurrency 1", - "build:deps": "yarn workspace @langchain/core build && yarn workspace @langchain/anthropic build && yarn workspace @langchain/openai build", + "build:deps": "yarn workspace @langchain/core build && yarn workspace @langchain/community build && yarn workspace @langchain/anthropic build && yarn workspace @langchain/openai build", "format": "turbo run format", "format:check": "turbo run format:check", "lint": "turbo run lint --concurrency 1", diff --git a/turbo.json b/turbo.json index 01a50e732e26..18bd38254a0f 100644 --- a/turbo.json +++ b/turbo.json @@ -3,14 +3,17 @@ "globalDependencies": ["**/.env"], "pipeline": { "@langchain/core#build": {}, - "libs/langchain-anthropic#build": { + "@langchain/anthropic#build": { "dependsOn": ["@langchain/core#build"] }, - "libs/langchain-openai#build": { + "@langchain/openai#build": { "dependsOn": ["@langchain/core#build"] }, + "@langchain/community#build": { + "dependsOn": ["@langchain/openai#build"] + }, "build": { - "dependsOn": ["@langchain/core#build", "^build"], + "dependsOn": ["@langchain/core#build", "@langchain/community#build", "^build"], "outputs": ["dist/**", "dist-cjs/**", "*.js", "*.cjs", "*.d.ts"], "inputs": 
["src/**", "scripts/**", "package.json", "tsconfig.json"] }, From bd94ddcab9c3c386812334790fb1697afeb75b2e Mon Sep 17 00:00:00 2001 From: jacoblee93 Date: Thu, 7 Dec 2023 02:39:33 -0800 Subject: [PATCH 08/22] Update lockfile --- yarn.lock | 17 +---------------- 1 file changed, 1 insertion(+), 16 deletions(-) diff --git a/yarn.lock b/yarn.lock index 5d968b3ad48a..c47a6b953e56 100644 --- a/yarn.lock +++ b/yarn.lock @@ -8106,7 +8106,7 @@ __metadata: jest: ^29.5.0 jest-environment-node: ^29.6.4 jsdom: ^22.1.0 - langsmith: ^0.0.49 + langsmith: ~0.0.48 llmonitor: ^0.5.9 lodash: ^4.17.21 mammoth: ^1.5.1 @@ -23572,21 +23572,6 @@ __metadata: languageName: unknown linkType: soft -"langsmith@npm:^0.0.49": - version: 0.0.49 - resolution: "langsmith@npm:0.0.49" - dependencies: - "@types/uuid": ^9.0.1 - commander: ^10.0.1 - p-queue: ^6.6.2 - p-retry: 4 - uuid: ^9.0.0 - bin: - langsmith: dist/cli/main.cjs - checksum: 9976d9fe1e4d4ace5041af08d3271dff61d7a87fbd88523b52274817704d282c46a48187cc73af0f7e440dbe4db5da1d221966d1136a71cbfa6115e5159242a8 - languageName: node - linkType: hard - "langsmith@npm:~0.0.48": version: 0.0.48 resolution: "langsmith@npm:0.0.48" From b95ef095ab9b916475ee93b2839c81c1d9f6245c Mon Sep 17 00:00:00 2001 From: Jacob Lee Date: Thu, 7 Dec 2023 08:24:32 -0800 Subject: [PATCH 09/22] Fix docs (#3584) --- examples/src/experimental/masking/basic.ts | 2 +- examples/src/experimental/masking/kitchen_sink.ts | 2 +- examples/src/experimental/masking/next.ts | 11 ++++++++--- 3 files changed, 10 insertions(+), 5 deletions(-) diff --git a/examples/src/experimental/masking/basic.ts b/examples/src/experimental/masking/basic.ts index 77a5c78de2b2..094e35fed9e6 100644 --- a/examples/src/experimental/masking/basic.ts +++ b/examples/src/experimental/masking/basic.ts @@ -20,7 +20,7 @@ maskingParser.addTransformer(piiMaskingTransformer); const input = "Contact me at jane.doe@email.com or 555-123-4567. 
Also reach me at john.smith@email.com"; -const masked = await maskingParser.parse(input); +const masked = await maskingParser.mask(input); console.log(masked); // Contact me at [email-a31e486e324f6] or [phone-da8fc1584f224]. Also reach me at [email-d5b6237633d95] diff --git a/examples/src/experimental/masking/kitchen_sink.ts b/examples/src/experimental/masking/kitchen_sink.ts index 07e85e7fc50f..4743c20a7b2f 100644 --- a/examples/src/experimental/masking/kitchen_sink.ts +++ b/examples/src/experimental/masking/kitchen_sink.ts @@ -70,7 +70,7 @@ const message = // Mask and rehydrate the message maskingParser - .parse(message) + .mask(message) .then((maskedMessage: string) => { console.log(`Masked message: ${maskedMessage}`); return maskingParser.rehydrate(maskedMessage); diff --git a/examples/src/experimental/masking/next.ts b/examples/src/experimental/masking/next.ts index 85621a4b8dca..dac49a781978 100644 --- a/examples/src/experimental/masking/next.ts +++ b/examples/src/experimental/masking/next.ts @@ -36,11 +36,11 @@ export async function POST(req: Request) { const formattedPreviousMessages = messages.slice(0, -1).map(formatMessage); const currentMessageContent = messages[messages.length - 1].content; // Extract the content of the last message // Mask sensitive information in the current message - const guardedMessageContent = await maskingParser.parse( + const guardedMessageContent = await maskingParser.mask( currentMessageContent ); // Mask sensitive information in the chat history - const guardedHistory = await maskingParser.parse( + const guardedHistory = await maskingParser.mask( formattedPreviousMessages.join("\n") ); @@ -64,6 +64,11 @@ export async function POST(req: Request) { headers: { "content-type": "text/plain; charset=utf-8" }, }); } catch (e: any) { - return Response.json({ error: e.message }, { status: 500 }); + return new Response(JSON.stringify({ error: e.message }), { + status: 500, + headers: { + "content-type": "application/json", + }, + }); } 
} From c3919df071e530348ed4c84220d593f60f2181d6 Mon Sep 17 00:00:00 2001 From: Jacob Lee Date: Thu, 7 Dec 2023 09:47:54 -0800 Subject: [PATCH 10/22] Fix typo in example (#3585) --- examples/src/experimental/masking/basic.ts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/examples/src/experimental/masking/basic.ts b/examples/src/experimental/masking/basic.ts index 094e35fed9e6..7d364ab00449 100644 --- a/examples/src/experimental/masking/basic.ts +++ b/examples/src/experimental/masking/basic.ts @@ -25,6 +25,6 @@ const masked = await maskingParser.mask(input); console.log(masked); // Contact me at [email-a31e486e324f6] or [phone-da8fc1584f224]. Also reach me at [email-d5b6237633d95] -const rehydrated = maskingParser.rehydrate(masked); +const rehydrated = await maskingParser.rehydrate(masked); console.log(rehydrated); // Contact me at jane.doe@email.com or 555-123-4567. Also reach me at john.smith@email.com From 00c7ff15957bf2a5223cfc62878f94bafe9ded22 Mon Sep 17 00:00:00 2001 From: Brace Sproul Date: Thu, 7 Dec 2023 09:55:25 -0800 Subject: [PATCH 11/22] core[docs]: Added get started page to LCEL (#3571) * core[docs]: Added get started page to LCEL * chore: lint files * cr * cr --- .../docs/expression_language/get_started.mdx | 92 +++++++++++++++++++ .../expression_language/get_started/basic.ts | 20 ++++ .../get_started/chat_model.ts | 14 +++ .../get_started/llm_model.ts | 12 +++ .../get_started/output_parser.ts | 12 +++ .../expression_language/get_started/prompt.ts | 34 +++++++ .../expression_language/get_started/rag.ts | 47 ++++++++++ 7 files changed, 231 insertions(+) create mode 100644 docs/core_docs/docs/expression_language/get_started.mdx create mode 100644 examples/src/guides/expression_language/get_started/basic.ts create mode 100644 examples/src/guides/expression_language/get_started/chat_model.ts create mode 100644 examples/src/guides/expression_language/get_started/llm_model.ts create mode 100644 
examples/src/guides/expression_language/get_started/output_parser.ts create mode 100644 examples/src/guides/expression_language/get_started/prompt.ts create mode 100644 examples/src/guides/expression_language/get_started/rag.ts diff --git a/docs/core_docs/docs/expression_language/get_started.mdx b/docs/core_docs/docs/expression_language/get_started.mdx new file mode 100644 index 000000000000..a3e6e8035675 --- /dev/null +++ b/docs/core_docs/docs/expression_language/get_started.mdx @@ -0,0 +1,92 @@ +--- +sidebar_position: 0 +title: Get started +--- + +import CodeBlock from "@theme/CodeBlock"; +import BasicExample from "@examples/guides/expression_language/get_started/basic.ts"; +import BasicPromptExample from "@examples/guides/expression_language/get_started/prompt.ts"; +import BasicChatModelExample from "@examples/guides/expression_language/get_started/chat_model.ts"; +import BasicLLMModelExample from "@examples/guides/expression_language/get_started/llm_model.ts"; +import BasicOutputParserExample from "@examples/guides/expression_language/get_started/output_parser.ts"; +import BasicRagExample from "@examples/guides/expression_language/get_started/rag.ts"; + +# Get started + +LCEL makes it easy to build complex chains from basic components, and supports out of the box functionality such as streaming, parallelism, and logging. + +## Basic example: prompt + model + output parser + +The most basic and common use case is chaining a prompt template and a model together. To see how this works, let's create a chain that takes a topic and generates a joke: + +{BasicExample} + +:::tip + +[LangSmith trace](https://smith.langchain.com/public/dcac6d79-5254-4889-a974-4b3abaf605b4/r) + +::: + +Notice in this line we're chaining our prompt, LLM model and output parser together: + +```typescript +const chain = prompt.pipe(model).pipe(outputParser); +``` + +The `.pipe()` method allows for chaining together any number of runnables. 
It will pass the output of one through to the input of the next. + +Here, the prompt is passed a `topic` and when invoked it returns a formatted string with the `{topic}` input variable replaced with the string we passed to the invoke call. +That string is then passed as the input to the LLM which returns a `BaseMessage` object. Finally, the output parser takes that `BaseMessage` object and returns the content of that object as a string. + +### 1. Prompt + +`prompt` is a `BasePromptTemplate`, which means it takes in an object of template variables and produces a `PromptValue`. +A `PromptValue` is a wrapper around a completed prompt that can be passed to either an `LLM` (which takes a string as input) or `ChatModel` (which takes a sequence of messages as input). +It can work with either language model type because it defines logic both for producing BaseMessages and for producing a string. + +{BasicPromptExample} + +### 2. Model + +The `PromptValue` is then passed to `model`. In this case our `model` is a `ChatModel`, meaning it will output a `BaseMessage`. + +{BasicChatModelExample} + +If our model was an LLM, it would output a string. + +{BasicLLMModelExample} + +### 3. Output parser + +And lastly we pass our `model` output to the `outputParser`, which is a `BaseOutputParser` meaning it takes either a string or a `BaseMessage` as input. The `StringOutputParser` specifically simple converts any input into a string. + +{BasicOutputParserExample} + +## RAG Search Example + +For our next example, we want to run a retrieval-augmented generation chain to add some context when responding to questions. + +{BasicRagExample} + +:::tip + +[LangSmith trace](https://smith.langchain.com/public/f0205e20-c46f-47cd-a3a4-6a95451f8a25/r) + +::: + +In this chain we add some extra logic around retrieving context from a vector store. + +We first instantiated our model, vector store and output parser. 
Then we defined our prompt, which takes in two input variables: + +- `context` -> this is a string which is returned from our vector store based on a semantic search from the input. +- `question` -> this is the question we want to ask. + +Next we created a `setupAndRetriever` runnable. This has two components which return the values required by our prompt: + +- `context` -> this is a `RunnableLambda` which takes the input from the `.invoke()` call, makes a request to our vector store, and returns the first result. +- `question` -> this uses a `RunnablePassthrough` which simply passes whatever the input was through to the next step, and in our case it returns it to the key in the object we defined. + +Both of these are wrapped inside a `RunnableMap`. This is a special type of runnable that takes an object of runnables and executes them all in parallel. +It then returns an object with the same keys as the input object, but with the values replaced with the output of the runnables. + +Finally, we pass the output of the `setupAndRetriever` to our `prompt` and then to our `model` and `outputParser` as before. diff --git a/examples/src/guides/expression_language/get_started/basic.ts b/examples/src/guides/expression_language/get_started/basic.ts new file mode 100644 index 000000000000..a6035b82a531 --- /dev/null +++ b/examples/src/guides/expression_language/get_started/basic.ts @@ -0,0 +1,20 @@ +import { ChatOpenAI } from "langchain/chat_models/openai"; +import { ChatPromptTemplate } from "langchain/prompts"; +import { StringOutputParser } from "langchain/schema/output_parser"; + +const prompt = ChatPromptTemplate.fromMessages([ + ["human", "Tell me a short joke about {topic}"], +]); +const model = new ChatOpenAI({}); +const outputParser = new StringOutputParser(); + +const chain = prompt.pipe(model).pipe(outputParser); + +const response = await chain.invoke({ + topic: "ice cream", +}); +console.log(response); +/** +Why did the ice cream go to the gym? 
+Because it wanted to get a little "cone"ditioning! + */ diff --git a/examples/src/guides/expression_language/get_started/chat_model.ts b/examples/src/guides/expression_language/get_started/chat_model.ts new file mode 100644 index 000000000000..f1da2c7c8072 --- /dev/null +++ b/examples/src/guides/expression_language/get_started/chat_model.ts @@ -0,0 +1,14 @@ +import { ChatOpenAI } from "langchain/chat_models/openai"; + +const model = new ChatOpenAI({}); +const promptAsString = "Human: Tell me a short joke about ice cream"; + +const response = await model.invoke(promptAsString); +console.log(response); +/** +AIMessage { + content: 'Sure, here you go: Why did the ice cream go to school? Because it wanted to get a little "sundae" education!', + name: undefined, + additional_kwargs: { function_call: undefined, tool_calls: undefined } +} + */ diff --git a/examples/src/guides/expression_language/get_started/llm_model.ts b/examples/src/guides/expression_language/get_started/llm_model.ts new file mode 100644 index 000000000000..e689a8f828a0 --- /dev/null +++ b/examples/src/guides/expression_language/get_started/llm_model.ts @@ -0,0 +1,12 @@ +import { OpenAI } from "langchain/llms/openai"; + +const model = new OpenAI({}); +const promptAsString = "Human: Tell me a short joke about ice cream"; + +const response = await model.invoke(promptAsString); +console.log(response); +/** +Why did the ice cream go to therapy? + +Because it was feeling a little rocky road. 
+ */ diff --git a/examples/src/guides/expression_language/get_started/output_parser.ts b/examples/src/guides/expression_language/get_started/output_parser.ts new file mode 100644 index 000000000000..7640166c1452 --- /dev/null +++ b/examples/src/guides/expression_language/get_started/output_parser.ts @@ -0,0 +1,12 @@ +import { AIMessage } from "langchain/schema"; +import { StringOutputParser } from "langchain/schema/output_parser"; + +const outputParser = new StringOutputParser(); +const message = new AIMessage( + 'Sure, here you go: Why did the ice cream go to school? Because it wanted to get a little "sundae" education!' +); +const parsed = await outputParser.invoke(message); +console.log(parsed); +/** +Sure, here you go: Why did the ice cream go to school? Because it wanted to get a little "sundae" education! + */ diff --git a/examples/src/guides/expression_language/get_started/prompt.ts b/examples/src/guides/expression_language/get_started/prompt.ts new file mode 100644 index 000000000000..fe178719f954 --- /dev/null +++ b/examples/src/guides/expression_language/get_started/prompt.ts @@ -0,0 +1,34 @@ +import { ChatPromptTemplate } from "langchain/prompts"; + +const prompt = ChatPromptTemplate.fromMessages([ + ["human", "Tell me a short joke about {topic}"], +]); +const promptValue = await prompt.invoke({ topic: "ice cream" }); +console.log(promptValue); +/** +ChatPromptValue { + messages: [ + HumanMessage { + content: 'Tell me a short joke about ice cream', + name: undefined, + additional_kwargs: {} + } + ] +} + */ +const promptAsMessages = promptValue.toChatMessages(); +console.log(promptAsMessages); +/** +[ + HumanMessage { + content: 'Tell me a short joke about ice cream', + name: undefined, + additional_kwargs: {} + } +] + */ +const promptAsString = promptValue.toString(); +console.log(promptAsString); +/** +Human: Tell me a short joke about ice cream + */ diff --git a/examples/src/guides/expression_language/get_started/rag.ts 
b/examples/src/guides/expression_language/get_started/rag.ts new file mode 100644 index 000000000000..5127c837ff93 --- /dev/null +++ b/examples/src/guides/expression_language/get_started/rag.ts @@ -0,0 +1,47 @@ +import { ChatOpenAI } from "langchain/chat_models/openai"; +import { Document } from "langchain/document"; +import { OpenAIEmbeddings } from "langchain/embeddings/openai"; +import { ChatPromptTemplate } from "langchain/prompts"; +import { + RunnableLambda, + RunnableMap, + RunnablePassthrough, +} from "langchain/runnables"; +import { StringOutputParser } from "langchain/schema/output_parser"; +import { HNSWLib } from "langchain/vectorstores/hnswlib"; + +const vectorStore = await HNSWLib.fromDocuments( + [ + new Document({ pageContent: "Harrison worked at Kensho" }), + new Document({ pageContent: "Bears like to eat honey." }), + ], + new OpenAIEmbeddings() +); +const retriever = vectorStore.asRetriever(1); + +const prompt = ChatPromptTemplate.fromMessages([ + [ + "ai", + `Answer the question based on only the following context: + +{context}`, + ], + ["human", "{question}"], +]); +const model = new ChatOpenAI({}); +const outputParser = new StringOutputParser(); + +const setupAndRetrieval = RunnableMap.from({ + context: new RunnableLambda({ + func: (input: string) => + retriever.invoke(input).then((response) => response[0].pageContent), + }).withConfig({ runName: "contextRetriever" }), + question: new RunnablePassthrough(), +}); +const chain = setupAndRetrieval.pipe(prompt).pipe(model).pipe(outputParser); + +const response = await chain.invoke("Where did Harrison work?"); +console.log(response); +/** +Harrison worked at Kensho. 
+ */ From de265816163e3ea15aec4940d3a1e6dc4fe90c71 Mon Sep 17 00:00:00 2001 From: jacoblee93 Date: Thu, 7 Dec 2023 11:43:27 -0800 Subject: [PATCH 12/22] Format, lint --- docker-compose.yml | 7 + .../scripts/docker-ci-entrypoint.sh | 1 + .../test-exports-bun/package.json | 1 + .../test-exports-cf/package.json | 1 + .../test-exports-cjs/package.json | 1 + .../test-exports-esbuild/package.json | 1 + .../test-exports-esm/package.json | 1 + .../test-exports-vercel/package.json | 1 + .../test-exports-vite/package.json | 1 + langchain-core/src/load/index.ts | 17 ++- langchain/package.json | 2 +- langchain/src/cache/cloudflare_kv.ts | 2 +- langchain/src/cache/momento.ts | 2 +- langchain/src/cache/upstash_redis.ts | 2 +- langchain/src/callbacks/handlers/llmonitor.ts | 2 +- langchain/src/chat_models/baiduwenxin.ts | 2 +- langchain/src/chat_models/bedrock/index.ts | 2 +- langchain/src/chat_models/bedrock/web.ts | 2 +- .../src/chat_models/cloudflare_workersai.ts | 2 +- langchain/src/chat_models/fireworks.ts | 2 +- langchain/src/chat_models/googlepalm.ts | 2 +- .../src/chat_models/googlevertexai/index.ts | 2 +- .../src/chat_models/googlevertexai/web.ts | 2 +- .../src/chat_models/iflytek_xinghuo/index.ts | 2 +- .../src/chat_models/iflytek_xinghuo/web.ts | 2 +- langchain/src/chat_models/llama_cpp.ts | 2 +- langchain/src/chat_models/minimax.ts | 2 +- langchain/src/chat_models/ollama.ts | 2 +- langchain/src/chat_models/portkey.ts | 2 +- langchain/src/chat_models/yandex.ts | 2 +- langchain/src/embeddings/bedrock.ts | 2 +- .../src/embeddings/cloudflare_workersai.ts | 2 +- langchain/src/embeddings/cohere.ts | 2 +- langchain/src/embeddings/googlepalm.ts | 2 +- langchain/src/embeddings/googlevertexai.ts | 2 +- langchain/src/embeddings/gradient_ai.ts | 2 +- langchain/src/embeddings/hf.ts | 2 +- langchain/src/embeddings/hf_transformers.ts | 2 +- langchain/src/embeddings/llama_cpp.ts | 2 +- langchain/src/embeddings/minimax.ts | 2 +- langchain/src/embeddings/ollama.ts | 2 +- 
langchain/src/embeddings/tensorflow.ts | 2 +- langchain/src/embeddings/voyage.ts | 2 +- .../tests/googlemakersuitehub.test.ts | 14 +- langchain/src/graphs/neo4j_graph.ts | 2 +- langchain/src/llms/ai21.ts | 2 +- langchain/src/llms/aleph_alpha.ts | 2 +- langchain/src/llms/bedrock/web.ts | 2 +- langchain/src/llms/cloudflare_workersai.ts | 2 +- langchain/src/llms/cohere.ts | 2 +- langchain/src/llms/fireworks.ts | 2 +- langchain/src/llms/googlepalm.ts | 2 +- langchain/src/llms/googlevertexai/index.ts | 2 +- langchain/src/llms/googlevertexai/web.ts | 2 +- langchain/src/llms/gradient_ai.ts | 2 +- langchain/src/llms/hf.ts | 2 +- langchain/src/llms/llama_cpp.ts | 2 +- langchain/src/llms/ollama.ts | 2 +- langchain/src/llms/portkey.ts | 2 +- langchain/src/llms/raycast.ts | 2 +- langchain/src/llms/replicate.ts | 2 +- langchain/src/llms/sagemaker_endpoint.ts | 2 +- langchain/src/llms/watsonx_ai.ts | 2 +- langchain/src/llms/writer.ts | 2 +- langchain/src/llms/yandex.ts | 2 +- langchain/src/load/index.ts | 3 +- .../tests/__snapshots__/load.test.ts.snap | 2 +- langchain/src/retrievers/amazon_kendra.ts | 2 +- langchain/src/retrievers/chaindesk.ts | 2 +- langchain/src/retrievers/databerry.ts | 2 +- langchain/src/retrievers/metal.ts | 2 +- langchain/src/retrievers/supabase.ts | 2 +- langchain/src/retrievers/tavily_search_api.ts | 2 +- langchain/src/retrievers/zep.ts | 2 +- langchain/src/tools/IFTTTWebhook.ts | 2 +- langchain/src/tools/aiplugin.ts | 2 +- langchain/src/tools/aws_sfn.ts | 2 +- langchain/src/tools/bingserpapi.ts | 2 +- langchain/src/tools/brave_search.ts | 2 +- langchain/src/tools/connery.ts | 2 +- langchain/src/tools/dadjokeapi.ts | 2 +- langchain/src/tools/dataforseo_api_search.ts | 2 +- langchain/src/tools/gmail/index.ts | 2 +- langchain/src/tools/google_custom_search.ts | 2 +- langchain/src/tools/google_places.ts | 2 +- langchain/src/tools/searchapi.ts | 2 +- langchain/src/tools/searxng_search.ts | 2 +- langchain/src/tools/serpapi.ts | 2 +- 
langchain/src/tools/serper.ts | 2 +- langchain/src/tools/wikipedia_query_run.ts | 2 +- langchain/src/tools/wolframalpha.ts | 2 +- langchain/src/util/event-source-parse.ts | 2 +- .../scripts/create-entrypoints.js | 2 +- .../src/chat_models/baiduwenxin.ts | 2 +- .../src/chat_models/bedrock/web.ts | 2 +- .../src/chat_models/cloudflare_workersai.ts | 2 +- .../src/chat_models/fireworks.ts | 2 +- .../src/chat_models/googlepalm.ts | 2 +- .../src/chat_models/googlevertexai/common.ts | 2 +- .../src/chat_models/iflytek_xinghuo/common.ts | 2 +- .../src/chat_models/llama_cpp.ts | 2 +- .../src/chat_models/minimax.ts | 2 +- .../src/chat_models/ollama.ts | 2 +- .../src/chat_models/portkey.ts | 2 +- libs/langchain-community/src/index.ts | 2 +- libs/langchain-community/src/llms/ai21.ts | 2 +- .../src/llms/aleph_alpha.ts | 2 +- .../src/llms/bedrock/web.ts | 2 +- .../src/llms/cloudflare_workersai.ts | 2 +- libs/langchain-community/src/llms/cohere.ts | 2 +- .../langchain-community/src/llms/fireworks.ts | 2 +- .../src/llms/googlepalm.ts | 2 +- .../src/llms/googlevertexai/common.ts | 2 +- .../src/llms/gradient_ai.ts | 2 +- libs/langchain-community/src/llms/hf.ts | 2 +- .../langchain-community/src/llms/llama_cpp.ts | 2 +- libs/langchain-community/src/llms/ollama.ts | 2 +- libs/langchain-community/src/llms/portkey.ts | 2 +- libs/langchain-community/src/llms/raycast.ts | 2 +- .../langchain-community/src/llms/replicate.ts | 2 +- .../src/llms/sagemaker_endpoint.ts | 2 +- .../src/llms/watsonx_ai.ts | 2 +- libs/langchain-community/src/llms/writer.ts | 2 +- libs/langchain-community/src/llms/yandex.ts | 2 +- .../src/load/import_constants.ts | 126 +++++++++--------- libs/langchain-community/src/load/index.ts | 21 +-- .../langchain-community/src/tools/aiplugin.ts | 2 +- libs/langchain-community/src/tools/aws_sfn.ts | 2 +- .../src/tools/bingserpapi.ts | 2 +- .../src/tools/brave_search.ts | 2 +- libs/langchain-community/src/tools/connery.ts | 2 +- .../src/tools/dadjokeapi.ts | 2 +- 
.../src/tools/dataforseo_api_search.ts | 2 +- .../src/tools/gmail/base.ts | 2 +- .../src/tools/google_custom_search.ts | 2 +- .../src/tools/google_places.ts | 2 +- libs/langchain-community/src/tools/ifttt.ts | 2 +- .../src/tools/searchapi.ts | 2 +- .../src/tools/searxng_search.ts | 2 +- libs/langchain-community/src/tools/serpapi.ts | 2 +- libs/langchain-community/src/tools/serper.ts | 2 +- .../src/tools/wikipedia_query_run.ts | 2 +- .../src/tools/wolframalpha.ts | 2 +- yarn.lock | 4 +- 144 files changed, 239 insertions(+), 219 deletions(-) diff --git a/docker-compose.yml b/docker-compose.yml index f050399fc8a9..d07b5cfb3a1b 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -11,6 +11,7 @@ services: - ./environment_tests/scripts:/scripts - ./langchain:/langchain - ./langchain-core:/langchain-core + - ./libs/langchain-community:/langchain-community - ./libs/langchain-anthropic:/langchain-anthropic - ./libs/langchain-openai:/langchain-openai command: bash /scripts/docker-ci-entrypoint.sh @@ -25,6 +26,7 @@ services: - ./environment_tests/scripts:/scripts - ./langchain:/langchain - ./langchain-core:/langchain-core + - ./libs/langchain-community:/langchain-community - ./libs/langchain-anthropic:/langchain-anthropic - ./libs/langchain-openai:/langchain-openai command: bash /scripts/docker-ci-entrypoint.sh @@ -39,6 +41,7 @@ services: - ./environment_tests/scripts:/scripts - ./langchain:/langchain - ./langchain-core:/langchain-core + - ./libs/langchain-community:/langchain-community - ./libs/langchain-anthropic:/langchain-anthropic - ./libs/langchain-openai:/langchain-openai command: bash /scripts/docker-ci-entrypoint.sh @@ -53,6 +56,7 @@ services: - ./environment_tests/scripts:/scripts - ./langchain:/langchain - ./langchain-core:/langchain-core + - ./libs/langchain-community:/langchain-community - ./libs/langchain-anthropic:/langchain-anthropic - ./libs/langchain-openai:/langchain-openai command: bash /scripts/docker-ci-entrypoint.sh @@ -67,6 +71,7 @@ services: - 
./environment_tests/scripts:/scripts - ./langchain:/langchain - ./langchain-core:/langchain-core + - ./libs/langchain-community:/langchain-community - ./libs/langchain-anthropic:/langchain-anthropic - ./libs/langchain-openai:/langchain-openai command: bash /scripts/docker-ci-entrypoint.sh @@ -81,6 +86,7 @@ services: - ./environment_tests/scripts:/scripts - ./langchain:/langchain - ./langchain-core:/langchain-core + - ./libs/langchain-community:/langchain-community - ./libs/langchain-anthropic:/langchain-anthropic - ./libs/langchain-openai:/langchain-openai command: bash /scripts/docker-ci-entrypoint.sh @@ -92,6 +98,7 @@ services: # - ./environment_tests/scripts:/scripts # - ./langchain:/langchain-workspace # - ./langchain-core:/langchain-core + # - ./libs/langchain-community:/langchain-community-workspace # - ./libs/langchain-anthropic:/langchain-anthropic-workspace # command: bash /scripts/docker-bun-ci-entrypoint.sh success: diff --git a/environment_tests/scripts/docker-ci-entrypoint.sh b/environment_tests/scripts/docker-ci-entrypoint.sh index dd98d4276256..a4e2d45e7d45 100644 --- a/environment_tests/scripts/docker-ci-entrypoint.sh +++ b/environment_tests/scripts/docker-ci-entrypoint.sh @@ -19,6 +19,7 @@ cp ../root/yarn.lock ../root/.yarnrc.yml . 
# Avoid calling "yarn add ../langchain" as yarn berry does seem to hang for ~30s # before installation actually occurs sed -i 's/"@langchain\/core": "workspace:\*"/"@langchain\/core": "..\/langchain-core"/g' package.json +sed -i 's/"@langchain\/community": "workspace:\*"/"@langchain\/community": "..\/langchain-community"/g' package.json sed -i 's/"@langchain\/anthropic": "workspace:\*"/"@langchain\/anthropic": "..\/langchain-anthropic"/g' package.json sed -i 's/"@langchain\/openai": "workspace:\*"/"@langchain\/openai": "..\/langchain-openai"/g' package.json sed -i 's/"langchain": "workspace:\*"/"langchain": "..\/langchain"/g' package.json diff --git a/environment_tests/test-exports-bun/package.json b/environment_tests/test-exports-bun/package.json index 8294ac754257..649c5e7a1f3d 100644 --- a/environment_tests/test-exports-bun/package.json +++ b/environment_tests/test-exports-bun/package.json @@ -18,6 +18,7 @@ "license": "MIT", "dependencies": { "@langchain/anthropic": "workspace:*", + "@langchain/community": "workspace:*", "@langchain/core": "workspace:*", "@langchain/openai": "workspace:*", "d3-dsv": "2", diff --git a/environment_tests/test-exports-cf/package.json b/environment_tests/test-exports-cf/package.json index b87c55713fda..fa01eb216974 100644 --- a/environment_tests/test-exports-cf/package.json +++ b/environment_tests/test-exports-cf/package.json @@ -9,6 +9,7 @@ }, "dependencies": { "@langchain/anthropic": "workspace:*", + "@langchain/community": "workspace:*", "@langchain/core": "workspace:*", "@langchain/openai": "workspace:*", "langchain": "workspace:*" diff --git a/environment_tests/test-exports-cjs/package.json b/environment_tests/test-exports-cjs/package.json index b0b43151e2b4..e9aecb14e38d 100644 --- a/environment_tests/test-exports-cjs/package.json +++ b/environment_tests/test-exports-cjs/package.json @@ -19,6 +19,7 @@ "license": "MIT", "dependencies": { "@langchain/anthropic": "workspace:*", + "@langchain/community": "workspace:*", 
"@langchain/core": "workspace:*", "@langchain/openai": "workspace:*", "d3-dsv": "2", diff --git a/environment_tests/test-exports-esbuild/package.json b/environment_tests/test-exports-esbuild/package.json index 15241cd5ef20..5d456fba7335 100644 --- a/environment_tests/test-exports-esbuild/package.json +++ b/environment_tests/test-exports-esbuild/package.json @@ -17,6 +17,7 @@ "license": "MIT", "dependencies": { "@langchain/anthropic": "workspace:*", + "@langchain/community": "workspace:*", "@langchain/core": "workspace:*", "@langchain/openai": "workspace:*", "d3-dsv": "2", diff --git a/environment_tests/test-exports-esm/package.json b/environment_tests/test-exports-esm/package.json index a79fc8e8b174..9eeb68a80474 100644 --- a/environment_tests/test-exports-esm/package.json +++ b/environment_tests/test-exports-esm/package.json @@ -20,6 +20,7 @@ "license": "MIT", "dependencies": { "@langchain/anthropic": "workspace:*", + "@langchain/community": "workspace:*", "@langchain/core": "workspace:*", "@langchain/openai": "workspace:*", "d3-dsv": "2", diff --git a/environment_tests/test-exports-vercel/package.json b/environment_tests/test-exports-vercel/package.json index c784c3d21a8d..dde8eb4e7128 100644 --- a/environment_tests/test-exports-vercel/package.json +++ b/environment_tests/test-exports-vercel/package.json @@ -10,6 +10,7 @@ }, "dependencies": { "@langchain/anthropic": "workspace:*", + "@langchain/community": "workspace:*", "@langchain/core": "workspace:*", "@langchain/openai": "workspace:*", "@types/node": "18.15.11", diff --git a/environment_tests/test-exports-vite/package.json b/environment_tests/test-exports-vite/package.json index 3a3ea14886d5..2240bd9398ca 100644 --- a/environment_tests/test-exports-vite/package.json +++ b/environment_tests/test-exports-vite/package.json @@ -11,6 +11,7 @@ }, "dependencies": { "@langchain/anthropic": "workspace:*", + "@langchain/community": "workspace:*", "@langchain/core": "workspace:*", "@langchain/openai": "workspace:*", 
"langchain": "workspace:*" diff --git a/langchain-core/src/load/index.ts b/langchain-core/src/load/index.ts index 69e32bc52cbc..870b7a374f93 100644 --- a/langchain-core/src/load/index.ts +++ b/langchain-core/src/load/index.ts @@ -104,15 +104,25 @@ async function reviver( | (typeof finalImportMap)[keyof typeof finalImportMap] | OptionalImportMap[keyof OptionalImportMap] | null = null; + + const optionalImportNamespaceAliases = [namespace.join("/")]; + if (namespace[0] === "langchain_community") { + optionalImportNamespaceAliases.push( + ["langchain", ...namespace.slice(1)].join("/") + ); + } + const matchingNamespaceAlias = optionalImportNamespaceAliases.find( + (alias) => alias in optionalImportsMap + ); if ( defaultOptionalImportEntrypoints .concat(optionalImportEntrypoints) .includes(namespace.join("/")) || - namespace.join("/") in optionalImportsMap + matchingNamespaceAlias ) { - if (namespace.join("/") in optionalImportsMap) { + if (matchingNamespaceAlias !== undefined) { module = await optionalImportsMap[ - namespace.join("/") as keyof typeof optionalImportsMap + matchingNamespaceAlias as keyof typeof optionalImportsMap ]; } else { throw new Error( @@ -126,6 +136,7 @@ async function reviver( if ( namespace[0] === "langchain" || namespace[0] === "langchain_core" || + namespace[0] === "langchain_community" || namespace[0] === "langchain_anthropic" || namespace[0] === "langchain_openai" ) { diff --git a/langchain/package.json b/langchain/package.json index 23dc2e1b82f7..71ae43c0b452 100644 --- a/langchain/package.json +++ b/langchain/package.json @@ -1435,7 +1435,7 @@ }, "dependencies": { "@anthropic-ai/sdk": "^0.9.1", - "@langchain/community": "workspace:*", + "@langchain/community": "~0.0.0", "@langchain/core": "~0.0.10", "binary-extensions": "^2.2.0", "expr-eval": "^2.0.2", diff --git a/langchain/src/cache/cloudflare_kv.ts b/langchain/src/cache/cloudflare_kv.ts index db657589e95b..b3f86e2b187e 100644 --- a/langchain/src/cache/cloudflare_kv.ts +++ 
b/langchain/src/cache/cloudflare_kv.ts @@ -1 +1 @@ -export * from "@langchain/community/caches/cloudflare_kv"; \ No newline at end of file +export * from "@langchain/community/caches/cloudflare_kv"; diff --git a/langchain/src/cache/momento.ts b/langchain/src/cache/momento.ts index 3694d0d2154b..0a24cf70f21b 100644 --- a/langchain/src/cache/momento.ts +++ b/langchain/src/cache/momento.ts @@ -1 +1 @@ -export * from "@langchain/community/caches/momento"; \ No newline at end of file +export * from "@langchain/community/caches/momento"; diff --git a/langchain/src/cache/upstash_redis.ts b/langchain/src/cache/upstash_redis.ts index 55e717462226..8e1a82be82e3 100644 --- a/langchain/src/cache/upstash_redis.ts +++ b/langchain/src/cache/upstash_redis.ts @@ -1 +1 @@ -export * from "@langchain/community/caches/upstash_redis"; \ No newline at end of file +export * from "@langchain/community/caches/upstash_redis"; diff --git a/langchain/src/callbacks/handlers/llmonitor.ts b/langchain/src/callbacks/handlers/llmonitor.ts index 792e7468d1a8..92fe3e48f946 100644 --- a/langchain/src/callbacks/handlers/llmonitor.ts +++ b/langchain/src/callbacks/handlers/llmonitor.ts @@ -1 +1 @@ -export * from "@langchain/community/callbacks/handlers/llmonitor"; \ No newline at end of file +export * from "@langchain/community/callbacks/handlers/llmonitor"; diff --git a/langchain/src/chat_models/baiduwenxin.ts b/langchain/src/chat_models/baiduwenxin.ts index af27928d6d21..61ffe21de4d4 100644 --- a/langchain/src/chat_models/baiduwenxin.ts +++ b/langchain/src/chat_models/baiduwenxin.ts @@ -1 +1 @@ -export * from "@langchain/community/chat_models/baiduwenxin"; \ No newline at end of file +export * from "@langchain/community/chat_models/baiduwenxin"; diff --git a/langchain/src/chat_models/bedrock/index.ts b/langchain/src/chat_models/bedrock/index.ts index 64061633047e..a78523c28a2b 100644 --- a/langchain/src/chat_models/bedrock/index.ts +++ b/langchain/src/chat_models/bedrock/index.ts @@ -1 +1 @@ -export * 
from "@langchain/community/chat_models/bedrock"; \ No newline at end of file +export * from "@langchain/community/chat_models/bedrock"; diff --git a/langchain/src/chat_models/bedrock/web.ts b/langchain/src/chat_models/bedrock/web.ts index c21182a1ce2d..9ef97f768f8a 100644 --- a/langchain/src/chat_models/bedrock/web.ts +++ b/langchain/src/chat_models/bedrock/web.ts @@ -1 +1 @@ -export * from "@langchain/community/chat_models/bedrock/web"; \ No newline at end of file +export * from "@langchain/community/chat_models/bedrock/web"; diff --git a/langchain/src/chat_models/cloudflare_workersai.ts b/langchain/src/chat_models/cloudflare_workersai.ts index 702fbab8dc60..009e5183f0a0 100644 --- a/langchain/src/chat_models/cloudflare_workersai.ts +++ b/langchain/src/chat_models/cloudflare_workersai.ts @@ -1 +1 @@ -export * from "@langchain/community/chat_models/cloudflare_workersai"; \ No newline at end of file +export * from "@langchain/community/chat_models/cloudflare_workersai"; diff --git a/langchain/src/chat_models/fireworks.ts b/langchain/src/chat_models/fireworks.ts index 9903af0282f9..438f96c7c33d 100644 --- a/langchain/src/chat_models/fireworks.ts +++ b/langchain/src/chat_models/fireworks.ts @@ -1 +1 @@ -export * from "@langchain/community/chat_models/fireworks"; \ No newline at end of file +export * from "@langchain/community/chat_models/fireworks"; diff --git a/langchain/src/chat_models/googlepalm.ts b/langchain/src/chat_models/googlepalm.ts index ff52c6cd4cf2..53f338a9674d 100644 --- a/langchain/src/chat_models/googlepalm.ts +++ b/langchain/src/chat_models/googlepalm.ts @@ -1 +1 @@ -export * from "@langchain/community/chat_models/googlepalm"; \ No newline at end of file +export * from "@langchain/community/chat_models/googlepalm"; diff --git a/langchain/src/chat_models/googlevertexai/index.ts b/langchain/src/chat_models/googlevertexai/index.ts index c116919b8417..b2977f65c20a 100644 --- a/langchain/src/chat_models/googlevertexai/index.ts +++ 
b/langchain/src/chat_models/googlevertexai/index.ts @@ -1 +1 @@ -export * from "@langchain/community/chat_models/googlevertexai"; \ No newline at end of file +export * from "@langchain/community/chat_models/googlevertexai"; diff --git a/langchain/src/chat_models/googlevertexai/web.ts b/langchain/src/chat_models/googlevertexai/web.ts index 1bf66b9c05f0..4c350a89266a 100644 --- a/langchain/src/chat_models/googlevertexai/web.ts +++ b/langchain/src/chat_models/googlevertexai/web.ts @@ -1 +1 @@ -export * from "@langchain/community/chat_models/googlevertexai/web"; \ No newline at end of file +export * from "@langchain/community/chat_models/googlevertexai/web"; diff --git a/langchain/src/chat_models/iflytek_xinghuo/index.ts b/langchain/src/chat_models/iflytek_xinghuo/index.ts index 3ad6aa3eb4b2..9b988a537704 100644 --- a/langchain/src/chat_models/iflytek_xinghuo/index.ts +++ b/langchain/src/chat_models/iflytek_xinghuo/index.ts @@ -1 +1 @@ -export * from "@langchain/community/chat_models/iflytek_xinghuo"; \ No newline at end of file +export * from "@langchain/community/chat_models/iflytek_xinghuo"; diff --git a/langchain/src/chat_models/iflytek_xinghuo/web.ts b/langchain/src/chat_models/iflytek_xinghuo/web.ts index 0fe64665b992..8867445ee59c 100644 --- a/langchain/src/chat_models/iflytek_xinghuo/web.ts +++ b/langchain/src/chat_models/iflytek_xinghuo/web.ts @@ -1 +1 @@ -export * from "@langchain/community/chat_models/iflytek_xinghuo/web"; \ No newline at end of file +export * from "@langchain/community/chat_models/iflytek_xinghuo/web"; diff --git a/langchain/src/chat_models/llama_cpp.ts b/langchain/src/chat_models/llama_cpp.ts index 1ecd1b008fde..ae06e6116cfc 100644 --- a/langchain/src/chat_models/llama_cpp.ts +++ b/langchain/src/chat_models/llama_cpp.ts @@ -1 +1 @@ -export * from "@langchain/community/chat_models/llama_cpp"; \ No newline at end of file +export * from "@langchain/community/chat_models/llama_cpp"; diff --git a/langchain/src/chat_models/minimax.ts 
b/langchain/src/chat_models/minimax.ts index 7cc9bd240b18..6cb2a1436dbd 100644 --- a/langchain/src/chat_models/minimax.ts +++ b/langchain/src/chat_models/minimax.ts @@ -1 +1 @@ -export * from "@langchain/community/chat_models/minimax"; \ No newline at end of file +export * from "@langchain/community/chat_models/minimax"; diff --git a/langchain/src/chat_models/ollama.ts b/langchain/src/chat_models/ollama.ts index 5c078d77a912..175e84ab3511 100644 --- a/langchain/src/chat_models/ollama.ts +++ b/langchain/src/chat_models/ollama.ts @@ -1 +1 @@ -export * from "@langchain/community/chat_models/ollama"; \ No newline at end of file +export * from "@langchain/community/chat_models/ollama"; diff --git a/langchain/src/chat_models/portkey.ts b/langchain/src/chat_models/portkey.ts index cf17722ab306..41b8ae09537c 100644 --- a/langchain/src/chat_models/portkey.ts +++ b/langchain/src/chat_models/portkey.ts @@ -1 +1 @@ -export * from "@langchain/community/chat_models/portkey"; \ No newline at end of file +export * from "@langchain/community/chat_models/portkey"; diff --git a/langchain/src/chat_models/yandex.ts b/langchain/src/chat_models/yandex.ts index 9a1100723334..3dbac8ffb540 100644 --- a/langchain/src/chat_models/yandex.ts +++ b/langchain/src/chat_models/yandex.ts @@ -1 +1 @@ -export * from "@langchain/community/chat_models/yandex"; \ No newline at end of file +export * from "@langchain/community/chat_models/yandex"; diff --git a/langchain/src/embeddings/bedrock.ts b/langchain/src/embeddings/bedrock.ts index bbc7a702cb60..0158430fd0a5 100644 --- a/langchain/src/embeddings/bedrock.ts +++ b/langchain/src/embeddings/bedrock.ts @@ -1 +1 @@ -export * from "@langchain/community/embeddings/bedrock"; \ No newline at end of file +export * from "@langchain/community/embeddings/bedrock"; diff --git a/langchain/src/embeddings/cloudflare_workersai.ts b/langchain/src/embeddings/cloudflare_workersai.ts index a5a6050cd44b..b5d4cd6238a8 100644 --- 
a/langchain/src/embeddings/cloudflare_workersai.ts +++ b/langchain/src/embeddings/cloudflare_workersai.ts @@ -1 +1 @@ -export * from "@langchain/community/embeddings/cloudflare_workersai"; \ No newline at end of file +export * from "@langchain/community/embeddings/cloudflare_workersai"; diff --git a/langchain/src/embeddings/cohere.ts b/langchain/src/embeddings/cohere.ts index e65f3be38b9f..83eb358d4e37 100644 --- a/langchain/src/embeddings/cohere.ts +++ b/langchain/src/embeddings/cohere.ts @@ -1 +1 @@ -export * from "@langchain/community/embeddings/cohere"; \ No newline at end of file +export * from "@langchain/community/embeddings/cohere"; diff --git a/langchain/src/embeddings/googlepalm.ts b/langchain/src/embeddings/googlepalm.ts index 07625a3217c9..950bc455c2c6 100644 --- a/langchain/src/embeddings/googlepalm.ts +++ b/langchain/src/embeddings/googlepalm.ts @@ -1 +1 @@ -export * from "@langchain/community/embeddings/googlepalm"; \ No newline at end of file +export * from "@langchain/community/embeddings/googlepalm"; diff --git a/langchain/src/embeddings/googlevertexai.ts b/langchain/src/embeddings/googlevertexai.ts index 2e5377894e49..3eded4165344 100644 --- a/langchain/src/embeddings/googlevertexai.ts +++ b/langchain/src/embeddings/googlevertexai.ts @@ -1 +1 @@ -export * from "@langchain/community/embeddings/googlevertexai"; \ No newline at end of file +export * from "@langchain/community/embeddings/googlevertexai"; diff --git a/langchain/src/embeddings/gradient_ai.ts b/langchain/src/embeddings/gradient_ai.ts index d5a860615d3c..83b02683d3d9 100644 --- a/langchain/src/embeddings/gradient_ai.ts +++ b/langchain/src/embeddings/gradient_ai.ts @@ -1 +1 @@ -export * from "@langchain/community/embeddings/gradient_ai"; \ No newline at end of file +export * from "@langchain/community/embeddings/gradient_ai"; diff --git a/langchain/src/embeddings/hf.ts b/langchain/src/embeddings/hf.ts index 27686094c28a..7042535179ed 100644 --- a/langchain/src/embeddings/hf.ts +++ 
b/langchain/src/embeddings/hf.ts @@ -1 +1 @@ -export * from "@langchain/community/embeddings/hf"; \ No newline at end of file +export * from "@langchain/community/embeddings/hf"; diff --git a/langchain/src/embeddings/hf_transformers.ts b/langchain/src/embeddings/hf_transformers.ts index eb38d9a597ec..aff3ec98244e 100644 --- a/langchain/src/embeddings/hf_transformers.ts +++ b/langchain/src/embeddings/hf_transformers.ts @@ -1 +1 @@ -export * from "@langchain/community/embeddings/hf_transformers"; \ No newline at end of file +export * from "@langchain/community/embeddings/hf_transformers"; diff --git a/langchain/src/embeddings/llama_cpp.ts b/langchain/src/embeddings/llama_cpp.ts index f8d30024a0d9..3fcf55dda59a 100644 --- a/langchain/src/embeddings/llama_cpp.ts +++ b/langchain/src/embeddings/llama_cpp.ts @@ -1 +1 @@ -export * from "@langchain/community/embeddings/llama_cpp"; \ No newline at end of file +export * from "@langchain/community/embeddings/llama_cpp"; diff --git a/langchain/src/embeddings/minimax.ts b/langchain/src/embeddings/minimax.ts index c2576a58b278..8568bf09207e 100644 --- a/langchain/src/embeddings/minimax.ts +++ b/langchain/src/embeddings/minimax.ts @@ -1 +1 @@ -export * from "@langchain/community/embeddings/minimax"; \ No newline at end of file +export * from "@langchain/community/embeddings/minimax"; diff --git a/langchain/src/embeddings/ollama.ts b/langchain/src/embeddings/ollama.ts index 2b6d5602fdf1..9bd994c08dd9 100644 --- a/langchain/src/embeddings/ollama.ts +++ b/langchain/src/embeddings/ollama.ts @@ -1 +1 @@ -export * from "@langchain/community/embeddings/ollama"; \ No newline at end of file +export * from "@langchain/community/embeddings/ollama"; diff --git a/langchain/src/embeddings/tensorflow.ts b/langchain/src/embeddings/tensorflow.ts index 2f1919d08352..cd09a7c82cf5 100644 --- a/langchain/src/embeddings/tensorflow.ts +++ b/langchain/src/embeddings/tensorflow.ts @@ -1 +1 @@ -export * from "@langchain/community/embeddings/tensorflow"; \ 
No newline at end of file +export * from "@langchain/community/embeddings/tensorflow"; diff --git a/langchain/src/embeddings/voyage.ts b/langchain/src/embeddings/voyage.ts index eb93200b313d..ea725613d118 100644 --- a/langchain/src/embeddings/voyage.ts +++ b/langchain/src/embeddings/voyage.ts @@ -1 +1 @@ -export * from "@langchain/community/embeddings/voyage"; \ No newline at end of file +export * from "@langchain/community/embeddings/voyage"; diff --git a/langchain/src/experimental/hubs/makersuite/tests/googlemakersuitehub.test.ts b/langchain/src/experimental/hubs/makersuite/tests/googlemakersuitehub.test.ts index cd12a3de87c3..38253bde4037 100644 --- a/langchain/src/experimental/hubs/makersuite/tests/googlemakersuitehub.test.ts +++ b/langchain/src/experimental/hubs/makersuite/tests/googlemakersuitehub.test.ts @@ -51,7 +51,11 @@ describe("Google Maker Suite Hub", () => { const prompt = new MakerSuitePrompt(textFile); const model = prompt.toModel(); // console.log(model.lc_namespace); - expect(model.lc_namespace).toEqual(["langchain", "llms", "googlepalm"]); + expect(model.lc_namespace).toEqual([ + "langchain_community", + "llms", + "googlepalm", + ]); }); test("data type", () => { @@ -77,7 +81,11 @@ describe("Google Maker Suite Hub", () => { test("data model", () => { const prompt = new MakerSuitePrompt(dataFile); const model = prompt.toModel(); - expect(model.lc_namespace).toEqual(["langchain", "llms", "googlepalm"]); + expect(model.lc_namespace).toEqual([ + "langchain_community", + "llms", + "googlepalm", + ]); }); test("chat type", () => { @@ -89,7 +97,7 @@ describe("Google Maker Suite Hub", () => { const prompt = new MakerSuitePrompt(chatFile); const model = prompt.toModel(); expect(model.lc_namespace).toEqual([ - "langchain", + "langchain_community", "chat_models", "googlepalm", ]); diff --git a/langchain/src/graphs/neo4j_graph.ts b/langchain/src/graphs/neo4j_graph.ts index e1be4e2c49cf..99cc0011f46a 100644 --- a/langchain/src/graphs/neo4j_graph.ts +++ 
b/langchain/src/graphs/neo4j_graph.ts @@ -1 +1 @@ -export * from "@langchain/community/graphs/neo4j_graph"; \ No newline at end of file +export * from "@langchain/community/graphs/neo4j_graph"; diff --git a/langchain/src/llms/ai21.ts b/langchain/src/llms/ai21.ts index da34e9c6da84..d68f4b637a87 100644 --- a/langchain/src/llms/ai21.ts +++ b/langchain/src/llms/ai21.ts @@ -1 +1 @@ -export * from "@langchain/community/llms/ai21"; \ No newline at end of file +export * from "@langchain/community/llms/ai21"; diff --git a/langchain/src/llms/aleph_alpha.ts b/langchain/src/llms/aleph_alpha.ts index d3aa0002c4b2..1d0c12be8e26 100644 --- a/langchain/src/llms/aleph_alpha.ts +++ b/langchain/src/llms/aleph_alpha.ts @@ -1 +1 @@ -export * from "@langchain/community/llms/aleph_alpha"; \ No newline at end of file +export * from "@langchain/community/llms/aleph_alpha"; diff --git a/langchain/src/llms/bedrock/web.ts b/langchain/src/llms/bedrock/web.ts index 578b2f90b7e2..3ed2ccdd2650 100644 --- a/langchain/src/llms/bedrock/web.ts +++ b/langchain/src/llms/bedrock/web.ts @@ -1 +1 @@ -export * from "@langchain/community/llms/bedrock/web"; \ No newline at end of file +export * from "@langchain/community/llms/bedrock/web"; diff --git a/langchain/src/llms/cloudflare_workersai.ts b/langchain/src/llms/cloudflare_workersai.ts index 015cee52bb8e..2e5add32a465 100644 --- a/langchain/src/llms/cloudflare_workersai.ts +++ b/langchain/src/llms/cloudflare_workersai.ts @@ -1 +1 @@ -export * from "@langchain/community/llms/cloudflare_workersai"; \ No newline at end of file +export * from "@langchain/community/llms/cloudflare_workersai"; diff --git a/langchain/src/llms/cohere.ts b/langchain/src/llms/cohere.ts index 0aa63c4ec6a4..8b911819109e 100644 --- a/langchain/src/llms/cohere.ts +++ b/langchain/src/llms/cohere.ts @@ -1 +1 @@ -export * from "@langchain/community/llms/cohere"; \ No newline at end of file +export * from "@langchain/community/llms/cohere"; diff --git a/langchain/src/llms/fireworks.ts 
b/langchain/src/llms/fireworks.ts index b65fa31f7f93..c8ff87b2d830 100644 --- a/langchain/src/llms/fireworks.ts +++ b/langchain/src/llms/fireworks.ts @@ -1 +1 @@ -export * from "@langchain/community/llms/fireworks"; \ No newline at end of file +export * from "@langchain/community/llms/fireworks"; diff --git a/langchain/src/llms/googlepalm.ts b/langchain/src/llms/googlepalm.ts index 44de2cf9d3f1..5e95faa32f13 100644 --- a/langchain/src/llms/googlepalm.ts +++ b/langchain/src/llms/googlepalm.ts @@ -1 +1 @@ -export * from "@langchain/community/llms/googlepalm"; \ No newline at end of file +export * from "@langchain/community/llms/googlepalm"; diff --git a/langchain/src/llms/googlevertexai/index.ts b/langchain/src/llms/googlevertexai/index.ts index c4e23fa6a7af..0616e82bdd49 100644 --- a/langchain/src/llms/googlevertexai/index.ts +++ b/langchain/src/llms/googlevertexai/index.ts @@ -1 +1 @@ -export * from "@langchain/community/llms/googlevertexai"; \ No newline at end of file +export * from "@langchain/community/llms/googlevertexai"; diff --git a/langchain/src/llms/googlevertexai/web.ts b/langchain/src/llms/googlevertexai/web.ts index 30304d34c5c9..cecd871df86d 100644 --- a/langchain/src/llms/googlevertexai/web.ts +++ b/langchain/src/llms/googlevertexai/web.ts @@ -1 +1 @@ -export * from "@langchain/community/llms/googlevertexai/web"; \ No newline at end of file +export * from "@langchain/community/llms/googlevertexai/web"; diff --git a/langchain/src/llms/gradient_ai.ts b/langchain/src/llms/gradient_ai.ts index c47dee6c5a30..f6a15989551a 100644 --- a/langchain/src/llms/gradient_ai.ts +++ b/langchain/src/llms/gradient_ai.ts @@ -1 +1 @@ -export * from "@langchain/community/llms/gradient_ai"; \ No newline at end of file +export * from "@langchain/community/llms/gradient_ai"; diff --git a/langchain/src/llms/hf.ts b/langchain/src/llms/hf.ts index b0dd59b3a5d6..2f0e767bf6cd 100644 --- a/langchain/src/llms/hf.ts +++ b/langchain/src/llms/hf.ts @@ -1 +1 @@ -export * from 
"@langchain/community/llms/hf"; \ No newline at end of file +export * from "@langchain/community/llms/hf"; diff --git a/langchain/src/llms/llama_cpp.ts b/langchain/src/llms/llama_cpp.ts index 08380bd67f4d..ce09302c20d3 100644 --- a/langchain/src/llms/llama_cpp.ts +++ b/langchain/src/llms/llama_cpp.ts @@ -1 +1 @@ -export * from "@langchain/community/llms/llama_cpp"; \ No newline at end of file +export * from "@langchain/community/llms/llama_cpp"; diff --git a/langchain/src/llms/ollama.ts b/langchain/src/llms/ollama.ts index b636acd69fa3..1369ed4c0c60 100644 --- a/langchain/src/llms/ollama.ts +++ b/langchain/src/llms/ollama.ts @@ -1 +1 @@ -export * from "@langchain/community/llms/ollama"; \ No newline at end of file +export * from "@langchain/community/llms/ollama"; diff --git a/langchain/src/llms/portkey.ts b/langchain/src/llms/portkey.ts index f915efb8c94a..79b975e5fabc 100644 --- a/langchain/src/llms/portkey.ts +++ b/langchain/src/llms/portkey.ts @@ -1 +1 @@ -export * from "@langchain/community/llms/portkey"; \ No newline at end of file +export * from "@langchain/community/llms/portkey"; diff --git a/langchain/src/llms/raycast.ts b/langchain/src/llms/raycast.ts index df36240c49a0..a36430dc3817 100644 --- a/langchain/src/llms/raycast.ts +++ b/langchain/src/llms/raycast.ts @@ -1 +1 @@ -export * from "@langchain/community/llms/raycast"; \ No newline at end of file +export * from "@langchain/community/llms/raycast"; diff --git a/langchain/src/llms/replicate.ts b/langchain/src/llms/replicate.ts index a88ed8e5889c..72c1ca24a637 100644 --- a/langchain/src/llms/replicate.ts +++ b/langchain/src/llms/replicate.ts @@ -1 +1 @@ -export * from "@langchain/community/llms/replicate"; \ No newline at end of file +export * from "@langchain/community/llms/replicate"; diff --git a/langchain/src/llms/sagemaker_endpoint.ts b/langchain/src/llms/sagemaker_endpoint.ts index 6de4672bc0cd..f9a5e590c0fb 100644 --- a/langchain/src/llms/sagemaker_endpoint.ts +++ 
b/langchain/src/llms/sagemaker_endpoint.ts @@ -1 +1 @@ -export * from "@langchain/community/llms/sagemaker_endpoint"; \ No newline at end of file +export * from "@langchain/community/llms/sagemaker_endpoint"; diff --git a/langchain/src/llms/watsonx_ai.ts b/langchain/src/llms/watsonx_ai.ts index ccda3df79f20..741308ef5ffb 100644 --- a/langchain/src/llms/watsonx_ai.ts +++ b/langchain/src/llms/watsonx_ai.ts @@ -1 +1 @@ -export * from "@langchain/community/llms/watsonx_ai"; \ No newline at end of file +export * from "@langchain/community/llms/watsonx_ai"; diff --git a/langchain/src/llms/writer.ts b/langchain/src/llms/writer.ts index 202b24f2783a..225f212949d8 100644 --- a/langchain/src/llms/writer.ts +++ b/langchain/src/llms/writer.ts @@ -1 +1 @@ -export * from "@langchain/community/llms/writer"; \ No newline at end of file +export * from "@langchain/community/llms/writer"; diff --git a/langchain/src/llms/yandex.ts b/langchain/src/llms/yandex.ts index 495c0dcf55a3..d19f0acf6b0c 100644 --- a/langchain/src/llms/yandex.ts +++ b/langchain/src/llms/yandex.ts @@ -1 +1 @@ -export * from "@langchain/community/llms/yandex"; \ No newline at end of file +export * from "@langchain/community/llms/yandex"; diff --git a/langchain/src/load/index.ts b/langchain/src/load/index.ts index 0f4fe863a1e7..c0522c9bd151 100644 --- a/langchain/src/load/index.ts +++ b/langchain/src/load/index.ts @@ -7,7 +7,8 @@ export async function load( text: string, // eslint-disable-next-line @typescript-eslint/no-explicit-any secretsMap: Record = {}, - optionalImportsMap: OptionalImportMap = {} + // eslint-disable-next-line @typescript-eslint/no-explicit-any + optionalImportsMap: OptionalImportMap & Record = {} ): Promise { return coreLoad(text, { secretsMap, diff --git a/langchain/src/load/tests/__snapshots__/load.test.ts.snap b/langchain/src/load/tests/__snapshots__/load.test.ts.snap index cf24e88de6c3..0b2f7fcbe5e2 100644 --- a/langchain/src/load/tests/__snapshots__/load.test.ts.snap +++ 
b/langchain/src/load/tests/__snapshots__/load.test.ts.snap @@ -283,7 +283,7 @@ exports[`serialize + deserialize llm with optional deps 1`] = ` "lc: 1 type: constructor id: - - langchain + - langchain_community - llms - cohere - Cohere diff --git a/langchain/src/retrievers/amazon_kendra.ts b/langchain/src/retrievers/amazon_kendra.ts index 234e3e745f47..8a734c4eea6f 100644 --- a/langchain/src/retrievers/amazon_kendra.ts +++ b/langchain/src/retrievers/amazon_kendra.ts @@ -1 +1 @@ -export * from "@langchain/community/retrievers/amazon_kendra"; \ No newline at end of file +export * from "@langchain/community/retrievers/amazon_kendra"; diff --git a/langchain/src/retrievers/chaindesk.ts b/langchain/src/retrievers/chaindesk.ts index c62294c251df..f61fccbf7e10 100644 --- a/langchain/src/retrievers/chaindesk.ts +++ b/langchain/src/retrievers/chaindesk.ts @@ -1 +1 @@ -export * from "@langchain/community/retrievers/chaindesk"; \ No newline at end of file +export * from "@langchain/community/retrievers/chaindesk"; diff --git a/langchain/src/retrievers/databerry.ts b/langchain/src/retrievers/databerry.ts index 49932d11a3dd..39eb9679c95e 100644 --- a/langchain/src/retrievers/databerry.ts +++ b/langchain/src/retrievers/databerry.ts @@ -1 +1 @@ -export * from "@langchain/community/retrievers/databerry"; \ No newline at end of file +export * from "@langchain/community/retrievers/databerry"; diff --git a/langchain/src/retrievers/metal.ts b/langchain/src/retrievers/metal.ts index 92a8510d11b0..ea8dbb1f03e0 100644 --- a/langchain/src/retrievers/metal.ts +++ b/langchain/src/retrievers/metal.ts @@ -1 +1 @@ -export * from "@langchain/community/retrievers/metal"; \ No newline at end of file +export * from "@langchain/community/retrievers/metal"; diff --git a/langchain/src/retrievers/supabase.ts b/langchain/src/retrievers/supabase.ts index 9c18d35bf9ff..bd3ea5d07c7c 100644 --- a/langchain/src/retrievers/supabase.ts +++ b/langchain/src/retrievers/supabase.ts @@ -1 +1 @@ -export * from 
"@langchain/community/retrievers/supabase"; \ No newline at end of file +export * from "@langchain/community/retrievers/supabase"; diff --git a/langchain/src/retrievers/tavily_search_api.ts b/langchain/src/retrievers/tavily_search_api.ts index 1f906eeb638f..5a59061172d6 100644 --- a/langchain/src/retrievers/tavily_search_api.ts +++ b/langchain/src/retrievers/tavily_search_api.ts @@ -1 +1 @@ -export * from "@langchain/community/retrievers/tavily_search_api"; \ No newline at end of file +export * from "@langchain/community/retrievers/tavily_search_api"; diff --git a/langchain/src/retrievers/zep.ts b/langchain/src/retrievers/zep.ts index 6519b842da71..319cff8852a6 100644 --- a/langchain/src/retrievers/zep.ts +++ b/langchain/src/retrievers/zep.ts @@ -1 +1 @@ -export * from "@langchain/community/retrievers/zep"; \ No newline at end of file +export * from "@langchain/community/retrievers/zep"; diff --git a/langchain/src/tools/IFTTTWebhook.ts b/langchain/src/tools/IFTTTWebhook.ts index f5f2f2afaae3..a5795255d9d4 100644 --- a/langchain/src/tools/IFTTTWebhook.ts +++ b/langchain/src/tools/IFTTTWebhook.ts @@ -1 +1 @@ -export * from "@langchain/community/tools/ifttt"; \ No newline at end of file +export * from "@langchain/community/tools/ifttt"; diff --git a/langchain/src/tools/aiplugin.ts b/langchain/src/tools/aiplugin.ts index dd4e140e4892..47d7705c5eaf 100644 --- a/langchain/src/tools/aiplugin.ts +++ b/langchain/src/tools/aiplugin.ts @@ -1 +1 @@ -export * from "@langchain/community/tools/aiplugin"; \ No newline at end of file +export * from "@langchain/community/tools/aiplugin"; diff --git a/langchain/src/tools/aws_sfn.ts b/langchain/src/tools/aws_sfn.ts index f9cc54a9d737..5c3eb9d14e0f 100644 --- a/langchain/src/tools/aws_sfn.ts +++ b/langchain/src/tools/aws_sfn.ts @@ -1 +1 @@ -export * from "@langchain/community/tools/aws_sfn"; \ No newline at end of file +export * from "@langchain/community/tools/aws_sfn"; diff --git a/langchain/src/tools/bingserpapi.ts 
b/langchain/src/tools/bingserpapi.ts index 28d359da9662..378e52d4e2c3 100644 --- a/langchain/src/tools/bingserpapi.ts +++ b/langchain/src/tools/bingserpapi.ts @@ -1 +1 @@ -export * from "@langchain/community/tools/bingserpapi"; \ No newline at end of file +export * from "@langchain/community/tools/bingserpapi"; diff --git a/langchain/src/tools/brave_search.ts b/langchain/src/tools/brave_search.ts index d26347aed42b..07bfc4995109 100644 --- a/langchain/src/tools/brave_search.ts +++ b/langchain/src/tools/brave_search.ts @@ -1 +1 @@ -export * from "@langchain/community/tools/brave_search"; \ No newline at end of file +export * from "@langchain/community/tools/brave_search"; diff --git a/langchain/src/tools/connery.ts b/langchain/src/tools/connery.ts index 8b7542db9940..73bad45cebf7 100644 --- a/langchain/src/tools/connery.ts +++ b/langchain/src/tools/connery.ts @@ -1 +1 @@ -export * from "@langchain/community/tools/connery"; \ No newline at end of file +export * from "@langchain/community/tools/connery"; diff --git a/langchain/src/tools/dadjokeapi.ts b/langchain/src/tools/dadjokeapi.ts index c7ec315bdc27..8b5874b0e2b5 100644 --- a/langchain/src/tools/dadjokeapi.ts +++ b/langchain/src/tools/dadjokeapi.ts @@ -1 +1 @@ -export * from "@langchain/community/tools/dadjokeapi"; \ No newline at end of file +export * from "@langchain/community/tools/dadjokeapi"; diff --git a/langchain/src/tools/dataforseo_api_search.ts b/langchain/src/tools/dataforseo_api_search.ts index 344f916c14f3..00e1aedc3c18 100644 --- a/langchain/src/tools/dataforseo_api_search.ts +++ b/langchain/src/tools/dataforseo_api_search.ts @@ -1 +1 @@ -export * from "@langchain/community/tools/dataforseo_api_search"; \ No newline at end of file +export * from "@langchain/community/tools/dataforseo_api_search"; diff --git a/langchain/src/tools/gmail/index.ts b/langchain/src/tools/gmail/index.ts index 1756394505f0..1c913b663768 100644 --- a/langchain/src/tools/gmail/index.ts +++ b/langchain/src/tools/gmail/index.ts 
@@ -1 +1 @@ -export * from "@langchain/community/tools/gmail"; \ No newline at end of file +export * from "@langchain/community/tools/gmail"; diff --git a/langchain/src/tools/google_custom_search.ts b/langchain/src/tools/google_custom_search.ts index 7fc55bf7e3b7..5748c242bdaa 100644 --- a/langchain/src/tools/google_custom_search.ts +++ b/langchain/src/tools/google_custom_search.ts @@ -1 +1 @@ -export * from "@langchain/community/tools/google_custom_search"; \ No newline at end of file +export * from "@langchain/community/tools/google_custom_search"; diff --git a/langchain/src/tools/google_places.ts b/langchain/src/tools/google_places.ts index 518b22160db9..a5c7404f9810 100644 --- a/langchain/src/tools/google_places.ts +++ b/langchain/src/tools/google_places.ts @@ -1 +1 @@ -export * from "@langchain/community/tools/google_places"; \ No newline at end of file +export * from "@langchain/community/tools/google_places"; diff --git a/langchain/src/tools/searchapi.ts b/langchain/src/tools/searchapi.ts index 98eec816d9fb..b10e7a37472a 100644 --- a/langchain/src/tools/searchapi.ts +++ b/langchain/src/tools/searchapi.ts @@ -1 +1 @@ -export * from "@langchain/community/tools/searchapi"; \ No newline at end of file +export * from "@langchain/community/tools/searchapi"; diff --git a/langchain/src/tools/searxng_search.ts b/langchain/src/tools/searxng_search.ts index 6fe25a45e714..d792040f540a 100644 --- a/langchain/src/tools/searxng_search.ts +++ b/langchain/src/tools/searxng_search.ts @@ -1 +1 @@ -export * from "@langchain/community/tools/searxng_search"; \ No newline at end of file +export * from "@langchain/community/tools/searxng_search"; diff --git a/langchain/src/tools/serpapi.ts b/langchain/src/tools/serpapi.ts index b4b4c57b92e8..2c3ff83f9b21 100644 --- a/langchain/src/tools/serpapi.ts +++ b/langchain/src/tools/serpapi.ts @@ -1 +1 @@ -export * from "@langchain/community/tools/serpapi"; \ No newline at end of file +export * from "@langchain/community/tools/serpapi"; diff 
--git a/langchain/src/tools/serper.ts b/langchain/src/tools/serper.ts index e3b237d66e84..275f96a4bc15 100644 --- a/langchain/src/tools/serper.ts +++ b/langchain/src/tools/serper.ts @@ -1 +1 @@ -export * from "@langchain/community/tools/serper"; \ No newline at end of file +export * from "@langchain/community/tools/serper"; diff --git a/langchain/src/tools/wikipedia_query_run.ts b/langchain/src/tools/wikipedia_query_run.ts index 50a73f33b228..f80c7eefd903 100644 --- a/langchain/src/tools/wikipedia_query_run.ts +++ b/langchain/src/tools/wikipedia_query_run.ts @@ -1 +1 @@ -export * from "@langchain/community/tools/wikipedia_query_run"; \ No newline at end of file +export * from "@langchain/community/tools/wikipedia_query_run"; diff --git a/langchain/src/tools/wolframalpha.ts b/langchain/src/tools/wolframalpha.ts index fd5efb48da91..e0e76e072248 100644 --- a/langchain/src/tools/wolframalpha.ts +++ b/langchain/src/tools/wolframalpha.ts @@ -1 +1 @@ -export * from "@langchain/community/tools/wolframalpha"; \ No newline at end of file +export * from "@langchain/community/tools/wolframalpha"; diff --git a/langchain/src/util/event-source-parse.ts b/langchain/src/util/event-source-parse.ts index 5aaedc4c9164..0b74a11974ae 100644 --- a/langchain/src/util/event-source-parse.ts +++ b/langchain/src/util/event-source-parse.ts @@ -1 +1 @@ -export * from "@langchain/community/utils/event_source_parse"; \ No newline at end of file +export * from "@langchain/community/utils/event_source_parse"; diff --git a/libs/langchain-community/scripts/create-entrypoints.js b/libs/langchain-community/scripts/create-entrypoints.js index b01f87821f3c..f51a44b5e5f8 100644 --- a/libs/langchain-community/scripts/create-entrypoints.js +++ b/libs/langchain-community/scripts/create-entrypoints.js @@ -345,7 +345,7 @@ ${[...identifySecrets()] const importConstants = [ "langchain-community", - (k) => ` "@langchain/community/${k}"`, + (k) => ` "langchain_community/${k}"`, "src/load/import_constants.ts", ]; 
diff --git a/libs/langchain-community/src/chat_models/baiduwenxin.ts b/libs/langchain-community/src/chat_models/baiduwenxin.ts index b618af9a0bad..01a3719bcee7 100644 --- a/libs/langchain-community/src/chat_models/baiduwenxin.ts +++ b/libs/langchain-community/src/chat_models/baiduwenxin.ts @@ -191,7 +191,7 @@ export class ChatBaiduWenxin }; } - lc_namespace = ["langchain-community", "chat_models", this._llmType()]; + lc_namespace = ["langchain_community", "chat_models", this._llmType()]; get lc_aliases(): { [key: string]: string } | undefined { return undefined; diff --git a/libs/langchain-community/src/chat_models/bedrock/web.ts b/libs/langchain-community/src/chat_models/bedrock/web.ts index a6a8b224c469..404cb29cb0d4 100644 --- a/libs/langchain-community/src/chat_models/bedrock/web.ts +++ b/libs/langchain-community/src/chat_models/bedrock/web.ts @@ -125,7 +125,7 @@ export class BedrockChat extends SimpleChatModel implements BaseBedrockInput { lc_serializable = true; - lc_namespace = ["langchain-community", "chat_models", this._llmType()]; + lc_namespace = ["langchain_community", "chat_models", this._llmType()]; get lc_aliases(): Record { return { diff --git a/libs/langchain-community/src/chat_models/cloudflare_workersai.ts b/libs/langchain-community/src/chat_models/cloudflare_workersai.ts index c85b14cc27bc..f46c0addb47c 100644 --- a/libs/langchain-community/src/chat_models/cloudflare_workersai.ts +++ b/libs/langchain-community/src/chat_models/cloudflare_workersai.ts @@ -52,7 +52,7 @@ export class ChatCloudflareWorkersAI lc_serializable = true; - lc_namespace = ["langchain-community", "chat_models", this._llmType()]; + lc_namespace = ["langchain_community", "chat_models", this._llmType()]; model = "@cf/meta/llama-2-7b-chat-int8"; diff --git a/libs/langchain-community/src/chat_models/fireworks.ts b/libs/langchain-community/src/chat_models/fireworks.ts index f40894b12de2..ac7290d84570 100644 --- a/libs/langchain-community/src/chat_models/fireworks.ts +++ 
b/libs/langchain-community/src/chat_models/fireworks.ts @@ -57,7 +57,7 @@ export class ChatFireworks extends ChatOpenAI { lc_serializable = true; - lc_namespace = ["langchain-community", "chat_models", this._llmType()]; + lc_namespace = ["langchain_community", "chat_models", this._llmType()]; fireworksApiKey?: string; diff --git a/libs/langchain-community/src/chat_models/googlepalm.ts b/libs/langchain-community/src/chat_models/googlepalm.ts index 21839a578ace..77aa99c31266 100644 --- a/libs/langchain-community/src/chat_models/googlepalm.ts +++ b/libs/langchain-community/src/chat_models/googlepalm.ts @@ -124,7 +124,7 @@ export class ChatGooglePaLM lc_serializable = true; - lc_namespace = ["langchain-community", "chat_models", this._llmType()]; + lc_namespace = ["langchain_community", "chat_models", this._llmType()]; get lc_secrets(): { [key: string]: string } | undefined { return { diff --git a/libs/langchain-community/src/chat_models/googlevertexai/common.ts b/libs/langchain-community/src/chat_models/googlevertexai/common.ts index 82bf1b3f99e4..760430884fe3 100644 --- a/libs/langchain-community/src/chat_models/googlevertexai/common.ts +++ b/libs/langchain-community/src/chat_models/googlevertexai/common.ts @@ -214,7 +214,7 @@ export class BaseChatGoogleVertexAI AuthOptions >; - lc_namespace = ["langchain-community", "chat_models", this._llmType()]; + lc_namespace = ["langchain_community", "chat_models", this._llmType()]; get lc_aliases(): Record { return { diff --git a/libs/langchain-community/src/chat_models/iflytek_xinghuo/common.ts b/libs/langchain-community/src/chat_models/iflytek_xinghuo/common.ts index d88d1ad696b1..4501ee2a9fac 100644 --- a/libs/langchain-community/src/chat_models/iflytek_xinghuo/common.ts +++ b/libs/langchain-community/src/chat_models/iflytek_xinghuo/common.ts @@ -187,7 +187,7 @@ export abstract class BaseChatIflytekXinghuo return ["stop", "signal", "options"]; } - lc_namespace = ["langchain-community", "chat_models", this._llmType()]; + 
lc_namespace = ["langchain_community", "chat_models", this._llmType()]; get lc_secrets(): { [key: string]: string } | undefined { return { diff --git a/libs/langchain-community/src/chat_models/llama_cpp.ts b/libs/langchain-community/src/chat_models/llama_cpp.ts index 6ecd35200cbf..05a95d85ff60 100644 --- a/libs/langchain-community/src/chat_models/llama_cpp.ts +++ b/libs/langchain-community/src/chat_models/llama_cpp.ts @@ -84,7 +84,7 @@ export class ChatLlamaCpp extends SimpleChatModel { lc_serializable = true; - lc_namespace = ["langchain-community", "chat_models", this._llmType()]; + lc_namespace = ["langchain_community", "chat_models", this._llmType()]; static lc_name() { return "ChatLlamaCpp"; diff --git a/libs/langchain-community/src/chat_models/minimax.ts b/libs/langchain-community/src/chat_models/minimax.ts index 2435f1f83fd1..12d4347c9739 100644 --- a/libs/langchain-community/src/chat_models/minimax.ts +++ b/libs/langchain-community/src/chat_models/minimax.ts @@ -332,7 +332,7 @@ export class ChatMinimax lc_serializable = true; - lc_namespace = ["langchain-community", "chat_models", this._llmType()]; + lc_namespace = ["langchain_community", "chat_models", this._llmType()]; minimaxGroupId?: string; diff --git a/libs/langchain-community/src/chat_models/ollama.ts b/libs/langchain-community/src/chat_models/ollama.ts index 0eb695075354..13c03be23c0d 100644 --- a/libs/langchain-community/src/chat_models/ollama.ts +++ b/libs/langchain-community/src/chat_models/ollama.ts @@ -59,7 +59,7 @@ export class ChatOllama lc_serializable = true; - lc_namespace = ["langchain-community", "chat_models", this._llmType()]; + lc_namespace = ["langchain_community", "chat_models", this._llmType()]; model = "llama2"; diff --git a/libs/langchain-community/src/chat_models/portkey.ts b/libs/langchain-community/src/chat_models/portkey.ts index ceb6d6996e12..67076cc50385 100644 --- a/libs/langchain-community/src/chat_models/portkey.ts +++ 
b/libs/langchain-community/src/chat_models/portkey.ts @@ -81,7 +81,7 @@ export class PortkeyChat extends BaseChatModel { session: PortkeySession; - lc_namespace = ["langchain-community", "chat_models", this._llmType()]; + lc_namespace = ["langchain_community", "chat_models", this._llmType()]; constructor(init?: Partial) { super(init ?? {}); diff --git a/libs/langchain-community/src/index.ts b/libs/langchain-community/src/index.ts index 23ca3db4bfe4..d15abba59766 100644 --- a/libs/langchain-community/src/index.ts +++ b/libs/langchain-community/src/index.ts @@ -1 +1 @@ -// Empty \ No newline at end of file +// Empty diff --git a/libs/langchain-community/src/llms/ai21.ts b/libs/langchain-community/src/llms/ai21.ts index 6f14495c0140..0dcd1169fcf5 100644 --- a/libs/langchain-community/src/llms/ai21.ts +++ b/libs/langchain-community/src/llms/ai21.ts @@ -40,7 +40,7 @@ export interface AI21Input extends BaseLLMParams { export class AI21 extends LLM implements AI21Input { lc_serializable = true; - lc_namespace = ["langchain-community", "llms", this._llmType()]; + lc_namespace = ["langchain_community", "llms", this._llmType()]; model = "j2-jumbo-instruct"; diff --git a/libs/langchain-community/src/llms/aleph_alpha.ts b/libs/langchain-community/src/llms/aleph_alpha.ts index f53e0f19484c..531111ae6092 100644 --- a/libs/langchain-community/src/llms/aleph_alpha.ts +++ b/libs/langchain-community/src/llms/aleph_alpha.ts @@ -51,7 +51,7 @@ export interface AlephAlphaInput extends BaseLLMParams { export class AlephAlpha extends LLM implements AlephAlphaInput { lc_serializable = true; - lc_namespace = ["langchain-community", "llms", this._llmType()]; + lc_namespace = ["langchain_community", "llms", this._llmType()]; model = "luminous-base"; diff --git a/libs/langchain-community/src/llms/bedrock/web.ts b/libs/langchain-community/src/llms/bedrock/web.ts index 638fe4ce91fe..326bbbefc3b2 100644 --- a/libs/langchain-community/src/llms/bedrock/web.ts +++ 
b/libs/langchain-community/src/llms/bedrock/web.ts @@ -52,7 +52,7 @@ export class Bedrock extends LLM implements BaseBedrockInput { lc_serializable = true; - lc_namespace = ["langchain-community", "llms", this._llmType()]; + lc_namespace = ["langchain_community", "llms", this._llmType()]; get lc_aliases(): Record { return { diff --git a/libs/langchain-community/src/llms/cloudflare_workersai.ts b/libs/langchain-community/src/llms/cloudflare_workersai.ts index 66a053e482cf..456271c28737 100644 --- a/libs/langchain-community/src/llms/cloudflare_workersai.ts +++ b/libs/langchain-community/src/llms/cloudflare_workersai.ts @@ -41,7 +41,7 @@ export class CloudflareWorkersAI lc_serializable = true; - lc_namespace = ["langchain-community", "llms", this._llmType()]; + lc_namespace = ["langchain_community", "llms", this._llmType()]; constructor(fields?: CloudflareWorkersAIInput & BaseLLMParams) { super(fields ?? {}); diff --git a/libs/langchain-community/src/llms/cohere.ts b/libs/langchain-community/src/llms/cohere.ts index 2f9e86d573d3..1fb88505a19f 100644 --- a/libs/langchain-community/src/llms/cohere.ts +++ b/libs/langchain-community/src/llms/cohere.ts @@ -55,7 +55,7 @@ export class Cohere extends LLM implements CohereInput { lc_serializable = true; - lc_namespace = ["langchain-community", "llms", this._llmType()]; + lc_namespace = ["langchain_community", "llms", this._llmType()]; temperature = 0; diff --git a/libs/langchain-community/src/llms/fireworks.ts b/libs/langchain-community/src/llms/fireworks.ts index ae53c84eb693..4f1cd411ffdc 100644 --- a/libs/langchain-community/src/llms/fireworks.ts +++ b/libs/langchain-community/src/llms/fireworks.ts @@ -46,7 +46,7 @@ export class Fireworks extends OpenAI { lc_serializable = true; - lc_namespace = ["langchain-community", "llms", this._llmType()]; + lc_namespace = ["langchain_community", "llms", this._llmType()]; fireworksApiKey?: string; diff --git a/libs/langchain-community/src/llms/googlepalm.ts 
b/libs/langchain-community/src/llms/googlepalm.ts index 626a0902b54c..66042a38f67c 100644 --- a/libs/langchain-community/src/llms/googlepalm.ts +++ b/libs/langchain-community/src/llms/googlepalm.ts @@ -86,7 +86,7 @@ export interface GooglePaLMTextInput extends BaseLLMParams { export class GooglePaLM extends LLM implements GooglePaLMTextInput { lc_serializable = true; - lc_namespace = ["langchain-community", "llms", this._llmType()]; + lc_namespace = ["langchain_community", "llms", this._llmType()]; get lc_secrets(): { [key: string]: string } | undefined { return { diff --git a/libs/langchain-community/src/llms/googlevertexai/common.ts b/libs/langchain-community/src/llms/googlevertexai/common.ts index 90660fb2dba3..99f6301fdeb0 100644 --- a/libs/langchain-community/src/llms/googlevertexai/common.ts +++ b/libs/langchain-community/src/llms/googlevertexai/common.ts @@ -61,7 +61,7 @@ export class BaseGoogleVertexAI { lc_serializable = true; - lc_namespace = ["langchain-community", "llms", this._llmType()]; + lc_namespace = ["langchain_community", "llms", this._llmType()]; model = "text-bison"; diff --git a/libs/langchain-community/src/llms/gradient_ai.ts b/libs/langchain-community/src/llms/gradient_ai.ts index 028df1ff87ae..6be6fd1e92c3 100644 --- a/libs/langchain-community/src/llms/gradient_ai.ts +++ b/libs/langchain-community/src/llms/gradient_ai.ts @@ -63,7 +63,7 @@ export class GradientLLM extends LLM { lc_serializable = true; - lc_namespace = ["langchain-community", "llms", this._llmType()]; + lc_namespace = ["langchain_community", "llms", this._llmType()]; // Gradient AI does not export the BaseModel type. Once it does, we can use it here. 
// eslint-disable-next-line @typescript-eslint/no-explicit-any diff --git a/libs/langchain-community/src/llms/hf.ts b/libs/langchain-community/src/llms/hf.ts index d2833193065b..0acb17179af3 100644 --- a/libs/langchain-community/src/llms/hf.ts +++ b/libs/langchain-community/src/llms/hf.ts @@ -58,7 +58,7 @@ export interface HFInput { export class HuggingFaceInference extends LLM implements HFInput { lc_serializable = true; - lc_namespace = ["langchain-community", "llms", this._llmType()]; + lc_namespace = ["langchain_community", "llms", this._llmType()]; get lc_secrets(): { [key: string]: string } | undefined { return { diff --git a/libs/langchain-community/src/llms/llama_cpp.ts b/libs/langchain-community/src/llms/llama_cpp.ts index 74c3397a408d..3ce3a543104d 100644 --- a/libs/langchain-community/src/llms/llama_cpp.ts +++ b/libs/langchain-community/src/llms/llama_cpp.ts @@ -36,7 +36,7 @@ export interface LlamaCppCallOptions extends BaseLLMCallOptions { export class LlamaCpp extends LLM { lc_serializable = true; - lc_namespace = ["langchain-community", "llms", this._llmType()]; + lc_namespace = ["langchain_community", "llms", this._llmType()]; declare CallOptions: LlamaCppCallOptions; diff --git a/libs/langchain-community/src/llms/ollama.ts b/libs/langchain-community/src/llms/ollama.ts index 9b769e18b35c..9df3655f34d0 100644 --- a/libs/langchain-community/src/llms/ollama.ts +++ b/libs/langchain-community/src/llms/ollama.ts @@ -39,7 +39,7 @@ export class Ollama extends LLM implements OllamaInput { lc_serializable = true; - lc_namespace = ["langchain-community", "llms", this._llmType()]; + lc_namespace = ["langchain_community", "llms", this._llmType()]; model = "llama2"; diff --git a/libs/langchain-community/src/llms/portkey.ts b/libs/langchain-community/src/llms/portkey.ts index f2158c1eb720..cd3d347eb892 100644 --- a/libs/langchain-community/src/llms/portkey.ts +++ b/libs/langchain-community/src/llms/portkey.ts @@ -97,7 +97,7 @@ export class Portkey extends BaseLLM { 
session: PortkeySession; - lc_namespace = ["langchain-community", "llms", this._llmType()]; + lc_namespace = ["langchain_community", "llms", this._llmType()]; constructor(init?: Partial) { super(init ?? {}); diff --git a/libs/langchain-community/src/llms/raycast.ts b/libs/langchain-community/src/llms/raycast.ts index f2bcee6b85d5..d66b762d15b1 100644 --- a/libs/langchain-community/src/llms/raycast.ts +++ b/libs/langchain-community/src/llms/raycast.ts @@ -21,7 +21,7 @@ const wait = (ms: number) => export class RaycastAI extends LLM implements RaycastAIInput { lc_serializable = true; - lc_namespace = ["langchain-community", "llms", this._llmType()]; + lc_namespace = ["langchain_community", "llms", this._llmType()]; /** * The model to use for generating text. diff --git a/libs/langchain-community/src/llms/replicate.ts b/libs/langchain-community/src/llms/replicate.ts index 7656993e1b49..e062ac10cddb 100644 --- a/libs/langchain-community/src/llms/replicate.ts +++ b/libs/langchain-community/src/llms/replicate.ts @@ -51,7 +51,7 @@ export class Replicate extends LLM implements ReplicateInput { lc_serializable = true; - lc_namespace = ["langchain-community", "llms", this._llmType()]; + lc_namespace = ["langchain_community", "llms", this._llmType()]; model: ReplicateInput["model"]; diff --git a/libs/langchain-community/src/llms/sagemaker_endpoint.ts b/libs/langchain-community/src/llms/sagemaker_endpoint.ts index 10aa46484a19..ea3c89a1167f 100644 --- a/libs/langchain-community/src/llms/sagemaker_endpoint.ts +++ b/libs/langchain-community/src/llms/sagemaker_endpoint.ts @@ -112,7 +112,7 @@ export interface SageMakerEndpointInput extends BaseLLMParams { export class SageMakerEndpoint extends LLM { lc_serializable = true; - lc_namespace = ["langchain-community", "llms", this._llmType()]; + lc_namespace = ["langchain_community", "llms", this._llmType()]; static lc_name() { return "SageMakerEndpoint"; diff --git a/libs/langchain-community/src/llms/watsonx_ai.ts 
b/libs/langchain-community/src/llms/watsonx_ai.ts index bf64a470be3a..add00a4eb39c 100644 --- a/libs/langchain-community/src/llms/watsonx_ai.ts +++ b/libs/langchain-community/src/llms/watsonx_ai.ts @@ -58,7 +58,7 @@ const endpointConstructor = (region: string, version: string) => export class WatsonxAI extends LLM { lc_serializable = true; - lc_namespace = ["langchain-community", "llms", this._llmType()]; + lc_namespace = ["langchain_community", "llms", this._llmType()]; static lc_name() { return "WatsonxAI"; diff --git a/libs/langchain-community/src/llms/writer.ts b/libs/langchain-community/src/llms/writer.ts index aa7a7a63b9c0..5da73f15c7af 100644 --- a/libs/langchain-community/src/llms/writer.ts +++ b/libs/langchain-community/src/llms/writer.ts @@ -69,7 +69,7 @@ export class Writer extends LLM implements WriterInput { lc_serializable = true; - lc_namespace = ["langchain-community", "llms", this._llmType()]; + lc_namespace = ["langchain_community", "llms", this._llmType()]; apiKey: string; diff --git a/libs/langchain-community/src/llms/yandex.ts b/libs/langchain-community/src/llms/yandex.ts index 6482035aa128..0ccd1471d648 100644 --- a/libs/langchain-community/src/llms/yandex.ts +++ b/libs/langchain-community/src/llms/yandex.ts @@ -35,7 +35,7 @@ export interface YandexGPTInputs extends BaseLLMParams { export class YandexGPT extends LLM implements YandexGPTInputs { lc_serializable = true; - lc_namespace = ["langchain-community", "llms", this._llmType()]; + lc_namespace = ["langchain_community", "llms", this._llmType()]; static lc_name() { return "Yandex GPT"; diff --git a/libs/langchain-community/src/load/import_constants.ts b/libs/langchain-community/src/load/import_constants.ts index 079aebba8ff5..79405fe1d31b 100644 --- a/libs/langchain-community/src/load/import_constants.ts +++ b/libs/langchain-community/src/load/import_constants.ts @@ -1,67 +1,67 @@ // Auto-generated by `scripts/create-entrypoints.js`. Do not edit manually. 
export const optionalImportEntrypoints = [ - "@langchain/community/tools/aws_sfn", - "@langchain/community/tools/gmail", - "@langchain/community/embeddings/bedrock", - "@langchain/community/embeddings/cloudflare_workersai", - "@langchain/community/embeddings/cohere", - "@langchain/community/embeddings/googlepalm", - "@langchain/community/embeddings/googlevertexai", - "@langchain/community/embeddings/gradient_ai", - "@langchain/community/embeddings/hf", - "@langchain/community/embeddings/hf_transformers", - "@langchain/community/embeddings/llama_cpp", - "@langchain/community/embeddings/tensorflow", - "@langchain/community/llms/bedrock", - "@langchain/community/llms/bedrock/web", - "@langchain/community/llms/cohere", - "@langchain/community/llms/googlepalm", - "@langchain/community/llms/googlevertexai", - "@langchain/community/llms/googlevertexai/web", - "@langchain/community/llms/gradient_ai", - "@langchain/community/llms/hf", - "@langchain/community/llms/llama_cpp", - "@langchain/community/llms/portkey", - "@langchain/community/llms/raycast", - "@langchain/community/llms/replicate", - "@langchain/community/llms/sagemaker_endpoint", - "@langchain/community/llms/watsonx_ai", - "@langchain/community/llms/writer", - "@langchain/community/vectorstores/analyticdb", - "@langchain/community/vectorstores/cassandra", - "@langchain/community/vectorstores/chroma", - "@langchain/community/vectorstores/clickhouse", - "@langchain/community/vectorstores/cloudflare_vectorize", - "@langchain/community/vectorstores/convex", - "@langchain/community/vectorstores/elasticsearch", - "@langchain/community/vectorstores/lancedb", - "@langchain/community/vectorstores/milvus", - "@langchain/community/vectorstores/myscale", - "@langchain/community/vectorstores/neo4j_vector", - "@langchain/community/vectorstores/opensearch", - "@langchain/community/vectorstores/qdrant", - "@langchain/community/vectorstores/redis", - "@langchain/community/vectorstores/rockset", - 
"@langchain/community/vectorstores/singlestore", - "@langchain/community/vectorstores/tigris", - "@langchain/community/vectorstores/typeorm", - "@langchain/community/vectorstores/typesense", - "@langchain/community/vectorstores/vercel_postgres", - "@langchain/community/vectorstores/voy", - "@langchain/community/chat_models/bedrock", - "@langchain/community/chat_models/bedrock/web", - "@langchain/community/chat_models/googlevertexai", - "@langchain/community/chat_models/googlevertexai/web", - "@langchain/community/chat_models/googlepalm", - "@langchain/community/chat_models/iflytek_xinghuo", - "@langchain/community/chat_models/iflytek_xinghuo/web", - "@langchain/community/chat_models/llama_cpp", - "@langchain/community/chat_models/portkey", - "@langchain/community/callbacks/handlers/llmonitor", - "@langchain/community/retrievers/amazon_kendra", - "@langchain/community/retrievers/metal", - "@langchain/community/retrievers/supabase", - "@langchain/community/retrievers/zep", - "@langchain/community/graphs/neo4j_graph", + "langchain_community/tools/aws_sfn", + "langchain_community/tools/gmail", + "langchain_community/embeddings/bedrock", + "langchain_community/embeddings/cloudflare_workersai", + "langchain_community/embeddings/cohere", + "langchain_community/embeddings/googlepalm", + "langchain_community/embeddings/googlevertexai", + "langchain_community/embeddings/gradient_ai", + "langchain_community/embeddings/hf", + "langchain_community/embeddings/hf_transformers", + "langchain_community/embeddings/llama_cpp", + "langchain_community/embeddings/tensorflow", + "langchain_community/llms/bedrock", + "langchain_community/llms/bedrock/web", + "langchain_community/llms/cohere", + "langchain_community/llms/googlepalm", + "langchain_community/llms/googlevertexai", + "langchain_community/llms/googlevertexai/web", + "langchain_community/llms/gradient_ai", + "langchain_community/llms/hf", + "langchain_community/llms/llama_cpp", + "langchain_community/llms/portkey", + 
"langchain_community/llms/raycast", + "langchain_community/llms/replicate", + "langchain_community/llms/sagemaker_endpoint", + "langchain_community/llms/watsonx_ai", + "langchain_community/llms/writer", + "langchain_community/vectorstores/analyticdb", + "langchain_community/vectorstores/cassandra", + "langchain_community/vectorstores/chroma", + "langchain_community/vectorstores/clickhouse", + "langchain_community/vectorstores/cloudflare_vectorize", + "langchain_community/vectorstores/convex", + "langchain_community/vectorstores/elasticsearch", + "langchain_community/vectorstores/lancedb", + "langchain_community/vectorstores/milvus", + "langchain_community/vectorstores/myscale", + "langchain_community/vectorstores/neo4j_vector", + "langchain_community/vectorstores/opensearch", + "langchain_community/vectorstores/qdrant", + "langchain_community/vectorstores/redis", + "langchain_community/vectorstores/rockset", + "langchain_community/vectorstores/singlestore", + "langchain_community/vectorstores/tigris", + "langchain_community/vectorstores/typeorm", + "langchain_community/vectorstores/typesense", + "langchain_community/vectorstores/vercel_postgres", + "langchain_community/vectorstores/voy", + "langchain_community/chat_models/bedrock", + "langchain_community/chat_models/bedrock/web", + "langchain_community/chat_models/googlevertexai", + "langchain_community/chat_models/googlevertexai/web", + "langchain_community/chat_models/googlepalm", + "langchain_community/chat_models/iflytek_xinghuo", + "langchain_community/chat_models/iflytek_xinghuo/web", + "langchain_community/chat_models/llama_cpp", + "langchain_community/chat_models/portkey", + "langchain_community/callbacks/handlers/llmonitor", + "langchain_community/retrievers/amazon_kendra", + "langchain_community/retrievers/metal", + "langchain_community/retrievers/supabase", + "langchain_community/retrievers/zep", + "langchain_community/graphs/neo4j_graph", ]; diff --git a/libs/langchain-community/src/load/index.ts 
b/libs/langchain-community/src/load/index.ts index 0f4fe863a1e7..eaf66e52d699 100644 --- a/libs/langchain-community/src/load/index.ts +++ b/libs/langchain-community/src/load/index.ts @@ -1,18 +1,3 @@ -import { load as coreLoad } from "@langchain/core/load"; -import { optionalImportEntrypoints } from "./import_constants.js"; -import * as importMap from "./import_map.js"; -import { OptionalImportMap } from "./import_type.js"; - -export async function load( - text: string, - // eslint-disable-next-line @typescript-eslint/no-explicit-any - secretsMap: Record = {}, - optionalImportsMap: OptionalImportMap = {} -): Promise { - return coreLoad(text, { - secretsMap, - optionalImportsMap, - optionalImportEntrypoints, - importMap, - }); -} +export { type OptionalImportMap, type SecretMap } from "./import_type.js"; +export * as importMap from "./import_map.js"; +export { optionalImportEntrypoints } from "./import_constants.js"; diff --git a/libs/langchain-community/src/tools/aiplugin.ts b/libs/langchain-community/src/tools/aiplugin.ts index 394d557df7ff..d924bc482418 100644 --- a/libs/langchain-community/src/tools/aiplugin.ts +++ b/libs/langchain-community/src/tools/aiplugin.ts @@ -20,7 +20,7 @@ export class AIPluginTool extends Tool implements AIPluginToolParams { } get lc_namespace() { - return ["langchain-community", "tools"]; + return ["langchain_community", "tools"]; } private _name: string; diff --git a/libs/langchain-community/src/tools/aws_sfn.ts b/libs/langchain-community/src/tools/aws_sfn.ts index 4af70e7d784b..1b83d1b4a7f5 100644 --- a/libs/langchain-community/src/tools/aws_sfn.ts +++ b/libs/langchain-community/src/tools/aws_sfn.ts @@ -43,7 +43,7 @@ export class StartExecutionAWSSfnTool extends Tool { public description: string; get lc_namespace() { - return ["langchain-community", "tools"]; + return ["langchain_community", "tools"]; } constructor({ diff --git a/libs/langchain-community/src/tools/bingserpapi.ts b/libs/langchain-community/src/tools/bingserpapi.ts 
index 734994c42fab..23846cbbd62d 100644 --- a/libs/langchain-community/src/tools/bingserpapi.ts +++ b/libs/langchain-community/src/tools/bingserpapi.ts @@ -14,7 +14,7 @@ class BingSerpAPI extends Tool { } get lc_namespace() { - return ["langchain-community", "tools"]; + return ["langchain_community", "tools"]; } /** diff --git a/libs/langchain-community/src/tools/brave_search.ts b/libs/langchain-community/src/tools/brave_search.ts index ebd42496bf9e..8abd0d0aff94 100644 --- a/libs/langchain-community/src/tools/brave_search.ts +++ b/libs/langchain-community/src/tools/brave_search.ts @@ -21,7 +21,7 @@ export class BraveSearch extends Tool { } get lc_namespace() { - return ["langchain-community", "tools"]; + return ["langchain_community", "tools"]; } name = "brave-search"; diff --git a/libs/langchain-community/src/tools/connery.ts b/libs/langchain-community/src/tools/connery.ts index 307da0cfc4ca..90a6f1320a08 100644 --- a/libs/langchain-community/src/tools/connery.ts +++ b/libs/langchain-community/src/tools/connery.ts @@ -69,7 +69,7 @@ type RunActionResult = { */ export class ConneryAction extends Tool { get lc_namespace() { - return ["langchain-community", "tools"]; + return ["langchain_community", "tools"]; } name: string; diff --git a/libs/langchain-community/src/tools/dadjokeapi.ts b/libs/langchain-community/src/tools/dadjokeapi.ts index a8c970267449..aa6950b27065 100644 --- a/libs/langchain-community/src/tools/dadjokeapi.ts +++ b/libs/langchain-community/src/tools/dadjokeapi.ts @@ -12,7 +12,7 @@ class DadJokeAPI extends Tool { } get lc_namespace() { - return ["langchain-community", "tools"]; + return ["langchain_community", "tools"]; } name = "dadjoke"; diff --git a/libs/langchain-community/src/tools/dataforseo_api_search.ts b/libs/langchain-community/src/tools/dataforseo_api_search.ts index 05d3d7934aa2..bb7f94467547 100644 --- a/libs/langchain-community/src/tools/dataforseo_api_search.ts +++ b/libs/langchain-community/src/tools/dataforseo_api_search.ts @@ 
-102,7 +102,7 @@ export class DataForSeoAPISearch extends Tool { } get lc_namespace() { - return ["langchain-community", "tools"]; + return ["langchain_community", "tools"]; } name = "dataforseo-api-wrapper"; diff --git a/libs/langchain-community/src/tools/gmail/base.ts b/libs/langchain-community/src/tools/gmail/base.ts index 5009768fcbb1..824222fc3828 100644 --- a/libs/langchain-community/src/tools/gmail/base.ts +++ b/libs/langchain-community/src/tools/gmail/base.ts @@ -47,7 +47,7 @@ export abstract class GmailBaseTool extends StructuredTool { description = "A tool to send and view emails through Gmail"; get lc_namespace() { - return ["langchain-community", "tools"]; + return ["langchain_community", "tools"]; } protected gmail: gmail_v1.Gmail; diff --git a/libs/langchain-community/src/tools/google_custom_search.ts b/libs/langchain-community/src/tools/google_custom_search.ts index 4c4ba2ea8cbc..5a7c8cd8f540 100644 --- a/libs/langchain-community/src/tools/google_custom_search.ts +++ b/libs/langchain-community/src/tools/google_custom_search.ts @@ -26,7 +26,7 @@ export class GoogleCustomSearch extends Tool { } get lc_namespace() { - return ["langchain-community", "tools"]; + return ["langchain_community", "tools"]; } name = "google-custom-search"; diff --git a/libs/langchain-community/src/tools/google_places.ts b/libs/langchain-community/src/tools/google_places.ts index ecf4aebbbcf3..8e297ef60c47 100644 --- a/libs/langchain-community/src/tools/google_places.ts +++ b/libs/langchain-community/src/tools/google_places.ts @@ -17,7 +17,7 @@ export class GooglePlacesAPI extends Tool { } get lc_namespace() { - return ["langchain-community", "tools"]; + return ["langchain_community", "tools"]; } get lc_secrets(): { [key: string]: string } | undefined { diff --git a/libs/langchain-community/src/tools/ifttt.ts b/libs/langchain-community/src/tools/ifttt.ts index 231f3d97b32a..c34f5b566b22 100644 --- a/libs/langchain-community/src/tools/ifttt.ts +++ 
b/libs/langchain-community/src/tools/ifttt.ts @@ -46,7 +46,7 @@ export class IFTTTWebhook extends Tool { } get lc_namespace() { - return ["langchain-community", "tools"]; + return ["langchain_community", "tools"]; } private url: string; diff --git a/libs/langchain-community/src/tools/searchapi.ts b/libs/langchain-community/src/tools/searchapi.ts index b404e686fe7b..6329b892a8e7 100644 --- a/libs/langchain-community/src/tools/searchapi.ts +++ b/libs/langchain-community/src/tools/searchapi.ts @@ -68,7 +68,7 @@ export class SearchApi extends Tool { } get lc_namespace() { - return ["langchain-community", "tools"]; + return ["langchain_community", "tools"]; } /** diff --git a/libs/langchain-community/src/tools/searxng_search.ts b/libs/langchain-community/src/tools/searxng_search.ts index 6be674fef1ce..1b8932fab86b 100644 --- a/libs/langchain-community/src/tools/searxng_search.ts +++ b/libs/langchain-community/src/tools/searxng_search.ts @@ -117,7 +117,7 @@ export class SearxngSearch extends Tool { } get lc_namespace() { - return ["langchain-community", "tools"]; + return ["langchain_community", "tools"]; } name = "searxng-search"; diff --git a/libs/langchain-community/src/tools/serpapi.ts b/libs/langchain-community/src/tools/serpapi.ts index 06ac92d619ba..6f0d29dcd52a 100644 --- a/libs/langchain-community/src/tools/serpapi.ts +++ b/libs/langchain-community/src/tools/serpapi.ts @@ -295,7 +295,7 @@ export class SerpAPI extends Tool { } get lc_namespace() { - return ["langchain-community", "tools"]; + return ["langchain_community", "tools"]; } toJSON() { diff --git a/libs/langchain-community/src/tools/serper.ts b/libs/langchain-community/src/tools/serper.ts index 5c9327938ec2..179796665ca5 100644 --- a/libs/langchain-community/src/tools/serper.ts +++ b/libs/langchain-community/src/tools/serper.ts @@ -23,7 +23,7 @@ export class Serper extends Tool { } get lc_namespace() { - return ["langchain-community", "tools"]; + return ["langchain_community", "tools"]; } /** diff --git 
a/libs/langchain-community/src/tools/wikipedia_query_run.ts b/libs/langchain-community/src/tools/wikipedia_query_run.ts index 9106989cf381..127eb6d1ec14 100644 --- a/libs/langchain-community/src/tools/wikipedia_query_run.ts +++ b/libs/langchain-community/src/tools/wikipedia_query_run.ts @@ -70,7 +70,7 @@ export class WikipediaQueryRun extends Tool { } get lc_namespace() { - return ["langchain-community", "tools"]; + return ["langchain_community", "tools"]; } name = "wikipedia-api"; diff --git a/libs/langchain-community/src/tools/wolframalpha.ts b/libs/langchain-community/src/tools/wolframalpha.ts index 2e5390a834f0..f8816d6b53f7 100644 --- a/libs/langchain-community/src/tools/wolframalpha.ts +++ b/libs/langchain-community/src/tools/wolframalpha.ts @@ -23,7 +23,7 @@ export class WolframAlphaTool extends Tool { } get lc_namespace() { - return ["langchain-community", "tools"]; + return ["langchain_community", "tools"]; } static lc_name() { diff --git a/yarn.lock b/yarn.lock index c47a6b953e56..d4b4909be14f 100644 --- a/yarn.lock +++ b/yarn.lock @@ -7997,7 +7997,7 @@ __metadata: languageName: unknown linkType: soft -"@langchain/community@workspace:*, @langchain/community@workspace:libs/langchain-community": +"@langchain/community@workspace:libs/langchain-community, @langchain/community@~0.0.0": version: 0.0.0-use.local resolution: "@langchain/community@workspace:libs/langchain-community" dependencies: @@ -23108,7 +23108,7 @@ __metadata: "@gradientai/nodejs-sdk": ^1.2.0 "@huggingface/inference": ^2.6.4 "@jest/globals": ^29.5.0 - "@langchain/community": "workspace:*" + "@langchain/community": ~0.0.0 "@langchain/core": ~0.0.10 "@mozilla/readability": ^0.4.4 "@notionhq/client": ^2.2.10 From 0cf863b88f3e64dacfe7fcb50be517fe826f136b Mon Sep 17 00:00:00 2001 From: jacoblee93 Date: Thu, 7 Dec 2023 12:57:22 -0800 Subject: [PATCH 13/22] Move more modules --- langchain-core/src/documents/transformers.ts | 17 ++ langchain-core/src/messages/index.ts | 26 ++ 
.../src/document_transformers/html_to_text.ts | 44 +--- .../mozilla_readability.ts | 53 +--- langchain/src/load/import_type.d.ts | 11 - langchain/src/schema/document.ts | 22 +- langchain/src/storage/convex.ts | 225 +--------------- langchain/src/storage/ioredis.ts | 160 +----------- langchain/src/storage/upstash_redis.ts | 177 +------------ langchain/src/storage/vercel_kv.ts | 151 +---------- langchain/src/stores/message/cassandra.ts | 155 +---------- langchain/src/stores/message/cloudflare_d1.ts | 198 +------------- langchain/src/stores/message/convex.ts | 210 +-------------- langchain/src/stores/message/dynamodb.ts | 199 +------------- langchain/src/stores/message/firestore.ts | 196 +------------- langchain/src/stores/message/ioredis.ts | 103 +------- langchain/src/stores/message/momento.ts | 199 +------------- langchain/src/stores/message/mongodb.ts | 60 +---- langchain/src/stores/message/planetscale.ts | 211 +-------------- langchain/src/stores/message/redis.ts | 130 +--------- langchain/src/stores/message/upstash_redis.ts | 96 +------ langchain/src/stores/message/utils.ts | 32 +-- langchain/src/stores/message/xata.ts | 244 +----------------- libs/langchain-community/.gitignore | 54 ++++ libs/langchain-community/package.json | 146 ++++++++++- .../scripts/check-tree-shaking.js | 2 + .../scripts/create-entrypoints.js | 43 +++ .../src/document_transformers/html_to_text.ts | 43 +++ .../mozilla_readability.ts | 52 ++++ .../src/load/import_constants.ts | 18 ++ .../src/load/import_type.d.ts | 64 +++++ .../langchain-community/src/storage/convex.ts | 224 ++++++++++++++++ .../src/storage/ioredis.ts | 159 ++++++++++++ .../src/storage/upstash_redis.ts | 176 +++++++++++++ .../src/storage/vercel_kv.ts | 150 +++++++++++ .../src/stores/message/cassandra.ts | 150 +++++++++++ .../src/stores/message/cloudflare_d1.ts | 190 ++++++++++++++ .../src/stores/message/convex.ts | 209 +++++++++++++++ .../src/stores/message/dynamodb.ts | 194 ++++++++++++++ 
.../src/stores/message/firestore.ts | 191 ++++++++++++++ .../src/stores/message/ioredis.ts | 102 ++++++++ .../src/stores/message/momento.ts | 194 ++++++++++++++ .../src/stores/message/mongodb.ts | 59 +++++ .../src/stores/message/planetscale.ts | 205 +++++++++++++++ .../src/stores/message/redis.ts | 129 +++++++++ .../src/stores/message/upstash_redis.ts | 91 +++++++ .../src/stores/message/xata.ts | 238 +++++++++++++++++ 47 files changed, 3145 insertions(+), 2857 deletions(-) create mode 100644 libs/langchain-community/src/document_transformers/html_to_text.ts create mode 100644 libs/langchain-community/src/document_transformers/mozilla_readability.ts create mode 100644 libs/langchain-community/src/storage/convex.ts create mode 100644 libs/langchain-community/src/storage/ioredis.ts create mode 100644 libs/langchain-community/src/storage/upstash_redis.ts create mode 100644 libs/langchain-community/src/storage/vercel_kv.ts create mode 100644 libs/langchain-community/src/stores/message/cassandra.ts create mode 100644 libs/langchain-community/src/stores/message/cloudflare_d1.ts create mode 100644 libs/langchain-community/src/stores/message/convex.ts create mode 100644 libs/langchain-community/src/stores/message/dynamodb.ts create mode 100644 libs/langchain-community/src/stores/message/firestore.ts create mode 100644 libs/langchain-community/src/stores/message/ioredis.ts create mode 100644 libs/langchain-community/src/stores/message/momento.ts create mode 100644 libs/langchain-community/src/stores/message/mongodb.ts create mode 100644 libs/langchain-community/src/stores/message/planetscale.ts create mode 100644 libs/langchain-community/src/stores/message/redis.ts create mode 100644 libs/langchain-community/src/stores/message/upstash_redis.ts create mode 100644 libs/langchain-community/src/stores/message/xata.ts diff --git a/langchain-core/src/documents/transformers.ts b/langchain-core/src/documents/transformers.ts index b42ccab2dc16..45b5a6a06607 100644 --- 
a/langchain-core/src/documents/transformers.ts +++ b/langchain-core/src/documents/transformers.ts @@ -36,3 +36,20 @@ export abstract class BaseDocumentTransformer< return this.transformDocuments(input); } } + +/** + * Class for document transformers that return exactly one transformed document + * for each input document. + */ +export abstract class MappingDocumentTransformer extends BaseDocumentTransformer { + async transformDocuments(documents: Document[]): Promise { + const newDocuments = []; + for (const document of documents) { + const transformedDocument = await this._transformDocument(document); + newDocuments.push(transformedDocument); + } + return newDocuments; + } + + abstract _transformDocument(document: Document): Promise; +} diff --git a/langchain-core/src/messages/index.ts b/langchain-core/src/messages/index.ts index 9ddc2fd64e06..2019e67e7620 100644 --- a/langchain-core/src/messages/index.ts +++ b/langchain-core/src/messages/index.ts @@ -734,3 +734,29 @@ export function mapStoredMessageToChatMessage(message: StoredMessage) { throw new Error(`Got unexpected type: ${storedMessage.type}`); } } + +/** + * Transforms an array of `StoredMessage` instances into an array of + * `BaseMessage` instances. It uses the `mapV1MessageToStoredMessage` + * function to ensure all messages are in the `StoredMessage` format, then + * creates new instances of the appropriate `BaseMessage` subclass based + * on the type of each message. This function is used to prepare stored + * messages for use in a chat context. + */ +export function mapStoredMessagesToChatMessages( + messages: StoredMessage[] +): BaseMessage[] { + return messages.map(mapStoredMessageToChatMessage); +} + +/** + * Transforms an array of `BaseMessage` instances into an array of + * `StoredMessage` instances. It does this by calling the `toDict` method + * on each `BaseMessage`, which returns a `StoredMessage`. This function + * is used to prepare chat messages for storage. 
+ */ +export function mapChatMessagesToStoredMessages( + messages: BaseMessage[] +): StoredMessage[] { + return messages.map((message) => message.toDict()); +} diff --git a/langchain/src/document_transformers/html_to_text.ts b/langchain/src/document_transformers/html_to_text.ts index 987d1c6feed3..df29570f9a90 100644 --- a/langchain/src/document_transformers/html_to_text.ts +++ b/langchain/src/document_transformers/html_to_text.ts @@ -1,43 +1 @@ -import { htmlToText } from "html-to-text"; -import type { HtmlToTextOptions } from "html-to-text"; -import { Document } from "../document.js"; -import { MappingDocumentTransformer } from "../schema/document.js"; - -/** - * A transformer that converts HTML content to plain text. - * @example - * ```typescript - * const loader = new CheerioWebBaseLoader("https://example.com/some-page"); - * const docs = await loader.load(); - * - * const splitter = new RecursiveCharacterTextSplitter({ - * maxCharacterCount: 1000, - * }); - * const transformer = new HtmlToTextTransformer(); - * - * // The sequence of text splitting followed by HTML to text transformation - * const sequence = splitter.pipe(transformer); - * - * // Processing the loaded documents through the sequence - * const newDocuments = await sequence.invoke(docs); - * - * console.log(newDocuments); - * ``` - */ -export class HtmlToTextTransformer extends MappingDocumentTransformer { - static lc_name() { - return "HtmlToTextTransformer"; - } - - constructor(protected options: HtmlToTextOptions = {}) { - super(options); - } - - async _transformDocument(document: Document): Promise { - const extractedContent = htmlToText(document.pageContent, this.options); - return new Document({ - pageContent: extractedContent, - metadata: { ...document.metadata }, - }); - } -} +export * from "@langchain/community/document_transformers/html_to_text"; \ No newline at end of file diff --git a/langchain/src/document_transformers/mozilla_readability.ts 
b/langchain/src/document_transformers/mozilla_readability.ts index 1eb302bbbc38..4b64753acb38 100644 --- a/langchain/src/document_transformers/mozilla_readability.ts +++ b/langchain/src/document_transformers/mozilla_readability.ts @@ -1,52 +1 @@ -import { Readability } from "@mozilla/readability"; -import { JSDOM } from "jsdom"; -import { Options } from "mozilla-readability"; -import { Document } from "../document.js"; -import { MappingDocumentTransformer } from "../schema/document.js"; - -/** - * A transformer that uses the Mozilla Readability library to extract the - * main content from a web page. - * @example - * ```typescript - * const loader = new CheerioWebBaseLoader("https://example.com/article"); - * const docs = await loader.load(); - * - * const splitter = new RecursiveCharacterTextSplitter({ - * maxCharacterCount: 5000, - * }); - * const transformer = new MozillaReadabilityTransformer(); - * - * // The sequence processes the loaded documents through the splitter and then the transformer. - * const sequence = splitter.pipe(transformer); - * - * // Invoke the sequence to transform the documents into a more readable format. - * const newDocuments = await sequence.invoke(docs); - * - * console.log(newDocuments); - * ``` - */ -export class MozillaReadabilityTransformer extends MappingDocumentTransformer { - static lc_name() { - return "MozillaReadabilityTransformer"; - } - - constructor(protected options: Options = {}) { - super(options); - } - - async _transformDocument(document: Document): Promise { - const doc = new JSDOM(document.pageContent); - - const readability = new Readability(doc.window.document, this.options); - - const result = readability.parse(); - - return new Document({ - pageContent: result?.textContent ?? 
"", - metadata: { - ...document.metadata, - }, - }); - } -} +export * from "@langchain/community/document_transformers/mozilla_readability"; \ No newline at end of file diff --git a/langchain/src/load/import_type.d.ts b/langchain/src/load/import_type.d.ts index 68515bbf4995..0ca37eab8c29 100644 --- a/langchain/src/load/import_type.d.ts +++ b/langchain/src/load/import_type.d.ts @@ -522,28 +522,17 @@ export interface OptionalImportMap { export interface SecretMap { ANTHROPIC_API_KEY?: string; AWS_ACCESS_KEY_ID?: string; - AWS_SECRETE_ACCESS_KEY?: string; AWS_SECRET_ACCESS_KEY?: string; - AWS_SESSION_TOKEN?: string; AZURE_OPENAI_API_KEY?: string; MILVUS_PASSWORD?: string; MILVUS_SSL?: string; MILVUS_USERNAME?: string; OPENAI_API_KEY?: string; OPENAI_ORGANIZATION?: string; - PLANETSCALE_DATABASE_URL?: string; - PLANETSCALE_HOST?: string; - PLANETSCALE_PASSWORD?: string; - PLANETSCALE_USERNAME?: string; PROMPTLAYER_API_KEY?: string; QDRANT_API_KEY?: string; QDRANT_URL?: string; - REDIS_PASSWORD?: string; - REDIS_URL?: string; - REDIS_USERNAME?: string; REMOTE_RETRIEVER_AUTH_BEARER?: string; - UPSTASH_REDIS_REST_TOKEN?: string; - UPSTASH_REDIS_REST_URL?: string; VECTARA_API_KEY?: string; VECTARA_CORPUS_ID?: string; VECTARA_CUSTOMER_ID?: string; diff --git a/langchain/src/schema/document.ts b/langchain/src/schema/document.ts index 9e50f747c03f..5e3f6459952e 100644 --- a/langchain/src/schema/document.ts +++ b/langchain/src/schema/document.ts @@ -1,21 +1 @@ -import { BaseDocumentTransformer } from "@langchain/core/documents"; -import { Document } from "../document.js"; - -export { BaseDocumentTransformer }; - -/** - * Class for document transformers that return exactly one transformed document - * for each input document. 
- */ -export abstract class MappingDocumentTransformer extends BaseDocumentTransformer { - async transformDocuments(documents: Document[]): Promise { - const newDocuments = []; - for (const document of documents) { - const transformedDocument = await this._transformDocument(document); - newDocuments.push(transformedDocument); - } - return newDocuments; - } - - abstract _transformDocument(document: Document): Promise; -} +export { BaseDocumentTransformer, MappingDocumentTransformer } from "@langchain/core/documents"; diff --git a/langchain/src/storage/convex.ts b/langchain/src/storage/convex.ts index e32b1647d151..ad8d8c311a06 100644 --- a/langchain/src/storage/convex.ts +++ b/langchain/src/storage/convex.ts @@ -1,224 +1 @@ -// eslint-disable-next-line import/no-extraneous-dependencies -import { - FieldPaths, - FunctionReference, - GenericActionCtx, - GenericDataModel, - NamedTableInfo, - TableNamesInDataModel, - VectorIndexNames, - makeFunctionReference, -} from "convex/server"; -// eslint-disable-next-line import/no-extraneous-dependencies -import { Value } from "convex/values"; -import { BaseStore } from "../schema/storage.js"; - -/** - * Type that defines the config required to initialize the - * ConvexKVStore class. It includes the table name, - * index name, field name. 
- */ -export type ConvexKVStoreConfig< - DataModel extends GenericDataModel, - TableName extends TableNamesInDataModel, - IndexName extends VectorIndexNames>, - KeyFieldName extends FieldPaths>, - ValueFieldName extends FieldPaths>, - UpsertMutation extends FunctionReference< - "mutation", - "internal", - { table: string; document: object } - >, - LookupQuery extends FunctionReference< - "query", - "internal", - { table: string; index: string; keyField: string; key: string }, - object[] - >, - DeleteManyMutation extends FunctionReference< - "mutation", - "internal", - { table: string; index: string; keyField: string; key: string } - > -> = { - readonly ctx: GenericActionCtx; - /** - * Defaults to "cache" - */ - readonly table?: TableName; - /** - * Defaults to "byKey" - */ - readonly index?: IndexName; - /** - * Defaults to "key" - */ - readonly keyField?: KeyFieldName; - /** - * Defaults to "value" - */ - readonly valueField?: ValueFieldName; - /** - * Defaults to `internal.langchain.db.upsert` - */ - readonly upsert?: UpsertMutation; - /** - * Defaults to `internal.langchain.db.lookup` - */ - readonly lookup?: LookupQuery; - /** - * Defaults to `internal.langchain.db.deleteMany` - */ - readonly deleteMany?: DeleteManyMutation; -}; - -/** - * Class that extends the BaseStore class to interact with a Convex - * database. It provides methods for getting, setting, and deleting key value pairs, - * as well as yielding keys from the database. 
- */ -export class ConvexKVStore< - T extends Value, - DataModel extends GenericDataModel, - TableName extends TableNamesInDataModel, - IndexName extends VectorIndexNames>, - KeyFieldName extends FieldPaths>, - ValueFieldName extends FieldPaths>, - UpsertMutation extends FunctionReference< - "mutation", - "internal", - { table: string; document: object } - >, - LookupQuery extends FunctionReference< - "query", - "internal", - { table: string; index: string; keyField: string; key: string }, - object[] - >, - DeleteManyMutation extends FunctionReference< - "mutation", - "internal", - { table: string; index: string; keyField: string; key: string } - > -> extends BaseStore { - lc_namespace = ["langchain", "storage", "convex"]; - - private readonly ctx: GenericActionCtx; - - private readonly table: TableName; - - private readonly index: IndexName; - - private readonly keyField: KeyFieldName; - - private readonly valueField: ValueFieldName; - - private readonly upsert: UpsertMutation; - - private readonly lookup: LookupQuery; - - private readonly deleteMany: DeleteManyMutation; - - constructor( - config: ConvexKVStoreConfig< - DataModel, - TableName, - IndexName, - KeyFieldName, - ValueFieldName, - UpsertMutation, - LookupQuery, - DeleteManyMutation - > - ) { - super(config); - this.ctx = config.ctx; - this.table = config.table ?? ("cache" as TableName); - this.index = config.index ?? ("byKey" as IndexName); - this.keyField = config.keyField ?? ("key" as KeyFieldName); - this.valueField = config.valueField ?? ("value" as ValueFieldName); - this.upsert = - // eslint-disable-next-line @typescript-eslint/no-explicit-any - config.upsert ?? (makeFunctionReference("langchain/db:upsert") as any); - this.lookup = - // eslint-disable-next-line @typescript-eslint/no-explicit-any - config.lookup ?? (makeFunctionReference("langchain/db:lookup") as any); - this.deleteMany = - config.deleteMany ?? 
- // eslint-disable-next-line @typescript-eslint/no-explicit-any - (makeFunctionReference("langchain/db:deleteMany") as any); - } - - /** - * Gets multiple keys from the Convex database. - * @param keys Array of keys to be retrieved. - * @returns An array of retrieved values. - */ - async mget(keys: string[]) { - return (await Promise.all( - keys.map(async (key) => { - const found = (await this.ctx.runQuery(this.lookup, { - table: this.table, - index: this.index, - keyField: this.keyField, - key, - // eslint-disable-next-line @typescript-eslint/no-explicit-any - } as any)) as any; - return found.length > 0 ? found[0][this.valueField] : undefined; - }) - )) as (T | undefined)[]; - } - - /** - * Sets multiple keys in the Convex database. - * @param keyValuePairs Array of key-value pairs to be set. - * @returns Promise that resolves when all keys have been set. - */ - async mset(keyValuePairs: [string, T][]): Promise { - // TODO: Remove chunking when Convex handles the concurrent requests correctly - const PAGE_SIZE = 16; - for (let i = 0; i < keyValuePairs.length; i += PAGE_SIZE) { - await Promise.all( - keyValuePairs.slice(i, i + PAGE_SIZE).map(([key, value]) => - this.ctx.runMutation(this.upsert, { - table: this.table, - index: this.index, - keyField: this.keyField, - key, - document: { [this.keyField]: key, [this.valueField]: value }, - // eslint-disable-next-line @typescript-eslint/no-explicit-any - } as any) - ) - ); - } - } - - /** - * Deletes multiple keys from the Convex database. - * @param keys Array of keys to be deleted. - * @returns Promise that resolves when all keys have been deleted. - */ - async mdelete(keys: string[]): Promise { - await Promise.all( - keys.map((key) => - this.ctx.runMutation(this.deleteMany, { - table: this.table, - index: this.index, - keyField: this.keyField, - key, - // eslint-disable-next-line @typescript-eslint/no-explicit-any - } as any) - ) - ); - } - - /** - * Yields keys from the Convex database. 
- * @param prefix Optional prefix to filter the keys. - * @returns An AsyncGenerator that yields keys from the Convex database. - */ - // eslint-disable-next-line require-yield - async *yieldKeys(_prefix?: string): AsyncGenerator { - throw new Error("yieldKeys not implemented yet for ConvexKVStore"); - } -} +export * from "@langchain/community/storage/convex"; \ No newline at end of file diff --git a/langchain/src/storage/ioredis.ts b/langchain/src/storage/ioredis.ts index d5864f59b6ea..c022f1520e97 100644 --- a/langchain/src/storage/ioredis.ts +++ b/langchain/src/storage/ioredis.ts @@ -1,159 +1 @@ -import type { Redis } from "ioredis"; - -import { BaseStore } from "../schema/storage.js"; - -/** - * Class that extends the BaseStore class to interact with a Redis - * database. It provides methods for getting, setting, and deleting data, - * as well as yielding keys from the database. - * @example - * ```typescript - * const store = new RedisByteStore({ client: new Redis({}) }); - * await store.mset([ - * [ - * "message:id:0", - * new TextEncoder().encode(JSON.stringify(new AIMessage("ai stuff..."))), - * ], - * [ - * "message:id:1", - * new TextEncoder().encode( - * JSON.stringify(new HumanMessage("human stuff...")), - * ), - * ], - * ]); - * const retrievedMessages = await store.mget(["message:id:0", "message:id:1"]); - * console.log(retrievedMessages.map((v) => new TextDecoder().decode(v))); - * const yieldedKeys = []; - * for await (const key of store.yieldKeys("message:id:")) { - * yieldedKeys.push(key); - * } - * console.log(yieldedKeys); - * await store.mdelete(yieldedKeys); - * ``` - */ -export class RedisByteStore extends BaseStore { - lc_namespace = ["langchain", "storage"]; - - protected client: Redis; - - protected ttl?: number; - - protected namespace?: string; - - protected yieldKeysScanBatchSize = 1000; - - constructor(fields: { - client: Redis; - ttl?: number; - namespace?: string; - yieldKeysScanBatchSize?: number; - }) { - super(fields); - 
this.client = fields.client; - this.ttl = fields.ttl; - this.namespace = fields.namespace; - this.yieldKeysScanBatchSize = - fields.yieldKeysScanBatchSize ?? this.yieldKeysScanBatchSize; - } - - _getPrefixedKey(key: string) { - if (this.namespace) { - const delimiter = "/"; - return `${this.namespace}${delimiter}${key}`; - } - return key; - } - - _getDeprefixedKey(key: string) { - if (this.namespace) { - const delimiter = "/"; - return key.slice(this.namespace.length + delimiter.length); - } - return key; - } - - /** - * Gets multiple keys from the Redis database. - * @param keys Array of keys to be retrieved. - * @returns An array of retrieved values. - */ - async mget(keys: string[]) { - const prefixedKeys = keys.map(this._getPrefixedKey.bind(this)); - const retrievedValues = await this.client.mgetBuffer(prefixedKeys); - return retrievedValues.map((value) => { - if (!value) { - return undefined; - } else { - return value; - } - }); - } - - /** - * Sets multiple keys in the Redis database. - * @param keyValuePairs Array of key-value pairs to be set. - * @returns Promise that resolves when all keys have been set. - */ - async mset(keyValuePairs: [string, Uint8Array][]): Promise { - const decoder = new TextDecoder(); - const encodedKeyValuePairs = keyValuePairs.map(([key, value]) => [ - this._getPrefixedKey(key), - decoder.decode(value), - ]); - const pipeline = this.client.pipeline(); - for (const [key, value] of encodedKeyValuePairs) { - if (this.ttl) { - pipeline.set(key, value, "EX", this.ttl); - } else { - pipeline.set(key, value); - } - } - await pipeline.exec(); - } - - /** - * Deletes multiple keys from the Redis database. - * @param keys Array of keys to be deleted. - * @returns Promise that resolves when all keys have been deleted. - */ - async mdelete(keys: string[]): Promise { - await this.client.del(...keys.map(this._getPrefixedKey.bind(this))); - } - - /** - * Yields keys from the Redis database. - * @param prefix Optional prefix to filter the keys. 
- * @returns An AsyncGenerator that yields keys from the Redis database. - */ - async *yieldKeys(prefix?: string): AsyncGenerator { - let pattern; - if (prefix) { - const wildcardPrefix = prefix.endsWith("*") ? prefix : `${prefix}*`; - pattern = this._getPrefixedKey(wildcardPrefix); - } else { - pattern = this._getPrefixedKey("*"); - } - let [cursor, batch] = await this.client.scan( - 0, - "MATCH", - pattern, - "COUNT", - this.yieldKeysScanBatchSize - ); - for (const key of batch) { - yield this._getDeprefixedKey(key); - } - while (cursor !== "0") { - [cursor, batch] = await this.client.scan( - cursor, - "MATCH", - pattern, - "COUNT", - this.yieldKeysScanBatchSize - ); - for (const key of batch) { - yield this._getDeprefixedKey(key); - } - } - } -} +export * from "@langchain/community/storage/ioredis"; \ No newline at end of file diff --git a/langchain/src/storage/upstash_redis.ts b/langchain/src/storage/upstash_redis.ts index 313444399661..e790c64033db 100644 --- a/langchain/src/storage/upstash_redis.ts +++ b/langchain/src/storage/upstash_redis.ts @@ -1,176 +1 @@ -import { Redis as UpstashRedis, type RedisConfigNodejs } from "@upstash/redis"; - -import { BaseStore } from "../schema/storage.js"; - -/** - * Type definition for the input parameters required to initialize an - * instance of the UpstashStoreInput class. - */ -export interface UpstashRedisStoreInput { - sessionTTL?: number; - config?: RedisConfigNodejs; - client?: UpstashRedis; - /** - * The amount of keys to retrieve per batch when yielding keys. - * @default 1000 - */ - yieldKeysScanBatchSize?: number; - /** - * The namespace to use for the keys in the database. - */ - namespace?: string; -} - -/** - * Class that extends the BaseStore class to interact with an Upstash Redis - * database. It provides methods for getting, setting, and deleting data, - * as well as yielding keys from the database. 
- * @example - * ```typescript - * const store = new UpstashRedisStore({ - * client: new Redis({ - * url: "your-upstash-redis-url", - * token: "your-upstash-redis-token", - * }), - * }); - * await store.mset([ - * ["message:id:0", "encoded-ai-message"], - * ["message:id:1", "encoded-human-message"], - * ]); - * const retrievedMessages = await store.mget(["message:id:0", "message:id:1"]); - * const yieldedKeys = []; - * for await (const key of store.yieldKeys("message:id")) { - * yieldedKeys.push(key); - * } - * await store.mdelete(yieldedKeys); - * ``` - */ -export class UpstashRedisStore extends BaseStore { - lc_namespace = ["langchain", "storage"]; - - protected client: UpstashRedis; - - protected namespace?: string; - - protected yieldKeysScanBatchSize = 1000; - - private sessionTTL?: number; - - constructor(fields: UpstashRedisStoreInput) { - super(fields); - if (fields.client) { - this.client = fields.client; - } else if (fields.config) { - this.client = new UpstashRedis(fields.config); - } else { - throw new Error( - `Upstash Redis store requires either a config object or a pre-configured client.` - ); - } - this.sessionTTL = fields.sessionTTL; - this.yieldKeysScanBatchSize = - fields.yieldKeysScanBatchSize ?? this.yieldKeysScanBatchSize; - this.namespace = fields.namespace; - } - - _getPrefixedKey(key: string) { - if (this.namespace) { - const delimiter = "/"; - return `${this.namespace}${delimiter}${key}`; - } - return key; - } - - _getDeprefixedKey(key: string) { - if (this.namespace) { - const delimiter = "/"; - return key.slice(this.namespace.length + delimiter.length); - } - return key; - } - - /** - * Gets multiple keys from the Upstash Redis database. - * @param keys Array of keys to be retrieved. - * @returns An array of retrieved values. 
- */ - async mget(keys: string[]) { - const encoder = new TextEncoder(); - - const prefixedKeys = keys.map(this._getPrefixedKey.bind(this)); - const retrievedValues = await this.client.mget( - ...prefixedKeys - ); - return retrievedValues.map((value) => { - if (!value) { - return undefined; - } else if (typeof value === "object") { - return encoder.encode(JSON.stringify(value)); - } else { - return encoder.encode(value); - } - }); - } - - /** - * Sets multiple keys in the Upstash Redis database. - * @param keyValuePairs Array of key-value pairs to be set. - * @returns Promise that resolves when all keys have been set. - */ - async mset(keyValuePairs: [string, Uint8Array][]): Promise { - const decoder = new TextDecoder(); - const encodedKeyValuePairs = keyValuePairs.map(([key, value]) => [ - this._getPrefixedKey(key), - decoder.decode(value), - ]); - const pipeline = this.client.pipeline(); - for (const [key, value] of encodedKeyValuePairs) { - if (this.sessionTTL) { - pipeline.setex(key, this.sessionTTL, value); - } else { - pipeline.set(key, value); - } - } - await pipeline.exec(); - } - - /** - * Deletes multiple keys from the Upstash Redis database. - * @param keys Array of keys to be deleted. - * @returns Promise that resolves when all keys have been deleted. - */ - async mdelete(keys: string[]): Promise { - await this.client.del(...keys.map(this._getPrefixedKey.bind(this))); - } - - /** - * Yields keys from the Upstash Redis database. - * @param prefix Optional prefix to filter the keys. A wildcard (*) is always appended to the end. - * @returns An AsyncGenerator that yields keys from the Upstash Redis database. - */ - async *yieldKeys(prefix?: string): AsyncGenerator { - let pattern; - if (prefix) { - const wildcardPrefix = prefix.endsWith("*") ? 
prefix : `${prefix}*`; - pattern = `${this._getPrefixedKey(wildcardPrefix)}*`; - } else { - pattern = this._getPrefixedKey("*"); - } - let [cursor, batch] = await this.client.scan(0, { - match: pattern, - count: this.yieldKeysScanBatchSize, - }); - for (const key of batch) { - yield this._getDeprefixedKey(key); - } - while (cursor !== 0) { - [cursor, batch] = await this.client.scan(cursor, { - match: pattern, - count: this.yieldKeysScanBatchSize, - }); - for (const key of batch) { - yield this._getDeprefixedKey(key); - } - } - } -} +export * from "@langchain/community/storage/upstash_redis"; \ No newline at end of file diff --git a/langchain/src/storage/vercel_kv.ts b/langchain/src/storage/vercel_kv.ts index 9b9646377713..a31d97725508 100644 --- a/langchain/src/storage/vercel_kv.ts +++ b/langchain/src/storage/vercel_kv.ts @@ -1,150 +1 @@ -import { kv, type VercelKV } from "@vercel/kv"; - -import { BaseStore } from "../schema/storage.js"; - -/** - * Class that extends the BaseStore class to interact with a Vercel KV - * database. It provides methods for getting, setting, and deleting data, - * as well as yielding keys from the database. 
- * @example - * ```typescript - * const store = new VercelKVStore({ - * client: getClient(), - * }); - * await store.mset([ - * { key: "message:id:0", value: "encoded message 0" }, - * { key: "message:id:1", value: "encoded message 1" }, - * ]); - * const retrievedMessages = await store.mget(["message:id:0", "message:id:1"]); - * const yieldedKeys = []; - * for await (const key of store.yieldKeys("message:id:")) { - * yieldedKeys.push(key); - * } - * await store.mdelete(yieldedKeys); - * ``` - */ -export class VercelKVStore extends BaseStore { - lc_namespace = ["langchain", "storage"]; - - protected client: VercelKV; - - protected ttl?: number; - - protected namespace?: string; - - protected yieldKeysScanBatchSize = 1000; - - constructor(fields?: { - client?: VercelKV; - ttl?: number; - namespace?: string; - yieldKeysScanBatchSize?: number; - }) { - super(fields); - this.client = fields?.client ?? kv; - this.ttl = fields?.ttl; - this.namespace = fields?.namespace; - this.yieldKeysScanBatchSize = - fields?.yieldKeysScanBatchSize ?? this.yieldKeysScanBatchSize; - } - - _getPrefixedKey(key: string) { - if (this.namespace) { - const delimiter = "/"; - return `${this.namespace}${delimiter}${key}`; - } - return key; - } - - _getDeprefixedKey(key: string) { - if (this.namespace) { - const delimiter = "/"; - return key.slice(this.namespace.length + delimiter.length); - } - return key; - } - - /** - * Gets multiple keys from the Redis database. - * @param keys Array of keys to be retrieved. - * @returns An array of retrieved values. 
- */ - async mget(keys: string[]) { - const prefixedKeys = keys.map(this._getPrefixedKey.bind(this)); - const retrievedValues = await this.client.mget<(string | undefined)[]>( - ...prefixedKeys - ); - const encoder = new TextEncoder(); - return retrievedValues.map((value) => { - if (value === undefined || value === null) { - return undefined; - } else if (typeof value === "object") { - return encoder.encode(JSON.stringify(value)); - } else { - return encoder.encode(value); - } - }); - } - - /** - * Sets multiple keys in the Redis database. - * @param keyValuePairs Array of key-value pairs to be set. - * @returns Promise that resolves when all keys have been set. - */ - async mset(keyValuePairs: [string, Uint8Array][]): Promise { - const decoder = new TextDecoder(); - const decodedKeyValuePairs = keyValuePairs.map(([key, value]) => [ - this._getPrefixedKey(key), - decoder.decode(value), - ]); - const pipeline = this.client.pipeline(); - for (const [key, value] of decodedKeyValuePairs) { - if (this.ttl) { - pipeline.setex(key, this.ttl, value); - } else { - pipeline.set(key, value); - } - } - await pipeline.exec(); - } - - /** - * Deletes multiple keys from the Redis database. - * @param keys Array of keys to be deleted. - * @returns Promise that resolves when all keys have been deleted. - */ - async mdelete(keys: string[]): Promise { - await this.client.del(...keys.map(this._getPrefixedKey.bind(this))); - } - - /** - * Yields keys from the Redis database. - * @param prefix Optional prefix to filter the keys. - * @returns An AsyncGenerator that yields keys from the Redis database. - */ - async *yieldKeys(prefix?: string): AsyncGenerator { - let pattern; - if (prefix) { - const wildcardPrefix = prefix.endsWith("*") ? 
prefix : `${prefix}*`; - pattern = this._getPrefixedKey(wildcardPrefix); - } else { - pattern = this._getPrefixedKey("*"); - } - let [cursor, batch] = await this.client.scan(0, { - match: pattern, - count: this.yieldKeysScanBatchSize, - }); - for (const key of batch) { - yield this._getDeprefixedKey(key); - } - while (cursor !== 0) { - [cursor, batch] = await this.client.scan(cursor, { - match: pattern, - count: this.yieldKeysScanBatchSize, - }); - for (const key of batch) { - yield this._getDeprefixedKey(key); - } - } - } -} +export * from "@langchain/community/storage/vercel_kv"; \ No newline at end of file diff --git a/langchain/src/stores/message/cassandra.ts b/langchain/src/stores/message/cassandra.ts index 5e63b2b1f11f..7c675be8e570 100644 --- a/langchain/src/stores/message/cassandra.ts +++ b/langchain/src/stores/message/cassandra.ts @@ -1,154 +1 @@ -import { Client, DseClientOptions } from "cassandra-driver"; -import { - BaseMessage, - BaseListChatMessageHistory, - StoredMessage, -} from "../../schema/index.js"; -import { - mapChatMessagesToStoredMessages, - mapStoredMessagesToChatMessages, -} from "./utils.js"; - -export interface CassandraChatMessageHistoryOptions extends DseClientOptions { - keyspace: string; - table: string; - sessionId: string; -} - -/** - * Class for storing chat message history within Cassandra. It extends the - * BaseListChatMessageHistory class and provides methods to get, add, and - * clear messages. 
- * @example - * ```typescript - * const chatHistory = new CassandraChatMessageHistory({ - * cloud: { - * secureConnectBundle: "", - * }, - * credentials: { - * username: "token", - * password: "", - * }, - * keyspace: "langchain", - * table: "message_history", - * sessionId: "", - * }); - * - * const chain = new ConversationChain({ - * llm: new ChatOpenAI(), - * memory: chatHistory, - * }); - * - * const response = await chain.invoke({ - * input: "What did I just say my name was?", - * }); - * console.log({ response }); - * ``` - */ -export class CassandraChatMessageHistory extends BaseListChatMessageHistory { - lc_namespace = ["langchain", "stores", "message", "cassandra"]; - - private keyspace: string; - - private table: string; - - private client: Client; - - private sessionId: string; - - private tableExists: boolean; - - private options: CassandraChatMessageHistoryOptions; - - private queries: { insert: string; select: string; delete: string }; - - constructor(options: CassandraChatMessageHistoryOptions) { - super(); - this.client = new Client(options); - this.keyspace = options.keyspace; - this.table = options.table; - this.sessionId = options.sessionId; - this.tableExists = false; - this.options = options; - } - - /** - * Method to get all the messages stored in the Cassandra database. - * @returns Array of stored BaseMessage instances. - */ - public async getMessages(): Promise { - await this.ensureTable(); - const resultSet = await this.client.execute( - this.queries.select, - [this.sessionId], - { prepare: true } - ); - const storedMessages: StoredMessage[] = resultSet.rows.map((row) => ({ - type: row.message_type, - data: JSON.parse(row.data), - })); - - const baseMessages = mapStoredMessagesToChatMessages(storedMessages); - return baseMessages; - } - - /** - * Method to add a new message to the Cassandra database. - * @param message The BaseMessage instance to add. - * @returns A promise that resolves when the message has been added. 
- */ - public async addMessage(message: BaseMessage): Promise { - await this.ensureTable(); - const messages = mapChatMessagesToStoredMessages([message]); - const { type, data } = messages[0]; - return this.client - .execute( - this.queries.insert, - [this.sessionId, type, JSON.stringify(data)], - { prepare: true, ...this.options } - ) - .then(() => {}); - } - - /** - * Method to clear all the messages from the Cassandra database. - * @returns A promise that resolves when all messages have been cleared. - */ - public async clear(): Promise { - await this.ensureTable(); - return this.client - .execute(this.queries.delete, [this.sessionId], { - prepare: true, - ...this.options, - }) - .then(() => {}); - } - - /** - * Method to initialize the Cassandra database. - * @returns Promise that resolves when the database has been initialized. - */ - private async ensureTable(): Promise { - if (this.tableExists) { - return; - } - - await this.client.execute(` - CREATE TABLE IF NOT EXISTS ${this.keyspace}.${this.table} ( - session_id text, - message_ts timestamp, - message_type text, - data text, - PRIMARY KEY ((session_id), message_ts) - ); - `); - - this.queries = { - insert: `INSERT INTO ${this.keyspace}.${this.table} (session_id, message_ts, message_type, data) VALUES (?, toTimestamp(now()), ?, ?);`, - select: `SELECT message_type, data FROM ${this.keyspace}.${this.table} WHERE session_id = ?;`, - delete: `DELETE FROM ${this.keyspace}.${this.table} WHERE session_id = ?;`, - }; - - this.tableExists = true; - } -} +export * from "@langchain/community/stores/message/cassandra"; \ No newline at end of file diff --git a/langchain/src/stores/message/cloudflare_d1.ts b/langchain/src/stores/message/cloudflare_d1.ts index a88cb595c2ca..46b21919631c 100644 --- a/langchain/src/stores/message/cloudflare_d1.ts +++ b/langchain/src/stores/message/cloudflare_d1.ts @@ -1,197 +1 @@ -import { v4 } from "uuid"; -import type { D1Database } from "@cloudflare/workers-types"; - -import { - 
BaseMessage, - BaseListChatMessageHistory, - StoredMessage, - StoredMessageData, -} from "../../schema/index.js"; -import { - mapChatMessagesToStoredMessages, - mapStoredMessagesToChatMessages, -} from "./utils.js"; - -/** - * Type definition for the input parameters required when instantiating a - * CloudflareD1MessageHistory object. - */ -export type CloudflareD1MessageHistoryInput = { - tableName?: string; - sessionId: string; - database?: D1Database; -}; - -/** - * Interface for the data transfer object used when selecting stored - * messages from the Cloudflare D1 database. - */ -interface selectStoredMessagesDTO { - id: string; - session_id: string; - type: string; - content: string; - role: string | null; - name: string | null; - additional_kwargs: string; -} - -/** - * Class for storing and retrieving chat message history from a - * Cloudflare D1 database. Extends the BaseListChatMessageHistory class. - * @example - * ```typescript - * const memory = new BufferMemory({ - * returnMessages: true, - * chatHistory: new CloudflareD1MessageHistory({ - * tableName: "stored_message", - * sessionId: "example", - * database: env.DB, - * }), - * }); - * - * const chainInput = { input }; - * - * const res = await memory.chatHistory.invoke(chainInput); - * await memory.saveContext(chainInput, { - * output: res, - * }); - * ``` - */ -export class CloudflareD1MessageHistory extends BaseListChatMessageHistory { - lc_namespace = ["langchain", "stores", "message", "cloudflare_d1"]; - - public database: D1Database; - - private tableName: string; - - private sessionId: string; - - private tableInitialized: boolean; - - constructor(fields: CloudflareD1MessageHistoryInput) { - super(fields); - - const { sessionId, database, tableName } = fields; - - if (database) { - this.database = database; - } else { - throw new Error( - "Either a client or config must be provided to CloudflareD1MessageHistory" - ); - } - - this.tableName = tableName || "langchain_chat_histories"; - 
this.tableInitialized = false; - this.sessionId = sessionId; - } - - /** - * Private method to ensure that the necessary table exists in the - * Cloudflare D1 database before performing any operations. If the table - * does not exist, it is created. - * @returns Promise that resolves to void. - */ - private async ensureTable(): Promise { - if (this.tableInitialized) { - return; - } - - const query = `CREATE TABLE IF NOT EXISTS ${this.tableName} (id TEXT PRIMARY KEY, session_id TEXT, type TEXT, content TEXT, role TEXT, name TEXT, additional_kwargs TEXT);`; - await this.database.prepare(query).bind().all(); - - const idIndexQuery = `CREATE INDEX IF NOT EXISTS id_index ON ${this.tableName} (id);`; - await this.database.prepare(idIndexQuery).bind().all(); - - const sessionIdIndexQuery = `CREATE INDEX IF NOT EXISTS session_id_index ON ${this.tableName} (session_id);`; - await this.database.prepare(sessionIdIndexQuery).bind().all(); - - this.tableInitialized = true; - } - - /** - * Method to retrieve all messages from the Cloudflare D1 database for the - * current session. - * @returns Promise that resolves to an array of BaseMessage objects. 
- */ - async getMessages(): Promise { - await this.ensureTable(); - - const query = `SELECT * FROM ${this.tableName} WHERE session_id = ?`; - const rawStoredMessages = await this.database - .prepare(query) - .bind(this.sessionId) - .all(); - const storedMessagesObject = - rawStoredMessages.results as unknown as selectStoredMessagesDTO[]; - - const orderedMessages: StoredMessage[] = storedMessagesObject.map( - (message) => { - const data = { - content: message.content, - additional_kwargs: JSON.parse(message.additional_kwargs), - } as StoredMessageData; - - if (message.role) { - data.role = message.role; - } - - if (message.name) { - data.name = message.name; - } - - return { - type: message.type, - data, - }; - } - ); - - return mapStoredMessagesToChatMessages(orderedMessages); - } - - /** - * Method to add a new message to the Cloudflare D1 database for the current - * session. - * @param message The BaseMessage object to be added to the database. - * @returns Promise that resolves to void. - */ - async addMessage(message: BaseMessage): Promise { - await this.ensureTable(); - - const messageToAdd = mapChatMessagesToStoredMessages([message]); - - const query = `INSERT INTO ${this.tableName} (id, session_id, type, content, role, name, additional_kwargs) VALUES(?, ?, ?, ?, ?, ?, ?)`; - - const id = v4(); - - await this.database - .prepare(query) - .bind( - id, - this.sessionId, - messageToAdd[0].type || null, - messageToAdd[0].data.content || null, - messageToAdd[0].data.role || null, - messageToAdd[0].data.name || null, - JSON.stringify(messageToAdd[0].data.additional_kwargs) - ) - .all(); - } - - /** - * Method to delete all messages from the Cloudflare D1 database for the - * current session. - * @returns Promise that resolves to void. - */ - async clear(): Promise { - await this.ensureTable(); - - const query = `DELETE FROM ? WHERE session_id = ? 
`; - await this.database - .prepare(query) - .bind(this.tableName, this.sessionId) - .all(); - } -} +export * from "@langchain/community/stores/message/cloudflare_d1"; \ No newline at end of file diff --git a/langchain/src/stores/message/convex.ts b/langchain/src/stores/message/convex.ts index c060e076900f..150aafb7efd1 100644 --- a/langchain/src/stores/message/convex.ts +++ b/langchain/src/stores/message/convex.ts @@ -1,209 +1 @@ -/* eslint-disable @typescript-eslint/no-explicit-any */ - -// eslint-disable-next-line import/no-extraneous-dependencies -import { - DocumentByInfo, - DocumentByName, - FieldPaths, - FunctionReference, - GenericActionCtx, - GenericDataModel, - NamedTableInfo, - TableNamesInDataModel, - IndexNames, - makeFunctionReference, -} from "convex/server"; -import { BaseMessage, BaseListChatMessageHistory } from "../../schema/index.js"; -import { - mapChatMessagesToStoredMessages, - mapStoredMessagesToChatMessages, -} from "./utils.js"; - -/** - * Type that defines the config required to initialize the - * ConvexChatMessageHistory class. At minimum it needs a sessionId - * and an ActionCtx. 
- */ -export type ConvexChatMessageHistoryInput< - DataModel extends GenericDataModel, - TableName extends TableNamesInDataModel = "messages", - IndexName extends IndexNames< - NamedTableInfo - > = "bySessionId", - SessionIdFieldName extends FieldPaths< - NamedTableInfo - > = "sessionId", - MessageTextFieldName extends FieldPaths< - NamedTableInfo - > = "message", - InsertMutation extends FunctionReference< - "mutation", - "internal", - { table: string; document: object } - > = any, - LookupQuery extends FunctionReference< - "query", - "internal", - { table: string; index: string; keyField: string; key: string }, - object[] - > = any, - DeleteManyMutation extends FunctionReference< - "mutation", - "internal", - { table: string; index: string; keyField: string; key: string } - > = any -> = { - readonly ctx: GenericActionCtx; - readonly sessionId: DocumentByName[SessionIdFieldName]; - /** - * Defaults to "messages" - */ - readonly table?: TableName; - /** - * Defaults to "bySessionId" - */ - readonly index?: IndexName; - /** - * Defaults to "sessionId" - */ - readonly sessionIdField?: SessionIdFieldName; - /** - * Defaults to "message" - */ - readonly messageTextFieldName?: MessageTextFieldName; - /** - * Defaults to `internal.langchain.db.insert` - */ - readonly insert?: InsertMutation; - /** - * Defaults to `internal.langchain.db.lookup` - */ - readonly lookup?: LookupQuery; - /** - * Defaults to `internal.langchain.db.deleteMany` - */ - readonly deleteMany?: DeleteManyMutation; -}; - -export class ConvexChatMessageHistory< - DataModel extends GenericDataModel, - SessionIdFieldName extends FieldPaths< - NamedTableInfo - > = "sessionId", - TableName extends TableNamesInDataModel = "messages", - IndexName extends IndexNames< - NamedTableInfo - > = "bySessionId", - MessageTextFieldName extends FieldPaths< - NamedTableInfo - > = "message", - InsertMutation extends FunctionReference< - "mutation", - "internal", - { table: string; document: object } - > = any, - 
LookupQuery extends FunctionReference< - "query", - "internal", - { table: string; index: string; keyField: string; key: string }, - object[] - > = any, - DeleteManyMutation extends FunctionReference< - "mutation", - "internal", - { table: string; index: string; keyField: string; key: string } - > = any -> extends BaseListChatMessageHistory { - lc_namespace = ["langchain", "stores", "message", "convex"]; - - private readonly ctx: GenericActionCtx; - - private readonly sessionId: DocumentByInfo< - NamedTableInfo - >[SessionIdFieldName]; - - private readonly table: TableName; - - private readonly index: IndexName; - - private readonly sessionIdField: SessionIdFieldName; - - private readonly messageTextFieldName: MessageTextFieldName; - - private readonly insert: InsertMutation; - - private readonly lookup: LookupQuery; - - private readonly deleteMany: DeleteManyMutation; - - constructor( - config: ConvexChatMessageHistoryInput< - DataModel, - TableName, - IndexName, - SessionIdFieldName, - MessageTextFieldName, - InsertMutation, - LookupQuery, - DeleteManyMutation - > - ) { - super(); - this.ctx = config.ctx; - this.sessionId = config.sessionId; - this.table = config.table ?? ("messages" as TableName); - this.index = config.index ?? ("bySessionId" as IndexName); - this.sessionIdField = - config.sessionIdField ?? ("sessionId" as SessionIdFieldName); - this.messageTextFieldName = - config.messageTextFieldName ?? ("message" as MessageTextFieldName); - this.insert = - config.insert ?? (makeFunctionReference("langchain/db:insert") as any); - this.lookup = - config.lookup ?? (makeFunctionReference("langchain/db:lookup") as any); - this.deleteMany = - config.deleteMany ?? 
- (makeFunctionReference("langchain/db:deleteMany") as any); - } - - async getMessages(): Promise { - const convexDocuments: any[] = await this.ctx.runQuery(this.lookup, { - table: this.table, - index: this.index, - keyField: this.sessionIdField, - key: this.sessionId, - } as any); - - return mapStoredMessagesToChatMessages( - convexDocuments.map((doc) => doc[this.messageTextFieldName]) - ); - } - - async addMessage(message: BaseMessage): Promise { - const messages = mapChatMessagesToStoredMessages([message]); - // TODO: Remove chunking when Convex handles the concurrent requests correctly - const PAGE_SIZE = 16; - for (let i = 0; i < messages.length; i += PAGE_SIZE) { - await Promise.all( - messages.slice(i, i + PAGE_SIZE).map((message) => - this.ctx.runMutation(this.insert, { - table: this.table, - document: { - [this.sessionIdField]: this.sessionId, - [this.messageTextFieldName]: message, - }, - } as any) - ) - ); - } - } - - async clear(): Promise { - await this.ctx.runMutation(this.deleteMany, { - table: this.table, - index: this.index, - keyField: this.sessionIdField, - key: this.sessionId, - } as any); - } -} +export * from "@langchain/community/stores/message/convex"; \ No newline at end of file diff --git a/langchain/src/stores/message/dynamodb.ts b/langchain/src/stores/message/dynamodb.ts index 519f351fee27..6329ed88f80b 100644 --- a/langchain/src/stores/message/dynamodb.ts +++ b/langchain/src/stores/message/dynamodb.ts @@ -1,198 +1 @@ -import { - DynamoDBClient, - DynamoDBClientConfig, - GetItemCommand, - GetItemCommandInput, - UpdateItemCommand, - UpdateItemCommandInput, - DeleteItemCommand, - DeleteItemCommandInput, - AttributeValue, -} from "@aws-sdk/client-dynamodb"; - -import { - StoredMessage, - BaseMessage, - BaseListChatMessageHistory, -} from "../../schema/index.js"; -import { - mapChatMessagesToStoredMessages, - mapStoredMessagesToChatMessages, -} from "./utils.js"; - -/** - * Interface defining the fields required to create an instance of - * 
`DynamoDBChatMessageHistory`. It includes the DynamoDB table name, - * session ID, partition key, sort key, message attribute name, and - * DynamoDB client configuration. - */ -export interface DynamoDBChatMessageHistoryFields { - tableName: string; - sessionId: string; - partitionKey?: string; - sortKey?: string; - messageAttributeName?: string; - config?: DynamoDBClientConfig; - key?: Record; -} - -/** - * Interface defining the structure of a chat message as it is stored in - * DynamoDB. - */ -interface DynamoDBSerializedChatMessage { - M: { - type: { - S: string; - }; - text: { - S: string; - }; - role?: { - S: string; - }; - }; -} - -/** - * Class providing methods to interact with a DynamoDB table to store and - * retrieve chat messages. It extends the `BaseListChatMessageHistory` - * class. - */ -export class DynamoDBChatMessageHistory extends BaseListChatMessageHistory { - lc_namespace = ["langchain", "stores", "message", "dynamodb"]; - - get lc_secrets(): { [key: string]: string } | undefined { - return { - "config.credentials.accessKeyId": "AWS_ACCESS_KEY_ID", - "config.credentials.secretAccessKey": "AWS_SECRETE_ACCESS_KEY", - "config.credentials.sessionToken": "AWS_SESSION_TOKEN", - }; - } - - private tableName: string; - - private sessionId: string; - - private client: DynamoDBClient; - - private partitionKey = "id"; - - private sortKey?: string; - - private messageAttributeName = "messages"; - - private dynamoKey: Record = {}; - - constructor({ - tableName, - sessionId, - partitionKey, - sortKey, - messageAttributeName, - config, - key = {}, - }: DynamoDBChatMessageHistoryFields) { - super(); - - this.tableName = tableName; - this.sessionId = sessionId; - this.client = new DynamoDBClient(config ?? {}); - this.partitionKey = partitionKey ?? this.partitionKey; - this.sortKey = sortKey; - this.messageAttributeName = - messageAttributeName ?? 
this.messageAttributeName; - this.dynamoKey = key; - - // override dynamoKey with partition key and sort key when key not specified - if (Object.keys(this.dynamoKey).length === 0) { - this.dynamoKey[this.partitionKey] = { S: this.sessionId }; - if (this.sortKey) { - this.dynamoKey[this.sortKey] = { S: this.sortKey }; - } - } - } - - /** - * Retrieves all messages from the DynamoDB table and returns them as an - * array of `BaseMessage` instances. - * @returns Array of stored messages - */ - async getMessages(): Promise { - const params: GetItemCommandInput = { - TableName: this.tableName, - Key: this.dynamoKey, - }; - - const response = await this.client.send(new GetItemCommand(params)); - const items = response.Item - ? response.Item[this.messageAttributeName]?.L ?? [] - : []; - const messages = items - .map((item) => ({ - type: item.M?.type.S, - data: { - role: item.M?.role?.S, - content: item.M?.text.S, - }, - })) - .filter( - (x): x is StoredMessage => - x.type !== undefined && x.data.content !== undefined - ); - return mapStoredMessagesToChatMessages(messages); - } - - /** - * Deletes all messages from the DynamoDB table. - */ - async clear(): Promise { - const params: DeleteItemCommandInput = { - TableName: this.tableName, - Key: this.dynamoKey, - }; - await this.client.send(new DeleteItemCommand(params)); - } - - /** - * Adds a new message to the DynamoDB table. - * @param message The message to be added to the DynamoDB table. 
- */ - async addMessage(message: BaseMessage) { - const messages = mapChatMessagesToStoredMessages([message]); - - const params: UpdateItemCommandInput = { - TableName: this.tableName, - Key: this.dynamoKey, - ExpressionAttributeNames: { - "#m": this.messageAttributeName, - }, - ExpressionAttributeValues: { - ":empty_list": { - L: [], - }, - ":m": { - L: messages.map((message) => { - const dynamoSerializedMessage: DynamoDBSerializedChatMessage = { - M: { - type: { - S: message.type, - }, - text: { - S: message.data.content, - }, - }, - }; - if (message.data.role) { - dynamoSerializedMessage.M.role = { S: message.data.role }; - } - return dynamoSerializedMessage; - }), - }, - }, - UpdateExpression: - "SET #m = list_append(if_not_exists(#m, :empty_list), :m)", - }; - await this.client.send(new UpdateItemCommand(params)); - } -} +export * from "@langchain/community/stores/message/dynamodb"; \ No newline at end of file diff --git a/langchain/src/stores/message/firestore.ts b/langchain/src/stores/message/firestore.ts index 8d342bc5b981..a4486a6303a9 100644 --- a/langchain/src/stores/message/firestore.ts +++ b/langchain/src/stores/message/firestore.ts @@ -1,195 +1 @@ -import type { AppOptions } from "firebase-admin"; -import { getApps, initializeApp } from "firebase-admin/app"; -import { - getFirestore, - DocumentData, - Firestore, - DocumentReference, - FieldValue, -} from "firebase-admin/firestore"; - -import { - StoredMessage, - BaseMessage, - BaseListChatMessageHistory, -} from "../../schema/index.js"; -import { - mapChatMessagesToStoredMessages, - mapStoredMessagesToChatMessages, -} from "./utils.js"; - -/** - * Interface for FirestoreDBChatMessageHistory. It includes the collection - * name, session ID, user ID, and optionally, the app index and - * configuration for the Firebase app. 
- */ -export interface FirestoreDBChatMessageHistory { - collectionName: string; - sessionId: string; - userId: string; - appIdx?: number; - config?: AppOptions; -} -/** - * Class for managing chat message history using Google's Firestore as a - * storage backend. Extends the BaseListChatMessageHistory class. - * @example - * ```typescript - * const chatHistory = new FirestoreChatMessageHistory({ - * collectionName: "langchain", - * sessionId: "lc-example", - * userId: "a@example.com", - * config: { projectId: "your-project-id" }, - * }); - * - * const chain = new ConversationChain({ - * llm: new ChatOpenAI(), - * memory: new BufferMemory({ chatHistory }), - * }); - * - * const response = await chain.invoke({ - * input: "What did I just say my name was?", - * }); - * console.log({ response }); - * ``` - */ -export class FirestoreChatMessageHistory extends BaseListChatMessageHistory { - lc_namespace = ["langchain", "stores", "message", "firestore"]; - - private collectionName: string; - - private sessionId: string; - - private userId: string; - - private appIdx: number; - - private config: AppOptions; - - private firestoreClient: Firestore; - - private document: DocumentReference | null; - - constructor({ - collectionName, - sessionId, - userId, - appIdx = 0, - config, - }: FirestoreDBChatMessageHistory) { - super(); - this.collectionName = collectionName; - this.sessionId = sessionId; - this.userId = userId; - this.document = null; - this.appIdx = appIdx; - if (config) this.config = config; - - try { - this.ensureFirestore(); - } catch (error) { - throw new Error(`Unknown response type`); - } - } - - private ensureFirestore(): void { - let app; - // Check if the app is already initialized else get appIdx - if (!getApps().length) app = initializeApp(this.config); - else app = getApps()[this.appIdx]; - - this.firestoreClient = getFirestore(app); - - this.document = this.firestoreClient - .collection(this.collectionName) - .doc(this.sessionId); - } - - /** - * Method 
to retrieve all messages from the Firestore collection - * associated with the current session. Returns an array of BaseMessage - * objects. - * @returns Array of stored messages - */ - async getMessages(): Promise { - if (!this.document) { - throw new Error("Document not initialized"); - } - - const querySnapshot = await this.document - .collection("messages") - .orderBy("createdAt", "asc") - .get() - .catch((err) => { - throw new Error(`Unknown response type: ${err.toString()}`); - }); - - const response: StoredMessage[] = []; - querySnapshot.forEach((doc) => { - const { type, data } = doc.data(); - response.push({ type, data }); - }); - - return mapStoredMessagesToChatMessages(response); - } - - /** - * Method to add a new message to the Firestore collection. The message is - * passed as a BaseMessage object. - * @param message The message to be added as a BaseMessage object. - */ - public async addMessage(message: BaseMessage) { - const messages = mapChatMessagesToStoredMessages([message]); - await this.upsertMessage(messages[0]); - } - - private async upsertMessage(message: StoredMessage): Promise { - if (!this.document) { - throw new Error("Document not initialized"); - } - await this.document.set( - { - id: this.sessionId, - user_id: this.userId, - }, - { merge: true } - ); - await this.document - .collection("messages") - .add({ - type: message.type, - data: message.data, - createdBy: this.userId, - createdAt: FieldValue.serverTimestamp(), - }) - .catch((err) => { - throw new Error(`Unknown response type: ${err.toString()}`); - }); - } - - /** - * Method to delete all messages from the Firestore collection associated - * with the current session. 
- */ - public async clear(): Promise { - if (!this.document) { - throw new Error("Document not initialized"); - } - await this.document - .collection("messages") - .get() - .then((querySnapshot) => { - querySnapshot.docs.forEach((snapshot) => { - snapshot.ref.delete().catch((err) => { - throw new Error(`Unknown response type: ${err.toString()}`); - }); - }); - }) - .catch((err) => { - throw new Error(`Unknown response type: ${err.toString()}`); - }); - await this.document.delete().catch((err) => { - throw new Error(`Unknown response type: ${err.toString()}`); - }); - } -} +export * from "@langchain/community/stores/message/firestore"; \ No newline at end of file diff --git a/langchain/src/stores/message/ioredis.ts b/langchain/src/stores/message/ioredis.ts index c705b9228769..203cf6a39e3e 100644 --- a/langchain/src/stores/message/ioredis.ts +++ b/langchain/src/stores/message/ioredis.ts @@ -1,102 +1 @@ -import { Redis, RedisOptions } from "ioredis"; -import { BaseMessage, BaseListChatMessageHistory } from "../../schema/index.js"; -import { - mapChatMessagesToStoredMessages, - mapStoredMessagesToChatMessages, -} from "./utils.js"; - -/** - * Type for the input parameter of the RedisChatMessageHistory - * constructor. It includes fields for the session ID, session TTL, Redis - * URL, Redis configuration, and Redis client. - */ -export type RedisChatMessageHistoryInput = { - sessionId: string; - sessionTTL?: number; - url?: string; - config?: RedisOptions; - client?: Redis; -}; - -/** - * Class used to store chat message history in Redis. It provides methods - * to add, retrieve, and clear messages from the chat history. 
- * @example - * ```typescript - * const chatHistory = new RedisChatMessageHistory({ - * sessionId: new Date().toISOString(), - * sessionTTL: 300, - * url: "redis: - * }); - * - * const chain = new ConversationChain({ - * llm: new ChatOpenAI({ temperature: 0 }), - * memory: { chatHistory }, - * }); - * - * const response = await chain.invoke({ - * input: "What did I just say my name was?", - * }); - * console.log({ response }); - * ``` - */ -export class RedisChatMessageHistory extends BaseListChatMessageHistory { - lc_namespace = ["langchain", "stores", "message", "ioredis"]; - - get lc_secrets() { - return { - url: "REDIS_URL", - "config.username": "REDIS_USERNAME", - "config.password": "REDIS_PASSWORD", - }; - } - - public client: Redis; - - private sessionId: string; - - private sessionTTL?: number; - - constructor(fields: RedisChatMessageHistoryInput) { - super(fields); - - const { sessionId, sessionTTL, url, config, client } = fields; - this.client = (client ?? - (url ? new Redis(url) : new Redis(config ?? {}))) as Redis; - this.sessionId = sessionId; - this.sessionTTL = sessionTTL; - } - - /** - * Retrieves all messages from the chat history. - * @returns Promise that resolves with an array of BaseMessage instances. - */ - async getMessages(): Promise { - const rawStoredMessages = await this.client.lrange(this.sessionId, 0, -1); - const orderedMessages = rawStoredMessages - .reverse() - .map((message) => JSON.parse(message)); - return mapStoredMessagesToChatMessages(orderedMessages); - } - - /** - * Adds a message to the chat history. - * @param message The message to add to the chat history. - * @returns Promise that resolves when the message has been added. 
- */ - async addMessage(message: BaseMessage): Promise { - const messageToAdd = mapChatMessagesToStoredMessages([message]); - await this.client.lpush(this.sessionId, JSON.stringify(messageToAdd[0])); - if (this.sessionTTL) { - await this.client.expire(this.sessionId, this.sessionTTL); - } - } - - /** - * Clears all messages from the chat history. - * @returns Promise that resolves when the chat history has been cleared. - */ - async clear(): Promise { - await this.client.del(this.sessionId); - } -} +export * from "@langchain/community/stores/message/ioredis"; \ No newline at end of file diff --git a/langchain/src/stores/message/momento.ts b/langchain/src/stores/message/momento.ts index c902980f694b..b1aa8e025ee6 100644 --- a/langchain/src/stores/message/momento.ts +++ b/langchain/src/stores/message/momento.ts @@ -1,198 +1 @@ -/* eslint-disable no-instanceof/no-instanceof */ -import { - CacheDelete, - CacheListFetch, - CacheListPushBack, - ICacheClient, - InvalidArgumentError, - CollectionTtl, -} from "@gomomento/sdk-core"; -import { - BaseMessage, - BaseListChatMessageHistory, - StoredMessage, -} from "../../schema/index.js"; -import { - mapChatMessagesToStoredMessages, - mapStoredMessagesToChatMessages, -} from "./utils.js"; -import { ensureCacheExists } from "../../util/momento.js"; - -/** - * The settings to instantiate the Momento chat message history. - */ -export interface MomentoChatMessageHistoryProps { - /** - * The session ID to use to store the data. - */ - sessionId: string; - /** - * The Momento cache client. - */ - client: ICacheClient; - /** - * The name of the cache to use to store the data. - */ - cacheName: string; - /** - * The time to live for the cache items in seconds. - * If not specified, the cache client default is used. - */ - sessionTtl?: number; - /** - * If true, ensure that the cache exists before returning. - * If false, the cache is not checked for existence. - * Defaults to true. 
- */ - ensureCacheExists?: true; -} - -/** - * A class that stores chat message history using Momento Cache. It - * interacts with a Momento cache client to perform operations like - * fetching, adding, and deleting messages. - * @example - * ```typescript - * const chatHistory = await MomentoChatMessageHistory.fromProps({ - * client: new CacheClient({ - * configuration: Configurations.Laptop.v1(), - * credentialProvider: CredentialProvider.fromEnvironmentVariable({ - * environmentVariableName: "MOMENTO_API_KEY", - * }), - * defaultTtlSeconds: 60 * 60 * 24, - * }), - * cacheName: "langchain", - * sessionId: new Date().toISOString(), - * sessionTtl: 300, - * }); - * - * const messages = await chatHistory.getMessages(); - * console.log({ messages }); - * ``` - */ -export class MomentoChatMessageHistory extends BaseListChatMessageHistory { - lc_namespace = ["langchain", "stores", "message", "momento"]; - - private readonly sessionId: string; - - private readonly client: ICacheClient; - - private readonly cacheName: string; - - private readonly sessionTtl: CollectionTtl; - - private constructor(props: MomentoChatMessageHistoryProps) { - super(); - this.sessionId = props.sessionId; - this.client = props.client; - this.cacheName = props.cacheName; - - this.validateTtlSeconds(props.sessionTtl); - this.sessionTtl = - props.sessionTtl !== undefined - ? CollectionTtl.of(props.sessionTtl) - : CollectionTtl.fromCacheTtl(); - } - - /** - * Create a new chat message history backed by Momento. - * - * @param {MomentoCacheProps} props The settings to instantiate the Momento chat message history. - * @param {string} props.sessionId The session ID to use to store the data. - * @param {ICacheClient} props.client The Momento cache client. - * @param {string} props.cacheName The name of the cache to use to store the data. - * @param {number} props.sessionTtl The time to live for the cache items in seconds. - * If not specified, the cache client default is used. 
- * @param {boolean} props.ensureCacheExists If true, ensure that the cache exists before returning. - * If false, the cache is not checked for existence. - * @throws {InvalidArgumentError} If {@link props.sessionTtl} is not strictly positive. - * @returns A new chat message history backed by Momento. - */ - public static async fromProps( - props: MomentoChatMessageHistoryProps - ): Promise { - const instance = new MomentoChatMessageHistory(props); - if (props.ensureCacheExists || props.ensureCacheExists === undefined) { - await ensureCacheExists(props.client, props.cacheName); - } - return instance; - } - - /** - * Validate the user-specified TTL, if provided, is strictly positive. - * @param ttlSeconds The TTL to validate. - */ - private validateTtlSeconds(ttlSeconds?: number): void { - if (ttlSeconds !== undefined && ttlSeconds <= 0) { - throw new InvalidArgumentError("ttlSeconds must be positive."); - } - } - - /** - * Fetches messages from the cache. - * @returns A Promise that resolves to an array of BaseMessage instances. - */ - public async getMessages(): Promise { - const fetchResponse = await this.client.listFetch( - this.cacheName, - this.sessionId - ); - - let messages: StoredMessage[] = []; - if (fetchResponse instanceof CacheListFetch.Hit) { - messages = fetchResponse - .valueList() - .map((serializedStoredMessage) => JSON.parse(serializedStoredMessage)); - } else if (fetchResponse instanceof CacheListFetch.Miss) { - // pass - } else if (fetchResponse instanceof CacheListFetch.Error) { - throw fetchResponse.innerException(); - } else { - throw new Error(`Unknown response type: ${fetchResponse.toString()}`); - } - return mapStoredMessagesToChatMessages(messages); - } - - /** - * Adds a message to the cache. - * @param message The BaseMessage instance to add to the cache. - * @returns A Promise that resolves when the message has been added. 
- */ - public async addMessage(message: BaseMessage): Promise { - const messageToAdd = JSON.stringify( - mapChatMessagesToStoredMessages([message])[0] - ); - - const pushResponse = await this.client.listPushBack( - this.cacheName, - this.sessionId, - messageToAdd, - { ttl: this.sessionTtl } - ); - if (pushResponse instanceof CacheListPushBack.Success) { - // pass - } else if (pushResponse instanceof CacheListPushBack.Error) { - throw pushResponse.innerException(); - } else { - throw new Error(`Unknown response type: ${pushResponse.toString()}`); - } - } - - /** - * Deletes all messages from the cache. - * @returns A Promise that resolves when all messages have been deleted. - */ - public async clear(): Promise { - const deleteResponse = await this.client.delete( - this.cacheName, - this.sessionId - ); - if (deleteResponse instanceof CacheDelete.Success) { - // pass - } else if (deleteResponse instanceof CacheDelete.Error) { - throw deleteResponse.innerException(); - } else { - throw new Error(`Unknown response type: ${deleteResponse.toString()}`); - } - } -} +export * from "@langchain/community/stores/message/momento"; \ No newline at end of file diff --git a/langchain/src/stores/message/mongodb.ts b/langchain/src/stores/message/mongodb.ts index ccca599f1c91..333120d9f268 100644 --- a/langchain/src/stores/message/mongodb.ts +++ b/langchain/src/stores/message/mongodb.ts @@ -1,59 +1 @@ -import { Collection, Document as MongoDBDocument, ObjectId } from "mongodb"; -import { BaseMessage, BaseListChatMessageHistory } from "../../schema/index.js"; -import { - mapChatMessagesToStoredMessages, - mapStoredMessagesToChatMessages, -} from "./utils.js"; - -export interface MongoDBChatMessageHistoryInput { - collection: Collection; - sessionId: string; -} - -/** - * @example - * ```typescript - * const chatHistory = new MongoDBChatMessageHistory({ - * collection: myCollection, - * sessionId: 'unique-session-id', - * }); - * const messages = await chatHistory.getMessages(); - * 
await chatHistory.clear(); - * ``` - */ -export class MongoDBChatMessageHistory extends BaseListChatMessageHistory { - lc_namespace = ["langchain", "stores", "message", "mongodb"]; - - private collection: Collection; - - private sessionId: string; - - constructor({ collection, sessionId }: MongoDBChatMessageHistoryInput) { - super(); - this.collection = collection; - this.sessionId = sessionId; - } - - async getMessages(): Promise { - const document = await this.collection.findOne({ - _id: new ObjectId(this.sessionId), - }); - const messages = document?.messages || []; - return mapStoredMessagesToChatMessages(messages); - } - - async addMessage(message: BaseMessage): Promise { - const messages = mapChatMessagesToStoredMessages([message]); - await this.collection.updateOne( - { _id: new ObjectId(this.sessionId) }, - { - $push: { messages: { $each: messages } }, - }, - { upsert: true } - ); - } - - async clear(): Promise { - await this.collection.deleteOne({ _id: new ObjectId(this.sessionId) }); - } -} +export * from "@langchain/community/stores/message/mongodb"; \ No newline at end of file diff --git a/langchain/src/stores/message/planetscale.ts b/langchain/src/stores/message/planetscale.ts index 10ca1cddf810..7f7a9018b027 100644 --- a/langchain/src/stores/message/planetscale.ts +++ b/langchain/src/stores/message/planetscale.ts @@ -1,210 +1 @@ -import { - Client as PlanetScaleClient, - Config as PlanetScaleConfig, - Connection as PlanetScaleConnection, -} from "@planetscale/database"; -import { - BaseMessage, - BaseListChatMessageHistory, - StoredMessage, - StoredMessageData, -} from "../../schema/index.js"; -import { - mapChatMessagesToStoredMessages, - mapStoredMessagesToChatMessages, -} from "./utils.js"; - -/** - * Type definition for the input parameters required when instantiating a - * PlanetScaleChatMessageHistory object. 
- */ -export type PlanetScaleChatMessageHistoryInput = { - tableName?: string; - sessionId: string; - config?: PlanetScaleConfig; - client?: PlanetScaleClient; -}; - -/** - * Interface for the data transfer object used when selecting stored - * messages from the PlanetScale database. - */ -interface selectStoredMessagesDTO { - id: string; - session_id: string; - type: string; - content: string; - role: string | null; - name: string | null; - additional_kwargs: string; -} - -/** - * Class for storing and retrieving chat message history from a - * PlanetScale database. Extends the BaseListChatMessageHistory class. - * @example - * ```typescript - * const chatHistory = new PlanetScaleChatMessageHistory({ - * tableName: "stored_message", - * sessionId: "lc-example", - * config: { - * url: "ADD_YOURS_HERE", - * }, - * }); - * const chain = new ConversationChain({ - * llm: new ChatOpenAI(), - * memory: chatHistory, - * }); - * const response = await chain.invoke({ - * input: "What did I just say my name was?", - * }); - * console.log({ response }); - * ``` - */ -export class PlanetScaleChatMessageHistory extends BaseListChatMessageHistory { - lc_namespace = ["langchain", "stores", "message", "planetscale"]; - - get lc_secrets() { - return { - "config.host": "PLANETSCALE_HOST", - "config.username": "PLANETSCALE_USERNAME", - "config.password": "PLANETSCALE_PASSWORD", - "config.url": "PLANETSCALE_DATABASE_URL", - }; - } - - public client: PlanetScaleClient; - - private connection: PlanetScaleConnection; - - private tableName: string; - - private sessionId: string; - - private tableInitialized: boolean; - - constructor(fields: PlanetScaleChatMessageHistoryInput) { - super(fields); - - const { sessionId, config, client, tableName } = fields; - - if (client) { - this.client = client; - } else if (config) { - this.client = new PlanetScaleClient(config); - } else { - throw new Error( - "Either a client or config must be provided to PlanetScaleChatMessageHistory" - ); - } - - 
this.connection = this.client.connection(); - - this.tableName = tableName || "langchain_chat_histories"; - this.tableInitialized = false; - this.sessionId = sessionId; - } - - /** - * Private method to ensure that the necessary table exists in the - * PlanetScale database before performing any operations. If the table - * does not exist, it is created. - * @returns Promise that resolves to void. - */ - private async ensureTable(): Promise { - if (this.tableInitialized) { - return; - } - - const query = `CREATE TABLE IF NOT EXISTS ${this.tableName} (id BINARY(16) PRIMARY KEY, session_id VARCHAR(255), type VARCHAR(255), content VARCHAR(255), role VARCHAR(255), name VARCHAR(255), additional_kwargs VARCHAR(255));`; - - await this.connection.execute(query); - - const indexQuery = `ALTER TABLE ${this.tableName} MODIFY id BINARY(16) DEFAULT (UUID_TO_BIN(UUID()));`; - - await this.connection.execute(indexQuery); - - this.tableInitialized = true; - } - - /** - * Method to retrieve all messages from the PlanetScale database for the - * current session. - * @returns Promise that resolves to an array of BaseMessage objects. 
- */ - async getMessages(): Promise { - await this.ensureTable(); - - const query = `SELECT * FROM ${this.tableName} WHERE session_id = :session_id`; - const params = { - session_id: this.sessionId, - }; - - const rawStoredMessages = await this.connection.execute(query, params); - const storedMessagesObject = - rawStoredMessages.rows as unknown as selectStoredMessagesDTO[]; - - const orderedMessages: StoredMessage[] = storedMessagesObject.map( - (message) => { - const data = { - content: message.content, - additional_kwargs: JSON.parse(message.additional_kwargs), - } as StoredMessageData; - - if (message.role) { - data.role = message.role; - } - - if (message.name) { - data.name = message.name; - } - - return { - type: message.type, - data, - }; - } - ); - return mapStoredMessagesToChatMessages(orderedMessages); - } - - /** - * Method to add a new message to the PlanetScale database for the current - * session. - * @param message The BaseMessage object to be added to the database. - * @returns Promise that resolves to void. - */ - async addMessage(message: BaseMessage): Promise { - await this.ensureTable(); - - const messageToAdd = mapChatMessagesToStoredMessages([message]); - - const query = `INSERT INTO ${this.tableName} (session_id, type, content, role, name, additional_kwargs) VALUES (:session_id, :type, :content, :role, :name, :additional_kwargs)`; - - const params = { - session_id: this.sessionId, - type: messageToAdd[0].type, - content: messageToAdd[0].data.content, - role: messageToAdd[0].data.role, - name: messageToAdd[0].data.name, - additional_kwargs: JSON.stringify(messageToAdd[0].data.additional_kwargs), - }; - - await this.connection.execute(query, params); - } - - /** - * Method to delete all messages from the PlanetScale database for the - * current session. - * @returns Promise that resolves to void. 
- */ - async clear(): Promise { - await this.ensureTable(); - - const query = `DELETE FROM ${this.tableName} WHERE session_id = :session_id`; - const params = { - session_id: this.sessionId, - }; - await this.connection.execute(query, params); - } -} +export * from "@langchain/community/stores/message/planetscale"; \ No newline at end of file diff --git a/langchain/src/stores/message/redis.ts b/langchain/src/stores/message/redis.ts index fef97ac84af7..0d46c48cea30 100644 --- a/langchain/src/stores/message/redis.ts +++ b/langchain/src/stores/message/redis.ts @@ -1,129 +1 @@ -// TODO: Deprecate in favor of stores/message/ioredis.ts when LLMCache and other implementations are ported -import { - createClient, - RedisClientOptions, - RedisClientType, - RedisModules, - RedisFunctions, - RedisScripts, -} from "redis"; -import { BaseMessage, BaseListChatMessageHistory } from "../../schema/index.js"; -import { - mapChatMessagesToStoredMessages, - mapStoredMessagesToChatMessages, -} from "./utils.js"; - -/** - * Type for the input to the `RedisChatMessageHistory` constructor. - */ -export type RedisChatMessageHistoryInput = { - sessionId: string; - sessionTTL?: number; - config?: RedisClientOptions; - // Typing issues with createClient output: https://github.com/redis/node-redis/issues/1865 - // eslint-disable-next-line @typescript-eslint/no-explicit-any - client?: any; -}; - -/** - * Class for storing chat message history using Redis. Extends the - * `BaseListChatMessageHistory` class. 
- * @example - * ```typescript - * const chatHistory = new RedisChatMessageHistory({ - * sessionId: new Date().toISOString(), - * sessionTTL: 300, - * url: "redis: - * }); - * - * const chain = new ConversationChain({ - * llm: new ChatOpenAI({ modelName: "gpt-3.5-turbo", temperature: 0 }), - * memory: { chatHistory }, - * }); - * - * const response = await chain.invoke({ - * input: "What did I just say my name was?", - * }); - * console.log({ response }); - * ``` - */ -export class RedisChatMessageHistory extends BaseListChatMessageHistory { - lc_namespace = ["langchain", "stores", "message", "redis"]; - - get lc_secrets() { - return { - "config.url": "REDIS_URL", - "config.username": "REDIS_USERNAME", - "config.password": "REDIS_PASSWORD", - }; - } - - public client: RedisClientType; - - private sessionId: string; - - private sessionTTL?: number; - - constructor(fields: RedisChatMessageHistoryInput) { - super(fields); - - const { sessionId, sessionTTL, config, client } = fields; - this.client = (client ?? createClient(config ?? {})) as RedisClientType< - RedisModules, - RedisFunctions, - RedisScripts - >; - this.sessionId = sessionId; - this.sessionTTL = sessionTTL; - } - - /** - * Ensures the Redis client is ready to perform operations. If the client - * is not ready, it attempts to connect to the Redis database. - * @returns Promise resolving to true when the client is ready. - */ - async ensureReadiness() { - if (!this.client.isReady) { - await this.client.connect(); - } - return true; - } - - /** - * Retrieves all chat messages from the Redis database for the current - * session. - * @returns Promise resolving to an array of `BaseMessage` instances. 
- */ - async getMessages(): Promise { - await this.ensureReadiness(); - const rawStoredMessages = await this.client.lRange(this.sessionId, 0, -1); - const orderedMessages = rawStoredMessages - .reverse() - .map((message) => JSON.parse(message)); - return mapStoredMessagesToChatMessages(orderedMessages); - } - - /** - * Adds a new chat message to the Redis database for the current session. - * @param message The `BaseMessage` instance to add. - * @returns Promise resolving when the message has been added. - */ - async addMessage(message: BaseMessage): Promise { - await this.ensureReadiness(); - const messageToAdd = mapChatMessagesToStoredMessages([message]); - await this.client.lPush(this.sessionId, JSON.stringify(messageToAdd[0])); - if (this.sessionTTL) { - await this.client.expire(this.sessionId, this.sessionTTL); - } - } - - /** - * Deletes all chat messages from the Redis database for the current - * session. - * @returns Promise resolving when the messages have been deleted. - */ - async clear(): Promise { - await this.ensureReadiness(); - await this.client.del(this.sessionId); - } -} +export * from "@langchain/community/stores/message/redis"; \ No newline at end of file diff --git a/langchain/src/stores/message/upstash_redis.ts b/langchain/src/stores/message/upstash_redis.ts index 0d8f318e07b0..ffdb9ff36246 100644 --- a/langchain/src/stores/message/upstash_redis.ts +++ b/langchain/src/stores/message/upstash_redis.ts @@ -1,95 +1 @@ -import { Redis, type RedisConfigNodejs } from "@upstash/redis"; -import { - StoredMessage, - BaseMessage, - BaseListChatMessageHistory, -} from "../../schema/index.js"; -import { - mapChatMessagesToStoredMessages, - mapStoredMessagesToChatMessages, -} from "./utils.js"; - -/** - * Type definition for the input parameters required to initialize an - * instance of the UpstashRedisChatMessageHistory class. 
- */ -export type UpstashRedisChatMessageHistoryInput = { - sessionId: string; - sessionTTL?: number; - config?: RedisConfigNodejs; - client?: Redis; -}; - -/** - * Class used to store chat message history in Redis. It provides methods - * to add, get, and clear messages. - */ -export class UpstashRedisChatMessageHistory extends BaseListChatMessageHistory { - lc_namespace = ["langchain", "stores", "message", "upstash_redis"]; - - get lc_secrets() { - return { - "config.url": "UPSTASH_REDIS_REST_URL", - "config.token": "UPSTASH_REDIS_REST_TOKEN", - }; - } - - public client: Redis; - - private sessionId: string; - - private sessionTTL?: number; - - constructor(fields: UpstashRedisChatMessageHistoryInput) { - super(fields); - const { sessionId, sessionTTL, config, client } = fields; - if (client) { - this.client = client; - } else if (config) { - this.client = new Redis(config); - } else { - throw new Error( - `Upstash Redis message stores require either a config object or a pre-configured client.` - ); - } - this.sessionId = sessionId; - this.sessionTTL = sessionTTL; - } - - /** - * Retrieves the chat messages from the Redis database. - * @returns An array of BaseMessage instances representing the chat history. - */ - async getMessages(): Promise { - const rawStoredMessages: StoredMessage[] = - await this.client.lrange(this.sessionId, 0, -1); - - const orderedMessages = rawStoredMessages.reverse(); - const previousMessages = orderedMessages.filter( - (x): x is StoredMessage => - x.type !== undefined && x.data.content !== undefined - ); - return mapStoredMessagesToChatMessages(previousMessages); - } - - /** - * Adds a new message to the chat history in the Redis database. - * @param message The message to be added to the chat history. - * @returns Promise resolving to void. 
- */ - async addMessage(message: BaseMessage): Promise { - const messageToAdd = mapChatMessagesToStoredMessages([message]); - await this.client.lpush(this.sessionId, JSON.stringify(messageToAdd[0])); - if (this.sessionTTL) { - await this.client.expire(this.sessionId, this.sessionTTL); - } - } - - /** - * Deletes all messages from the chat history in the Redis database. - * @returns Promise resolving to void. - */ - async clear(): Promise { - await this.client.del(this.sessionId); - } -} +export * from "@langchain/community/stores/message/upstash_redis"; \ No newline at end of file diff --git a/langchain/src/stores/message/utils.ts b/langchain/src/stores/message/utils.ts index 81a958e5a7e2..b490d23f9991 100644 --- a/langchain/src/stores/message/utils.ts +++ b/langchain/src/stores/message/utils.ts @@ -1,31 +1 @@ -import { - BaseMessage, - StoredMessage, - mapStoredMessageToChatMessage, -} from "../../schema/index.js"; - -/** - * Transforms an array of `StoredMessage` instances into an array of - * `BaseMessage` instances. It uses the `mapV1MessageToStoredMessage` - * function to ensure all messages are in the `StoredMessage` format, then - * creates new instances of the appropriate `BaseMessage` subclass based - * on the type of each message. This function is used to prepare stored - * messages for use in a chat context. - */ -export function mapStoredMessagesToChatMessages( - messages: StoredMessage[] -): BaseMessage[] { - return messages.map(mapStoredMessageToChatMessage); -} - -/** - * Transforms an array of `BaseMessage` instances into an array of - * `StoredMessage` instances. It does this by calling the `toDict` method - * on each `BaseMessage`, which returns a `StoredMessage`. This function - * is used to prepare chat messages for storage. 
- */ -export function mapChatMessagesToStoredMessages( - messages: BaseMessage[] -): StoredMessage[] { - return messages.map((message) => message.toDict()); -} +export { mapStoredMessagesToChatMessages, mapChatMessagesToStoredMessages } from "@langchain/core/messages"; \ No newline at end of file diff --git a/langchain/src/stores/message/xata.ts b/langchain/src/stores/message/xata.ts index b459f38de8f2..32b73221198b 100644 --- a/langchain/src/stores/message/xata.ts +++ b/langchain/src/stores/message/xata.ts @@ -1,243 +1 @@ -import { - BaseClient, - BaseClientOptions, - GetTableSchemaResponse, - Schemas, - XataApiClient, - parseWorkspacesUrlParts, -} from "@xata.io/client"; -import { - BaseMessage, - BaseListChatMessageHistory, - StoredMessage, - StoredMessageData, -} from "../../schema/index.js"; -import { - mapChatMessagesToStoredMessages, - mapStoredMessagesToChatMessages, -} from "./utils.js"; - -/** - * An object type that represents the input for the XataChatMessageHistory - * class. - */ -export type XataChatMessageHistoryInput = { - sessionId: string; - config?: BaseClientOptions; - client?: XataClient; - table?: string; - createTable?: boolean; - apiKey?: string; -}; - -/** - * An interface that represents the data transfer object for stored - * messages. - */ -interface storedMessagesDTO { - id: string; - sessionId: string; - type: string; - content: string; - role?: string; - name?: string; - additionalKwargs: string; -} - -const chatMemoryColumns: Schemas.Column[] = [ - { name: "sessionId", type: "string" }, - { name: "type", type: "string" }, - { name: "role", type: "string" }, - { name: "content", type: "text" }, - { name: "name", type: "string" }, - { name: "additionalKwargs", type: "text" }, -]; - -/** - * A class for managing chat message history using Xata.io client. It - * extends the BaseListChatMessageHistory class and provides methods to - * get, add, and clear messages. 
It also ensures the existence of a table - * where the chat messages are stored. - * @example - * ```typescript - * const chatHistory = new XataChatMessageHistory({ - * table: "messages", - * sessionId: new Date().toISOString(), - * client: new BaseClient({ - * databaseURL: process.env.XATA_DB_URL, - * apiKey: process.env.XATA_API_KEY, - * branch: "main", - * }), - * apiKey: process.env.XATA_API_KEY, - * }); - * - * const chain = new ConversationChain({ - * llm: new ChatOpenAI(), - * memory: new BufferMemory({ chatHistory }), - * }); - * - * const response = await chain.invoke({ - * input: "What did I just say my name was?", - * }); - * console.log({ response }); - * ``` - */ -export class XataChatMessageHistory< - XataClient extends BaseClient -> extends BaseListChatMessageHistory { - lc_namespace = ["langchain", "stores", "message", "xata"]; - - public client: XataClient; - - private sessionId: string; - - private table: string; - - private tableInitialized: boolean; - - private createTable: boolean; - - private apiClient: XataApiClient; - - constructor(fields: XataChatMessageHistoryInput) { - super(fields); - - const { sessionId, config, client, table } = fields; - this.sessionId = sessionId; - this.table = table || "memory"; - if (client) { - this.client = client; - } else if (config) { - this.client = new BaseClient(config) as XataClient; - } else { - throw new Error( - "Either a client or a config must be provided to XataChatMessageHistoryInput" - ); - } - if (fields.createTable !== false) { - this.createTable = true; - const apiKey = fields.apiKey || fields.config?.apiKey; - if (!apiKey) { - throw new Error( - "If createTable is set, an apiKey must be provided to XataChatMessageHistoryInput, either directly or through the config object" - ); - } - this.apiClient = new XataApiClient({ apiKey }); - } else { - this.createTable = false; - } - this.tableInitialized = false; - } - - /** - * Retrieves all messages associated with the session ID, ordered by - * 
creation time. - * @returns A promise that resolves to an array of BaseMessage instances. - */ - async getMessages(): Promise { - await this.ensureTable(); - const records = await this.client.db[this.table] - .filter({ sessionId: this.sessionId }) - .sort("xata.createdAt", "asc") - .getAll(); - - const rawStoredMessages = records as unknown as storedMessagesDTO[]; - const orderedMessages: StoredMessage[] = rawStoredMessages.map( - (message: storedMessagesDTO) => { - const data = { - content: message.content, - additional_kwargs: JSON.parse(message.additionalKwargs), - } as StoredMessageData; - if (message.role) { - data.role = message.role; - } - if (message.name) { - data.name = message.name; - } - - return { - type: message.type, - data, - }; - } - ); - return mapStoredMessagesToChatMessages(orderedMessages); - } - - /** - * Adds a new message to the database. - * @param message The BaseMessage instance to be added. - * @returns A promise that resolves when the message has been added. - */ - async addMessage(message: BaseMessage): Promise { - await this.ensureTable(); - const messageToAdd = mapChatMessagesToStoredMessages([message]); - await this.client.db[this.table].create({ - sessionId: this.sessionId, - type: messageToAdd[0].type, - content: messageToAdd[0].data.content, - role: messageToAdd[0].data.role, - name: messageToAdd[0].data.name, - additionalKwargs: JSON.stringify(messageToAdd[0].data.additional_kwargs), - }); - } - - /** - * Deletes all messages associated with the session ID. - * @returns A promise that resolves when the messages have been deleted. - */ - async clear(): Promise { - await this.ensureTable(); - const records = await this.client.db[this.table] - .select(["id"]) - .filter({ sessionId: this.sessionId }) - .getAll(); - const ids = records.map((m) => m.id); - await this.client.db[this.table].delete(ids); - } - - /** - * Checks if the table exists and creates it if it doesn't. This method is - * called before any operation on the table. 
- * @returns A promise that resolves when the table has been ensured. - */ - private async ensureTable(): Promise { - if (!this.createTable) { - return; - } - if (this.tableInitialized) { - return; - } - - const { databaseURL, branch } = await this.client.getConfig(); - const [, , host, , database] = databaseURL.split("/"); - const urlParts = parseWorkspacesUrlParts(host); - if (urlParts == null) { - throw new Error("Invalid databaseURL"); - } - const { workspace, region } = urlParts; - const tableParams = { - workspace, - region, - database, - branch, - table: this.table, - }; - - let schema: GetTableSchemaResponse | null = null; - try { - schema = await this.apiClient.tables.getTableSchema(tableParams); - } catch (e) { - // pass - } - if (schema == null) { - await this.apiClient.tables.createTable(tableParams); - await this.apiClient.tables.setTableSchema({ - ...tableParams, - schema: { - columns: chatMemoryColumns, - }, - }); - } - } -} +export * from "@langchain/community/stores/message/xata"; \ No newline at end of file diff --git a/libs/langchain-community/.gitignore b/libs/langchain-community/.gitignore index fb99a6ef7063..7f3a870e6c10 100644 --- a/libs/langchain-community/.gitignore +++ b/libs/langchain-community/.gitignore @@ -313,3 +313,57 @@ graphs/neo4j_graph.d.ts utils/event_source_parse.cjs utils/event_source_parse.js utils/event_source_parse.d.ts +document_transformers/html_to_text.cjs +document_transformers/html_to_text.js +document_transformers/html_to_text.d.ts +document_transformers/mozilla_readability.cjs +document_transformers/mozilla_readability.js +document_transformers/mozilla_readability.d.ts +storage/convex.cjs +storage/convex.js +storage/convex.d.ts +storage/ioredis.cjs +storage/ioredis.js +storage/ioredis.d.ts +storage/upstash_redis.cjs +storage/upstash_redis.js +storage/upstash_redis.d.ts +storage/vercel_kv.cjs +storage/vercel_kv.js +storage/vercel_kv.d.ts +stores/message/cassandra.cjs +stores/message/cassandra.js 
+stores/message/cassandra.d.ts +stores/message/cloudflare_d1.cjs +stores/message/cloudflare_d1.js +stores/message/cloudflare_d1.d.ts +stores/message/convex.cjs +stores/message/convex.js +stores/message/convex.d.ts +stores/message/dynamodb.cjs +stores/message/dynamodb.js +stores/message/dynamodb.d.ts +stores/message/firestore.cjs +stores/message/firestore.js +stores/message/firestore.d.ts +stores/message/ioredis.cjs +stores/message/ioredis.js +stores/message/ioredis.d.ts +stores/message/momento.cjs +stores/message/momento.js +stores/message/momento.d.ts +stores/message/mongodb.cjs +stores/message/mongodb.js +stores/message/mongodb.d.ts +stores/message/planetscale.cjs +stores/message/planetscale.js +stores/message/planetscale.d.ts +stores/message/redis.cjs +stores/message/redis.js +stores/message/redis.d.ts +stores/message/upstash_redis.cjs +stores/message/upstash_redis.js +stores/message/upstash_redis.d.ts +stores/message/xata.cjs +stores/message/xata.js +stores/message/xata.d.ts diff --git a/libs/langchain-community/package.json b/libs/langchain-community/package.json index c917eb6600e5..7b9556373417 100644 --- a/libs/langchain-community/package.json +++ b/libs/langchain-community/package.json @@ -1120,6 +1120,96 @@ "import": "./utils/event_source_parse.js", "require": "./utils/event_source_parse.cjs" }, + "./document_transformers/html_to_text": { + "types": "./document_transformers/html_to_text.d.ts", + "import": "./document_transformers/html_to_text.js", + "require": "./document_transformers/html_to_text.cjs" + }, + "./document_transformers/mozilla_readability": { + "types": "./document_transformers/mozilla_readability.d.ts", + "import": "./document_transformers/mozilla_readability.js", + "require": "./document_transformers/mozilla_readability.cjs" + }, + "./storage/convex": { + "types": "./storage/convex.d.ts", + "import": "./storage/convex.js", + "require": "./storage/convex.cjs" + }, + "./storage/ioredis": { + "types": "./storage/ioredis.d.ts", + "import": 
"./storage/ioredis.js", + "require": "./storage/ioredis.cjs" + }, + "./storage/upstash_redis": { + "types": "./storage/upstash_redis.d.ts", + "import": "./storage/upstash_redis.js", + "require": "./storage/upstash_redis.cjs" + }, + "./storage/vercel_kv": { + "types": "./storage/vercel_kv.d.ts", + "import": "./storage/vercel_kv.js", + "require": "./storage/vercel_kv.cjs" + }, + "./stores/message/cassandra": { + "types": "./stores/message/cassandra.d.ts", + "import": "./stores/message/cassandra.js", + "require": "./stores/message/cassandra.cjs" + }, + "./stores/message/cloudflare_d1": { + "types": "./stores/message/cloudflare_d1.d.ts", + "import": "./stores/message/cloudflare_d1.js", + "require": "./stores/message/cloudflare_d1.cjs" + }, + "./stores/message/convex": { + "types": "./stores/message/convex.d.ts", + "import": "./stores/message/convex.js", + "require": "./stores/message/convex.cjs" + }, + "./stores/message/dynamodb": { + "types": "./stores/message/dynamodb.d.ts", + "import": "./stores/message/dynamodb.js", + "require": "./stores/message/dynamodb.cjs" + }, + "./stores/message/firestore": { + "types": "./stores/message/firestore.d.ts", + "import": "./stores/message/firestore.js", + "require": "./stores/message/firestore.cjs" + }, + "./stores/message/ioredis": { + "types": "./stores/message/ioredis.d.ts", + "import": "./stores/message/ioredis.js", + "require": "./stores/message/ioredis.cjs" + }, + "./stores/message/momento": { + "types": "./stores/message/momento.d.ts", + "import": "./stores/message/momento.js", + "require": "./stores/message/momento.cjs" + }, + "./stores/message/mongodb": { + "types": "./stores/message/mongodb.d.ts", + "import": "./stores/message/mongodb.js", + "require": "./stores/message/mongodb.cjs" + }, + "./stores/message/planetscale": { + "types": "./stores/message/planetscale.d.ts", + "import": "./stores/message/planetscale.js", + "require": "./stores/message/planetscale.cjs" + }, + "./stores/message/redis": { + "types": 
"./stores/message/redis.d.ts", + "import": "./stores/message/redis.js", + "require": "./stores/message/redis.cjs" + }, + "./stores/message/upstash_redis": { + "types": "./stores/message/upstash_redis.d.ts", + "import": "./stores/message/upstash_redis.js", + "require": "./stores/message/upstash_redis.cjs" + }, + "./stores/message/xata": { + "types": "./stores/message/xata.d.ts", + "import": "./stores/message/xata.js", + "require": "./stores/message/xata.cjs" + }, "./package.json": "./package.json" }, "files": [ @@ -1438,6 +1528,60 @@ "graphs/neo4j_graph.d.ts", "utils/event_source_parse.cjs", "utils/event_source_parse.js", - "utils/event_source_parse.d.ts" + "utils/event_source_parse.d.ts", + "document_transformers/html_to_text.cjs", + "document_transformers/html_to_text.js", + "document_transformers/html_to_text.d.ts", + "document_transformers/mozilla_readability.cjs", + "document_transformers/mozilla_readability.js", + "document_transformers/mozilla_readability.d.ts", + "storage/convex.cjs", + "storage/convex.js", + "storage/convex.d.ts", + "storage/ioredis.cjs", + "storage/ioredis.js", + "storage/ioredis.d.ts", + "storage/upstash_redis.cjs", + "storage/upstash_redis.js", + "storage/upstash_redis.d.ts", + "storage/vercel_kv.cjs", + "storage/vercel_kv.js", + "storage/vercel_kv.d.ts", + "stores/message/cassandra.cjs", + "stores/message/cassandra.js", + "stores/message/cassandra.d.ts", + "stores/message/cloudflare_d1.cjs", + "stores/message/cloudflare_d1.js", + "stores/message/cloudflare_d1.d.ts", + "stores/message/convex.cjs", + "stores/message/convex.js", + "stores/message/convex.d.ts", + "stores/message/dynamodb.cjs", + "stores/message/dynamodb.js", + "stores/message/dynamodb.d.ts", + "stores/message/firestore.cjs", + "stores/message/firestore.js", + "stores/message/firestore.d.ts", + "stores/message/ioredis.cjs", + "stores/message/ioredis.js", + "stores/message/ioredis.d.ts", + "stores/message/momento.cjs", + "stores/message/momento.js", + 
"stores/message/momento.d.ts", + "stores/message/mongodb.cjs", + "stores/message/mongodb.js", + "stores/message/mongodb.d.ts", + "stores/message/planetscale.cjs", + "stores/message/planetscale.js", + "stores/message/planetscale.d.ts", + "stores/message/redis.cjs", + "stores/message/redis.js", + "stores/message/redis.d.ts", + "stores/message/upstash_redis.cjs", + "stores/message/upstash_redis.js", + "stores/message/upstash_redis.d.ts", + "stores/message/xata.cjs", + "stores/message/xata.js", + "stores/message/xata.d.ts" ] } diff --git a/libs/langchain-community/scripts/check-tree-shaking.js b/libs/langchain-community/scripts/check-tree-shaking.js index 851d0bbba8a4..62f289fd855e 100644 --- a/libs/langchain-community/scripts/check-tree-shaking.js +++ b/libs/langchain-community/scripts/check-tree-shaking.js @@ -33,6 +33,8 @@ export function listExternals() { "@rockset/client/dist/codegen/api.js", "mysql2/promise", "web-auth-library/google", + "firebase-admin/app", + "firebase-admin/firestore", ]; } diff --git a/libs/langchain-community/scripts/create-entrypoints.js b/libs/langchain-community/scripts/create-entrypoints.js index f51a44b5e5f8..b04416657cf9 100644 --- a/libs/langchain-community/scripts/create-entrypoints.js +++ b/libs/langchain-community/scripts/create-entrypoints.js @@ -121,6 +121,28 @@ const entrypoints = { // graphs "graphs/neo4j_graph": "graphs/neo4j_graph", "utils/event_source_parse": "utils/event_source_parse", + // document transformers + "document_transformers/html_to_text": "document_transformers/html_to_text", + "document_transformers/mozilla_readability": + "document_transformers/mozilla_readability", + // storage + "storage/convex": "storage/convex", + "storage/ioredis": "storage/ioredis", + "storage/upstash_redis": "storage/upstash_redis", + "storage/vercel_kv": "storage/vercel_kv", + // stores + "stores/message/cassandra": "stores/message/cassandra", + "stores/message/cloudflare_d1": "stores/message/cloudflare_d1", + "stores/message/convex": 
"stores/message/convex", + "stores/message/dynamodb": "stores/message/dynamodb", + "stores/message/firestore": "stores/message/firestore", + "stores/message/ioredis": "stores/message/ioredis", + "stores/message/momento": "stores/message/momento", + "stores/message/mongodb": "stores/message/mongodb", + "stores/message/planetscale": "stores/message/planetscale", + "stores/message/redis": "stores/message/redis", + "stores/message/upstash_redis": "stores/message/upstash_redis", + "stores/message/xata": "stores/message/xata", }; // Entrypoints in this list will @@ -198,6 +220,27 @@ const requiresOptionalDependency = [ "cache/momento", "cache/upstash_redis", "graphs/neo4j_graph", + // document_transformers + "document_transformers/html_to_text", + "document_transformers/mozilla_readability", + // storage + "storage/convex", + "storage/ioredis", + "storage/upstash_redis", + "storage/vercel_kv", + // stores + "stores/message/cassandra", + "stores/message/cloudflare_d1", + "stores/message/convex", + "stores/message/dynamodb", + "stores/message/firestore", + "stores/message/ioredis", + "stores/message/momento", + "stores/message/mongodb", + "stores/message/planetscale", + "stores/message/redis", + "stores/message/upstash_redis", + "stores/message/xata", ]; const updateJsonFile = (relativePath, updateFunction) => { diff --git a/libs/langchain-community/src/document_transformers/html_to_text.ts b/libs/langchain-community/src/document_transformers/html_to_text.ts new file mode 100644 index 000000000000..9021ba68976a --- /dev/null +++ b/libs/langchain-community/src/document_transformers/html_to_text.ts @@ -0,0 +1,43 @@ +import { htmlToText } from "html-to-text"; +import type { HtmlToTextOptions } from "html-to-text"; +import { Document } from "@langchain/core/documents"; +import { MappingDocumentTransformer } from "@langchain/core/documents"; + +/** + * A transformer that converts HTML content to plain text. 
+ * @example + * ```typescript + * const loader = new CheerioWebBaseLoader("https://example.com/some-page"); + * const docs = await loader.load(); + * + * const splitter = new RecursiveCharacterTextSplitter({ + * maxCharacterCount: 1000, + * }); + * const transformer = new HtmlToTextTransformer(); + * + * // The sequence of text splitting followed by HTML to text transformation + * const sequence = splitter.pipe(transformer); + * + * // Processing the loaded documents through the sequence + * const newDocuments = await sequence.invoke(docs); + * + * console.log(newDocuments); + * ``` + */ +export class HtmlToTextTransformer extends MappingDocumentTransformer { + static lc_name() { + return "HtmlToTextTransformer"; + } + + constructor(protected options: HtmlToTextOptions = {}) { + super(options); + } + + async _transformDocument(document: Document): Promise { + const extractedContent = htmlToText(document.pageContent, this.options); + return new Document({ + pageContent: extractedContent, + metadata: { ...document.metadata }, + }); + } +} diff --git a/libs/langchain-community/src/document_transformers/mozilla_readability.ts b/libs/langchain-community/src/document_transformers/mozilla_readability.ts new file mode 100644 index 000000000000..5d295ac85ddf --- /dev/null +++ b/libs/langchain-community/src/document_transformers/mozilla_readability.ts @@ -0,0 +1,52 @@ +import { Readability } from "@mozilla/readability"; +import { JSDOM } from "jsdom"; +import { Options } from "mozilla-readability"; +import { Document } from "@langchain/core/documents"; +import { MappingDocumentTransformer } from "@langchain/core/documents"; + +/** + * A transformer that uses the Mozilla Readability library to extract the + * main content from a web page. 
+ * @example + * ```typescript + * const loader = new CheerioWebBaseLoader("https://example.com/article"); + * const docs = await loader.load(); + * + * const splitter = new RecursiveCharacterTextSplitter({ + * maxCharacterCount: 5000, + * }); + * const transformer = new MozillaReadabilityTransformer(); + * + * // The sequence processes the loaded documents through the splitter and then the transformer. + * const sequence = splitter.pipe(transformer); + * + * // Invoke the sequence to transform the documents into a more readable format. + * const newDocuments = await sequence.invoke(docs); + * + * console.log(newDocuments); + * ``` + */ +export class MozillaReadabilityTransformer extends MappingDocumentTransformer { + static lc_name() { + return "MozillaReadabilityTransformer"; + } + + constructor(protected options: Options = {}) { + super(options); + } + + async _transformDocument(document: Document): Promise { + const doc = new JSDOM(document.pageContent); + + const readability = new Readability(doc.window.document, this.options); + + const result = readability.parse(); + + return new Document({ + pageContent: result?.textContent ?? 
"", + metadata: { + ...document.metadata, + }, + }); + } +} diff --git a/libs/langchain-community/src/load/import_constants.ts b/libs/langchain-community/src/load/import_constants.ts index 79405fe1d31b..ed5bdb480ebb 100644 --- a/libs/langchain-community/src/load/import_constants.ts +++ b/libs/langchain-community/src/load/import_constants.ts @@ -64,4 +64,22 @@ export const optionalImportEntrypoints = [ "langchain_community/retrievers/supabase", "langchain_community/retrievers/zep", "langchain_community/graphs/neo4j_graph", + "langchain_community/document_transformers/html_to_text", + "langchain_community/document_transformers/mozilla_readability", + "langchain_community/storage/convex", + "langchain_community/storage/ioredis", + "langchain_community/storage/upstash_redis", + "langchain_community/storage/vercel_kv", + "langchain_community/stores/message/cassandra", + "langchain_community/stores/message/cloudflare_d1", + "langchain_community/stores/message/convex", + "langchain_community/stores/message/dynamodb", + "langchain_community/stores/message/firestore", + "langchain_community/stores/message/ioredis", + "langchain_community/stores/message/momento", + "langchain_community/stores/message/mongodb", + "langchain_community/stores/message/planetscale", + "langchain_community/stores/message/redis", + "langchain_community/stores/message/upstash_redis", + "langchain_community/stores/message/xata", ]; diff --git a/libs/langchain-community/src/load/import_type.d.ts b/libs/langchain-community/src/load/import_type.d.ts index b30dc747f249..71530d0e8030 100644 --- a/libs/langchain-community/src/load/import_type.d.ts +++ b/libs/langchain-community/src/load/import_type.d.ts @@ -190,10 +190,65 @@ export interface OptionalImportMap { "@langchain/community/graphs/neo4j_graph"?: | typeof import("../graphs/neo4j_graph.js") | Promise; + "@langchain/community/document_transformers/html_to_text"?: + | typeof import("../document_transformers/html_to_text.js") + | Promise; + 
"@langchain/community/document_transformers/mozilla_readability"?: + | typeof import("../document_transformers/mozilla_readability.js") + | Promise; + "@langchain/community/storage/convex"?: + | typeof import("../storage/convex.js") + | Promise; + "@langchain/community/storage/ioredis"?: + | typeof import("../storage/ioredis.js") + | Promise; + "@langchain/community/storage/upstash_redis"?: + | typeof import("../storage/upstash_redis.js") + | Promise; + "@langchain/community/storage/vercel_kv"?: + | typeof import("../storage/vercel_kv.js") + | Promise; + "@langchain/community/stores/message/cassandra"?: + | typeof import("../stores/message/cassandra.js") + | Promise; + "@langchain/community/stores/message/cloudflare_d1"?: + | typeof import("../stores/message/cloudflare_d1.js") + | Promise; + "@langchain/community/stores/message/convex"?: + | typeof import("../stores/message/convex.js") + | Promise; + "@langchain/community/stores/message/dynamodb"?: + | typeof import("../stores/message/dynamodb.js") + | Promise; + "@langchain/community/stores/message/firestore"?: + | typeof import("../stores/message/firestore.js") + | Promise; + "@langchain/community/stores/message/ioredis"?: + | typeof import("../stores/message/ioredis.js") + | Promise; + "@langchain/community/stores/message/momento"?: + | typeof import("../stores/message/momento.js") + | Promise; + "@langchain/community/stores/message/mongodb"?: + | typeof import("../stores/message/mongodb.js") + | Promise; + "@langchain/community/stores/message/planetscale"?: + | typeof import("../stores/message/planetscale.js") + | Promise; + "@langchain/community/stores/message/redis"?: + | typeof import("../stores/message/redis.js") + | Promise; + "@langchain/community/stores/message/upstash_redis"?: + | typeof import("../stores/message/upstash_redis.js") + | Promise; + "@langchain/community/stores/message/xata"?: + | typeof import("../stores/message/xata.js") + | Promise; } export interface SecretMap { AWS_ACCESS_KEY_ID?: 
string; + AWS_SECRETE_ACCESS_KEY?: string; AWS_SECRET_ACCESS_KEY?: string; AWS_SESSION_TOKEN?: string; BAIDU_API_KEY?: string; @@ -219,10 +274,19 @@ export interface SecretMap { MILVUS_USERNAME?: string; MINIMAX_API_KEY?: string; MINIMAX_GROUP_ID?: string; + PLANETSCALE_DATABASE_URL?: string; + PLANETSCALE_HOST?: string; + PLANETSCALE_PASSWORD?: string; + PLANETSCALE_USERNAME?: string; QDRANT_API_KEY?: string; QDRANT_URL?: string; + REDIS_PASSWORD?: string; + REDIS_URL?: string; + REDIS_USERNAME?: string; REPLICATE_API_TOKEN?: string; SEARXNG_API_BASE?: string; + UPSTASH_REDIS_REST_TOKEN?: string; + UPSTASH_REDIS_REST_URL?: string; VECTARA_API_KEY?: string; VECTARA_CORPUS_ID?: string; VECTARA_CUSTOMER_ID?: string; diff --git a/libs/langchain-community/src/storage/convex.ts b/libs/langchain-community/src/storage/convex.ts new file mode 100644 index 000000000000..adbddf26c33c --- /dev/null +++ b/libs/langchain-community/src/storage/convex.ts @@ -0,0 +1,224 @@ +// eslint-disable-next-line import/no-extraneous-dependencies +import { + FieldPaths, + FunctionReference, + GenericActionCtx, + GenericDataModel, + NamedTableInfo, + TableNamesInDataModel, + VectorIndexNames, + makeFunctionReference, +} from "convex/server"; +// eslint-disable-next-line import/no-extraneous-dependencies +import { Value } from "convex/values"; +import { BaseStore } from "@langchain/core/stores"; + +/** + * Type that defines the config required to initialize the + * ConvexKVStore class. It includes the table name, + * index name, field name. 
+ */ +export type ConvexKVStoreConfig< + DataModel extends GenericDataModel, + TableName extends TableNamesInDataModel, + IndexName extends VectorIndexNames>, + KeyFieldName extends FieldPaths>, + ValueFieldName extends FieldPaths>, + UpsertMutation extends FunctionReference< + "mutation", + "internal", + { table: string; document: object } + >, + LookupQuery extends FunctionReference< + "query", + "internal", + { table: string; index: string; keyField: string; key: string }, + object[] + >, + DeleteManyMutation extends FunctionReference< + "mutation", + "internal", + { table: string; index: string; keyField: string; key: string } + > +> = { + readonly ctx: GenericActionCtx; + /** + * Defaults to "cache" + */ + readonly table?: TableName; + /** + * Defaults to "byKey" + */ + readonly index?: IndexName; + /** + * Defaults to "key" + */ + readonly keyField?: KeyFieldName; + /** + * Defaults to "value" + */ + readonly valueField?: ValueFieldName; + /** + * Defaults to `internal.langchain.db.upsert` + */ + readonly upsert?: UpsertMutation; + /** + * Defaults to `internal.langchain.db.lookup` + */ + readonly lookup?: LookupQuery; + /** + * Defaults to `internal.langchain.db.deleteMany` + */ + readonly deleteMany?: DeleteManyMutation; +}; + +/** + * Class that extends the BaseStore class to interact with a Convex + * database. It provides methods for getting, setting, and deleting key value pairs, + * as well as yielding keys from the database. 
+ */ +export class ConvexKVStore< + T extends Value, + DataModel extends GenericDataModel, + TableName extends TableNamesInDataModel, + IndexName extends VectorIndexNames>, + KeyFieldName extends FieldPaths>, + ValueFieldName extends FieldPaths>, + UpsertMutation extends FunctionReference< + "mutation", + "internal", + { table: string; document: object } + >, + LookupQuery extends FunctionReference< + "query", + "internal", + { table: string; index: string; keyField: string; key: string }, + object[] + >, + DeleteManyMutation extends FunctionReference< + "mutation", + "internal", + { table: string; index: string; keyField: string; key: string } + > +> extends BaseStore { + lc_namespace = ["langchain", "storage", "convex"]; + + private readonly ctx: GenericActionCtx; + + private readonly table: TableName; + + private readonly index: IndexName; + + private readonly keyField: KeyFieldName; + + private readonly valueField: ValueFieldName; + + private readonly upsert: UpsertMutation; + + private readonly lookup: LookupQuery; + + private readonly deleteMany: DeleteManyMutation; + + constructor( + config: ConvexKVStoreConfig< + DataModel, + TableName, + IndexName, + KeyFieldName, + ValueFieldName, + UpsertMutation, + LookupQuery, + DeleteManyMutation + > + ) { + super(config); + this.ctx = config.ctx; + this.table = config.table ?? ("cache" as TableName); + this.index = config.index ?? ("byKey" as IndexName); + this.keyField = config.keyField ?? ("key" as KeyFieldName); + this.valueField = config.valueField ?? ("value" as ValueFieldName); + this.upsert = + // eslint-disable-next-line @typescript-eslint/no-explicit-any + config.upsert ?? (makeFunctionReference("langchain/db:upsert") as any); + this.lookup = + // eslint-disable-next-line @typescript-eslint/no-explicit-any + config.lookup ?? (makeFunctionReference("langchain/db:lookup") as any); + this.deleteMany = + config.deleteMany ?? 
+ // eslint-disable-next-line @typescript-eslint/no-explicit-any + (makeFunctionReference("langchain/db:deleteMany") as any); + } + + /** + * Gets multiple keys from the Convex database. + * @param keys Array of keys to be retrieved. + * @returns An array of retrieved values. + */ + async mget(keys: string[]) { + return (await Promise.all( + keys.map(async (key) => { + const found = (await this.ctx.runQuery(this.lookup, { + table: this.table, + index: this.index, + keyField: this.keyField, + key, + // eslint-disable-next-line @typescript-eslint/no-explicit-any + } as any)) as any; + return found.length > 0 ? found[0][this.valueField] : undefined; + }) + )) as (T | undefined)[]; + } + + /** + * Sets multiple keys in the Convex database. + * @param keyValuePairs Array of key-value pairs to be set. + * @returns Promise that resolves when all keys have been set. + */ + async mset(keyValuePairs: [string, T][]): Promise { + // TODO: Remove chunking when Convex handles the concurrent requests correctly + const PAGE_SIZE = 16; + for (let i = 0; i < keyValuePairs.length; i += PAGE_SIZE) { + await Promise.all( + keyValuePairs.slice(i, i + PAGE_SIZE).map(([key, value]) => + this.ctx.runMutation(this.upsert, { + table: this.table, + index: this.index, + keyField: this.keyField, + key, + document: { [this.keyField]: key, [this.valueField]: value }, + // eslint-disable-next-line @typescript-eslint/no-explicit-any + } as any) + ) + ); + } + } + + /** + * Deletes multiple keys from the Convex database. + * @param keys Array of keys to be deleted. + * @returns Promise that resolves when all keys have been deleted. + */ + async mdelete(keys: string[]): Promise { + await Promise.all( + keys.map((key) => + this.ctx.runMutation(this.deleteMany, { + table: this.table, + index: this.index, + keyField: this.keyField, + key, + // eslint-disable-next-line @typescript-eslint/no-explicit-any + } as any) + ) + ); + } + + /** + * Yields keys from the Convex database. 
+ * @param prefix Optional prefix to filter the keys. + * @returns An AsyncGenerator that yields keys from the Convex database. + */ + // eslint-disable-next-line require-yield + async *yieldKeys(_prefix?: string): AsyncGenerator { + throw new Error("yieldKeys not implemented yet for ConvexKVStore"); + } +} diff --git a/libs/langchain-community/src/storage/ioredis.ts b/libs/langchain-community/src/storage/ioredis.ts new file mode 100644 index 000000000000..89deb527d571 --- /dev/null +++ b/libs/langchain-community/src/storage/ioredis.ts @@ -0,0 +1,159 @@ +import type { Redis } from "ioredis"; + +import { BaseStore } from "@langchain/core/stores"; + +/** + * Class that extends the BaseStore class to interact with a Redis + * database. It provides methods for getting, setting, and deleting data, + * as well as yielding keys from the database. + * @example + * ```typescript + * const store = new RedisByteStore({ client: new Redis({}) }); + * await store.mset([ + * [ + * "message:id:0", + * new TextEncoder().encode(JSON.stringify(new AIMessage("ai stuff..."))), + * ], + * [ + * "message:id:1", + * new TextEncoder().encode( + * JSON.stringify(new HumanMessage("human stuff...")), + * ), + * ], + * ]); + * const retrievedMessages = await store.mget(["message:id:0", "message:id:1"]); + * console.log(retrievedMessages.map((v) => new TextDecoder().decode(v))); + * const yieldedKeys = []; + * for await (const key of store.yieldKeys("message:id:")) { + * yieldedKeys.push(key); + * } + * console.log(yieldedKeys); + * await store.mdelete(yieldedKeys); + * ``` + */ +export class RedisByteStore extends BaseStore { + lc_namespace = ["langchain", "storage"]; + + protected client: Redis; + + protected ttl?: number; + + protected namespace?: string; + + protected yieldKeysScanBatchSize = 1000; + + constructor(fields: { + client: Redis; + ttl?: number; + namespace?: string; + yieldKeysScanBatchSize?: number; + }) { + super(fields); + this.client = fields.client; + this.ttl = fields.ttl; 
+ this.namespace = fields.namespace; + this.yieldKeysScanBatchSize = + fields.yieldKeysScanBatchSize ?? this.yieldKeysScanBatchSize; + } + + _getPrefixedKey(key: string) { + if (this.namespace) { + const delimiter = "/"; + return `${this.namespace}${delimiter}${key}`; + } + return key; + } + + _getDeprefixedKey(key: string) { + if (this.namespace) { + const delimiter = "/"; + return key.slice(this.namespace.length + delimiter.length); + } + return key; + } + + /** + * Gets multiple keys from the Redis database. + * @param keys Array of keys to be retrieved. + * @returns An array of retrieved values. + */ + async mget(keys: string[]) { + const prefixedKeys = keys.map(this._getPrefixedKey.bind(this)); + const retrievedValues = await this.client.mgetBuffer(prefixedKeys); + return retrievedValues.map((value) => { + if (!value) { + return undefined; + } else { + return value; + } + }); + } + + /** + * Sets multiple keys in the Redis database. + * @param keyValuePairs Array of key-value pairs to be set. + * @returns Promise that resolves when all keys have been set. + */ + async mset(keyValuePairs: [string, Uint8Array][]): Promise { + const decoder = new TextDecoder(); + const encodedKeyValuePairs = keyValuePairs.map(([key, value]) => [ + this._getPrefixedKey(key), + decoder.decode(value), + ]); + const pipeline = this.client.pipeline(); + for (const [key, value] of encodedKeyValuePairs) { + if (this.ttl) { + pipeline.set(key, value, "EX", this.ttl); + } else { + pipeline.set(key, value); + } + } + await pipeline.exec(); + } + + /** + * Deletes multiple keys from the Redis database. + * @param keys Array of keys to be deleted. + * @returns Promise that resolves when all keys have been deleted. + */ + async mdelete(keys: string[]): Promise { + await this.client.del(...keys.map(this._getPrefixedKey.bind(this))); + } + + /** + * Yields keys from the Redis database. + * @param prefix Optional prefix to filter the keys. 
+ * @returns An AsyncGenerator that yields keys from the Redis database. + */ + async *yieldKeys(prefix?: string): AsyncGenerator { + let pattern; + if (prefix) { + const wildcardPrefix = prefix.endsWith("*") ? prefix : `${prefix}*`; + pattern = this._getPrefixedKey(wildcardPrefix); + } else { + pattern = this._getPrefixedKey("*"); + } + let [cursor, batch] = await this.client.scan( + 0, + "MATCH", + pattern, + "COUNT", + this.yieldKeysScanBatchSize + ); + for (const key of batch) { + yield this._getDeprefixedKey(key); + } + while (cursor !== "0") { + [cursor, batch] = await this.client.scan( + cursor, + "MATCH", + pattern, + "COUNT", + this.yieldKeysScanBatchSize + ); + for (const key of batch) { + yield this._getDeprefixedKey(key); + } + } + } +} diff --git a/libs/langchain-community/src/storage/upstash_redis.ts b/libs/langchain-community/src/storage/upstash_redis.ts new file mode 100644 index 000000000000..489ec4fd0261 --- /dev/null +++ b/libs/langchain-community/src/storage/upstash_redis.ts @@ -0,0 +1,176 @@ +import { Redis as UpstashRedis, type RedisConfigNodejs } from "@upstash/redis"; + +import { BaseStore } from "@langchain/core/stores"; + +/** + * Type definition for the input parameters required to initialize an + * instance of the UpstashStoreInput class. + */ +export interface UpstashRedisStoreInput { + sessionTTL?: number; + config?: RedisConfigNodejs; + client?: UpstashRedis; + /** + * The amount of keys to retrieve per batch when yielding keys. + * @default 1000 + */ + yieldKeysScanBatchSize?: number; + /** + * The namespace to use for the keys in the database. + */ + namespace?: string; +} + +/** + * Class that extends the BaseStore class to interact with an Upstash Redis + * database. It provides methods for getting, setting, and deleting data, + * as well as yielding keys from the database. 
+ * @example + * ```typescript + * const store = new UpstashRedisStore({ + * client: new Redis({ + * url: "your-upstash-redis-url", + * token: "your-upstash-redis-token", + * }), + * }); + * await store.mset([ + * ["message:id:0", "encoded-ai-message"], + * ["message:id:1", "encoded-human-message"], + * ]); + * const retrievedMessages = await store.mget(["message:id:0", "message:id:1"]); + * const yieldedKeys = []; + * for await (const key of store.yieldKeys("message:id")) { + * yieldedKeys.push(key); + * } + * await store.mdelete(yieldedKeys); + * ``` + */ +export class UpstashRedisStore extends BaseStore { + lc_namespace = ["langchain", "storage"]; + + protected client: UpstashRedis; + + protected namespace?: string; + + protected yieldKeysScanBatchSize = 1000; + + private sessionTTL?: number; + + constructor(fields: UpstashRedisStoreInput) { + super(fields); + if (fields.client) { + this.client = fields.client; + } else if (fields.config) { + this.client = new UpstashRedis(fields.config); + } else { + throw new Error( + `Upstash Redis store requires either a config object or a pre-configured client.` + ); + } + this.sessionTTL = fields.sessionTTL; + this.yieldKeysScanBatchSize = + fields.yieldKeysScanBatchSize ?? this.yieldKeysScanBatchSize; + this.namespace = fields.namespace; + } + + _getPrefixedKey(key: string) { + if (this.namespace) { + const delimiter = "/"; + return `${this.namespace}${delimiter}${key}`; + } + return key; + } + + _getDeprefixedKey(key: string) { + if (this.namespace) { + const delimiter = "/"; + return key.slice(this.namespace.length + delimiter.length); + } + return key; + } + + /** + * Gets multiple keys from the Upstash Redis database. + * @param keys Array of keys to be retrieved. + * @returns An array of retrieved values. 
+ */ + async mget(keys: string[]) { + const encoder = new TextEncoder(); + + const prefixedKeys = keys.map(this._getPrefixedKey.bind(this)); + const retrievedValues = await this.client.mget( + ...prefixedKeys + ); + return retrievedValues.map((value) => { + if (!value) { + return undefined; + } else if (typeof value === "object") { + return encoder.encode(JSON.stringify(value)); + } else { + return encoder.encode(value); + } + }); + } + + /** + * Sets multiple keys in the Upstash Redis database. + * @param keyValuePairs Array of key-value pairs to be set. + * @returns Promise that resolves when all keys have been set. + */ + async mset(keyValuePairs: [string, Uint8Array][]): Promise { + const decoder = new TextDecoder(); + const encodedKeyValuePairs = keyValuePairs.map(([key, value]) => [ + this._getPrefixedKey(key), + decoder.decode(value), + ]); + const pipeline = this.client.pipeline(); + for (const [key, value] of encodedKeyValuePairs) { + if (this.sessionTTL) { + pipeline.setex(key, this.sessionTTL, value); + } else { + pipeline.set(key, value); + } + } + await pipeline.exec(); + } + + /** + * Deletes multiple keys from the Upstash Redis database. + * @param keys Array of keys to be deleted. + * @returns Promise that resolves when all keys have been deleted. + */ + async mdelete(keys: string[]): Promise { + await this.client.del(...keys.map(this._getPrefixedKey.bind(this))); + } + + /** + * Yields keys from the Upstash Redis database. + * @param prefix Optional prefix to filter the keys. A wildcard (*) is always appended to the end. + * @returns An AsyncGenerator that yields keys from the Upstash Redis database. + */ + async *yieldKeys(prefix?: string): AsyncGenerator { + let pattern; + if (prefix) { + const wildcardPrefix = prefix.endsWith("*") ? 
prefix : `${prefix}*`; + pattern = `${this._getPrefixedKey(wildcardPrefix)}*`; + } else { + pattern = this._getPrefixedKey("*"); + } + let [cursor, batch] = await this.client.scan(0, { + match: pattern, + count: this.yieldKeysScanBatchSize, + }); + for (const key of batch) { + yield this._getDeprefixedKey(key); + } + while (cursor !== 0) { + [cursor, batch] = await this.client.scan(cursor, { + match: pattern, + count: this.yieldKeysScanBatchSize, + }); + for (const key of batch) { + yield this._getDeprefixedKey(key); + } + } + } +} diff --git a/libs/langchain-community/src/storage/vercel_kv.ts b/libs/langchain-community/src/storage/vercel_kv.ts new file mode 100644 index 000000000000..8f520a0d8543 --- /dev/null +++ b/libs/langchain-community/src/storage/vercel_kv.ts @@ -0,0 +1,150 @@ +import { kv, type VercelKV } from "@vercel/kv"; + +import { BaseStore } from "@langchain/core/stores"; + +/** + * Class that extends the BaseStore class to interact with a Vercel KV + * database. It provides methods for getting, setting, and deleting data, + * as well as yielding keys from the database. 
+ * @example + * ```typescript + * const store = new VercelKVStore({ + * client: getClient(), + * }); + * await store.mset([ + * { key: "message:id:0", value: "encoded message 0" }, + * { key: "message:id:1", value: "encoded message 1" }, + * ]); + * const retrievedMessages = await store.mget(["message:id:0", "message:id:1"]); + * const yieldedKeys = []; + * for await (const key of store.yieldKeys("message:id:")) { + * yieldedKeys.push(key); + * } + * await store.mdelete(yieldedKeys); + * ``` + */ +export class VercelKVStore extends BaseStore { + lc_namespace = ["langchain", "storage"]; + + protected client: VercelKV; + + protected ttl?: number; + + protected namespace?: string; + + protected yieldKeysScanBatchSize = 1000; + + constructor(fields?: { + client?: VercelKV; + ttl?: number; + namespace?: string; + yieldKeysScanBatchSize?: number; + }) { + super(fields); + this.client = fields?.client ?? kv; + this.ttl = fields?.ttl; + this.namespace = fields?.namespace; + this.yieldKeysScanBatchSize = + fields?.yieldKeysScanBatchSize ?? this.yieldKeysScanBatchSize; + } + + _getPrefixedKey(key: string) { + if (this.namespace) { + const delimiter = "/"; + return `${this.namespace}${delimiter}${key}`; + } + return key; + } + + _getDeprefixedKey(key: string) { + if (this.namespace) { + const delimiter = "/"; + return key.slice(this.namespace.length + delimiter.length); + } + return key; + } + + /** + * Gets multiple keys from the Redis database. + * @param keys Array of keys to be retrieved. + * @returns An array of retrieved values. 
+ */ + async mget(keys: string[]) { + const prefixedKeys = keys.map(this._getPrefixedKey.bind(this)); + const retrievedValues = await this.client.mget<(string | undefined)[]>( + ...prefixedKeys + ); + const encoder = new TextEncoder(); + return retrievedValues.map((value) => { + if (value === undefined || value === null) { + return undefined; + } else if (typeof value === "object") { + return encoder.encode(JSON.stringify(value)); + } else { + return encoder.encode(value); + } + }); + } + + /** + * Sets multiple keys in the Redis database. + * @param keyValuePairs Array of key-value pairs to be set. + * @returns Promise that resolves when all keys have been set. + */ + async mset(keyValuePairs: [string, Uint8Array][]): Promise { + const decoder = new TextDecoder(); + const decodedKeyValuePairs = keyValuePairs.map(([key, value]) => [ + this._getPrefixedKey(key), + decoder.decode(value), + ]); + const pipeline = this.client.pipeline(); + for (const [key, value] of decodedKeyValuePairs) { + if (this.ttl) { + pipeline.setex(key, this.ttl, value); + } else { + pipeline.set(key, value); + } + } + await pipeline.exec(); + } + + /** + * Deletes multiple keys from the Redis database. + * @param keys Array of keys to be deleted. + * @returns Promise that resolves when all keys have been deleted. + */ + async mdelete(keys: string[]): Promise { + await this.client.del(...keys.map(this._getPrefixedKey.bind(this))); + } + + /** + * Yields keys from the Redis database. + * @param prefix Optional prefix to filter the keys. + * @returns An AsyncGenerator that yields keys from the Redis database. + */ + async *yieldKeys(prefix?: string): AsyncGenerator { + let pattern; + if (prefix) { + const wildcardPrefix = prefix.endsWith("*") ? 
prefix : `${prefix}*`; + pattern = this._getPrefixedKey(wildcardPrefix); + } else { + pattern = this._getPrefixedKey("*"); + } + let [cursor, batch] = await this.client.scan(0, { + match: pattern, + count: this.yieldKeysScanBatchSize, + }); + for (const key of batch) { + yield this._getDeprefixedKey(key); + } + while (cursor !== 0) { + [cursor, batch] = await this.client.scan(cursor, { + match: pattern, + count: this.yieldKeysScanBatchSize, + }); + for (const key of batch) { + yield this._getDeprefixedKey(key); + } + } + } +} diff --git a/libs/langchain-community/src/stores/message/cassandra.ts b/libs/langchain-community/src/stores/message/cassandra.ts new file mode 100644 index 000000000000..32dab99e166e --- /dev/null +++ b/libs/langchain-community/src/stores/message/cassandra.ts @@ -0,0 +1,150 @@ +import { Client, DseClientOptions } from "cassandra-driver"; +import { + BaseListChatMessageHistory +} from "@langchain/core/chat_history"; +import { BaseMessage, StoredMessage, mapChatMessagesToStoredMessages, + mapStoredMessagesToChatMessages, } from "@langchain/core/messages"; + +export interface CassandraChatMessageHistoryOptions extends DseClientOptions { + keyspace: string; + table: string; + sessionId: string; +} + +/** + * Class for storing chat message history within Cassandra. It extends the + * BaseListChatMessageHistory class and provides methods to get, add, and + * clear messages. 
+ * @example + * ```typescript + * const chatHistory = new CassandraChatMessageHistory({ + * cloud: { + * secureConnectBundle: "", + * }, + * credentials: { + * username: "token", + * password: "", + * }, + * keyspace: "langchain", + * table: "message_history", + * sessionId: "", + * }); + * + * const chain = new ConversationChain({ + * llm: new ChatOpenAI(), + * memory: chatHistory, + * }); + * + * const response = await chain.invoke({ + * input: "What did I just say my name was?", + * }); + * console.log({ response }); + * ``` + */ +export class CassandraChatMessageHistory extends BaseListChatMessageHistory { + lc_namespace = ["langchain", "stores", "message", "cassandra"]; + + private keyspace: string; + + private table: string; + + private client: Client; + + private sessionId: string; + + private tableExists: boolean; + + private options: CassandraChatMessageHistoryOptions; + + private queries: { insert: string; select: string; delete: string }; + + constructor(options: CassandraChatMessageHistoryOptions) { + super(); + this.client = new Client(options); + this.keyspace = options.keyspace; + this.table = options.table; + this.sessionId = options.sessionId; + this.tableExists = false; + this.options = options; + } + + /** + * Method to get all the messages stored in the Cassandra database. + * @returns Array of stored BaseMessage instances. + */ + public async getMessages(): Promise { + await this.ensureTable(); + const resultSet = await this.client.execute( + this.queries.select, + [this.sessionId], + { prepare: true } + ); + const storedMessages: StoredMessage[] = resultSet.rows.map((row) => ({ + type: row.message_type, + data: JSON.parse(row.data), + })); + + const baseMessages = mapStoredMessagesToChatMessages(storedMessages); + return baseMessages; + } + + /** + * Method to add a new message to the Cassandra database. + * @param message The BaseMessage instance to add. + * @returns A promise that resolves when the message has been added. 
+ */ + public async addMessage(message: BaseMessage): Promise { + await this.ensureTable(); + const messages = mapChatMessagesToStoredMessages([message]); + const { type, data } = messages[0]; + return this.client + .execute( + this.queries.insert, + [this.sessionId, type, JSON.stringify(data)], + { prepare: true, ...this.options } + ) + .then(() => {}); + } + + /** + * Method to clear all the messages from the Cassandra database. + * @returns A promise that resolves when all messages have been cleared. + */ + public async clear(): Promise { + await this.ensureTable(); + return this.client + .execute(this.queries.delete, [this.sessionId], { + prepare: true, + ...this.options, + }) + .then(() => {}); + } + + /** + * Method to initialize the Cassandra database. + * @returns Promise that resolves when the database has been initialized. + */ + private async ensureTable(): Promise { + if (this.tableExists) { + return; + } + + await this.client.execute(` + CREATE TABLE IF NOT EXISTS ${this.keyspace}.${this.table} ( + session_id text, + message_ts timestamp, + message_type text, + data text, + PRIMARY KEY ((session_id), message_ts) + ); + `); + + this.queries = { + insert: `INSERT INTO ${this.keyspace}.${this.table} (session_id, message_ts, message_type, data) VALUES (?, toTimestamp(now()), ?, ?);`, + select: `SELECT message_type, data FROM ${this.keyspace}.${this.table} WHERE session_id = ?;`, + delete: `DELETE FROM ${this.keyspace}.${this.table} WHERE session_id = ?;`, + }; + + this.tableExists = true; + } +} diff --git a/libs/langchain-community/src/stores/message/cloudflare_d1.ts b/libs/langchain-community/src/stores/message/cloudflare_d1.ts new file mode 100644 index 000000000000..654c90f594b0 --- /dev/null +++ b/libs/langchain-community/src/stores/message/cloudflare_d1.ts @@ -0,0 +1,190 @@ +import { v4 } from "uuid"; +import type { D1Database } from "@cloudflare/workers-types"; +import { + BaseListChatMessageHistory +} from "@langchain/core/chat_history"; +import { 
BaseMessage, StoredMessage, StoredMessageData, mapChatMessagesToStoredMessages, + mapStoredMessagesToChatMessages, } from "@langchain/core/messages"; +/** + * Type definition for the input parameters required when instantiating a + * CloudflareD1MessageHistory object. + */ +export type CloudflareD1MessageHistoryInput = { + tableName?: string; + sessionId: string; + database?: D1Database; +}; + +/** + * Interface for the data transfer object used when selecting stored + * messages from the Cloudflare D1 database. + */ +interface selectStoredMessagesDTO { + id: string; + session_id: string; + type: string; + content: string; + role: string | null; + name: string | null; + additional_kwargs: string; +} + +/** + * Class for storing and retrieving chat message history from a + * Cloudflare D1 database. Extends the BaseListChatMessageHistory class. + * @example + * ```typescript + * const memory = new BufferMemory({ + * returnMessages: true, + * chatHistory: new CloudflareD1MessageHistory({ + * tableName: "stored_message", + * sessionId: "example", + * database: env.DB, + * }), + * }); + * + * const chainInput = { input }; + * + * const res = await memory.chatHistory.invoke(chainInput); + * await memory.saveContext(chainInput, { + * output: res, + * }); + * ``` + */ +export class CloudflareD1MessageHistory extends BaseListChatMessageHistory { + lc_namespace = ["langchain", "stores", "message", "cloudflare_d1"]; + + public database: D1Database; + + private tableName: string; + + private sessionId: string; + + private tableInitialized: boolean; + + constructor(fields: CloudflareD1MessageHistoryInput) { + super(fields); + + const { sessionId, database, tableName } = fields; + + if (database) { + this.database = database; + } else { + throw new Error( + "Either a client or config must be provided to CloudflareD1MessageHistory" + ); + } + + this.tableName = tableName || "langchain_chat_histories"; + this.tableInitialized = false; + this.sessionId = sessionId; + } + + /** + * 
Private method to ensure that the necessary table exists in the + * Cloudflare D1 database before performing any operations. If the table + * does not exist, it is created. + * @returns Promise that resolves to void. + */ + private async ensureTable(): Promise { + if (this.tableInitialized) { + return; + } + + const query = `CREATE TABLE IF NOT EXISTS ${this.tableName} (id TEXT PRIMARY KEY, session_id TEXT, type TEXT, content TEXT, role TEXT, name TEXT, additional_kwargs TEXT);`; + await this.database.prepare(query).bind().all(); + + const idIndexQuery = `CREATE INDEX IF NOT EXISTS id_index ON ${this.tableName} (id);`; + await this.database.prepare(idIndexQuery).bind().all(); + + const sessionIdIndexQuery = `CREATE INDEX IF NOT EXISTS session_id_index ON ${this.tableName} (session_id);`; + await this.database.prepare(sessionIdIndexQuery).bind().all(); + + this.tableInitialized = true; + } + + /** + * Method to retrieve all messages from the Cloudflare D1 database for the + * current session. + * @returns Promise that resolves to an array of BaseMessage objects. + */ + async getMessages(): Promise { + await this.ensureTable(); + + const query = `SELECT * FROM ${this.tableName} WHERE session_id = ?`; + const rawStoredMessages = await this.database + .prepare(query) + .bind(this.sessionId) + .all(); + const storedMessagesObject = + rawStoredMessages.results as unknown as selectStoredMessagesDTO[]; + + const orderedMessages: StoredMessage[] = storedMessagesObject.map( + (message) => { + const data = { + content: message.content, + additional_kwargs: JSON.parse(message.additional_kwargs), + } as StoredMessageData; + + if (message.role) { + data.role = message.role; + } + + if (message.name) { + data.name = message.name; + } + + return { + type: message.type, + data, + }; + } + ); + + return mapStoredMessagesToChatMessages(orderedMessages); + } + + /** + * Method to add a new message to the Cloudflare D1 database for the current + * session. 
+ * @param message The BaseMessage object to be added to the database. + * @returns Promise that resolves to void. + */ + async addMessage(message: BaseMessage): Promise { + await this.ensureTable(); + + const messageToAdd = mapChatMessagesToStoredMessages([message]); + + const query = `INSERT INTO ${this.tableName} (id, session_id, type, content, role, name, additional_kwargs) VALUES(?, ?, ?, ?, ?, ?, ?)`; + + const id = v4(); + + await this.database + .prepare(query) + .bind( + id, + this.sessionId, + messageToAdd[0].type || null, + messageToAdd[0].data.content || null, + messageToAdd[0].data.role || null, + messageToAdd[0].data.name || null, + JSON.stringify(messageToAdd[0].data.additional_kwargs) + ) + .all(); + } + + /** + * Method to delete all messages from the Cloudflare D1 database for the + * current session. + * @returns Promise that resolves to void. + */ + async clear(): Promise { + await this.ensureTable(); + + const query = `DELETE FROM ? WHERE session_id = ? `; + await this.database + .prepare(query) + .bind(this.tableName, this.sessionId) + .all(); + } +} diff --git a/libs/langchain-community/src/stores/message/convex.ts b/libs/langchain-community/src/stores/message/convex.ts new file mode 100644 index 000000000000..f0433c2a69a7 --- /dev/null +++ b/libs/langchain-community/src/stores/message/convex.ts @@ -0,0 +1,209 @@ +/* eslint-disable @typescript-eslint/no-explicit-any */ + +// eslint-disable-next-line import/no-extraneous-dependencies +import { + DocumentByInfo, + DocumentByName, + FieldPaths, + FunctionReference, + GenericActionCtx, + GenericDataModel, + NamedTableInfo, + TableNamesInDataModel, + IndexNames, + makeFunctionReference, +} from "convex/server"; +import { + BaseListChatMessageHistory +} from "@langchain/core/chat_history"; +import { BaseMessage, mapChatMessagesToStoredMessages, + mapStoredMessagesToChatMessages, } from "@langchain/core/messages"; + +/** + * Type that defines the config required to initialize the + * 
ConvexChatMessageHistory class. At minimum it needs a sessionId + * and an ActionCtx. + */ +export type ConvexChatMessageHistoryInput< + DataModel extends GenericDataModel, + TableName extends TableNamesInDataModel = "messages", + IndexName extends IndexNames< + NamedTableInfo + > = "bySessionId", + SessionIdFieldName extends FieldPaths< + NamedTableInfo + > = "sessionId", + MessageTextFieldName extends FieldPaths< + NamedTableInfo + > = "message", + InsertMutation extends FunctionReference< + "mutation", + "internal", + { table: string; document: object } + > = any, + LookupQuery extends FunctionReference< + "query", + "internal", + { table: string; index: string; keyField: string; key: string }, + object[] + > = any, + DeleteManyMutation extends FunctionReference< + "mutation", + "internal", + { table: string; index: string; keyField: string; key: string } + > = any +> = { + readonly ctx: GenericActionCtx; + readonly sessionId: DocumentByName[SessionIdFieldName]; + /** + * Defaults to "messages" + */ + readonly table?: TableName; + /** + * Defaults to "bySessionId" + */ + readonly index?: IndexName; + /** + * Defaults to "sessionId" + */ + readonly sessionIdField?: SessionIdFieldName; + /** + * Defaults to "message" + */ + readonly messageTextFieldName?: MessageTextFieldName; + /** + * Defaults to `internal.langchain.db.insert` + */ + readonly insert?: InsertMutation; + /** + * Defaults to `internal.langchain.db.lookup` + */ + readonly lookup?: LookupQuery; + /** + * Defaults to `internal.langchain.db.deleteMany` + */ + readonly deleteMany?: DeleteManyMutation; +}; + +export class ConvexChatMessageHistory< + DataModel extends GenericDataModel, + SessionIdFieldName extends FieldPaths< + NamedTableInfo + > = "sessionId", + TableName extends TableNamesInDataModel = "messages", + IndexName extends IndexNames< + NamedTableInfo + > = "bySessionId", + MessageTextFieldName extends FieldPaths< + NamedTableInfo + > = "message", + InsertMutation extends FunctionReference< + 
"mutation", + "internal", + { table: string; document: object } + > = any, + LookupQuery extends FunctionReference< + "query", + "internal", + { table: string; index: string; keyField: string; key: string }, + object[] + > = any, + DeleteManyMutation extends FunctionReference< + "mutation", + "internal", + { table: string; index: string; keyField: string; key: string } + > = any +> extends BaseListChatMessageHistory { + lc_namespace = ["langchain", "stores", "message", "convex"]; + + private readonly ctx: GenericActionCtx; + + private readonly sessionId: DocumentByInfo< + NamedTableInfo + >[SessionIdFieldName]; + + private readonly table: TableName; + + private readonly index: IndexName; + + private readonly sessionIdField: SessionIdFieldName; + + private readonly messageTextFieldName: MessageTextFieldName; + + private readonly insert: InsertMutation; + + private readonly lookup: LookupQuery; + + private readonly deleteMany: DeleteManyMutation; + + constructor( + config: ConvexChatMessageHistoryInput< + DataModel, + TableName, + IndexName, + SessionIdFieldName, + MessageTextFieldName, + InsertMutation, + LookupQuery, + DeleteManyMutation + > + ) { + super(); + this.ctx = config.ctx; + this.sessionId = config.sessionId; + this.table = config.table ?? ("messages" as TableName); + this.index = config.index ?? ("bySessionId" as IndexName); + this.sessionIdField = + config.sessionIdField ?? ("sessionId" as SessionIdFieldName); + this.messageTextFieldName = + config.messageTextFieldName ?? ("message" as MessageTextFieldName); + this.insert = + config.insert ?? (makeFunctionReference("langchain/db:insert") as any); + this.lookup = + config.lookup ?? (makeFunctionReference("langchain/db:lookup") as any); + this.deleteMany = + config.deleteMany ?? 
+ (makeFunctionReference("langchain/db:deleteMany") as any); + } + + async getMessages(): Promise { + const convexDocuments: any[] = await this.ctx.runQuery(this.lookup, { + table: this.table, + index: this.index, + keyField: this.sessionIdField, + key: this.sessionId, + } as any); + + return mapStoredMessagesToChatMessages( + convexDocuments.map((doc) => doc[this.messageTextFieldName]) + ); + } + + async addMessage(message: BaseMessage): Promise { + const messages = mapChatMessagesToStoredMessages([message]); + // TODO: Remove chunking when Convex handles the concurrent requests correctly + const PAGE_SIZE = 16; + for (let i = 0; i < messages.length; i += PAGE_SIZE) { + await Promise.all( + messages.slice(i, i + PAGE_SIZE).map((message) => + this.ctx.runMutation(this.insert, { + table: this.table, + document: { + [this.sessionIdField]: this.sessionId, + [this.messageTextFieldName]: message, + }, + } as any) + ) + ); + } + } + + async clear(): Promise { + await this.ctx.runMutation(this.deleteMany, { + table: this.table, + index: this.index, + keyField: this.sessionIdField, + key: this.sessionId, + } as any); + } +} diff --git a/libs/langchain-community/src/stores/message/dynamodb.ts b/libs/langchain-community/src/stores/message/dynamodb.ts new file mode 100644 index 000000000000..b9b257bf8f75 --- /dev/null +++ b/libs/langchain-community/src/stores/message/dynamodb.ts @@ -0,0 +1,194 @@ +import { + DynamoDBClient, + DynamoDBClientConfig, + GetItemCommand, + GetItemCommandInput, + UpdateItemCommand, + UpdateItemCommandInput, + DeleteItemCommand, + DeleteItemCommandInput, + AttributeValue, +} from "@aws-sdk/client-dynamodb"; + +import { + BaseListChatMessageHistory +} from "@langchain/core/chat_history"; +import { BaseMessage, StoredMessage, mapChatMessagesToStoredMessages, + mapStoredMessagesToChatMessages, } from "@langchain/core/messages"; + +/** + * Interface defining the fields required to create an instance of + * `DynamoDBChatMessageHistory`. 
It includes the DynamoDB table name, + * session ID, partition key, sort key, message attribute name, and + * DynamoDB client configuration. + */ +export interface DynamoDBChatMessageHistoryFields { + tableName: string; + sessionId: string; + partitionKey?: string; + sortKey?: string; + messageAttributeName?: string; + config?: DynamoDBClientConfig; + key?: Record; +} + +/** + * Interface defining the structure of a chat message as it is stored in + * DynamoDB. + */ +interface DynamoDBSerializedChatMessage { + M: { + type: { + S: string; + }; + text: { + S: string; + }; + role?: { + S: string; + }; + }; +} + +/** + * Class providing methods to interact with a DynamoDB table to store and + * retrieve chat messages. It extends the `BaseListChatMessageHistory` + * class. + */ +export class DynamoDBChatMessageHistory extends BaseListChatMessageHistory { + lc_namespace = ["langchain", "stores", "message", "dynamodb"]; + + get lc_secrets(): { [key: string]: string } | undefined { + return { + "config.credentials.accessKeyId": "AWS_ACCESS_KEY_ID", + "config.credentials.secretAccessKey": "AWS_SECRETE_ACCESS_KEY", + "config.credentials.sessionToken": "AWS_SESSION_TOKEN", + }; + } + + private tableName: string; + + private sessionId: string; + + private client: DynamoDBClient; + + private partitionKey = "id"; + + private sortKey?: string; + + private messageAttributeName = "messages"; + + private dynamoKey: Record = {}; + + constructor({ + tableName, + sessionId, + partitionKey, + sortKey, + messageAttributeName, + config, + key = {}, + }: DynamoDBChatMessageHistoryFields) { + super(); + + this.tableName = tableName; + this.sessionId = sessionId; + this.client = new DynamoDBClient(config ?? {}); + this.partitionKey = partitionKey ?? this.partitionKey; + this.sortKey = sortKey; + this.messageAttributeName = + messageAttributeName ?? 
this.messageAttributeName; + this.dynamoKey = key; + + // override dynamoKey with partition key and sort key when key not specified + if (Object.keys(this.dynamoKey).length === 0) { + this.dynamoKey[this.partitionKey] = { S: this.sessionId }; + if (this.sortKey) { + this.dynamoKey[this.sortKey] = { S: this.sortKey }; + } + } + } + + /** + * Retrieves all messages from the DynamoDB table and returns them as an + * array of `BaseMessage` instances. + * @returns Array of stored messages + */ + async getMessages(): Promise { + const params: GetItemCommandInput = { + TableName: this.tableName, + Key: this.dynamoKey, + }; + + const response = await this.client.send(new GetItemCommand(params)); + const items = response.Item + ? response.Item[this.messageAttributeName]?.L ?? [] + : []; + const messages = items + .map((item) => ({ + type: item.M?.type.S, + data: { + role: item.M?.role?.S, + content: item.M?.text.S, + }, + })) + .filter( + (x): x is StoredMessage => + x.type !== undefined && x.data.content !== undefined + ); + return mapStoredMessagesToChatMessages(messages); + } + + /** + * Deletes all messages from the DynamoDB table. + */ + async clear(): Promise { + const params: DeleteItemCommandInput = { + TableName: this.tableName, + Key: this.dynamoKey, + }; + await this.client.send(new DeleteItemCommand(params)); + } + + /** + * Adds a new message to the DynamoDB table. + * @param message The message to be added to the DynamoDB table. 
+ */ + async addMessage(message: BaseMessage) { + const messages = mapChatMessagesToStoredMessages([message]); + + const params: UpdateItemCommandInput = { + TableName: this.tableName, + Key: this.dynamoKey, + ExpressionAttributeNames: { + "#m": this.messageAttributeName, + }, + ExpressionAttributeValues: { + ":empty_list": { + L: [], + }, + ":m": { + L: messages.map((message) => { + const dynamoSerializedMessage: DynamoDBSerializedChatMessage = { + M: { + type: { + S: message.type, + }, + text: { + S: message.data.content, + }, + }, + }; + if (message.data.role) { + dynamoSerializedMessage.M.role = { S: message.data.role }; + } + return dynamoSerializedMessage; + }), + }, + }, + UpdateExpression: + "SET #m = list_append(if_not_exists(#m, :empty_list), :m)", + }; + await this.client.send(new UpdateItemCommand(params)); + } +} diff --git a/libs/langchain-community/src/stores/message/firestore.ts b/libs/langchain-community/src/stores/message/firestore.ts new file mode 100644 index 000000000000..52f4f14eb999 --- /dev/null +++ b/libs/langchain-community/src/stores/message/firestore.ts @@ -0,0 +1,191 @@ +import type { AppOptions } from "firebase-admin"; +import { getApps, initializeApp } from "firebase-admin/app"; +import { + getFirestore, + DocumentData, + Firestore, + DocumentReference, + FieldValue, +} from "firebase-admin/firestore"; + +import { + BaseListChatMessageHistory +} from "@langchain/core/chat_history"; +import { BaseMessage, StoredMessage, mapChatMessagesToStoredMessages, + mapStoredMessagesToChatMessages, } from "@langchain/core/messages"; + +/** + * Interface for FirestoreDBChatMessageHistory. It includes the collection + * name, session ID, user ID, and optionally, the app index and + * configuration for the Firebase app. 
+ */ +export interface FirestoreDBChatMessageHistory { + collectionName: string; + sessionId: string; + userId: string; + appIdx?: number; + config?: AppOptions; +} +/** + * Class for managing chat message history using Google's Firestore as a + * storage backend. Extends the BaseListChatMessageHistory class. + * @example + * ```typescript + * const chatHistory = new FirestoreChatMessageHistory({ + * collectionName: "langchain", + * sessionId: "lc-example", + * userId: "a@example.com", + * config: { projectId: "your-project-id" }, + * }); + * + * const chain = new ConversationChain({ + * llm: new ChatOpenAI(), + * memory: new BufferMemory({ chatHistory }), + * }); + * + * const response = await chain.invoke({ + * input: "What did I just say my name was?", + * }); + * console.log({ response }); + * ``` + */ +export class FirestoreChatMessageHistory extends BaseListChatMessageHistory { + lc_namespace = ["langchain", "stores", "message", "firestore"]; + + private collectionName: string; + + private sessionId: string; + + private userId: string; + + private appIdx: number; + + private config: AppOptions; + + private firestoreClient: Firestore; + + private document: DocumentReference | null; + + constructor({ + collectionName, + sessionId, + userId, + appIdx = 0, + config, + }: FirestoreDBChatMessageHistory) { + super(); + this.collectionName = collectionName; + this.sessionId = sessionId; + this.userId = userId; + this.document = null; + this.appIdx = appIdx; + if (config) this.config = config; + + try { + this.ensureFirestore(); + } catch (error) { + throw new Error(`Unknown response type`); + } + } + + private ensureFirestore(): void { + let app; + // Check if the app is already initialized else get appIdx + if (!getApps().length) app = initializeApp(this.config); + else app = getApps()[this.appIdx]; + + this.firestoreClient = getFirestore(app); + + this.document = this.firestoreClient + .collection(this.collectionName) + .doc(this.sessionId); + } + + /** + * Method 
to retrieve all messages from the Firestore collection + * associated with the current session. Returns an array of BaseMessage + * objects. + * @returns Array of stored messages + */ + async getMessages(): Promise { + if (!this.document) { + throw new Error("Document not initialized"); + } + + const querySnapshot = await this.document + .collection("messages") + .orderBy("createdAt", "asc") + .get() + .catch((err) => { + throw new Error(`Unknown response type: ${err.toString()}`); + }); + + const response: StoredMessage[] = []; + querySnapshot.forEach((doc) => { + const { type, data } = doc.data(); + response.push({ type, data }); + }); + + return mapStoredMessagesToChatMessages(response); + } + + /** + * Method to add a new message to the Firestore collection. The message is + * passed as a BaseMessage object. + * @param message The message to be added as a BaseMessage object. + */ + public async addMessage(message: BaseMessage) { + const messages = mapChatMessagesToStoredMessages([message]); + await this.upsertMessage(messages[0]); + } + + private async upsertMessage(message: StoredMessage): Promise { + if (!this.document) { + throw new Error("Document not initialized"); + } + await this.document.set( + { + id: this.sessionId, + user_id: this.userId, + }, + { merge: true } + ); + await this.document + .collection("messages") + .add({ + type: message.type, + data: message.data, + createdBy: this.userId, + createdAt: FieldValue.serverTimestamp(), + }) + .catch((err) => { + throw new Error(`Unknown response type: ${err.toString()}`); + }); + } + + /** + * Method to delete all messages from the Firestore collection associated + * with the current session. 
+ */ + public async clear(): Promise { + if (!this.document) { + throw new Error("Document not initialized"); + } + await this.document + .collection("messages") + .get() + .then((querySnapshot) => { + querySnapshot.docs.forEach((snapshot) => { + snapshot.ref.delete().catch((err) => { + throw new Error(`Unknown response type: ${err.toString()}`); + }); + }); + }) + .catch((err) => { + throw new Error(`Unknown response type: ${err.toString()}`); + }); + await this.document.delete().catch((err) => { + throw new Error(`Unknown response type: ${err.toString()}`); + }); + } +} diff --git a/libs/langchain-community/src/stores/message/ioredis.ts b/libs/langchain-community/src/stores/message/ioredis.ts new file mode 100644 index 000000000000..9b634aa87071 --- /dev/null +++ b/libs/langchain-community/src/stores/message/ioredis.ts @@ -0,0 +1,102 @@ +import { Redis, RedisOptions } from "ioredis"; +import { + BaseListChatMessageHistory +} from "@langchain/core/chat_history"; +import { BaseMessage, mapChatMessagesToStoredMessages, + mapStoredMessagesToChatMessages, } from "@langchain/core/messages"; + +/** + * Type for the input parameter of the RedisChatMessageHistory + * constructor. It includes fields for the session ID, session TTL, Redis + * URL, Redis configuration, and Redis client. + */ +export type RedisChatMessageHistoryInput = { + sessionId: string; + sessionTTL?: number; + url?: string; + config?: RedisOptions; + client?: Redis; +}; + +/** + * Class used to store chat message history in Redis. It provides methods + * to add, retrieve, and clear messages from the chat history. 
+ * @example + * ```typescript + * const chatHistory = new RedisChatMessageHistory({ + * sessionId: new Date().toISOString(), + * sessionTTL: 300, + * url: "redis: + * }); + * + * const chain = new ConversationChain({ + * llm: new ChatOpenAI({ temperature: 0 }), + * memory: { chatHistory }, + * }); + * + * const response = await chain.invoke({ + * input: "What did I just say my name was?", + * }); + * console.log({ response }); + * ``` + */ +export class RedisChatMessageHistory extends BaseListChatMessageHistory { + lc_namespace = ["langchain", "stores", "message", "ioredis"]; + + get lc_secrets() { + return { + url: "REDIS_URL", + "config.username": "REDIS_USERNAME", + "config.password": "REDIS_PASSWORD", + }; + } + + public client: Redis; + + private sessionId: string; + + private sessionTTL?: number; + + constructor(fields: RedisChatMessageHistoryInput) { + super(fields); + + const { sessionId, sessionTTL, url, config, client } = fields; + this.client = (client ?? + (url ? new Redis(url) : new Redis(config ?? {}))) as Redis; + this.sessionId = sessionId; + this.sessionTTL = sessionTTL; + } + + /** + * Retrieves all messages from the chat history. + * @returns Promise that resolves with an array of BaseMessage instances. + */ + async getMessages(): Promise { + const rawStoredMessages = await this.client.lrange(this.sessionId, 0, -1); + const orderedMessages = rawStoredMessages + .reverse() + .map((message) => JSON.parse(message)); + return mapStoredMessagesToChatMessages(orderedMessages); + } + + /** + * Adds a message to the chat history. + * @param message The message to add to the chat history. + * @returns Promise that resolves when the message has been added. 
+ */ + async addMessage(message: BaseMessage): Promise { + const messageToAdd = mapChatMessagesToStoredMessages([message]); + await this.client.lpush(this.sessionId, JSON.stringify(messageToAdd[0])); + if (this.sessionTTL) { + await this.client.expire(this.sessionId, this.sessionTTL); + } + } + + /** + * Clears all messages from the chat history. + * @returns Promise that resolves when the chat history has been cleared. + */ + async clear(): Promise { + await this.client.del(this.sessionId); + } +} diff --git a/libs/langchain-community/src/stores/message/momento.ts b/libs/langchain-community/src/stores/message/momento.ts new file mode 100644 index 000000000000..92473c98cfa9 --- /dev/null +++ b/libs/langchain-community/src/stores/message/momento.ts @@ -0,0 +1,194 @@ +/* eslint-disable no-instanceof/no-instanceof */ +import { + CacheDelete, + CacheListFetch, + CacheListPushBack, + ICacheClient, + InvalidArgumentError, + CollectionTtl, +} from "@gomomento/sdk-core"; +import { + BaseListChatMessageHistory +} from "@langchain/core/chat_history"; +import { BaseMessage, StoredMessage, mapChatMessagesToStoredMessages, + mapStoredMessagesToChatMessages, } from "@langchain/core/messages"; +import { ensureCacheExists } from "../../utils/momento.js"; + +/** + * The settings to instantiate the Momento chat message history. + */ +export interface MomentoChatMessageHistoryProps { + /** + * The session ID to use to store the data. + */ + sessionId: string; + /** + * The Momento cache client. + */ + client: ICacheClient; + /** + * The name of the cache to use to store the data. + */ + cacheName: string; + /** + * The time to live for the cache items in seconds. + * If not specified, the cache client default is used. + */ + sessionTtl?: number; + /** + * If true, ensure that the cache exists before returning. + * If false, the cache is not checked for existence. + * Defaults to true. 
+ */ + ensureCacheExists?: true; +} + +/** + * A class that stores chat message history using Momento Cache. It + * interacts with a Momento cache client to perform operations like + * fetching, adding, and deleting messages. + * @example + * ```typescript + * const chatHistory = await MomentoChatMessageHistory.fromProps({ + * client: new CacheClient({ + * configuration: Configurations.Laptop.v1(), + * credentialProvider: CredentialProvider.fromEnvironmentVariable({ + * environmentVariableName: "MOMENTO_API_KEY", + * }), + * defaultTtlSeconds: 60 * 60 * 24, + * }), + * cacheName: "langchain", + * sessionId: new Date().toISOString(), + * sessionTtl: 300, + * }); + * + * const messages = await chatHistory.getMessages(); + * console.log({ messages }); + * ``` + */ +export class MomentoChatMessageHistory extends BaseListChatMessageHistory { + lc_namespace = ["langchain", "stores", "message", "momento"]; + + private readonly sessionId: string; + + private readonly client: ICacheClient; + + private readonly cacheName: string; + + private readonly sessionTtl: CollectionTtl; + + private constructor(props: MomentoChatMessageHistoryProps) { + super(); + this.sessionId = props.sessionId; + this.client = props.client; + this.cacheName = props.cacheName; + + this.validateTtlSeconds(props.sessionTtl); + this.sessionTtl = + props.sessionTtl !== undefined + ? CollectionTtl.of(props.sessionTtl) + : CollectionTtl.fromCacheTtl(); + } + + /** + * Create a new chat message history backed by Momento. + * + * @param {MomentoCacheProps} props The settings to instantiate the Momento chat message history. + * @param {string} props.sessionId The session ID to use to store the data. + * @param {ICacheClient} props.client The Momento cache client. + * @param {string} props.cacheName The name of the cache to use to store the data. + * @param {number} props.sessionTtl The time to live for the cache items in seconds. + * If not specified, the cache client default is used. 
+ * @param {boolean} props.ensureCacheExists If true, ensure that the cache exists before returning. + * If false, the cache is not checked for existence. + * @throws {InvalidArgumentError} If {@link props.sessionTtl} is not strictly positive. + * @returns A new chat message history backed by Momento. + */ + public static async fromProps( + props: MomentoChatMessageHistoryProps + ): Promise { + const instance = new MomentoChatMessageHistory(props); + if (props.ensureCacheExists || props.ensureCacheExists === undefined) { + await ensureCacheExists(props.client, props.cacheName); + } + return instance; + } + + /** + * Validate the user-specified TTL, if provided, is strictly positive. + * @param ttlSeconds The TTL to validate. + */ + private validateTtlSeconds(ttlSeconds?: number): void { + if (ttlSeconds !== undefined && ttlSeconds <= 0) { + throw new InvalidArgumentError("ttlSeconds must be positive."); + } + } + + /** + * Fetches messages from the cache. + * @returns A Promise that resolves to an array of BaseMessage instances. + */ + public async getMessages(): Promise { + const fetchResponse = await this.client.listFetch( + this.cacheName, + this.sessionId + ); + + let messages: StoredMessage[] = []; + if (fetchResponse instanceof CacheListFetch.Hit) { + messages = fetchResponse + .valueList() + .map((serializedStoredMessage) => JSON.parse(serializedStoredMessage)); + } else if (fetchResponse instanceof CacheListFetch.Miss) { + // pass + } else if (fetchResponse instanceof CacheListFetch.Error) { + throw fetchResponse.innerException(); + } else { + throw new Error(`Unknown response type: ${fetchResponse.toString()}`); + } + return mapStoredMessagesToChatMessages(messages); + } + + /** + * Adds a message to the cache. + * @param message The BaseMessage instance to add to the cache. + * @returns A Promise that resolves when the message has been added. 
+ */ + public async addMessage(message: BaseMessage): Promise { + const messageToAdd = JSON.stringify( + mapChatMessagesToStoredMessages([message])[0] + ); + + const pushResponse = await this.client.listPushBack( + this.cacheName, + this.sessionId, + messageToAdd, + { ttl: this.sessionTtl } + ); + if (pushResponse instanceof CacheListPushBack.Success) { + // pass + } else if (pushResponse instanceof CacheListPushBack.Error) { + throw pushResponse.innerException(); + } else { + throw new Error(`Unknown response type: ${pushResponse.toString()}`); + } + } + + /** + * Deletes all messages from the cache. + * @returns A Promise that resolves when all messages have been deleted. + */ + public async clear(): Promise { + const deleteResponse = await this.client.delete( + this.cacheName, + this.sessionId + ); + if (deleteResponse instanceof CacheDelete.Success) { + // pass + } else if (deleteResponse instanceof CacheDelete.Error) { + throw deleteResponse.innerException(); + } else { + throw new Error(`Unknown response type: ${deleteResponse.toString()}`); + } + } +} diff --git a/libs/langchain-community/src/stores/message/mongodb.ts b/libs/langchain-community/src/stores/message/mongodb.ts new file mode 100644 index 000000000000..8addfb842c7f --- /dev/null +++ b/libs/langchain-community/src/stores/message/mongodb.ts @@ -0,0 +1,59 @@ +import { Collection, Document as MongoDBDocument, ObjectId } from "mongodb"; +import { + BaseListChatMessageHistory +} from "@langchain/core/chat_history"; +import { BaseMessage, mapChatMessagesToStoredMessages, + mapStoredMessagesToChatMessages, } from "@langchain/core/messages"; + +export interface MongoDBChatMessageHistoryInput { + collection: Collection; + sessionId: string; +} + +/** + * @example + * ```typescript + * const chatHistory = new MongoDBChatMessageHistory({ + * collection: myCollection, + * sessionId: 'unique-session-id', + * }); + * const messages = await chatHistory.getMessages(); + * await chatHistory.clear(); + * ``` + */ 
+export class MongoDBChatMessageHistory extends BaseListChatMessageHistory { + lc_namespace = ["langchain", "stores", "message", "mongodb"]; + + private collection: Collection; + + private sessionId: string; + + constructor({ collection, sessionId }: MongoDBChatMessageHistoryInput) { + super(); + this.collection = collection; + this.sessionId = sessionId; + } + + async getMessages(): Promise { + const document = await this.collection.findOne({ + _id: new ObjectId(this.sessionId), + }); + const messages = document?.messages || []; + return mapStoredMessagesToChatMessages(messages); + } + + async addMessage(message: BaseMessage): Promise { + const messages = mapChatMessagesToStoredMessages([message]); + await this.collection.updateOne( + { _id: new ObjectId(this.sessionId) }, + { + $push: { messages: { $each: messages } }, + }, + { upsert: true } + ); + } + + async clear(): Promise { + await this.collection.deleteOne({ _id: new ObjectId(this.sessionId) }); + } +} diff --git a/libs/langchain-community/src/stores/message/planetscale.ts b/libs/langchain-community/src/stores/message/planetscale.ts new file mode 100644 index 000000000000..a18a5dbc338d --- /dev/null +++ b/libs/langchain-community/src/stores/message/planetscale.ts @@ -0,0 +1,205 @@ +import { + Client as PlanetScaleClient, + Config as PlanetScaleConfig, + Connection as PlanetScaleConnection, +} from "@planetscale/database"; +import { + BaseListChatMessageHistory +} from "@langchain/core/chat_history"; +import { BaseMessage, StoredMessage, StoredMessageData, mapChatMessagesToStoredMessages, + mapStoredMessagesToChatMessages, } from "@langchain/core/messages"; + +/** + * Type definition for the input parameters required when instantiating a + * PlanetScaleChatMessageHistory object. 
+ */ +export type PlanetScaleChatMessageHistoryInput = { + tableName?: string; + sessionId: string; + config?: PlanetScaleConfig; + client?: PlanetScaleClient; +}; + +/** + * Interface for the data transfer object used when selecting stored + * messages from the PlanetScale database. + */ +interface selectStoredMessagesDTO { + id: string; + session_id: string; + type: string; + content: string; + role: string | null; + name: string | null; + additional_kwargs: string; +} + +/** + * Class for storing and retrieving chat message history from a + * PlanetScale database. Extends the BaseListChatMessageHistory class. + * @example + * ```typescript + * const chatHistory = new PlanetScaleChatMessageHistory({ + * tableName: "stored_message", + * sessionId: "lc-example", + * config: { + * url: "ADD_YOURS_HERE", + * }, + * }); + * const chain = new ConversationChain({ + * llm: new ChatOpenAI(), + * memory: chatHistory, + * }); + * const response = await chain.invoke({ + * input: "What did I just say my name was?", + * }); + * console.log({ response }); + * ``` + */ +export class PlanetScaleChatMessageHistory extends BaseListChatMessageHistory { + lc_namespace = ["langchain", "stores", "message", "planetscale"]; + + get lc_secrets() { + return { + "config.host": "PLANETSCALE_HOST", + "config.username": "PLANETSCALE_USERNAME", + "config.password": "PLANETSCALE_PASSWORD", + "config.url": "PLANETSCALE_DATABASE_URL", + }; + } + + public client: PlanetScaleClient; + + private connection: PlanetScaleConnection; + + private tableName: string; + + private sessionId: string; + + private tableInitialized: boolean; + + constructor(fields: PlanetScaleChatMessageHistoryInput) { + super(fields); + + const { sessionId, config, client, tableName } = fields; + + if (client) { + this.client = client; + } else if (config) { + this.client = new PlanetScaleClient(config); + } else { + throw new Error( + "Either a client or config must be provided to PlanetScaleChatMessageHistory" + ); + } + + 
this.connection = this.client.connection(); + + this.tableName = tableName || "langchain_chat_histories"; + this.tableInitialized = false; + this.sessionId = sessionId; + } + + /** + * Private method to ensure that the necessary table exists in the + * PlanetScale database before performing any operations. If the table + * does not exist, it is created. + * @returns Promise that resolves to void. + */ + private async ensureTable(): Promise { + if (this.tableInitialized) { + return; + } + + const query = `CREATE TABLE IF NOT EXISTS ${this.tableName} (id BINARY(16) PRIMARY KEY, session_id VARCHAR(255), type VARCHAR(255), content VARCHAR(255), role VARCHAR(255), name VARCHAR(255), additional_kwargs VARCHAR(255));`; + + await this.connection.execute(query); + + const indexQuery = `ALTER TABLE ${this.tableName} MODIFY id BINARY(16) DEFAULT (UUID_TO_BIN(UUID()));`; + + await this.connection.execute(indexQuery); + + this.tableInitialized = true; + } + + /** + * Method to retrieve all messages from the PlanetScale database for the + * current session. + * @returns Promise that resolves to an array of BaseMessage objects. 
+ */ + async getMessages(): Promise { + await this.ensureTable(); + + const query = `SELECT * FROM ${this.tableName} WHERE session_id = :session_id`; + const params = { + session_id: this.sessionId, + }; + + const rawStoredMessages = await this.connection.execute(query, params); + const storedMessagesObject = + rawStoredMessages.rows as unknown as selectStoredMessagesDTO[]; + + const orderedMessages: StoredMessage[] = storedMessagesObject.map( + (message) => { + const data = { + content: message.content, + additional_kwargs: JSON.parse(message.additional_kwargs), + } as StoredMessageData; + + if (message.role) { + data.role = message.role; + } + + if (message.name) { + data.name = message.name; + } + + return { + type: message.type, + data, + }; + } + ); + return mapStoredMessagesToChatMessages(orderedMessages); + } + + /** + * Method to add a new message to the PlanetScale database for the current + * session. + * @param message The BaseMessage object to be added to the database. + * @returns Promise that resolves to void. + */ + async addMessage(message: BaseMessage): Promise { + await this.ensureTable(); + + const messageToAdd = mapChatMessagesToStoredMessages([message]); + + const query = `INSERT INTO ${this.tableName} (session_id, type, content, role, name, additional_kwargs) VALUES (:session_id, :type, :content, :role, :name, :additional_kwargs)`; + + const params = { + session_id: this.sessionId, + type: messageToAdd[0].type, + content: messageToAdd[0].data.content, + role: messageToAdd[0].data.role, + name: messageToAdd[0].data.name, + additional_kwargs: JSON.stringify(messageToAdd[0].data.additional_kwargs), + }; + + await this.connection.execute(query, params); + } + + /** + * Method to delete all messages from the PlanetScale database for the + * current session. + * @returns Promise that resolves to void. 
+ */ + async clear(): Promise { + await this.ensureTable(); + + const query = `DELETE FROM ${this.tableName} WHERE session_id = :session_id`; + const params = { + session_id: this.sessionId, + }; + await this.connection.execute(query, params); + } +} diff --git a/libs/langchain-community/src/stores/message/redis.ts b/libs/langchain-community/src/stores/message/redis.ts new file mode 100644 index 000000000000..5bfab7222262 --- /dev/null +++ b/libs/langchain-community/src/stores/message/redis.ts @@ -0,0 +1,129 @@ +// TODO: Deprecate in favor of stores/message/ioredis.ts when LLMCache and other implementations are ported +import { + createClient, + RedisClientOptions, + RedisClientType, + RedisModules, + RedisFunctions, + RedisScripts, +} from "redis"; +import { + BaseListChatMessageHistory +} from "@langchain/core/chat_history"; +import { BaseMessage, mapChatMessagesToStoredMessages, + mapStoredMessagesToChatMessages, } from "@langchain/core/messages"; + +/** + * Type for the input to the `RedisChatMessageHistory` constructor. + */ +export type RedisChatMessageHistoryInput = { + sessionId: string; + sessionTTL?: number; + config?: RedisClientOptions; + // Typing issues with createClient output: https://github.com/redis/node-redis/issues/1865 + // eslint-disable-next-line @typescript-eslint/no-explicit-any + client?: any; +}; + +/** + * Class for storing chat message history using Redis. Extends the + * `BaseListChatMessageHistory` class. 
+ * @example + * ```typescript + * const chatHistory = new RedisChatMessageHistory({ + * sessionId: new Date().toISOString(), + * sessionTTL: 300, + * url: "redis: + * }); + * + * const chain = new ConversationChain({ + * llm: new ChatOpenAI({ modelName: "gpt-3.5-turbo", temperature: 0 }), + * memory: { chatHistory }, + * }); + * + * const response = await chain.invoke({ + * input: "What did I just say my name was?", + * }); + * console.log({ response }); + * ``` + */ +export class RedisChatMessageHistory extends BaseListChatMessageHistory { + lc_namespace = ["langchain", "stores", "message", "redis"]; + + get lc_secrets() { + return { + "config.url": "REDIS_URL", + "config.username": "REDIS_USERNAME", + "config.password": "REDIS_PASSWORD", + }; + } + + public client: RedisClientType; + + private sessionId: string; + + private sessionTTL?: number; + + constructor(fields: RedisChatMessageHistoryInput) { + super(fields); + + const { sessionId, sessionTTL, config, client } = fields; + this.client = (client ?? createClient(config ?? {})) as RedisClientType< + RedisModules, + RedisFunctions, + RedisScripts + >; + this.sessionId = sessionId; + this.sessionTTL = sessionTTL; + } + + /** + * Ensures the Redis client is ready to perform operations. If the client + * is not ready, it attempts to connect to the Redis database. + * @returns Promise resolving to true when the client is ready. + */ + async ensureReadiness() { + if (!this.client.isReady) { + await this.client.connect(); + } + return true; + } + + /** + * Retrieves all chat messages from the Redis database for the current + * session. + * @returns Promise resolving to an array of `BaseMessage` instances. 
+ */ + async getMessages(): Promise { + await this.ensureReadiness(); + const rawStoredMessages = await this.client.lRange(this.sessionId, 0, -1); + const orderedMessages = rawStoredMessages + .reverse() + .map((message) => JSON.parse(message)); + return mapStoredMessagesToChatMessages(orderedMessages); + } + + /** + * Adds a new chat message to the Redis database for the current session. + * @param message The `BaseMessage` instance to add. + * @returns Promise resolving when the message has been added. + */ + async addMessage(message: BaseMessage): Promise { + await this.ensureReadiness(); + const messageToAdd = mapChatMessagesToStoredMessages([message]); + await this.client.lPush(this.sessionId, JSON.stringify(messageToAdd[0])); + if (this.sessionTTL) { + await this.client.expire(this.sessionId, this.sessionTTL); + } + } + + /** + * Deletes all chat messages from the Redis database for the current + * session. + * @returns Promise resolving when the messages have been deleted. + */ + async clear(): Promise { + await this.ensureReadiness(); + await this.client.del(this.sessionId); + } +} diff --git a/libs/langchain-community/src/stores/message/upstash_redis.ts b/libs/langchain-community/src/stores/message/upstash_redis.ts new file mode 100644 index 000000000000..591921237b77 --- /dev/null +++ b/libs/langchain-community/src/stores/message/upstash_redis.ts @@ -0,0 +1,91 @@ +import { Redis, type RedisConfigNodejs } from "@upstash/redis"; +import { + BaseListChatMessageHistory +} from "@langchain/core/chat_history"; +import { BaseMessage, StoredMessage, mapChatMessagesToStoredMessages, + mapStoredMessagesToChatMessages, } from "@langchain/core/messages"; + +/** + * Type definition for the input parameters required to initialize an + * instance of the UpstashRedisChatMessageHistory class. 
+ */ +export type UpstashRedisChatMessageHistoryInput = { + sessionId: string; + sessionTTL?: number; + config?: RedisConfigNodejs; + client?: Redis; +}; + +/** + * Class used to store chat message history in Redis. It provides methods + * to add, get, and clear messages. + */ +export class UpstashRedisChatMessageHistory extends BaseListChatMessageHistory { + lc_namespace = ["langchain", "stores", "message", "upstash_redis"]; + + get lc_secrets() { + return { + "config.url": "UPSTASH_REDIS_REST_URL", + "config.token": "UPSTASH_REDIS_REST_TOKEN", + }; + } + + public client: Redis; + + private sessionId: string; + + private sessionTTL?: number; + + constructor(fields: UpstashRedisChatMessageHistoryInput) { + super(fields); + const { sessionId, sessionTTL, config, client } = fields; + if (client) { + this.client = client; + } else if (config) { + this.client = new Redis(config); + } else { + throw new Error( + `Upstash Redis message stores require either a config object or a pre-configured client.` + ); + } + this.sessionId = sessionId; + this.sessionTTL = sessionTTL; + } + + /** + * Retrieves the chat messages from the Redis database. + * @returns An array of BaseMessage instances representing the chat history. + */ + async getMessages(): Promise { + const rawStoredMessages: StoredMessage[] = + await this.client.lrange(this.sessionId, 0, -1); + + const orderedMessages = rawStoredMessages.reverse(); + const previousMessages = orderedMessages.filter( + (x): x is StoredMessage => + x.type !== undefined && x.data.content !== undefined + ); + return mapStoredMessagesToChatMessages(previousMessages); + } + + /** + * Adds a new message to the chat history in the Redis database. + * @param message The message to be added to the chat history. + * @returns Promise resolving to void. 
+ */ + async addMessage(message: BaseMessage): Promise { + const messageToAdd = mapChatMessagesToStoredMessages([message]); + await this.client.lpush(this.sessionId, JSON.stringify(messageToAdd[0])); + if (this.sessionTTL) { + await this.client.expire(this.sessionId, this.sessionTTL); + } + } + + /** + * Deletes all messages from the chat history in the Redis database. + * @returns Promise resolving to void. + */ + async clear(): Promise { + await this.client.del(this.sessionId); + } +} diff --git a/libs/langchain-community/src/stores/message/xata.ts b/libs/langchain-community/src/stores/message/xata.ts new file mode 100644 index 000000000000..6ce75337a3a9 --- /dev/null +++ b/libs/langchain-community/src/stores/message/xata.ts @@ -0,0 +1,238 @@ +import { + BaseClient, + BaseClientOptions, + GetTableSchemaResponse, + Schemas, + XataApiClient, + parseWorkspacesUrlParts, +} from "@xata.io/client"; +import { + BaseListChatMessageHistory +} from "@langchain/core/chat_history"; +import { BaseMessage, StoredMessage, StoredMessageData, mapChatMessagesToStoredMessages, + mapStoredMessagesToChatMessages, } from "@langchain/core/messages"; + +/** + * An object type that represents the input for the XataChatMessageHistory + * class. + */ +export type XataChatMessageHistoryInput = { + sessionId: string; + config?: BaseClientOptions; + client?: XataClient; + table?: string; + createTable?: boolean; + apiKey?: string; +}; + +/** + * An interface that represents the data transfer object for stored + * messages. 
+ */ +interface storedMessagesDTO { + id: string; + sessionId: string; + type: string; + content: string; + role?: string; + name?: string; + additionalKwargs: string; +} + +const chatMemoryColumns: Schemas.Column[] = [ + { name: "sessionId", type: "string" }, + { name: "type", type: "string" }, + { name: "role", type: "string" }, + { name: "content", type: "text" }, + { name: "name", type: "string" }, + { name: "additionalKwargs", type: "text" }, +]; + +/** + * A class for managing chat message history using Xata.io client. It + * extends the BaseListChatMessageHistory class and provides methods to + * get, add, and clear messages. It also ensures the existence of a table + * where the chat messages are stored. + * @example + * ```typescript + * const chatHistory = new XataChatMessageHistory({ + * table: "messages", + * sessionId: new Date().toISOString(), + * client: new BaseClient({ + * databaseURL: process.env.XATA_DB_URL, + * apiKey: process.env.XATA_API_KEY, + * branch: "main", + * }), + * apiKey: process.env.XATA_API_KEY, + * }); + * + * const chain = new ConversationChain({ + * llm: new ChatOpenAI(), + * memory: new BufferMemory({ chatHistory }), + * }); + * + * const response = await chain.invoke({ + * input: "What did I just say my name was?", + * }); + * console.log({ response }); + * ``` + */ +export class XataChatMessageHistory< + XataClient extends BaseClient +> extends BaseListChatMessageHistory { + lc_namespace = ["langchain", "stores", "message", "xata"]; + + public client: XataClient; + + private sessionId: string; + + private table: string; + + private tableInitialized: boolean; + + private createTable: boolean; + + private apiClient: XataApiClient; + + constructor(fields: XataChatMessageHistoryInput) { + super(fields); + + const { sessionId, config, client, table } = fields; + this.sessionId = sessionId; + this.table = table || "memory"; + if (client) { + this.client = client; + } else if (config) { + this.client = new BaseClient(config) as 
XataClient; + } else { + throw new Error( + "Either a client or a config must be provided to XataChatMessageHistoryInput" + ); + } + if (fields.createTable !== false) { + this.createTable = true; + const apiKey = fields.apiKey || fields.config?.apiKey; + if (!apiKey) { + throw new Error( + "If createTable is set, an apiKey must be provided to XataChatMessageHistoryInput, either directly or through the config object" + ); + } + this.apiClient = new XataApiClient({ apiKey }); + } else { + this.createTable = false; + } + this.tableInitialized = false; + } + + /** + * Retrieves all messages associated with the session ID, ordered by + * creation time. + * @returns A promise that resolves to an array of BaseMessage instances. + */ + async getMessages(): Promise { + await this.ensureTable(); + const records = await this.client.db[this.table] + .filter({ sessionId: this.sessionId }) + .sort("xata.createdAt", "asc") + .getAll(); + + const rawStoredMessages = records as unknown as storedMessagesDTO[]; + const orderedMessages: StoredMessage[] = rawStoredMessages.map( + (message: storedMessagesDTO) => { + const data = { + content: message.content, + additional_kwargs: JSON.parse(message.additionalKwargs), + } as StoredMessageData; + if (message.role) { + data.role = message.role; + } + if (message.name) { + data.name = message.name; + } + + return { + type: message.type, + data, + }; + } + ); + return mapStoredMessagesToChatMessages(orderedMessages); + } + + /** + * Adds a new message to the database. + * @param message The BaseMessage instance to be added. + * @returns A promise that resolves when the message has been added. 
+ */ + async addMessage(message: BaseMessage): Promise { + await this.ensureTable(); + const messageToAdd = mapChatMessagesToStoredMessages([message]); + await this.client.db[this.table].create({ + sessionId: this.sessionId, + type: messageToAdd[0].type, + content: messageToAdd[0].data.content, + role: messageToAdd[0].data.role, + name: messageToAdd[0].data.name, + additionalKwargs: JSON.stringify(messageToAdd[0].data.additional_kwargs), + }); + } + + /** + * Deletes all messages associated with the session ID. + * @returns A promise that resolves when the messages have been deleted. + */ + async clear(): Promise { + await this.ensureTable(); + const records = await this.client.db[this.table] + .select(["id"]) + .filter({ sessionId: this.sessionId }) + .getAll(); + const ids = records.map((m) => m.id); + await this.client.db[this.table].delete(ids); + } + + /** + * Checks if the table exists and creates it if it doesn't. This method is + * called before any operation on the table. + * @returns A promise that resolves when the table has been ensured. 
+ */ + private async ensureTable(): Promise { + if (!this.createTable) { + return; + } + if (this.tableInitialized) { + return; + } + + const { databaseURL, branch } = await this.client.getConfig(); + const [, , host, , database] = databaseURL.split("/"); + const urlParts = parseWorkspacesUrlParts(host); + if (urlParts == null) { + throw new Error("Invalid databaseURL"); + } + const { workspace, region } = urlParts; + const tableParams = { + workspace, + region, + database, + branch, + table: this.table, + }; + + let schema: GetTableSchemaResponse | null = null; + try { + schema = await this.apiClient.tables.getTableSchema(tableParams); + } catch (e) { + // pass + } + if (schema == null) { + await this.apiClient.tables.createTable(tableParams); + await this.apiClient.tables.setTableSchema({ + ...tableParams, + schema: { + columns: chatMemoryColumns, + }, + }); + } + } +} From 26743fdfa54e1c750d0918fbf252e502cfaa2d94 Mon Sep 17 00:00:00 2001 From: jacoblee93 Date: Thu, 7 Dec 2023 15:57:00 -0800 Subject: [PATCH 14/22] Move more modules, fix build --- langchain-core/.gitignore | 3 + langchain-core/package.json | 12 +- langchain-core/scripts/create-entrypoints.js | 1 + langchain-core/src/load/import_map.ts | 1 + langchain-core/src/utils/math.ts | 180 +++++ .../src/utils}/tests/math_utils.test.ts | 0 langchain/package.json | 7 +- .../src/document_transformers/html_to_text.ts | 2 +- .../mozilla_readability.ts | 2 +- langchain/src/load/import_type.d.ts | 8 - langchain/src/schema/document.ts | 5 +- langchain/src/schema/index.ts | 11 +- langchain/src/storage/convex.ts | 2 +- langchain/src/storage/ioredis.ts | 2 +- langchain/src/storage/upstash_redis.ts | 2 +- langchain/src/storage/vercel_kv.ts | 2 +- langchain/src/stores/doc/in_memory.ts | 114 +-- langchain/src/stores/message/cassandra.ts | 2 +- langchain/src/stores/message/cloudflare_d1.ts | 2 +- langchain/src/stores/message/convex.ts | 2 +- langchain/src/stores/message/dynamodb.ts | 2 +- 
langchain/src/stores/message/firestore.ts | 2 +- langchain/src/stores/message/ioredis.ts | 2 +- langchain/src/stores/message/momento.ts | 2 +- langchain/src/stores/message/mongodb.ts | 2 +- langchain/src/stores/message/planetscale.ts | 2 +- langchain/src/stores/message/redis.ts | 2 +- langchain/src/stores/message/upstash_redis.ts | 2 +- langchain/src/stores/message/utils.ts | 5 +- langchain/src/stores/message/xata.ts | 2 +- langchain/src/util/math.ts | 181 +---- langchain/src/vectorstores/analyticdb.ts | 391 +--------- langchain/src/vectorstores/base.ts | 300 +------ langchain/src/vectorstores/cassandra.ts | 582 +------------- langchain/src/vectorstores/chroma.ts | 365 +-------- langchain/src/vectorstores/clickhouse.ts | 339 +------- .../src/vectorstores/closevector/node.ts | 183 +---- langchain/src/vectorstores/closevector/web.ts | 180 +---- .../src/vectorstores/cloudflare_vectorize.ts | 228 +----- langchain/src/vectorstores/convex.ts | 377 +-------- langchain/src/vectorstores/elasticsearch.ts | 343 +------- langchain/src/vectorstores/faiss.ts | 462 +---------- langchain/src/vectorstores/googlevertexai.ts | 738 +----------------- langchain/src/vectorstores/hnswlib.ts | 355 +-------- langchain/src/vectorstores/lancedb.ts | 153 +--- langchain/src/vectorstores/milvus.ts | 675 +--------------- .../src/vectorstores/momento_vector_index.ts | 400 +--------- langchain/src/vectorstores/mongodb_atlas.ts | 280 +------ langchain/src/vectorstores/myscale.ts | 315 +------- langchain/src/vectorstores/neo4j_vector.ts | 732 +---------------- langchain/src/vectorstores/opensearch.ts | 327 +------- langchain/src/vectorstores/pgvector.ts | 441 +---------- langchain/src/vectorstores/pinecone.ts | 361 +-------- langchain/src/vectorstores/prisma.ts | 512 +----------- langchain/src/vectorstores/qdrant.ts | 261 +------ langchain/src/vectorstores/redis.ts | 459 +---------- langchain/src/vectorstores/rockset.ts | 454 +---------- langchain/src/vectorstores/singlestore.ts | 295 +------ 
langchain/src/vectorstores/supabase.ts | 311 +------- .../tests/convex/convex/langchain/db.ts | 1 - langchain/src/vectorstores/tigris.ts | 178 +---- langchain/src/vectorstores/typeorm.ts | 299 +------ langchain/src/vectorstores/typesense.ts | 321 +------- langchain/src/vectorstores/usearch.ts | 224 +----- langchain/src/vectorstores/vectara.ts | 533 +------------ langchain/src/vectorstores/vercel_postgres.ts | 394 +--------- langchain/src/vectorstores/voy.ts | 192 +---- langchain/src/vectorstores/weaviate.ts | 433 +--------- langchain/src/vectorstores/xata.ts | 150 +--- langchain/src/vectorstores/zep.ts | 425 +--------- libs/langchain-community/.gitignore | 45 ++ .../cache/cloudflare_kv.cjs | 1 - .../cache/cloudflare_kv.d.ts | 1 - .../cache/cloudflare_kv.js | 1 - libs/langchain-community/cache/momento.cjs | 1 - libs/langchain-community/cache/momento.d.ts | 1 - libs/langchain-community/cache/momento.js | 1 - .../cache/upstash_redis.cjs | 1 - .../cache/upstash_redis.d.ts | 1 - .../cache/upstash_redis.js | 1 - libs/langchain-community/package.json | 214 ++--- .../scripts/create-entrypoints.js | 29 + .../tests/chatbaiduwenxin.int.test.ts | 136 ---- .../chat_models/tests/chatbedrock.int.test.ts | 186 ----- .../chatcloudflare_workersai.int.test.ts | 131 ---- .../tests/chatfireworks.int.test.ts | 73 -- .../chat_models/tests/chatgooglepalm.test.ts | 252 ------ .../tests/chatgooglevertexai.test.ts | 116 --- .../tests/chatgooglevertexai_web.test.ts | 149 ---- .../tests/chatiflytekxinghuo.int.test.ts | 12 - .../src/chat_models/tests/minimax.int.test.ts | 341 -------- .../src/document_transformers/html_to_text.ts | 9 +- .../mozilla_readability.ts | 8 +- .../src/graphs/tests/neo4j_graph.int.test.ts | 56 -- .../src/llms/tests/ai21.int.test.ts | 51 -- .../src/llms/tests/aleph_alpha.int.test.ts | 54 -- .../src/llms/tests/bedrock.int.test.ts | 182 ----- .../tests/cloudflare_workersai.int.test.ts | 50 -- .../src/llms/tests/cohere.int.test.ts | 8 - 
.../src/llms/tests/fireworks.int.test.ts | 20 - .../src/llms/tests/googlepalm.int.test.ts | 32 - .../src/llms/tests/googlepalm.test.ts | 75 -- .../src/llms/tests/googlevertexai.int.test.ts | 78 -- .../llms/tests/googlevertexai_web.int.test.ts | 77 -- .../llms/tests/huggingface_hub.int.test.ts | 8 - .../src/llms/tests/llama_cpp.int.test.ts | 47 -- .../src/llms/tests/ollama.int.test.ts | 113 --- .../src/llms/tests/replicate.int.test.ts | 57 -- .../llms/tests/sagemaker_endpoint.int.test.ts | 133 ---- .../src/llms/tests/writer.int.test.ts | 8 - .../src/load/import_constants.ts | 14 + .../src/load/import_map.ts | 3 +- .../src/load/import_type.d.ts | 42 + .../tests/amazon_kendra.int.test.ts | 22 - .../src/retrievers/tests/metal.int.test.ts | 22 - .../src/retrievers/tests/supabase.int.test.ts | 27 - .../tests/tavily_search_api.int.test.ts | 17 - .../src/stores/doc/base.ts | 11 + .../src/stores/doc/in_memory.ts | 113 +++ .../src/stores/message/cassandra.ts | 10 +- .../src/stores/message/cloudflare_d1.ts | 11 +- .../src/stores/message/convex.ts | 9 +- .../src/stores/message/dynamodb.ts | 10 +- .../src/stores/message/firestore.ts | 10 +- .../src/stores/message/ioredis.ts | 9 +- .../src/stores/message/momento.ts | 10 +- .../src/stores/message/mongodb.ts | 9 +- .../src/stores/message/planetscale.ts | 11 +- .../src/stores/message/redis.ts | 9 +- .../src/stores/message/upstash_redis.ts | 10 +- .../src/stores/message/xata.ts | 11 +- .../src/types/assemblyai-types.ts | 6 - .../src/vectorstores/closevector/common.ts | 6 +- .../src/vectorstores/closevector/node.ts | 182 +++++ .../src/vectorstores/closevector/web.ts | 179 +++++ .../src/vectorstores/faiss.ts | 461 +++++++++++ .../src/vectorstores/googlevertexai.ts | 738 ++++++++++++++++++ .../src/vectorstores/hnswlib.ts | 354 +++++++++ .../src/vectorstores/momento_vector_index.ts | 402 ++++++++++ .../src/vectorstores/mongodb_atlas.ts | 282 +++++++ .../src/vectorstores/pgvector.ts | 440 +++++++++++ .../src/vectorstores/pinecone.ts | 
366 +++++++++ .../src/vectorstores/supabase.ts | 313 ++++++++ .../vectorstores/tests/analyticdb.int.test.ts | 4 +- .../vectorstores/tests/cassandra.int.test.ts | 4 +- .../src/vectorstores/tests/chroma.int.test.ts | 4 +- .../src/vectorstores/tests/chroma.test.ts | 2 +- .../vectorstores/tests/clickhouse.int.test.ts | 2 +- .../tests/closevector_node.int.test.ts | 4 +- .../tests/closevector_node.test.ts | 4 +- .../src/vectorstores/tests/convex.int.test.ts | 0 .../tests/convex/convex/_generated/api.d.ts | 0 .../tests/convex/convex/_generated/api.js | 0 .../convex/convex/_generated/dataModel.d.ts | 0 .../convex/convex/_generated/server.d.ts | 0 .../tests/convex/convex/_generated/server.js | 0 .../tests/convex/convex/langchain/db.ts | 1 + .../vectorstores/tests/convex/convex/lib.ts | 2 +- .../tests/convex/convex/schema.ts | 0 .../vectorstores/tests/convex/package.json | 0 .../tests/elasticsearch.int.test.ts | 4 +- .../faiss.int.test.data/faiss.int.test.py | 0 .../faiss_index/index.faiss | Bin .../faiss.int.test.data/faiss_index/index.pkl | Bin .../faiss.int.test.data/requirements.txt | 0 .../src/vectorstores/tests/faiss.int.test.ts | 4 +- .../src/vectorstores/tests/faiss.test.ts | 4 +- .../tests/googlevertexai.int.test.ts | 6 +- .../vectorstores/tests/googlevertexai.test.ts | 4 +- .../vectorstores/tests/hnswlib.int.test.ts | 5 +- .../src/vectorstores/tests/hnswlib.test.ts | 5 +- .../vectorstores/tests/lancedb.int.test.ts | 6 +- .../src/vectorstores/tests/milvus.int.test.ts | 2 +- .../tests/momento_vector_index.int.test.ts | 7 +- .../tests/mongodb_atlas.int.test.ts | 4 +- .../vectorstores/tests/myscale.int.test.ts | 4 +- .../tests/neo4j_vector.int.test.ts | 4 +- .../vectorstores/tests/opensearch.int.test.ts | 4 +- .../vectorstores/tests/pgvector.int.test.ts | 2 +- .../vectorstores/tests/pinecone.int.test.ts | 4 +- .../src/vectorstores/tests/pinecone.test.ts | 2 +- .../src/vectorstores/tests/qdrant.int.test.ts | 4 +- .../src/vectorstores/tests/qdrant.test.ts | 2 +- 
.../src/vectorstores/tests/redis.int.test.ts | 4 +- .../src/vectorstores/tests/redis.test.ts | 2 +- .../vectorstores/tests/rockset.int.test.ts | 8 +- .../tests/singlestore.int.test.ts | 4 +- .../vectorstores/tests/supabase.int.test.ts | 4 +- .../src/vectorstores/tests/supabase.test.ts | 2 +- .../src/vectorstores/tests/tigris.test.ts | 2 +- .../vectorstores/tests/typeorm.int.test.ts | 2 +- .../src/vectorstores/tests/typesense.test.ts | 4 +- .../vectorstores/tests/usearch.int.test.ts | 4 +- .../vectorstores/tests/vectara.int.test.ts | 4 +- .../tests/vercel_postgres.int.test.ts | 2 +- .../src/vectorstores/tests/voy.int.test.ts | 4 +- .../src/vectorstores/tests/voy.test.ts | 4 +- .../vectorstores/tests/weaviate.int.test.ts | 4 +- .../src/vectorstores/tests/weaviate.test.ts | 0 .../src/vectorstores/tests/xata.int.test.ts | 4 +- .../src/vectorstores/tests/zep.test.ts | 6 +- .../src/vectorstores/usearch.ts | 223 ++++++ .../src/vectorstores/weaviate.ts | 435 +++++++++++ .../src/vectorstores/zep.ts | 427 ++++++++++ yarn.lock | 83 +- 205 files changed, 5624 insertions(+), 17123 deletions(-) create mode 100644 langchain-core/src/utils/math.ts rename {langchain/src/util => langchain-core/src/utils}/tests/math_utils.test.ts (100%) delete mode 100644 langchain/src/vectorstores/tests/convex/convex/langchain/db.ts delete mode 100644 libs/langchain-community/cache/cloudflare_kv.cjs delete mode 100644 libs/langchain-community/cache/cloudflare_kv.d.ts delete mode 100644 libs/langchain-community/cache/cloudflare_kv.js delete mode 100644 libs/langchain-community/cache/momento.cjs delete mode 100644 libs/langchain-community/cache/momento.d.ts delete mode 100644 libs/langchain-community/cache/momento.js delete mode 100644 libs/langchain-community/cache/upstash_redis.cjs delete mode 100644 libs/langchain-community/cache/upstash_redis.d.ts delete mode 100644 libs/langchain-community/cache/upstash_redis.js delete mode 100644 
libs/langchain-community/src/chat_models/tests/chatbaiduwenxin.int.test.ts delete mode 100644 libs/langchain-community/src/chat_models/tests/chatbedrock.int.test.ts delete mode 100644 libs/langchain-community/src/chat_models/tests/chatcloudflare_workersai.int.test.ts delete mode 100644 libs/langchain-community/src/chat_models/tests/chatfireworks.int.test.ts delete mode 100644 libs/langchain-community/src/chat_models/tests/chatgooglepalm.test.ts delete mode 100644 libs/langchain-community/src/chat_models/tests/chatgooglevertexai.test.ts delete mode 100644 libs/langchain-community/src/chat_models/tests/chatgooglevertexai_web.test.ts delete mode 100644 libs/langchain-community/src/chat_models/tests/chatiflytekxinghuo.int.test.ts delete mode 100644 libs/langchain-community/src/chat_models/tests/minimax.int.test.ts delete mode 100644 libs/langchain-community/src/graphs/tests/neo4j_graph.int.test.ts delete mode 100644 libs/langchain-community/src/llms/tests/ai21.int.test.ts delete mode 100644 libs/langchain-community/src/llms/tests/aleph_alpha.int.test.ts delete mode 100644 libs/langchain-community/src/llms/tests/bedrock.int.test.ts delete mode 100644 libs/langchain-community/src/llms/tests/cloudflare_workersai.int.test.ts delete mode 100644 libs/langchain-community/src/llms/tests/cohere.int.test.ts delete mode 100644 libs/langchain-community/src/llms/tests/fireworks.int.test.ts delete mode 100644 libs/langchain-community/src/llms/tests/googlepalm.int.test.ts delete mode 100644 libs/langchain-community/src/llms/tests/googlepalm.test.ts delete mode 100644 libs/langchain-community/src/llms/tests/googlevertexai.int.test.ts delete mode 100644 libs/langchain-community/src/llms/tests/googlevertexai_web.int.test.ts delete mode 100644 libs/langchain-community/src/llms/tests/huggingface_hub.int.test.ts delete mode 100644 libs/langchain-community/src/llms/tests/llama_cpp.int.test.ts delete mode 100644 libs/langchain-community/src/llms/tests/ollama.int.test.ts delete mode 100644 
libs/langchain-community/src/llms/tests/replicate.int.test.ts delete mode 100644 libs/langchain-community/src/llms/tests/sagemaker_endpoint.int.test.ts delete mode 100644 libs/langchain-community/src/llms/tests/writer.int.test.ts delete mode 100644 libs/langchain-community/src/retrievers/tests/amazon_kendra.int.test.ts delete mode 100644 libs/langchain-community/src/retrievers/tests/metal.int.test.ts delete mode 100644 libs/langchain-community/src/retrievers/tests/supabase.int.test.ts delete mode 100644 libs/langchain-community/src/retrievers/tests/tavily_search_api.int.test.ts create mode 100644 libs/langchain-community/src/stores/doc/base.ts create mode 100644 libs/langchain-community/src/stores/doc/in_memory.ts delete mode 100644 libs/langchain-community/src/types/assemblyai-types.ts rename {langchain => libs/langchain-community}/src/vectorstores/closevector/common.ts (96%) create mode 100644 libs/langchain-community/src/vectorstores/closevector/node.ts create mode 100644 libs/langchain-community/src/vectorstores/closevector/web.ts create mode 100644 libs/langchain-community/src/vectorstores/faiss.ts create mode 100644 libs/langchain-community/src/vectorstores/googlevertexai.ts create mode 100644 libs/langchain-community/src/vectorstores/hnswlib.ts create mode 100644 libs/langchain-community/src/vectorstores/momento_vector_index.ts create mode 100755 libs/langchain-community/src/vectorstores/mongodb_atlas.ts create mode 100644 libs/langchain-community/src/vectorstores/pgvector.ts create mode 100644 libs/langchain-community/src/vectorstores/pinecone.ts create mode 100644 libs/langchain-community/src/vectorstores/supabase.ts rename {langchain => libs/langchain-community}/src/vectorstores/tests/analyticdb.int.test.ts (97%) rename {langchain => libs/langchain-community}/src/vectorstores/tests/cassandra.int.test.ts (98%) rename {langchain => libs/langchain-community}/src/vectorstores/tests/chroma.int.test.ts (97%) rename {langchain => 
libs/langchain-community}/src/vectorstores/tests/chroma.test.ts (98%) rename {langchain => libs/langchain-community}/src/vectorstores/tests/clickhouse.int.test.ts (98%) rename {langchain => libs/langchain-community}/src/vectorstores/tests/closevector_node.int.test.ts (89%) rename {langchain => libs/langchain-community}/src/vectorstores/tests/closevector_node.test.ts (93%) rename {langchain => libs/langchain-community}/src/vectorstores/tests/convex.int.test.ts (100%) rename {langchain => libs/langchain-community}/src/vectorstores/tests/convex/convex/_generated/api.d.ts (100%) rename {langchain => libs/langchain-community}/src/vectorstores/tests/convex/convex/_generated/api.js (100%) rename {langchain => libs/langchain-community}/src/vectorstores/tests/convex/convex/_generated/dataModel.d.ts (100%) rename {langchain => libs/langchain-community}/src/vectorstores/tests/convex/convex/_generated/server.d.ts (100%) rename {langchain => libs/langchain-community}/src/vectorstores/tests/convex/convex/_generated/server.js (100%) create mode 100644 libs/langchain-community/src/vectorstores/tests/convex/convex/langchain/db.ts rename {langchain => libs/langchain-community}/src/vectorstores/tests/convex/convex/lib.ts (94%) rename {langchain => libs/langchain-community}/src/vectorstores/tests/convex/convex/schema.ts (100%) rename {langchain => libs/langchain-community}/src/vectorstores/tests/convex/package.json (100%) rename {langchain => libs/langchain-community}/src/vectorstores/tests/elasticsearch.int.test.ts (97%) rename {langchain => libs/langchain-community}/src/vectorstores/tests/faiss.int.test.data/faiss.int.test.py (100%) rename {langchain => libs/langchain-community}/src/vectorstores/tests/faiss.int.test.data/faiss_index/index.faiss (100%) rename {langchain => libs/langchain-community}/src/vectorstores/tests/faiss.int.test.data/faiss_index/index.pkl (100%) rename {langchain => libs/langchain-community}/src/vectorstores/tests/faiss.int.test.data/requirements.txt (100%) 
rename {langchain => libs/langchain-community}/src/vectorstores/tests/faiss.int.test.ts (98%) rename {langchain => libs/langchain-community}/src/vectorstores/tests/faiss.test.ts (98%) rename {langchain => libs/langchain-community}/src/vectorstores/tests/googlevertexai.int.test.ts (96%) rename {langchain => libs/langchain-community}/src/vectorstores/tests/googlevertexai.test.ts (95%) rename {langchain => libs/langchain-community}/src/vectorstores/tests/hnswlib.int.test.ts (96%) rename {langchain => libs/langchain-community}/src/vectorstores/tests/hnswlib.test.ts (93%) rename {langchain => libs/langchain-community}/src/vectorstores/tests/lancedb.int.test.ts (91%) rename {langchain => libs/langchain-community}/src/vectorstores/tests/milvus.int.test.ts (99%) rename {langchain => libs/langchain-community}/src/vectorstores/tests/momento_vector_index.int.test.ts (97%) rename {langchain => libs/langchain-community}/src/vectorstores/tests/mongodb_atlas.int.test.ts (97%) rename {langchain => libs/langchain-community}/src/vectorstores/tests/myscale.int.test.ts (95%) rename {langchain => libs/langchain-community}/src/vectorstores/tests/neo4j_vector.int.test.ts (99%) rename {langchain => libs/langchain-community}/src/vectorstores/tests/opensearch.int.test.ts (91%) rename {langchain => libs/langchain-community}/src/vectorstores/tests/pgvector.int.test.ts (97%) rename {langchain => libs/langchain-community}/src/vectorstores/tests/pinecone.int.test.ts (97%) rename {langchain => libs/langchain-community}/src/vectorstores/tests/pinecone.test.ts (97%) rename {langchain => libs/langchain-community}/src/vectorstores/tests/qdrant.int.test.ts (94%) rename {langchain => libs/langchain-community}/src/vectorstores/tests/qdrant.test.ts (93%) rename {langchain => libs/langchain-community}/src/vectorstores/tests/redis.int.test.ts (95%) rename {langchain => libs/langchain-community}/src/vectorstores/tests/redis.test.ts (98%) rename {langchain => 
libs/langchain-community}/src/vectorstores/tests/rockset.int.test.ts (92%) rename {langchain => libs/langchain-community}/src/vectorstores/tests/singlestore.int.test.ts (97%) rename {langchain => libs/langchain-community}/src/vectorstores/tests/supabase.int.test.ts (99%) rename {langchain => libs/langchain-community}/src/vectorstores/tests/supabase.test.ts (96%) rename {langchain => libs/langchain-community}/src/vectorstores/tests/tigris.test.ts (96%) rename {langchain => libs/langchain-community}/src/vectorstores/tests/typeorm.int.test.ts (95%) rename {langchain => libs/langchain-community}/src/vectorstores/tests/typesense.test.ts (96%) rename {langchain => libs/langchain-community}/src/vectorstores/tests/usearch.int.test.ts (94%) rename {langchain => libs/langchain-community}/src/vectorstores/tests/vectara.int.test.ts (98%) rename {langchain => libs/langchain-community}/src/vectorstores/tests/vercel_postgres.int.test.ts (98%) rename {langchain => libs/langchain-community}/src/vectorstores/tests/voy.int.test.ts (92%) rename {langchain => libs/langchain-community}/src/vectorstores/tests/voy.test.ts (92%) rename {langchain => libs/langchain-community}/src/vectorstores/tests/weaviate.int.test.ts (98%) rename {langchain => libs/langchain-community}/src/vectorstores/tests/weaviate.test.ts (100%) rename {langchain => libs/langchain-community}/src/vectorstores/tests/xata.int.test.ts (97%) rename {langchain => libs/langchain-community}/src/vectorstores/tests/zep.test.ts (98%) create mode 100644 libs/langchain-community/src/vectorstores/usearch.ts create mode 100644 libs/langchain-community/src/vectorstores/weaviate.ts create mode 100644 libs/langchain-community/src/vectorstores/zep.ts diff --git a/langchain-core/.gitignore b/langchain-core/.gitignore index 05ba9919a328..c6100ae28393 100644 --- a/langchain-core/.gitignore +++ b/langchain-core/.gitignore @@ -106,6 +106,9 @@ utils/json_patch.d.ts utils/json_schema.cjs utils/json_schema.js utils/json_schema.d.ts 
+utils/math.cjs +utils/math.js +utils/math.d.ts utils/stream.cjs utils/stream.js utils/stream.d.ts diff --git a/langchain-core/package.json b/langchain-core/package.json index 132e1840c301..fafa426e1b89 100644 --- a/langchain-core/package.json +++ b/langchain-core/package.json @@ -1,6 +1,6 @@ { "name": "@langchain/core", - "version": "0.0.10", + "version": "0.0.11-rc.1", "description": "Core LangChain.js abstractions and schemas", "type": "module", "engines": { @@ -40,6 +40,7 @@ "decamelize": "1.2.0", "js-tiktoken": "^1.0.8", "langsmith": "~0.0.48", + "ml-distance": "^4.0.0", "p-queue": "^6.6.2", "p-retry": "4", "uuid": "^9.0.0", @@ -59,6 +60,7 @@ "eslint-plugin-prettier": "^4.2.1", "jest": "^29.5.0", "jest-environment-node": "^29.6.4", + "ml-matrix": "^6.10.4", "prettier": "^2.8.3", "release-it": "^15.10.1", "rimraf": "^5.0.1", @@ -263,6 +265,11 @@ "import": "./utils/json_schema.js", "require": "./utils/json_schema.cjs" }, + "./utils/math": { + "types": "./utils/math.d.ts", + "import": "./utils/math.js", + "require": "./utils/math.cjs" + }, "./utils/stream": { "types": "./utils/stream.d.ts", "import": "./utils/stream.js", @@ -400,6 +407,9 @@ "utils/json_schema.cjs", "utils/json_schema.js", "utils/json_schema.d.ts", + "utils/math.cjs", + "utils/math.js", + "utils/math.d.ts", "utils/stream.cjs", "utils/stream.js", "utils/stream.d.ts", diff --git a/langchain-core/scripts/create-entrypoints.js b/langchain-core/scripts/create-entrypoints.js index b49de2176afb..328796a52d64 100644 --- a/langchain-core/scripts/create-entrypoints.js +++ b/langchain-core/scripts/create-entrypoints.js @@ -44,6 +44,7 @@ const entrypoints = { "utils/hash": "utils/hash", "utils/json_patch": "utils/json_patch", "utils/json_schema": "utils/json_schema", + "utils/math": "utils/math", "utils/stream": "utils/stream", "utils/testing": "utils/testing/index", "utils/tiktoken": "utils/tiktoken", diff --git a/langchain-core/src/load/import_map.ts b/langchain-core/src/load/import_map.ts index 
40fe9f8af9a4..0c94e2838e41 100644 --- a/langchain-core/src/load/import_map.ts +++ b/langchain-core/src/load/import_map.ts @@ -35,6 +35,7 @@ export * as utils__env from "../utils/env.js"; export * as utils__hash from "../utils/hash.js"; export * as utils__json_patch from "../utils/json_patch.js"; export * as utils__json_schema from "../utils/json_schema.js"; +export * as utils__math from "../utils/math.js"; export * as utils__stream from "../utils/stream.js"; export * as utils__testing from "../utils/testing/index.js"; export * as utils__tiktoken from "../utils/tiktoken.js"; diff --git a/langchain-core/src/utils/math.ts b/langchain-core/src/utils/math.ts new file mode 100644 index 000000000000..fe703c2d5f79 --- /dev/null +++ b/langchain-core/src/utils/math.ts @@ -0,0 +1,180 @@ +import { + similarity as ml_distance_similarity, + distance as ml_distance, +} from "ml-distance"; + +type VectorFunction = (xVector: number[], yVector: number[]) => number; + +/** + * Apply a row-wise function between two matrices with the same number of columns. + * + * @param {number[][]} X - The first matrix. + * @param {number[][]} Y - The second matrix. + * @param {VectorFunction} func - The function to apply. + * + * @throws {Error} If the number of columns in X and Y are not the same. + * + * @returns {number[][] | [[]]} A matrix where each row represents the result of applying the function between the corresponding rows of X and Y. + */ + +export function matrixFunc( + X: number[][], + Y: number[][], + func: VectorFunction +): number[][] { + if ( + X.length === 0 || + X[0].length === 0 || + Y.length === 0 || + Y[0].length === 0 + ) { + return [[]]; + } + + if (X[0].length !== Y[0].length) { + throw new Error( + `Number of columns in X and Y must be the same. X has shape ${[ + X.length, + X[0].length, + ]} and Y has shape ${[Y.length, Y[0].length]}.` + ); + } + + return X.map((xVector) => + Y.map((yVector) => func(xVector, yVector)).map((similarity) => + Number.isNaN(similarity) ? 
0 : similarity + ) + ); +} + +export function normalize(M: number[][], similarity = false): number[][] { + const max = matrixMaxVal(M); + return M.map((row) => + row.map((val) => (similarity ? 1 - val / max : val / max)) + ); +} + +/** + * This function calculates the row-wise cosine similarity between two matrices with the same number of columns. + * + * @param {number[][]} X - The first matrix. + * @param {number[][]} Y - The second matrix. + * + * @throws {Error} If the number of columns in X and Y are not the same. + * + * @returns {number[][] | [[]]} A matrix where each row represents the cosine similarity values between the corresponding rows of X and Y. + */ +export function cosineSimilarity(X: number[][], Y: number[][]): number[][] { + return matrixFunc(X, Y, ml_distance_similarity.cosine); +} + +export function innerProduct(X: number[][], Y: number[][]): number[][] { + return matrixFunc(X, Y, ml_distance.innerProduct); +} + +export function euclideanDistance(X: number[][], Y: number[][]): number[][] { + return matrixFunc(X, Y, ml_distance.euclidean); +} + +/** + * This function implements the Maximal Marginal Relevance algorithm + * to select a set of embeddings that maximizes the diversity and relevance to a query embedding. + * + * @param {number[]|number[][]} queryEmbedding - The query embedding. + * @param {number[][]} embeddingList - The list of embeddings to select from. + * @param {number} [lambda=0.5] - The trade-off parameter between relevance and diversity. + * @param {number} [k=4] - The maximum number of embeddings to select. + * + * @returns {number[]} The indexes of the selected embeddings in the embeddingList. + */ +export function maximalMarginalRelevance( + queryEmbedding: number[] | number[][], + embeddingList: number[][], + lambda = 0.5, + k = 4 +): number[] { + if (Math.min(k, embeddingList.length) <= 0) { + return []; + } + + const queryEmbeddingExpanded = ( + Array.isArray(queryEmbedding[0]) ? 
queryEmbedding : [queryEmbedding] + ) as number[][]; + + const similarityToQuery = cosineSimilarity( + queryEmbeddingExpanded, + embeddingList + )[0]; + const mostSimilarEmbeddingIndex = argMax(similarityToQuery).maxIndex; + + const selectedEmbeddings = [embeddingList[mostSimilarEmbeddingIndex]]; + const selectedEmbeddingsIndexes = [mostSimilarEmbeddingIndex]; + + while (selectedEmbeddingsIndexes.length < Math.min(k, embeddingList.length)) { + let bestScore = -Infinity; + let bestIndex = -1; + + const similarityToSelected = cosineSimilarity( + embeddingList, + selectedEmbeddings + ); + + similarityToQuery.forEach((queryScore, queryScoreIndex) => { + if (selectedEmbeddingsIndexes.includes(queryScoreIndex)) { + return; + } + const maxSimilarityToSelected = Math.max( + ...similarityToSelected[queryScoreIndex] + ); + const score = + lambda * queryScore - (1 - lambda) * maxSimilarityToSelected; + + if (score > bestScore) { + bestScore = score; + bestIndex = queryScoreIndex; + } + }); + selectedEmbeddings.push(embeddingList[bestIndex]); + selectedEmbeddingsIndexes.push(bestIndex); + } + + return selectedEmbeddingsIndexes; +} + +type MaxInfo = { + maxIndex: number; + maxValue: number; +}; + +/** + * Finds the index of the maximum value in the given array. + * @param {number[]} array - The input array. + * + * @returns {number} The index of the maximum value in the array. If the array is empty, returns -1. 
+ */ +function argMax(array: number[]): MaxInfo { + if (array.length === 0) { + return { + maxIndex: -1, + maxValue: NaN, + }; + } + + let maxValue = array[0]; + let maxIndex = 0; + + for (let i = 1; i < array.length; i += 1) { + if (array[i] > maxValue) { + maxIndex = i; + maxValue = array[i]; + } + } + return { maxIndex, maxValue }; +} + +function matrixMaxVal(arrays: number[][]): number { + return arrays.reduce( + (acc, array) => Math.max(acc, argMax(array).maxValue), + 0 + ); +} diff --git a/langchain/src/util/tests/math_utils.test.ts b/langchain-core/src/utils/tests/math_utils.test.ts similarity index 100% rename from langchain/src/util/tests/math_utils.test.ts rename to langchain-core/src/utils/tests/math_utils.test.ts diff --git a/langchain/package.json b/langchain/package.json index 71ae43c0b452..2c73a180a33a 100644 --- a/langchain/package.json +++ b/langchain/package.json @@ -1,6 +1,6 @@ { "name": "langchain", - "version": "0.0.203", + "version": "0.0.204-rc.0", "description": "Typescript bindings for langchain", "type": "module", "engines": { @@ -930,7 +930,6 @@ "@tsconfig/recommended": "^1.0.2", "@types/d3-dsv": "^2", "@types/decamelize": "^1.2.0", - "@types/flat": "^5.0.2", "@types/html-to-text": "^9", "@types/js-yaml": "^4", "@types/jsdom": "^21.1.1", @@ -988,7 +987,6 @@ "llmonitor": "^0.5.9", "lodash": "^4.17.21", "mammoth": "^1.5.1", - "ml-matrix": "^6.10.4", "mongodb": "^5.2.0", "mysql2": "^3.3.3", "neo4j-driver": "^5.12.0", @@ -1436,10 +1434,9 @@ "dependencies": { "@anthropic-ai/sdk": "^0.9.1", "@langchain/community": "~0.0.0", - "@langchain/core": "~0.0.10", + "@langchain/core": "~0.0.11-rc.1", "binary-extensions": "^2.2.0", "expr-eval": "^2.0.2", - "flat": "^5.0.2", "js-tiktoken": "^1.0.7", "js-yaml": "^4.1.0", "jsonpointer": "^5.0.1", diff --git a/langchain/src/document_transformers/html_to_text.ts b/langchain/src/document_transformers/html_to_text.ts index df29570f9a90..a3d4023dfdd0 100644 --- 
a/langchain/src/document_transformers/html_to_text.ts +++ b/langchain/src/document_transformers/html_to_text.ts @@ -1 +1 @@ -export * from "@langchain/community/document_transformers/html_to_text"; \ No newline at end of file +export * from "@langchain/community/document_transformers/html_to_text"; diff --git a/langchain/src/document_transformers/mozilla_readability.ts b/langchain/src/document_transformers/mozilla_readability.ts index 4b64753acb38..481f786c13d0 100644 --- a/langchain/src/document_transformers/mozilla_readability.ts +++ b/langchain/src/document_transformers/mozilla_readability.ts @@ -1 +1 @@ -export * from "@langchain/community/document_transformers/mozilla_readability"; \ No newline at end of file +export * from "@langchain/community/document_transformers/mozilla_readability"; diff --git a/langchain/src/load/import_type.d.ts b/langchain/src/load/import_type.d.ts index 0ca37eab8c29..a02354d086b2 100644 --- a/langchain/src/load/import_type.d.ts +++ b/langchain/src/load/import_type.d.ts @@ -524,17 +524,9 @@ export interface SecretMap { AWS_ACCESS_KEY_ID?: string; AWS_SECRET_ACCESS_KEY?: string; AZURE_OPENAI_API_KEY?: string; - MILVUS_PASSWORD?: string; - MILVUS_SSL?: string; - MILVUS_USERNAME?: string; OPENAI_API_KEY?: string; OPENAI_ORGANIZATION?: string; PROMPTLAYER_API_KEY?: string; - QDRANT_API_KEY?: string; - QDRANT_URL?: string; REMOTE_RETRIEVER_AUTH_BEARER?: string; - VECTARA_API_KEY?: string; - VECTARA_CORPUS_ID?: string; - VECTARA_CUSTOMER_ID?: string; ZAPIER_NLA_API_KEY?: string; } diff --git a/langchain/src/schema/document.ts b/langchain/src/schema/document.ts index 5e3f6459952e..8394191d16e2 100644 --- a/langchain/src/schema/document.ts +++ b/langchain/src/schema/document.ts @@ -1 +1,4 @@ -export { BaseDocumentTransformer, MappingDocumentTransformer } from "@langchain/core/documents"; +export { + BaseDocumentTransformer, + MappingDocumentTransformer, +} from "@langchain/core/documents"; diff --git a/langchain/src/schema/index.ts 
b/langchain/src/schema/index.ts index 299068a67252..784a99b42770 100644 --- a/langchain/src/schema/index.ts +++ b/langchain/src/schema/index.ts @@ -5,7 +5,6 @@ import { AIMessage, SystemMessage, } from "@langchain/core/messages"; -import { Document } from "../document.js"; import { Serializable } from "../load/serializable.js"; export { @@ -128,12 +127,4 @@ export abstract class BaseEntityStore extends Serializable { abstract clear(): Promise; } -/** - * Abstract class for a document store. All document stores should extend - * this class. - */ -export abstract class Docstore { - abstract search(search: string): Promise; - - abstract add(texts: Record): Promise; -} +export { Docstore } from "@langchain/community/stores/doc/base"; diff --git a/langchain/src/storage/convex.ts b/langchain/src/storage/convex.ts index ad8d8c311a06..3593ef2bfc30 100644 --- a/langchain/src/storage/convex.ts +++ b/langchain/src/storage/convex.ts @@ -1 +1 @@ -export * from "@langchain/community/storage/convex"; \ No newline at end of file +export * from "@langchain/community/storage/convex"; diff --git a/langchain/src/storage/ioredis.ts b/langchain/src/storage/ioredis.ts index c022f1520e97..3e2d8437d910 100644 --- a/langchain/src/storage/ioredis.ts +++ b/langchain/src/storage/ioredis.ts @@ -1 +1 @@ -export * from "@langchain/community/storage/ioredis"; \ No newline at end of file +export * from "@langchain/community/storage/ioredis"; diff --git a/langchain/src/storage/upstash_redis.ts b/langchain/src/storage/upstash_redis.ts index e790c64033db..c8732435bbeb 100644 --- a/langchain/src/storage/upstash_redis.ts +++ b/langchain/src/storage/upstash_redis.ts @@ -1 +1 @@ -export * from "@langchain/community/storage/upstash_redis"; \ No newline at end of file +export * from "@langchain/community/storage/upstash_redis"; diff --git a/langchain/src/storage/vercel_kv.ts b/langchain/src/storage/vercel_kv.ts index a31d97725508..1853b88290d7 100644 --- a/langchain/src/storage/vercel_kv.ts +++ 
b/langchain/src/storage/vercel_kv.ts @@ -1 +1 @@ -export * from "@langchain/community/storage/vercel_kv"; \ No newline at end of file +export * from "@langchain/community/storage/vercel_kv"; diff --git a/langchain/src/stores/doc/in_memory.ts b/langchain/src/stores/doc/in_memory.ts index f11220f66170..5b3db5facd35 100644 --- a/langchain/src/stores/doc/in_memory.ts +++ b/langchain/src/stores/doc/in_memory.ts @@ -1,113 +1 @@ -import { Document } from "../../document.js"; -import { Docstore } from "../../schema/index.js"; -import { BaseStoreInterface } from "../../schema/storage.js"; - -/** - * Class for storing and retrieving documents in memory asynchronously. - * Extends the Docstore class. - */ -export class InMemoryDocstore - extends Docstore - implements BaseStoreInterface -{ - _docs: Map; - - constructor(docs?: Map) { - super(); - this._docs = docs ?? new Map(); - } - - /** - * Searches for a document in the store based on its ID. - * @param search The ID of the document to search for. - * @returns The document with the given ID. - */ - async search(search: string): Promise { - const result = this._docs.get(search); - if (!result) { - throw new Error(`ID ${search} not found.`); - } else { - return result; - } - } - - /** - * Adds new documents to the store. - * @param texts An object where the keys are document IDs and the values are the documents themselves. 
- * @returns Void - */ - async add(texts: Record): Promise { - const keys = [...this._docs.keys()]; - const overlapping = Object.keys(texts).filter((x) => keys.includes(x)); - - if (overlapping.length > 0) { - throw new Error(`Tried to add ids that already exist: ${overlapping}`); - } - - for (const [key, value] of Object.entries(texts)) { - this._docs.set(key, value); - } - } - - async mget(keys: string[]): Promise { - return Promise.all(keys.map((key) => this.search(key))); - } - - async mset(keyValuePairs: [string, Document][]): Promise { - await Promise.all( - keyValuePairs.map(([key, value]) => this.add({ [key]: value })) - ); - } - - async mdelete(_keys: string[]): Promise { - throw new Error("Not implemented."); - } - - // eslint-disable-next-line require-yield - async *yieldKeys(_prefix?: string): AsyncGenerator { - throw new Error("Not implemented"); - } -} - -/** - * Class for storing and retrieving documents in memory synchronously. - */ -export class SynchronousInMemoryDocstore { - _docs: Map; - - constructor(docs?: Map) { - this._docs = docs ?? new Map(); - } - - /** - * Searches for a document in the store based on its ID. - * @param search The ID of the document to search for. - * @returns The document with the given ID. - */ - search(search: string): Document { - const result = this._docs.get(search); - if (!result) { - throw new Error(`ID ${search} not found.`); - } else { - return result; - } - } - - /** - * Adds new documents to the store. - * @param texts An object where the keys are document IDs and the values are the documents themselves. 
- * @returns Void - */ - add(texts: Record): void { - const keys = [...this._docs.keys()]; - const overlapping = Object.keys(texts).filter((x) => keys.includes(x)); - - if (overlapping.length > 0) { - throw new Error(`Tried to add ids that already exist: ${overlapping}`); - } - - for (const [key, value] of Object.entries(texts)) { - this._docs.set(key, value); - } - } -} +export * from "@langchain/community/stores/doc/in_memory"; diff --git a/langchain/src/stores/message/cassandra.ts b/langchain/src/stores/message/cassandra.ts index 7c675be8e570..88b1293512e5 100644 --- a/langchain/src/stores/message/cassandra.ts +++ b/langchain/src/stores/message/cassandra.ts @@ -1 +1 @@ -export * from "@langchain/community/stores/message/cassandra"; \ No newline at end of file +export * from "@langchain/community/stores/message/cassandra"; diff --git a/langchain/src/stores/message/cloudflare_d1.ts b/langchain/src/stores/message/cloudflare_d1.ts index 46b21919631c..3dccbdea5a83 100644 --- a/langchain/src/stores/message/cloudflare_d1.ts +++ b/langchain/src/stores/message/cloudflare_d1.ts @@ -1 +1 @@ -export * from "@langchain/community/stores/message/cloudflare_d1"; \ No newline at end of file +export * from "@langchain/community/stores/message/cloudflare_d1"; diff --git a/langchain/src/stores/message/convex.ts b/langchain/src/stores/message/convex.ts index 150aafb7efd1..f312698008e5 100644 --- a/langchain/src/stores/message/convex.ts +++ b/langchain/src/stores/message/convex.ts @@ -1 +1 @@ -export * from "@langchain/community/stores/message/convex"; \ No newline at end of file +export * from "@langchain/community/stores/message/convex"; diff --git a/langchain/src/stores/message/dynamodb.ts b/langchain/src/stores/message/dynamodb.ts index 6329ed88f80b..11875415e05f 100644 --- a/langchain/src/stores/message/dynamodb.ts +++ b/langchain/src/stores/message/dynamodb.ts @@ -1 +1 @@ -export * from "@langchain/community/stores/message/dynamodb"; \ No newline at end of file +export * from 
"@langchain/community/stores/message/dynamodb"; diff --git a/langchain/src/stores/message/firestore.ts b/langchain/src/stores/message/firestore.ts index a4486a6303a9..94de701fb5f5 100644 --- a/langchain/src/stores/message/firestore.ts +++ b/langchain/src/stores/message/firestore.ts @@ -1 +1 @@ -export * from "@langchain/community/stores/message/firestore"; \ No newline at end of file +export * from "@langchain/community/stores/message/firestore"; diff --git a/langchain/src/stores/message/ioredis.ts b/langchain/src/stores/message/ioredis.ts index 203cf6a39e3e..2e854bc6c17e 100644 --- a/langchain/src/stores/message/ioredis.ts +++ b/langchain/src/stores/message/ioredis.ts @@ -1 +1 @@ -export * from "@langchain/community/stores/message/ioredis"; \ No newline at end of file +export * from "@langchain/community/stores/message/ioredis"; diff --git a/langchain/src/stores/message/momento.ts b/langchain/src/stores/message/momento.ts index b1aa8e025ee6..060690357c95 100644 --- a/langchain/src/stores/message/momento.ts +++ b/langchain/src/stores/message/momento.ts @@ -1 +1 @@ -export * from "@langchain/community/stores/message/momento"; \ No newline at end of file +export * from "@langchain/community/stores/message/momento"; diff --git a/langchain/src/stores/message/mongodb.ts b/langchain/src/stores/message/mongodb.ts index 333120d9f268..f4bb9a41cc6b 100644 --- a/langchain/src/stores/message/mongodb.ts +++ b/langchain/src/stores/message/mongodb.ts @@ -1 +1 @@ -export * from "@langchain/community/stores/message/mongodb"; \ No newline at end of file +export * from "@langchain/community/stores/message/mongodb"; diff --git a/langchain/src/stores/message/planetscale.ts b/langchain/src/stores/message/planetscale.ts index 7f7a9018b027..bf090eb5fa95 100644 --- a/langchain/src/stores/message/planetscale.ts +++ b/langchain/src/stores/message/planetscale.ts @@ -1 +1 @@ -export * from "@langchain/community/stores/message/planetscale"; \ No newline at end of file +export * from 
"@langchain/community/stores/message/planetscale"; diff --git a/langchain/src/stores/message/redis.ts b/langchain/src/stores/message/redis.ts index 0d46c48cea30..801f450bc9b7 100644 --- a/langchain/src/stores/message/redis.ts +++ b/langchain/src/stores/message/redis.ts @@ -1 +1 @@ -export * from "@langchain/community/stores/message/redis"; \ No newline at end of file +export * from "@langchain/community/stores/message/redis"; diff --git a/langchain/src/stores/message/upstash_redis.ts b/langchain/src/stores/message/upstash_redis.ts index ffdb9ff36246..33cb724f14dd 100644 --- a/langchain/src/stores/message/upstash_redis.ts +++ b/langchain/src/stores/message/upstash_redis.ts @@ -1 +1 @@ -export * from "@langchain/community/stores/message/upstash_redis"; \ No newline at end of file +export * from "@langchain/community/stores/message/upstash_redis"; diff --git a/langchain/src/stores/message/utils.ts b/langchain/src/stores/message/utils.ts index b490d23f9991..3a65c2b25031 100644 --- a/langchain/src/stores/message/utils.ts +++ b/langchain/src/stores/message/utils.ts @@ -1 +1,4 @@ -export { mapStoredMessagesToChatMessages, mapChatMessagesToStoredMessages } from "@langchain/core/messages"; \ No newline at end of file +export { + mapStoredMessagesToChatMessages, + mapChatMessagesToStoredMessages, +} from "@langchain/core/messages"; diff --git a/langchain/src/stores/message/xata.ts b/langchain/src/stores/message/xata.ts index 32b73221198b..9df54fb3b943 100644 --- a/langchain/src/stores/message/xata.ts +++ b/langchain/src/stores/message/xata.ts @@ -1 +1 @@ -export * from "@langchain/community/stores/message/xata"; \ No newline at end of file +export * from "@langchain/community/stores/message/xata"; diff --git a/langchain/src/util/math.ts b/langchain/src/util/math.ts index fe703c2d5f79..5c1f37d1b0aa 100644 --- a/langchain/src/util/math.ts +++ b/langchain/src/util/math.ts @@ -1,180 +1 @@ -import { - similarity as ml_distance_similarity, - distance as ml_distance, -} from 
"ml-distance"; - -type VectorFunction = (xVector: number[], yVector: number[]) => number; - -/** - * Apply a row-wise function between two matrices with the same number of columns. - * - * @param {number[][]} X - The first matrix. - * @param {number[][]} Y - The second matrix. - * @param {VectorFunction} func - The function to apply. - * - * @throws {Error} If the number of columns in X and Y are not the same. - * - * @returns {number[][] | [[]]} A matrix where each row represents the result of applying the function between the corresponding rows of X and Y. - */ - -export function matrixFunc( - X: number[][], - Y: number[][], - func: VectorFunction -): number[][] { - if ( - X.length === 0 || - X[0].length === 0 || - Y.length === 0 || - Y[0].length === 0 - ) { - return [[]]; - } - - if (X[0].length !== Y[0].length) { - throw new Error( - `Number of columns in X and Y must be the same. X has shape ${[ - X.length, - X[0].length, - ]} and Y has shape ${[Y.length, Y[0].length]}.` - ); - } - - return X.map((xVector) => - Y.map((yVector) => func(xVector, yVector)).map((similarity) => - Number.isNaN(similarity) ? 0 : similarity - ) - ); -} - -export function normalize(M: number[][], similarity = false): number[][] { - const max = matrixMaxVal(M); - return M.map((row) => - row.map((val) => (similarity ? 1 - val / max : val / max)) - ); -} - -/** - * This function calculates the row-wise cosine similarity between two matrices with the same number of columns. - * - * @param {number[][]} X - The first matrix. - * @param {number[][]} Y - The second matrix. - * - * @throws {Error} If the number of columns in X and Y are not the same. - * - * @returns {number[][] | [[]]} A matrix where each row represents the cosine similarity values between the corresponding rows of X and Y. 
- */ -export function cosineSimilarity(X: number[][], Y: number[][]): number[][] { - return matrixFunc(X, Y, ml_distance_similarity.cosine); -} - -export function innerProduct(X: number[][], Y: number[][]): number[][] { - return matrixFunc(X, Y, ml_distance.innerProduct); -} - -export function euclideanDistance(X: number[][], Y: number[][]): number[][] { - return matrixFunc(X, Y, ml_distance.euclidean); -} - -/** - * This function implements the Maximal Marginal Relevance algorithm - * to select a set of embeddings that maximizes the diversity and relevance to a query embedding. - * - * @param {number[]|number[][]} queryEmbedding - The query embedding. - * @param {number[][]} embeddingList - The list of embeddings to select from. - * @param {number} [lambda=0.5] - The trade-off parameter between relevance and diversity. - * @param {number} [k=4] - The maximum number of embeddings to select. - * - * @returns {number[]} The indexes of the selected embeddings in the embeddingList. - */ -export function maximalMarginalRelevance( - queryEmbedding: number[] | number[][], - embeddingList: number[][], - lambda = 0.5, - k = 4 -): number[] { - if (Math.min(k, embeddingList.length) <= 0) { - return []; - } - - const queryEmbeddingExpanded = ( - Array.isArray(queryEmbedding[0]) ? 
queryEmbedding : [queryEmbedding] - ) as number[][]; - - const similarityToQuery = cosineSimilarity( - queryEmbeddingExpanded, - embeddingList - )[0]; - const mostSimilarEmbeddingIndex = argMax(similarityToQuery).maxIndex; - - const selectedEmbeddings = [embeddingList[mostSimilarEmbeddingIndex]]; - const selectedEmbeddingsIndexes = [mostSimilarEmbeddingIndex]; - - while (selectedEmbeddingsIndexes.length < Math.min(k, embeddingList.length)) { - let bestScore = -Infinity; - let bestIndex = -1; - - const similarityToSelected = cosineSimilarity( - embeddingList, - selectedEmbeddings - ); - - similarityToQuery.forEach((queryScore, queryScoreIndex) => { - if (selectedEmbeddingsIndexes.includes(queryScoreIndex)) { - return; - } - const maxSimilarityToSelected = Math.max( - ...similarityToSelected[queryScoreIndex] - ); - const score = - lambda * queryScore - (1 - lambda) * maxSimilarityToSelected; - - if (score > bestScore) { - bestScore = score; - bestIndex = queryScoreIndex; - } - }); - selectedEmbeddings.push(embeddingList[bestIndex]); - selectedEmbeddingsIndexes.push(bestIndex); - } - - return selectedEmbeddingsIndexes; -} - -type MaxInfo = { - maxIndex: number; - maxValue: number; -}; - -/** - * Finds the index of the maximum value in the given array. - * @param {number[]} array - The input array. - * - * @returns {number} The index of the maximum value in the array. If the array is empty, returns -1. 
- */ -function argMax(array: number[]): MaxInfo { - if (array.length === 0) { - return { - maxIndex: -1, - maxValue: NaN, - }; - } - - let maxValue = array[0]; - let maxIndex = 0; - - for (let i = 1; i < array.length; i += 1) { - if (array[i] > maxValue) { - maxIndex = i; - maxValue = array[i]; - } - } - return { maxIndex, maxValue }; -} - -function matrixMaxVal(arrays: number[][]): number { - return arrays.reduce( - (acc, array) => Math.max(acc, argMax(array).maxValue), - 0 - ); -} +export * from "@langchain/core/utils/math"; diff --git a/langchain/src/vectorstores/analyticdb.ts b/langchain/src/vectorstores/analyticdb.ts index 15f0eb6d831f..99b074dc0ace 100644 --- a/langchain/src/vectorstores/analyticdb.ts +++ b/langchain/src/vectorstores/analyticdb.ts @@ -1,390 +1 @@ -import * as uuid from "uuid"; -import pg, { Pool, PoolConfig } from "pg"; -import { from as copyFrom } from "pg-copy-streams"; -import { pipeline } from "node:stream/promises"; -import { Readable } from "node:stream"; - -import { VectorStore } from "./base.js"; -import { Embeddings } from "../embeddings/base.js"; -import { Document } from "../document.js"; - -const _LANGCHAIN_DEFAULT_COLLECTION_NAME = "langchain_document"; - -/** - * Interface defining the arguments required to create an instance of - * `AnalyticDBVectorStore`. - */ -export interface AnalyticDBArgs { - connectionOptions: PoolConfig; - embeddingDimension?: number; - collectionName?: string; - preDeleteCollection?: boolean; -} - -/** - * Interface defining the structure of data to be stored in the - * AnalyticDB. 
- */ -interface DataType { - id: string; - embedding: number[]; - document: string; - // eslint-disable-next-line @typescript-eslint/no-explicit-any - metadata: Record; -} - -/** - * Class that provides methods for creating and managing a collection of - * documents in an AnalyticDB, adding documents or vectors to the - * collection, performing similarity search on vectors, and creating an - * instance of `AnalyticDBVectorStore` from texts or documents. - */ -export class AnalyticDBVectorStore extends VectorStore { - // eslint-disable-next-line @typescript-eslint/no-explicit-any - declare FilterType: Record; - - private pool: Pool; - - private embeddingDimension?: number; - - private collectionName: string; - - private preDeleteCollection: boolean; - - private isCreateCollection = false; - - _vectorstoreType(): string { - return "analyticdb"; - } - - constructor(embeddings: Embeddings, args: AnalyticDBArgs) { - super(embeddings, args); - - this.pool = new pg.Pool({ - host: args.connectionOptions.host, - port: args.connectionOptions.port, - database: args.connectionOptions.database, - user: args.connectionOptions.user, - password: args.connectionOptions.password, - }); - this.embeddingDimension = args.embeddingDimension; - this.collectionName = - args.collectionName || _LANGCHAIN_DEFAULT_COLLECTION_NAME; - this.preDeleteCollection = args.preDeleteCollection || false; - } - - /** - * Closes all the clients in the pool and terminates the pool. - * @returns Promise that resolves when all clients are closed and the pool is terminated. - */ - async end(): Promise { - return this.pool.end(); - } - - /** - * Creates a new table in the database if it does not already exist. The - * table is created with columns for id, embedding, document, and - * metadata. An index is also created on the embedding column if it does - * not already exist. - * @returns Promise that resolves when the table and index are created. 
- */ - async createTableIfNotExists(): Promise { - if (!this.embeddingDimension) { - this.embeddingDimension = ( - await this.embeddings.embedQuery("test") - ).length; - } - const client = await this.pool.connect(); - try { - await client.query("BEGIN"); - // Create the table if it doesn't exist - await client.query(` - CREATE TABLE IF NOT EXISTS ${this.collectionName} ( - id TEXT PRIMARY KEY DEFAULT NULL, - embedding REAL[], - document TEXT, - metadata JSON - ); - `); - - // Check if the index exists - const indexName = `${this.collectionName}_embedding_idx`; - const indexQuery = ` - SELECT 1 - FROM pg_indexes - WHERE indexname = '${indexName}'; - `; - const result = await client.query(indexQuery); - - // Create the index if it doesn't exist - if (result.rowCount === 0) { - const indexStatement = ` - CREATE INDEX ${indexName} - ON ${this.collectionName} USING ann(embedding) - WITH ( - "dim" = ${this.embeddingDimension}, - "hnsw_m" = 100 - ); - `; - await client.query(indexStatement); - } - await client.query("COMMIT"); - } catch (err) { - await client.query("ROLLBACK"); - throw err; - } finally { - client.release(); - } - } - - /** - * Deletes the collection from the database if it exists. - * @returns Promise that resolves when the collection is deleted. - */ - async deleteCollection(): Promise { - const dropStatement = `DROP TABLE IF EXISTS ${this.collectionName};`; - await this.pool.query(dropStatement); - } - - /** - * Creates a new collection in the database. If `preDeleteCollection` is - * true, any existing collection with the same name is deleted before the - * new collection is created. - * @returns Promise that resolves when the collection is created. - */ - async createCollection(): Promise { - if (this.preDeleteCollection) { - await this.deleteCollection(); - } - await this.createTableIfNotExists(); - this.isCreateCollection = true; - } - - /** - * Adds an array of documents to the collection. 
The documents are first - * converted to vectors using the `embedDocuments` method of the - * `embeddings` instance. - * @param documents Array of Document instances to be added to the collection. - * @returns Promise that resolves when the documents are added. - */ - async addDocuments(documents: Document[]): Promise { - const texts = documents.map(({ pageContent }) => pageContent); - return this.addVectors( - await this.embeddings.embedDocuments(texts), - documents - ); - } - - /** - * Adds an array of vectors and corresponding documents to the collection. - * The vectors and documents are batch inserted into the database. - * @param vectors Array of vectors to be added to the collection. - * @param documents Array of Document instances corresponding to the vectors. - * @returns Promise that resolves when the vectors and documents are added. - */ - async addVectors(vectors: number[][], documents: Document[]): Promise { - if (vectors.length === 0) { - return; - } - if (vectors.length !== documents.length) { - throw new Error(`Vectors and documents must have the same length`); - } - if (!this.embeddingDimension) { - this.embeddingDimension = ( - await this.embeddings.embedQuery("test") - ).length; - } - if (vectors[0].length !== this.embeddingDimension) { - throw new Error( - `Vectors must have the same length as the number of dimensions (${this.embeddingDimension})` - ); - } - - if (!this.isCreateCollection) { - await this.createCollection(); - } - - const client = await this.pool.connect(); - try { - const chunkSize = 500; - const chunksTableData: DataType[] = []; - - for (let i = 0; i < documents.length; i += 1) { - chunksTableData.push({ - id: uuid.v4(), - embedding: vectors[i], - document: documents[i].pageContent, - metadata: documents[i].metadata, - }); - - // Execute the batch insert when the batch size is reached - if (chunksTableData.length === chunkSize) { - const rs = new Readable(); - let currentIndex = 0; - rs._read = function () { - if (currentIndex 
=== chunkSize) { - rs.push(null); - } else { - const data = chunksTableData[currentIndex]; - rs.push( - `${data.id}\t{${data.embedding.join(",")}}\t${ - data.document - }\t${JSON.stringify(data.metadata)}\n` - ); - currentIndex += 1; - } - }; - const ws = client.query( - copyFrom( - `COPY ${this.collectionName}(id, embedding, document, metadata) FROM STDIN` - ) - ); - - await pipeline(rs, ws); - // Clear the chunksTableData list for the next batch - chunksTableData.length = 0; - } - } - - // Insert any remaining records that didn't make up a full batch - if (chunksTableData.length > 0) { - const rs = new Readable(); - let currentIndex = 0; - rs._read = function () { - if (currentIndex === chunksTableData.length) { - rs.push(null); - } else { - const data = chunksTableData[currentIndex]; - rs.push( - `${data.id}\t{${data.embedding.join(",")}}\t${ - data.document - }\t${JSON.stringify(data.metadata)}\n` - ); - currentIndex += 1; - } - }; - const ws = client.query( - copyFrom( - `COPY ${this.collectionName}(id, embedding, document, metadata) FROM STDIN` - ) - ); - await pipeline(rs, ws); - } - } finally { - client.release(); - } - } - - /** - * Performs a similarity search on the vectors in the collection. The - * search is performed using the given query vector and returns the top k - * most similar vectors along with their corresponding documents and - * similarity scores. - * @param query Query vector for the similarity search. - * @param k Number of top similar vectors to return. - * @param filter Optional. Filter to apply on the metadata of the documents. - * @returns Promise that resolves to an array of tuples, each containing a Document instance and its similarity score. - */ - async similaritySearchVectorWithScore( - query: number[], - k: number, - filter?: this["FilterType"] - ): Promise<[Document, number][]> { - if (!this.isCreateCollection) { - await this.createCollection(); - } - - let filterCondition = ""; - const filterEntries = filter ? 
Object.entries(filter) : []; - if (filterEntries.length > 0) { - const conditions = filterEntries.map( - (_, index) => `metadata->>$${2 * index + 3} = $${2 * index + 4}` - ); - filterCondition = `WHERE ${conditions.join(" AND ")}`; - } - - const sqlQuery = ` - SELECT *, l2_distance(embedding, $1::real[]) AS distance - FROM ${this.collectionName} - ${filterCondition} - ORDER BY embedding <-> $1 - LIMIT $2; - `; - - // Execute the query and fetch the results - const { rows } = await this.pool.query(sqlQuery, [ - query, - k, - ...filterEntries.flatMap(([key, value]) => [key, value]), - ]); - - const result: [Document, number][] = rows.map((row) => [ - new Document({ pageContent: row.document, metadata: row.metadata }), - row.distance, - ]); - - return result; - } - - /** - * Creates an instance of `AnalyticDBVectorStore` from an array of texts - * and corresponding metadata. The texts are first converted to Document - * instances before being added to the collection. - * @param texts Array of texts to be added to the collection. - * @param metadatas Array or object of metadata corresponding to the texts. - * @param embeddings Embeddings instance used to convert the texts to vectors. - * @param dbConfig Configuration for the AnalyticDB. - * @returns Promise that resolves to an instance of `AnalyticDBVectorStore`. - */ - static async fromTexts( - texts: string[], - metadatas: object[] | object, - embeddings: Embeddings, - dbConfig: AnalyticDBArgs - ): Promise { - const docs = []; - for (let i = 0; i < texts.length; i += 1) { - const metadata = Array.isArray(metadatas) ? metadatas[i] : metadatas; - const newDoc = new Document({ - pageContent: texts[i], - metadata, - }); - docs.push(newDoc); - } - return AnalyticDBVectorStore.fromDocuments(docs, embeddings, dbConfig); - } - - /** - * Creates an instance of `AnalyticDBVectorStore` from an array of - * Document instances. The documents are added to the collection. 
- * @param docs Array of Document instances to be added to the collection. - * @param embeddings Embeddings instance used to convert the documents to vectors. - * @param dbConfig Configuration for the AnalyticDB. - * @returns Promise that resolves to an instance of `AnalyticDBVectorStore`. - */ - static async fromDocuments( - docs: Document[], - embeddings: Embeddings, - dbConfig: AnalyticDBArgs - ): Promise { - const instance = new this(embeddings, dbConfig); - await instance.addDocuments(docs); - return instance; - } - - /** - * Creates an instance of `AnalyticDBVectorStore` from an existing index - * in the database. A new collection is created in the database. - * @param embeddings Embeddings instance used to convert the documents to vectors. - * @param dbConfig Configuration for the AnalyticDB. - * @returns Promise that resolves to an instance of `AnalyticDBVectorStore`. - */ - static async fromExistingIndex( - embeddings: Embeddings, - dbConfig: AnalyticDBArgs - ): Promise { - const instance = new this(embeddings, dbConfig); - await instance.createCollection(); - return instance; - } -} +export * from "@langchain/community/vectorstores/analyticdb"; diff --git a/langchain/src/vectorstores/base.ts b/langchain/src/vectorstores/base.ts index 29701d6b091b..58e6d1c589ed 100644 --- a/langchain/src/vectorstores/base.ts +++ b/langchain/src/vectorstores/base.ts @@ -1,299 +1 @@ -import { Embeddings } from "../embeddings/base.js"; -import { Document } from "../document.js"; -import { BaseRetriever, BaseRetrieverInput } from "../schema/retriever.js"; -import { Serializable } from "../load/serializable.js"; -import { - CallbackManagerForRetrieverRun, - Callbacks, -} from "../callbacks/manager.js"; - -/** - * Type for options when adding a document to the VectorStore. - */ -// eslint-disable-next-line @typescript-eslint/no-explicit-any -type AddDocumentOptions = Record; - -/** - * Type for options when performing a maximal marginal relevance search. 
- */ -export type MaxMarginalRelevanceSearchOptions = { - k: number; - fetchK?: number; - lambda?: number; - filter?: FilterType; -}; - -/** - * Type for options when performing a maximal marginal relevance search - * with the VectorStoreRetriever. - */ -export type VectorStoreRetrieverMMRSearchKwargs = { - fetchK?: number; - lambda?: number; -}; - -/** - * Type for input when creating a VectorStoreRetriever instance. - */ -export type VectorStoreRetrieverInput = - BaseRetrieverInput & - ( - | { - vectorStore: V; - k?: number; - filter?: V["FilterType"]; - searchType?: "similarity"; - } - | { - vectorStore: V; - k?: number; - filter?: V["FilterType"]; - searchType: "mmr"; - searchKwargs?: VectorStoreRetrieverMMRSearchKwargs; - } - ); - -/** - * Class for performing document retrieval from a VectorStore. Can perform - * similarity search or maximal marginal relevance search. - */ -export class VectorStoreRetriever< - V extends VectorStore = VectorStore -> extends BaseRetriever { - static lc_name() { - return "VectorStoreRetriever"; - } - - get lc_namespace() { - return ["langchain", "retrievers", "base"]; - } - - vectorStore: V; - - k = 4; - - searchType = "similarity"; - - searchKwargs?: VectorStoreRetrieverMMRSearchKwargs; - - filter?: V["FilterType"]; - - _vectorstoreType(): string { - return this.vectorStore._vectorstoreType(); - } - - constructor(fields: VectorStoreRetrieverInput) { - super(fields); - this.vectorStore = fields.vectorStore; - this.k = fields.k ?? this.k; - this.searchType = fields.searchType ?? 
this.searchType; - this.filter = fields.filter; - if (fields.searchType === "mmr") { - this.searchKwargs = fields.searchKwargs; - } - } - - async _getRelevantDocuments( - query: string, - runManager?: CallbackManagerForRetrieverRun - ): Promise { - if (this.searchType === "mmr") { - if (typeof this.vectorStore.maxMarginalRelevanceSearch !== "function") { - throw new Error( - `The vector store backing this retriever, ${this._vectorstoreType()} does not support max marginal relevance search.` - ); - } - return this.vectorStore.maxMarginalRelevanceSearch( - query, - { - k: this.k, - filter: this.filter, - ...this.searchKwargs, - }, - runManager?.getChild("vectorstore") - ); - } - return this.vectorStore.similaritySearch( - query, - this.k, - this.filter, - runManager?.getChild("vectorstore") - ); - } - - async addDocuments( - documents: Document[], - options?: AddDocumentOptions - ): Promise { - return this.vectorStore.addDocuments(documents, options); - } -} - -/** - * Abstract class representing a store of vectors. Provides methods for - * adding vectors and documents, deleting from the store, and searching - * the store. 
- */ -export abstract class VectorStore extends Serializable { - declare FilterType: object | string; - - lc_namespace = ["langchain", "vectorstores", this._vectorstoreType()]; - - embeddings: Embeddings; - - // eslint-disable-next-line @typescript-eslint/no-explicit-any - constructor(embeddings: Embeddings, dbConfig: Record) { - super(dbConfig); - this.embeddings = embeddings; - } - - abstract _vectorstoreType(): string; - - abstract addVectors( - vectors: number[][], - documents: Document[], - options?: AddDocumentOptions - ): Promise; - - abstract addDocuments( - documents: Document[], - options?: AddDocumentOptions - ): Promise; - - // eslint-disable-next-line @typescript-eslint/no-explicit-any - async delete(_params?: Record): Promise { - throw new Error("Not implemented."); - } - - abstract similaritySearchVectorWithScore( - query: number[], - k: number, - filter?: this["FilterType"] - ): Promise<[Document, number][]>; - - async similaritySearch( - query: string, - k = 4, - filter: this["FilterType"] | undefined = undefined, - _callbacks: Callbacks | undefined = undefined // implement passing to embedQuery later - ): Promise { - const results = await this.similaritySearchVectorWithScore( - await this.embeddings.embedQuery(query), - k, - filter - ); - - return results.map((result) => result[0]); - } - - async similaritySearchWithScore( - query: string, - k = 4, - filter: this["FilterType"] | undefined = undefined, - _callbacks: Callbacks | undefined = undefined // implement passing to embedQuery later - ): Promise<[Document, number][]> { - return this.similaritySearchVectorWithScore( - await this.embeddings.embedQuery(query), - k, - filter - ); - } - - /** - * Return documents selected using the maximal marginal relevance. - * Maximal marginal relevance optimizes for similarity to the query AND diversity - * among selected documents. - * - * @param {string} query - Text to look up documents similar to. 
- * @param {number} options.k - Number of documents to return. - * @param {number} options.fetchK - Number of documents to fetch before passing to the MMR algorithm. - * @param {number} options.lambda - Number between 0 and 1 that determines the degree of diversity among the results, - * where 0 corresponds to maximum diversity and 1 to minimum diversity. - * @param {this["FilterType"]} options.filter - Optional filter - * @param _callbacks - * - * @returns {Promise} - List of documents selected by maximal marginal relevance. - */ - async maxMarginalRelevanceSearch?( - query: string, - options: MaxMarginalRelevanceSearchOptions, - _callbacks: Callbacks | undefined // implement passing to embedQuery later - ): Promise; - - static fromTexts( - _texts: string[], - _metadatas: object[] | object, - _embeddings: Embeddings, - // eslint-disable-next-line @typescript-eslint/no-explicit-any - _dbConfig: Record - ): Promise { - throw new Error( - "the Langchain vectorstore implementation you are using forgot to override this, please report a bug" - ); - } - - static fromDocuments( - _docs: Document[], - _embeddings: Embeddings, - // eslint-disable-next-line @typescript-eslint/no-explicit-any - _dbConfig: Record - ): Promise { - throw new Error( - "the Langchain vectorstore implementation you are using forgot to override this, please report a bug" - ); - } - - asRetriever( - kOrFields?: number | Partial>, - filter?: this["FilterType"], - callbacks?: Callbacks, - tags?: string[], - metadata?: Record, - verbose?: boolean - ): VectorStoreRetriever { - if (typeof kOrFields === "number") { - return new VectorStoreRetriever({ - vectorStore: this, - k: kOrFields, - filter, - tags: [...(tags ?? []), this._vectorstoreType()], - metadata, - verbose, - callbacks, - }); - } else { - const params = { - vectorStore: this, - k: kOrFields?.k, - filter: kOrFields?.filter, - tags: [...(kOrFields?.tags ?? 
[]), this._vectorstoreType()], - metadata: kOrFields?.metadata, - verbose: kOrFields?.verbose, - callbacks: kOrFields?.callbacks, - searchType: kOrFields?.searchType, - }; - if (kOrFields?.searchType === "mmr") { - return new VectorStoreRetriever({ - ...params, - searchKwargs: kOrFields.searchKwargs, - }); - } - return new VectorStoreRetriever({ ...params }); - } - } -} - -/** - * Abstract class extending VectorStore with functionality for saving and - * loading the vector store. - */ -export abstract class SaveableVectorStore extends VectorStore { - abstract save(directory: string): Promise; - - static load( - _directory: string, - _embeddings: Embeddings - ): Promise { - throw new Error("Not implemented"); - } -} +export * from "@langchain/core/vectorstores"; diff --git a/langchain/src/vectorstores/cassandra.ts b/langchain/src/vectorstores/cassandra.ts index d3938d4a6b5f..6c6e084674a4 100644 --- a/langchain/src/vectorstores/cassandra.ts +++ b/langchain/src/vectorstores/cassandra.ts @@ -1,581 +1 @@ -/* eslint-disable prefer-template */ -import { Client as CassandraClient, DseClientOptions } from "cassandra-driver"; - -import { AsyncCaller, AsyncCallerParams } from "../util/async_caller.js"; -import { Embeddings } from "../embeddings/base.js"; -import { VectorStore } from "./base.js"; -import { Document } from "../document.js"; - -export interface Column { - type: string; - name: string; - partition?: boolean; -} - -export interface Index { - name: string; - value: string; -} - -export interface Filter { - name: string; - value: unknown; - operator?: string; -} - -export type WhereClause = Filter[] | Filter | Record; - -export type SupportedVectorTypes = "cosine" | "dot_product" | "euclidean"; - -export interface CassandraLibArgs extends DseClientOptions, AsyncCallerParams { - table: string; - keyspace: string; - vectorType?: SupportedVectorTypes; - dimensions: number; - primaryKey: Column | Column[]; - metadataColumns: Column[]; - withClause?: string; - indices?: 
Index[]; - batchSize?: number; -} - -/** - * Class for interacting with the Cassandra database. It extends the - * VectorStore class and provides methods for adding vectors and - * documents, searching for similar vectors, and creating instances from - * texts or documents. - */ -export class CassandraStore extends VectorStore { - // eslint-disable-next-line @typescript-eslint/no-explicit-any - declare FilterType: WhereClause; - - private client: CassandraClient; - - private readonly vectorType: SupportedVectorTypes; - - private readonly dimensions: number; - - private readonly keyspace: string; - - private primaryKey: Column[]; - - private metadataColumns: Column[]; - - private withClause: string; - - private selectColumns: string; - - private readonly table: string; - - private indices: Index[]; - - private isInitialized = false; - - asyncCaller: AsyncCaller; - - private readonly batchSize: number; - - _vectorstoreType(): string { - return "cassandra"; - } - - constructor(embeddings: Embeddings, args: CassandraLibArgs) { - super(embeddings, args); - - const { - indices = [], - maxConcurrency = 25, - withClause = "", - batchSize = 1, - vectorType = "cosine", - dimensions, - keyspace, - table, - primaryKey, - metadataColumns, - } = args; - - const argsWithDefaults = { - ...args, - indices, - maxConcurrency, - withClause, - batchSize, - vectorType, - }; - this.asyncCaller = new AsyncCaller(argsWithDefaults); - this.client = new CassandraClient(argsWithDefaults); - - // Assign properties - this.vectorType = vectorType; - this.dimensions = dimensions; - this.keyspace = keyspace; - this.table = table; - this.primaryKey = Array.isArray(primaryKey) ? primaryKey : [primaryKey]; - this.metadataColumns = metadataColumns; - this.withClause = withClause.trim().replace(/^with\s*/i, ""); - this.indices = indices; - this.batchSize = batchSize >= 1 ? batchSize : 1; - } - - /** - * Method to save vectors to the Cassandra database. - * @param vectors Vectors to save. 
- * @param documents The documents associated with the vectors. - * @returns Promise that resolves when the vectors have been added. - */ - async addVectors(vectors: number[][], documents: Document[]): Promise { - if (vectors.length === 0) { - return; - } - - if (!this.isInitialized) { - await this.initialize(); - } - - await this.insertAll(vectors, documents); - } - - /** - * Method to add documents to the Cassandra database. - * @param documents The documents to add. - * @returns Promise that resolves when the documents have been added. - */ - async addDocuments(documents: Document[]): Promise { - return this.addVectors( - await this.embeddings.embedDocuments(documents.map((d) => d.pageContent)), - documents - ); - } - - /** - * Method to search for vectors that are similar to a given query vector. - * @param query The query vector. - * @param k The number of similar vectors to return. - * @param filter - * @returns Promise that resolves with an array of tuples, each containing a Document and a score. - */ - async similaritySearchVectorWithScore( - query: number[], - k: number, - filter?: WhereClause - ): Promise<[Document, number][]> { - if (!this.isInitialized) { - await this.initialize(); - } - - // Ensure we have an array of Filter from the public interface - const filters = this.asFilters(filter); - - const queryStr = this.buildSearchQuery(filters); - - // Search query will be of format: - // SELECT ..., text, similarity_x(?) AS similarity_score - // FROM ... - // - // ORDER BY vector ANN OF ? - // LIMIT ? - // If any filter values are specified, they will be in the WHERE clause as - // filter.name filter.operator ? 
- // queryParams is a list of bind variables sent with the prepared statement - const queryParams = []; - const vectorAsFloat32Array = new Float32Array(query); - queryParams.push(vectorAsFloat32Array); - if (filters) { - const values = (filters as Filter[]).map(({ value }) => value); - queryParams.push(...values); - } - queryParams.push(vectorAsFloat32Array); - queryParams.push(k); - - const queryResultSet = await this.client.execute(queryStr, queryParams, { - prepare: true, - }); - - return queryResultSet?.rows.map((row) => { - const textContent = row.text; - const sanitizedRow = { ...row }; - delete sanitizedRow.text; - delete sanitizedRow.similarity_score; - - // A null value in Cassandra evaluates to a deleted column - // as this is treated as a tombstone record for the cell. - Object.keys(sanitizedRow).forEach((key) => { - if (sanitizedRow[key] === null) { - delete sanitizedRow[key]; - } - }); - - return [ - new Document({ pageContent: textContent, metadata: sanitizedRow }), - row.similarity_score, - ]; - }); - } - - /** - * Static method to create an instance of CassandraStore from texts. - * @param texts The texts to use. - * @param metadatas The metadata associated with the texts. - * @param embeddings The embeddings to use. - * @param args The arguments for the CassandraStore. - * @returns Promise that resolves with a new instance of CassandraStore. - */ - static async fromTexts( - texts: string[], - metadatas: object | object[], - embeddings: Embeddings, - args: CassandraLibArgs - ): Promise { - const docs: Document[] = []; - - for (let index = 0; index < texts.length; index += 1) { - const metadata = Array.isArray(metadatas) ? metadatas[index] : metadatas; - const doc = new Document({ - pageContent: texts[index], - metadata, - }); - docs.push(doc); - } - - return CassandraStore.fromDocuments(docs, embeddings, args); - } - - /** - * Static method to create an instance of CassandraStore from documents. - * @param docs The documents to use. 
- * @param embeddings The embeddings to use. - * @param args The arguments for the CassandraStore. - * @returns Promise that resolves with a new instance of CassandraStore. - */ - static async fromDocuments( - docs: Document[], - embeddings: Embeddings, - args: CassandraLibArgs - ): Promise { - const instance = new this(embeddings, args); - await instance.addDocuments(docs); - return instance; - } - - /** - * Static method to create an instance of CassandraStore from an existing - * index. - * @param embeddings The embeddings to use. - * @param args The arguments for the CassandraStore. - * @returns Promise that resolves with a new instance of CassandraStore. - */ - static async fromExistingIndex( - embeddings: Embeddings, - args: CassandraLibArgs - ): Promise { - const instance = new this(embeddings, args); - - await instance.initialize(); - return instance; - } - - /** - * Method to initialize the Cassandra database. - * @returns Promise that resolves when the database has been initialized. - */ - private async initialize(): Promise { - let cql = ""; - cql = `CREATE TABLE IF NOT EXISTS ${this.keyspace}.${this.table} ( - ${this.primaryKey.map((col) => `${col.name} ${col.type}`).join(", ")} - , text TEXT - ${ - this.metadataColumns.length > 0 - ? ", " + - this.metadataColumns - .map((col) => `${col.name} ${col.type}`) - .join(", ") - : "" - } - , vector VECTOR - , ${this.buildPrimaryKey(this.primaryKey)} - ) ${this.withClause ? `WITH ${this.withClause}` : ""};`; - - await this.client.execute(cql); - - this.selectColumns = `${this.primaryKey - .map((col) => `${col.name}`) - .join(", ")} - ${ - this.metadataColumns.length > 0 - ? 
", " + - this.metadataColumns - .map((col) => `${col.name}`) - .join(", ") - : "" - }`; - - cql = `CREATE CUSTOM INDEX IF NOT EXISTS idx_vector_${this.table} - ON ${this.keyspace}.${ - this.table - }(vector) USING 'StorageAttachedIndex' WITH OPTIONS = {'similarity_function': '${this.vectorType.toUpperCase()}'};`; - await this.client.execute(cql); - - for await (const { name, value } of this.indices) { - cql = `CREATE CUSTOM INDEX IF NOT EXISTS idx_${this.table}_${name} - ON ${this.keyspace}.${this.table} ${value} USING 'StorageAttachedIndex';`; - await this.client.execute(cql); - } - this.isInitialized = true; - } - - /** - * Method to build the PRIMARY KEY clause for CREATE TABLE. - * @param columns: list of Column to include in the key - * @returns The clause, including PRIMARY KEY - */ - private buildPrimaryKey(columns: Column[]): string { - // Partition columns may be specified with optional attribute col.partition - const partitionColumns = columns - .filter((col) => col.partition) - .map((col) => col.name) - .join(", "); - - // All columns not part of the partition key are clustering columns - const clusteringColumns = columns - .filter((col) => !col.partition) - .map((col) => col.name) - .join(", "); - - let primaryKey = ""; - - // If partition columns are specified, they are included in a () wrapper - // If not, the clustering columns are used, and the first clustering column - // is the partition key per normal Cassandra behaviour. - if (partitionColumns) { - primaryKey = `PRIMARY KEY ((${partitionColumns}), ${clusteringColumns})`; - } else { - primaryKey = `PRIMARY KEY (${clusteringColumns})`; - } - - return primaryKey; - } - - /** - * Type guard to check if an object is a Filter. 
- * @param obj: the object to check - * @returns boolean indicating if the object is a Filter - */ - private isFilter(obj: unknown): obj is Filter { - return ( - typeof obj === "object" && obj !== null && "name" in obj && "value" in obj - ); - } - - /** - * Helper to convert Record to a Filter[] - * @param record: a key-value Record collection - * @returns Record as a Filter[] - */ - private convertToFilters(record: Record): Filter[] { - return Object.entries(record).map(([name, value]) => ({ - name, - value, - operator: "=", - })); - } - - /** - * Input santisation method for filters, as FilterType is not required to be - * Filter[], but we want to use Filter[] internally. - * @param record: the proposed filter - * @returns A Filter[], which may be empty - */ - private asFilters(record: WhereClause | undefined): Filter[] { - if (!record) { - return []; - } - - // If record is already an array - if (Array.isArray(record)) { - return record.flatMap((item) => { - // Check if item is a Filter before passing it to convertToFilters - if (this.isFilter(item)) { - return [item]; - } else { - // Here item is treated as Record - return this.convertToFilters(item); - } - }); - } - - // If record is a single Filter object, return it in an array - if (this.isFilter(record)) { - return [record]; - } - - // If record is a Record, convert it to an array of Filter - return this.convertToFilters(record); - } - - /** - * Method to build the WHERE clause of a CQL query, using bind variable ? - * @param filters list of filters to include in the WHERE clause - * @returns The WHERE clause - */ - private buildWhereClause(filters?: Filter[]): string { - if (!filters || filters.length === 0) { - return ""; - } - - const whereConditions = filters.map( - ({ name, operator = "=" }) => `${name} ${operator} ?` - ); - - return `WHERE ${whereConditions.join(" AND ")}`; - } - - /** - * Method to build an CQL query for searching for similar vectors in the - * Cassandra database. 
- * @param query The query vector. - * @param k The number of similar vectors to return. - * @param filters - * @returns The CQL query string. - */ - private buildSearchQuery(filters: Filter[]): string { - const whereClause = filters ? this.buildWhereClause(filters) : ""; - - const cqlQuery = `SELECT ${this.selectColumns}, text, similarity_${this.vectorType}(vector, ?) AS similarity_score - FROM ${this.keyspace}.${this.table} ${whereClause} ORDER BY vector ANN OF ? LIMIT ?`; - - return cqlQuery; - } - - /** - * Method for inserting vectors and documents into the Cassandra database in a batch. - * @param batchVectors The list of vectors to insert. - * @param batchDocuments The list of documents to insert. - * @returns Promise that resolves when the batch has been inserted. - */ - private async executeInsert( - batchVectors: number[][], - batchDocuments: Document[] - ): Promise { - // Input validation: Check if the lengths of batchVectors and batchDocuments are the same - if (batchVectors.length !== batchDocuments.length) { - throw new Error( - `The lengths of vectors (${batchVectors.length}) and documents (${batchDocuments.length}) must be the same.` - ); - } - - // Initialize an array to hold query objects - const queries = []; - - // Loop through each vector and document in the batch - for (let i = 0; i < batchVectors.length; i += 1) { - // Convert the list of numbers to a Float32Array, the driver's expected format of a vector - const preparedVector = new Float32Array(batchVectors[i]); - // Retrieve the corresponding document - const document = batchDocuments[i]; - - // Extract metadata column names and values from the document - const metadataColNames = Object.keys(document.metadata); - const metadataVals = Object.values(document.metadata); - - // Prepare the metadata columns string for the query, if metadata exists - const metadataInsert = - metadataColNames.length > 0 ? 
", " + metadataColNames.join(", ") : ""; - - // Construct the query string and parameters - const query = { - query: `INSERT INTO ${this.keyspace}.${ - this.table - } (vector, text${metadataInsert}) - VALUES (?, ?${", ?".repeat(metadataColNames.length)})`, - params: [preparedVector, document.pageContent, ...metadataVals], - }; - - // Add the query to the list - queries.push(query); - } - - // Execute the queries: use a batch if multiple, otherwise execute a single query - if (queries.length === 1) { - await this.client.execute(queries[0].query, queries[0].params, { - prepare: true, - }); - } else { - await this.client.batch(queries, { prepare: true, logged: false }); - } - } - - /** - * Method for inserting vectors and documents into the Cassandra database in - * parallel, keeping within maxConcurrency number of active insert statements. - * @param vectors The vectors to insert. - * @param documents The documents to insert. - * @returns Promise that resolves when the documents have been added. 
- */ - private async insertAll( - vectors: number[][], - documents: Document[] - ): Promise { - // Input validation: Check if the lengths of vectors and documents are the same - if (vectors.length !== documents.length) { - throw new Error( - `The lengths of vectors (${vectors.length}) and documents (${documents.length}) must be the same.` - ); - } - - // Early exit: If there are no vectors or documents to insert, return immediately - if (vectors.length === 0) { - return; - } - - // Ensure the store is initialized before proceeding - if (!this.isInitialized) { - await this.initialize(); - } - - // Initialize an array to hold promises for each batch insert - const insertPromises: Promise[] = []; - - // Buffers to hold the current batch of vectors and documents - let currentBatchVectors: number[][] = []; - let currentBatchDocuments: Document[] = []; - - // Loop through each vector/document pair to insert; we use - // <= vectors.length to ensure the last batch is inserted - for (let i = 0; i <= vectors.length; i += 1) { - // Check if we're still within the array boundaries - if (i < vectors.length) { - // Add the current vector and document to the batch - currentBatchVectors.push(vectors[i]); - currentBatchDocuments.push(documents[i]); - } - - // Check if we've reached the batch size or end of the array - if ( - currentBatchVectors.length >= this.batchSize || - i === vectors.length - ) { - // Only proceed if there are items in the current batch - if (currentBatchVectors.length > 0) { - // Create copies of the current batch arrays to use in the async insert operation - const batchVectors = [...currentBatchVectors]; - const batchDocuments = [...currentBatchDocuments]; - - // Execute the insert using the AsyncCaller - it will handle concurrency and queueing. 
- insertPromises.push( - this.asyncCaller.call(() => - this.executeInsert(batchVectors, batchDocuments) - ) - ); - - // Clear the current buffers for the next iteration - currentBatchVectors = []; - currentBatchDocuments = []; - } - } - } - - // Wait for all insert operations to complete. - await Promise.all(insertPromises); - } -} +export * from "@langchain/community/vectorstores/cassandra"; diff --git a/langchain/src/vectorstores/chroma.ts b/langchain/src/vectorstores/chroma.ts index 6b52b8c6f1b7..583129d43503 100644 --- a/langchain/src/vectorstores/chroma.ts +++ b/langchain/src/vectorstores/chroma.ts @@ -1,364 +1 @@ -import * as uuid from "uuid"; -import type { ChromaClient as ChromaClientT, Collection } from "chromadb"; -import type { CollectionMetadata, Where } from "chromadb/dist/main/types.js"; - -import { Embeddings } from "../embeddings/base.js"; -import { VectorStore } from "./base.js"; -import { Document } from "../document.js"; - -/** - * Defines the arguments that can be passed to the `Chroma` class - * constructor. It can either contain a `url` for the Chroma database, the - * number of dimensions for the vectors (`numDimensions`), a - * `collectionName` for the collection to be used in the database, and a - * `filter` object; or it can contain an `index` which is an instance of - * `ChromaClientT`, along with the `numDimensions`, `collectionName`, and - * `filter`. - */ -export type ChromaLibArgs = - | { - url?: string; - numDimensions?: number; - collectionName?: string; - filter?: object; - collectionMetadata?: CollectionMetadata; - } - | { - index?: ChromaClientT; - numDimensions?: number; - collectionName?: string; - filter?: object; - collectionMetadata?: CollectionMetadata; - }; - -/** - * Defines the parameters for the `delete` method in the `Chroma` class. - * It can either contain an array of `ids` of the documents to be deleted - * or a `filter` object to specify the documents to be deleted. 
- */ -export interface ChromaDeleteParams { - ids?: string[]; - filter?: T; -} - -/** - * The main class that extends the `VectorStore` class. It provides - * methods for interacting with the Chroma database, such as adding - * documents, deleting documents, and searching for similar vectors. - */ -export class Chroma extends VectorStore { - declare FilterType: Where; - - index?: ChromaClientT; - - collection?: Collection; - - collectionName: string; - - collectionMetadata?: CollectionMetadata; - - numDimensions?: number; - - url: string; - - filter?: object; - - _vectorstoreType(): string { - return "chroma"; - } - - constructor(embeddings: Embeddings, args: ChromaLibArgs) { - super(embeddings, args); - this.numDimensions = args.numDimensions; - this.embeddings = embeddings; - this.collectionName = ensureCollectionName(args.collectionName); - this.collectionMetadata = args.collectionMetadata; - if ("index" in args) { - this.index = args.index; - } else if ("url" in args) { - this.url = args.url || "http://localhost:8000"; - } - - this.filter = args.filter; - } - - /** - * Adds documents to the Chroma database. The documents are first - * converted to vectors using the `embeddings` instance, and then added to - * the database. - * @param documents An array of `Document` instances to be added to the database. - * @param options Optional. An object containing an array of `ids` for the documents. - * @returns A promise that resolves when the documents have been added to the database. - */ - async addDocuments(documents: Document[], options?: { ids?: string[] }) { - const texts = documents.map(({ pageContent }) => pageContent); - return this.addVectors( - await this.embeddings.embedDocuments(texts), - documents, - options - ); - } - - /** - * Ensures that a collection exists in the Chroma database. If the - * collection does not exist, it is created. - * @returns A promise that resolves with the `Collection` instance. 
- */ - async ensureCollection(): Promise { - if (!this.collection) { - if (!this.index) { - const { ChromaClient } = await Chroma.imports(); - this.index = new ChromaClient({ path: this.url }); - } - try { - this.collection = await this.index.getOrCreateCollection({ - name: this.collectionName, - ...(this.collectionMetadata && { metadata: this.collectionMetadata }), - }); - } catch (err) { - throw new Error(`Chroma getOrCreateCollection error: ${err}`); - } - } - - return this.collection; - } - - /** - * Adds vectors to the Chroma database. The vectors are associated with - * the provided documents. - * @param vectors An array of vectors to be added to the database. - * @param documents An array of `Document` instances associated with the vectors. - * @param options Optional. An object containing an array of `ids` for the vectors. - * @returns A promise that resolves with an array of document IDs when the vectors have been added to the database. - */ - async addVectors( - vectors: number[][], - documents: Document[], - options?: { ids?: string[] } - ) { - if (vectors.length === 0) { - return []; - } - if (this.numDimensions === undefined) { - this.numDimensions = vectors[0].length; - } - if (vectors.length !== documents.length) { - throw new Error(`Vectors and metadatas must have the same length`); - } - if (vectors[0].length !== this.numDimensions) { - throw new Error( - `Vectors must have the same length as the number of dimensions (${this.numDimensions})` - ); - } - - const documentIds = - options?.ids ?? 
Array.from({ length: vectors.length }, () => uuid.v1()); - const collection = await this.ensureCollection(); - - const mappedMetadatas = documents.map(({ metadata }) => { - let locFrom; - let locTo; - - if (metadata?.loc) { - if (metadata.loc.lines?.from !== undefined) - locFrom = metadata.loc.lines.from; - if (metadata.loc.lines?.to !== undefined) locTo = metadata.loc.lines.to; - } - - const newMetadata: Document["metadata"] = { - ...metadata, - ...(locFrom !== undefined && { locFrom }), - ...(locTo !== undefined && { locTo }), - }; - - if (newMetadata.loc) delete newMetadata.loc; - - return newMetadata; - }); - - await collection.upsert({ - ids: documentIds, - embeddings: vectors, - metadatas: mappedMetadatas, - documents: documents.map(({ pageContent }) => pageContent), - }); - return documentIds; - } - - /** - * Deletes documents from the Chroma database. The documents to be deleted - * can be specified by providing an array of `ids` or a `filter` object. - * @param params An object containing either an array of `ids` of the documents to be deleted or a `filter` object to specify the documents to be deleted. - * @returns A promise that resolves when the specified documents have been deleted from the database. - */ - async delete(params: ChromaDeleteParams): Promise { - const collection = await this.ensureCollection(); - if (Array.isArray(params.ids)) { - await collection.delete({ ids: params.ids }); - } else if (params.filter) { - await collection.delete({ - where: { ...params.filter }, - }); - } else { - throw new Error(`You must provide one of "ids or "filter".`); - } - } - - /** - * Searches for vectors in the Chroma database that are similar to the - * provided query vector. The search can be filtered using the provided - * `filter` object or the `filter` property of the `Chroma` instance. - * @param query The query vector. - * @param k The number of similar vectors to return. - * @param filter Optional. A `filter` object to filter the search results. 
- * @returns A promise that resolves with an array of tuples, each containing a `Document` instance and a similarity score. - */ - async similaritySearchVectorWithScore( - query: number[], - k: number, - filter?: this["FilterType"] - ) { - if (filter && this.filter) { - throw new Error("cannot provide both `filter` and `this.filter`"); - } - const _filter = filter ?? this.filter; - - const collection = await this.ensureCollection(); - - // similaritySearchVectorWithScore supports one query vector at a time - // chroma supports multiple query vectors at a time - const result = await collection.query({ - queryEmbeddings: query, - nResults: k, - where: { ..._filter }, - }); - - const { ids, distances, documents, metadatas } = result; - if (!ids || !distances || !documents || !metadatas) { - return []; - } - // get the result data from the first and only query vector - const [firstIds] = ids; - const [firstDistances] = distances; - const [firstDocuments] = documents; - const [firstMetadatas] = metadatas; - - const results: [Document, number][] = []; - for (let i = 0; i < firstIds.length; i += 1) { - let metadata: Document["metadata"] = firstMetadatas?.[i] ?? {}; - - if (metadata.locFrom && metadata.locTo) { - metadata = { - ...metadata, - loc: { - lines: { - from: metadata.locFrom, - to: metadata.locTo, - }, - }, - }; - - delete metadata.locFrom; - delete metadata.locTo; - } - - results.push([ - new Document({ - pageContent: firstDocuments?.[i] ?? "", - metadata, - }), - firstDistances[i], - ]); - } - return results; - } - - /** - * Creates a new `Chroma` instance from an array of text strings. The text - * strings are converted to `Document` instances and added to the Chroma - * database. - * @param texts An array of text strings. - * @param metadatas An array of metadata objects or a single metadata object. If an array is provided, it must have the same length as the `texts` array. 
- * @param embeddings An `Embeddings` instance used to generate embeddings for the documents. - * @param dbConfig A `ChromaLibArgs` object containing the configuration for the Chroma database. - * @returns A promise that resolves with a new `Chroma` instance. - */ - static async fromTexts( - texts: string[], - metadatas: object[] | object, - embeddings: Embeddings, - dbConfig: ChromaLibArgs - ): Promise { - const docs: Document[] = []; - for (let i = 0; i < texts.length; i += 1) { - const metadata = Array.isArray(metadatas) ? metadatas[i] : metadatas; - const newDoc = new Document({ - pageContent: texts[i], - metadata, - }); - docs.push(newDoc); - } - return this.fromDocuments(docs, embeddings, dbConfig); - } - - /** - * Creates a new `Chroma` instance from an array of `Document` instances. - * The documents are added to the Chroma database. - * @param docs An array of `Document` instances. - * @param embeddings An `Embeddings` instance used to generate embeddings for the documents. - * @param dbConfig A `ChromaLibArgs` object containing the configuration for the Chroma database. - * @returns A promise that resolves with a new `Chroma` instance. - */ - static async fromDocuments( - docs: Document[], - embeddings: Embeddings, - dbConfig: ChromaLibArgs - ): Promise { - const instance = new this(embeddings, dbConfig); - await instance.addDocuments(docs); - return instance; - } - - /** - * Creates a new `Chroma` instance from an existing collection in the - * Chroma database. - * @param embeddings An `Embeddings` instance used to generate embeddings for the documents. - * @param dbConfig A `ChromaLibArgs` object containing the configuration for the Chroma database. - * @returns A promise that resolves with a new `Chroma` instance. 
- */ - static async fromExistingCollection( - embeddings: Embeddings, - dbConfig: ChromaLibArgs - ): Promise { - const instance = new this(embeddings, dbConfig); - await instance.ensureCollection(); - return instance; - } - - /** - * Imports the `ChromaClient` from the `chromadb` module. - * @returns A promise that resolves with an object containing the `ChromaClient` constructor. - */ - static async imports(): Promise<{ - ChromaClient: typeof ChromaClientT; - }> { - try { - const { ChromaClient } = await import("chromadb"); - return { ChromaClient }; - } catch (e) { - throw new Error( - "Please install chromadb as a dependency with, e.g. `npm install -S chromadb`" - ); - } - } -} - -/** - * Generates a unique collection name if none is provided. - */ -function ensureCollectionName(collectionName?: string) { - if (!collectionName) { - return `langchain-${uuid.v4()}`; - } - return collectionName; -} +export * from "@langchain/community/vectorstores/chroma"; diff --git a/langchain/src/vectorstores/clickhouse.ts b/langchain/src/vectorstores/clickhouse.ts index 003256e8d7a8..ead163d980af 100644 --- a/langchain/src/vectorstores/clickhouse.ts +++ b/langchain/src/vectorstores/clickhouse.ts @@ -1,338 +1 @@ -import * as uuid from "uuid"; -import { ClickHouseClient, createClient } from "@clickhouse/client"; -import { format } from "mysql2"; -import { Embeddings } from "../embeddings/base.js"; -import { VectorStore } from "./base.js"; -import { Document } from "../document.js"; - -/** - * Arguments for the ClickHouseStore class, which include the host, port, - * protocol, username, password, index type, index parameters, - * index query params, column map, database, table. 
- */ -export interface ClickHouseLibArgs { - host: string; - port: string | number; - protocol?: string; - username: string; - password: string; - indexType?: string; - indexParam?: Record; - indexQueryParams?: Record; - columnMap?: ColumnMap; - database?: string; - table?: string; -} - -/** - * Mapping of columns in the ClickHouse database. - */ -export interface ColumnMap { - id: string; - uuid: string; - document: string; - embedding: string; - metadata: string; -} - -/** - * Type for filtering search results in the ClickHouse database. - */ -export interface ClickHouseFilter { - whereStr: string; -} - -/** - * Class for interacting with the ClickHouse database. It extends the - * VectorStore class and provides methods for adding vectors and - * documents, searching for similar vectors, and creating instances from - * texts or documents. - */ -export class ClickHouseStore extends VectorStore { - declare FilterType: ClickHouseFilter; - - private client: ClickHouseClient; - - private indexType: string; - - private indexParam: Record; - - private indexQueryParams: Record; - - private columnMap: ColumnMap; - - private database: string; - - private table: string; - - private isInitialized = false; - - _vectorstoreType(): string { - return "clickhouse"; - } - - constructor(embeddings: Embeddings, args: ClickHouseLibArgs) { - super(embeddings, args); - - this.indexType = args.indexType || "annoy"; - this.indexParam = args.indexParam || { L2Distance: 100 }; - this.indexQueryParams = args.indexQueryParams || {}; - this.columnMap = args.columnMap || { - id: "id", - document: "document", - embedding: "embedding", - metadata: "metadata", - uuid: "uuid", - }; - this.database = args.database || "default"; - this.table = args.table || "vector_table"; - - this.client = createClient({ - host: `${args.protocol ?? 
"https://"}${args.host}:${args.port}`, - username: args.username, - password: args.password, - session_id: uuid.v4(), - }); - } - - /** - * Method to add vectors to the ClickHouse database. - * @param vectors The vectors to add. - * @param documents The documents associated with the vectors. - * @returns Promise that resolves when the vectors have been added. - */ - async addVectors(vectors: number[][], documents: Document[]): Promise { - if (vectors.length === 0) { - return; - } - - if (!this.isInitialized) { - await this.initialize(vectors[0].length); - } - - const queryStr = this.buildInsertQuery(vectors, documents); - await this.client.exec({ query: queryStr }); - } - - /** - * Method to add documents to the ClickHouse database. - * @param documents The documents to add. - * @returns Promise that resolves when the documents have been added. - */ - async addDocuments(documents: Document[]): Promise { - return this.addVectors( - await this.embeddings.embedDocuments(documents.map((d) => d.pageContent)), - documents - ); - } - - /** - * Method to search for vectors that are similar to a given query vector. - * @param query The query vector. - * @param k The number of similar vectors to return. - * @param filter Optional filter for the search results. - * @returns Promise that resolves with an array of tuples, each containing a Document and a score. 
- */ - async similaritySearchVectorWithScore( - query: number[], - k: number, - filter?: this["FilterType"] - ): Promise<[Document, number][]> { - if (!this.isInitialized) { - await this.initialize(query.length); - } - const queryStr = this.buildSearchQuery(query, k, filter); - - const queryResultSet = await this.client.query({ query: queryStr }); - - const queryResult: { - data: { document: string; metadata: object; dist: number }[]; - } = await queryResultSet.json(); - - const result: [Document, number][] = queryResult.data.map((item) => [ - new Document({ pageContent: item.document, metadata: item.metadata }), - item.dist, - ]); - - return result; - } - - /** - * Static method to create an instance of ClickHouseStore from texts. - * @param texts The texts to use. - * @param metadatas The metadata associated with the texts. - * @param embeddings The embeddings to use. - * @param args The arguments for the ClickHouseStore. - * @returns Promise that resolves with a new instance of ClickHouseStore. - */ - static async fromTexts( - texts: string[], - metadatas: object | object[], - embeddings: Embeddings, - args: ClickHouseLibArgs - ): Promise { - const docs: Document[] = []; - for (let i = 0; i < texts.length; i += 1) { - const metadata = Array.isArray(metadatas) ? metadatas[i] : metadatas; - const newDoc = new Document({ - pageContent: texts[i], - metadata, - }); - docs.push(newDoc); - } - return ClickHouseStore.fromDocuments(docs, embeddings, args); - } - - /** - * Static method to create an instance of ClickHouseStore from documents. - * @param docs The documents to use. - * @param embeddings The embeddings to use. - * @param args The arguments for the ClickHouseStore. - * @returns Promise that resolves with a new instance of ClickHouseStore. 
- */ - static async fromDocuments( - docs: Document[], - embeddings: Embeddings, - args: ClickHouseLibArgs - ): Promise { - const instance = new this(embeddings, args); - await instance.addDocuments(docs); - return instance; - } - - /** - * Static method to create an instance of ClickHouseStore from an existing - * index. - * @param embeddings The embeddings to use. - * @param args The arguments for the ClickHouseStore. - * @returns Promise that resolves with a new instance of ClickHouseStore. - */ - static async fromExistingIndex( - embeddings: Embeddings, - args: ClickHouseLibArgs - ): Promise { - const instance = new this(embeddings, args); - - await instance.initialize(); - return instance; - } - - /** - * Method to initialize the ClickHouse database. - * @param dimension Optional dimension of the vectors. - * @returns Promise that resolves when the database has been initialized. - */ - private async initialize(dimension?: number): Promise { - const dim = dimension ?? (await this.embeddings.embedQuery("test")).length; - - const indexParamStr = this.indexParam - ? 
Object.entries(this.indexParam) - .map(([key, value]) => `'${key}', ${value}`) - .join(", ") - : ""; - - const query = ` - CREATE TABLE IF NOT EXISTS ${this.database}.${this.table}( - ${this.columnMap.id} Nullable(String), - ${this.columnMap.document} Nullable(String), - ${this.columnMap.embedding} Array(Float32), - ${this.columnMap.metadata} JSON, - ${this.columnMap.uuid} UUID DEFAULT generateUUIDv4(), - CONSTRAINT cons_vec_len CHECK length(${this.columnMap.embedding}) = ${dim}, - INDEX vec_idx ${this.columnMap.embedding} TYPE ${this.indexType}(${indexParamStr}) GRANULARITY 1000 - ) ENGINE = MergeTree ORDER BY ${this.columnMap.uuid} SETTINGS index_granularity = 8192;`; - - await this.client.exec({ - query, - clickhouse_settings: { - allow_experimental_object_type: 1, - allow_experimental_annoy_index: 1, - }, - }); - this.isInitialized = true; - } - - /** - * Method to build an SQL query for inserting vectors and documents into - * the ClickHouse database. - * @param vectors The vectors to insert. - * @param documents The documents to insert. - * @returns The SQL query string. 
- */ - private buildInsertQuery(vectors: number[][], documents: Document[]): string { - const columnsStr = Object.values( - Object.fromEntries( - Object.entries(this.columnMap).filter( - ([key]) => key !== this.columnMap.uuid - ) - ) - ).join(", "); - - const placeholders = vectors.map(() => "(?, ?, ?, ?)").join(", "); - const values = []; - - for (let i = 0; i < vectors.length; i += 1) { - const vector = vectors[i]; - const document = documents[i]; - values.push( - uuid.v4(), - this.escapeString(document.pageContent), - JSON.stringify(vector), - JSON.stringify(document.metadata) - ); - } - - const insertQueryStr = ` - INSERT INTO TABLE ${this.database}.${this.table}(${columnsStr}) - VALUES ${placeholders} - `; - - const insertQuery = format(insertQueryStr, values); - return insertQuery; - } - - private escapeString(str: string): string { - return str.replace(/\\/g, "\\\\").replace(/'/g, "\\'"); - } - - /** - * Method to build an SQL query for searching for similar vectors in the - * ClickHouse database. - * @param query The query vector. - * @param k The number of similar vectors to return. - * @param filter Optional filter for the search results. - * @returns The SQL query string. - */ - private buildSearchQuery( - query: number[], - k: number, - filter?: ClickHouseFilter - ): string { - const order = "ASC"; - const whereStr = filter ? 
`PREWHERE ${filter.whereStr}` : ""; - const placeholders = query.map(() => "?").join(", "); - - const settingStrings: string[] = []; - if (this.indexQueryParams) { - for (const [key, value] of Object.entries(this.indexQueryParams)) { - settingStrings.push(`SETTING ${key}=${value}`); - } - } - - const searchQueryStr = ` - SELECT ${this.columnMap.document} AS document, ${ - this.columnMap.metadata - } AS metadata, dist - FROM ${this.database}.${this.table} - ${whereStr} - ORDER BY L2Distance(${ - this.columnMap.embedding - }, [${placeholders}]) AS dist ${order} - LIMIT ${k} ${settingStrings.join(" ")} - `; - - // Format the query with actual values - const searchQuery = format(searchQueryStr, query); - return searchQuery; - } -} +export * from "@langchain/community/vectorstores/clickhouse"; diff --git a/langchain/src/vectorstores/closevector/node.ts b/langchain/src/vectorstores/closevector/node.ts index dd91f71a57b9..d3a2298f9577 100644 --- a/langchain/src/vectorstores/closevector/node.ts +++ b/langchain/src/vectorstores/closevector/node.ts @@ -1,182 +1 @@ -import { - CloseVectorHNSWNode, - HierarchicalNSWT, - CloseVectorHNSWLibArgs, - CloseVectorCredentials, -} from "closevector-node"; - -import { CloseVector } from "./common.js"; - -import { Embeddings } from "../../embeddings/base.js"; -import { Document } from "../../document.js"; - -/** - * package closevector-node is largely based on hnswlib.ts in the current folder with the following exceptions: - * 1. It uses a modified version of hnswlib-node to ensure the generated index can be loaded by closevector_web.ts. - * 2. It adds features to upload and download the index to/from the CDN provided by CloseVector. - * - * For more information, check out https://closevector-docs.getmegaportal.com/ - */ - -/** - * Arguments for creating a CloseVectorNode instance, extending CloseVectorHNSWLibArgs. 
- */ -export interface CloseVectorNodeArgs - extends CloseVectorHNSWLibArgs { - instance?: CloseVectorHNSWNode; -} - -/** - * Class that implements a vector store using Hierarchical Navigable Small - * World (HNSW) graphs. It extends the SaveableVectorStore class and - * provides methods for adding documents and vectors, performing - * similarity searches, and saving and loading the vector store. - */ -export class CloseVectorNode extends CloseVector { - declare FilterType: (doc: Document) => boolean; - - constructor( - embeddings: Embeddings, - args: CloseVectorNodeArgs, - credentials?: CloseVectorCredentials - ) { - super(embeddings, args, credentials); - if (args.instance) { - this.instance = args.instance; - } else { - this.instance = new CloseVectorHNSWNode(embeddings, args); - } - if (this.credentials?.key) { - this.instance.accessKey = this.credentials.key; - } - if (this.credentials?.secret) { - this.instance.secret = this.credentials.secret; - } - } - - /** - * Method to save the index to the CloseVector CDN. - * @param options - * @param options.description A description of the index. - * @param options.public Whether the index should be public or private. Defaults to false. - * @param options.uuid A UUID for the index. If not provided, a new index will be created. - * @param options.onProgress A callback function that will be called with the progress of the upload. - */ - async saveToCloud( - options: Parameters[0] - ) { - await this.instance.saveToCloud(options); - } - - /** - * Method to load the index from the CloseVector CDN. - * @param options - * @param options.uuid The UUID of the index to be downloaded. - * @param options.credentials The credentials to be used by the CloseVectorNode instance. - * @param options.embeddings The embeddings to be used by the CloseVectorNode instance. - * @param options.onProgress A callback function that will be called with the progress of the download. 
- */ - static async loadFromCloud( - options: Omit< - Parameters<(typeof CloseVectorHNSWNode)["loadFromCloud"]>[0] & { - embeddings: Embeddings; - credentials: CloseVectorCredentials; - }, - "accessKey" | "secret" - > - ) { - if (!options.credentials.key || !options.credentials.secret) { - throw new Error("key and secret must be provided"); - } - const instance = await CloseVectorHNSWNode.loadFromCloud({ - ...options, - accessKey: options.credentials.key, - secret: options.credentials.secret, - }); - const vectorstore = new this( - options.embeddings, - instance.args, - options.credentials - ); - return vectorstore; - } - - /** - * Static method to load a vector store from a directory. It reads the - * HNSW index, the arguments, and the document store from the directory, - * then creates a new HNSWLib instance with these values. - * @param directory The directory from which to load the vector store. - * @param embeddings The embeddings to be used by the CloseVectorNode instance. - * @returns A Promise that resolves to a new CloseVectorNode instance. - */ - static async load( - directory: string, - embeddings: Embeddings, - credentials?: CloseVectorCredentials - ) { - const instance = await CloseVectorHNSWNode.load(directory, embeddings); - const vectorstore = new this(embeddings, instance.args, credentials); - return vectorstore; - } - - /** - * Static method to create a new CloseVectorWeb instance from texts and metadata. - * It creates a new Document instance for each text and metadata, then - * calls the fromDocuments method to create the CloseVectorWeb instance. - * @param texts The texts to be used to create the documents. - * @param metadatas The metadata to be used to create the documents. - * @param embeddings The embeddings to be used by the CloseVectorWeb instance. - * @param args An optional configuration object for the CloseVectorWeb instance. - * @param credential An optional credential object for the CloseVector API. 
- * @returns A Promise that resolves to a new CloseVectorWeb instance. - */ - static async fromTexts( - texts: string[], - metadatas: object[] | object, - embeddings: Embeddings, - args?: Record, - credential?: CloseVectorCredentials - ): Promise { - const docs = CloseVector.textsToDocuments(texts, metadatas); - return await CloseVectorNode.fromDocuments( - docs, - embeddings, - args, - credential - ); - } - - /** - * Static method to create a new CloseVectorNode instance from documents. It - * creates a new CloseVectorNode instance, adds the documents to it, then returns - * the instance. - * @param docs The documents to be added to the HNSWLib instance. - * @param embeddings The embeddings to be used by the HNSWLib instance. - * @param args An optional configuration object for the HNSWLib instance. - * @param credentials An optional credential object for the CloseVector API. - * @returns A Promise that resolves to a new CloseVectorNode instance. - */ - static async fromDocuments( - docs: Document[], - embeddings: Embeddings, - args?: Record, - credentials?: CloseVectorCredentials - ): Promise { - const _args: Record = args || { - space: "cosine", - }; - const instance = new this( - embeddings, - _args as unknown as CloseVectorNodeArgs, - credentials - ); - await instance.addDocuments(docs); - return instance; - } - - static async imports(): Promise<{ - HierarchicalNSW: typeof HierarchicalNSWT; - }> { - return CloseVectorHNSWNode.imports(); - } -} +export * from "@langchain/community/vectorstores/closevector/node"; diff --git a/langchain/src/vectorstores/closevector/web.ts b/langchain/src/vectorstores/closevector/web.ts index 06882c2befa9..bc67272fa78e 100644 --- a/langchain/src/vectorstores/closevector/web.ts +++ b/langchain/src/vectorstores/closevector/web.ts @@ -1,179 +1 @@ -import { - CloseVectorHNSWWeb, - HierarchicalNSWT, - CloseVectorHNSWLibArgs, - CloseVectorCredentials, - HnswlibModule, -} from "closevector-web"; - -import { CloseVector } from 
"./common.js"; - -import { Embeddings } from "../../embeddings/base.js"; -import { Document } from "../../document.js"; - -/** - * package closevector-node is largely based on hnswlib.ts in the current folder with the following exceptions: - * 1. It uses a modified version of hnswlib-node to ensure the generated index can be loaded by closevector_web.ts. - * 2. It adds features to upload and download the index to/from the CDN provided by CloseVector. - * - * For more information, check out https://closevector-docs.getmegaportal.com/ - */ - -/** - * Arguments for creating a CloseVectorWeb instance, extending CloseVectorHNSWLibArgs. - */ -export interface CloseVectorWebArgs - extends CloseVectorHNSWLibArgs { - instance?: CloseVectorHNSWWeb; -} - -/** - * Class that implements a vector store using CloseVector, It extends the SaveableVectorStore class and - * provides methods for adding documents and vectors, performing - * similarity searches, and saving and loading the vector store. - */ -export class CloseVectorWeb extends CloseVector { - declare FilterType: (doc: Document) => boolean; - - constructor( - embeddings: Embeddings, - args: CloseVectorWebArgs, - credentials?: CloseVectorCredentials - ) { - super(embeddings, args, credentials); - if (args.instance) { - this.instance = args.instance; - } else { - this.instance = new CloseVectorHNSWWeb(embeddings, args); - } - } - - /** - * Method to save the index to the CloseVector CDN. 
- * @param options - * @param options.url the upload url generated by the CloseVector API: https://closevector-docs.getmegaportal.com/docs/api/http-api/file-url - * @param options.onProgress a callback function to track the upload progress - */ - async saveToCloud( - options: Parameters[0] & { - uuid?: string; - } - ) { - if (!this.instance.uuid && !options.uuid) { - throw new Error("No uuid provided"); - } - if (!this.instance.uuid) { - this.instance._uuid = options.uuid; - } - await this.save(this.instance.uuid); - await this.instance.saveToCloud(options); - } - - /** - * Method to load the index from the CloseVector CDN. - * @param options - * @param options.url the upload url generated by the CloseVector API: https://closevector-docs.getmegaportal.com/docs/api/http-api/file-url - * @param options.onProgress a callback function to track the upload progress - * @param options.uuid the uuid of the index to be downloaded - * @param options.embeddings the embeddings to be used by the CloseVectorWeb instance - */ - static async loadFromCloud( - options: Parameters[0] & { - embeddings: Embeddings; - credentials?: CloseVectorCredentials; - } - ) { - const instance = await CloseVectorHNSWWeb.loadFromCloud(options); - const vectorstore = new this( - options.embeddings, - instance.args, - options.credentials - ); - return vectorstore; - } - - /** - * Static method to load a vector store from a directory. It reads the - * HNSW index, the arguments, and the document store from the directory, - * then creates a new CloseVectorWeb instance with these values. - * @param directory The directory from which to load the vector store. - * @param embeddings The embeddings to be used by the CloseVectorWeb instance. - * @returns A Promise that resolves to a new CloseVectorWeb instance. 
- */ - static async load( - directory: string, - embeddings: Embeddings, - credentials?: CloseVectorCredentials - ) { - const instance = await CloseVectorHNSWWeb.load(directory, embeddings); - const vectorstore = new this(embeddings, instance.args, credentials); - return vectorstore; - } - - /** - * Static method to create a new CloseVectorWeb instance from texts and metadata. - * It creates a new Document instance for each text and metadata, then - * calls the fromDocuments method to create the CloseVectorWeb instance. - * @param texts The texts to be used to create the documents. - * @param metadatas The metadata to be used to create the documents. - * @param embeddings The embeddings to be used by the CloseVectorWeb instance. - * @param args An optional configuration object for the CloseVectorWeb instance. - * @param credential An optional credential object for the CloseVector API. - * @returns A Promise that resolves to a new CloseVectorWeb instance. - */ - static async fromTexts( - texts: string[], - metadatas: object[] | object, - embeddings: Embeddings, - args?: Record, - credential?: CloseVectorCredentials - ): Promise { - const docs: Document[] = []; - for (let i = 0; i < texts.length; i += 1) { - const metadata = Array.isArray(metadatas) ? metadatas[i] : metadatas; - const newDoc = new Document({ - pageContent: texts[i], - metadata, - }); - docs.push(newDoc); - } - return await CloseVectorWeb.fromDocuments( - docs, - embeddings, - args, - credential - ); - } - - /** - * Static method to create a new CloseVectorWeb instance from documents. It - * creates a new CloseVectorWeb instance, adds the documents to it, then returns - * the instance. - * @param docs The documents to be added to the CloseVectorWeb instance. - * @param embeddings The embeddings to be used by the CloseVectorWeb instance. - * @param args An optional configuration object for the CloseVectorWeb instance. - * @param credentials An optional credential object for the CloseVector API. 
- * @returns A Promise that resolves to a new CloseVectorWeb instance. - */ - static async fromDocuments( - docs: Document[], - embeddings: Embeddings, - args?: Record, - credentials?: CloseVectorCredentials - ): Promise { - const _args: Record = args || { - space: "cosine", - }; - const instance = new this( - embeddings, - _args as unknown as CloseVectorWebArgs, - credentials - ); - await instance.addDocuments(docs); - return instance; - } - - static async imports(): Promise { - return CloseVectorHNSWWeb.imports(); - } -} +export * from "@langchain/community/vectorstores/closevector/web"; diff --git a/langchain/src/vectorstores/cloudflare_vectorize.ts b/langchain/src/vectorstores/cloudflare_vectorize.ts index 8a5babf49b1f..5ea66a2f66ae 100644 --- a/langchain/src/vectorstores/cloudflare_vectorize.ts +++ b/langchain/src/vectorstores/cloudflare_vectorize.ts @@ -1,227 +1 @@ -import * as uuid from "uuid"; - -import { - VectorizeIndex, - VectorizeVectorMetadata, -} from "@cloudflare/workers-types"; -import { VectorStore } from "./base.js"; -import { Embeddings } from "../embeddings/base.js"; -import { Document } from "../document.js"; -import { chunkArray } from "../util/chunk.js"; -import { AsyncCaller, type AsyncCallerParams } from "../util/async_caller.js"; - -export interface VectorizeLibArgs extends AsyncCallerParams { - index: VectorizeIndex; - textKey?: string; -} - -/** - * Type that defines the parameters for the delete operation in the - * CloudflareVectorizeStore class. It includes ids, deleteAll flag, and namespace. - */ -export type VectorizeDeleteParams = { - ids: string[]; -}; - -/** - * Class that extends the VectorStore class and provides methods to - * interact with the Cloudflare Vectorize vector database. 
- */ -export class CloudflareVectorizeStore extends VectorStore { - textKey: string; - - namespace?: string; - - index: VectorizeIndex; - - caller: AsyncCaller; - - _vectorstoreType(): string { - return "cloudflare_vectorize"; - } - - constructor(embeddings: Embeddings, args: VectorizeLibArgs) { - super(embeddings, args); - - this.embeddings = embeddings; - const { index, textKey, ...asyncCallerArgs } = args; - if (!index) { - throw new Error( - "Must supply a Vectorize index binding, eg { index: env.VECTORIZE }" - ); - } - this.index = index; - this.textKey = textKey ?? "text"; - this.caller = new AsyncCaller({ - maxConcurrency: 6, - maxRetries: 0, - ...asyncCallerArgs, - }); - } - - /** - * Method that adds documents to the Vectorize database. - * @param documents Array of documents to add. - * @param options Optional ids for the documents. - * @returns Promise that resolves with the ids of the added documents. - */ - async addDocuments( - documents: Document[], - options?: { ids?: string[] } | string[] - ) { - const texts = documents.map(({ pageContent }) => pageContent); - return this.addVectors( - await this.embeddings.embedDocuments(texts), - documents, - options - ); - } - - /** - * Method that adds vectors to the Vectorize database. - * @param vectors Array of vectors to add. - * @param documents Array of documents associated with the vectors. - * @param options Optional ids for the vectors. - * @returns Promise that resolves with the ids of the added vectors. - */ - async addVectors( - vectors: number[][], - documents: Document[], - options?: { ids?: string[] } | string[] - ) { - const ids = Array.isArray(options) ? options : options?.ids; - const documentIds = ids == null ? 
documents.map(() => uuid.v4()) : ids; - const vectorizeVectors = vectors.map((values, idx) => { - const metadata: Record = { - ...documents[idx].metadata, - [this.textKey]: documents[idx].pageContent, - }; - return { - id: documentIds[idx], - metadata, - values, - }; - }); - - // Stick to a limit of 500 vectors per upsert request - const chunkSize = 500; - const chunkedVectors = chunkArray(vectorizeVectors, chunkSize); - const batchRequests = chunkedVectors.map((chunk) => - this.caller.call(async () => this.index.upsert(chunk)) - ); - - await Promise.all(batchRequests); - - return documentIds; - } - - /** - * Method that deletes vectors from the Vectorize database. - * @param params Parameters for the delete operation. - * @returns Promise that resolves when the delete operation is complete. - */ - async delete(params: VectorizeDeleteParams): Promise { - const batchSize = 1000; - const batchedIds = chunkArray(params.ids, batchSize); - const batchRequests = batchedIds.map((batchIds) => - this.caller.call(async () => this.index.deleteByIds(batchIds)) - ); - await Promise.all(batchRequests); - } - - /** - * Method that performs a similarity search in the Vectorize database and - * returns the results along with their scores. - * @param query Query vector for the similarity search. - * @param k Number of top results to return. - * @returns Promise that resolves with an array of documents and their scores. - */ - async similaritySearchVectorWithScore( - query: number[], - k: number - ): Promise<[Document, number][]> { - const results = await this.index.query(query, { - returnVectors: true, - topK: k, - }); - - const result: [Document, number][] = []; - - if (results.matches) { - for (const res of results.matches) { - const { [this.textKey]: pageContent, ...metadata } = - res.vector?.metadata ?? 
{}; - result.push([ - new Document({ metadata, pageContent: pageContent as string }), - res.score, - ]); - } - } - - return result; - } - - /** - * Static method that creates a new instance of the CloudflareVectorizeStore class - * from texts. - * @param texts Array of texts to add to the Vectorize database. - * @param metadatas Metadata associated with the texts. - * @param embeddings Embeddings to use for the texts. - * @param dbConfig Configuration for the Vectorize database. - * @param options Optional ids for the vectors. - * @returns Promise that resolves with a new instance of the CloudflareVectorizeStore class. - */ - static async fromTexts( - texts: string[], - metadatas: - | Record[] - | Record, - embeddings: Embeddings, - dbConfig: VectorizeLibArgs - ): Promise { - const docs: Document[] = []; - for (let i = 0; i < texts.length; i += 1) { - const metadata = Array.isArray(metadatas) ? metadatas[i] : metadatas; - const newDoc = new Document({ - pageContent: texts[i], - metadata, - }); - docs.push(newDoc); - } - return CloudflareVectorizeStore.fromDocuments(docs, embeddings, dbConfig); - } - - /** - * Static method that creates a new instance of the CloudflareVectorizeStore class - * from documents. - * @param docs Array of documents to add to the Vectorize database. - * @param embeddings Embeddings to use for the documents. - * @param dbConfig Configuration for the Vectorize database. - * @param options Optional ids for the vectors. - * @returns Promise that resolves with a new instance of the CloudflareVectorizeStore class. - */ - static async fromDocuments( - docs: Document[], - embeddings: Embeddings, - dbConfig: VectorizeLibArgs - ): Promise { - const instance = new this(embeddings, dbConfig); - await instance.addDocuments(docs); - return instance; - } - - /** - * Static method that creates a new instance of the CloudflareVectorizeStore class - * from an existing index. - * @param embeddings Embeddings to use for the documents. 
- * @param dbConfig Configuration for the Vectorize database. - * @returns Promise that resolves with a new instance of the CloudflareVectorizeStore class. - */ - static async fromExistingIndex( - embeddings: Embeddings, - dbConfig: VectorizeLibArgs - ): Promise { - const instance = new this(embeddings, dbConfig); - return instance; - } -} +export * from "@langchain/community/vectorstores/cloudflare_vectorize"; diff --git a/langchain/src/vectorstores/convex.ts b/langchain/src/vectorstores/convex.ts index e42264ee6060..65839c433e6b 100644 --- a/langchain/src/vectorstores/convex.ts +++ b/langchain/src/vectorstores/convex.ts @@ -1,376 +1 @@ -// eslint-disable-next-line import/no-extraneous-dependencies -import { - DocumentByInfo, - FieldPaths, - FilterExpression, - FunctionReference, - GenericActionCtx, - GenericDataModel, - GenericTableInfo, - NamedTableInfo, - NamedVectorIndex, - TableNamesInDataModel, - VectorFilterBuilder, - VectorIndexNames, - makeFunctionReference, -} from "convex/server"; -import { Document } from "../document.js"; -import { Embeddings } from "../embeddings/base.js"; -import { VectorStore } from "./base.js"; - -/** - * Type that defines the config required to initialize the - * ConvexVectorStore class. It includes the table name, - * index name, text field name, and embedding field name. 
- */ -export type ConvexVectorStoreConfig< - DataModel extends GenericDataModel, - TableName extends TableNamesInDataModel, - IndexName extends VectorIndexNames>, - TextFieldName extends FieldPaths>, - EmbeddingFieldName extends FieldPaths>, - MetadataFieldName extends FieldPaths>, - InsertMutation extends FunctionReference< - "mutation", - "internal", - { table: string; document: object } - >, - GetQuery extends FunctionReference< - "query", - "internal", - { id: string }, - object | null - > -> = { - readonly ctx: GenericActionCtx; - /** - * Defaults to "documents" - */ - readonly table?: TableName; - /** - * Defaults to "byEmbedding" - */ - readonly index?: IndexName; - /** - * Defaults to "text" - */ - readonly textField?: TextFieldName; - /** - * Defaults to "embedding" - */ - readonly embeddingField?: EmbeddingFieldName; - /** - * Defaults to "metadata" - */ - readonly metadataField?: MetadataFieldName; - /** - * Defaults to `internal.langchain.db.insert` - */ - readonly insert?: InsertMutation; - /** - * Defaults to `internal.langchain.db.get` - */ - readonly get?: GetQuery; -}; - -/** - * Class that is a wrapper around Convex storage and vector search. It is used - * to insert embeddings in Convex documents with a vector search index, - * and perform a vector search on them. - * - * ConvexVectorStore does NOT implement maxMarginalRelevanceSearch. 
- */ -export class ConvexVectorStore< - DataModel extends GenericDataModel, - TableName extends TableNamesInDataModel, - IndexName extends VectorIndexNames>, - TextFieldName extends FieldPaths>, - EmbeddingFieldName extends FieldPaths>, - MetadataFieldName extends FieldPaths>, - InsertMutation extends FunctionReference< - "mutation", - "internal", - { table: string; document: object } - >, - GetQuery extends FunctionReference< - "query", - "internal", - { id: string }, - object | null - > -> extends VectorStore { - /** - * Type that defines the filter used in the - * similaritySearchVectorWithScore and maxMarginalRelevanceSearch methods. - * It includes limit, filter and a flag to include embeddings. - */ - declare FilterType: { - filter?: ( - q: VectorFilterBuilder< - DocumentByInfo, - NamedVectorIndex, IndexName> - > - ) => FilterExpression; - includeEmbeddings?: boolean; - }; - - private readonly ctx: GenericActionCtx; - - private readonly table: TableName; - - private readonly index: IndexName; - - private readonly textField: TextFieldName; - - private readonly embeddingField: EmbeddingFieldName; - - private readonly metadataField: MetadataFieldName; - - private readonly insert: InsertMutation; - - private readonly get: GetQuery; - - _vectorstoreType(): string { - return "convex"; - } - - constructor( - embeddings: Embeddings, - config: ConvexVectorStoreConfig< - DataModel, - TableName, - IndexName, - TextFieldName, - EmbeddingFieldName, - MetadataFieldName, - InsertMutation, - GetQuery - > - ) { - super(embeddings, config); - this.ctx = config.ctx; - this.table = config.table ?? ("documents" as TableName); - this.index = config.index ?? ("byEmbedding" as IndexName); - this.textField = config.textField ?? ("text" as TextFieldName); - this.embeddingField = - config.embeddingField ?? ("embedding" as EmbeddingFieldName); - this.metadataField = - config.metadataField ?? 
("metadata" as MetadataFieldName); - this.insert = - // eslint-disable-next-line @typescript-eslint/no-explicit-any - config.insert ?? (makeFunctionReference("langchain/db:insert") as any); - // eslint-disable-next-line @typescript-eslint/no-explicit-any - this.get = config.get ?? (makeFunctionReference("langchain/db:get") as any); - } - - /** - * Add vectors and their corresponding documents to the Convex table. - * @param vectors Vectors to be added. - * @param documents Corresponding documents to be added. - * @returns Promise that resolves when the vectors and documents have been added. - */ - async addVectors(vectors: number[][], documents: Document[]): Promise { - const convexDocuments = vectors.map((embedding, idx) => ({ - [this.textField]: documents[idx].pageContent, - [this.embeddingField]: embedding, - [this.metadataField]: documents[idx].metadata, - })); - // TODO: Remove chunking when Convex handles the concurrent requests correctly - const PAGE_SIZE = 16; - for (let i = 0; i < convexDocuments.length; i += PAGE_SIZE) { - await Promise.all( - convexDocuments.slice(i, i + PAGE_SIZE).map((document) => - this.ctx.runMutation(this.insert, { - table: this.table, - document, - // eslint-disable-next-line @typescript-eslint/no-explicit-any - } as any) - ) - ); - } - } - - /** - * Add documents to the Convex table. It first converts - * the documents to vectors using the embeddings and then calls the - * addVectors method. - * @param documents Documents to be added. - * @returns Promise that resolves when the documents have been added. - */ - async addDocuments(documents: Document[]): Promise { - const texts = documents.map(({ pageContent }) => pageContent); - return this.addVectors( - await this.embeddings.embedDocuments(texts), - documents - ); - } - - /** - * Similarity search on the vectors stored in the - * Convex table. It returns a list of documents and their - * corresponding similarity scores. - * @param query Query vector for the similarity search. 
- * @param k Number of nearest neighbors to return. - * @param filter Optional filter to be applied. - * @returns Promise that resolves to a list of documents and their corresponding similarity scores. - */ - async similaritySearchVectorWithScore( - query: number[], - k: number, - filter?: this["FilterType"] - ): Promise<[Document, number][]> { - const idsAndScores = await this.ctx.vectorSearch(this.table, this.index, { - vector: query, - limit: k, - filter: filter?.filter, - }); - - const documents = await Promise.all( - idsAndScores.map(({ _id }) => - // eslint-disable-next-line @typescript-eslint/no-explicit-any - this.ctx.runQuery(this.get, { id: _id } as any) - ) - ); - - return documents.map( - ( - { - [this.textField]: text, - [this.embeddingField]: embedding, - [this.metadataField]: metadata, - }, - idx - ) => [ - new Document({ - pageContent: text as string, - metadata: { - ...metadata, - ...(filter?.includeEmbeddings ? { embedding } : null), - }, - }), - idsAndScores[idx]._score, - ] - ); - } - - /** - * Static method to create an instance of ConvexVectorStore from a - * list of texts. It first converts the texts to vectors and then adds - * them to the Convex table. - * @param texts List of texts to be converted to vectors. - * @param metadatas Metadata for the texts. - * @param embeddings Embeddings to be used for conversion. - * @param dbConfig Database configuration for Convex. - * @returns Promise that resolves to a new instance of ConvexVectorStore. 
- */ - static async fromTexts< - DataModel extends GenericDataModel, - TableName extends TableNamesInDataModel, - IndexName extends VectorIndexNames>, - TextFieldName extends FieldPaths>, - EmbeddingFieldName extends FieldPaths>, - MetadataFieldName extends FieldPaths>, - InsertMutation extends FunctionReference< - "mutation", - "internal", - { table: string; document: object } - >, - GetQuery extends FunctionReference< - "query", - "internal", - { id: string }, - object | null - > - >( - texts: string[], - metadatas: object[] | object, - embeddings: Embeddings, - dbConfig: ConvexVectorStoreConfig< - DataModel, - TableName, - IndexName, - TextFieldName, - EmbeddingFieldName, - MetadataFieldName, - InsertMutation, - GetQuery - > - ): Promise< - ConvexVectorStore< - DataModel, - TableName, - IndexName, - TextFieldName, - EmbeddingFieldName, - MetadataFieldName, - InsertMutation, - GetQuery - > - > { - const docs = texts.map( - (text, i) => - new Document({ - pageContent: text, - metadata: Array.isArray(metadatas) ? metadatas[i] : metadatas, - }) - ); - return ConvexVectorStore.fromDocuments(docs, embeddings, dbConfig); - } - - /** - * Static method to create an instance of ConvexVectorStore from a - * list of documents. It first converts the documents to vectors and then - * adds them to the Convex table. - * @param docs List of documents to be converted to vectors. - * @param embeddings Embeddings to be used for conversion. - * @param dbConfig Database configuration for Convex. - * @returns Promise that resolves to a new instance of ConvexVectorStore. 
- */ - static async fromDocuments< - DataModel extends GenericDataModel, - TableName extends TableNamesInDataModel, - IndexName extends VectorIndexNames>, - TextFieldName extends FieldPaths>, - EmbeddingFieldName extends FieldPaths>, - MetadataFieldName extends FieldPaths>, - InsertMutation extends FunctionReference< - "mutation", - "internal", - { table: string; document: object } - >, - GetQuery extends FunctionReference< - "query", - "internal", - { id: string }, - object | null - > - >( - docs: Document[], - embeddings: Embeddings, - dbConfig: ConvexVectorStoreConfig< - DataModel, - TableName, - IndexName, - TextFieldName, - EmbeddingFieldName, - MetadataFieldName, - InsertMutation, - GetQuery - > - ): Promise< - ConvexVectorStore< - DataModel, - TableName, - IndexName, - TextFieldName, - EmbeddingFieldName, - MetadataFieldName, - InsertMutation, - GetQuery - > - > { - const instance = new this(embeddings, dbConfig); - await instance.addDocuments(docs); - return instance; - } -} +export * from "@langchain/community/vectorstores/convex"; diff --git a/langchain/src/vectorstores/elasticsearch.ts b/langchain/src/vectorstores/elasticsearch.ts index 3d0161bb8035..cc0d9f90ea4d 100644 --- a/langchain/src/vectorstores/elasticsearch.ts +++ b/langchain/src/vectorstores/elasticsearch.ts @@ -1,342 +1 @@ -import * as uuid from "uuid"; -import { Client, estypes } from "@elastic/elasticsearch"; -import { Embeddings } from "../embeddings/base.js"; -import { Document } from "../document.js"; -import { VectorStore } from "./base.js"; - -/** - * Type representing the k-nearest neighbors (k-NN) engine used in - * Elasticsearch. - */ -type ElasticKnnEngine = "hnsw"; -/** - * Type representing the similarity measure used in Elasticsearch. - */ -type ElasticSimilarity = "l2_norm" | "dot_product" | "cosine"; - -/** - * Interface defining the options for vector search in Elasticsearch. 
- */ -interface VectorSearchOptions { - readonly engine?: ElasticKnnEngine; - readonly similarity?: ElasticSimilarity; - readonly m?: number; - readonly efConstruction?: number; - readonly candidates?: number; -} - -/** - * Interface defining the arguments required to create an Elasticsearch - * client. - */ -export interface ElasticClientArgs { - readonly client: Client; - readonly indexName?: string; - readonly vectorSearchOptions?: VectorSearchOptions; -} - -/** - * Type representing a filter object in Elasticsearch. - */ -// eslint-disable-next-line @typescript-eslint/no-explicit-any -type ElasticFilter = object | { field: string; operator: string; value: any }[]; - -/** - * Class for interacting with an Elasticsearch database. It extends the - * VectorStore base class and provides methods for adding documents and - * vectors to the Elasticsearch database, performing similarity searches, - * deleting documents, and more. - */ -export class ElasticVectorSearch extends VectorStore { - declare FilterType: ElasticFilter; - - private readonly client: Client; - - private readonly indexName: string; - - private readonly engine: ElasticKnnEngine; - - private readonly similarity: ElasticSimilarity; - - private readonly efConstruction: number; - - private readonly m: number; - - private readonly candidates: number; - - _vectorstoreType(): string { - return "elasticsearch"; - } - - constructor(embeddings: Embeddings, args: ElasticClientArgs) { - super(embeddings, args); - - this.engine = args.vectorSearchOptions?.engine ?? "hnsw"; - this.similarity = args.vectorSearchOptions?.similarity ?? "l2_norm"; - this.m = args.vectorSearchOptions?.m ?? 16; - this.efConstruction = args.vectorSearchOptions?.efConstruction ?? 100; - this.candidates = args.vectorSearchOptions?.candidates ?? 200; - - this.client = args.client.child({ - headers: { "user-agent": "langchain-js-vs/0.0.1" }, - }); - this.indexName = args.indexName ?? 
"documents"; - } - - /** - * Method to add documents to the Elasticsearch database. It first - * converts the documents to vectors using the embeddings, then adds the - * vectors to the database. - * @param documents The documents to add to the database. - * @param options Optional parameter that can contain the IDs for the documents. - * @returns A promise that resolves with the IDs of the added documents. - */ - async addDocuments(documents: Document[], options?: { ids?: string[] }) { - const texts = documents.map(({ pageContent }) => pageContent); - return this.addVectors( - await this.embeddings.embedDocuments(texts), - documents, - options - ); - } - - /** - * Method to add vectors to the Elasticsearch database. It ensures the - * index exists, then adds the vectors and their corresponding documents - * to the database. - * @param vectors The vectors to add to the database. - * @param documents The documents corresponding to the vectors. - * @param options Optional parameter that can contain the IDs for the documents. - * @returns A promise that resolves with the IDs of the added documents. - */ - async addVectors( - vectors: number[][], - documents: Document[], - options?: { ids?: string[] } - ) { - await this.ensureIndexExists( - vectors[0].length, - this.engine, - this.similarity, - this.efConstruction, - this.m - ); - const documentIds = - options?.ids ?? Array.from({ length: vectors.length }, () => uuid.v4()); - const operations = vectors.flatMap((embedding, idx) => [ - { - index: { - _id: documentIds[idx], - _index: this.indexName, - }, - }, - { - embedding, - metadata: documents[idx].metadata, - text: documents[idx].pageContent, - }, - ]); - await this.client.bulk({ refresh: true, operations }); - return documentIds; - } - - /** - * Method to perform a similarity search in the Elasticsearch database - * using a vector. It returns the k most similar documents along with - * their similarity scores. - * @param query The query vector. 
- * @param k The number of most similar documents to return. - * @param filter Optional filter to apply to the search. - * @returns A promise that resolves with an array of tuples, where each tuple contains a Document and its similarity score. - */ - async similaritySearchVectorWithScore( - query: number[], - k: number, - filter?: ElasticFilter - ): Promise<[Document, number][]> { - const result = await this.client.search({ - index: this.indexName, - size: k, - knn: { - field: "embedding", - query_vector: query, - filter: this.buildMetadataTerms(filter), - k, - num_candidates: this.candidates, - }, - }); - - // eslint-disable-next-line @typescript-eslint/no-explicit-any - return result.hits.hits.map((hit: any) => [ - new Document({ - pageContent: hit._source.text, - metadata: hit._source.metadata, - }), - hit._score, - ]); - } - - /** - * Method to delete documents from the Elasticsearch database. - * @param params Object containing the IDs of the documents to delete. - * @returns A promise that resolves when the deletion is complete. - */ - async delete(params: { ids: string[] }): Promise { - const operations = params.ids.map((id) => ({ - delete: { - _id: id, - _index: this.indexName, - }, - })); - await this.client.bulk({ refresh: true, operations }); - } - - /** - * Static method to create an ElasticVectorSearch instance from texts. It - * creates Document instances from the texts and their corresponding - * metadata, then calls the fromDocuments method to create the - * ElasticVectorSearch instance. - * @param texts The texts to create the ElasticVectorSearch instance from. - * @param metadatas The metadata corresponding to the texts. - * @param embeddings The embeddings to use for the documents. - * @param args The arguments to create the Elasticsearch client. - * @returns A promise that resolves with the created ElasticVectorSearch instance. 
- */ - static fromTexts( - texts: string[], - metadatas: object[] | object, - embeddings: Embeddings, - args: ElasticClientArgs - ): Promise { - const documents = texts.map((text, idx) => { - const metadata = Array.isArray(metadatas) ? metadatas[idx] : metadatas; - return new Document({ pageContent: text, metadata }); - }); - - return ElasticVectorSearch.fromDocuments(documents, embeddings, args); - } - - /** - * Static method to create an ElasticVectorSearch instance from Document - * instances. It adds the documents to the Elasticsearch database, then - * returns the ElasticVectorSearch instance. - * @param docs The Document instances to create the ElasticVectorSearch instance from. - * @param embeddings The embeddings to use for the documents. - * @param dbConfig The configuration for the Elasticsearch database. - * @returns A promise that resolves with the created ElasticVectorSearch instance. - */ - static async fromDocuments( - docs: Document[], - embeddings: Embeddings, - dbConfig: ElasticClientArgs - ): Promise { - const store = new ElasticVectorSearch(embeddings, dbConfig); - await store.addDocuments(docs).then(() => store); - return store; - } - - /** - * Static method to create an ElasticVectorSearch instance from an - * existing index in the Elasticsearch database. It checks if the index - * exists, then returns the ElasticVectorSearch instance if it does. - * @param embeddings The embeddings to use for the documents. - * @param dbConfig The configuration for the Elasticsearch database. - * @returns A promise that resolves with the created ElasticVectorSearch instance if the index exists, otherwise it throws an error. 
- */ - static async fromExistingIndex( - embeddings: Embeddings, - dbConfig: ElasticClientArgs - ): Promise { - const store = new ElasticVectorSearch(embeddings, dbConfig); - const exists = await store.doesIndexExist(); - if (exists) { - return store; - } - throw new Error(`The index ${store.indexName} does not exist.`); - } - - private async ensureIndexExists( - dimension: number, - engine = "hnsw", - similarity = "l2_norm", - efConstruction = 100, - m = 16 - ): Promise { - const request: estypes.IndicesCreateRequest = { - index: this.indexName, - mappings: { - dynamic_templates: [ - { - // map all metadata properties to be keyword - "metadata.*": { - match_mapping_type: "*", - mapping: { type: "keyword" }, - }, - }, - ], - properties: { - text: { type: "text" }, - metadata: { type: "object" }, - embedding: { - type: "dense_vector", - dims: dimension, - index: true, - similarity, - index_options: { - type: engine, - m, - ef_construction: efConstruction, - }, - }, - }, - }, - }; - - const indexExists = await this.doesIndexExist(); - if (indexExists) return; - - await this.client.indices.create(request); - } - - private buildMetadataTerms( - filter?: ElasticFilter - // eslint-disable-next-line @typescript-eslint/no-explicit-any - ): { [operator: string]: { [field: string]: any } }[] { - if (filter == null) return []; - const result = []; - const filters = Array.isArray(filter) - ? filter - : Object.entries(filter).map(([key, value]) => ({ - operator: "term", - field: key, - value, - })); - for (const condition of filters) { - result.push({ - [condition.operator]: { - [`metadata.${condition.field}`]: condition.value, - }, - }); - } - return result; - } - - /** - * Method to check if an index exists in the Elasticsearch database. - * @returns A promise that resolves with a boolean indicating whether the index exists. 
- */ - async doesIndexExist(): Promise { - return await this.client.indices.exists({ index: this.indexName }); - } - - /** - * Method to delete an index from the Elasticsearch database if it exists. - * @returns A promise that resolves when the deletion is complete. - */ - async deleteIfExists(): Promise { - const indexExists = await this.doesIndexExist(); - if (!indexExists) return; - - await this.client.indices.delete({ index: this.indexName }); - } -} +export * from "@langchain/community/vectorstores/elasticsearch"; diff --git a/langchain/src/vectorstores/faiss.ts b/langchain/src/vectorstores/faiss.ts index 9ba2ffd3ac49..3696e021927b 100644 --- a/langchain/src/vectorstores/faiss.ts +++ b/langchain/src/vectorstores/faiss.ts @@ -1,461 +1 @@ -import type { IndexFlatL2 } from "faiss-node"; -import type { NameRegistry, Parser } from "pickleparser"; -import * as uuid from "uuid"; -import { Embeddings } from "../embeddings/base.js"; -import { SaveableVectorStore } from "./base.js"; -import { Document } from "../document.js"; -import { SynchronousInMemoryDocstore } from "../stores/doc/in_memory.js"; - -/** - * Interface for the arguments required to initialize a FaissStore - * instance. - */ -export interface FaissLibArgs { - docstore?: SynchronousInMemoryDocstore; - index?: IndexFlatL2; - mapping?: Record; -} - -/** - * A class that wraps the FAISS (Facebook AI Similarity Search) vector - * database for efficient similarity search and clustering of dense - * vectors. 
- */ -export class FaissStore extends SaveableVectorStore { - _index?: IndexFlatL2; - - _mapping: Record; - - docstore: SynchronousInMemoryDocstore; - - args: FaissLibArgs; - - _vectorstoreType(): string { - return "faiss"; - } - - getMapping(): Record { - return this._mapping; - } - - getDocstore(): SynchronousInMemoryDocstore { - return this.docstore; - } - - constructor(embeddings: Embeddings, args: FaissLibArgs) { - super(embeddings, args); - this.args = args; - this._index = args.index; - this._mapping = args.mapping ?? {}; - this.embeddings = embeddings; - this.docstore = args?.docstore ?? new SynchronousInMemoryDocstore(); - } - - /** - * Adds an array of Document objects to the store. - * @param documents An array of Document objects. - * @returns A Promise that resolves when the documents have been added. - */ - async addDocuments(documents: Document[], options?: { ids?: string[] }) { - const texts = documents.map(({ pageContent }) => pageContent); - return this.addVectors( - await this.embeddings.embedDocuments(texts), - documents, - options - ); - } - - public get index(): IndexFlatL2 { - if (!this._index) { - throw new Error( - "Vector store not initialised yet. Try calling `fromTexts`, `fromDocuments` or `fromIndex` first." - ); - } - return this._index; - } - - private set index(index: IndexFlatL2) { - this._index = index; - } - - /** - * Adds an array of vectors and their corresponding Document objects to - * the store. - * @param vectors An array of vectors. - * @param documents An array of Document objects corresponding to the vectors. - * @returns A Promise that resolves with an array of document IDs when the vectors and documents have been added. 
- */ - async addVectors( - vectors: number[][], - documents: Document[], - options?: { ids?: string[] } - ) { - if (vectors.length === 0) { - return []; - } - if (vectors.length !== documents.length) { - throw new Error(`Vectors and documents must have the same length`); - } - const dv = vectors[0].length; - if (!this._index) { - const { IndexFlatL2 } = await FaissStore.importFaiss(); - this._index = new IndexFlatL2(dv); - } - const d = this.index.getDimension(); - if (dv !== d) { - throw new Error( - `Vectors must have the same length as the number of dimensions (${d})` - ); - } - - const docstoreSize = this.index.ntotal(); - const documentIds = options?.ids ?? documents.map(() => uuid.v4()); - for (let i = 0; i < vectors.length; i += 1) { - const documentId = documentIds[i]; - const id = docstoreSize + i; - this.index.add(vectors[i]); - this._mapping[id] = documentId; - this.docstore.add({ [documentId]: documents[i] }); - } - return documentIds; - } - - /** - * Performs a similarity search in the vector store using a query vector - * and returns the top k results along with their scores. - * @param query A query vector. - * @param k The number of top results to return. - * @returns A Promise that resolves with an array of tuples, each containing a Document and its corresponding score. 
- */ - async similaritySearchVectorWithScore(query: number[], k: number) { - const d = this.index.getDimension(); - if (query.length !== d) { - throw new Error( - `Query vector must have the same length as the number of dimensions (${d})` - ); - } - if (k > this.index.ntotal()) { - const total = this.index.ntotal(); - console.warn( - `k (${k}) is greater than the number of elements in the index (${total}), setting k to ${total}` - ); - // eslint-disable-next-line no-param-reassign - k = total; - } - const result = this.index.search(query, k); - return result.labels.map((id, index) => { - const uuid = this._mapping[id]; - return [this.docstore.search(uuid), result.distances[index]] as [ - Document, - number - ]; - }); - } - - /** - * Saves the current state of the FaissStore to a specified directory. - * @param directory The directory to save the state to. - * @returns A Promise that resolves when the state has been saved. - */ - async save(directory: string) { - const fs = await import("node:fs/promises"); - const path = await import("node:path"); - await fs.mkdir(directory, { recursive: true }); - await Promise.all([ - this.index.write(path.join(directory, "faiss.index")), - await fs.writeFile( - path.join(directory, "docstore.json"), - JSON.stringify([ - Array.from(this.docstore._docs.entries()), - this._mapping, - ]) - ), - ]); - } - - /** - * Method to delete documents. - * @param params Object containing the IDs of the documents to delete. - * @returns A promise that resolves when the deletion is complete. 
- */ - async delete(params: { ids: string[] }) { - const documentIds = params.ids; - if (documentIds == null) { - throw new Error("No documentIds provided to delete."); - } - - const mappings = new Map( - Object.entries(this._mapping).map(([key, value]) => [ - parseInt(key, 10), - value, - ]) - ); - const reversedMappings = new Map( - Array.from(mappings, (entry) => [entry[1], entry[0]]) - ); - - const missingIds = new Set( - documentIds.filter((id) => !reversedMappings.has(id)) - ); - if (missingIds.size > 0) { - throw new Error( - `Some specified documentIds do not exist in the current store. DocumentIds not found: ${Array.from( - missingIds - ).join(", ")}` - ); - } - - // eslint-disable-next-line @typescript-eslint/no-non-null-assertion - const indexIdToDelete = documentIds.map((id) => reversedMappings.get(id)!); - - // remove from index - this.index.removeIds(indexIdToDelete); - // remove from docstore - documentIds.forEach((id) => { - this.docstore._docs.delete(id); - }); - // remove from mappings - indexIdToDelete.forEach((id) => { - mappings.delete(id); - }); - - this._mapping = { ...Array.from(mappings.values()) }; - } - - /** - * Merges the current FaissStore with another FaissStore. - * @param targetIndex The FaissStore to merge with. - * @returns A Promise that resolves with an array of document IDs when the merge is complete. 
- */ - async mergeFrom(targetIndex: FaissStore) { - const targetIndexDimensions = targetIndex.index.getDimension(); - if (!this._index) { - const { IndexFlatL2 } = await FaissStore.importFaiss(); - this._index = new IndexFlatL2(targetIndexDimensions); - } - const d = this.index.getDimension(); - if (targetIndexDimensions !== d) { - throw new Error("Cannot merge indexes with different dimensions."); - } - const targetMapping = targetIndex.getMapping(); - const targetDocstore = targetIndex.getDocstore(); - const targetSize = targetIndex.index.ntotal(); - const documentIds = []; - const currentDocstoreSize = this.index.ntotal(); - for (let i = 0; i < targetSize; i += 1) { - const targetId = targetMapping[i]; - documentIds.push(targetId); - const targetDocument = targetDocstore.search(targetId); - const id = currentDocstoreSize + i; - this._mapping[id] = targetId; - this.docstore.add({ [targetId]: targetDocument }); - } - this.index.mergeFrom(targetIndex.index); - return documentIds; - } - - /** - * Loads a FaissStore from a specified directory. - * @param directory The directory to load the FaissStore from. - * @param embeddings An Embeddings object. - * @returns A Promise that resolves with a new FaissStore instance. 
- */ - static async load(directory: string, embeddings: Embeddings) { - const fs = await import("node:fs/promises"); - const path = await import("node:path"); - const readStore = (directory: string) => - fs - .readFile(path.join(directory, "docstore.json"), "utf8") - .then(JSON.parse) as Promise< - [Map, Record] - >; - const readIndex = async (directory: string) => { - const { IndexFlatL2 } = await this.importFaiss(); - return IndexFlatL2.read(path.join(directory, "faiss.index")); - }; - const [[docstoreFiles, mapping], index] = await Promise.all([ - readStore(directory), - readIndex(directory), - ]); - const docstore = new SynchronousInMemoryDocstore(new Map(docstoreFiles)); - return new this(embeddings, { docstore, index, mapping }); - } - - static async loadFromPython(directory: string, embeddings: Embeddings) { - const fs = await import("node:fs/promises"); - const path = await import("node:path"); - const { Parser, NameRegistry } = await this.importPickleparser(); - - class PyDocument extends Map { - toDocument(): Document { - return new Document({ - pageContent: this.get("page_content"), - metadata: this.get("metadata"), - }); - } - } - - class PyInMemoryDocstore { - _dict: Map; - - toInMemoryDocstore(): SynchronousInMemoryDocstore { - const s = new SynchronousInMemoryDocstore(); - for (const [key, value] of Object.entries(this._dict)) { - s._docs.set(key, value.toDocument()); - } - return s; - } - } - - const readStore = async (directory: string) => { - const pkl = await fs.readFile( - path.join(directory, "index.pkl"), - "binary" - ); - const buffer = Buffer.from(pkl, "binary"); - - const registry = new NameRegistry() - .register( - "langchain.docstore.in_memory", - "InMemoryDocstore", - PyInMemoryDocstore - ) - .register("langchain.schema", "Document", PyDocument) - .register("langchain.docstore.document", "Document", PyDocument) - .register("langchain.schema.document", "Document", PyDocument) - .register("pathlib", "WindowsPath", (...args) => 
args.join("\\")) - .register("pathlib", "PosixPath", (...args) => args.join("/")); - - const pickleparser = new Parser({ - nameResolver: registry, - }); - const [rawStore, mapping] = - pickleparser.parse<[PyInMemoryDocstore, Record]>( - buffer - ); - const store = rawStore.toInMemoryDocstore(); - return { store, mapping }; - }; - const readIndex = async (directory: string) => { - const { IndexFlatL2 } = await this.importFaiss(); - return IndexFlatL2.read(path.join(directory, "index.faiss")); - }; - const [store, index] = await Promise.all([ - readStore(directory), - readIndex(directory), - ]); - return new this(embeddings, { - docstore: store.store, - index, - mapping: store.mapping, - }); - } - - /** - * Creates a new FaissStore from an array of texts, their corresponding - * metadata, and an Embeddings object. - * @param texts An array of texts. - * @param metadatas An array of metadata corresponding to the texts, or a single metadata object to be used for all texts. - * @param embeddings An Embeddings object. - * @param dbConfig An optional configuration object for the document store. - * @returns A Promise that resolves with a new FaissStore instance. - */ - static async fromTexts( - texts: string[], - metadatas: object[] | object, - embeddings: Embeddings, - dbConfig?: { - docstore?: SynchronousInMemoryDocstore; - } - ): Promise { - const docs: Document[] = []; - for (let i = 0; i < texts.length; i += 1) { - const metadata = Array.isArray(metadatas) ? metadatas[i] : metadatas; - const newDoc = new Document({ - pageContent: texts[i], - metadata, - }); - docs.push(newDoc); - } - return this.fromDocuments(docs, embeddings, dbConfig); - } - - /** - * Creates a new FaissStore from an array of Document objects and an - * Embeddings object. - * @param docs An array of Document objects. - * @param embeddings An Embeddings object. - * @param dbConfig An optional configuration object for the document store. 
- * @returns A Promise that resolves with a new FaissStore instance. - */ - static async fromDocuments( - docs: Document[], - embeddings: Embeddings, - dbConfig?: { - docstore?: SynchronousInMemoryDocstore; - } - ): Promise { - const args: FaissLibArgs = { - docstore: dbConfig?.docstore, - }; - const instance = new this(embeddings, args); - await instance.addDocuments(docs); - return instance; - } - - /** - * Creates a new FaissStore from an existing FaissStore and an Embeddings - * object. - * @param targetIndex An existing FaissStore. - * @param embeddings An Embeddings object. - * @param dbConfig An optional configuration object for the document store. - * @returns A Promise that resolves with a new FaissStore instance. - */ - static async fromIndex( - targetIndex: FaissStore, - embeddings: Embeddings, - dbConfig?: { - docstore?: SynchronousInMemoryDocstore; - } - ): Promise { - const args: FaissLibArgs = { - docstore: dbConfig?.docstore, - }; - const instance = new this(embeddings, args); - await instance.mergeFrom(targetIndex); - return instance; - } - - static async importFaiss(): Promise<{ IndexFlatL2: typeof IndexFlatL2 }> { - try { - const { - default: { IndexFlatL2 }, - } = await import("faiss-node"); - - return { IndexFlatL2 }; - // eslint-disable-next-line @typescript-eslint/no-explicit-any - } catch (err: any) { - throw new Error( - `Could not import faiss-node. Please install faiss-node as a dependency with, e.g. \`npm install -S faiss-node\`.\n\nError: ${err?.message}` - ); - } - } - - static async importPickleparser(): Promise<{ - Parser: typeof Parser; - NameRegistry: typeof NameRegistry; - }> { - try { - const { - default: { Parser, NameRegistry }, - } = await import("pickleparser"); - - return { Parser, NameRegistry }; - // eslint-disable-next-line @typescript-eslint/no-explicit-any - } catch (err: any) { - throw new Error( - `Could not import pickleparser. Please install pickleparser as a dependency with, e.g. 
\`npm install -S pickleparser\`.\n\nError: ${err?.message}` - ); - } - } -} +export * from "@langchain/community/vectorstores/faiss"; diff --git a/langchain/src/vectorstores/googlevertexai.ts b/langchain/src/vectorstores/googlevertexai.ts index 0deaf7cfa5a3..dd4573f3feb8 100644 --- a/langchain/src/vectorstores/googlevertexai.ts +++ b/langchain/src/vectorstores/googlevertexai.ts @@ -1,737 +1 @@ -import * as uuid from "uuid"; -import flatten from "flat"; -import { GoogleAuth, GoogleAuthOptions } from "google-auth-library"; -import { VectorStore } from "./base.js"; -import { Embeddings } from "../embeddings/base.js"; -import { Document, DocumentInput } from "../document.js"; -import { GoogleVertexAIConnection } from "../util/googlevertexai-connection.js"; -import { - AsyncCaller, - AsyncCallerCallOptions, - AsyncCallerParams, -} from "../util/async_caller.js"; -import { - GoogleVertexAIConnectionParams, - GoogleResponse, - GoogleAbstractedClientOpsMethod, -} from "../types/googlevertexai-types.js"; -import { Docstore } from "../schema/index.js"; - -/** - * Allows us to create IdDocument classes that contain the ID. - */ -export interface IdDocumentInput extends DocumentInput { - id?: string; -} - -/** - * A Document that optionally includes the ID of the document. 
- */ -export class IdDocument extends Document implements IdDocumentInput { - id?: string; - - constructor(fields: IdDocumentInput) { - super(fields); - this.id = fields.id; - } -} - -interface IndexEndpointConnectionParams - extends GoogleVertexAIConnectionParams { - indexEndpoint: string; -} - -interface DeployedIndex { - id: string; - index: string; - // There are other attributes, but we don't care about them right now -} - -interface IndexEndpointResponse extends GoogleResponse { - data: { - deployedIndexes: DeployedIndex[]; - publicEndpointDomainName: string; - // There are other attributes, but we don't care about them right now - }; -} - -class IndexEndpointConnection extends GoogleVertexAIConnection< - AsyncCallerCallOptions, - IndexEndpointResponse, - GoogleAuthOptions -> { - indexEndpoint: string; - - constructor(fields: IndexEndpointConnectionParams, caller: AsyncCaller) { - super(fields, caller, new GoogleAuth(fields.authOptions)); - - this.indexEndpoint = fields.indexEndpoint; - } - - async buildUrl(): Promise { - const projectId = await this.client.getProjectId(); - const url = `https://${this.endpoint}/${this.apiVersion}/projects/${projectId}/locations/${this.location}/indexEndpoints/${this.indexEndpoint}`; - return url; - } - - buildMethod(): GoogleAbstractedClientOpsMethod { - return "GET"; - } - - async request( - options: AsyncCallerCallOptions - ): Promise { - return this._request(undefined, options); - } -} - -/** - * Used to represent parameters that are necessary to delete documents - * from the matching engine. 
These must be a list of string IDs - */ -export interface MatchingEngineDeleteParams { - ids: string[]; -} - -interface RemoveDatapointParams - extends GoogleVertexAIConnectionParams { - index: string; -} - -interface RemoveDatapointRequest { - datapointIds: string[]; -} - -interface RemoveDatapointResponse extends GoogleResponse { - // Should be empty -} - -class RemoveDatapointConnection extends GoogleVertexAIConnection< - AsyncCallerCallOptions, - RemoveDatapointResponse, - GoogleAuthOptions -> { - index: string; - - constructor(fields: RemoveDatapointParams, caller: AsyncCaller) { - super(fields, caller, new GoogleAuth(fields.authOptions)); - - this.index = fields.index; - } - - async buildUrl(): Promise { - const projectId = await this.client.getProjectId(); - const url = `https://${this.endpoint}/${this.apiVersion}/projects/${projectId}/locations/${this.location}/indexes/${this.index}:removeDatapoints`; - return url; - } - - buildMethod(): GoogleAbstractedClientOpsMethod { - return "POST"; - } - - async request( - datapointIds: string[], - options: AsyncCallerCallOptions - ): Promise { - const data: RemoveDatapointRequest = { - datapointIds, - }; - return this._request(data, options); - } -} - -interface UpsertDatapointParams - extends GoogleVertexAIConnectionParams { - index: string; -} - -export interface Restriction { - namespace: string; - allowList?: string[]; - denyList?: string[]; -} - -interface CrowdingTag { - crowdingAttribute: string; -} - -interface IndexDatapoint { - datapointId: string; - featureVector: number[]; - restricts?: Restriction[]; - crowdingTag?: CrowdingTag; -} - -interface UpsertDatapointRequest { - datapoints: IndexDatapoint[]; -} - -interface UpsertDatapointResponse extends GoogleResponse { - // Should be empty -} - -class UpsertDatapointConnection extends GoogleVertexAIConnection< - AsyncCallerCallOptions, - UpsertDatapointResponse, - GoogleAuthOptions -> { - index: string; - - constructor(fields: UpsertDatapointParams, caller: 
AsyncCaller) { - super(fields, caller, new GoogleAuth(fields.authOptions)); - - this.index = fields.index; - } - - async buildUrl(): Promise { - const projectId = await this.client.getProjectId(); - const url = `https://${this.endpoint}/${this.apiVersion}/projects/${projectId}/locations/${this.location}/indexes/${this.index}:upsertDatapoints`; - return url; - } - - buildMethod(): GoogleAbstractedClientOpsMethod { - return "POST"; - } - - async request( - datapoints: IndexDatapoint[], - options: AsyncCallerCallOptions - ): Promise { - const data: UpsertDatapointRequest = { - datapoints, - }; - return this._request(data, options); - } -} - -interface FindNeighborsConnectionParams - extends GoogleVertexAIConnectionParams { - indexEndpoint: string; - - deployedIndexId: string; -} - -interface FindNeighborsRequestQuery { - datapoint: { - datapointId: string; - featureVector: number[]; - restricts?: Restriction[]; - }; - neighborCount: number; -} - -interface FindNeighborsRequest { - deployedIndexId: string; - queries: FindNeighborsRequestQuery[]; -} - -interface FindNeighborsResponseNeighbor { - datapoint: { - datapointId: string; - crowdingTag: { - crowdingTagAttribute: string; - }; - }; - distance: number; -} - -interface FindNeighborsResponseNearestNeighbor { - id: string; - neighbors: FindNeighborsResponseNeighbor[]; -} - -interface FindNeighborsResponse extends GoogleResponse { - data: { - nearestNeighbors: FindNeighborsResponseNearestNeighbor[]; - }; -} - -class FindNeighborsConnection - extends GoogleVertexAIConnection< - AsyncCallerCallOptions, - FindNeighborsResponse, - GoogleAuthOptions - > - implements FindNeighborsConnectionParams -{ - indexEndpoint: string; - - deployedIndexId: string; - - constructor(params: FindNeighborsConnectionParams, caller: AsyncCaller) { - super(params, caller, new GoogleAuth(params.authOptions)); - - this.indexEndpoint = params.indexEndpoint; - this.deployedIndexId = params.deployedIndexId; - } - - async buildUrl(): Promise { - 
const projectId = await this.client.getProjectId(); - const url = `https://${this.endpoint}/${this.apiVersion}/projects/${projectId}/locations/${this.location}/indexEndpoints/${this.indexEndpoint}:findNeighbors`; - return url; - } - - buildMethod(): GoogleAbstractedClientOpsMethod { - return "POST"; - } - - async request( - request: FindNeighborsRequest, - options: AsyncCallerCallOptions - ): Promise { - return this._request(request, options); - } -} - -/** - * Information about the Matching Engine public API endpoint. - * Primarily exported to allow for testing. - */ -export interface PublicAPIEndpointInfo { - apiEndpoint?: string; - - deployedIndexId?: string; -} - -/** - * Parameters necessary to configure the Matching Engine. - */ -export interface MatchingEngineArgs - extends GoogleVertexAIConnectionParams, - IndexEndpointConnectionParams, - UpsertDatapointParams { - docstore: Docstore; - - callerParams?: AsyncCallerParams; - - callerOptions?: AsyncCallerCallOptions; - - apiEndpoint?: string; - - deployedIndexId?: string; -} - -/** - * A class that represents a connection to a Google Vertex AI Matching Engine - * instance. - */ -export class MatchingEngine extends VectorStore implements MatchingEngineArgs { - declare FilterType: Restriction[]; - - /** - * Docstore that retains the document, stored by ID - */ - docstore: Docstore; - - /** - * The host to connect to for queries and upserts. 
- */ - apiEndpoint: string; - - apiVersion = "v1"; - - endpoint = "us-central1-aiplatform.googleapis.com"; - - location = "us-central1"; - - /** - * The id for the index endpoint - */ - indexEndpoint: string; - - /** - * The id for the index - */ - index: string; - - /** - * The id for the "deployed index", which is an identifier in the - * index endpoint that references the index (but is not the index id) - */ - deployedIndexId: string; - - callerParams: AsyncCallerParams; - - callerOptions: AsyncCallerCallOptions; - - caller: AsyncCaller; - - indexEndpointClient: IndexEndpointConnection; - - removeDatapointClient: RemoveDatapointConnection; - - upsertDatapointClient: UpsertDatapointConnection; - - constructor(embeddings: Embeddings, args: MatchingEngineArgs) { - super(embeddings, args); - - this.embeddings = embeddings; - this.docstore = args.docstore; - - this.apiEndpoint = args.apiEndpoint ?? this.apiEndpoint; - this.deployedIndexId = args.deployedIndexId ?? this.deployedIndexId; - - this.apiVersion = args.apiVersion ?? this.apiVersion; - this.endpoint = args.endpoint ?? this.endpoint; - this.location = args.location ?? this.location; - this.indexEndpoint = args.indexEndpoint ?? this.indexEndpoint; - this.index = args.index ?? this.index; - - this.callerParams = args.callerParams ?? this.callerParams; - this.callerOptions = args.callerOptions ?? 
this.callerOptions; - this.caller = new AsyncCaller(this.callerParams || {}); - - const indexClientParams: IndexEndpointConnectionParams = { - endpoint: this.endpoint, - location: this.location, - apiVersion: this.apiVersion, - indexEndpoint: this.indexEndpoint, - }; - this.indexEndpointClient = new IndexEndpointConnection( - indexClientParams, - this.caller - ); - - const removeClientParams: RemoveDatapointParams = { - endpoint: this.endpoint, - location: this.location, - apiVersion: this.apiVersion, - index: this.index, - }; - this.removeDatapointClient = new RemoveDatapointConnection( - removeClientParams, - this.caller - ); - - const upsertClientParams: UpsertDatapointParams = { - endpoint: this.endpoint, - location: this.location, - apiVersion: this.apiVersion, - index: this.index, - }; - this.upsertDatapointClient = new UpsertDatapointConnection( - upsertClientParams, - this.caller - ); - } - - _vectorstoreType(): string { - return "googlevertexai"; - } - - async addDocuments(documents: Document[]): Promise { - const texts: string[] = documents.map((doc) => doc.pageContent); - const vectors: number[][] = await this.embeddings.embedDocuments(texts); - return this.addVectors(vectors, documents); - } - - async addVectors(vectors: number[][], documents: Document[]): Promise { - if (vectors.length !== documents.length) { - throw new Error(`Vectors and metadata must have the same length`); - } - const datapoints: IndexDatapoint[] = vectors.map((vector, idx) => - this.buildDatapoint(vector, documents[idx]) - ); - const options = {}; - const response = await this.upsertDatapointClient.request( - datapoints, - options - ); - if (Object.keys(response?.data ?? 
{}).length === 0) { - // Nothing in the response in the body means we saved it ok - const idDoc = documents as IdDocument[]; - const docsToStore: Record = {}; - idDoc.forEach((doc) => { - if (doc.id) { - docsToStore[doc.id] = doc; - } - }); - await this.docstore.add(docsToStore); - } - } - - // TODO: Refactor this into a utility type and use with pinecone as well? - // eslint-disable-next-line @typescript-eslint/no-explicit-any - cleanMetadata(documentMetadata: Record): { - [key: string]: string | number | boolean | string[] | null; - } { - type metadataType = { - [key: string]: string | number | boolean | string[] | null; - }; - - function getStringArrays( - prefix: string, - // eslint-disable-next-line @typescript-eslint/no-explicit-any - m: Record - ): Record { - let ret: Record = {}; - - Object.keys(m).forEach((key) => { - const newPrefix = prefix.length > 0 ? `${prefix}.${key}` : key; - const val = m[key]; - if (!val) { - // Ignore it - } else if (Array.isArray(val)) { - // Make sure everything in the array is a string - ret[newPrefix] = val.map((v) => `${v}`); - } else if (typeof val === "object") { - const subArrays = getStringArrays(newPrefix, val); - ret = { ...ret, ...subArrays }; - } - }); - - return ret; - } - - const stringArrays: Record = getStringArrays( - "", - documentMetadata - ); - - const flatMetadata: metadataType = flatten(documentMetadata); - Object.keys(flatMetadata).forEach((key) => { - Object.keys(stringArrays).forEach((arrayKey) => { - const matchKey = `${arrayKey}.`; - if (key.startsWith(matchKey)) { - delete flatMetadata[key]; - } - }); - }); - - const metadata: metadataType = { - ...flatMetadata, - ...stringArrays, - }; - return metadata; - } - - /** - * Given the metadata from a document, convert it to an array of Restriction - * objects that may be passed to the Matching Engine and stored. - * The default implementation flattens any metadata and includes it as - * an "allowList". 
Subclasses can choose to convert some of these to - * "denyList" items or to add additional restrictions (for example, to format - * dates into a different structure or to add additional restrictions - * based on the date). - * @param documentMetadata - The metadata from a document - * @returns a Restriction[] (or an array of a subclass, from the FilterType) - */ - metadataToRestrictions( - // eslint-disable-next-line @typescript-eslint/no-explicit-any - documentMetadata: Record - ): this["FilterType"] { - const metadata = this.cleanMetadata(documentMetadata); - - const restrictions: this["FilterType"] = []; - for (const key of Object.keys(metadata)) { - // Make sure the value is an array (or that we'll ignore it) - let valArray; - const val = metadata[key]; - if (val === null) { - valArray = null; - } else if (Array.isArray(val) && val.length > 0) { - valArray = val; - } else { - valArray = [`${val}`]; - } - - // Add to the restrictions if we do have a valid value - if (valArray) { - // Determine if this key is for the allowList or denyList - // TODO: get which ones should be on the deny list - const listType = "allowList"; - - // Create the restriction - const restriction: Restriction = { - namespace: key, - [listType]: valArray, - }; - - // Add it to the restriction list - restrictions.push(restriction); - } - } - return restrictions; - } - - /** - * Create an index datapoint for the vector and document id. - * If an id does not exist, create it and set the document to its value. 
- * @param vector - * @param document - */ - buildDatapoint(vector: number[], document: IdDocument): IndexDatapoint { - if (!document.id) { - // eslint-disable-next-line no-param-reassign - document.id = uuid.v4(); - } - const ret: IndexDatapoint = { - datapointId: document.id, - featureVector: vector, - }; - const restrictions = this.metadataToRestrictions(document.metadata); - if (restrictions?.length > 0) { - ret.restricts = restrictions; - } - return ret; - } - - async delete(params: MatchingEngineDeleteParams): Promise { - const options = {}; - await this.removeDatapointClient.request(params.ids, options); - } - - async similaritySearchVectorWithScore( - query: number[], - k: number, - filter?: this["FilterType"] - ): Promise<[Document, number][]> { - // Format the query into the request - const deployedIndexId = await this.getDeployedIndexId(); - const requestQuery: FindNeighborsRequestQuery = { - neighborCount: k, - datapoint: { - datapointId: `0`, - featureVector: query, - }, - }; - if (filter) { - requestQuery.datapoint.restricts = filter; - } - const request: FindNeighborsRequest = { - deployedIndexId, - queries: [requestQuery], - }; - - // Build the connection. - // Has to be done here, since we defer getting the endpoint until - // we need it. - const apiEndpoint = await this.getPublicAPIEndpoint(); - const findNeighborsParams: FindNeighborsConnectionParams = { - endpoint: apiEndpoint, - indexEndpoint: this.indexEndpoint, - apiVersion: this.apiVersion, - location: this.location, - deployedIndexId, - }; - const connection = new FindNeighborsConnection( - findNeighborsParams, - this.caller - ); - - // Make the call - const options = {}; - const response = await connection.request(request, options); - - // Get the document for each datapoint id and return them - const nearestNeighbors = response?.data?.nearestNeighbors ?? []; - const nearestNeighbor = nearestNeighbors[0]; - const neighbors = nearestNeighbor?.neighbors ?? 
[]; - const ret: [Document, number][] = await Promise.all( - neighbors.map(async (neighbor) => { - const id = neighbor?.datapoint?.datapointId; - const distance = neighbor?.distance; - let doc: IdDocument; - try { - doc = await this.docstore.search(id); - } catch (xx) { - // Documents that are in the index are returned, even if they - // are not in the document store, to allow for some way to get - // the id so they can be deleted. - console.error(xx); - console.warn( - [ - `Document with id "${id}" is missing from the backing docstore.`, - `This can occur if you clear the docstore without deleting from the corresponding Matching Engine index.`, - `To resolve this, you should call .delete() with this id as part of the "ids" parameter.`, - ].join("\n") - ); - doc = new Document({ pageContent: `Missing document ${id}` }); - } - doc.id ??= id; - return [doc, distance]; - }) - ); - - return ret; - } - - /** - * For this index endpoint, figure out what API Endpoint URL and deployed - * index ID should be used to do upserts and queries. - * Also sets the `apiEndpoint` and `deployedIndexId` property for future use. - * @return The URL - */ - async determinePublicAPIEndpoint(): Promise { - const response: IndexEndpointResponse = - await this.indexEndpointClient.request(this.callerOptions); - - // Get the endpoint - const publicEndpointDomainName = response?.data?.publicEndpointDomainName; - this.apiEndpoint = publicEndpointDomainName; - - // Determine which of the deployed indexes match the index id - // and get the deployed index id. The list of deployed index ids - // contain the "index name" or path, but not the index id by itself, - // so we need to extract it from the name - const indexPathPattern = /projects\/.+\/locations\/.+\/indexes\/(.+)$/; - const deployedIndexes = response?.data?.deployedIndexes ?? 
[]; - const deployedIndex = deployedIndexes.find((index) => { - const deployedIndexPath = index.index; - const match = deployedIndexPath.match(indexPathPattern); - if (match) { - const [, potentialIndexId] = match; - if (potentialIndexId === this.index) { - return true; - } - } - return false; - }); - if (deployedIndex) { - this.deployedIndexId = deployedIndex.id; - } - - return { - apiEndpoint: this.apiEndpoint, - deployedIndexId: this.deployedIndexId, - }; - } - - async getPublicAPIEndpoint(): Promise { - return ( - this.apiEndpoint ?? (await this.determinePublicAPIEndpoint()).apiEndpoint - ); - } - - async getDeployedIndexId(): Promise { - return ( - this.deployedIndexId ?? - (await this.determinePublicAPIEndpoint()).deployedIndexId - ); - } - - static async fromTexts( - texts: string[], - metadatas: object[] | object, - embeddings: Embeddings, - dbConfig: MatchingEngineArgs - ): Promise { - const docs: Document[] = texts.map( - (text, index): Document => ({ - pageContent: text, - metadata: Array.isArray(metadatas) ? 
metadatas[index] : metadatas, - }) - ); - return this.fromDocuments(docs, embeddings, dbConfig); - } - - static async fromDocuments( - docs: Document[], - embeddings: Embeddings, - dbConfig: MatchingEngineArgs - ): Promise { - const ret = new MatchingEngine(embeddings, dbConfig); - await ret.addDocuments(docs); - return ret; - } -} +export * from "@langchain/community/vectorstores/googlevertexai"; diff --git a/langchain/src/vectorstores/hnswlib.ts b/langchain/src/vectorstores/hnswlib.ts index 7f29eaaba44f..2d91e0d321c8 100644 --- a/langchain/src/vectorstores/hnswlib.ts +++ b/langchain/src/vectorstores/hnswlib.ts @@ -1,354 +1 @@ -import type { - HierarchicalNSW as HierarchicalNSWT, - SpaceName, -} from "hnswlib-node"; -import { Embeddings } from "../embeddings/base.js"; -import { SaveableVectorStore } from "./base.js"; -import { Document } from "../document.js"; -import { SynchronousInMemoryDocstore } from "../stores/doc/in_memory.js"; - -/** - * Interface for the base configuration of HNSWLib. It includes the space - * name and the number of dimensions. - */ -export interface HNSWLibBase { - space: SpaceName; - numDimensions?: number; -} - -/** - * Interface for the arguments that can be passed to the HNSWLib - * constructor. It extends HNSWLibBase and includes properties for the - * document store and HNSW index. - */ -export interface HNSWLibArgs extends HNSWLibBase { - docstore?: SynchronousInMemoryDocstore; - index?: HierarchicalNSWT; -} - -/** - * Class that implements a vector store using Hierarchical Navigable Small - * World (HNSW) graphs. It extends the SaveableVectorStore class and - * provides methods for adding documents and vectors, performing - * similarity searches, and saving and loading the vector store. 
- */ -export class HNSWLib extends SaveableVectorStore { - declare FilterType: (doc: Document) => boolean; - - _index?: HierarchicalNSWT; - - docstore: SynchronousInMemoryDocstore; - - args: HNSWLibBase; - - _vectorstoreType(): string { - return "hnswlib"; - } - - constructor(embeddings: Embeddings, args: HNSWLibArgs) { - super(embeddings, args); - this._index = args.index; - this.args = args; - this.embeddings = embeddings; - this.docstore = args?.docstore ?? new SynchronousInMemoryDocstore(); - } - - /** - * Method to add documents to the vector store. It first converts the - * documents to vectors using the embeddings, then adds the vectors to the - * vector store. - * @param documents The documents to be added to the vector store. - * @returns A Promise that resolves when the documents have been added. - */ - async addDocuments(documents: Document[]): Promise { - const texts = documents.map(({ pageContent }) => pageContent); - return this.addVectors( - await this.embeddings.embedDocuments(texts), - documents - ); - } - - private static async getHierarchicalNSW(args: HNSWLibBase) { - const { HierarchicalNSW } = await HNSWLib.imports(); - if (!args.space) { - throw new Error("hnswlib-node requires a space argument"); - } - if (args.numDimensions === undefined) { - throw new Error("hnswlib-node requires a numDimensions argument"); - } - return new HierarchicalNSW(args.space, args.numDimensions); - } - - private async initIndex(vectors: number[][]) { - if (!this._index) { - if (this.args.numDimensions === undefined) { - this.args.numDimensions = vectors[0].length; - } - this.index = await HNSWLib.getHierarchicalNSW(this.args); - } - if (!this.index.getCurrentCount()) { - this.index.initIndex(vectors.length); - } - } - - public get index(): HierarchicalNSWT { - if (!this._index) { - throw new Error( - "Vector store not initialised yet. Try calling `addTexts` first." 
- ); - } - return this._index; - } - - private set index(index: HierarchicalNSWT) { - this._index = index; - } - - /** - * Method to add vectors to the vector store. It first initializes the - * index if it hasn't been initialized yet, then adds the vectors to the - * index and the documents to the document store. - * @param vectors The vectors to be added to the vector store. - * @param documents The documents corresponding to the vectors. - * @returns A Promise that resolves when the vectors and documents have been added. - */ - async addVectors(vectors: number[][], documents: Document[]) { - if (vectors.length === 0) { - return; - } - await this.initIndex(vectors); - - // TODO here we could optionally normalise the vectors to unit length - // so that dot product is equivalent to cosine similarity, like this - // https://github.com/nmslib/hnswlib/issues/384#issuecomment-1155737730 - // While we only support OpenAI embeddings this isn't necessary - if (vectors.length !== documents.length) { - throw new Error(`Vectors and metadatas must have the same length`); - } - if (vectors[0].length !== this.args.numDimensions) { - throw new Error( - `Vectors must have the same length as the number of dimensions (${this.args.numDimensions})` - ); - } - const capacity = this.index.getMaxElements(); - const needed = this.index.getCurrentCount() + vectors.length; - if (needed > capacity) { - this.index.resizeIndex(needed); - } - const docstoreSize = this.index.getCurrentCount(); - const toSave: Record = {}; - for (let i = 0; i < vectors.length; i += 1) { - this.index.addPoint(vectors[i], docstoreSize + i); - toSave[docstoreSize + i] = documents[i]; - } - this.docstore.add(toSave); - } - - /** - * Method to perform a similarity search in the vector store using a query - * vector. It returns the k most similar documents along with their - * similarity scores. An optional filter function can be provided to - * filter the documents. - * @param query The query vector. 
- * @param k The number of most similar documents to return. - * @param filter An optional filter function to filter the documents. - * @returns A Promise that resolves to an array of tuples, where each tuple contains a document and its similarity score. - */ - async similaritySearchVectorWithScore( - query: number[], - k: number, - filter?: this["FilterType"] - ) { - if (this.args.numDimensions && !this._index) { - await this.initIndex([[]]); - } - if (query.length !== this.args.numDimensions) { - throw new Error( - `Query vector must have the same length as the number of dimensions (${this.args.numDimensions})` - ); - } - if (k > this.index.getCurrentCount()) { - const total = this.index.getCurrentCount(); - console.warn( - `k (${k}) is greater than the number of elements in the index (${total}), setting k to ${total}` - ); - // eslint-disable-next-line no-param-reassign - k = total; - } - const filterFunction = (label: number): boolean => { - if (!filter) { - return true; - } - const document = this.docstore.search(String(label)); - // eslint-disable-next-line no-instanceof/no-instanceof - if (typeof document !== "string") { - return filter(document); - } - return false; - }; - const result = this.index.searchKnn( - query, - k, - filter ? filterFunction : undefined - ); - return result.neighbors.map( - (docIndex, resultIndex) => - [ - this.docstore.search(String(docIndex)), - result.distances[resultIndex], - ] as [Document, number] - ); - } - - /** - * Method to delete the vector store from a directory. It deletes the - * hnswlib.index file, the docstore.json file, and the args.json file from - * the directory. - * @param params An object with a directory property that specifies the directory from which to delete the vector store. - * @returns A Promise that resolves when the vector store has been deleted. 
- */ - async delete(params: { directory: string }) { - const fs = await import("node:fs/promises"); - const path = await import("node:path"); - try { - await fs.access(path.join(params.directory, "hnswlib.index")); - } catch (err) { - throw new Error( - `Directory ${params.directory} does not contain a hnswlib.index file.` - ); - } - - await Promise.all([ - await fs.rm(path.join(params.directory, "hnswlib.index"), { - force: true, - }), - await fs.rm(path.join(params.directory, "docstore.json"), { - force: true, - }), - await fs.rm(path.join(params.directory, "args.json"), { force: true }), - ]); - } - - /** - * Method to save the vector store to a directory. It saves the HNSW - * index, the arguments, and the document store to the directory. - * @param directory The directory to which to save the vector store. - * @returns A Promise that resolves when the vector store has been saved. - */ - async save(directory: string) { - const fs = await import("node:fs/promises"); - const path = await import("node:path"); - await fs.mkdir(directory, { recursive: true }); - await Promise.all([ - this.index.writeIndex(path.join(directory, "hnswlib.index")), - await fs.writeFile( - path.join(directory, "args.json"), - JSON.stringify(this.args) - ), - await fs.writeFile( - path.join(directory, "docstore.json"), - JSON.stringify(Array.from(this.docstore._docs.entries())) - ), - ]); - } - - /** - * Static method to load a vector store from a directory. It reads the - * HNSW index, the arguments, and the document store from the directory, - * then creates a new HNSWLib instance with these values. - * @param directory The directory from which to load the vector store. - * @param embeddings The embeddings to be used by the HNSWLib instance. - * @returns A Promise that resolves to a new HNSWLib instance. 
- */ - static async load(directory: string, embeddings: Embeddings) { - const fs = await import("node:fs/promises"); - const path = await import("node:path"); - const args = JSON.parse( - await fs.readFile(path.join(directory, "args.json"), "utf8") - ); - const index = await HNSWLib.getHierarchicalNSW(args); - const [docstoreFiles] = await Promise.all([ - fs - .readFile(path.join(directory, "docstore.json"), "utf8") - .then(JSON.parse), - index.readIndex(path.join(directory, "hnswlib.index")), - ]); - args.docstore = new SynchronousInMemoryDocstore(new Map(docstoreFiles)); - - args.index = index; - - return new HNSWLib(embeddings, args); - } - - /** - * Static method to create a new HNSWLib instance from texts and metadata. - * It creates a new Document instance for each text and metadata, then - * calls the fromDocuments method to create the HNSWLib instance. - * @param texts The texts to be used to create the documents. - * @param metadatas The metadata to be used to create the documents. - * @param embeddings The embeddings to be used by the HNSWLib instance. - * @param dbConfig An optional configuration object for the document store. - * @returns A Promise that resolves to a new HNSWLib instance. - */ - static async fromTexts( - texts: string[], - metadatas: object[] | object, - embeddings: Embeddings, - dbConfig?: { - docstore?: SynchronousInMemoryDocstore; - } - ): Promise { - const docs: Document[] = []; - for (let i = 0; i < texts.length; i += 1) { - const metadata = Array.isArray(metadatas) ? metadatas[i] : metadatas; - const newDoc = new Document({ - pageContent: texts[i], - metadata, - }); - docs.push(newDoc); - } - return HNSWLib.fromDocuments(docs, embeddings, dbConfig); - } - - /** - * Static method to create a new HNSWLib instance from documents. It - * creates a new HNSWLib instance, adds the documents to it, then returns - * the instance. - * @param docs The documents to be added to the HNSWLib instance. 
- * @param embeddings The embeddings to be used by the HNSWLib instance. - * @param dbConfig An optional configuration object for the document store. - * @returns A Promise that resolves to a new HNSWLib instance. - */ - static async fromDocuments( - docs: Document[], - embeddings: Embeddings, - dbConfig?: { - docstore?: SynchronousInMemoryDocstore; - } - ): Promise { - const args: HNSWLibArgs = { - docstore: dbConfig?.docstore, - space: "cosine", - }; - const instance = new this(embeddings, args); - await instance.addDocuments(docs); - return instance; - } - - static async imports(): Promise<{ - HierarchicalNSW: typeof HierarchicalNSWT; - }> { - try { - const { - default: { HierarchicalNSW }, - } = await import("hnswlib-node"); - - return { HierarchicalNSW }; - // eslint-disable-next-line @typescript-eslint/no-explicit-any - } catch (err: any) { - throw new Error( - `Could not import hnswlib-node. Please install hnswlib-node as a dependency with, e.g. \`npm install -S hnswlib-node\`.\n\nError: ${err?.message}` - ); - } - } -} +export * from "@langchain/community/vectorstores/hnswlib"; diff --git a/langchain/src/vectorstores/lancedb.ts b/langchain/src/vectorstores/lancedb.ts index 398147ed0d3b..4b4b62d64a75 100644 --- a/langchain/src/vectorstores/lancedb.ts +++ b/langchain/src/vectorstores/lancedb.ts @@ -1,152 +1 @@ -import { Table } from "vectordb"; -import { VectorStore } from "./base.js"; -import { Embeddings } from "../embeddings/base.js"; -import { Document } from "../document.js"; - -/** - * Defines the arguments for the LanceDB class constructor. It includes a - * table and an optional textKey. - */ -export type LanceDBArgs = { - table: Table; - textKey?: string; -}; - -/** - * A wrapper for an open-source database for vector-search with persistent - * storage. It simplifies retrieval, filtering, and management of - * embeddings. 
- */ -export class LanceDB extends VectorStore { - private table: Table; - - private textKey: string; - - constructor(embeddings: Embeddings, args: LanceDBArgs) { - super(embeddings, args); - this.table = args.table; - this.embeddings = embeddings; - this.textKey = args.textKey || "text"; - } - - /** - * Adds documents to the database. - * @param documents The documents to be added. - * @returns A Promise that resolves when the documents have been added. - */ - async addDocuments(documents: Document[]): Promise { - const texts = documents.map(({ pageContent }) => pageContent); - return this.addVectors( - await this.embeddings.embedDocuments(texts), - documents - ); - } - - _vectorstoreType(): string { - return "lancedb"; - } - - /** - * Adds vectors and their corresponding documents to the database. - * @param vectors The vectors to be added. - * @param documents The corresponding documents to be added. - * @returns A Promise that resolves when the vectors and documents have been added. - */ - async addVectors(vectors: number[][], documents: Document[]): Promise { - if (vectors.length === 0) { - return; - } - if (vectors.length !== documents.length) { - throw new Error(`Vectors and documents must have the same length`); - } - - const data: Array> = []; - for (let i = 0; i < documents.length; i += 1) { - const record = { - vector: vectors[i], - [this.textKey]: documents[i].pageContent, - }; - Object.keys(documents[i].metadata).forEach((metaKey) => { - record[metaKey] = documents[i].metadata[metaKey]; - }); - data.push(record); - } - await this.table.add(data); - } - - /** - * Performs a similarity search on the vectors in the database and returns - * the documents and their scores. - * @param query The query vector. - * @param k The number of results to return. - * @returns A Promise that resolves with an array of tuples, each containing a Document and its score. 
- */ - async similaritySearchVectorWithScore( - query: number[], - k: number - ): Promise<[Document, number][]> { - const results = await this.table.search(query).limit(k).execute(); - - const docsAndScore: [Document, number][] = []; - results.forEach((item) => { - const metadata: Record = {}; - Object.keys(item).forEach((key) => { - if (key !== "vector" && key !== "score" && key !== this.textKey) { - metadata[key] = item[key]; - } - }); - - docsAndScore.push([ - new Document({ - pageContent: item[this.textKey] as string, - metadata, - }), - item.score as number, - ]); - }); - return docsAndScore; - } - - /** - * Creates a new instance of LanceDB from texts. - * @param texts The texts to be converted into documents. - * @param metadatas The metadata for the texts. - * @param embeddings The embeddings to be managed. - * @param dbConfig The configuration for the LanceDB instance. - * @returns A Promise that resolves with a new instance of LanceDB. - */ - static async fromTexts( - texts: string[], - metadatas: object[] | object, - embeddings: Embeddings, - dbConfig: LanceDBArgs - ): Promise { - const docs: Document[] = []; - for (let i = 0; i < texts.length; i += 1) { - const metadata = Array.isArray(metadatas) ? metadatas[i] : metadatas; - const newDoc = new Document({ - pageContent: texts[i], - metadata, - }); - docs.push(newDoc); - } - return LanceDB.fromDocuments(docs, embeddings, dbConfig); - } - - /** - * Creates a new instance of LanceDB from documents. - * @param docs The documents to be added to the database. - * @param embeddings The embeddings to be managed. - * @param dbConfig The configuration for the LanceDB instance. - * @returns A Promise that resolves with a new instance of LanceDB. 
- */ - static async fromDocuments( - docs: Document[], - embeddings: Embeddings, - dbConfig: LanceDBArgs - ): Promise { - const instance = new this(embeddings, dbConfig); - await instance.addDocuments(docs); - return instance; - } -} +export * from "@langchain/community/vectorstores/lancedb"; diff --git a/langchain/src/vectorstores/milvus.ts b/langchain/src/vectorstores/milvus.ts index 463da9e2e044..c99dfbc8ad45 100644 --- a/langchain/src/vectorstores/milvus.ts +++ b/langchain/src/vectorstores/milvus.ts @@ -1,674 +1 @@ -import * as uuid from "uuid"; -import { - MilvusClient, - DataType, - DataTypeMap, - ErrorCode, - FieldType, - ClientConfig, -} from "@zilliz/milvus2-sdk-node"; - -import { Embeddings } from "../embeddings/base.js"; -import { VectorStore } from "./base.js"; -import { Document } from "../document.js"; -import { getEnvironmentVariable } from "../util/env.js"; - -/** - * Interface for the arguments required by the Milvus class constructor. - */ -export interface MilvusLibArgs { - collectionName?: string; - primaryField?: string; - vectorField?: string; - textField?: string; - url?: string; // db address - ssl?: boolean; - username?: string; - password?: string; - textFieldMaxLength?: number; - clientConfig?: ClientConfig; - autoId?: boolean; -} - -/** - * Type representing the type of index used in the Milvus database. - */ -type IndexType = - | "IVF_FLAT" - | "IVF_SQ8" - | "IVF_PQ" - | "HNSW" - | "RHNSW_FLAT" - | "RHNSW_SQ" - | "RHNSW_PQ" - | "IVF_HNSW" - | "ANNOY"; - -/** - * Interface for the parameters required to create an index in the Milvus - * database. 
- */ -interface IndexParam { - params: { nprobe?: number; ef?: number; search_k?: number }; -} - -interface InsertRow { - [x: string]: string | number[]; -} - -const MILVUS_PRIMARY_FIELD_NAME = "langchain_primaryid"; -const MILVUS_VECTOR_FIELD_NAME = "langchain_vector"; -const MILVUS_TEXT_FIELD_NAME = "langchain_text"; -const MILVUS_COLLECTION_NAME_PREFIX = "langchain_col"; - -/** - * Class for interacting with a Milvus database. Extends the VectorStore - * class. - */ -export class Milvus extends VectorStore { - get lc_secrets(): { [key: string]: string } { - return { - ssl: "MILVUS_SSL", - username: "MILVUS_USERNAME", - password: "MILVUS_PASSWORD", - }; - } - - declare FilterType: string; - - collectionName: string; - - numDimensions?: number; - - autoId?: boolean; - - primaryField: string; - - vectorField: string; - - textField: string; - - textFieldMaxLength: number; - - fields: string[]; - - client: MilvusClient; - - indexParams: Record = { - IVF_FLAT: { params: { nprobe: 10 } }, - IVF_SQ8: { params: { nprobe: 10 } }, - IVF_PQ: { params: { nprobe: 10 } }, - HNSW: { params: { ef: 10 } }, - RHNSW_FLAT: { params: { ef: 10 } }, - RHNSW_SQ: { params: { ef: 10 } }, - RHNSW_PQ: { params: { ef: 10 } }, - IVF_HNSW: { params: { nprobe: 10, ef: 10 } }, - ANNOY: { params: { search_k: 10 } }, - }; - - indexCreateParams = { - index_type: "HNSW", - metric_type: "L2", - params: JSON.stringify({ M: 8, efConstruction: 64 }), - }; - - indexSearchParams = JSON.stringify({ ef: 64 }); - - _vectorstoreType(): string { - return "milvus"; - } - - constructor(embeddings: Embeddings, args: MilvusLibArgs) { - super(embeddings, args); - this.embeddings = embeddings; - this.collectionName = args.collectionName ?? genCollectionName(); - this.textField = args.textField ?? MILVUS_TEXT_FIELD_NAME; - - this.autoId = args.autoId ?? true; - this.primaryField = args.primaryField ?? MILVUS_PRIMARY_FIELD_NAME; - this.vectorField = args.vectorField ?? 
MILVUS_VECTOR_FIELD_NAME; - - this.textFieldMaxLength = args.textFieldMaxLength ?? 0; - - this.fields = []; - - const url = args.url ?? getEnvironmentVariable("MILVUS_URL"); - const { - address = "", - username = "", - password = "", - ssl, - } = args.clientConfig || {}; - - // combine args clientConfig and env variables - const clientConfig: ClientConfig = { - ...(args.clientConfig || {}), - address: url || address, - username: args.username || username, - password: args.password || password, - ssl: args.ssl || ssl, - }; - - if (!clientConfig.address) { - throw new Error("Milvus URL address is not provided."); - } - this.client = new MilvusClient(clientConfig); - } - - /** - * Adds documents to the Milvus database. - * @param documents Array of Document instances to be added to the database. - * @returns Promise resolving to void. - */ - async addDocuments(documents: Document[]): Promise { - const texts = documents.map(({ pageContent }) => pageContent); - await this.addVectors( - await this.embeddings.embedDocuments(texts), - documents - ); - } - - /** - * Adds vectors to the Milvus database. - * @param vectors Array of vectors to be added to the database. - * @param documents Array of Document instances associated with the vectors. - * @returns Promise resolving to void. 
- */ - async addVectors(vectors: number[][], documents: Document[]): Promise { - if (vectors.length === 0) { - return; - } - await this.ensureCollection(vectors, documents); - - const insertDatas: InsertRow[] = []; - // eslint-disable-next-line no-plusplus - for (let index = 0; index < vectors.length; index++) { - const vec = vectors[index]; - const doc = documents[index]; - const data: InsertRow = { - [this.textField]: doc.pageContent, - [this.vectorField]: vec, - }; - this.fields.forEach((field) => { - switch (field) { - case this.primaryField: - if (!this.autoId) { - if (doc.metadata[this.primaryField] === undefined) { - throw new Error( - `The Collection's primaryField is configured with autoId=false, thus its value must be provided through metadata.` - ); - } - data[field] = doc.metadata[this.primaryField]; - } - break; - case this.textField: - data[field] = doc.pageContent; - break; - case this.vectorField: - data[field] = vec; - break; - default: // metadata fields - if (doc.metadata[field] === undefined) { - throw new Error( - `The field "${field}" is not provided in documents[${index}].metadata.` - ); - } else if (typeof doc.metadata[field] === "object") { - data[field] = JSON.stringify(doc.metadata[field]); - } else { - data[field] = doc.metadata[field]; - } - break; - } - }); - - insertDatas.push(data); - } - - const insertResp = await this.client.insert({ - collection_name: this.collectionName, - fields_data: insertDatas, - }); - if (insertResp.status.error_code !== ErrorCode.SUCCESS) { - throw new Error(`Error inserting data: ${JSON.stringify(insertResp)}`); - } - await this.client.flushSync({ collection_names: [this.collectionName] }); - } - - /** - * Searches for vectors in the Milvus database that are similar to a given - * vector. - * @param query Vector to compare with the vectors in the database. - * @param k Number of similar vectors to return. - * @param filter Optional filter to apply to the search. 
- * @returns Promise resolving to an array of tuples, each containing a Document instance and a similarity score. - */ - async similaritySearchVectorWithScore( - query: number[], - k: number, - filter?: string - ): Promise<[Document, number][]> { - const hasColResp = await this.client.hasCollection({ - collection_name: this.collectionName, - }); - if (hasColResp.status.error_code !== ErrorCode.SUCCESS) { - throw new Error(`Error checking collection: ${hasColResp}`); - } - if (hasColResp.value === false) { - throw new Error( - `Collection not found: ${this.collectionName}, please create collection before search.` - ); - } - - const filterStr = filter ?? ""; - - await this.grabCollectionFields(); - - const loadResp = await this.client.loadCollectionSync({ - collection_name: this.collectionName, - }); - if (loadResp.error_code !== ErrorCode.SUCCESS) { - throw new Error(`Error loading collection: ${loadResp}`); - } - - // clone this.field and remove vectorField - const outputFields = this.fields.filter( - (field) => field !== this.vectorField - ); - - const searchResp = await this.client.search({ - collection_name: this.collectionName, - search_params: { - anns_field: this.vectorField, - topk: k.toString(), - metric_type: this.indexCreateParams.metric_type, - params: this.indexSearchParams, - }, - output_fields: outputFields, - vector_type: DataType.FloatVector, - vectors: [query], - filter: filterStr, - }); - if (searchResp.status.error_code !== ErrorCode.SUCCESS) { - throw new Error(`Error searching data: ${JSON.stringify(searchResp)}`); - } - const results: [Document, number][] = []; - searchResp.results.forEach((result) => { - const fields = { - pageContent: "", - // eslint-disable-next-line @typescript-eslint/no-explicit-any - metadata: {} as Record, - }; - Object.keys(result).forEach((key) => { - if (key === this.textField) { - fields.pageContent = result[key]; - } else if (this.fields.includes(key) || key === this.primaryField) { - if (typeof result[key] === 
"string") { - const { isJson, obj } = checkJsonString(result[key]); - fields.metadata[key] = isJson ? obj : result[key]; - } else { - fields.metadata[key] = result[key]; - } - } - }); - results.push([new Document(fields), result.score]); - }); - // console.log("Search result: " + JSON.stringify(results, null, 2)); - return results; - } - - /** - * Ensures that a collection exists in the Milvus database. - * @param vectors Optional array of vectors to be used if a new collection needs to be created. - * @param documents Optional array of Document instances to be used if a new collection needs to be created. - * @returns Promise resolving to void. - */ - async ensureCollection(vectors?: number[][], documents?: Document[]) { - const hasColResp = await this.client.hasCollection({ - collection_name: this.collectionName, - }); - if (hasColResp.status.error_code !== ErrorCode.SUCCESS) { - throw new Error( - `Error checking collection: ${JSON.stringify(hasColResp, null, 2)}` - ); - } - - if (hasColResp.value === false) { - if (vectors === undefined || documents === undefined) { - throw new Error( - `Collection not found: ${this.collectionName}, please provide vectors and documents to create collection.` - ); - } - await this.createCollection(vectors, documents); - } else { - await this.grabCollectionFields(); - } - } - - /** - * Creates a collection in the Milvus database. - * @param vectors Array of vectors to be added to the new collection. - * @param documents Array of Document instances to be added to the new collection. - * @returns Promise resolving to void. 
- */ - async createCollection( - vectors: number[][], - documents: Document[] - ): Promise { - const fieldList: FieldType[] = []; - - fieldList.push(...createFieldTypeForMetadata(documents, this.primaryField)); - - fieldList.push( - { - name: this.primaryField, - description: "Primary key", - data_type: DataType.Int64, - is_primary_key: true, - autoID: this.autoId, - }, - { - name: this.textField, - description: "Text field", - data_type: DataType.VarChar, - type_params: { - max_length: - this.textFieldMaxLength > 0 - ? this.textFieldMaxLength.toString() - : getTextFieldMaxLength(documents).toString(), - }, - }, - { - name: this.vectorField, - description: "Vector field", - data_type: DataType.FloatVector, - type_params: { - dim: getVectorFieldDim(vectors).toString(), - }, - } - ); - - fieldList.forEach((field) => { - if (!field.autoID) { - this.fields.push(field.name); - } - }); - - const createRes = await this.client.createCollection({ - collection_name: this.collectionName, - fields: fieldList, - }); - - if (createRes.error_code !== ErrorCode.SUCCESS) { - console.log(createRes); - throw new Error(`Failed to create collection: ${createRes}`); - } - - await this.client.createIndex({ - collection_name: this.collectionName, - field_name: this.vectorField, - extra_params: this.indexCreateParams, - }); - } - - /** - * Retrieves the fields of a collection in the Milvus database. - * @returns Promise resolving to void. 
- */ - async grabCollectionFields(): Promise { - if (!this.collectionName) { - throw new Error("Need collection name to grab collection fields"); - } - if ( - this.primaryField && - this.vectorField && - this.textField && - this.fields.length > 0 - ) { - return; - } - const desc = await this.client.describeCollection({ - collection_name: this.collectionName, - }); - desc.schema.fields.forEach((field) => { - this.fields.push(field.name); - if (field.autoID) { - const index = this.fields.indexOf(field.name); - if (index !== -1) { - this.fields.splice(index, 1); - } - } - if (field.is_primary_key) { - this.primaryField = field.name; - } - const dtype = DataTypeMap[field.data_type]; - if (dtype === DataType.FloatVector || dtype === DataType.BinaryVector) { - this.vectorField = field.name; - } - - if (dtype === DataType.VarChar && field.name === MILVUS_TEXT_FIELD_NAME) { - this.textField = field.name; - } - }); - } - - /** - * Creates a Milvus instance from a set of texts and their associated - * metadata. - * @param texts Array of texts to be added to the database. - * @param metadatas Array of metadata objects associated with the texts. - * @param embeddings Embeddings instance used to generate vector embeddings for the texts. - * @param dbConfig Optional configuration for the Milvus database. - * @returns Promise resolving to a new Milvus instance. - */ - static async fromTexts( - texts: string[], - metadatas: object[] | object, - embeddings: Embeddings, - dbConfig?: MilvusLibArgs - ): Promise { - const docs: Document[] = []; - for (let i = 0; i < texts.length; i += 1) { - const metadata = Array.isArray(metadatas) ? metadatas[i] : metadatas; - const newDoc = new Document({ - pageContent: texts[i], - metadata, - }); - docs.push(newDoc); - } - return Milvus.fromDocuments(docs, embeddings, dbConfig); - } - - /** - * Creates a Milvus instance from a set of Document instances. - * @param docs Array of Document instances to be added to the database. 
- * @param embeddings Embeddings instance used to generate vector embeddings for the documents. - * @param dbConfig Optional configuration for the Milvus database. - * @returns Promise resolving to a new Milvus instance. - */ - static async fromDocuments( - docs: Document[], - embeddings: Embeddings, - dbConfig?: MilvusLibArgs - ): Promise { - const args: MilvusLibArgs = { - collectionName: dbConfig?.collectionName || genCollectionName(), - url: dbConfig?.url, - ssl: dbConfig?.ssl, - username: dbConfig?.username, - password: dbConfig?.password, - textField: dbConfig?.textField, - primaryField: dbConfig?.primaryField, - vectorField: dbConfig?.vectorField, - clientConfig: dbConfig?.clientConfig, - autoId: dbConfig?.autoId, - }; - const instance = new this(embeddings, args); - await instance.addDocuments(docs); - return instance; - } - - /** - * Creates a Milvus instance from an existing collection in the Milvus - * database. - * @param embeddings Embeddings instance used to generate vector embeddings for the documents in the collection. - * @param dbConfig Configuration for the Milvus database. - * @returns Promise resolving to a new Milvus instance. - */ - static async fromExistingCollection( - embeddings: Embeddings, - dbConfig: MilvusLibArgs - ): Promise { - const instance = new this(embeddings, dbConfig); - await instance.ensureCollection(); - return instance; - } - - /** - * Deletes data from the Milvus database. - * @param params Object containing a filter to apply to the deletion. - * @returns Promise resolving to void. 
- */ - async delete(params: { filter: string }): Promise { - const hasColResp = await this.client.hasCollection({ - collection_name: this.collectionName, - }); - if (hasColResp.status.error_code !== ErrorCode.SUCCESS) { - throw new Error(`Error checking collection: ${hasColResp}`); - } - if (hasColResp.value === false) { - throw new Error( - `Collection not found: ${this.collectionName}, please create collection before search.` - ); - } - - const { filter } = params; - - const deleteResp = await this.client.deleteEntities({ - collection_name: this.collectionName, - expr: filter, - }); - - if (deleteResp.status.error_code !== ErrorCode.SUCCESS) { - throw new Error(`Error deleting data: ${JSON.stringify(deleteResp)}`); - } - } -} - -function createFieldTypeForMetadata( - documents: Document[], - primaryFieldName: string -): FieldType[] { - const sampleMetadata = documents[0].metadata; - let textFieldMaxLength = 0; - let jsonFieldMaxLength = 0; - documents.forEach(({ metadata }) => { - // check all keys name and count in metadata is same as sampleMetadata - Object.keys(metadata).forEach((key) => { - if ( - !(key in metadata) || - typeof metadata[key] !== typeof sampleMetadata[key] - ) { - throw new Error( - "All documents must have same metadata keys and datatype" - ); - } - - // find max length of string field and json field, cache json string value - if (typeof metadata[key] === "string") { - if (metadata[key].length > textFieldMaxLength) { - textFieldMaxLength = metadata[key].length; - } - } else if (typeof metadata[key] === "object") { - const json = JSON.stringify(metadata[key]); - if (json.length > jsonFieldMaxLength) { - jsonFieldMaxLength = json.length; - } - } - }); - }); - - const fields: FieldType[] = []; - for (const [key, value] of Object.entries(sampleMetadata)) { - const type = typeof value; - - if (key === primaryFieldName) { - /** - * skip primary field - * because we will create primary field in createCollection - * */ - } else if (type === "string") 
{ - fields.push({ - name: key, - description: `Metadata String field`, - data_type: DataType.VarChar, - type_params: { - max_length: textFieldMaxLength.toString(), - }, - }); - } else if (type === "number") { - fields.push({ - name: key, - description: `Metadata Number field`, - data_type: DataType.Float, - }); - } else if (type === "boolean") { - fields.push({ - name: key, - description: `Metadata Boolean field`, - data_type: DataType.Bool, - }); - } else if (value === null) { - // skip - } else { - // use json for other types - try { - fields.push({ - name: key, - description: `Metadata JSON field`, - data_type: DataType.VarChar, - type_params: { - max_length: jsonFieldMaxLength.toString(), - }, - }); - } catch (e) { - throw new Error("Failed to parse metadata field as JSON"); - } - } - } - return fields; -} - -function genCollectionName(): string { - return `${MILVUS_COLLECTION_NAME_PREFIX}_${uuid.v4().replaceAll("-", "")}`; -} - -function getTextFieldMaxLength(documents: Document[]) { - let textMaxLength = 0; - const textEncoder = new TextEncoder(); - // eslint-disable-next-line no-plusplus - for (let i = 0; i < documents.length; i++) { - const text = documents[i].pageContent; - const textLengthInBytes = textEncoder.encode(text).length; - if (textLengthInBytes > textMaxLength) { - textMaxLength = textLengthInBytes; - } - } - return textMaxLength; -} - -function getVectorFieldDim(vectors: number[][]) { - if (vectors.length === 0) { - throw new Error("No vectors found"); - } - return vectors[0].length; -} - -// eslint-disable-next-line @typescript-eslint/no-explicit-any -function checkJsonString(value: string): { isJson: boolean; obj: any } { - try { - const result = JSON.parse(value); - return { isJson: true, obj: result }; - } catch (e) { - return { isJson: false, obj: null }; - } -} +export * from "@langchain/community/vectorstores/milvus"; diff --git a/langchain/src/vectorstores/momento_vector_index.ts b/langchain/src/vectorstores/momento_vector_index.ts 
index afc13a1b8248..ad282e0176ea 100644 --- a/langchain/src/vectorstores/momento_vector_index.ts +++ b/langchain/src/vectorstores/momento_vector_index.ts @@ -1,399 +1 @@ -/* eslint-disable no-instanceof/no-instanceof */ -/* eslint-disable @typescript-eslint/no-explicit-any */ -import { - ALL_VECTOR_METADATA, - IVectorIndexClient, - VectorIndexItem, - CreateVectorIndex, - VectorUpsertItemBatch, - VectorDeleteItemBatch, - VectorSearch, - VectorSearchAndFetchVectors, -} from "@gomomento/sdk-core"; -import * as uuid from "uuid"; -import { Document } from "../document.js"; -import { Embeddings } from "../embeddings/base.js"; -import { MaxMarginalRelevanceSearchOptions, VectorStore } from "./base.js"; -import { maximalMarginalRelevance } from "../util/math.js"; - -export interface DocumentProps { - ids: string[]; -} - -export interface MomentoVectorIndexLibArgs { - /** - * The Momento Vector Index client. - */ - client: IVectorIndexClient; - /** - * The name of the index to use to store the data. - * Defaults to "default". - */ - indexName?: string; - /** - * The name of the metadata field to use to store the text of the document. - * Defaults to "text". - */ - textField?: string; - /** - * Whether to create the index if it does not already exist. - * Defaults to true. - */ - ensureIndexExists?: boolean; -} - -export interface DeleteProps { - /** - * The ids of the documents to delete. - */ - ids: string[]; -} - -/** - * A vector store that uses the Momento Vector Index. - * - * @remarks - * To sign up for a free Momento account, visit https://console.gomomento.com. - */ -export class MomentoVectorIndex extends VectorStore { - private client: IVectorIndexClient; - - private indexName: string; - - private textField: string; - - private _ensureIndexExists: boolean; - - _vectorstoreType(): string { - return "momento"; - } - - /** - * Creates a new `MomentoVectorIndex` instance. - * @param embeddings The embeddings instance to use to generate embeddings from documents. 
- * @param args The arguments to use to configure the vector store. - */ - constructor(embeddings: Embeddings, args: MomentoVectorIndexLibArgs) { - super(embeddings, args); - - this.embeddings = embeddings; - this.client = args.client; - this.indexName = args.indexName ?? "default"; - this.textField = args.textField ?? "text"; - this._ensureIndexExists = args.ensureIndexExists ?? true; - } - - /** - * Returns the Momento Vector Index client. - * @returns The Momento Vector Index client. - */ - public getClient(): IVectorIndexClient { - return this.client; - } - - /** - * Creates the index if it does not already exist. - * @param numDimensions The number of dimensions of the vectors to be stored in the index. - * @returns Promise that resolves to true if the index was created, false if it already existed. - */ - private async ensureIndexExists(numDimensions: number): Promise { - const response = await this.client.createIndex( - this.indexName, - numDimensions - ); - if (response instanceof CreateVectorIndex.Success) { - return true; - } else if (response instanceof CreateVectorIndex.AlreadyExists) { - return false; - } else if (response instanceof CreateVectorIndex.Error) { - throw new Error(response.toString()); - } else { - throw new Error(`Unknown response type: ${response.toString()}`); - } - } - - /** - * Converts the documents to a format that can be stored in the index. - * - * This is necessary because the Momento Vector Index requires that the metadata - * be a map of strings to strings. - * @param vectors The vectors to convert. - * @param documents The documents to convert. - * @param ids The ids to convert. - * @returns The converted documents. 
- */ - private prepareItemBatch( - vectors: number[][], - documents: Document>[], - ids: string[] - ): VectorIndexItem[] { - return vectors.map((vector, idx) => ({ - id: ids[idx], - vector, - metadata: { - ...documents[idx].metadata, - [this.textField]: documents[idx].pageContent, - }, - })); - } - - /** - * Adds vectors to the index. - * - * @remarks If the index does not already exist, it will be created if `ensureIndexExists` is true. - * @param vectors The vectors to add to the index. - * @param documents The documents to add to the index. - * @param documentProps The properties of the documents to add to the index, specifically the ids. - * @returns Promise that resolves when the vectors have been added to the index. Also returns the ids of the - * documents that were added. - */ - public async addVectors( - vectors: number[][], - documents: Document>[], - documentProps?: DocumentProps - ): Promise { - if (vectors.length === 0) { - return; - } - - if (documents.length !== vectors.length) { - throw new Error( - `Number of vectors (${vectors.length}) does not equal number of documents (${documents.length})` - ); - } - - if (vectors.some((v) => v.length !== vectors[0].length)) { - throw new Error("All vectors must have the same length"); - } - - if ( - documentProps?.ids !== undefined && - documentProps.ids.length !== vectors.length - ) { - throw new Error( - `Number of ids (${ - documentProps?.ids?.length || "null" - }) does not equal number of vectors (${vectors.length})` - ); - } - - if (this._ensureIndexExists) { - await this.ensureIndexExists(vectors[0].length); - } - const documentIds = documentProps?.ids ?? 
documents.map(() => uuid.v4()); - - const batchSize = 128; - const numBatches = Math.ceil(vectors.length / batchSize); - - // Add each batch of vectors to the index - for (let i = 0; i < numBatches; i += 1) { - const [startIndex, endIndex] = [ - i * batchSize, - Math.min((i + 1) * batchSize, vectors.length), - ]; - - const batchVectors = vectors.slice(startIndex, endIndex); - const batchDocuments = documents.slice(startIndex, endIndex); - const batchDocumentIds = documentIds.slice(startIndex, endIndex); - - // Insert the items to the index - const response = await this.client.upsertItemBatch( - this.indexName, - this.prepareItemBatch(batchVectors, batchDocuments, batchDocumentIds) - ); - if (response instanceof VectorUpsertItemBatch.Success) { - // eslint-disable-next-line no-continue - continue; - } else if (response instanceof VectorUpsertItemBatch.Error) { - throw new Error(response.toString()); - } else { - throw new Error(`Unknown response type: ${response.toString()}`); - } - } - } - - /** - * Adds vectors to the index. Generates embeddings from the documents - * using the `Embeddings` instance passed to the constructor. - * @param documents Array of `Document` instances to be added to the index. - * @returns Promise that resolves when the documents have been added to the index. - */ - async addDocuments( - documents: Document[], - documentProps?: DocumentProps - ): Promise { - const texts = documents.map(({ pageContent }) => pageContent); - await this.addVectors( - await this.embeddings.embedDocuments(texts), - documents, - documentProps - ); - } - - /** - * Deletes vectors from the index by id. - * @param params The parameters to use to delete the vectors, specifically the ids. 
- */ - public async delete(params: DeleteProps): Promise { - const response = await this.client.deleteItemBatch( - this.indexName, - params.ids - ); - if (response instanceof VectorDeleteItemBatch.Success) { - // pass - } else if (response instanceof VectorDeleteItemBatch.Error) { - throw new Error(response.toString()); - } else { - throw new Error(`Unknown response type: ${response.toString()}`); - } - } - - /** - * Searches the index for the most similar vectors to the query vector. - * @param query The query vector. - * @param k The number of results to return. - * @returns Promise that resolves to the documents of the most similar vectors - * to the query vector. - */ - public async similaritySearchVectorWithScore( - query: number[], - k: number - ): Promise<[Document>, number][]> { - const response = await this.client.search(this.indexName, query, { - topK: k, - metadataFields: ALL_VECTOR_METADATA, - }); - if (response instanceof VectorSearch.Success) { - if (response.hits === undefined) { - return []; - } - - return response.hits().map((hit) => [ - new Document({ - pageContent: hit.metadata[this.textField]?.toString() ?? "", - metadata: Object.fromEntries( - Object.entries(hit.metadata).filter( - ([key]) => key !== this.textField - ) - ), - }), - hit.score, - ]); - } else if (response instanceof VectorSearch.Error) { - throw new Error(response.toString()); - } else { - throw new Error(`Unknown response type: ${response.toString()}`); - } - } - - /** - * Return documents selected using the maximal marginal relevance. - * Maximal marginal relevance optimizes for similarity to the query AND diversity - * among selected documents. - * - * @param {string} query - Text to look up documents similar to. - * @param {number} options.k - Number of documents to return. - * @param {number} options.fetchK - Number of documents to fetch before passing to the MMR algorithm. 
- * @param {number} options.lambda - Number between 0 and 1 that determines the degree of diversity among the results, - * where 0 corresponds to maximum diversity and 1 to minimum diversity. - * @param {this["FilterType"]} options.filter - Optional filter - * @param _callbacks - * - * @returns {Promise} - List of documents selected by maximal marginal relevance. - */ - async maxMarginalRelevanceSearch( - query: string, - options: MaxMarginalRelevanceSearchOptions - ): Promise { - const queryEmbedding = await this.embeddings.embedQuery(query); - const response = await this.client.searchAndFetchVectors( - this.indexName, - queryEmbedding, - { topK: options.fetchK ?? 20, metadataFields: ALL_VECTOR_METADATA } - ); - - if (response instanceof VectorSearchAndFetchVectors.Success) { - const hits = response.hits(); - - // Gather the embeddings of the search results - const embeddingList = hits.map((hit) => hit.vector); - - // Gather the ids of the most relevant results when applying MMR - const mmrIndexes = maximalMarginalRelevance( - queryEmbedding, - embeddingList, - options.lambda, - options.k - ); - - const finalResult = mmrIndexes.map((index) => { - const hit = hits[index]; - const { [this.textField]: pageContent, ...metadata } = hit.metadata; - return new Document({ metadata, pageContent: pageContent as string }); - }); - return finalResult; - } else if (response instanceof VectorSearchAndFetchVectors.Error) { - throw new Error(response.toString()); - } else { - throw new Error(`Unknown response type: ${response.toString()}`); - } - } - - /** - * Stores the documents in the index. - * - * Converts the documents to vectors using the `Embeddings` instance passed. - * @param texts The texts to store in the index. - * @param metadatas The metadata to store in the index. - * @param embeddings The embeddings instance to use to generate embeddings from the documents. - * @param dbConfig The configuration to use to instantiate the vector store. 
- * @param documentProps The properties of the documents to add to the index, specifically the ids. - * @returns Promise that resolves to the vector store. - */ - public static async fromTexts( - texts: string[], - metadatas: object[] | object, - embeddings: Embeddings, - dbConfig: MomentoVectorIndexLibArgs, - documentProps?: DocumentProps - ): Promise { - if (Array.isArray(metadatas) && texts.length !== metadatas.length) { - throw new Error( - `Number of texts (${texts.length}) does not equal number of metadatas (${metadatas.length})` - ); - } - - const docs: Document[] = []; - for (let i = 0; i < texts.length; i += 1) { - // eslint-disable-next-line @typescript-eslint/no-unsafe-assignment - const metadata: object = Array.isArray(metadatas) - ? metadatas[i] - : metadatas; - const newDoc = new Document({ - pageContent: texts[i], - metadata, - }); - docs.push(newDoc); - } - return await this.fromDocuments(docs, embeddings, dbConfig, documentProps); - } - - /** - * Stores the documents in the index. - * @param docs The documents to store in the index. - * @param embeddings The embeddings instance to use to generate embeddings from the documents. - * @param dbConfig The configuration to use to instantiate the vector store. - * @param documentProps The properties of the documents to add to the index, specifically the ids. - * @returns Promise that resolves to the vector store. 
- */ - public static async fromDocuments( - docs: Document[], - embeddings: Embeddings, - dbConfig: MomentoVectorIndexLibArgs, - documentProps?: DocumentProps - ): Promise { - const vectorStore = new MomentoVectorIndex(embeddings, dbConfig); - await vectorStore.addDocuments(docs, documentProps); - return vectorStore; - } -} +export * from "@langchain/community/vectorstores/momento_vector_index"; diff --git a/langchain/src/vectorstores/mongodb_atlas.ts b/langchain/src/vectorstores/mongodb_atlas.ts index 4815330bddb4..3d211e633bd9 100755 --- a/langchain/src/vectorstores/mongodb_atlas.ts +++ b/langchain/src/vectorstores/mongodb_atlas.ts @@ -1,279 +1 @@ -import type { Collection, Document as MongoDBDocument } from "mongodb"; -import { MaxMarginalRelevanceSearchOptions, VectorStore } from "./base.js"; -import { Embeddings } from "../embeddings/base.js"; -import { Document } from "../document.js"; -import { maximalMarginalRelevance } from "../util/math.js"; - -/** - * Type that defines the arguments required to initialize the - * MongoDBAtlasVectorSearch class. It includes the MongoDB collection, - * index name, text key, and embedding key. - */ -export type MongoDBAtlasVectorSearchLibArgs = { - readonly collection: Collection; - readonly indexName?: string; - readonly textKey?: string; - readonly embeddingKey?: string; -}; - -/** - * Type that defines the filter used in the - * similaritySearchVectorWithScore and maxMarginalRelevanceSearch methods. - * It includes pre-filter, post-filter pipeline, and a flag to include - * embeddings. - */ -type MongoDBAtlasFilter = { - preFilter?: MongoDBDocument; - postFilterPipeline?: MongoDBDocument[]; - includeEmbeddings?: boolean; -} & MongoDBDocument; - -/** - * Class that is a wrapper around MongoDB Atlas Vector Search. It is used - * to store embeddings in MongoDB documents, create a vector search index, - * and perform K-Nearest Neighbors (KNN) search with an approximate - * nearest neighbor algorithm. 
- */ -export class MongoDBAtlasVectorSearch extends VectorStore { - declare FilterType: MongoDBAtlasFilter; - - private readonly collection: Collection; - - private readonly indexName: string; - - private readonly textKey: string; - - private readonly embeddingKey: string; - - _vectorstoreType(): string { - return "mongodb_atlas"; - } - - constructor(embeddings: Embeddings, args: MongoDBAtlasVectorSearchLibArgs) { - super(embeddings, args); - this.collection = args.collection; - this.indexName = args.indexName ?? "default"; - this.textKey = args.textKey ?? "text"; - this.embeddingKey = args.embeddingKey ?? "embedding"; - } - - /** - * Method to add vectors and their corresponding documents to the MongoDB - * collection. - * @param vectors Vectors to be added. - * @param documents Corresponding documents to be added. - * @returns Promise that resolves when the vectors and documents have been added. - */ - async addVectors(vectors: number[][], documents: Document[]): Promise { - const docs = vectors.map((embedding, idx) => ({ - [this.textKey]: documents[idx].pageContent, - [this.embeddingKey]: embedding, - ...documents[idx].metadata, - })); - await this.collection.insertMany(docs); - } - - /** - * Method to add documents to the MongoDB collection. It first converts - * the documents to vectors using the embeddings and then calls the - * addVectors method. - * @param documents Documents to be added. - * @returns Promise that resolves when the documents have been added. - */ - async addDocuments(documents: Document[]): Promise { - const texts = documents.map(({ pageContent }) => pageContent); - return this.addVectors( - await this.embeddings.embedDocuments(texts), - documents - ); - } - - /** - * Method that performs a similarity search on the vectors stored in the - * MongoDB collection. It returns a list of documents and their - * corresponding similarity scores. - * @param query Query vector for the similarity search. 
- * @param k Number of nearest neighbors to return. - * @param filter Optional filter to be applied. - * @returns Promise that resolves to a list of documents and their corresponding similarity scores. - */ - async similaritySearchVectorWithScore( - query: number[], - k: number, - filter?: MongoDBAtlasFilter - ): Promise<[Document, number][]> { - const postFilterPipeline = filter?.postFilterPipeline ?? []; - const preFilter: MongoDBDocument | undefined = - filter?.preFilter || - filter?.postFilterPipeline || - filter?.includeEmbeddings - ? filter.preFilter - : filter; - const removeEmbeddingsPipeline = !filter?.includeEmbeddings - ? [ - { - $project: { - [this.embeddingKey]: 0, - }, - }, - ] - : []; - - const pipeline: MongoDBDocument[] = [ - { - $vectorSearch: { - queryVector: MongoDBAtlasVectorSearch.fixArrayPrecision(query), - index: this.indexName, - path: this.embeddingKey, - limit: k, - numCandidates: 10 * k, - ...(preFilter && { filter: preFilter }), - }, - }, - { - $set: { - score: { $meta: "vectorSearchScore" }, - }, - }, - ...removeEmbeddingsPipeline, - ...postFilterPipeline, - ]; - - const results = this.collection - .aggregate(pipeline) - .map<[Document, number]>((result) => { - const { score, [this.textKey]: text, ...metadata } = result; - return [new Document({ pageContent: text, metadata }), score]; - }); - - return results.toArray(); - } - - /** - * Return documents selected using the maximal marginal relevance. - * Maximal marginal relevance optimizes for similarity to the query AND diversity - * among selected documents. - * - * @param {string} query - Text to look up documents similar to. - * @param {number} options.k - Number of documents to return. - * @param {number} options.fetchK=20- Number of documents to fetch before passing to the MMR algorithm. - * @param {number} options.lambda=0.5 - Number between 0 and 1 that determines the degree of diversity among the results, - * where 0 corresponds to maximum diversity and 1 to minimum diversity. 
- * @param {MongoDBAtlasFilter} options.filter - Optional Atlas Search operator to pre-filter on document fields - * or post-filter following the knnBeta search. - * - * @returns {Promise} - List of documents selected by maximal marginal relevance. - */ - async maxMarginalRelevanceSearch( - query: string, - options: MaxMarginalRelevanceSearchOptions - ): Promise { - const { k, fetchK = 20, lambda = 0.5, filter } = options; - - const queryEmbedding = await this.embeddings.embedQuery(query); - - // preserve the original value of includeEmbeddings - const includeEmbeddingsFlag = options.filter?.includeEmbeddings || false; - - // update filter to include embeddings, as they will be used in MMR - const includeEmbeddingsFilter = { - ...filter, - includeEmbeddings: true, - }; - - const resultDocs = await this.similaritySearchVectorWithScore( - MongoDBAtlasVectorSearch.fixArrayPrecision(queryEmbedding), - fetchK, - includeEmbeddingsFilter - ); - - const embeddingList = resultDocs.map( - (doc) => doc[0].metadata[this.embeddingKey] - ); - - const mmrIndexes = maximalMarginalRelevance( - queryEmbedding, - embeddingList, - lambda, - k - ); - - return mmrIndexes.map((idx) => { - const doc = resultDocs[idx][0]; - - // remove embeddings if they were not requested originally - if (!includeEmbeddingsFlag) { - delete doc.metadata[this.embeddingKey]; - } - return doc; - }); - } - - /** - * Static method to create an instance of MongoDBAtlasVectorSearch from a - * list of texts. It first converts the texts to vectors and then adds - * them to the MongoDB collection. - * @param texts List of texts to be converted to vectors. - * @param metadatas Metadata for the texts. - * @param embeddings Embeddings to be used for conversion. - * @param dbConfig Database configuration for MongoDB Atlas. - * @returns Promise that resolves to a new instance of MongoDBAtlasVectorSearch. 
- */ - static async fromTexts( - texts: string[], - metadatas: object[] | object, - embeddings: Embeddings, - dbConfig: MongoDBAtlasVectorSearchLibArgs - ): Promise { - const docs: Document[] = []; - for (let i = 0; i < texts.length; i += 1) { - const metadata = Array.isArray(metadatas) ? metadatas[i] : metadatas; - const newDoc = new Document({ - pageContent: texts[i], - metadata, - }); - docs.push(newDoc); - } - return MongoDBAtlasVectorSearch.fromDocuments(docs, embeddings, dbConfig); - } - - /** - * Static method to create an instance of MongoDBAtlasVectorSearch from a - * list of documents. It first converts the documents to vectors and then - * adds them to the MongoDB collection. - * @param docs List of documents to be converted to vectors. - * @param embeddings Embeddings to be used for conversion. - * @param dbConfig Database configuration for MongoDB Atlas. - * @returns Promise that resolves to a new instance of MongoDBAtlasVectorSearch. - */ - static async fromDocuments( - docs: Document[], - embeddings: Embeddings, - dbConfig: MongoDBAtlasVectorSearchLibArgs - ): Promise { - const instance = new this(embeddings, dbConfig); - await instance.addDocuments(docs); - return instance; - } - - /** - * Static method to fix the precision of the array that ensures that - * every number in this array is always float when casted to other types. - * This is needed since MongoDB Atlas Vector Search does not cast integer - * inside vector search to float automatically. - * This method shall introduce a hint of error but should be safe to use - * since introduced error is very small, only applies to integer numbers - * returned by embeddings, and most embeddings shall not have precision - * as high as 15 decimal places. - * @param array Array of number to be fixed. 
- * @returns - */ - static fixArrayPrecision(array: number[]) { - return array.map((value) => { - if (Number.isInteger(value)) { - return value + 0.000000000000001; - } - return value; - }); - } -} +export * from "@langchain/community/vectorstores/mongodb_atlas"; diff --git a/langchain/src/vectorstores/myscale.ts b/langchain/src/vectorstores/myscale.ts index 3fdd997c44a0..d1c1a8cda53e 100644 --- a/langchain/src/vectorstores/myscale.ts +++ b/langchain/src/vectorstores/myscale.ts @@ -1,314 +1 @@ -import * as uuid from "uuid"; -import { ClickHouseClient, createClient } from "@clickhouse/client"; - -import { Embeddings } from "../embeddings/base.js"; -import { VectorStore } from "./base.js"; -import { Document } from "../document.js"; - -/** - * Arguments for the MyScaleStore class, which include the host, port, - * protocol, username, password, index type, index parameters, column map, - * database, table, and metric. - */ -export interface MyScaleLibArgs { - host: string; - port: string | number; - protocol?: string; - username: string; - password: string; - indexType?: string; - indexParam?: Record; - columnMap?: ColumnMap; - database?: string; - table?: string; - metric?: metric; -} - -/** - * Mapping of columns in the MyScale database. - */ -export interface ColumnMap { - id: string; - text: string; - vector: string; - metadata: string; -} - -/** - * Type of metric used in the MyScale database. - */ -export type metric = "L2" | "Cosine" | "IP"; - -/** - * Type for filtering search results in the MyScale database. - */ -export interface MyScaleFilter { - whereStr: string; -} - -/** - * Class for interacting with the MyScale database. It extends the - * VectorStore class and provides methods for adding vectors and - * documents, searching for similar vectors, and creating instances from - * texts or documents. 
- */ -export class MyScaleStore extends VectorStore { - declare FilterType: MyScaleFilter; - - private client: ClickHouseClient; - - private indexType: string; - - private indexParam: Record; - - private columnMap: ColumnMap; - - private database: string; - - private table: string; - - private metric: metric; - - private isInitialized = false; - - _vectorstoreType(): string { - return "myscale"; - } - - constructor(embeddings: Embeddings, args: MyScaleLibArgs) { - super(embeddings, args); - - this.indexType = args.indexType || "MSTG"; - this.indexParam = args.indexParam || {}; - this.columnMap = args.columnMap || { - id: "id", - text: "text", - vector: "vector", - metadata: "metadata", - }; - this.database = args.database || "default"; - this.table = args.table || "vector_table"; - this.metric = args.metric || "Cosine"; - - this.client = createClient({ - host: `${args.protocol ?? "https://"}${args.host}:${args.port}`, - username: args.username, - password: args.password, - session_id: uuid.v4(), - }); - } - - /** - * Method to add vectors to the MyScale database. - * @param vectors The vectors to add. - * @param documents The documents associated with the vectors. - * @returns Promise that resolves when the vectors have been added. - */ - async addVectors(vectors: number[][], documents: Document[]): Promise { - if (vectors.length === 0) { - return; - } - - if (!this.isInitialized) { - await this.initialize(vectors[0].length); - } - - const queryStr = this.buildInsertQuery(vectors, documents); - await this.client.exec({ query: queryStr }); - } - - /** - * Method to add documents to the MyScale database. - * @param documents The documents to add. - * @returns Promise that resolves when the documents have been added. 
- */ - async addDocuments(documents: Document[]): Promise { - return this.addVectors( - await this.embeddings.embedDocuments(documents.map((d) => d.pageContent)), - documents - ); - } - - /** - * Method to search for vectors that are similar to a given query vector. - * @param query The query vector. - * @param k The number of similar vectors to return. - * @param filter Optional filter for the search results. - * @returns Promise that resolves with an array of tuples, each containing a Document and a score. - */ - async similaritySearchVectorWithScore( - query: number[], - k: number, - filter?: this["FilterType"] - ): Promise<[Document, number][]> { - if (!this.isInitialized) { - await this.initialize(query.length); - } - const queryStr = this.buildSearchQuery(query, k, filter); - - const queryResultSet = await this.client.query({ query: queryStr }); - const queryResult: { - data: { text: string; metadata: object; dist: number }[]; - } = await queryResultSet.json(); - - const result: [Document, number][] = queryResult.data.map((item) => [ - new Document({ pageContent: item.text, metadata: item.metadata }), - item.dist, - ]); - - return result; - } - - /** - * Static method to create an instance of MyScaleStore from texts. - * @param texts The texts to use. - * @param metadatas The metadata associated with the texts. - * @param embeddings The embeddings to use. - * @param args The arguments for the MyScaleStore. - * @returns Promise that resolves with a new instance of MyScaleStore. - */ - static async fromTexts( - texts: string[], - metadatas: object | object[], - embeddings: Embeddings, - args: MyScaleLibArgs - ): Promise { - const docs: Document[] = []; - for (let i = 0; i < texts.length; i += 1) { - const metadata = Array.isArray(metadatas) ? 
metadatas[i] : metadatas; - const newDoc = new Document({ - pageContent: texts[i], - metadata, - }); - docs.push(newDoc); - } - return MyScaleStore.fromDocuments(docs, embeddings, args); - } - - /** - * Static method to create an instance of MyScaleStore from documents. - * @param docs The documents to use. - * @param embeddings The embeddings to use. - * @param args The arguments for the MyScaleStore. - * @returns Promise that resolves with a new instance of MyScaleStore. - */ - static async fromDocuments( - docs: Document[], - embeddings: Embeddings, - args: MyScaleLibArgs - ): Promise { - const instance = new this(embeddings, args); - await instance.addDocuments(docs); - return instance; - } - - /** - * Static method to create an instance of MyScaleStore from an existing - * index. - * @param embeddings The embeddings to use. - * @param args The arguments for the MyScaleStore. - * @returns Promise that resolves with a new instance of MyScaleStore. - */ - static async fromExistingIndex( - embeddings: Embeddings, - args: MyScaleLibArgs - ): Promise { - const instance = new this(embeddings, args); - - await instance.initialize(); - return instance; - } - - /** - * Method to initialize the MyScale database. - * @param dimension Optional dimension of the vectors. - * @returns Promise that resolves when the database has been initialized. - */ - private async initialize(dimension?: number): Promise { - const dim = dimension ?? 
(await this.embeddings.embedQuery("test")).length; - - let indexParamStr = ""; - for (const [key, value] of Object.entries(this.indexParam)) { - indexParamStr += `, '${key}=${value}'`; - } - - const query = ` - CREATE TABLE IF NOT EXISTS ${this.database}.${this.table}( - ${this.columnMap.id} String, - ${this.columnMap.text} String, - ${this.columnMap.vector} Array(Float32), - ${this.columnMap.metadata} JSON, - CONSTRAINT cons_vec_len CHECK length(${this.columnMap.vector}) = ${dim}, - VECTOR INDEX vidx ${this.columnMap.vector} TYPE ${this.indexType}('metric_type=${this.metric}'${indexParamStr}) - ) ENGINE = MergeTree ORDER BY ${this.columnMap.id} - `; - - await this.client.exec({ query: "SET allow_experimental_object_type=1" }); - await this.client.exec({ - query: "SET output_format_json_named_tuples_as_objects = 1", - }); - await this.client.exec({ query }); - this.isInitialized = true; - } - - /** - * Method to build an SQL query for inserting vectors and documents into - * the MyScale database. - * @param vectors The vectors to insert. - * @param documents The documents to insert. - * @returns The SQL query string. - */ - private buildInsertQuery(vectors: number[][], documents: Document[]): string { - const columnsStr = Object.values(this.columnMap).join(", "); - - const data: string[] = []; - for (let i = 0; i < vectors.length; i += 1) { - const vector = vectors[i]; - const document = documents[i]; - const item = [ - `'${uuid.v4()}'`, - `'${this.escapeString(document.pageContent)}'`, - `[${vector}]`, - `'${JSON.stringify(document.metadata)}'`, - ].join(", "); - data.push(`(${item})`); - } - const dataStr = data.join(", "); - - return ` - INSERT INTO TABLE - ${this.database}.${this.table}(${columnsStr}) - VALUES - ${dataStr} - `; - } - - private escapeString(str: string): string { - return str.replace(/\\/g, "\\\\").replace(/'/g, "\\'"); - } - - /** - * Method to build an SQL query for searching for similar vectors in the - * MyScale database. 
- * @param query The query vector. - * @param k The number of similar vectors to return. - * @param filter Optional filter for the search results. - * @returns The SQL query string. - */ - private buildSearchQuery( - query: number[], - k: number, - filter?: MyScaleFilter - ): string { - const order = this.metric === "IP" ? "DESC" : "ASC"; - - const whereStr = filter ? `PREWHERE ${filter.whereStr}` : ""; - return ` - SELECT ${this.columnMap.text} AS text, ${this.columnMap.metadata} AS metadata, dist - FROM ${this.database}.${this.table} - ${whereStr} - ORDER BY distance(${this.columnMap.vector}, [${query}]) AS dist ${order} - LIMIT ${k} - `; - } -} +export * from "@langchain/community/vectorstores/myscale"; diff --git a/langchain/src/vectorstores/neo4j_vector.ts b/langchain/src/vectorstores/neo4j_vector.ts index e7c496a6e9ed..0338e358a89b 100644 --- a/langchain/src/vectorstores/neo4j_vector.ts +++ b/langchain/src/vectorstores/neo4j_vector.ts @@ -1,731 +1 @@ -import neo4j from "neo4j-driver"; -import * as uuid from "uuid"; -import { Document } from "../document.js"; -import { Embeddings } from "../embeddings/base.js"; -import { VectorStore } from "./base.js"; - -export type SearchType = "vector" | "hybrid"; - -export type DistanceStrategy = "euclidean" | "cosine"; - -interface Neo4jVectorStoreArgs { - url: string; - username: string; - password: string; - database?: string; - preDeleteCollection?: boolean; - textNodeProperty?: string; - textNodeProperties?: string[]; - embeddingNodeProperty?: string; - keywordIndexName?: string; - indexName?: string; - searchType?: SearchType; - retrievalQuery?: string; - nodeLabel?: string; - createIdIndex?: boolean; -} - -const DEFAULT_SEARCH_TYPE = "vector"; -const DEFAULT_DISTANCE_STRATEGY = "cosine"; - -/** - * @security *Security note*: Make sure that the database connection uses credentials - * that are narrowly-scoped to only include necessary permissions. 
- * Failure to do so may result in data corruption or loss, since the calling - * code may attempt commands that would result in deletion, mutation - * of data if appropriately prompted or reading sensitive data if such - * data is present in the database. - * The best way to guard against such negative outcomes is to (as appropriate) - * limit the permissions granted to the credentials used with this tool. - * For example, creating read only users for the database is a good way to - * ensure that the calling code cannot mutate or delete data. - * - * @link See https://js.langchain.com/docs/security for more information. - */ -export class Neo4jVectorStore extends VectorStore { - private driver: neo4j.Driver; - - private database: string; - - private preDeleteCollection: boolean; - - private nodeLabel: string; - - private embeddingNodeProperty: string; - - private embeddingDimension: number; - - private textNodeProperty: string; - - private keywordIndexName: string; - - private indexName: string; - - private retrievalQuery: string; - - private searchType: SearchType; - - private distanceStrategy: DistanceStrategy = DEFAULT_DISTANCE_STRATEGY; - - _vectorstoreType(): string { - return "neo4jvector"; - } - - constructor(embeddings: Embeddings, config: Neo4jVectorStoreArgs) { - super(embeddings, config); - } - - static async initialize( - embeddings: Embeddings, - config: Neo4jVectorStoreArgs - ) { - const store = new Neo4jVectorStore(embeddings, config); - await store._initializeDriver(config); - await store._verifyConnectivity(); - - const { - preDeleteCollection = false, - nodeLabel = "Chunk", - textNodeProperty = "text", - embeddingNodeProperty = "embedding", - keywordIndexName = "keyword", - indexName = "vector", - retrievalQuery = "", - searchType = DEFAULT_SEARCH_TYPE, - } = config; - - store.embeddingDimension = (await embeddings.embedQuery("foo")).length; - store.preDeleteCollection = preDeleteCollection; - store.nodeLabel = nodeLabel; - store.textNodeProperty 
= textNodeProperty; - store.embeddingNodeProperty = embeddingNodeProperty; - store.keywordIndexName = keywordIndexName; - store.indexName = indexName; - store.retrievalQuery = retrievalQuery; - store.searchType = searchType; - - if (store.preDeleteCollection) { - await store._dropIndex(); - } - - return store; - } - - async _initializeDriver({ - url, - username, - password, - database = "neo4j", - }: Neo4jVectorStoreArgs) { - try { - this.driver = neo4j.driver(url, neo4j.auth.basic(username, password)); - this.database = database; - } catch (error) { - throw new Error( - "Could not create a Neo4j driver instance. Please check the connection details." - ); - } - } - - async _verifyConnectivity() { - await this.driver.verifyAuthentication(); - } - - async close() { - await this.driver.close(); - } - - async _dropIndex() { - try { - await this.query(` - MATCH (n:\`${this.nodeLabel}\`) - CALL { - WITH n - DETACH DELETE n - } - IN TRANSACTIONS OF 10000 ROWS; - `); - await this.query(`DROP INDEX ${this.indexName}`); - } catch (error) { - console.error("An error occurred while dropping the index:", error); - } - } - - // eslint-disable-next-line @typescript-eslint/no-explicit-any - async query(query: string, params: any = {}): Promise { - const session = this.driver.session({ database: this.database }); - const result = await session.run(query, params); - return toObjects(result.records); - } - - static async fromTexts( - texts: string[], - // eslint-disable-next-line @typescript-eslint/no-explicit-any - metadatas: any, - embeddings: Embeddings, - config: Neo4jVectorStoreArgs - ): Promise { - const docs = []; - - for (let i = 0; i < texts.length; i += 1) { - const metadata = Array.isArray(metadatas) ? 
metadatas[i] : metadatas; - const newDoc = new Document({ - pageContent: texts[i], - metadata, - }); - docs.push(newDoc); - } - - return Neo4jVectorStore.fromDocuments(docs, embeddings, config); - } - - static async fromDocuments( - docs: Document[], - embeddings: Embeddings, - config: Neo4jVectorStoreArgs - ): Promise { - const { - searchType = DEFAULT_SEARCH_TYPE, - createIdIndex = true, - textNodeProperties = [], - } = config; - - const store = await this.initialize(embeddings, config); - - const embeddingDimension = await store.retrieveExistingIndex(); - - if (!embeddingDimension) { - await store.createNewIndex(); - } else if (store.embeddingDimension !== embeddingDimension) { - throw new Error( - `Index with name "${store.indexName}" already exists. The provided embedding function and vector index dimensions do not match. - Embedding function dimension: ${store.embeddingDimension} - Vector index dimension: ${embeddingDimension}` - ); - } - - if (searchType === "hybrid") { - const ftsNodeLabel = await store.retrieveExistingFtsIndex(); - - if (!ftsNodeLabel) { - await store.createNewKeywordIndex(textNodeProperties); - } else { - if (ftsNodeLabel !== store.nodeLabel) { - throw Error( - "Vector and keyword index don't index the same node label" - ); - } - } - } - - if (createIdIndex) { - await store.query( - `CREATE CONSTRAINT IF NOT EXISTS FOR (n:${store.nodeLabel}) REQUIRE n.id IS UNIQUE;` - ); - } - - await store.addDocuments(docs); - - return store; - } - - static async fromExistingIndex( - embeddings: Embeddings, - config: Neo4jVectorStoreArgs - ) { - const { searchType = DEFAULT_SEARCH_TYPE, keywordIndexName = "keyword" } = - config; - - if (searchType === "hybrid" && !keywordIndexName) { - throw Error( - "keyword_index name has to be specified when using hybrid search option" - ); - } - - const store = await this.initialize(embeddings, config); - const embeddingDimension = await store.retrieveExistingIndex(); - - if (!embeddingDimension) { - throw Error( - 
"The specified vector index name does not exist. Make sure to check if you spelled it correctly" - ); - } - - if (store.embeddingDimension !== embeddingDimension) { - throw new Error( - `The provided embedding function and vector index dimensions do not match. - Embedding function dimension: ${store.embeddingDimension} - Vector index dimension: ${embeddingDimension}` - ); - } - - if (searchType === "hybrid") { - const ftsNodeLabel = await store.retrieveExistingFtsIndex(); - - if (!ftsNodeLabel) { - throw Error( - "The specified keyword index name does not exist. Make sure to check if you spelled it correctly" - ); - } else { - if (ftsNodeLabel !== store.nodeLabel) { - throw Error( - "Vector and keyword index don't index the same node label" - ); - } - } - } - - return store; - } - - static async fromExistingGraph( - embeddings: Embeddings, - config: Neo4jVectorStoreArgs - ) { - const { - textNodeProperties = [], - embeddingNodeProperty, - searchType = DEFAULT_SEARCH_TYPE, - retrievalQuery = "", - nodeLabel, - } = config; - - let _retrievalQuery = retrievalQuery; - - if (textNodeProperties.length === 0) { - throw Error( - "Parameter `text_node_properties` must not be an empty array" - ); - } - - if (!retrievalQuery) { - _retrievalQuery = ` - RETURN reduce(str='', k IN ${JSON.stringify(textNodeProperties)} | - str + '\\n' + k + ': ' + coalesce(node[k], '')) AS text, - node {.*, \`${embeddingNodeProperty}\`: Null, id: Null, ${textNodeProperties - .map((prop) => `\`${prop}\`: Null`) - .join(", ")} } AS metadata, score - `; - } - - const store = await this.initialize(embeddings, { - ...config, - retrievalQuery: _retrievalQuery, - }); - - const embeddingDimension = await store.retrieveExistingIndex(); - - if (!embeddingDimension) { - await store.createNewIndex(); - } else if (store.embeddingDimension !== embeddingDimension) { - throw new Error( - `Index with name ${store.indexName} already exists. 
The provided embedding function and vector index dimensions do not match.\nEmbedding function dimension: ${store.embeddingDimension}\nVector index dimension: ${embeddingDimension}` - ); - } - - if (searchType === "hybrid") { - const ftsNodeLabel = await store.retrieveExistingFtsIndex( - textNodeProperties - ); - - if (!ftsNodeLabel) { - await store.createNewKeywordIndex(textNodeProperties); - } else { - if (ftsNodeLabel !== store.nodeLabel) { - throw Error( - "Vector and keyword index don't index the same node label" - ); - } - } - } - - // eslint-disable-next-line no-constant-condition - while (true) { - const fetchQuery = ` - MATCH (n:\`${nodeLabel}\`) - WHERE n.${embeddingNodeProperty} IS null - AND any(k in $props WHERE n[k] IS NOT null) - RETURN elementId(n) AS id, reduce(str='', k IN $props | - str + '\\n' + k + ':' + coalesce(n[k], '')) AS text - LIMIT 1000 - `; - - const data = await store.query(fetchQuery, { props: textNodeProperties }); - - if (!data) { - continue; - } - - const textEmbeddings = await embeddings.embedDocuments( - data.map((el) => el.text) - ); - - const params = { - data: data.map((el, index) => ({ - id: el.id, - embedding: textEmbeddings[index], - })), - }; - - await store.query( - ` - UNWIND $data AS row - MATCH (n:\`${nodeLabel}\`) - WHERE elementId(n) = row.id - CALL db.create.setVectorProperty(n, '${embeddingNodeProperty}', row.embedding) - YIELD node RETURN count(*) - `, - params - ); - - if (data.length < 1000) { - break; - } - } - - return store; - } - - async createNewIndex(): Promise { - const indexQuery = ` - CALL db.index.vector.createNodeIndex( - $index_name, - $node_label, - $embedding_node_property, - toInteger($embedding_dimension), - $similarity_metric - ) - `; - - const parameters = { - index_name: this.indexName, - node_label: this.nodeLabel, - embedding_node_property: this.embeddingNodeProperty, - embedding_dimension: this.embeddingDimension, - similarity_metric: this.distanceStrategy, - }; - - await 
this.query(indexQuery, parameters); - } - - async retrieveExistingIndex() { - let indexInformation = await this.query( - ` - SHOW INDEXES YIELD name, type, labelsOrTypes, properties, options - WHERE type = 'VECTOR' AND (name = $index_name - OR (labelsOrTypes[0] = $node_label AND - properties[0] = $embedding_node_property)) - RETURN name, labelsOrTypes, properties, options - `, - { - index_name: this.indexName, - node_label: this.nodeLabel, - embedding_node_property: this.embeddingNodeProperty, - } - ); - - if (indexInformation) { - indexInformation = this.sortByIndexName(indexInformation, this.indexName); - - try { - const [index] = indexInformation; - const [labelOrType] = index.labelsOrTypes; - const [property] = index.properties; - - this.indexName = index.name; - this.nodeLabel = labelOrType; - this.embeddingNodeProperty = property; - - const embeddingDimension = - index.options.indexConfig["vector.dimensions"]; - return Number(embeddingDimension); - } catch (error) { - return null; - } - } - - return null; - } - - async retrieveExistingFtsIndex( - textNodeProperties: string[] = [] - ): Promise { - const indexInformation = await this.query( - ` - SHOW INDEXES YIELD name, type, labelsOrTypes, properties, options - WHERE type = 'FULLTEXT' AND (name = $keyword_index_name - OR (labelsOrTypes = [$node_label] AND - properties = $text_node_property)) - RETURN name, labelsOrTypes, properties, options - `, - { - keyword_index_name: this.keywordIndexName, - node_label: this.nodeLabel, - text_node_property: - textNodeProperties.length > 0 - ? 
textNodeProperties - : [this.textNodeProperty], - } - ); - - if (indexInformation) { - // Sort the index information by index name - const sortedIndexInformation = this.sortByIndexName( - indexInformation, - this.indexName - ); - - try { - const [index] = sortedIndexInformation; - const [labelOrType] = index.labelsOrTypes; - const [property] = index.properties; - - this.keywordIndexName = index.name; - this.textNodeProperty = property; - this.nodeLabel = labelOrType; - - return labelOrType; - } catch (error) { - return null; - } - } - - return null; - } - - async createNewKeywordIndex( - textNodeProperties: string[] = [] - ): Promise { - const nodeProps = - textNodeProperties.length > 0 - ? textNodeProperties - : [this.textNodeProperty]; - - // Construct the Cypher query to create a new full text index - const ftsIndexQuery = ` - CREATE FULLTEXT INDEX ${this.keywordIndexName} - FOR (n:\`${this.nodeLabel}\`) ON EACH - [${nodeProps.map((prop) => `n.\`${prop}\``).join(", ")}] - `; - - await this.query(ftsIndexQuery); - } - - sortByIndexName( - // eslint-disable-next-line @typescript-eslint/no-explicit-any - values: Array<{ [key: string]: any }>, - indexName: string - // eslint-disable-next-line @typescript-eslint/no-explicit-any - ): Array<{ [key: string]: any }> { - return values.sort( - (a, b) => - (a.index_name === indexName ? -1 : 0) - - (b.index_name === indexName ? 
-1 : 0) - ); - } - - async addVectors( - vectors: number[][], - documents: Document[], - // eslint-disable-next-line @typescript-eslint/no-explicit-any - metadatas?: Record[], - ids?: string[] - ): Promise { - let _ids = ids; - let _metadatas = metadatas; - - if (!_ids) { - _ids = documents.map(() => uuid.v1()); - } - - if (!metadatas) { - _metadatas = documents.map(() => ({})); - } - - const importQuery = ` - UNWIND $data AS row - CALL { - WITH row - MERGE (c:\`${this.nodeLabel}\` {id: row.id}) - WITH c, row - CALL db.create.setVectorProperty(c, '${this.embeddingNodeProperty}', row.embedding) - YIELD node - SET c.\`${this.textNodeProperty}\` = row.text - SET c += row.metadata - } IN TRANSACTIONS OF 1000 ROWS - `; - - const parameters = { - data: documents.map(({ pageContent, metadata }, index) => ({ - text: pageContent, - metadata: _metadatas ? _metadatas[index] : metadata, - embedding: vectors[index], - id: _ids ? _ids[index] : null, - })), - }; - - await this.query(importQuery, parameters); - - return _ids; - } - - async addDocuments(documents: Document[]): Promise { - const texts = documents.map(({ pageContent }) => pageContent); - - return this.addVectors( - await this.embeddings.embedDocuments(texts), - documents - ); - } - - async similaritySearch(query: string, k = 4): Promise { - const embedding = await this.embeddings.embedQuery(query); - - const results = await this.similaritySearchVectorWithScore( - embedding, - k, - query - ); - - return results.map((result) => result[0]); - } - - async similaritySearchVectorWithScore( - vector: number[], - k: number, - query: string - ): Promise<[Document, number][]> { - const defaultRetrieval = ` - RETURN node.${this.textNodeProperty} AS text, score, - node {.*, ${this.textNodeProperty}: Null, - ${this.embeddingNodeProperty}: Null, id: Null } AS metadata - `; - - const retrievalQuery = this.retrievalQuery - ? 
this.retrievalQuery - : defaultRetrieval; - - const readQuery = `${getSearchIndexQuery( - this.searchType - )} ${retrievalQuery}`; - - const parameters = { - index: this.indexName, - k: Number(k), - embedding: vector, - keyword_index: this.keywordIndexName, - query, - }; - const results = await this.query(readQuery, parameters); - - if (results) { - // eslint-disable-next-line @typescript-eslint/no-explicit-any - const docs: [Document, number][] = results.map((result: any) => [ - new Document({ - pageContent: result.text, - metadata: Object.fromEntries( - Object.entries(result.metadata).filter(([_, v]) => v !== null) - ), - }), - result.score, - ]); - - return docs; - } - - return []; - } -} - -function toObjects(records: neo4j.Record[]) { - // eslint-disable-next-line @typescript-eslint/no-explicit-any - const recordValues: Record[] = records.map((record) => { - const rObj = record.toObject(); - // eslint-disable-next-line @typescript-eslint/no-explicit-any - const out: { [key: string]: any } = {}; - Object.keys(rObj).forEach((key) => { - out[key] = itemIntToString(rObj[key]); - }); - return out; - }); - return recordValues; -} - -// eslint-disable-next-line @typescript-eslint/no-explicit-any -function itemIntToString(item: any): any { - if (neo4j.isInt(item)) return item.toString(); - if (Array.isArray(item)) return item.map((ii) => itemIntToString(ii)); - if (["number", "string", "boolean"].indexOf(typeof item) !== -1) return item; - if (item === null) return item; - if (typeof item === "object") return objIntToString(item); -} - -// eslint-disable-next-line @typescript-eslint/no-explicit-any -function objIntToString(obj: any) { - const entry = extractFromNeoObjects(obj); - // eslint-disable-next-line @typescript-eslint/no-explicit-any - let newObj: any = null; - if (Array.isArray(entry)) { - newObj = entry.map((item) => itemIntToString(item)); - } else if (entry !== null && typeof entry === "object") { - newObj = {}; - Object.keys(entry).forEach((key) => { - 
newObj[key] = itemIntToString(entry[key]); - }); - } - return newObj; -} - -// eslint-disable-next-line @typescript-eslint/no-explicit-any -function extractFromNeoObjects(obj: any) { - if ( - // eslint-disable-next-line - obj instanceof (neo4j.types.Node as any) || - // eslint-disable-next-line - obj instanceof (neo4j.types.Relationship as any) - ) { - return obj.properties; - // eslint-disable-next-line - } else if (obj instanceof (neo4j.types.Path as any)) { - // eslint-disable-next-line - return [].concat.apply([], extractPathForRows(obj)); - } - return obj; -} - -function extractPathForRows(path: neo4j.Path) { - let { segments } = path; - // Zero length path. No relationship, end === start - if (!Array.isArray(path.segments) || path.segments.length < 1) { - // eslint-disable-next-line @typescript-eslint/no-explicit-any - segments = [{ ...path, end: null } as any]; - } - // eslint-disable-next-line @typescript-eslint/no-explicit-any - return segments.map((segment: any) => - [ - objIntToString(segment.start), - objIntToString(segment.relationship), - objIntToString(segment.end), - ].filter((part) => part !== null) - ); -} - -function getSearchIndexQuery(searchType: SearchType): string { - const typeToQueryMap: { [key in SearchType]: string } = { - vector: - "CALL db.index.vector.queryNodes($index, $k, $embedding) YIELD node, score", - hybrid: ` - CALL { - CALL db.index.vector.queryNodes($index, $k, $embedding) YIELD node, score - RETURN node, score UNION - CALL db.index.fulltext.queryNodes($keyword_index, $query, {limit: $k}) YIELD node, score - WITH collect({node: node, score: score}) AS nodes, max(score) AS max - UNWIND nodes AS n - RETURN n.node AS node, (n.score / max) AS score - } - WITH node, max(score) AS score ORDER BY score DESC LIMIT toInteger($k) - `, - }; - - return typeToQueryMap[searchType]; -} +export * from "@langchain/community/vectorstores/neo4j_vector"; diff --git a/langchain/src/vectorstores/opensearch.ts 
b/langchain/src/vectorstores/opensearch.ts index cfd9fcd87bbe..1e8e2d6058fd 100644 --- a/langchain/src/vectorstores/opensearch.ts +++ b/langchain/src/vectorstores/opensearch.ts @@ -1,326 +1 @@ -import { Client, RequestParams, errors } from "@opensearch-project/opensearch"; -import * as uuid from "uuid"; -import { Embeddings } from "../embeddings/base.js"; -import { Document } from "../document.js"; -import { VectorStore } from "./base.js"; - -type OpenSearchEngine = "nmslib" | "hnsw"; -type OpenSearchSpaceType = "l2" | "cosinesimil" | "ip"; - -/** - * Interface defining the options for vector search in OpenSearch. It - * includes the engine type, space type, and parameters for the HNSW - * algorithm. - */ -interface VectorSearchOptions { - readonly engine?: OpenSearchEngine; - readonly spaceType?: OpenSearchSpaceType; - readonly m?: number; - readonly efConstruction?: number; - readonly efSearch?: number; -} - -/** - * Interface defining the arguments required to create an instance of the - * OpenSearchVectorStore class. It includes the OpenSearch client, index - * name, and vector search options. - */ -export interface OpenSearchClientArgs { - readonly client: Client; - readonly indexName?: string; - - readonly vectorSearchOptions?: VectorSearchOptions; -} - -/** - * Type alias for an object. It's used to define filters for OpenSearch - * queries. - */ -type OpenSearchFilter = object; - -/** - * Class that provides a wrapper around the OpenSearch service for vector - * search. It provides methods for adding documents and vectors to the - * OpenSearch index, searching for similar vectors, and managing the - * OpenSearch index. 
- */ -export class OpenSearchVectorStore extends VectorStore { - declare FilterType: OpenSearchFilter; - - private readonly client: Client; - - private readonly indexName: string; - - private readonly engine: OpenSearchEngine; - - private readonly spaceType: OpenSearchSpaceType; - - private readonly efConstruction: number; - - private readonly efSearch: number; - - private readonly m: number; - - _vectorstoreType(): string { - return "opensearch"; - } - - constructor(embeddings: Embeddings, args: OpenSearchClientArgs) { - super(embeddings, args); - - this.spaceType = args.vectorSearchOptions?.spaceType ?? "l2"; - this.engine = args.vectorSearchOptions?.engine ?? "nmslib"; - this.m = args.vectorSearchOptions?.m ?? 16; - this.efConstruction = args.vectorSearchOptions?.efConstruction ?? 512; - this.efSearch = args.vectorSearchOptions?.efSearch ?? 512; - - this.client = args.client; - this.indexName = args.indexName ?? "documents"; - } - - /** - * Method to add documents to the OpenSearch index. It first converts the - * documents to vectors using the embeddings, then adds the vectors to the - * index. - * @param documents The documents to be added to the OpenSearch index. - * @returns Promise resolving to void. - */ - async addDocuments(documents: Document[]): Promise { - const texts = documents.map(({ pageContent }) => pageContent); - return this.addVectors( - await this.embeddings.embedDocuments(texts), - documents - ); - } - - /** - * Method to add vectors to the OpenSearch index. It ensures the index - * exists, then adds the vectors and associated documents to the index. - * @param vectors The vectors to be added to the OpenSearch index. - * @param documents The documents associated with the vectors. - * @param options Optional parameter that can contain the IDs for the documents. - * @returns Promise resolving to void. 
- */ - async addVectors( - vectors: number[][], - documents: Document[], - options?: { ids?: string[] } - ): Promise { - await this.ensureIndexExists( - vectors[0].length, - this.engine, - this.spaceType, - this.efSearch, - this.efConstruction, - this.m - ); - const documentIds = - options?.ids ?? Array.from({ length: vectors.length }, () => uuid.v4()); - const operations = vectors.flatMap((embedding, idx) => [ - { - index: { - _index: this.indexName, - _id: documentIds[idx], - }, - }, - { - embedding, - metadata: documents[idx].metadata, - text: documents[idx].pageContent, - }, - ]); - await this.client.bulk({ body: operations }); - await this.client.indices.refresh({ index: this.indexName }); - } - - /** - * Method to perform a similarity search on the OpenSearch index using a - * query vector. It returns the k most similar documents and their scores. - * @param query The query vector. - * @param k The number of similar documents to return. - * @param filter Optional filter for the OpenSearch query. - * @returns Promise resolving to an array of tuples, each containing a Document and its score. - */ - async similaritySearchVectorWithScore( - query: number[], - k: number, - filter?: OpenSearchFilter | undefined - ): Promise<[Document, number][]> { - const search: RequestParams.Search = { - index: this.indexName, - body: { - query: { - bool: { - filter: { bool: { must: this.buildMetadataTerms(filter) } }, - must: [ - { - knn: { - embedding: { vector: query, k }, - }, - }, - ], - }, - }, - size: k, - }, - }; - - const { body } = await this.client.search(search); - - // eslint-disable-next-line @typescript-eslint/no-explicit-any - return body.hits.hits.map((hit: any) => [ - new Document({ - pageContent: hit._source.text, - metadata: hit._source.metadata, - }), - hit._score, - ]); - } - - /** - * Static method to create a new OpenSearchVectorStore from an array of - * texts, their metadata, embeddings, and OpenSearch client arguments. 
- * @param texts The texts to be converted into documents and added to the OpenSearch index. - * @param metadatas The metadata associated with the texts. Can be an array of objects or a single object. - * @param embeddings The embeddings used to convert the texts into vectors. - * @param args The OpenSearch client arguments. - * @returns Promise resolving to a new instance of OpenSearchVectorStore. - */ - static fromTexts( - texts: string[], - metadatas: object[] | object, - embeddings: Embeddings, - args: OpenSearchClientArgs - ): Promise { - const documents = texts.map((text, idx) => { - const metadata = Array.isArray(metadatas) ? metadatas[idx] : metadatas; - return new Document({ pageContent: text, metadata }); - }); - - return OpenSearchVectorStore.fromDocuments(documents, embeddings, args); - } - - /** - * Static method to create a new OpenSearchVectorStore from an array of - * Documents, embeddings, and OpenSearch client arguments. - * @param docs The documents to be added to the OpenSearch index. - * @param embeddings The embeddings used to convert the documents into vectors. - * @param dbConfig The OpenSearch client arguments. - * @returns Promise resolving to a new instance of OpenSearchVectorStore. - */ - static async fromDocuments( - docs: Document[], - embeddings: Embeddings, - dbConfig: OpenSearchClientArgs - ): Promise { - const store = new OpenSearchVectorStore(embeddings, dbConfig); - await store.addDocuments(docs).then(() => store); - return store; - } - - /** - * Static method to create a new OpenSearchVectorStore from an existing - * OpenSearch index, embeddings, and OpenSearch client arguments. - * @param embeddings The embeddings used to convert the documents into vectors. - * @param dbConfig The OpenSearch client arguments. - * @returns Promise resolving to a new instance of OpenSearchVectorStore. 
- */ - static async fromExistingIndex( - embeddings: Embeddings, - dbConfig: OpenSearchClientArgs - ): Promise { - const store = new OpenSearchVectorStore(embeddings, dbConfig); - await store.client.cat.indices({ index: store.indexName }); - return store; - } - - private async ensureIndexExists( - dimension: number, - engine = "nmslib", - spaceType = "l2", - efSearch = 512, - efConstruction = 512, - m = 16 - ): Promise { - const body = { - settings: { - index: { - number_of_shards: 5, - number_of_replicas: 1, - knn: true, - "knn.algo_param.ef_search": efSearch, - }, - }, - mappings: { - dynamic_templates: [ - { - // map all metadata properties to be keyword - "metadata.*": { - match_mapping_type: "*", - mapping: { type: "keyword" }, - }, - }, - ], - properties: { - text: { type: "text" }, - metadata: { type: "object" }, - embedding: { - type: "knn_vector", - dimension, - method: { - name: "hnsw", - engine, - space_type: spaceType, - parameters: { ef_construction: efConstruction, m }, - }, - }, - }, - }, - }; - - const indexExists = await this.doesIndexExist(); - if (indexExists) return; - - await this.client.indices.create({ index: this.indexName, body }); - } - - private buildMetadataTerms( - filter?: OpenSearchFilter - ): { [key: string]: Record }[] { - if (filter == null) return []; - const result = []; - for (const [key, value] of Object.entries(filter)) { - const aggregatorKey = Array.isArray(value) ? "terms" : "term"; - result.push({ [aggregatorKey]: { [`metadata.${key}`]: value } }); - } - return result; - } - - /** - * Method to check if the OpenSearch index exists. - * @returns Promise resolving to a boolean indicating whether the index exists. 
- */ - async doesIndexExist(): Promise { - try { - await this.client.cat.indices({ index: this.indexName }); - return true; - } catch (err: unknown) { - // eslint-disable-next-line no-instanceof/no-instanceof - if (err instanceof errors.ResponseError && err.statusCode === 404) { - return false; - } - throw err; - } - } - - /** - * Method to delete the OpenSearch index if it exists. - * @returns Promise resolving to void. - */ - async deleteIfExists(): Promise { - const indexExists = await this.doesIndexExist(); - if (!indexExists) return; - - await this.client.indices.delete({ index: this.indexName }); - } -} +export * from "@langchain/community/vectorstores/opensearch"; diff --git a/langchain/src/vectorstores/pgvector.ts b/langchain/src/vectorstores/pgvector.ts index 023d64966153..026732814617 100644 --- a/langchain/src/vectorstores/pgvector.ts +++ b/langchain/src/vectorstores/pgvector.ts @@ -1,440 +1 @@ -import pg, { type Pool, type PoolClient, type PoolConfig } from "pg"; -import { VectorStore } from "./base.js"; -import { Embeddings } from "../embeddings/base.js"; -import { Document } from "../document.js"; -import { getEnvironmentVariable } from "../util/env.js"; - -type Metadata = Record; - -/** - * Interface that defines the arguments required to create a - * `PGVectorStore` instance. It includes Postgres connection options, - * table name, filter, and verbosity level. - */ -export interface PGVectorStoreArgs { - postgresConnectionOptions: PoolConfig; - tableName: string; - collectionTableName?: string; - collectionName?: string; - collectionMetadata?: Metadata | null; - columns?: { - idColumnName?: string; - vectorColumnName?: string; - contentColumnName?: string; - metadataColumnName?: string; - }; - filter?: Metadata; - verbose?: boolean; - /** - * The amount of documents to chunk by when - * adding vectors. - * @default 500 - */ - chunkSize?: number; -} - -/** - * Class that provides an interface to a Postgres vector database. 
It - * extends the `VectorStore` base class and implements methods for adding - * documents and vectors, performing similarity searches, and ensuring the - * existence of a table in the database. - */ -export class PGVectorStore extends VectorStore { - declare FilterType: Metadata; - - tableName: string; - - collectionTableName?: string; - - collectionName = "langchain"; - - collectionMetadata: Metadata | null; - - idColumnName: string; - - vectorColumnName: string; - - contentColumnName: string; - - metadataColumnName: string; - - filter?: Metadata; - - _verbose?: boolean; - - pool: Pool; - - client?: PoolClient; - - chunkSize = 500; - - _vectorstoreType(): string { - return "pgvector"; - } - - private constructor(embeddings: Embeddings, config: PGVectorStoreArgs) { - super(embeddings, config); - this.tableName = config.tableName; - this.collectionTableName = config.collectionTableName; - this.collectionName = config.collectionName ?? "langchain"; - this.collectionMetadata = config.collectionMetadata ?? null; - this.filter = config.filter; - - this.vectorColumnName = config.columns?.vectorColumnName ?? "embedding"; - this.contentColumnName = config.columns?.contentColumnName ?? "text"; - this.idColumnName = config.columns?.idColumnName ?? "id"; - this.metadataColumnName = config.columns?.metadataColumnName ?? "metadata"; - - const pool = new pg.Pool(config.postgresConnectionOptions); - this.pool = pool; - this.chunkSize = config.chunkSize ?? 500; - - this._verbose = - getEnvironmentVariable("LANGCHAIN_VERBOSE") === "true" ?? - !!config.verbose; - } - - /** - * Static method to create a new `PGVectorStore` instance from a - * connection. It creates a table if one does not exist, and calls - * `connect` to return a new instance of `PGVectorStore`. - * - * @param embeddings - Embeddings instance. - * @param fields - `PGVectorStoreArgs` instance. - * @returns A new instance of `PGVectorStore`. 
- */ - static async initialize( - embeddings: Embeddings, - config: PGVectorStoreArgs - ): Promise { - const postgresqlVectorStore = new PGVectorStore(embeddings, config); - - await postgresqlVectorStore._initializeClient(); - await postgresqlVectorStore.ensureTableInDatabase(); - if (postgresqlVectorStore.collectionTableName) { - await postgresqlVectorStore.ensureCollectionTableInDatabase(); - } - - return postgresqlVectorStore; - } - - protected async _initializeClient() { - this.client = await this.pool.connect(); - } - - /** - * Method to add documents to the vector store. It converts the documents into - * vectors, and adds them to the store. - * - * @param documents - Array of `Document` instances. - * @returns Promise that resolves when the documents have been added. - */ - async addDocuments(documents: Document[]): Promise { - const texts = documents.map(({ pageContent }) => pageContent); - - return this.addVectors( - await this.embeddings.embedDocuments(texts), - documents - ); - } - - /** - * Inserts a row for the collectionName provided at initialization if it does not - * exist and returns the collectionId. - * - * @returns The collectionId for the given collectionName. - */ - async getOrCreateCollection(): Promise { - const queryString = ` - SELECT uuid from ${this.collectionTableName} - WHERE name = $1; - `; - const queryResult = await this.pool.query(queryString, [ - this.collectionName, - ]); - let collectionId = queryResult.rows[0]?.uuid; - - if (!collectionId) { - const insertString = ` - INSERT INTO ${this.collectionTableName}( - uuid, - name, - cmetadata - ) - VALUES ( - uuid_generate_v4(), - $1, - $2 - ) - RETURNING uuid; - `; - const insertResult = await this.pool.query(insertString, [ - this.collectionName, - this.collectionMetadata, - ]); - collectionId = insertResult.rows[0]?.uuid; - } - - return collectionId; - } - - /** - * Generates the SQL placeholders for a specific row at the provided index. 
- * - * @param index - The index of the row for which placeholders need to be generated. - * @param numOfColumns - The number of columns we are inserting data into. - * @returns The SQL placeholders for the row values. - */ - private generatePlaceholderForRowAt( - index: number, - numOfColumns: number - ): string { - const placeholders = []; - for (let i = 0; i < numOfColumns; i += 1) { - placeholders.push(`$${index * numOfColumns + i + 1}`); - } - return `(${placeholders.join(", ")})`; - } - - /** - * Constructs the SQL query for inserting rows into the specified table. - * - * @param rows - The rows of data to be inserted, consisting of values and records. - * @param chunkIndex - The starting index for generating query placeholders based on chunk positioning. - * @returns The complete SQL INSERT INTO query string. - */ - private async buildInsertQuery(rows: (string | Record)[][]) { - let collectionId; - if (this.collectionTableName) { - collectionId = await this.getOrCreateCollection(); - } - - const columns = [ - this.contentColumnName, - this.vectorColumnName, - this.metadataColumnName, - ]; - - if (collectionId) { - columns.push("collection_id"); - } - - const valuesPlaceholders = rows - .map((_, j) => this.generatePlaceholderForRowAt(j, columns.length)) - .join(", "); - - const text = ` - INSERT INTO ${this.tableName}( - ${columns} - ) - VALUES ${valuesPlaceholders} - `; - return text; - } - - /** - * Method to add vectors to the vector store. It converts the vectors into - * rows and inserts them into the database. - * - * @param vectors - Array of vectors. - * @param documents - Array of `Document` instances. - * @returns Promise that resolves when the vectors have been added. 
- */ - async addVectors(vectors: number[][], documents: Document[]): Promise { - const rows = []; - let collectionId; - if (this.collectionTableName) { - collectionId = await this.getOrCreateCollection(); - } - - for (let i = 0; i < vectors.length; i += 1) { - const values = []; - const embedding = vectors[i]; - const embeddingString = `[${embedding.join(",")}]`; - values.push( - documents[i].pageContent, - embeddingString, - documents[i].metadata - ); - if (collectionId) { - values.push(collectionId); - } - rows.push(values); - } - - for (let i = 0; i < rows.length; i += this.chunkSize) { - const chunk = rows.slice(i, i + this.chunkSize); - const insertQuery = await this.buildInsertQuery(chunk); - const flatValues = chunk.flat(); - try { - await this.pool.query(insertQuery, flatValues); - } catch (e) { - console.error(e); - throw new Error(`Error inserting: ${(e as Error).message}`); - } - } - } - - /** - * Method to perform a similarity search in the vector store. It returns - * the `k` most similar documents to the query vector, along with their - * similarity scores. - * - * @param query - Query vector. - * @param k - Number of most similar documents to return. - * @param filter - Optional filter to apply to the search. - * @returns Promise that resolves with an array of tuples, each containing a `Document` and its similarity score. - */ - async similaritySearchVectorWithScore( - query: number[], - k: number, - filter?: this["FilterType"] - ): Promise<[Document, number][]> { - const embeddingString = `[${query.join(",")}]`; - const _filter = filter ?? "{}"; - let collectionId; - if (this.collectionTableName) { - collectionId = await this.getOrCreateCollection(); - } - - const parameters = [embeddingString, _filter, k]; - if (collectionId) { - parameters.push(collectionId); - } - - const queryString = ` - SELECT *, ${this.vectorColumnName} <=> $1 as "_distance" - FROM ${this.tableName} - WHERE ${this.metadataColumnName}::jsonb @> $2 - ${collectionId ? 
"AND collection_id = $4" : ""} - ORDER BY "_distance" ASC - LIMIT $3; - `; - - const documents = (await this.pool.query(queryString, parameters)).rows; - - const results = [] as [Document, number][]; - for (const doc of documents) { - if (doc._distance != null && doc[this.contentColumnName] != null) { - const document = new Document({ - pageContent: doc[this.contentColumnName], - metadata: doc[this.metadataColumnName], - }); - results.push([document, doc._distance]); - } - } - return results; - } - - /** - * Method to ensure the existence of the table in the database. It creates - * the table if it does not already exist. - * - * @returns Promise that resolves when the table has been ensured. - */ - async ensureTableInDatabase(): Promise { - await this.pool.query("CREATE EXTENSION IF NOT EXISTS vector;"); - await this.pool.query('CREATE EXTENSION IF NOT EXISTS "uuid-ossp";'); - - await this.pool.query(` - CREATE TABLE IF NOT EXISTS ${this.tableName} ( - "${this.idColumnName}" uuid NOT NULL DEFAULT uuid_generate_v4() PRIMARY KEY, - "${this.contentColumnName}" text, - "${this.metadataColumnName}" jsonb, - "${this.vectorColumnName}" vector - ); - `); - } - - /** - * Method to ensure the existence of the collection table in the database. - * It creates the table if it does not already exist. - * - * @returns Promise that resolves when the collection table has been ensured. 
- */ - async ensureCollectionTableInDatabase(): Promise { - try { - await this.pool.query(` - CREATE TABLE IF NOT EXISTS ${this.collectionTableName} ( - uuid uuid NOT NULL DEFAULT uuid_generate_v4() PRIMARY KEY, - name character varying, - cmetadata jsonb - ); - - ALTER TABLE ${this.tableName} - ADD COLUMN collection_id uuid; - - ALTER TABLE ${this.tableName} - ADD CONSTRAINT ${this.tableName}_collection_id_fkey - FOREIGN KEY (collection_id) - REFERENCES ${this.collectionTableName}(uuid) - ON DELETE CASCADE; - `); - } catch (e) { - if (!(e as Error).message.includes("already exists")) { - console.error(e); - throw new Error(`Error adding column: ${(e as Error).message}`); - } - } - } - - /** - * Static method to create a new `PGVectorStore` instance from an - * array of texts and their metadata. It converts the texts into - * `Document` instances and adds them to the store. - * - * @param texts - Array of texts. - * @param metadatas - Array of metadata objects or a single metadata object. - * @param embeddings - Embeddings instance. - * @param dbConfig - `PGVectorStoreArgs` instance. - * @returns Promise that resolves with a new instance of `PGVectorStore`. - */ - static async fromTexts( - texts: string[], - metadatas: object[] | object, - embeddings: Embeddings, - dbConfig: PGVectorStoreArgs - ): Promise { - const docs = []; - for (let i = 0; i < texts.length; i += 1) { - const metadata = Array.isArray(metadatas) ? metadatas[i] : metadatas; - const newDoc = new Document({ - pageContent: texts[i], - metadata, - }); - docs.push(newDoc); - } - - return PGVectorStore.fromDocuments(docs, embeddings, dbConfig); - } - - /** - * Static method to create a new `PGVectorStore` instance from an - * array of `Document` instances. It adds the documents to the store. - * - * @param docs - Array of `Document` instances. - * @param embeddings - Embeddings instance. - * @param dbConfig - `PGVectorStoreArgs` instance. 
- * @returns Promise that resolves with a new instance of `PGVectorStore`. - */ - static async fromDocuments( - docs: Document[], - embeddings: Embeddings, - dbConfig: PGVectorStoreArgs - ): Promise { - const instance = await PGVectorStore.initialize(embeddings, dbConfig); - await instance.addDocuments(docs); - - return instance; - } - - /** - * Closes all the clients in the pool and terminates the pool. - * - * @returns Promise that resolves when all clients are closed and the pool is terminated. - */ - async end(): Promise { - this.client?.release(); - return this.pool.end(); - } -} +export * from "@langchain/community/vectorstores/pgvector"; diff --git a/langchain/src/vectorstores/pinecone.ts b/langchain/src/vectorstores/pinecone.ts index d4978f7a9289..667b9e8a8860 100644 --- a/langchain/src/vectorstores/pinecone.ts +++ b/langchain/src/vectorstores/pinecone.ts @@ -1,360 +1 @@ -/* eslint-disable no-process-env */ -import * as uuid from "uuid"; -import flatten from "flat"; - -import { - RecordMetadata, - PineconeRecord, - Index as PineconeIndex, -} from "@pinecone-database/pinecone"; - -import { MaxMarginalRelevanceSearchOptions, VectorStore } from "./base.js"; -import { Embeddings } from "../embeddings/base.js"; -import { Document } from "../document.js"; -import { AsyncCaller, AsyncCallerParams } from "../util/async_caller.js"; -import { maximalMarginalRelevance } from "../util/math.js"; -import { chunkArray } from "../util/chunk.js"; - -// eslint-disable-next-line @typescript-eslint/ban-types, @typescript-eslint/no-explicit-any -type PineconeMetadata = Record; - -export interface PineconeLibArgs extends AsyncCallerParams { - pineconeIndex: PineconeIndex; - textKey?: string; - namespace?: string; - filter?: PineconeMetadata; -} - -/** - * Type that defines the parameters for the delete operation in the - * PineconeStore class. It includes ids, filter, deleteAll flag, and namespace. 
- */ -export type PineconeDeleteParams = { - ids?: string[]; - deleteAll?: boolean; - filter?: object; - namespace?: string; -}; - -/** - * Class that extends the VectorStore class and provides methods to - * interact with the Pinecone vector database. - */ -export class PineconeStore extends VectorStore { - declare FilterType: PineconeMetadata; - - textKey: string; - - namespace?: string; - - pineconeIndex: PineconeIndex; - - filter?: PineconeMetadata; - - caller: AsyncCaller; - - _vectorstoreType(): string { - return "pinecone"; - } - - constructor(embeddings: Embeddings, args: PineconeLibArgs) { - super(embeddings, args); - - this.embeddings = embeddings; - const { namespace, pineconeIndex, textKey, filter, ...asyncCallerArgs } = - args; - this.namespace = namespace; - this.pineconeIndex = pineconeIndex; - this.textKey = textKey ?? "text"; - this.filter = filter; - this.caller = new AsyncCaller(asyncCallerArgs); - } - - /** - * Method that adds documents to the Pinecone database. - * @param documents Array of documents to add to the Pinecone database. - * @param options Optional ids for the documents. - * @returns Promise that resolves with the ids of the added documents. - */ - async addDocuments( - documents: Document[], - options?: { ids?: string[] } | string[] - ) { - const texts = documents.map(({ pageContent }) => pageContent); - return this.addVectors( - await this.embeddings.embedDocuments(texts), - documents, - options - ); - } - - /** - * Method that adds vectors to the Pinecone database. - * @param vectors Array of vectors to add to the Pinecone database. - * @param documents Array of documents associated with the vectors. - * @param options Optional ids for the vectors. - * @returns Promise that resolves with the ids of the added vectors. - */ - async addVectors( - vectors: number[][], - documents: Document[], - options?: { ids?: string[] } | string[] - ) { - const ids = Array.isArray(options) ? 
options : options?.ids; - const documentIds = ids == null ? documents.map(() => uuid.v4()) : ids; - const pineconeVectors = vectors.map((values, idx) => { - // Pinecone doesn't support nested objects, so we flatten them - const documentMetadata = { ...documents[idx].metadata }; - // preserve string arrays which are allowed - const stringArrays: Record = {}; - for (const key of Object.keys(documentMetadata)) { - if ( - Array.isArray(documentMetadata[key]) && - // eslint-disable-next-line @typescript-eslint/ban-types, @typescript-eslint/no-explicit-any - documentMetadata[key].every((el: any) => typeof el === "string") - ) { - stringArrays[key] = documentMetadata[key]; - delete documentMetadata[key]; - } - } - const metadata: { - [key: string]: string | number | boolean | string[] | null; - } = { - ...flatten(documentMetadata), - ...stringArrays, - [this.textKey]: documents[idx].pageContent, - }; - // Pinecone doesn't support null values, so we remove them - for (const key of Object.keys(metadata)) { - if (metadata[key] == null) { - delete metadata[key]; - } else if ( - typeof metadata[key] === "object" && - Object.keys(metadata[key] as unknown as object).length === 0 - ) { - delete metadata[key]; - } - } - - return { - id: documentIds[idx], - metadata, - values, - } as PineconeRecord; - }); - - const namespace = this.pineconeIndex.namespace(this.namespace ?? ""); - // Pinecone recommends a limit of 100 vectors per upsert request - const chunkSize = 100; - const chunkedVectors = chunkArray(pineconeVectors, chunkSize); - const batchRequests = chunkedVectors.map((chunk) => - this.caller.call(async () => namespace.upsert(chunk)) - ); - - await Promise.all(batchRequests); - - return documentIds; - } - - /** - * Method that deletes vectors from the Pinecone database. - * @param params Parameters for the delete operation. - * @returns Promise that resolves when the delete operation is complete. 
- */ - async delete(params: PineconeDeleteParams): Promise { - const { deleteAll, ids, filter } = params; - const namespace = this.pineconeIndex.namespace(this.namespace ?? ""); - - if (deleteAll) { - await namespace.deleteAll(); - } else if (ids) { - const batchSize = 1000; - for (let i = 0; i < ids.length; i += batchSize) { - const batchIds = ids.slice(i, i + batchSize); - await namespace.deleteMany(batchIds); - } - } else if (filter) { - await namespace.deleteMany(filter); - } else { - throw new Error("Either ids or delete_all must be provided."); - } - } - - protected async _runPineconeQuery( - query: number[], - k: number, - filter?: PineconeMetadata, - options?: { includeValues: boolean } - ) { - if (filter && this.filter) { - throw new Error("cannot provide both `filter` and `this.filter`"); - } - const _filter = filter ?? this.filter; - const namespace = this.pineconeIndex.namespace(this.namespace ?? ""); - - const results = await namespace.query({ - includeMetadata: true, - topK: k, - vector: query, - filter: _filter, - ...options, - }); - - return results; - } - - /** - * Method that performs a similarity search in the Pinecone database and - * returns the results along with their scores. - * @param query Query vector for the similarity search. - * @param k Number of top results to return. - * @param filter Optional filter to apply to the search. - * @returns Promise that resolves with an array of documents and their scores. - */ - async similaritySearchVectorWithScore( - query: number[], - k: number, - filter?: PineconeMetadata - ): Promise<[Document, number][]> { - const results = await this._runPineconeQuery(query, k, filter); - const result: [Document, number][] = []; - - if (results.matches) { - for (const res of results.matches) { - const { [this.textKey]: pageContent, ...metadata } = (res.metadata ?? 
- {}) as PineconeMetadata; - if (res.score) { - result.push([new Document({ metadata, pageContent }), res.score]); - } - } - } - - return result; - } - - /** - * Return documents selected using the maximal marginal relevance. - * Maximal marginal relevance optimizes for similarity to the query AND diversity - * among selected documents. - * - * @param {string} query - Text to look up documents similar to. - * @param {number} options.k - Number of documents to return. - * @param {number} options.fetchK=20 - Number of documents to fetch before passing to the MMR algorithm. - * @param {number} options.lambda=0.5 - Number between 0 and 1 that determines the degree of diversity among the results, - * where 0 corresponds to maximum diversity and 1 to minimum diversity. - * @param {PineconeMetadata} options.filter - Optional filter to apply to the search. - * - * @returns {Promise} - List of documents selected by maximal marginal relevance. - */ - async maxMarginalRelevanceSearch( - query: string, - options: MaxMarginalRelevanceSearchOptions - ): Promise { - const queryEmbedding = await this.embeddings.embedQuery(query); - - const results = await this._runPineconeQuery( - queryEmbedding, - options.fetchK ?? 20, - options.filter, - { includeValues: true } - ); - - const matches = results?.matches ?? []; - const embeddingList = matches.map((match) => match.values); - - const mmrIndexes = maximalMarginalRelevance( - queryEmbedding, - embeddingList, - options.lambda, - options.k - ); - - const topMmrMatches = mmrIndexes.map((idx) => matches[idx]); - - const finalResult: Document[] = []; - for (const res of topMmrMatches) { - const { [this.textKey]: pageContent, ...metadata } = (res.metadata ?? - {}) as PineconeMetadata; - if (res.score) { - finalResult.push(new Document({ metadata, pageContent })); - } - } - - return finalResult; - } - - /** - * Static method that creates a new instance of the PineconeStore class - * from texts. 
- * @param texts Array of texts to add to the Pinecone database. - * @param metadatas Metadata associated with the texts. - * @param embeddings Embeddings to use for the texts. - * @param dbConfig Configuration for the Pinecone database. - * @returns Promise that resolves with a new instance of the PineconeStore class. - */ - static async fromTexts( - texts: string[], - metadatas: object[] | object, - embeddings: Embeddings, - dbConfig: - | { - pineconeIndex: PineconeIndex; - textKey?: string; - namespace?: string | undefined; - } - | PineconeLibArgs - ): Promise { - const docs: Document[] = []; - for (let i = 0; i < texts.length; i += 1) { - const metadata = Array.isArray(metadatas) ? metadatas[i] : metadatas; - const newDoc = new Document({ - pageContent: texts[i], - metadata, - }); - docs.push(newDoc); - } - - const args: PineconeLibArgs = { - pineconeIndex: dbConfig.pineconeIndex, - textKey: dbConfig.textKey, - namespace: dbConfig.namespace, - }; - return PineconeStore.fromDocuments(docs, embeddings, args); - } - - /** - * Static method that creates a new instance of the PineconeStore class - * from documents. - * @param docs Array of documents to add to the Pinecone database. - * @param embeddings Embeddings to use for the documents. - * @param dbConfig Configuration for the Pinecone database. - * @returns Promise that resolves with a new instance of the PineconeStore class. - */ - static async fromDocuments( - docs: Document[], - embeddings: Embeddings, - dbConfig: PineconeLibArgs - ): Promise { - const args = dbConfig; - args.textKey = dbConfig.textKey ?? "text"; - - const instance = new this(embeddings, args); - await instance.addDocuments(docs); - return instance; - } - - /** - * Static method that creates a new instance of the PineconeStore class - * from an existing index. - * @param embeddings Embeddings to use for the documents. - * @param dbConfig Configuration for the Pinecone database. 
- * @returns Promise that resolves with a new instance of the PineconeStore class. - */ - static async fromExistingIndex( - embeddings: Embeddings, - dbConfig: PineconeLibArgs - ): Promise { - const instance = new this(embeddings, dbConfig); - return instance; - } -} +export * from "@langchain/community/vectorstores/pinecone"; diff --git a/langchain/src/vectorstores/prisma.ts b/langchain/src/vectorstores/prisma.ts index 219f570df37d..9e31386edfe3 100644 --- a/langchain/src/vectorstores/prisma.ts +++ b/langchain/src/vectorstores/prisma.ts @@ -1,511 +1 @@ -import { VectorStore } from "./base.js"; -import { Document } from "../document.js"; -import { type Embeddings } from "../embeddings/base.js"; -import { Callbacks } from "../callbacks/manager.js"; - -const IdColumnSymbol = Symbol("id"); -const ContentColumnSymbol = Symbol("content"); - -type ColumnSymbol = typeof IdColumnSymbol | typeof ContentColumnSymbol; - -declare type Value = unknown; -declare type RawValue = Value | Sql; - -declare class Sql { - strings: string[]; - - constructor( - rawStrings: ReadonlyArray, - rawValues: ReadonlyArray - ); -} - -type PrismaNamespace = { - ModelName: Record; - Sql: typeof Sql; - raw: (sql: string) => Sql; - join: ( - values: RawValue[], - separator?: string, - prefix?: string, - suffix?: string - ) => Sql; - sql: (strings: ReadonlyArray, ...values: RawValue[]) => Sql; -}; - -type PrismaClient = { - $queryRaw( - query: TemplateStringsArray | Sql, - // eslint-disable-next-line @typescript-eslint/no-explicit-any - ...values: any[] - ): Promise; - $executeRaw( - query: TemplateStringsArray | Sql, - // eslint-disable-next-line @typescript-eslint/no-explicit-any - ...values: any[] - ): // eslint-disable-next-line @typescript-eslint/no-explicit-any - Promise; - - // eslint-disable-next-line @typescript-eslint/no-explicit-any - $transaction

[]>(arg: [...P]): Promise; -}; - -type ObjectIntersect = { - [P in keyof A & keyof B]: A[P] | B[P]; -}; - -type ModelColumns> = { - [K in keyof TModel]?: true | ColumnSymbol; -}; - -export type PrismaSqlFilter> = { - [K in keyof TModel]?: { - equals?: TModel[K]; - in?: TModel[K][]; - isNull?: TModel[K]; - isNotNull?: TModel[K]; - like?: TModel[K]; - lt?: TModel[K]; - lte?: TModel[K]; - gt?: TModel[K]; - gte?: TModel[K]; - not?: TModel[K]; - }; -}; - -const OpMap = { - equals: "=", - in: "IN", - isNull: "IS NULL", - isNotNull: "IS NOT NULL", - like: "LIKE", - lt: "<", - lte: "<=", - gt: ">", - gte: ">=", - not: "<>", -}; - -type SimilarityModel< - TModel extends Record = Record, - TColumns extends ModelColumns = ModelColumns -> = Pick> & { - _distance: number | null; -}; - -type DefaultPrismaVectorStore = PrismaVectorStore< - Record, - string, - ModelColumns>, - PrismaSqlFilter> ->; - -/** - * A specific implementation of the VectorStore class that is designed to - * work with Prisma. It provides methods for adding models, documents, and - * vectors, as well as for performing similarity searches. 
- */ -export class PrismaVectorStore< - TModel extends Record, - TModelName extends string, - TSelectModel extends ModelColumns, - TFilterModel extends PrismaSqlFilter -> extends VectorStore { - protected tableName: string; - - protected vectorColumnName: string; - - protected selectColumns: string[]; - - filter?: TFilterModel; - - idColumn: keyof TModel & string; - - contentColumn: keyof TModel & string; - - static IdColumn: typeof IdColumnSymbol = IdColumnSymbol; - - static ContentColumn: typeof ContentColumnSymbol = ContentColumnSymbol; - - protected db: PrismaClient; - - protected Prisma: PrismaNamespace; - - _vectorstoreType(): string { - return "prisma"; - } - - constructor( - embeddings: Embeddings, - config: { - db: PrismaClient; - prisma: PrismaNamespace; - tableName: TModelName; - vectorColumnName: string; - columns: TSelectModel; - filter?: TFilterModel; - } - ) { - super(embeddings, {}); - - this.Prisma = config.prisma; - this.db = config.db; - - const entries = Object.entries(config.columns); - const idColumn = entries.find((i) => i[1] === IdColumnSymbol)?.[0]; - const contentColumn = entries.find( - (i) => i[1] === ContentColumnSymbol - )?.[0]; - - if (idColumn == null) throw new Error("Missing ID column"); - if (contentColumn == null) throw new Error("Missing content column"); - - this.idColumn = idColumn; - this.contentColumn = contentColumn; - - this.tableName = config.tableName; - this.vectorColumnName = config.vectorColumnName; - - this.selectColumns = entries - .map(([key, alias]) => (alias && key) || null) - .filter((x): x is string => !!x); - - if (config.filter) { - this.filter = config.filter; - } - } - - /** - * Creates a new PrismaVectorStore with the specified model. - * @param db The PrismaClient instance. - * @returns An object with create, fromTexts, and fromDocuments methods. 
- */ - static withModel>(db: PrismaClient) { - function create< - TPrisma extends PrismaNamespace, - TColumns extends ModelColumns, - TFilters extends PrismaSqlFilter - >( - embeddings: Embeddings, - config: { - prisma: TPrisma; - tableName: keyof TPrisma["ModelName"] & string; - vectorColumnName: string; - columns: TColumns; - filter?: TFilters; - } - ) { - type ModelName = keyof TPrisma["ModelName"] & string; - return new PrismaVectorStore( - embeddings, - { ...config, db } - ); - } - - async function fromTexts< - TPrisma extends PrismaNamespace, - TColumns extends ModelColumns - >( - texts: string[], - metadatas: TModel[], - embeddings: Embeddings, - dbConfig: { - prisma: TPrisma; - tableName: keyof TPrisma["ModelName"] & string; - vectorColumnName: string; - columns: TColumns; - } - ) { - const docs: Document[] = []; - for (let i = 0; i < texts.length; i += 1) { - const metadata = Array.isArray(metadatas) ? metadatas[i] : metadatas; - const newDoc = new Document({ - pageContent: texts[i], - metadata, - }); - docs.push(newDoc); - } - - return PrismaVectorStore.fromDocuments(docs, embeddings, { - ...dbConfig, - db, - }); - } - - async function fromDocuments< - TPrisma extends PrismaNamespace, - TColumns extends ModelColumns, - TFilters extends PrismaSqlFilter - >( - docs: Document[], - embeddings: Embeddings, - dbConfig: { - prisma: TPrisma; - tableName: keyof TPrisma["ModelName"] & string; - vectorColumnName: string; - columns: TColumns; - } - ) { - type ModelName = keyof TPrisma["ModelName"] & string; - const instance = new PrismaVectorStore< - TModel, - ModelName, - TColumns, - TFilters - >(embeddings, { ...dbConfig, db }); - await instance.addDocuments(docs); - return instance; - } - - return { create, fromTexts, fromDocuments }; - } - - /** - * Adds the specified models to the store. - * @param models The models to add. - * @returns A promise that resolves when the models have been added. 
- */ - async addModels(models: TModel[]) { - return this.addDocuments( - models.map((metadata) => { - const pageContent = metadata[this.contentColumn]; - if (typeof pageContent !== "string") - throw new Error("Content column must be a string"); - return new Document({ pageContent, metadata }); - }) - ); - } - - /** - * Adds the specified documents to the store. - * @param documents The documents to add. - * @returns A promise that resolves when the documents have been added. - */ - async addDocuments(documents: Document[]) { - const texts = documents.map(({ pageContent }) => pageContent); - return this.addVectors( - await this.embeddings.embedDocuments(texts), - documents - ); - } - - /** - * Adds the specified vectors to the store. - * @param vectors The vectors to add. - * @param documents The documents associated with the vectors. - * @returns A promise that resolves when the vectors have been added. - */ - async addVectors(vectors: number[][], documents: Document[]) { - // table name, column name cannot be parametrised - // these fields are thus not escaped by Prisma and can be dangerous if user input is used - const idColumnRaw = this.Prisma.raw(`"${this.idColumn}"`); - const tableNameRaw = this.Prisma.raw(`"${this.tableName}"`); - const vectorColumnRaw = this.Prisma.raw(`"${this.vectorColumnName}"`); - - await this.db.$transaction( - vectors.map( - (vector, idx) => this.db.$executeRaw` - UPDATE ${tableNameRaw} - SET ${vectorColumnRaw} = ${`[${vector.join(",")}]`}::vector - WHERE ${idColumnRaw} = ${documents[idx].metadata[this.idColumn]} - ` - ) - ); - } - - /** - * Performs a similarity search with the specified query. - * @param query The query to use for the similarity search. - * @param k The number of results to return. - * @param _filter The filter to apply to the results. - * @param _callbacks The callbacks to use during the search. - * @returns A promise that resolves with the search results. 
- */ - async similaritySearch( - query: string, - k = 4, - _filter: this["FilterType"] | undefined = undefined, // not used. here to make the interface compatible with the other stores - _callbacks: Callbacks | undefined = undefined // implement passing to embedQuery later - ): Promise>[]> { - const results = await this.similaritySearchVectorWithScore( - await this.embeddings.embedQuery(query), - k - ); - - return results.map((result) => result[0]); - } - - /** - * Performs a similarity search with the specified query and returns the - * results along with their scores. - * @param query The query to use for the similarity search. - * @param k The number of results to return. - * @param filter The filter to apply to the results. - * @param _callbacks The callbacks to use during the search. - * @returns A promise that resolves with the search results and their scores. - */ - async similaritySearchWithScore( - query: string, - k?: number, - filter?: TFilterModel, - _callbacks: Callbacks | undefined = undefined // implement passing to embedQuery later - ) { - return super.similaritySearchWithScore(query, k, filter); - } - - /** - * Performs a similarity search with the specified vector and returns the - * results along with their scores. - * @param query The vector to use for the similarity search. - * @param k The number of results to return. - * @param filter The filter to apply to the results. - * @returns A promise that resolves with the search results and their scores. 
- */ - async similaritySearchVectorWithScore( - query: number[], - k: number, - filter?: TFilterModel - ): Promise<[Document>, number][]> { - // table name, column names cannot be parametrised - // these fields are thus not escaped by Prisma and can be dangerous if user input is used - const vectorColumnRaw = this.Prisma.raw(`"${this.vectorColumnName}"`); - const tableNameRaw = this.Prisma.raw(`"${this.tableName}"`); - const selectRaw = this.Prisma.raw( - this.selectColumns.map((x) => `"${x}"`).join(", ") - ); - - const vector = `[${query.join(",")}]`; - const articles = await this.db.$queryRaw< - Array> - >( - this.Prisma.join( - [ - this.Prisma.sql` - SELECT ${selectRaw}, ${vectorColumnRaw} <=> ${vector}::vector as "_distance" - FROM ${tableNameRaw} - `, - this.buildSqlFilterStr(filter ?? this.filter), - this.Prisma.sql` - ORDER BY "_distance" ASC - LIMIT ${k}; - `, - ].filter((x) => x != null), - "" - ) - ); - - const results: [Document>, number][] = - []; - for (const article of articles) { - if (article._distance != null && article[this.contentColumn] != null) { - results.push([ - new Document({ - pageContent: article[this.contentColumn] as string, - metadata: article, - }), - article._distance, - ]); - } - } - - return results; - } - - buildSqlFilterStr(filter?: TFilterModel) { - if (filter == null) return null; - return this.Prisma.join( - Object.entries(filter).flatMap(([key, ops]) => - Object.entries(ops).map(([opName, value]) => { - // column name, operators cannot be parametrised - // these fields are thus not escaped by Prisma and can be dangerous if user input is used - const opNameKey = opName as keyof typeof OpMap; - const colRaw = this.Prisma.raw(`"${key}"`); - const opRaw = this.Prisma.raw(OpMap[opNameKey]); - - switch (OpMap[opNameKey]) { - case OpMap.in: { - if ( - !Array.isArray(value) || - !value.every((v) => typeof v === "string") - ) { - throw new Error( - `Invalid filter: IN operator requires an array of strings. 
Received: ${JSON.stringify( - value, - null, - 2 - )}` - ); - } - return this.Prisma.sql`${colRaw} ${opRaw} (${this.Prisma.join( - value - )})`; - } - case OpMap.isNull: - case OpMap.isNotNull: - return this.Prisma.sql`${colRaw} ${opRaw}`; - default: - return this.Prisma.sql`${colRaw} ${opRaw} ${value}`; - } - }) - ), - " AND ", - " WHERE " - ); - } - - /** - * Creates a new PrismaVectorStore from the specified texts. - * @param texts The texts to use to create the store. - * @param metadatas The metadata for the texts. - * @param embeddings The embeddings to use. - * @param dbConfig The database configuration. - * @returns A promise that resolves with the new PrismaVectorStore. - */ - static async fromTexts( - texts: string[], - metadatas: object[], - embeddings: Embeddings, - dbConfig: { - db: PrismaClient; - prisma: PrismaNamespace; - tableName: string; - vectorColumnName: string; - columns: ModelColumns>; - } - ): Promise { - const docs: Document[] = []; - for (let i = 0; i < texts.length; i += 1) { - const metadata = Array.isArray(metadatas) ? metadatas[i] : metadatas; - const newDoc = new Document({ - pageContent: texts[i], - metadata, - }); - docs.push(newDoc); - } - - return PrismaVectorStore.fromDocuments(docs, embeddings, dbConfig); - } - - /** - * Creates a new PrismaVectorStore from the specified documents. - * @param docs The documents to use to create the store. - * @param embeddings The embeddings to use. - * @param dbConfig The database configuration. - * @returns A promise that resolves with the new PrismaVectorStore. 
- */ - static async fromDocuments( - docs: Document[], - embeddings: Embeddings, - dbConfig: { - db: PrismaClient; - prisma: PrismaNamespace; - tableName: string; - vectorColumnName: string; - columns: ModelColumns>; - } - ): Promise { - const instance = new PrismaVectorStore(embeddings, dbConfig); - await instance.addDocuments(docs); - return instance; - } -} +export * from "@langchain/community/vectorstores/prisma"; diff --git a/langchain/src/vectorstores/qdrant.ts b/langchain/src/vectorstores/qdrant.ts index 6ec54233a6ec..a4a8c713d6f4 100644 --- a/langchain/src/vectorstores/qdrant.ts +++ b/langchain/src/vectorstores/qdrant.ts @@ -1,260 +1 @@ -import { QdrantClient } from "@qdrant/js-client-rest"; -import type { Schemas as QdrantSchemas } from "@qdrant/js-client-rest"; -import { v4 as uuid } from "uuid"; - -import { Embeddings } from "../embeddings/base.js"; -import { VectorStore } from "./base.js"; -import { Document } from "../document.js"; -import { getEnvironmentVariable } from "../util/env.js"; - -/** - * Interface for the arguments that can be passed to the - * `QdrantVectorStore` constructor. It includes options for specifying a - * `QdrantClient` instance, the URL and API key for a Qdrant database, and - * the name and configuration for a collection. - */ -export interface QdrantLibArgs { - client?: QdrantClient; - url?: string; - apiKey?: string; - collectionName?: string; - collectionConfig?: QdrantSchemas["CreateCollection"]; -} - -/** - * Type for the response returned by a search operation in the Qdrant - * database. It includes the score and payload (metadata and content) for - * each point (document) in the search results. - */ -type QdrantSearchResponse = QdrantSchemas["ScoredPoint"] & { - payload: { - metadata: object; - content: string; - }; -}; - -/** - * Class that extends the `VectorStore` base class to interact with a - * Qdrant database. 
It includes methods for adding documents and vectors - * to the Qdrant database, searching for similar vectors, and ensuring the - * existence of a collection in the database. - */ -export class QdrantVectorStore extends VectorStore { - get lc_secrets(): { [key: string]: string } { - return { - apiKey: "QDRANT_API_KEY", - url: "QDRANT_URL", - }; - } - - client: QdrantClient; - - collectionName: string; - - collectionConfig?: QdrantSchemas["CreateCollection"]; - - _vectorstoreType(): string { - return "qdrant"; - } - - constructor(embeddings: Embeddings, args: QdrantLibArgs) { - super(embeddings, args); - - const url = args.url ?? getEnvironmentVariable("QDRANT_URL"); - const apiKey = args.apiKey ?? getEnvironmentVariable("QDRANT_API_KEY"); - - if (!args.client && !url) { - throw new Error("Qdrant client or url address must be set."); - } - - this.client = - args.client || - new QdrantClient({ - url, - apiKey, - }); - - this.collectionName = args.collectionName ?? "documents"; - - this.collectionConfig = args.collectionConfig; - } - - /** - * Method to add documents to the Qdrant database. It generates vectors - * from the documents using the `Embeddings` instance and then adds the - * vectors to the database. - * @param documents Array of `Document` instances to be added to the Qdrant database. - * @returns Promise that resolves when the documents have been added to the database. - */ - async addDocuments(documents: Document[]): Promise { - const texts = documents.map(({ pageContent }) => pageContent); - await this.addVectors( - await this.embeddings.embedDocuments(texts), - documents - ); - } - - /** - * Method to add vectors to the Qdrant database. Each vector is associated - * with a document, which is stored as the payload for a point in the - * database. - * @param vectors Array of vectors to be added to the Qdrant database. - * @param documents Array of `Document` instances associated with the vectors. 
- * @returns Promise that resolves when the vectors have been added to the database. - */ - async addVectors(vectors: number[][], documents: Document[]): Promise { - if (vectors.length === 0) { - return; - } - - await this.ensureCollection(); - - const points = vectors.map((embedding, idx) => ({ - id: uuid(), - vector: embedding, - payload: { - content: documents[idx].pageContent, - metadata: documents[idx].metadata, - }, - })); - - try { - await this.client.upsert(this.collectionName, { - wait: true, - points, - }); - // eslint-disable-next-line @typescript-eslint/no-explicit-any - } catch (e: any) { - const error = new Error( - `${e?.status ?? "Undefined error code"} ${e?.message}: ${ - e?.data?.status?.error - }` - ); - throw error; - } - } - - /** - * Method to search for vectors in the Qdrant database that are similar to - * a given query vector. The search results include the score and payload - * (metadata and content) for each similar vector. - * @param query Query vector to search for similar vectors in the Qdrant database. - * @param k Optional number of similar vectors to return. If not specified, all similar vectors are returned. - * @param filter Optional filter to apply to the search results. - * @returns Promise that resolves with an array of tuples, where each tuple includes a `Document` instance and a score for a similar vector. 
- */ - async similaritySearchVectorWithScore( - query: number[], - k?: number, - filter?: QdrantSchemas["Filter"] - ): Promise<[Document, number][]> { - if (!query) { - return []; - } - - await this.ensureCollection(); - - const results = await this.client.search(this.collectionName, { - vector: query, - limit: k, - filter, - }); - - const result: [Document, number][] = ( - results as QdrantSearchResponse[] - ).map((res) => [ - new Document({ - metadata: res.payload.metadata, - pageContent: res.payload.content, - }), - res.score, - ]); - - return result; - } - - /** - * Method to ensure the existence of a collection in the Qdrant database. - * If the collection does not exist, it is created. - * @returns Promise that resolves when the existence of the collection has been ensured. - */ - async ensureCollection() { - const response = await this.client.getCollections(); - - const collectionNames = response.collections.map( - (collection) => collection.name - ); - - if (!collectionNames.includes(this.collectionName)) { - const collectionConfig = this.collectionConfig ?? { - vectors: { - size: (await this.embeddings.embedQuery("test")).length, - distance: "Cosine", - }, - }; - await this.client.createCollection(this.collectionName, collectionConfig); - } - } - - /** - * Static method to create a `QdrantVectorStore` instance from texts. Each - * text is associated with metadata and converted to a `Document` - * instance, which is then added to the Qdrant database. - * @param texts Array of texts to be converted to `Document` instances and added to the Qdrant database. - * @param metadatas Array or single object of metadata to be associated with the texts. - * @param embeddings `Embeddings` instance used to generate vectors from the texts. - * @param dbConfig `QdrantLibArgs` instance specifying the configuration for the Qdrant database. - * @returns Promise that resolves with a new `QdrantVectorStore` instance. 
- */ - static async fromTexts( - texts: string[], - metadatas: object[] | object, - embeddings: Embeddings, - dbConfig: QdrantLibArgs - ): Promise { - const docs = []; - for (let i = 0; i < texts.length; i += 1) { - const metadata = Array.isArray(metadatas) ? metadatas[i] : metadatas; - const newDoc = new Document({ - pageContent: texts[i], - metadata, - }); - docs.push(newDoc); - } - return QdrantVectorStore.fromDocuments(docs, embeddings, dbConfig); - } - - /** - * Static method to create a `QdrantVectorStore` instance from `Document` - * instances. The documents are added to the Qdrant database. - * @param docs Array of `Document` instances to be added to the Qdrant database. - * @param embeddings `Embeddings` instance used to generate vectors from the documents. - * @param dbConfig `QdrantLibArgs` instance specifying the configuration for the Qdrant database. - * @returns Promise that resolves with a new `QdrantVectorStore` instance. - */ - static async fromDocuments( - docs: Document[], - embeddings: Embeddings, - dbConfig: QdrantLibArgs - ): Promise { - const instance = new this(embeddings, dbConfig); - await instance.addDocuments(docs); - return instance; - } - - /** - * Static method to create a `QdrantVectorStore` instance from an existing - * collection in the Qdrant database. - * @param embeddings `Embeddings` instance used to generate vectors from the documents in the collection. - * @param dbConfig `QdrantLibArgs` instance specifying the configuration for the Qdrant database. - * @returns Promise that resolves with a new `QdrantVectorStore` instance. 
- */ - static async fromExistingCollection( - embeddings: Embeddings, - dbConfig: QdrantLibArgs - ): Promise { - const instance = new this(embeddings, dbConfig); - await instance.ensureCollection(); - return instance; - } -} +export * from "@langchain/community/vectorstores/qdrant"; diff --git a/langchain/src/vectorstores/redis.ts b/langchain/src/vectorstores/redis.ts index bc33d68ae28d..fa3b6aba3c78 100644 --- a/langchain/src/vectorstores/redis.ts +++ b/langchain/src/vectorstores/redis.ts @@ -1,458 +1 @@ -import type { - createCluster, - createClient, - RediSearchSchema, - SearchOptions, -} from "redis"; -import { SchemaFieldTypes, VectorAlgorithms } from "redis"; -import { Embeddings } from "../embeddings/base.js"; -import { VectorStore } from "./base.js"; -import { Document } from "../document.js"; - -// Adapated from internal redis types which aren't exported -/** - * Type for creating a schema vector field. It includes the algorithm, - * distance metric, and initial capacity. - */ -export type CreateSchemaVectorField< - T extends VectorAlgorithms, - A extends Record -> = { - ALGORITHM: T; - DISTANCE_METRIC: "L2" | "IP" | "COSINE"; - INITIAL_CAP?: number; -} & A; -/** - * Type for creating a flat schema vector field. It extends - * CreateSchemaVectorField with a block size property. - */ -export type CreateSchemaFlatVectorField = CreateSchemaVectorField< - VectorAlgorithms.FLAT, - { - BLOCK_SIZE?: number; - } ->; -/** - * Type for creating a HNSW schema vector field. It extends - * CreateSchemaVectorField with M, EF_CONSTRUCTION, and EF_RUNTIME - * properties. 
- */ -export type CreateSchemaHNSWVectorField = CreateSchemaVectorField< - VectorAlgorithms.HNSW, - { - M?: number; - EF_CONSTRUCTION?: number; - EF_RUNTIME?: number; - } ->; - -type CreateIndexOptions = NonNullable< - Parameters["ft"]["create"]>[3] ->; - -export type RedisSearchLanguages = `${NonNullable< - CreateIndexOptions["LANGUAGE"] ->}`; - -export type RedisVectorStoreIndexOptions = Omit< - CreateIndexOptions, - "LANGUAGE" -> & { LANGUAGE?: RedisSearchLanguages }; - -/** - * Interface for the configuration of the RedisVectorStore. It includes - * the Redis client, index name, index options, key prefix, content key, - * metadata key, vector key, and filter. - */ -export interface RedisVectorStoreConfig { - redisClient: - | ReturnType - | ReturnType; - indexName: string; - indexOptions?: CreateSchemaFlatVectorField | CreateSchemaHNSWVectorField; - createIndexOptions?: Omit; // PREFIX must be set with keyPrefix - keyPrefix?: string; - contentKey?: string; - metadataKey?: string; - vectorKey?: string; - filter?: RedisVectorStoreFilterType; -} - -/** - * Interface for the options when adding documents to the - * RedisVectorStore. It includes keys and batch size. - */ -export interface RedisAddOptions { - keys?: string[]; - batchSize?: number; -} - -/** - * Type for the filter used in the RedisVectorStore. It is an array of - * strings. - */ -export type RedisVectorStoreFilterType = string[]; - -/** - * Class representing a RedisVectorStore. It extends the VectorStore class - * and includes methods for adding documents and vectors, performing - * similarity searches, managing the index, and more. 
- */ -export class RedisVectorStore extends VectorStore { - declare FilterType: RedisVectorStoreFilterType; - - private redisClient: - | ReturnType - | ReturnType; - - indexName: string; - - indexOptions: CreateSchemaFlatVectorField | CreateSchemaHNSWVectorField; - - createIndexOptions: CreateIndexOptions; - - keyPrefix: string; - - contentKey: string; - - metadataKey: string; - - vectorKey: string; - - filter?: RedisVectorStoreFilterType; - - _vectorstoreType(): string { - return "redis"; - } - - constructor(embeddings: Embeddings, _dbConfig: RedisVectorStoreConfig) { - super(embeddings, _dbConfig); - - this.redisClient = _dbConfig.redisClient; - this.indexName = _dbConfig.indexName; - this.indexOptions = _dbConfig.indexOptions ?? { - ALGORITHM: VectorAlgorithms.HNSW, - DISTANCE_METRIC: "COSINE", - }; - this.keyPrefix = _dbConfig.keyPrefix ?? `doc:${this.indexName}:`; - this.contentKey = _dbConfig.contentKey ?? "content"; - this.metadataKey = _dbConfig.metadataKey ?? "metadata"; - this.vectorKey = _dbConfig.vectorKey ?? "content_vector"; - this.filter = _dbConfig.filter; - this.createIndexOptions = { - ON: "HASH", - PREFIX: this.keyPrefix, - ...(_dbConfig.createIndexOptions as CreateIndexOptions), - }; - } - - /** - * Method for adding documents to the RedisVectorStore. It first converts - * the documents to texts and then adds them as vectors. - * @param documents The documents to add. - * @param options Optional parameters for adding the documents. - * @returns A promise that resolves when the documents have been added. - */ - async addDocuments(documents: Document[], options?: RedisAddOptions) { - const texts = documents.map(({ pageContent }) => pageContent); - return this.addVectors( - await this.embeddings.embedDocuments(texts), - documents, - options - ); - } - - /** - * Method for adding vectors to the RedisVectorStore. It checks if the - * index exists and creates it if it doesn't, then adds the vectors in - * batches. 
- * @param vectors The vectors to add. - * @param documents The documents associated with the vectors. - * @param keys Optional keys for the vectors. - * @param batchSize The size of the batches in which to add the vectors. Defaults to 1000. - * @returns A promise that resolves when the vectors have been added. - */ - async addVectors( - vectors: number[][], - documents: Document[], - { keys, batchSize = 1000 }: RedisAddOptions = {} - ) { - if (!vectors.length || !vectors[0].length) { - throw new Error("No vectors provided"); - } - // check if the index exists and create it if it doesn't - await this.createIndex(vectors[0].length); - - const info = await this.redisClient.ft.info(this.indexName); - const lastKeyCount = parseInt(info.numDocs, 10) || 0; - const multi = this.redisClient.multi(); - - vectors.map(async (vector, idx) => { - const key = - keys && keys.length - ? keys[idx] - : `${this.keyPrefix}${idx + lastKeyCount}`; - const metadata = - documents[idx] && documents[idx].metadata - ? documents[idx].metadata - : {}; - - multi.hSet(key, { - [this.vectorKey]: this.getFloat32Buffer(vector), - [this.contentKey]: documents[idx].pageContent, - [this.metadataKey]: this.escapeSpecialChars(JSON.stringify(metadata)), - }); - - // write batch - if (idx % batchSize === 0) { - await multi.exec(); - } - }); - - // insert final batch - await multi.exec(); - } - - /** - * Method for performing a similarity search in the RedisVectorStore. It - * returns the documents and their scores. - * @param query The query vector. - * @param k The number of nearest neighbors to return. - * @param filter Optional filter to apply to the search. - * @returns A promise that resolves to an array of documents and their scores. 
- */ - async similaritySearchVectorWithScore( - query: number[], - k: number, - filter?: RedisVectorStoreFilterType - ): Promise<[Document, number][]> { - if (filter && this.filter) { - throw new Error("cannot provide both `filter` and `this.filter`"); - } - - const _filter = filter ?? this.filter; - const results = await this.redisClient.ft.search( - this.indexName, - ...this.buildQuery(query, k, _filter) - ); - const result: [Document, number][] = []; - - if (results.total) { - for (const res of results.documents) { - if (res.value) { - const document = res.value; - if (document.vector_score) { - result.push([ - new Document({ - pageContent: document[this.contentKey] as string, - metadata: JSON.parse( - this.unEscapeSpecialChars(document.metadata as string) - ), - }), - Number(document.vector_score), - ]); - } - } - } - } - - return result; - } - - /** - * Static method for creating a new instance of RedisVectorStore from - * texts. It creates documents from the texts and metadata, then adds them - * to the RedisVectorStore. - * @param texts The texts to add. - * @param metadatas The metadata associated with the texts. - * @param embeddings The embeddings to use. - * @param dbConfig The configuration for the RedisVectorStore. - * @returns A promise that resolves to a new instance of RedisVectorStore. - */ - static fromTexts( - texts: string[], - metadatas: object[] | object, - embeddings: Embeddings, - dbConfig: RedisVectorStoreConfig - ): Promise { - const docs: Document[] = []; - for (let i = 0; i < texts.length; i += 1) { - const metadata = Array.isArray(metadatas) ? metadatas[i] : metadatas; - const newDoc = new Document({ - pageContent: texts[i], - metadata, - }); - docs.push(newDoc); - } - return RedisVectorStore.fromDocuments(docs, embeddings, dbConfig); - } - - /** - * Static method for creating a new instance of RedisVectorStore from - * documents. It adds the documents to the RedisVectorStore. - * @param docs The documents to add. 
- * @param embeddings The embeddings to use. - * @param dbConfig The configuration for the RedisVectorStore. - * @returns A promise that resolves to a new instance of RedisVectorStore. - */ - static async fromDocuments( - docs: Document[], - embeddings: Embeddings, - dbConfig: RedisVectorStoreConfig - ): Promise { - const instance = new this(embeddings, dbConfig); - await instance.addDocuments(docs); - return instance; - } - - /** - * Method for checking if an index exists in the RedisVectorStore. - * @returns A promise that resolves to a boolean indicating whether the index exists. - */ - async checkIndexExists() { - try { - await this.redisClient.ft.info(this.indexName); - } catch (err) { - // eslint-disable-next-line @typescript-eslint/no-explicit-any - if ((err as any)?.message.includes("unknown command")) { - throw new Error( - "Failed to run FT.INFO command. Please ensure that you are running a RediSearch-capable Redis instance: https://js.langchain.com/docs/modules/data_connection/vectorstores/integrations/redis#setup" - ); - } - // index doesn't exist - return false; - } - - return true; - } - - /** - * Method for creating an index in the RedisVectorStore. If the index - * already exists, it does nothing. - * @param dimensions The dimensions of the index - * @returns A promise that resolves when the index has been created. - */ - async createIndex(dimensions = 1536): Promise { - if (await this.checkIndexExists()) { - return; - } - - const schema: RediSearchSchema = { - [this.vectorKey]: { - type: SchemaFieldTypes.VECTOR, - TYPE: "FLOAT32", - DIM: dimensions, - ...this.indexOptions, - }, - [this.contentKey]: SchemaFieldTypes.TEXT, - [this.metadataKey]: SchemaFieldTypes.TEXT, - }; - - await this.redisClient.ft.create( - this.indexName, - schema, - this.createIndexOptions - ); - } - - /** - * Method for dropping an index from the RedisVectorStore. - * @param deleteDocuments Optional boolean indicating whether to drop the associated documents. 
- * @returns A promise that resolves to a boolean indicating whether the index was dropped. - */ - async dropIndex(deleteDocuments?: boolean): Promise { - try { - const options = deleteDocuments ? { DD: deleteDocuments } : undefined; - await this.redisClient.ft.dropIndex(this.indexName, options); - - return true; - } catch (err) { - return false; - } - } - - /** - * Deletes vectors from the vector store. - * @param params The parameters for deleting vectors. - * @returns A promise that resolves when the vectors have been deleted. - */ - async delete(params: { deleteAll: boolean }): Promise { - if (params.deleteAll) { - await this.dropIndex(true); - } else { - throw new Error(`Invalid parameters passed to "delete".`); - } - } - - private buildQuery( - query: number[], - k: number, - filter?: RedisVectorStoreFilterType - ): [string, SearchOptions] { - const vectorScoreField = "vector_score"; - - let hybridFields = "*"; - // if a filter is set, modify the hybrid query - if (filter && filter.length) { - // `filter` is a list of strings, then it's applied using the OR operator in the metadata key - // for example: filter = ['foo', 'bar'] => this will filter all metadata containing either 'foo' OR 'bar' - hybridFields = `@${this.metadataKey}:(${this.prepareFilter(filter)})`; - } - - const baseQuery = `${hybridFields} => [KNN ${k} @${this.vectorKey} $vector AS ${vectorScoreField}]`; - const returnFields = [this.metadataKey, this.contentKey, vectorScoreField]; - - const options: SearchOptions = { - PARAMS: { - vector: this.getFloat32Buffer(query), - }, - RETURN: returnFields, - SORTBY: vectorScoreField, - DIALECT: 2, - LIMIT: { - from: 0, - size: k, - }, - }; - - return [baseQuery, options]; - } - - private prepareFilter(filter: RedisVectorStoreFilterType) { - return filter.map(this.escapeSpecialChars).join("|"); - } - - /** - * Escapes all '-' characters. 
- * RediSearch considers '-' as a negative operator, hence we need - * to escape it - * @see https://redis.io/docs/stack/search/reference/query_syntax - * - * @param str - * @returns - */ - private escapeSpecialChars(str: string) { - return str.replaceAll("-", "\\-"); - } - - /** - * Unescapes all '-' characters, returning the original string - * - * @param str - * @returns - */ - private unEscapeSpecialChars(str: string) { - return str.replaceAll("\\-", "-"); - } - - /** - * Converts the vector to the buffer Redis needs to - * correctly store an embedding - * - * @param vector - * @returns Buffer - */ - private getFloat32Buffer(vector: number[]) { - return Buffer.from(new Float32Array(vector).buffer); - } -} +export * from "@langchain/community/vectorstores/redis"; diff --git a/langchain/src/vectorstores/rockset.ts b/langchain/src/vectorstores/rockset.ts index 38a4f21dc5e3..5ae76eefb55f 100644 --- a/langchain/src/vectorstores/rockset.ts +++ b/langchain/src/vectorstores/rockset.ts @@ -1,453 +1 @@ -import { MainApi } from "@rockset/client"; -import type { CreateCollectionRequest } from "@rockset/client/dist/codegen/api.d.ts"; -import { Collection } from "@rockset/client/dist/codegen/api.js"; - -import { VectorStore } from "./base.js"; -import { Embeddings } from "../embeddings/base.js"; -import { Document } from "../document.js"; - -/** - * Generic Rockset vector storage error - */ -export class RocksetStoreError extends Error { - /** - * Constructs a RocksetStoreError - * @param message The error message - */ - constructor(message: string) { - super(message); - this.name = this.constructor.name; - } -} - -/** - * Error that is thrown when a RocksetStore function is called - * after `destroy()` is called (meaning the collection would be - * deleted). 
- */ -export class RocksetStoreDestroyedError extends RocksetStoreError { - constructor() { - super("The Rockset store has been destroyed"); - this.name = this.constructor.name; - } -} - -/** - * Functions to measure vector distance/similarity by. - * See https://rockset.com/docs/vector-functions/#vector-distance-functions - * @enum SimilarityMetric - */ -export const SimilarityMetric = { - CosineSimilarity: "COSINE_SIM", - EuclideanDistance: "EUCLIDEAN_DIST", - DotProduct: "DOT_PRODUCT", -} as const; - -export type SimilarityMetric = - (typeof SimilarityMetric)[keyof typeof SimilarityMetric]; - -interface CollectionNotFoundError { - message_key: string; -} - -/** - * Vector store arguments - * @interface RocksetStore - */ -export interface RocksetLibArgs { - /** - * The rockset client object constructed with `rocksetConfigure` - * @type {MainAPI} - */ - client: MainApi; - /** - * The name of the Rockset collection to store vectors - * @type {string} - */ - collectionName: string; - /** - * The name of othe Rockset workspace that holds @member collectionName - * @type {string} - */ - workspaceName?: string; - /** - * The name of the collection column to contain page contnent of documents - * @type {string} - */ - textKey?: string; - /** - * The name of the collection column to contain vectors - * @type {string} - */ - embeddingKey?: string; - /** - * The SQL `WHERE` clause to filter by - * @type {string} - */ - filter?: string; - /** - * The metric used to measure vector relationship - * @type {SimilarityMetric} - */ - similarityMetric?: SimilarityMetric; -} - -/** - * Exposes Rockset's vector store/search functionality - */ -export class RocksetStore extends VectorStore { - declare FilterType: string; - - client: MainApi; - - collectionName: string; - - workspaceName: string; - - textKey: string; - - embeddingKey: string; - - filter?: string; - - private _similarityMetric: SimilarityMetric; - - private similarityOrder: "ASC" | "DESC"; - - private destroyed: 
boolean; - - /** - * Gets a string representation of the type of this VectorStore - * @returns {"rockset"} - */ - _vectorstoreType(): "rockset" { - return "rockset"; - } - - /** - * Constructs a new RocksetStore - * @param {Embeddings} embeddings Object used to embed queries and - * page content - * @param {RocksetLibArgs} args - */ - constructor(embeddings: Embeddings, args: RocksetLibArgs) { - super(embeddings, args); - - this.embeddings = embeddings; - this.client = args.client; - this.collectionName = args.collectionName; - this.workspaceName = args.workspaceName ?? "commons"; - this.textKey = args.textKey ?? "text"; - this.embeddingKey = args.embeddingKey ?? "embedding"; - this.filter = args.filter; - this.similarityMetric = - args.similarityMetric ?? SimilarityMetric.CosineSimilarity; - this.setSimilarityOrder(); - } - - /** - * Sets the object's similarity order based on what - * SimilarityMetric is being used - */ - private setSimilarityOrder() { - this.checkIfDestroyed(); - this.similarityOrder = - this.similarityMetric === SimilarityMetric.EuclideanDistance - ? "ASC" - : "DESC"; - } - - /** - * Embeds and adds Documents to the store. 
- * @param {Documents[]} documents The documents to store - * @returns {Promise} The _id's of the documents added - */ - async addDocuments(documents: Document[]): Promise { - const texts = documents.map(({ pageContent }) => pageContent); - return await this.addVectors( - await this.embeddings.embedDocuments(texts), - documents - ); - } - - /** - * Adds vectors to the store given their corresponding Documents - * @param {number[][]} vectors The vectors to store - * @param {Document[]} documents The Documents they represent - * @return {Promise} The _id's of the added documents - */ - async addVectors(vectors: number[][], documents: Document[]) { - this.checkIfDestroyed(); - const rocksetDocs = []; - for (let i = 0; i < documents.length; i += 1) { - const currDoc = documents[i]; - const currVector = vectors[i]; - rocksetDocs.push({ - [this.textKey]: currDoc.pageContent, - [this.embeddingKey]: currVector, - ...currDoc.metadata, - }); - } - - return ( - await this.client.documents.addDocuments( - this.workspaceName, - this.collectionName, - { - data: rocksetDocs, - } - ) - ).data?.map((docStatus) => docStatus._id || ""); - } - - /** - * Deletes Rockset documements given their _id's - * @param {string[]} ids The IDS to remove documents with - */ - async delete(ids: string[]): Promise { - this.checkIfDestroyed(); - await this.client.documents.deleteDocuments( - this.workspaceName, - this.collectionName, - { - data: ids.map((id) => ({ _id: id })), - } - ); - } - - /** - * Gets the most relevant documents to a query along - * with their similarity score. 
The returned documents - * are ordered by similarity (most similar at the first - * index) - * @param {number[]} query The embedded query to search - * the store by - * @param {number} k The number of documents to retreive - * @param {string?} filter The SQL `WHERE` clause to filter by - */ - async similaritySearchVectorWithScore( - query: number[], - k: number, - filter?: string - ): Promise<[Document, number][]> { - this.checkIfDestroyed(); - if (filter && this.filter) { - throw new RocksetStoreError( - "cannot provide both `filter` and `this.filter`" - ); - } - const similarityKey = "similarity"; - const _filter = filter ?? this.filter; - return ( - ( - await this.client.queries.query({ - sql: { - query: ` - SELECT - * EXCEPT("${this.embeddingKey}"), - "${this.textKey}", - ${this.similarityMetric}(:query, "${ - this.embeddingKey - }") AS "${similarityKey}" - FROM - "${this.workspaceName}"."${this.collectionName}" - ${_filter ? `WHERE ${_filter}` : ""} - ORDER BY - "${similarityKey}" ${this.similarityOrder} - LIMIT - ${k} - `, - parameters: [ - { - name: "query", - type: "", - value: `[${query.toString()}]`, - }, - ], - }, - }) - ).results?.map((rocksetDoc) => [ - new Document>({ - pageContent: rocksetDoc[this.textKey], - metadata: (({ - [this.textKey]: t, - [similarityKey]: s, - ...rocksetDoc - }) => rocksetDoc)(rocksetDoc), - }), - rocksetDoc[similarityKey] as number, - ]) ?? [] - ); - } - - /** - * Constructs and returns a RocksetStore object given texts to store. 
- * @param {string[]} texts The texts to store - * @param {object[] | object} metadatas The metadatas that correspond - * to @param texts - * @param {Embeddings} embeddings The object used to embed queries - * and page content - * @param {RocksetLibArgs} dbConfig The options to be passed into the - * RocksetStore constructor - * @returns {RocksetStore} - */ - static async fromTexts( - texts: string[], - metadatas: object[] | object, - embeddings: Embeddings, - dbConfig: RocksetLibArgs - ): Promise { - const docs: Document[] = []; - for (let i = 0; i < texts.length; i += 1) { - const metadata = Array.isArray(metadatas) ? metadatas[i] : metadatas; - const newDoc = new Document({ - pageContent: texts[i], - metadata, - }); - docs.push(newDoc); - } - - return RocksetStore.fromDocuments(docs, embeddings, dbConfig); - } - - /** - * Constructs, adds docs to, and returns a RocksetStore object - * @param {Document[]} docs The Documents to store - * @param {Embeddings} embeddings The object used to embed queries - * and page content - * @param {RocksetLibArgs} dbConfig The options to be passed into the - * RocksetStore constructor - * @returns {RocksetStore} - */ - static async fromDocuments( - docs: Document[], - embeddings: Embeddings, - dbConfig: RocksetLibArgs - ): Promise { - const args = { ...dbConfig, textKey: dbConfig.textKey ?? "text" }; - const instance = new this(embeddings, args); - await instance.addDocuments(docs); - return instance; - } - - /** - * Checks if a Rockset collection exists. - * @param {RocksetLibArgs} dbConfig The object containing the collection - * and workspace names - * @return {boolean} whether the collection exists - */ - private static async collectionExists(dbConfig: RocksetLibArgs) { - try { - await dbConfig.client.collections.getCollection( - dbConfig.workspaceName ?? 
"commons", - dbConfig.collectionName - ); - } catch (err) { - if ( - (err as CollectionNotFoundError).message_key === - "COLLECTION_DOES_NOT_EXIST" - ) { - return false; - } - throw err; - } - return true; - } - - /** - * Checks whether a Rockset collection is ready to be queried. - * @param {RocksetLibArgs} dbConfig The object containing the collection - * name and workspace - * @return {boolean} whether the collection is ready - */ - private static async collectionReady(dbConfig: RocksetLibArgs) { - return ( - ( - await dbConfig.client.collections.getCollection( - dbConfig.workspaceName ?? "commons", - dbConfig.collectionName - ) - ).data?.status === Collection.StatusEnum.READY - ); - } - - /** - * Deletes the collection this RocksetStore uses - * @param {boolean?} waitUntilDeletion Whether to sleep until the - * collection is ready to be - * queried - */ - async destroy(waitUntilDeletion?: boolean) { - await this.client.collections.deleteCollection( - this.workspaceName, - this.collectionName - ); - this.destroyed = true; - if (waitUntilDeletion) { - while ( - await RocksetStore.collectionExists({ - collectionName: this.collectionName, - client: this.client, - }) - ); - } - } - - /** - * Checks if this RocksetStore has been destroyed. - * @throws {RocksetStoreDestroyederror} if it has. - */ - private checkIfDestroyed() { - if (this.destroyed) { - throw new RocksetStoreDestroyedError(); - } - } - - /** - * Creates a new Rockset collection and returns a RocksetStore that - * uses it - * @param {Embeddings} embeddings Object used to embed queries and - * page content - * @param {RocksetLibArgs} dbConfig The options to be passed into the - * RocksetStore constructor - * @param {CreateCollectionRequest?} collectionOptions The arguments to sent with the - * HTTP request when creating the - * collection. Setting a field mapping - * that `VECTOR_ENFORCE`s is recommended - * when using this function. 
See - * https://rockset.com/docs/vector-functions/#vector_enforce - * @returns {RocsketStore} - */ - static async withNewCollection( - embeddings: Embeddings, - dbConfig: RocksetLibArgs, - collectionOptions?: CreateCollectionRequest - ): Promise { - if ( - collectionOptions?.name && - dbConfig.collectionName !== collectionOptions?.name - ) { - throw new RocksetStoreError( - "`dbConfig.name` and `collectionOptions.name` do not match" - ); - } - await dbConfig.client.collections.createCollection( - dbConfig.workspaceName ?? "commons", - collectionOptions || { name: dbConfig.collectionName } - ); - while ( - !(await this.collectionExists(dbConfig)) || - !(await this.collectionReady(dbConfig)) - ); - return new this(embeddings, dbConfig); - } - - public get similarityMetric() { - return this._similarityMetric; - } - - public set similarityMetric(metric: SimilarityMetric) { - this._similarityMetric = metric; - this.setSimilarityOrder(); - } -} +export * from "@langchain/community/vectorstores/rockset"; diff --git a/langchain/src/vectorstores/singlestore.ts b/langchain/src/vectorstores/singlestore.ts index e67d5e625f61..8d5df3a1dc1a 100644 --- a/langchain/src/vectorstores/singlestore.ts +++ b/langchain/src/vectorstores/singlestore.ts @@ -1,294 +1 @@ -import type { - Pool, - RowDataPacket, - OkPacket, - ResultSetHeader, - FieldPacket, - PoolOptions, -} from "mysql2/promise"; -import { format } from "mysql2"; -import { createPool } from "mysql2/promise"; -import { VectorStore } from "./base.js"; -import { Embeddings } from "../embeddings/base.js"; -import { Document } from "../document.js"; - -// eslint-disable-next-line @typescript-eslint/no-explicit-any -export type Metadata = Record; - -export type DistanceMetrics = "DOT_PRODUCT" | "EUCLIDEAN_DISTANCE"; - -const OrderingDirective: Record = { - DOT_PRODUCT: "DESC", - EUCLIDEAN_DISTANCE: "", -}; - -export interface ConnectionOptions extends PoolOptions {} - -type ConnectionWithUri = { - connectionOptions?: never; - 
connectionURI: string; -}; - -type ConnectionWithOptions = { - connectionURI?: never; - connectionOptions: ConnectionOptions; -}; - -type ConnectionConfig = ConnectionWithUri | ConnectionWithOptions; - -export type SingleStoreVectorStoreConfig = ConnectionConfig & { - tableName?: string; - contentColumnName?: string; - vectorColumnName?: string; - metadataColumnName?: string; - distanceMetric?: DistanceMetrics; -}; - -function withConnectAttributes( - config: SingleStoreVectorStoreConfig -): ConnectionOptions { - let newOptions: ConnectionOptions = {}; - if (config.connectionURI) { - newOptions = { - uri: config.connectionURI, - }; - } else if (config.connectionOptions) { - newOptions = { - ...config.connectionOptions, - }; - } - const result: ConnectionOptions = { - ...newOptions, - connectAttributes: { - ...newOptions.connectAttributes, - }, - }; - - if (!result.connectAttributes) { - result.connectAttributes = {}; - } - - result.connectAttributes = { - ...result.connectAttributes, - _connector_name: "langchain js sdk", - _connector_version: "1.0.0", - _driver_name: "Node-MySQL-2", - }; - - return result; -} - -/** - * Class for interacting with SingleStoreDB, a high-performance - * distributed SQL database. It provides vector storage and vector - * functions. - */ -export class SingleStoreVectorStore extends VectorStore { - connectionPool: Pool; - - tableName: string; - - contentColumnName: string; - - vectorColumnName: string; - - metadataColumnName: string; - - distanceMetric: DistanceMetrics; - - _vectorstoreType(): string { - return "singlestore"; - } - - constructor(embeddings: Embeddings, config: SingleStoreVectorStoreConfig) { - super(embeddings, config); - this.connectionPool = createPool(withConnectAttributes(config)); - this.tableName = config.tableName ?? "embeddings"; - this.contentColumnName = config.contentColumnName ?? "content"; - this.vectorColumnName = config.vectorColumnName ?? "vector"; - this.metadataColumnName = config.metadataColumnName ?? 
"metadata"; - this.distanceMetric = config.distanceMetric ?? "DOT_PRODUCT"; - } - - /** - * Creates a new table in the SingleStoreDB database if it does not - * already exist. - */ - async createTableIfNotExists(): Promise { - await this.connectionPool - .execute(`CREATE TABLE IF NOT EXISTS ${this.tableName} ( - ${this.contentColumnName} TEXT CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci, - ${this.vectorColumnName} BLOB, - ${this.metadataColumnName} JSON);`); - } - - /** - * Ends the connection to the SingleStoreDB database. - */ - async end(): Promise { - return this.connectionPool.end(); - } - - /** - * Adds new documents to the SingleStoreDB database. - * @param documents An array of Document objects. - */ - async addDocuments(documents: Document[]): Promise { - const texts = documents.map(({ pageContent }) => pageContent); - const vectors = await this.embeddings.embedDocuments(texts); - return this.addVectors(vectors, documents); - } - - /** - * Adds new vectors to the SingleStoreDB database. - * @param vectors An array of vectors. - * @param documents An array of Document objects. - */ - async addVectors(vectors: number[][], documents: Document[]): Promise { - await this.createTableIfNotExists(); - const { tableName } = this; - - await Promise.all( - vectors.map(async (vector, idx) => { - try { - await this.connectionPool.execute( - format( - `INSERT INTO ${tableName} VALUES (?, JSON_ARRAY_PACK('[?]'), ?);`, - [ - documents[idx].pageContent, - vector, - JSON.stringify(documents[idx].metadata), - ] - ) - ); - } catch (error) { - console.error(`Error adding vector at index ${idx}:`, error); - } - }) - ); - } - - /** - * Performs a similarity search on the vectors stored in the SingleStoreDB - * database. - * @param query An array of numbers representing the query vector. - * @param k The number of nearest neighbors to return. - * @param filter Optional metadata to filter the vectors by. 
- * @returns Top matching vectors with score - */ - async similaritySearchVectorWithScore( - query: number[], - k: number, - filter?: Metadata - ): Promise<[Document, number][]> { - // build the where clause from filter - const whereArgs: string[] = []; - const buildWhereClause = (record: Metadata, argList: string[]): string => { - const whereTokens: string[] = []; - for (const key in record) - if (record[key] !== undefined) { - if ( - typeof record[key] === "object" && - record[key] != null && - !Array.isArray(record[key]) - ) { - whereTokens.push( - buildWhereClause(record[key], argList.concat([key])) - ); - } else { - whereTokens.push( - `JSON_EXTRACT_JSON(${this.metadataColumnName}, `.concat( - Array.from({ length: argList.length + 1 }, () => "?").join( - ", " - ), - ") = ?" - ) - ); - whereArgs.push(...argList, key, JSON.stringify(record[key])); - } - } - return whereTokens.join(" AND "); - }; - const whereClause = filter - ? "WHERE ".concat(buildWhereClause(filter, [])) - : ""; - - const [rows]: [ - ( - | RowDataPacket[] - | RowDataPacket[][] - | OkPacket - | OkPacket[] - | ResultSetHeader - ), - FieldPacket[] - ] = await this.connectionPool.query( - format( - `SELECT ${this.contentColumnName}, - ${this.metadataColumnName}, - ${this.distanceMetric}(${ - this.vectorColumnName - }, JSON_ARRAY_PACK('[?]')) as __score FROM ${ - this.tableName - } ${whereClause} - ORDER BY __score ${OrderingDirective[this.distanceMetric]} LIMIT ?;`, - [query, ...whereArgs, k] - ) - ); - const result: [Document, number][] = []; - for (const row of rows as RowDataPacket[]) { - const rowData = row as unknown as Record; - result.push([ - new Document({ - pageContent: rowData[this.contentColumnName] as string, - metadata: rowData[this.metadataColumnName] as Record, - }), - Number(rowData.score), - ]); - } - return result; - } - - /** - * Creates a new instance of the SingleStoreVectorStore class from a list - * of texts. - * @param texts An array of strings. 
- * @param metadatas An array of metadata objects. - * @param embeddings An Embeddings object. - * @param dbConfig A SingleStoreVectorStoreConfig object. - * @returns A new SingleStoreVectorStore instance - */ - static async fromTexts( - texts: string[], - metadatas: object[], - embeddings: Embeddings, - dbConfig: SingleStoreVectorStoreConfig - ): Promise { - const docs = texts.map((text, idx) => { - const metadata = Array.isArray(metadatas) ? metadatas[idx] : metadatas; - return new Document({ - pageContent: text, - metadata, - }); - }); - return SingleStoreVectorStore.fromDocuments(docs, embeddings, dbConfig); - } - - /** - * Creates a new instance of the SingleStoreVectorStore class from a list - * of Document objects. - * @param docs An array of Document objects. - * @param embeddings An Embeddings object. - * @param dbConfig A SingleStoreVectorStoreConfig object. - * @returns A new SingleStoreVectorStore instance - */ - static async fromDocuments( - docs: Document[], - embeddings: Embeddings, - dbConfig: SingleStoreVectorStoreConfig - ): Promise { - const instance = new this(embeddings, dbConfig); - await instance.addDocuments(docs); - return instance; - } -} +export * from "@langchain/community/vectorstores/singlestore"; diff --git a/langchain/src/vectorstores/supabase.ts b/langchain/src/vectorstores/supabase.ts index 0659af76cf97..103b466c3ea5 100644 --- a/langchain/src/vectorstores/supabase.ts +++ b/langchain/src/vectorstores/supabase.ts @@ -1,310 +1 @@ -import type { SupabaseClient } from "@supabase/supabase-js"; -import type { PostgrestFilterBuilder } from "@supabase/postgrest-js"; -import { MaxMarginalRelevanceSearchOptions, VectorStore } from "./base.js"; -import { Embeddings } from "../embeddings/base.js"; -import { Document } from "../document.js"; -import { maximalMarginalRelevance } from "../util/math.js"; - -/** - * Interface for the parameters required for searching embeddings. 
- */ -interface SearchEmbeddingsParams { - query_embedding: number[]; - match_count: number; // int - filter?: SupabaseMetadata | SupabaseFilterRPCCall; -} - -// eslint-disable-next-line @typescript-eslint/ban-types, @typescript-eslint/no-explicit-any -export type SupabaseMetadata = Record; -// eslint-disable-next-line @typescript-eslint/ban-types, @typescript-eslint/no-explicit-any -export type SupabaseFilter = PostgrestFilterBuilder; -export type SupabaseFilterRPCCall = (rpcCall: SupabaseFilter) => SupabaseFilter; - -/** - * Interface for the response returned when searching embeddings. - */ -interface SearchEmbeddingsResponse { - id: number; - content: string; - metadata: object; - embedding: number[]; - similarity: number; -} - -/** - * Interface for the arguments required to initialize a Supabase library. - */ -export interface SupabaseLibArgs { - client: SupabaseClient; - tableName?: string; - queryName?: string; - filter?: SupabaseMetadata | SupabaseFilterRPCCall; - upsertBatchSize?: number; -} - -/** - * Class for interacting with a Supabase database to store and manage - * vectors. - */ -export class SupabaseVectorStore extends VectorStore { - declare FilterType: SupabaseMetadata | SupabaseFilterRPCCall; - - client: SupabaseClient; - - tableName: string; - - queryName: string; - - filter?: SupabaseMetadata | SupabaseFilterRPCCall; - - upsertBatchSize = 500; - - _vectorstoreType(): string { - return "supabase"; - } - - constructor(embeddings: Embeddings, args: SupabaseLibArgs) { - super(embeddings, args); - - this.client = args.client; - this.tableName = args.tableName || "documents"; - this.queryName = args.queryName || "match_documents"; - this.filter = args.filter; - this.upsertBatchSize = args.upsertBatchSize ?? this.upsertBatchSize; - } - - /** - * Adds documents to the vector store. - * @param documents The documents to add. - * @param options Optional parameters for adding the documents. 
- * @returns A promise that resolves when the documents have been added. - */ - async addDocuments( - documents: Document[], - options?: { ids?: string[] | number[] } - ) { - const texts = documents.map(({ pageContent }) => pageContent); - return this.addVectors( - await this.embeddings.embedDocuments(texts), - documents, - options - ); - } - - /** - * Adds vectors to the vector store. - * @param vectors The vectors to add. - * @param documents The documents associated with the vectors. - * @param options Optional parameters for adding the vectors. - * @returns A promise that resolves with the IDs of the added vectors when the vectors have been added. - */ - async addVectors( - vectors: number[][], - documents: Document[], - options?: { ids?: string[] | number[] } - ) { - const rows = vectors.map((embedding, idx) => ({ - content: documents[idx].pageContent, - embedding, - metadata: documents[idx].metadata, - })); - - // upsert returns 500/502/504 (yes really any of them) if given too many rows/characters - // ~2000 trips it, but my data is probably smaller than average pageContent and metadata - let returnedIds: string[] = []; - for (let i = 0; i < rows.length; i += this.upsertBatchSize) { - const chunk = rows.slice(i, i + this.upsertBatchSize).map((row, j) => { - if (options?.ids) { - return { id: options.ids[i + j], ...row }; - } - return row; - }); - - const res = await this.client.from(this.tableName).upsert(chunk).select(); - if (res.error) { - throw new Error( - `Error inserting: ${res.error.message} ${res.status} ${res.statusText}` - ); - } - if (res.data) { - returnedIds = returnedIds.concat(res.data.map((row) => row.id)); - } - } - return returnedIds; - } - - /** - * Deletes vectors from the vector store. - * @param params The parameters for deleting vectors. - * @returns A promise that resolves when the vectors have been deleted. 
- */ - async delete(params: { ids: string[] | number[] }): Promise { - const { ids } = params; - for (const id of ids) { - await this.client.from(this.tableName).delete().eq("id", id); - } - } - - protected async _searchSupabase( - query: number[], - k: number, - filter?: this["FilterType"] - ): Promise { - if (filter && this.filter) { - throw new Error("cannot provide both `filter` and `this.filter`"); - } - const _filter = filter ?? this.filter ?? {}; - const matchDocumentsParams: Partial = { - query_embedding: query, - }; - - let filterFunction: SupabaseFilterRPCCall; - - if (typeof _filter === "function") { - filterFunction = (rpcCall) => _filter(rpcCall).limit(k); - } else if (typeof _filter === "object") { - matchDocumentsParams.filter = _filter; - matchDocumentsParams.match_count = k; - filterFunction = (rpcCall) => rpcCall; - } else { - throw new Error("invalid filter type"); - } - - const rpcCall = this.client.rpc(this.queryName, matchDocumentsParams); - - const { data: searches, error } = await filterFunction(rpcCall); - - if (error) { - throw new Error( - `Error searching for documents: ${error.code} ${error.message} ${error.details}` - ); - } - - return searches; - } - - /** - * Performs a similarity search on the vector store. - * @param query The query vector. - * @param k The number of results to return. - * @param filter Optional filter to apply to the search. - * @returns A promise that resolves with the search results when the search is complete. - */ - async similaritySearchVectorWithScore( - query: number[], - k: number, - filter?: this["FilterType"] - ): Promise<[Document, number][]> { - const searches = await this._searchSupabase(query, k, filter); - const result: [Document, number][] = searches.map((resp) => [ - new Document({ - metadata: resp.metadata, - pageContent: resp.content, - }), - resp.similarity, - ]); - - return result; - } - - /** - * Return documents selected using the maximal marginal relevance. 
- * Maximal marginal relevance optimizes for similarity to the query AND diversity - * among selected documents. - * - * @param {string} query - Text to look up documents similar to. - * @param {number} options.k - Number of documents to return. - * @param {number} options.fetchK=20- Number of documents to fetch before passing to the MMR algorithm. - * @param {number} options.lambda=0.5 - Number between 0 and 1 that determines the degree of diversity among the results, - * where 0 corresponds to maximum diversity and 1 to minimum diversity. - * @param {SupabaseLibArgs} options.filter - Optional filter to apply to the search. - * - * @returns {Promise} - List of documents selected by maximal marginal relevance. - */ - async maxMarginalRelevanceSearch( - query: string, - options: MaxMarginalRelevanceSearchOptions - ): Promise { - const queryEmbedding = await this.embeddings.embedQuery(query); - - const searches = await this._searchSupabase( - queryEmbedding, - options.fetchK ?? 20, - options.filter - ); - - const embeddingList = searches.map((searchResp) => searchResp.embedding); - - const mmrIndexes = maximalMarginalRelevance( - queryEmbedding, - embeddingList, - options.lambda, - options.k - ); - - return mmrIndexes.map( - (idx) => - new Document({ - metadata: searches[idx].metadata, - pageContent: searches[idx].content, - }) - ); - } - - /** - * Creates a new SupabaseVectorStore instance from an array of texts. - * @param texts The texts to create documents from. - * @param metadatas The metadata for the documents. - * @param embeddings The embeddings to use. - * @param dbConfig The configuration for the Supabase database. - * @returns A promise that resolves with a new SupabaseVectorStore instance when the instance has been created. 
- */ - static async fromTexts( - texts: string[], - metadatas: object[] | object, - embeddings: Embeddings, - dbConfig: SupabaseLibArgs - ): Promise { - const docs: Document[] = []; - for (let i = 0; i < texts.length; i += 1) { - const metadata = Array.isArray(metadatas) ? metadatas[i] : metadatas; - const newDoc = new Document({ - pageContent: texts[i], - metadata, - }); - docs.push(newDoc); - } - return SupabaseVectorStore.fromDocuments(docs, embeddings, dbConfig); - } - - /** - * Creates a new SupabaseVectorStore instance from an array of documents. - * @param docs The documents to create the instance from. - * @param embeddings The embeddings to use. - * @param dbConfig The configuration for the Supabase database. - * @returns A promise that resolves with a new SupabaseVectorStore instance when the instance has been created. - */ - static async fromDocuments( - docs: Document[], - embeddings: Embeddings, - dbConfig: SupabaseLibArgs - ): Promise { - const instance = new this(embeddings, dbConfig); - await instance.addDocuments(docs); - return instance; - } - - /** - * Creates a new SupabaseVectorStore instance from an existing index. - * @param embeddings The embeddings to use. - * @param dbConfig The configuration for the Supabase database. - * @returns A promise that resolves with a new SupabaseVectorStore instance when the instance has been created. 
- */ - static async fromExistingIndex( - embeddings: Embeddings, - dbConfig: SupabaseLibArgs - ): Promise { - const instance = new this(embeddings, dbConfig); - return instance; - } -} +export * from "@langchain/community/vectorstores/supabase"; diff --git a/langchain/src/vectorstores/tests/convex/convex/langchain/db.ts b/langchain/src/vectorstores/tests/convex/convex/langchain/db.ts deleted file mode 100644 index e09d4ecfe02d..000000000000 --- a/langchain/src/vectorstores/tests/convex/convex/langchain/db.ts +++ /dev/null @@ -1 +0,0 @@ -export * from "../../../../../util/convex.js"; diff --git a/langchain/src/vectorstores/tigris.ts b/langchain/src/vectorstores/tigris.ts index d4cfd73828fe..d3702e081d3b 100644 --- a/langchain/src/vectorstores/tigris.ts +++ b/langchain/src/vectorstores/tigris.ts @@ -1,177 +1 @@ -import * as uuid from "uuid"; - -import { Embeddings } from "../embeddings/base.js"; -import { VectorStore } from "./base.js"; -import { Document } from "../document.js"; - -/** - * Type definition for the arguments required to initialize a - * TigrisVectorStore instance. - */ -export type TigrisLibArgs = { - // eslint-disable-next-line @typescript-eslint/no-explicit-any - index: any; -}; - -/** - * Class for managing and operating vector search applications with - * Tigris, an open-source Serverless NoSQL Database and Search Platform. - */ -export class TigrisVectorStore extends VectorStore { - // eslint-disable-next-line @typescript-eslint/no-explicit-any - index?: any; - - _vectorstoreType(): string { - return "tigris"; - } - - constructor(embeddings: Embeddings, args: TigrisLibArgs) { - super(embeddings, args); - - this.embeddings = embeddings; - this.index = args.index; - } - - /** - * Method to add an array of documents to the Tigris database. - * @param documents An array of Document instances to be added to the Tigris database. 
- * @param options Optional parameter that can either be an array of string IDs or an object with a property 'ids' that is an array of string IDs. - * @returns A Promise that resolves when the documents have been added to the Tigris database. - */ - async addDocuments( - documents: Document[], - options?: { ids?: string[] } | string[] - ): Promise { - const texts = documents.map(({ pageContent }) => pageContent); - await this.addVectors( - await this.embeddings.embedDocuments(texts), - documents, - options - ); - } - - /** - * Method to add vectors to the Tigris database. - * @param vectors An array of vectors to be added to the Tigris database. - * @param documents An array of Document instances corresponding to the vectors. - * @param options Optional parameter that can either be an array of string IDs or an object with a property 'ids' that is an array of string IDs. - * @returns A Promise that resolves when the vectors have been added to the Tigris database. - */ - async addVectors( - vectors: number[][], - documents: Document[], - options?: { ids?: string[] } | string[] - ) { - if (vectors.length === 0) { - return; - } - - if (vectors.length !== documents.length) { - throw new Error(`Vectors and metadatas must have the same length`); - } - - const ids = Array.isArray(options) ? options : options?.ids; - const documentIds = ids == null ? documents.map(() => uuid.v4()) : ids; - await this.index?.addDocumentsWithVectors({ - ids: documentIds, - embeddings: vectors, - documents: documents.map(({ metadata, pageContent }) => ({ - content: pageContent, - metadata, - })), - }); - } - - /** - * Method to perform a similarity search in the Tigris database and return - * the k most similar vectors along with their similarity scores. - * @param query The query vector. - * @param k The number of most similar vectors to return. - * @param filter Optional filter object to apply during the search. 
- * @returns A Promise that resolves to an array of tuples, each containing a Document and its similarity score. - */ - async similaritySearchVectorWithScore( - query: number[], - k: number, - filter?: object - ) { - const result = await this.index?.similaritySearchVectorWithScore({ - query, - k, - filter, - }); - - if (!result) { - return []; - } - - // eslint-disable-next-line @typescript-eslint/no-explicit-any - return result.map(([document, score]: [any, any]) => [ - new Document({ - pageContent: document.content, - metadata: document.metadata, - }), - score, - ]) as [Document, number][]; - } - - /** - * Static method to create a new instance of TigrisVectorStore from an - * array of texts. - * @param texts An array of texts to be converted into Document instances and added to the Tigris database. - * @param metadatas Either an array of metadata objects or a single metadata object to be associated with the texts. - * @param embeddings An instance of Embeddings to be used for embedding the texts. - * @param dbConfig An instance of TigrisLibArgs to be used for configuring the Tigris database. - * @returns A Promise that resolves to a new instance of TigrisVectorStore. - */ - static async fromTexts( - texts: string[], - metadatas: object[] | object, - embeddings: Embeddings, - dbConfig: TigrisLibArgs - ): Promise { - const docs: Document[] = []; - for (let i = 0; i < texts.length; i += 1) { - const metadata = Array.isArray(metadatas) ? metadatas[i] : metadatas; - const newDoc = new Document({ - pageContent: texts[i], - metadata, - }); - docs.push(newDoc); - } - return TigrisVectorStore.fromDocuments(docs, embeddings, dbConfig); - } - - /** - * Static method to create a new instance of TigrisVectorStore from an - * array of Document instances. - * @param docs An array of Document instances to be added to the Tigris database. - * @param embeddings An instance of Embeddings to be used for embedding the documents. 
- * @param dbConfig An instance of TigrisLibArgs to be used for configuring the Tigris database. - * @returns A Promise that resolves to a new instance of TigrisVectorStore. - */ - static async fromDocuments( - docs: Document[], - embeddings: Embeddings, - dbConfig: TigrisLibArgs - ): Promise { - const instance = new this(embeddings, dbConfig); - await instance.addDocuments(docs); - return instance; - } - - /** - * Static method to create a new instance of TigrisVectorStore from an - * existing index. - * @param embeddings An instance of Embeddings to be used for embedding the documents. - * @param dbConfig An instance of TigrisLibArgs to be used for configuring the Tigris database. - * @returns A Promise that resolves to a new instance of TigrisVectorStore. - */ - static async fromExistingIndex( - embeddings: Embeddings, - dbConfig: TigrisLibArgs - ): Promise { - const instance = new this(embeddings, dbConfig); - return instance; - } -} +export * from "@langchain/community/vectorstores/tigris"; diff --git a/langchain/src/vectorstores/typeorm.ts b/langchain/src/vectorstores/typeorm.ts index d0130303ed68..234f53c8d663 100644 --- a/langchain/src/vectorstores/typeorm.ts +++ b/langchain/src/vectorstores/typeorm.ts @@ -1,298 +1 @@ -import { Metadata } from "@opensearch-project/opensearch/api/types.js"; -import { DataSource, DataSourceOptions, EntitySchema } from "typeorm"; -import { VectorStore } from "./base.js"; -import { Embeddings } from "../embeddings/base.js"; -import { Document } from "../document.js"; -import { getEnvironmentVariable } from "../util/env.js"; - -/** - * Interface that defines the arguments required to create a - * `TypeORMVectorStore` instance. It includes Postgres connection options, - * table name, filter, and verbosity level. 
- */ -export interface TypeORMVectorStoreArgs { - postgresConnectionOptions: DataSourceOptions; - tableName?: string; - filter?: Metadata; - verbose?: boolean; -} - -/** - * Class that extends the `Document` base class and adds an `embedding` - * property. It represents a document in the vector store. - */ -export class TypeORMVectorStoreDocument extends Document { - embedding: string; - - id?: string; -} - -const defaultDocumentTableName = "documents"; - -/** - * Class that provides an interface to a Postgres vector database. It - * extends the `VectorStore` base class and implements methods for adding - * documents and vectors, performing similarity searches, and ensuring the - * existence of a table in the database. - */ -export class TypeORMVectorStore extends VectorStore { - declare FilterType: Metadata; - - tableName: string; - - documentEntity: EntitySchema; - - filter?: Metadata; - - appDataSource: DataSource; - - _verbose?: boolean; - - _vectorstoreType(): string { - return "typeorm"; - } - - private constructor(embeddings: Embeddings, fields: TypeORMVectorStoreArgs) { - super(embeddings, fields); - this.tableName = fields.tableName || defaultDocumentTableName; - this.filter = fields.filter; - - const TypeORMDocumentEntity = new EntitySchema({ - name: fields.tableName ?? defaultDocumentTableName, - columns: { - id: { - generated: "uuid", - type: "uuid", - primary: true, - }, - pageContent: { - type: String, - }, - metadata: { - type: "jsonb", - }, - embedding: { - type: String, - }, - }, - }); - const appDataSource = new DataSource({ - entities: [TypeORMDocumentEntity], - ...fields.postgresConnectionOptions, - }); - this.appDataSource = appDataSource; - this.documentEntity = TypeORMDocumentEntity; - - this._verbose = - getEnvironmentVariable("LANGCHAIN_VERBOSE") === "true" ?? - fields.verbose ?? - false; - } - - /** - * Static method to create a new `TypeORMVectorStore` instance from a - * `DataSource`. 
It initializes the `DataSource` if it is not already - * initialized. - * @param embeddings Embeddings instance. - * @param fields `TypeORMVectorStoreArgs` instance. - * @returns A new instance of `TypeORMVectorStore`. - */ - static async fromDataSource( - embeddings: Embeddings, - fields: TypeORMVectorStoreArgs - ): Promise { - const postgresqlVectorStore = new TypeORMVectorStore(embeddings, fields); - - if (!postgresqlVectorStore.appDataSource.isInitialized) { - await postgresqlVectorStore.appDataSource.initialize(); - } - - return postgresqlVectorStore; - } - - /** - * Method to add documents to the vector store. It ensures the existence - * of the table in the database, converts the documents into vectors, and - * adds them to the store. - * @param documents Array of `Document` instances. - * @returns Promise that resolves when the documents have been added. - */ - async addDocuments(documents: Document[]): Promise { - const texts = documents.map(({ pageContent }) => pageContent); - // This will create the table if it does not exist. We can call it every time as it doesn't - // do anything if the table already exists, and it is not expensive in terms of performance - await this.ensureTableInDatabase(); - return this.addVectors( - await this.embeddings.embedDocuments(texts), - documents - ); - } - - /** - * Method to add vectors to the vector store. It converts the vectors into - * rows and inserts them into the database. - * @param vectors Array of vectors. - * @param documents Array of `Document` instances. - * @returns Promise that resolves when the vectors have been added. 
- */ - async addVectors(vectors: number[][], documents: Document[]): Promise { - const rows = vectors.map((embedding, idx) => { - const embeddingString = `[${embedding.join(",")}]`; - const documentRow = { - pageContent: documents[idx].pageContent, - embedding: embeddingString, - metadata: documents[idx].metadata, - }; - - return documentRow; - }); - - const documentRepository = this.appDataSource.getRepository( - this.documentEntity - ); - - const chunkSize = 500; - for (let i = 0; i < rows.length; i += chunkSize) { - const chunk = rows.slice(i, i + chunkSize); - - try { - await documentRepository.save(chunk); - } catch (e) { - console.error(e); - throw new Error(`Error inserting: ${chunk[0].pageContent}`); - } - } - } - - /** - * Method to perform a similarity search in the vector store. It returns - * the `k` most similar documents to the query vector, along with their - * similarity scores. - * @param query Query vector. - * @param k Number of most similar documents to return. - * @param filter Optional filter to apply to the search. - * @returns Promise that resolves with an array of tuples, each containing a `TypeORMVectorStoreDocument` and its similarity score. - */ - async similaritySearchVectorWithScore( - query: number[], - k: number, - filter?: this["FilterType"] - ): Promise<[TypeORMVectorStoreDocument, number][]> { - const embeddingString = `[${query.join(",")}]`; - const _filter = filter ?? 
"{}"; - - const queryString = ` - SELECT *, embedding <=> $1 as "_distance" - FROM ${this.tableName} - WHERE metadata @> $2 - ORDER BY "_distance" ASC - LIMIT $3;`; - - const documents = await this.appDataSource.query(queryString, [ - embeddingString, - _filter, - k, - ]); - - const results = [] as [TypeORMVectorStoreDocument, number][]; - for (const doc of documents) { - if (doc._distance != null && doc.pageContent != null) { - const document = new Document(doc) as TypeORMVectorStoreDocument; - document.id = doc.id; - results.push([document, doc._distance]); - } - } - - return results; - } - - /** - * Method to ensure the existence of the table in the database. It creates - * the table if it does not already exist. - * @returns Promise that resolves when the table has been ensured. - */ - async ensureTableInDatabase(): Promise { - await this.appDataSource.query("CREATE EXTENSION IF NOT EXISTS vector;"); - await this.appDataSource.query( - 'CREATE EXTENSION IF NOT EXISTS "uuid-ossp";' - ); - - await this.appDataSource.query(` - CREATE TABLE IF NOT EXISTS ${this.tableName} ( - "id" uuid NOT NULL DEFAULT uuid_generate_v4() PRIMARY KEY, - "pageContent" text, - metadata jsonb, - embedding vector - ); - `); - } - - /** - * Static method to create a new `TypeORMVectorStore` instance from an - * array of texts and their metadata. It converts the texts into - * `Document` instances and adds them to the store. - * @param texts Array of texts. - * @param metadatas Array of metadata objects or a single metadata object. - * @param embeddings Embeddings instance. - * @param dbConfig `TypeORMVectorStoreArgs` instance. - * @returns Promise that resolves with a new instance of `TypeORMVectorStore`. - */ - static async fromTexts( - texts: string[], - metadatas: object[] | object, - embeddings: Embeddings, - dbConfig: TypeORMVectorStoreArgs - ): Promise { - const docs = []; - for (let i = 0; i < texts.length; i += 1) { - const metadata = Array.isArray(metadatas) ? 
metadatas[i] : metadatas; - const newDoc = new Document({ - pageContent: texts[i], - metadata, - }); - docs.push(newDoc); - } - - return TypeORMVectorStore.fromDocuments(docs, embeddings, dbConfig); - } - - /** - * Static method to create a new `TypeORMVectorStore` instance from an - * array of `Document` instances. It adds the documents to the store. - * @param docs Array of `Document` instances. - * @param embeddings Embeddings instance. - * @param dbConfig `TypeORMVectorStoreArgs` instance. - * @returns Promise that resolves with a new instance of `TypeORMVectorStore`. - */ - static async fromDocuments( - docs: Document[], - embeddings: Embeddings, - dbConfig: TypeORMVectorStoreArgs - ): Promise { - const instance = await TypeORMVectorStore.fromDataSource( - embeddings, - dbConfig - ); - await instance.addDocuments(docs); - - return instance; - } - - /** - * Static method to create a new `TypeORMVectorStore` instance from an - * existing index. - * @param embeddings Embeddings instance. - * @param dbConfig `TypeORMVectorStoreArgs` instance. - * @returns Promise that resolves with a new instance of `TypeORMVectorStore`. 
- */ - static async fromExistingIndex( - embeddings: Embeddings, - dbConfig: TypeORMVectorStoreArgs - ): Promise { - const instance = await TypeORMVectorStore.fromDataSource( - embeddings, - dbConfig - ); - return instance; - } -} +export * from "@langchain/community/vectorstores/typeorm"; diff --git a/langchain/src/vectorstores/typesense.ts b/langchain/src/vectorstores/typesense.ts index e6f608312cc0..bf389cde3fcb 100644 --- a/langchain/src/vectorstores/typesense.ts +++ b/langchain/src/vectorstores/typesense.ts @@ -1,320 +1 @@ -import type { Client } from "typesense"; -import type { MultiSearchRequestSchema } from "typesense/lib/Typesense/MultiSearch.js"; -import type { - SearchResponseHit, - DocumentSchema, -} from "typesense/lib/Typesense/Documents.js"; -import type { Document } from "../document.js"; -import { Embeddings } from "../embeddings/base.js"; -import { VectorStore } from "./base.js"; -import { AsyncCaller, AsyncCallerParams } from "../util/async_caller.js"; - -/** - * Interface for the response hit from a vector search in Typesense. - */ -interface VectorSearchResponseHit - extends SearchResponseHit { - vector_distance?: number; -} - -/** - * Typesense vector store configuration. - */ -export interface TypesenseConfig extends AsyncCallerParams { - /** - * Typesense client. - */ - typesenseClient: Client; - /** - * Typesense schema name in which documents will be stored and searched. - */ - schemaName: string; - /** - * Typesense search parameters. - * @default { q: '*', per_page: 5, query_by: '' } - */ - searchParams?: MultiSearchRequestSchema; - /** - * Column names. - */ - columnNames?: { - /** - * Vector column name. - * @default 'vec' - */ - vector?: string; - /** - * Page content column name. - * @default 'text' - */ - pageContent?: string; - /** - * Metadata column names. - * @default [] - */ - metadataColumnNames?: string[]; - }; - /** - * Replace default import function. 
- * Default import function will update documents if there is a document with the same id. - * @param data - * @param collectionName - */ - import? = Record>( - data: T[], - collectionName: string - ): Promise; -} - -/** - * Typesense vector store. - */ -export class Typesense extends VectorStore { - declare FilterType: Partial; - - private client: Client; - - private schemaName: string; - - private searchParams: MultiSearchRequestSchema; - - private vectorColumnName: string; - - private pageContentColumnName: string; - - private metadataColumnNames: string[]; - - private caller: AsyncCaller; - - private import: ( - data: Record[], - collectionName: string - ) => Promise; - - _vectorstoreType(): string { - return "typesense"; - } - - constructor(embeddings: Embeddings, config: TypesenseConfig) { - super(embeddings, config); - - // Assign config values to class properties. - this.client = config.typesenseClient; - this.schemaName = config.schemaName; - this.searchParams = config.searchParams || { - q: "*", - per_page: 5, - query_by: "", - }; - this.vectorColumnName = config.columnNames?.vector || "vec"; - this.pageContentColumnName = config.columnNames?.pageContent || "text"; - this.metadataColumnNames = config.columnNames?.metadataColumnNames || []; - - // Assign import function. - this.import = config.import || this.importToTypesense.bind(this); - - this.caller = new AsyncCaller(config); - } - - /** - * Default function to import data to typesense - * @param data - * @param collectionName - */ - private async importToTypesense< - T extends Record = Record - >(data: T[], collectionName: string) { - const chunkSize = 2000; - for (let i = 0; i < data.length; i += chunkSize) { - const chunk = data.slice(i, i + chunkSize); - - await this.caller.call(async () => { - await this.client - .collections(collectionName) - .documents() - .import(chunk, { action: "emplace", dirty_values: "drop" }); - }); - } - } - - /** - * Transform documents to Typesense records. 
- * @param documents - * @returns Typesense records. - */ - _documentsToTypesenseRecords( - documents: Document[], - vectors: number[][] - ): Record[] { - const metadatas = documents.map((doc) => doc.metadata); - - const typesenseDocuments = documents.map((doc, index) => { - const metadata = metadatas[index]; - const objectWithMetadatas: Record = {}; - - this.metadataColumnNames.forEach((metadataColumnName) => { - objectWithMetadatas[metadataColumnName] = metadata[metadataColumnName]; - }); - - return { - [this.pageContentColumnName]: doc.pageContent, - [this.vectorColumnName]: vectors[index], - ...objectWithMetadatas, - }; - }); - - return typesenseDocuments; - } - - /** - * Transform the Typesense records to documents. - * @param typesenseRecords - * @returns documents - */ - _typesenseRecordsToDocuments( - typesenseRecords: - | { document?: Record; vector_distance: number }[] - | undefined - ): [Document, number][] { - const documents: [Document, number][] = - typesenseRecords?.map((hit) => { - const objectWithMetadatas: Record = {}; - const hitDoc = hit.document || {}; - this.metadataColumnNames.forEach((metadataColumnName) => { - objectWithMetadatas[metadataColumnName] = hitDoc[metadataColumnName]; - }); - - const document: Document = { - pageContent: (hitDoc[this.pageContentColumnName] as string) || "", - metadata: objectWithMetadatas, - }; - return [document, hit.vector_distance]; - }) || []; - - return documents; - } - - /** - * Add documents to the vector store. - * Will be updated if in the metadata there is a document with the same id if is using the default import function. - * Metadata will be added in the columns of the schema based on metadataColumnNames. - * @param documents Documents to add. 
- */ - async addDocuments(documents: Document[]) { - const typesenseDocuments = this._documentsToTypesenseRecords( - documents, - await this.embeddings.embedDocuments( - documents.map((doc) => doc.pageContent) - ) - ); - await this.import(typesenseDocuments, this.schemaName); - } - - /** - * Adds vectors to the vector store. - * @param vectors Vectors to add. - * @param documents Documents associated with the vectors. - */ - async addVectors(vectors: number[][], documents: Document[]) { - const typesenseDocuments = this._documentsToTypesenseRecords( - documents, - vectors - ); - await this.import(typesenseDocuments, this.schemaName); - } - - /** - * Search for similar documents with their similarity score. - * @param vectorPrompt vector to search for - * @param k amount of results to return - * @returns similar documents with their similarity score - */ - async similaritySearchVectorWithScore( - vectorPrompt: number[], - k?: number, - filter: this["FilterType"] = {} - ) { - const amount = k || this.searchParams.per_page || 5; - const vector_query = `${this.vectorColumnName}:([${vectorPrompt}], k:${amount})`; - const typesenseResponse = await this.client.multiSearch.perform( - { - searches: [ - { - ...this.searchParams, - ...filter, - per_page: amount, - vector_query, - collection: this.schemaName, - }, - ], - }, - {} - ); - const results = typesenseResponse.results[0].hits; - - const hits = results?.map((hit: VectorSearchResponseHit) => ({ - document: hit?.document || {}, - vector_distance: hit?.vector_distance || 2, - })) as - | { document: Record; vector_distance: number }[] - | undefined; - - return this._typesenseRecordsToDocuments(hits); - } - - /** - * Delete documents from the vector store. 
- * @param documentIds ids of the documents to delete - */ - async deleteDocuments(documentIds: string[]) { - await this.client - .collections(this.schemaName) - .documents() - .delete({ - filter_by: `id:=${documentIds.join(",")}`, - }); - } - - /** - * Create a vector store from documents. - * @param docs documents - * @param embeddings embeddings - * @param config Typesense configuration - * @returns Typesense vector store - * @warning You can omit this method, and only use the constructor and addDocuments. - */ - static async fromDocuments( - docs: Document[], - embeddings: Embeddings, - config: TypesenseConfig - ): Promise { - const instance = new Typesense(embeddings, config); - await instance.addDocuments(docs); - - return instance; - } - - /** - * Create a vector store from texts. - * @param texts - * @param metadatas - * @param embeddings - * @param config - * @returns Typesense vector store - */ - static async fromTexts( - texts: string[], - metadatas: object[], - embeddings: Embeddings, - config: TypesenseConfig - ) { - const instance = new Typesense(embeddings, config); - const documents: Document[] = texts.map((text, i) => ({ - pageContent: text, - metadata: metadatas[i] || {}, - })); - await instance.addDocuments(documents); - - return instance; - } -} +export * from "@langchain/community/vectorstores/typesense"; diff --git a/langchain/src/vectorstores/usearch.ts b/langchain/src/vectorstores/usearch.ts index 00faf0bfa25b..181f05100ade 100644 --- a/langchain/src/vectorstores/usearch.ts +++ b/langchain/src/vectorstores/usearch.ts @@ -1,223 +1 @@ -import usearch from "usearch"; -import * as uuid from "uuid"; -import { Embeddings } from "../embeddings/base.js"; -import { SaveableVectorStore } from "./base.js"; -import { Document } from "../document.js"; -import { SynchronousInMemoryDocstore } from "../stores/doc/in_memory.js"; - -/** - * Interface that defines the arguments that can be passed to the - * `USearch` constructor. 
It includes optional properties for a - * `docstore`, `index`, and `mapping`. - */ -export interface USearchArgs { - docstore?: SynchronousInMemoryDocstore; - index?: usearch.Index; - mapping?: Record; -} - -/** - * Class that extends `SaveableVectorStore` and provides methods for - * adding documents and vectors to a `usearch` index, performing - * similarity searches, and saving the index. - */ -export class USearch extends SaveableVectorStore { - _index?: usearch.Index; - - _mapping: Record; - - docstore: SynchronousInMemoryDocstore; - - args: USearchArgs; - - _vectorstoreType(): string { - return "usearch"; - } - - constructor(embeddings: Embeddings, args: USearchArgs) { - super(embeddings, args); - this.args = args; - this._index = args.index; - this._mapping = args.mapping ?? {}; - this.embeddings = embeddings; - this.docstore = args?.docstore ?? new SynchronousInMemoryDocstore(); - } - - /** - * Method that adds documents to the `usearch` index. It generates - * embeddings for the documents and adds them to the index. - * @param documents An array of `Document` instances to be added to the index. - * @returns A promise that resolves with an array of document IDs. - */ - async addDocuments(documents: Document[]) { - const texts = documents.map(({ pageContent }) => pageContent); - return this.addVectors( - await this.embeddings.embedDocuments(texts), - documents - ); - } - - public get index(): usearch.Index { - if (!this._index) { - throw new Error( - "Vector store not initialised yet. Try calling `fromTexts` or `fromDocuments` first." - ); - } - return this._index; - } - - private set index(index: usearch.Index) { - this._index = index; - } - - /** - * Method that adds vectors to the `usearch` index. It also updates the - * mapping between vector IDs and document IDs. - * @param vectors An array of vectors to be added to the index. - * @param documents An array of `Document` instances corresponding to the vectors. 
- * @returns A promise that resolves with an array of document IDs. - */ - async addVectors(vectors: number[][], documents: Document[]) { - if (vectors.length === 0) { - return []; - } - if (vectors.length !== documents.length) { - throw new Error(`Vectors and documents must have the same length`); - } - const dv = vectors[0].length; - if (!this._index) { - this._index = new usearch.Index({ - metric: "l2sq", - connectivity: BigInt(16), - dimensions: BigInt(dv), - }); - } - const d = this.index.dimensions(); - if (BigInt(dv) !== d) { - throw new Error( - `Vectors must have the same length as the number of dimensions (${d})` - ); - } - - const docstoreSize = this.index.size(); - const documentIds = []; - for (let i = 0; i < vectors.length; i += 1) { - const documentId = uuid.v4(); - documentIds.push(documentId); - const id = Number(docstoreSize) + i; - this.index.add(BigInt(id), new Float32Array(vectors[i])); - this._mapping[id] = documentId; - this.docstore.add({ [documentId]: documents[i] }); - } - return documentIds; - } - - /** - * Method that performs a similarity search in the `usearch` index. It - * returns the `k` most similar documents to a given query vector, along - * with their similarity scores. - * @param query The query vector. - * @param k The number of most similar documents to return. - * @returns A promise that resolves with an array of tuples, each containing a `Document` and its similarity score. 
- */ - async similaritySearchVectorWithScore(query: number[], k: number) { - const d = this.index.dimensions(); - if (BigInt(query.length) !== d) { - throw new Error( - `Query vector must have the same length as the number of dimensions (${d})` - ); - } - if (k > this.index.size()) { - const total = this.index.size(); - console.warn( - `k (${k}) is greater than the number of elements in the index (${total}), setting k to ${total}` - ); - // eslint-disable-next-line no-param-reassign - k = Number(total); - } - const result = this.index.search(new Float32Array(query), BigInt(k)); - - const return_list: [Document, number][] = []; - for (let i = 0; i < result.count; i += 1) { - const uuid = this._mapping[Number(result.keys[i])]; - return_list.push([this.docstore.search(uuid), result.distances[i]]); - } - - return return_list; - } - - /** - * Method that saves the `usearch` index and the document store to disk. - * @param directory The directory where the index and document store should be saved. - * @returns A promise that resolves when the save operation is complete. - */ - async save(directory: string) { - const fs = await import("node:fs/promises"); - const path = await import("node:path"); - await fs.mkdir(directory, { recursive: true }); - await Promise.all([ - this.index.save(path.join(directory, "usearch.index")), - await fs.writeFile( - path.join(directory, "docstore.json"), - JSON.stringify([ - Array.from(this.docstore._docs.entries()), - this._mapping, - ]) - ), - ]); - } - - /** - * Static method that creates a new `USearch` instance from a list of - * texts. It generates embeddings for the texts and adds them to the - * `usearch` index. - * @param texts An array of texts to be added to the index. - * @param metadatas Metadata associated with the texts. - * @param embeddings An instance of `Embeddings` used to generate embeddings for the texts. - * @param dbConfig Optional configuration for the document store. 
- * @returns A promise that resolves with a new `USearch` instance. - */ - static async fromTexts( - texts: string[], - metadatas: object[] | object, - embeddings: Embeddings, - dbConfig?: { - docstore?: SynchronousInMemoryDocstore; - } - ): Promise { - const docs: Document[] = []; - for (let i = 0; i < texts.length; i += 1) { - const metadata = Array.isArray(metadatas) ? metadatas[i] : metadatas; - const newDoc = new Document({ - pageContent: texts[i], - metadata, - }); - docs.push(newDoc); - } - return this.fromDocuments(docs, embeddings, dbConfig); - } - - /** - * Static method that creates a new `USearch` instance from a list of - * documents. It generates embeddings for the documents and adds them to - * the `usearch` index. - * @param docs An array of `Document` instances to be added to the index. - * @param embeddings An instance of `Embeddings` used to generate embeddings for the documents. - * @param dbConfig Optional configuration for the document store. - * @returns A promise that resolves with a new `USearch` instance. 
- */ - static async fromDocuments( - docs: Document[], - embeddings: Embeddings, - dbConfig?: { - docstore?: SynchronousInMemoryDocstore; - } - ): Promise { - const args: USearchArgs = { - docstore: dbConfig?.docstore, - }; - const instance = new this(embeddings, args); - await instance.addDocuments(docs); - return instance; - } -} +export * from "@langchain/community/vectorstores/usearch"; diff --git a/langchain/src/vectorstores/vectara.ts b/langchain/src/vectorstores/vectara.ts index 1a35a0f7c4ba..7b8d7579e011 100644 --- a/langchain/src/vectorstores/vectara.ts +++ b/langchain/src/vectorstores/vectara.ts @@ -1,532 +1 @@ -import * as uuid from "uuid"; - -import { Document } from "../document.js"; -import { Embeddings } from "../embeddings/base.js"; -import { FakeEmbeddings } from "../embeddings/fake.js"; -import { getEnvironmentVariable } from "../util/env.js"; -import { VectorStore } from "./base.js"; - -/** - * Interface for the arguments required to initialize a VectaraStore - * instance. - */ -export interface VectaraLibArgs { - customerId: number; - corpusId: number | number[]; - apiKey: string; - verbose?: boolean; - source?: string; -} - -/** - * Interface for the headers required for Vectara API calls. - */ -interface VectaraCallHeader { - headers: { - "x-api-key": string; - "Content-Type": string; - "customer-id": string; - "X-Source": string; - }; -} - -/** - * Interface for the file objects to be uploaded to Vectara. - */ -export interface VectaraFile { - // The contents of the file to be uploaded. - blob: Blob; - // The name of the file to be uploaded. - fileName: string; -} - -/** - * Interface for the filter options used in Vectara API calls. - */ -export interface VectaraFilter { - // Example of a vectara filter string can be: "doc.rating > 3.0 and part.lang = 'deu'" - // See https://docs.vectara.com/docs/search-apis/sql/filter-overview for more details. 
- filter?: string; - // Improve retrieval accuracy by adjusting the balance (from 0 to 1), known as lambda, - // between neural search and keyword-based search factors. Values between 0.01 and 0.2 tend to work well. - // see https://docs.vectara.com/docs/api-reference/search-apis/lexical-matching for more details. - lambda?: number; - // The number of sentences before/after the matching segment to add to the context. - contextConfig?: VectaraContextConfig; -} - -/** - * Interface for the context configuration used in Vectara API calls. - */ -export interface VectaraContextConfig { - // The number of sentences before the matching segment to add. Default is 2. - sentencesBefore?: number; - // The number of sentences after the matching segment to add. Default is 2. - sentencesAfter?: number; -} - -/** - * Class for interacting with the Vectara API. Extends the VectorStore - * class. - */ -export class VectaraStore extends VectorStore { - get lc_secrets(): { [key: string]: string } { - return { - apiKey: "VECTARA_API_KEY", - corpusId: "VECTARA_CORPUS_ID", - customerId: "VECTARA_CUSTOMER_ID", - }; - } - - get lc_aliases(): { [key: string]: string } { - return { - apiKey: "vectara_api_key", - corpusId: "vectara_corpus_id", - customerId: "vectara_customer_id", - }; - } - - declare FilterType: VectaraFilter; - - private apiEndpoint = "api.vectara.io"; - - private apiKey: string; - - private corpusId: number[]; - - private customerId: number; - - private verbose: boolean; - - private source: string; - - private vectaraApiTimeoutSeconds = 60; - - _vectorstoreType(): string { - return "vectara"; - } - - constructor(args: VectaraLibArgs) { - // Vectara doesn't need embeddings, but we need to pass something to the parent constructor - // The embeddings are abstracted out from the user in Vectara. - super(new FakeEmbeddings(), args); - - const apiKey = args.apiKey ?? 
getEnvironmentVariable("VECTARA_API_KEY"); - if (!apiKey) { - throw new Error("Vectara api key is not provided."); - } - this.apiKey = apiKey; - this.source = args.source ?? "langchainjs"; - - const corpusId = - args.corpusId ?? - getEnvironmentVariable("VECTARA_CORPUS_ID") - ?.split(",") - .map((id) => { - const num = Number(id); - if (Number.isNaN(num)) - throw new Error("Vectara corpus id is not a number."); - return num; - }); - if (!corpusId) { - throw new Error("Vectara corpus id is not provided."); - } - - if (typeof corpusId === "number") { - this.corpusId = [corpusId]; - } else { - if (corpusId.length === 0) - throw new Error("Vectara corpus id is not provided."); - this.corpusId = corpusId; - } - - const customerId = - args.customerId ?? getEnvironmentVariable("VECTARA_CUSTOMER_ID"); - if (!customerId) { - throw new Error("Vectara customer id is not provided."); - } - this.customerId = customerId; - - this.verbose = args.verbose ?? false; - } - - /** - * Returns a header for Vectara API calls. - * @returns A Promise that resolves to a VectaraCallHeader object. - */ - async getJsonHeader(): Promise { - return { - headers: { - "x-api-key": this.apiKey, - "Content-Type": "application/json", - "customer-id": this.customerId.toString(), - "X-Source": this.source, - }, - }; - } - - /** - * Throws an error, as this method is not implemented. Use addDocuments - * instead. - * @param _vectors Not used. - * @param _documents Not used. - * @returns Does not return a value. - */ - async addVectors( - _vectors: number[][], - _documents: Document[] - ): Promise { - throw new Error( - "Method not implemented. Please call addDocuments instead." - ); - } - - /** - * Method to delete data from the Vectara corpus. - * @param params an array of document IDs to be deleted - * @returns Promise that resolves when the deletion is complete. 
- */ - async deleteDocuments(ids: string[]): Promise { - if (ids && ids.length > 0) { - const headers = await this.getJsonHeader(); - for (const id of ids) { - const data = { - customer_id: this.customerId, - corpus_id: this.corpusId[0], - document_id: id, - }; - - try { - const controller = new AbortController(); - const timeout = setTimeout( - () => controller.abort(), - this.vectaraApiTimeoutSeconds * 1000 - ); - const response = await fetch( - `https://${this.apiEndpoint}/v1/delete-doc`, - { - method: "POST", - headers: headers?.headers, - body: JSON.stringify(data), - signal: controller.signal, - } - ); - clearTimeout(timeout); - if (response.status !== 200) { - throw new Error( - `Vectara API returned status code ${response.status} when deleting document ${id}` - ); - } - } catch (e) { - const error = new Error(`Error ${(e as Error).message}`); - // eslint-disable-next-line @typescript-eslint/no-explicit-any - (error as any).code = 500; - throw error; - } - } - } else { - throw new Error(`no "ids" specified for deletion`); - } - } - - /** - * Adds documents to the Vectara store. - * @param documents An array of Document objects to add to the Vectara store. - * @returns A Promise that resolves to an array of document IDs indexed in Vectara. - */ - async addDocuments(documents: Document[]): Promise { - if (this.corpusId.length > 1) - throw new Error("addDocuments does not support multiple corpus ids"); - - const headers = await this.getJsonHeader(); - const doc_ids: string[] = []; - let countAdded = 0; - for (const document of documents) { - const doc_id: string = document.metadata?.document_id ?? uuid.v4(); - const data = { - customer_id: this.customerId, - corpus_id: this.corpusId[0], - document: { - document_id: doc_id, - title: document.metadata?.title ?? "", - metadata_json: JSON.stringify(document.metadata ?? 
{}), - section: [ - { - text: document.pageContent, - }, - ], - }, - }; - - try { - const controller = new AbortController(); - const timeout = setTimeout( - () => controller.abort(), - this.vectaraApiTimeoutSeconds * 1000 - ); - const response = await fetch(`https://${this.apiEndpoint}/v1/index`, { - method: "POST", - headers: headers?.headers, - body: JSON.stringify(data), - signal: controller.signal, - }); - clearTimeout(timeout); - const result = await response.json(); - if ( - result.status?.code !== "OK" && - result.status?.code !== "ALREADY_EXISTS" - ) { - const error = new Error( - `Vectara API returned status code ${ - result.status?.code - }: ${JSON.stringify(result.message)}` - ); - // eslint-disable-next-line @typescript-eslint/no-explicit-any - (error as any).code = 500; - throw error; - } else { - countAdded += 1; - doc_ids.push(doc_id); - } - } catch (e) { - const error = new Error( - `Error ${(e as Error).message} while adding document` - ); - // eslint-disable-next-line @typescript-eslint/no-explicit-any - (error as any).code = 500; - throw error; - } - } - if (this.verbose) { - console.log(`Added ${countAdded} documents to Vectara`); - } - - return doc_ids; - } - - /** - * Vectara provides a way to add documents directly via their API. This API handles - * pre-processing and chunking internally in an optimal manner. This method is a wrapper - * to utilize that API within LangChain. - * - * @param files An array of VectaraFile objects representing the files and their respective file names to be uploaded to Vectara. - * @param metadata Optional. An array of metadata objects corresponding to each file in the `filePaths` array. - * @returns A Promise that resolves to the number of successfully uploaded files. 
- */ - async addFiles( - files: VectaraFile[], - metadatas: Record | undefined = undefined - ) { - if (this.corpusId.length > 1) - throw new Error("addFiles does not support multiple corpus ids"); - - const doc_ids: string[] = []; - - for (const [index, file] of files.entries()) { - const md = metadatas ? metadatas[index] : {}; - - const data = new FormData(); - data.append("file", file.blob, file.fileName); - data.append("doc-metadata", JSON.stringify(md)); - - const response = await fetch( - `https://api.vectara.io/v1/upload?c=${this.customerId}&o=${this.corpusId[0]}&d=true`, - { - method: "POST", - headers: { - "x-api-key": this.apiKey, - "X-Source": this.source, - }, - body: data, - } - ); - - const { status } = response; - if (status === 409) { - throw new Error(`File at index ${index} already exists in Vectara`); - } else if (status !== 200) { - throw new Error(`Vectara API returned status code ${status}`); - } else { - const result = await response.json(); - const doc_id = result.document.documentId; - doc_ids.push(doc_id); - } - } - - if (this.verbose) { - console.log(`Uploaded ${files.length} files to Vectara`); - } - - return doc_ids; - } - - /** - * Performs a similarity search and returns documents along with their - * scores. - * @param query The query string for the similarity search. - * @param k Optional. The number of results to return. Default is 10. - * @param filter Optional. A VectaraFilter object to refine the search results. - * @returns A Promise that resolves to an array of tuples, each containing a Document and its score. - */ - async similaritySearchWithScore( - query: string, - k = 10, - filter: VectaraFilter | undefined = undefined - ): Promise<[Document, number][]> { - const headers = await this.getJsonHeader(); - - const corpusKeys = this.corpusId.map((corpusId) => ({ - customerId: this.customerId, - corpusId, - metadataFilter: filter?.filter ?? "", - lexicalInterpolationConfig: { lambda: filter?.lambda ?? 
0.025 }, - })); - - const data = { - query: [ - { - query, - numResults: k, - contextConfig: { - sentencesAfter: filter?.contextConfig?.sentencesAfter ?? 2, - sentencesBefore: filter?.contextConfig?.sentencesBefore ?? 2, - }, - corpusKey: corpusKeys, - }, - ], - }; - - const controller = new AbortController(); - const timeout = setTimeout( - () => controller.abort(), - this.vectaraApiTimeoutSeconds * 1000 - ); - const response = await fetch(`https://${this.apiEndpoint}/v1/query`, { - method: "POST", - headers: headers?.headers, - body: JSON.stringify(data), - signal: controller.signal, - }); - clearTimeout(timeout); - if (response.status !== 200) { - throw new Error(`Vectara API returned status code ${response.status}`); - } - - const result = await response.json(); - const responses = result.responseSet[0].response; - const documents = result.responseSet[0].document; - - for (let i = 0; i < responses.length; i += 1) { - const responseMetadata = responses[i].metadata; - const documentMetadata = documents[responses[i].documentIndex].metadata; - const combinedMetadata: Record = {}; - - responseMetadata.forEach((item: { name: string; value: unknown }) => { - combinedMetadata[item.name] = item.value; - }); - - documentMetadata.forEach((item: { name: string; value: unknown }) => { - combinedMetadata[item.name] = item.value; - }); - - responses[i].metadata = combinedMetadata; - } - - const documentsAndScores = responses.map( - (response: { - text: string; - metadata: Record; - score: number; - }) => [ - new Document({ - pageContent: response.text, - metadata: response.metadata, - }), - response.score, - ] - ); - return documentsAndScores; - } - - /** - * Performs a similarity search and returns documents. - * @param query The query string for the similarity search. - * @param k Optional. The number of results to return. Default is 10. - * @param filter Optional. A VectaraFilter object to refine the search results. 
- * @returns A Promise that resolves to an array of Document objects. - */ - async similaritySearch( - query: string, - k = 10, - filter: VectaraFilter | undefined = undefined - ): Promise { - const resultWithScore = await this.similaritySearchWithScore( - query, - k, - filter - ); - return resultWithScore.map((result) => result[0]); - } - - /** - * Throws an error, as this method is not implemented. Use - * similaritySearch or similaritySearchWithScore instead. - * @param _query Not used. - * @param _k Not used. - * @param _filter Not used. - * @returns Does not return a value. - */ - async similaritySearchVectorWithScore( - _query: number[], - _k: number, - _filter?: VectaraFilter | undefined - ): Promise<[Document, number][]> { - throw new Error( - "Method not implemented. Please call similaritySearch or similaritySearchWithScore instead." - ); - } - - /** - * Creates a VectaraStore instance from texts. - * @param texts An array of text strings. - * @param metadatas Metadata for the texts. Can be a single object or an array of objects. - * @param _embeddings Not used. - * @param args A VectaraLibArgs object for initializing the VectaraStore instance. - * @returns A Promise that resolves to a VectaraStore instance. - */ - static fromTexts( - texts: string[], - metadatas: object | object[], - _embeddings: Embeddings, - args: VectaraLibArgs - ): Promise { - const docs: Document[] = []; - for (let i = 0; i < texts.length; i += 1) { - const metadata = Array.isArray(metadatas) ? metadatas[i] : metadatas; - const newDoc = new Document({ - pageContent: texts[i], - metadata, - }); - docs.push(newDoc); - } - - return VectaraStore.fromDocuments(docs, new FakeEmbeddings(), args); - } - - /** - * Creates a VectaraStore instance from documents. - * @param docs An array of Document objects. - * @param _embeddings Not used. - * @param args A VectaraLibArgs object for initializing the VectaraStore instance. - * @returns A Promise that resolves to a VectaraStore instance. 
- */ - static async fromDocuments( - docs: Document[], - _embeddings: Embeddings, - args: VectaraLibArgs - ): Promise { - const instance = new this(args); - await instance.addDocuments(docs); - return instance; - } -} +export * from "@langchain/community/vectorstores/vectara"; diff --git a/langchain/src/vectorstores/vercel_postgres.ts b/langchain/src/vectorstores/vercel_postgres.ts index 755d659faa3c..22ec83d4f4ad 100644 --- a/langchain/src/vectorstores/vercel_postgres.ts +++ b/langchain/src/vectorstores/vercel_postgres.ts @@ -1,393 +1 @@ -import { - type VercelPool, - type VercelPoolClient, - type VercelPostgresPoolConfig, - createPool, -} from "@vercel/postgres"; -import { VectorStore } from "./base.js"; -import { Embeddings } from "../embeddings/base.js"; -import { Document } from "../document.js"; -import { getEnvironmentVariable } from "../util/env.js"; - -type Metadata = Record>; - -/** - * Interface that defines the arguments required to create a - * `VercelPostgres` instance. It includes Postgres connection options, - * table name, filter, and verbosity level. - */ -export interface VercelPostgresFields { - pool: VercelPool; - client: VercelPoolClient; - tableName?: string; - columns?: { - idColumnName?: string; - vectorColumnName?: string; - contentColumnName?: string; - metadataColumnName?: string; - }; - filter?: Metadata; - verbose?: boolean; -} - -/** - * Class that provides an interface to a Vercel Postgres vector database. It - * extends the `VectorStore` base class and implements methods for adding - * documents and vectors and performing similarity searches. 
- */ -export class VercelPostgres extends VectorStore { - declare FilterType: Metadata; - - tableName: string; - - idColumnName: string; - - vectorColumnName: string; - - contentColumnName: string; - - metadataColumnName: string; - - filter?: Metadata; - - _verbose?: boolean; - - pool: VercelPool; - - client: VercelPoolClient; - - _vectorstoreType(): string { - return "vercel"; - } - - private constructor(embeddings: Embeddings, config: VercelPostgresFields) { - super(embeddings, config); - this.tableName = config.tableName ?? "langchain_vectors"; - this.filter = config.filter; - - this.vectorColumnName = config.columns?.vectorColumnName ?? "embedding"; - this.contentColumnName = config.columns?.contentColumnName ?? "text"; - this.idColumnName = config.columns?.idColumnName ?? "id"; - this.metadataColumnName = config.columns?.metadataColumnName ?? "metadata"; - - this.pool = config.pool; - this.client = config.client; - - this._verbose = - getEnvironmentVariable("LANGCHAIN_VERBOSE") === "true" ?? - !!config.verbose; - } - - /** - * Static method to create a new `VercelPostgres` instance from a - * connection. It creates a table if one does not exist, and calls - * `connect` to return a new instance of `VercelPostgres`. - * - * @param embeddings - Embeddings instance. - * @param fields - `VercelPostgres` configuration options. - * @returns A new instance of `VercelPostgres`. - */ - static async initialize( - embeddings: Embeddings, - config?: Partial & { - postgresConnectionOptions?: VercelPostgresPoolConfig; - } - ): Promise { - // Default maxUses to 1 for edge environments: - // https://github.com/vercel/storage/tree/main/packages/postgres#a-note-on-edge-environments - const pool = - config?.pool ?? - createPool({ maxUses: 1, ...config?.postgresConnectionOptions }); - const client = config?.client ?? 
(await pool.connect()); - const postgresqlVectorStore = new VercelPostgres(embeddings, { - ...config, - pool, - client, - }); - - await postgresqlVectorStore.ensureTableInDatabase(); - - return postgresqlVectorStore; - } - - /** - * Method to add documents to the vector store. It converts the documents into - * vectors, and adds them to the store. - * - * @param documents - Array of `Document` instances. - * @returns Promise that resolves when the documents have been added. - */ - async addDocuments( - documents: Document[], - options?: { ids?: string[] } - ): Promise { - const texts = documents.map(({ pageContent }) => pageContent); - - return this.addVectors( - await this.embeddings.embedDocuments(texts), - documents, - options - ); - } - - /** - * Generates the SQL placeholders for a specific row at the provided index. - * - * @param index - The index of the row for which placeholders need to be generated. - * @returns The SQL placeholders for the row values. - */ - protected generatePlaceholderForRowAt( - // eslint-disable-next-line @typescript-eslint/no-explicit-any - row: (string | Record)[], - index: number - ): string { - const base = index * row.length; - return `(${row.map((_, j) => `$${base + 1 + j}`)})`; - } - - /** - * Constructs the SQL query for inserting rows into the specified table. - * - * @param rows - The rows of data to be inserted, consisting of values and records. - * @param chunkIndex - The starting index for generating query placeholders based on chunk positioning. - * @returns The complete SQL INSERT INTO query string. - */ - protected async runInsertQuery( - // eslint-disable-next-line @typescript-eslint/no-explicit-any - rows: (string | Record)[][], - useIdColumn: boolean - ) { - const values = rows.map((row, j) => - this.generatePlaceholderForRowAt(row, j) - ); - const flatValues = rows.flat(); - return this.client.query( - ` - INSERT INTO ${this.tableName} ( - ${useIdColumn ? 
`${this.idColumnName},` : ""} - ${this.contentColumnName}, - ${this.vectorColumnName}, - ${this.metadataColumnName} - ) VALUES ${values.join(", ")} - ON CONFLICT (${this.idColumnName}) - DO UPDATE - SET - ${this.contentColumnName} = EXCLUDED.${this.contentColumnName}, - ${this.vectorColumnName} = EXCLUDED.${this.vectorColumnName}, - ${this.metadataColumnName} = EXCLUDED.${this.metadataColumnName} - RETURNING ${this.idColumnName}`, - flatValues - ); - } - - /** - * Method to add vectors to the vector store. It converts the vectors into - * rows and inserts them into the database. - * - * @param vectors - Array of vectors. - * @param documents - Array of `Document` instances. - * @returns Promise that resolves when the vectors have been added. - */ - async addVectors( - vectors: number[][], - documents: Document[], - options?: { ids?: string[] } - ): Promise { - if (options?.ids !== undefined && options?.ids.length !== vectors.length) { - throw new Error( - `If provided, the length of "ids" must be the same as the number of vectors.` - ); - } - const rows = vectors.map((embedding, idx) => { - const embeddingString = `[${embedding.join(",")}]`; - const row = [ - documents[idx].pageContent, - embeddingString, - documents[idx].metadata, - ]; - if (options?.ids) { - return [options.ids[idx], ...row]; - } - return row; - }); - - const chunkSize = 500; - - const ids = []; - - for (let i = 0; i < rows.length; i += chunkSize) { - const chunk = rows.slice(i, i + chunkSize); - try { - const result = await this.runInsertQuery( - chunk, - options?.ids !== undefined - ); - ids.push(...result.rows.map((row) => row[this.idColumnName])); - } catch (e) { - console.error(e); - throw new Error(`Error inserting: ${(e as Error).message}`); - } - } - return ids; - } - - /** - * Method to perform a similarity search in the vector store. It returns - * the `k` most similar documents to the query vector, along with their - * similarity scores. - * - * @param query - Query vector. 
- * @param k - Number of most similar documents to return. - * @param filter - Optional filter to apply to the search. - * @returns Promise that resolves with an array of tuples, each containing a `Document` and its similarity score. - */ - async similaritySearchVectorWithScore( - query: number[], - k: number, - filter?: this["FilterType"] - ): Promise<[Document, number][]> { - const embeddingString = `[${query.join(",")}]`; - const _filter: this["FilterType"] = filter ?? {}; - const whereClauses = []; - const values = [embeddingString, k]; - let paramCount = values.length; - - for (const [key, value] of Object.entries(_filter)) { - if (typeof value === "object" && value !== null) { - const currentParamCount = paramCount; - const placeholders = value.in - .map((_, index) => `$${currentParamCount + index + 1}`) - .join(","); - whereClauses.push( - `${this.metadataColumnName}->>'${key}' IN (${placeholders})` - ); - values.push(...value.in); - paramCount += value.in.length; - } else { - paramCount += 1; - whereClauses.push( - `${this.metadataColumnName}->>'${key}' = $${paramCount}` - ); - values.push(value); - } - } - - const whereClause = whereClauses.length - ? 
`WHERE ${whereClauses.join(" AND ")}` - : ""; - - const queryString = ` - SELECT *, ${this.vectorColumnName} <=> $1 as "_distance" - FROM ${this.tableName} - ${whereClause} - ORDER BY "_distance" ASC - LIMIT $2;`; - - const documents = (await this.client.query(queryString, values)).rows; - const results = [] as [Document, number][]; - for (const doc of documents) { - if (doc._distance != null && doc[this.contentColumnName] != null) { - const document = new Document({ - pageContent: doc[this.contentColumnName], - metadata: doc[this.metadataColumnName], - }); - results.push([document, doc._distance]); - } - } - return results; - } - - async delete(params: { ids?: string[]; deleteAll?: boolean }): Promise { - if (params.ids !== undefined) { - await this.client.query( - `DELETE FROM ${this.tableName} WHERE ${ - this.idColumnName - } IN (${params.ids.map((_, idx) => `$${idx + 1}`)})`, - params.ids - ); - } else if (params.deleteAll) { - await this.client.query(`TRUNCATE TABLE ${this.tableName}`); - } - } - - /** - * Method to ensure the existence of the table in the database. It creates - * the table if it does not already exist. - * - * @returns Promise that resolves when the table has been ensured. - */ - async ensureTableInDatabase(): Promise { - await this.client.query(`CREATE EXTENSION IF NOT EXISTS vector;`); - await this.client.query(`CREATE EXTENSION IF NOT EXISTS "uuid-ossp";`); - await this.client.query(`CREATE TABLE IF NOT EXISTS "${this.tableName}" ( - "${this.idColumnName}" uuid NOT NULL DEFAULT uuid_generate_v4() PRIMARY KEY, - "${this.contentColumnName}" text, - "${this.metadataColumnName}" jsonb, - "${this.vectorColumnName}" vector - );`); - } - - /** - * Static method to create a new `VercelPostgres` instance from an - * array of texts and their metadata. It converts the texts into - * `Document` instances and adds them to the store. - * - * @param texts - Array of texts. - * @param metadatas - Array of metadata objects or a single metadata object. 
- * @param embeddings - Embeddings instance. - * @param fields - `VercelPostgres` configuration options. - * @returns Promise that resolves with a new instance of `VercelPostgres`. - */ - static async fromTexts( - texts: string[], - metadatas: object[] | object, - embeddings: Embeddings, - dbConfig?: Partial & { - postgresConnectionOptions?: VercelPostgresPoolConfig; - } - ): Promise { - const docs = []; - for (let i = 0; i < texts.length; i += 1) { - const metadata = Array.isArray(metadatas) ? metadatas[i] : metadatas; - const newDoc = new Document({ - pageContent: texts[i], - metadata, - }); - docs.push(newDoc); - } - - return this.fromDocuments(docs, embeddings, dbConfig); - } - - /** - * Static method to create a new `VercelPostgres` instance from an - * array of `Document` instances. It adds the documents to the store. - * - * @param docs - Array of `Document` instances. - * @param embeddings - Embeddings instance. - * @param fields - `VercelPostgres` configuration options. - * @returns Promise that resolves with a new instance of `VercelPostgres`. - */ - static async fromDocuments( - docs: Document[], - embeddings: Embeddings, - dbConfig?: Partial & { - postgresConnectionOptions?: VercelPostgresPoolConfig; - } - ): Promise { - const instance = await this.initialize(embeddings, dbConfig); - await instance.addDocuments(docs); - - return instance; - } - - /** - * Closes all the clients in the pool and terminates the pool. - * - * @returns Promise that resolves when all clients are closed and the pool is terminated. 
- */ - async end(): Promise { - await this.client?.release(); - return this.pool.end(); - } -} +export * from "@langchain/community/vectorstores/vercel_postgres"; diff --git a/langchain/src/vectorstores/voy.ts b/langchain/src/vectorstores/voy.ts index c968b3dfb71f..09428f9e9734 100644 --- a/langchain/src/vectorstores/voy.ts +++ b/langchain/src/vectorstores/voy.ts @@ -1,191 +1 @@ -import type { Voy as VoyOriginClient, SearchResult } from "voy-search"; -import { Embeddings } from "../embeddings/base.js"; -import { VectorStore } from "./base.js"; -import { Document } from "../document.js"; - -export type VoyClient = Omit< - VoyOriginClient, - "remove" | "size" | "serialize" | "free" ->; - -/** - * Internal interface for storing documents mappings. - */ -interface InternalDoc { - embeddings: number[]; - document: Document; -} - -/** - * Class that extends `VectorStore`. It allows to perform similarity search using - * Voi similarity search engine. The class requires passing Voy Client as an input parameter. - */ -export class VoyVectorStore extends VectorStore { - client: VoyClient; - - numDimensions: number | null = null; - - docstore: InternalDoc[] = []; - - _vectorstoreType(): string { - return "voi"; - } - - constructor(client: VoyClient, embeddings: Embeddings) { - super(embeddings, {}); - this.client = client; - this.embeddings = embeddings; - } - - /** - * Adds documents to the Voy database. The documents are embedded using embeddings provided while instantiating the class. - * @param documents An array of `Document` instances associated with the vectors. 
- */ - async addDocuments(documents: Document[]): Promise { - const texts = documents.map(({ pageContent }) => pageContent); - if (documents.length === 0) { - return; - } - - const firstVector = ( - await this.embeddings.embedDocuments(texts.slice(0, 1)) - )[0]; - if (this.numDimensions === null) { - this.numDimensions = firstVector.length; - } else if (this.numDimensions !== firstVector.length) { - throw new Error( - `Vectors must have the same length as the number of dimensions (${this.numDimensions})` - ); - } - const restResults = await this.embeddings.embedDocuments(texts.slice(1)); - await this.addVectors([firstVector, ...restResults], documents); - } - - /** - * Adds vectors to the Voy database. The vectors are associated with - * the provided documents. - * @param vectors An array of vectors to be added to the database. - * @param documents An array of `Document` instances associated with the vectors. - */ - async addVectors(vectors: number[][], documents: Document[]): Promise { - if (vectors.length === 0) { - return; - } - if (this.numDimensions === null) { - this.numDimensions = vectors[0].length; - } - - if (vectors.length !== documents.length) { - throw new Error(`Vectors and metadata must have the same length`); - } - if (!vectors.every((v) => v.length === this.numDimensions)) { - throw new Error( - `Vectors must have the same length as the number of dimensions (${this.numDimensions})` - ); - } - - vectors.forEach((item, idx) => { - const doc = documents[idx]; - this.docstore.push({ embeddings: item, document: doc }); - }); - const embeddings = this.docstore.map((item, idx) => ({ - id: String(idx), - embeddings: item.embeddings, - title: "", - url: "", - })); - this.client.index({ embeddings }); - } - - /** - * Searches for vectors in the Voy database that are similar to the - * provided query vector. - * @param query The query vector. - * @param k The number of similar vectors to return. 
- * @returns A promise that resolves with an array of tuples, each containing a `Document` instance and a similarity score. - */ - async similaritySearchVectorWithScore(query: number[], k: number) { - if (this.numDimensions === null) { - throw new Error("There aren't any elements in the index yet."); - } - if (query.length !== this.numDimensions) { - throw new Error( - `Query vector must have the same length as the number of dimensions (${this.numDimensions})` - ); - } - const itemsToQuery = Math.min(this.docstore.length, k); - if (itemsToQuery > this.docstore.length) { - console.warn( - `k (${k}) is greater than the number of elements in the index (${this.docstore.length}), setting k to ${itemsToQuery}` - ); - } - const results: SearchResult = this.client.search( - new Float32Array(query), - itemsToQuery - ); - return results.neighbors.map( - ({ id }, idx) => - [this.docstore[parseInt(id, 10)].document, idx] as [Document, number] - ); - } - - /** - * Method to delete data from the Voy index. It can delete data based - * on specific IDs or a filter. - * @param params Object that includes either an array of IDs or a filter for the data to be deleted. - * @returns Promise that resolves when the deletion is complete. - */ - async delete(params: { deleteAll?: boolean }): Promise { - if (params.deleteAll === true) { - await this.client.clear(); - } else { - throw new Error(`You must provide a "deleteAll" parameter.`); - } - } - - /** - * Creates a new `VoyVectorStore` instance from an array of text strings. The text - * strings are converted to `Document` instances and added to the Voy - * database. - * @param texts An array of text strings. - * @param metadatas An array of metadata objects or a single metadata object. If an array is provided, it must have the same length as the `texts` array. - * @param embeddings An `Embeddings` instance used to generate embeddings for the documents. - * @param client An instance of Voy client to use in the underlying operations. 
- * @returns A promise that resolves with a new `VoyVectorStore` instance. - */ - static async fromTexts( - texts: string[], - metadatas: object[] | object, - embeddings: Embeddings, - client: VoyClient - ): Promise { - const docs: Document[] = []; - for (let i = 0; i < texts.length; i += 1) { - const metadata = Array.isArray(metadatas) ? metadatas[i] : metadatas; - const newDoc = new Document({ - pageContent: texts[i], - metadata, - }); - docs.push(newDoc); - } - return VoyVectorStore.fromDocuments(docs, embeddings, client); - } - - /** - * Creates a new `VoyVectorStore` instance from an array of `Document` instances. - * The documents are added to the Voy database. - * @param docs An array of `Document` instances. - * @param embeddings An `Embeddings` instance used to generate embeddings for the documents. - * @param client An instance of Voy client to use in the underlying operations. - * @returns A promise that resolves with a new `VoyVectorStore` instance. - */ - static async fromDocuments( - docs: Document[], - embeddings: Embeddings, - client: VoyClient - ): Promise { - const instance = new VoyVectorStore(client, embeddings); - await instance.addDocuments(docs); - return instance; - } -} +export * from "@langchain/community/vectorstores/voy"; diff --git a/langchain/src/vectorstores/weaviate.ts b/langchain/src/vectorstores/weaviate.ts index 1bd1fba5a77a..e7a76b406db7 100644 --- a/langchain/src/vectorstores/weaviate.ts +++ b/langchain/src/vectorstores/weaviate.ts @@ -1,432 +1 @@ -import * as uuid from "uuid"; -import type { - WeaviateClient, - WeaviateObject, - WhereFilter, -} from "weaviate-ts-client"; -import { MaxMarginalRelevanceSearchOptions, VectorStore } from "./base.js"; -import { Embeddings } from "../embeddings/base.js"; -import { Document } from "../document.js"; -import { maximalMarginalRelevance } from "../util/math.js"; - -// Note this function is not generic, it is designed specifically for Weaviate -// 
https://weaviate.io/developers/weaviate/config-refs/datatypes#introduction -export const flattenObjectForWeaviate = ( - // eslint-disable-next-line @typescript-eslint/no-explicit-any - obj: Record -) => { - // eslint-disable-next-line @typescript-eslint/no-explicit-any - const flattenedObject: Record = {}; - - for (const key in obj) { - if (!Object.hasOwn(obj, key)) { - continue; - } - const value = obj[key]; - if (typeof obj[key] === "object" && !Array.isArray(value)) { - const recursiveResult = flattenObjectForWeaviate(value); - - for (const deepKey in recursiveResult) { - if (Object.hasOwn(obj, key)) { - flattenedObject[`${key}_${deepKey}`] = recursiveResult[deepKey]; - } - } - } else if (Array.isArray(value)) { - if ( - value.length > 0 && - typeof value[0] !== "object" && - // eslint-disable-next-line @typescript-eslint/no-explicit-any - value.every((el: any) => typeof el === typeof value[0]) - ) { - // Weaviate only supports arrays of primitive types, - // where all elements are of the same type - flattenedObject[key] = value; - } - } else { - flattenedObject[key] = value; - } - } - - return flattenedObject; -}; - -/** - * Interface that defines the arguments required to create a new instance - * of the `WeaviateStore` class. It includes the Weaviate client, the name - * of the class in Weaviate, and optional keys for text and metadata. - */ -export interface WeaviateLibArgs { - client: WeaviateClient; - /** - * The name of the class in Weaviate. Must start with a capital letter. - */ - indexName: string; - textKey?: string; - metadataKeys?: string[]; - tenant?: string; -} - -interface ResultRow { - // eslint-disable-next-line @typescript-eslint/no-explicit-any - [key: string]: any; -} - -/** - * Interface that defines a filter for querying data from Weaviate. It - * includes a distance and a `WhereFilter`. - */ -export interface WeaviateFilter { - distance?: number; - where: WhereFilter; -} - -/** - * Class that extends the `VectorStore` base class. 
It provides methods to - * interact with a Weaviate index, including adding vectors and documents, - * deleting data, and performing similarity searches. - */ -export class WeaviateStore extends VectorStore { - declare FilterType: WeaviateFilter; - - private client: WeaviateClient; - - private indexName: string; - - private textKey: string; - - private queryAttrs: string[]; - - private tenant?: string; - - _vectorstoreType(): string { - return "weaviate"; - } - - constructor(public embeddings: Embeddings, args: WeaviateLibArgs) { - super(embeddings, args); - - this.client = args.client; - this.indexName = args.indexName; - this.textKey = args.textKey || "text"; - this.queryAttrs = [this.textKey]; - this.tenant = args.tenant; - - if (args.metadataKeys) { - this.queryAttrs = [ - ...new Set([ - ...this.queryAttrs, - ...args.metadataKeys.filter((k) => { - // https://spec.graphql.org/June2018/#sec-Names - // queryAttrs need to be valid GraphQL Names - const keyIsValid = /^[_A-Za-z][_0-9A-Za-z]*$/.test(k); - if (!keyIsValid) { - console.warn( - `Skipping metadata key ${k} as it is not a valid GraphQL Name` - ); - } - return keyIsValid; - }), - ]), - ]; - } - } - - /** - * Method to add vectors and corresponding documents to the Weaviate - * index. - * @param vectors Array of vectors to be added. - * @param documents Array of documents corresponding to the vectors. - * @param options Optional parameter that can include specific IDs for the documents. - * @returns An array of document IDs. - */ - async addVectors( - vectors: number[][], - documents: Document[], - options?: { ids?: string[] } - ) { - const documentIds = options?.ids ?? documents.map((_) => uuid.v4()); - const batch: WeaviateObject[] = documents.map((document, index) => { - if (Object.hasOwn(document.metadata, "id")) - throw new Error( - "Document inserted to Weaviate vectorstore should not have `id` in their metadata." 
- ); - - const flattenedMetadata = flattenObjectForWeaviate(document.metadata); - return { - ...(this.tenant ? { tenant: this.tenant } : {}), - class: this.indexName, - id: documentIds[index], - vector: vectors[index], - properties: { - [this.textKey]: document.pageContent, - ...flattenedMetadata, - }, - }; - }); - - try { - const responses = await this.client.batch - .objectsBatcher() - .withObjects(...batch) - .do(); - // if storing vectors fails, we need to know why - const errorMessages: string[] = []; - responses.forEach((response) => { - if (response?.result?.errors?.error) { - errorMessages.push( - ...response.result.errors.error.map( - (err) => - err.message ?? - "!! Unfortunately no error message was presented in the API response !!" - ) - ); - } - }); - if (errorMessages.length > 0) { - throw new Error(errorMessages.join("\n")); - } - } catch (e) { - throw Error(`Error adding vectors: ${e}`); - } - return documentIds; - } - - /** - * Method to add documents to the Weaviate index. It first generates - * vectors for the documents using the embeddings, then adds the vectors - * and documents to the index. - * @param documents Array of documents to be added. - * @param options Optional parameter that can include specific IDs for the documents. - * @returns An array of document IDs. - */ - async addDocuments(documents: Document[], options?: { ids?: string[] }) { - return this.addVectors( - await this.embeddings.embedDocuments(documents.map((d) => d.pageContent)), - documents, - options - ); - } - - /** - * Method to delete data from the Weaviate index. It can delete data based - * on specific IDs or a filter. - * @param params Object that includes either an array of IDs or a filter for the data to be deleted. - * @returns Promise that resolves when the deletion is complete. 
- */ - async delete(params: { - ids?: string[]; - filter?: WeaviateFilter; - }): Promise { - const { ids, filter } = params; - - if (ids && ids.length > 0) { - for (const id of ids) { - let deleter = this.client.data - .deleter() - .withClassName(this.indexName) - .withId(id); - - if (this.tenant) { - deleter = deleter.withTenant(this.tenant); - } - - await deleter.do(); - } - } else if (filter) { - let batchDeleter = this.client.batch - .objectsBatchDeleter() - .withClassName(this.indexName) - .withWhere(filter.where); - - if (this.tenant) { - batchDeleter = batchDeleter.withTenant(this.tenant); - } - - await batchDeleter.do(); - } else { - throw new Error( - `This method requires either "ids" or "filter" to be set in the input object` - ); - } - } - - /** - * Method to perform a similarity search on the stored vectors in the - * Weaviate index. It returns the top k most similar documents and their - * similarity scores. - * @param query The query vector. - * @param k The number of most similar documents to return. - * @param filter Optional filter to apply to the search. - * @returns An array of tuples, where each tuple contains a document and its similarity score. - */ - async similaritySearchVectorWithScore( - query: number[], - k: number, - filter?: WeaviateFilter - ): Promise<[Document, number][]> { - const resultsWithEmbedding = - await this.similaritySearchVectorWithScoreAndEmbedding(query, k, filter); - return resultsWithEmbedding.map(([document, score, _embedding]) => [ - document, - score, - ]); - } - - /** - * Method to perform a similarity search on the stored vectors in the - * Weaviate index. It returns the top k most similar documents, their - * similarity scores and embedding vectors. - * @param query The query vector. - * @param k The number of most similar documents to return. - * @param filter Optional filter to apply to the search. 
- * @returns An array of tuples, where each tuple contains a document, its similarity score and its embedding vector. - */ - async similaritySearchVectorWithScoreAndEmbedding( - query: number[], - k: number, - filter?: WeaviateFilter - ): Promise<[Document, number, number[]][]> { - try { - let builder = this.client.graphql - .get() - .withClassName(this.indexName) - .withFields( - `${this.queryAttrs.join(" ")} _additional { distance vector }` - ) - .withNearVector({ - vector: query, - distance: filter?.distance, - }) - .withLimit(k); - - if (this.tenant) { - builder = builder.withTenant(this.tenant); - } - - if (filter?.where) { - builder = builder.withWhere(filter.where); - } - - const result = await builder.do(); - - const documents: [Document, number, number[]][] = []; - for (const data of result.data.Get[this.indexName]) { - const { [this.textKey]: text, _additional, ...rest }: ResultRow = data; - - documents.push([ - new Document({ - pageContent: text, - metadata: rest, - }), - _additional.distance, - _additional.vector, - ]); - } - return documents; - } catch (e) { - throw Error(`'Error in similaritySearch' ${e}`); - } - } - - /** - * Return documents selected using the maximal marginal relevance. - * Maximal marginal relevance optimizes for similarity to the query AND diversity - * among selected documents. - * - * @param {string} query - Text to look up documents similar to. - * @param {number} options.k - Number of documents to return. - * @param {number} options.fetchK - Number of documents to fetch before passing to the MMR algorithm. - * @param {number} options.lambda - Number between 0 and 1 that determines the degree of diversity among the results, - * where 0 corresponds to maximum diversity and 1 to minimum diversity. - * @param {this["FilterType"]} options.filter - Optional filter - * @param _callbacks - * - * @returns {Promise} - List of documents selected by maximal marginal relevance. 
- */ - override async maxMarginalRelevanceSearch( - query: string, - options: MaxMarginalRelevanceSearchOptions, - _callbacks?: undefined - ): Promise { - const { k, fetchK = 20, lambda = 0.5, filter } = options; - const queryEmbedding: number[] = await this.embeddings.embedQuery(query); - const allResults: [Document, number, number[]][] = - await this.similaritySearchVectorWithScoreAndEmbedding( - queryEmbedding, - fetchK, - filter - ); - const embeddingList = allResults.map( - ([_doc, _score, embedding]) => embedding - ); - const mmrIndexes = maximalMarginalRelevance( - queryEmbedding, - embeddingList, - lambda, - k - ); - return mmrIndexes - .filter((idx) => idx !== -1) - .map((idx) => allResults[idx][0]); - } - - /** - * Static method to create a new `WeaviateStore` instance from a list of - * texts. It first creates documents from the texts and metadata, then - * adds the documents to the Weaviate index. - * @param texts Array of texts. - * @param metadatas Metadata for the texts. Can be a single object or an array of objects. - * @param embeddings Embeddings to be used for the texts. - * @param args Arguments required to create a new `WeaviateStore` instance. - * @returns A new `WeaviateStore` instance. - */ - static fromTexts( - texts: string[], - metadatas: object | object[], - embeddings: Embeddings, - args: WeaviateLibArgs - ): Promise { - const docs: Document[] = []; - for (let i = 0; i < texts.length; i += 1) { - const metadata = Array.isArray(metadatas) ? metadatas[i] : metadatas; - const newDoc = new Document({ - pageContent: texts[i], - metadata, - }); - docs.push(newDoc); - } - return WeaviateStore.fromDocuments(docs, embeddings, args); - } - - /** - * Static method to create a new `WeaviateStore` instance from a list of - * documents. It adds the documents to the Weaviate index. - * @param docs Array of documents. - * @param embeddings Embeddings to be used for the documents. 
- * @param args Arguments required to create a new `WeaviateStore` instance. - * @returns A new `WeaviateStore` instance. - */ - static async fromDocuments( - docs: Document[], - embeddings: Embeddings, - args: WeaviateLibArgs - ): Promise { - const instance = new this(embeddings, args); - await instance.addDocuments(docs); - return instance; - } - - /** - * Static method to create a new `WeaviateStore` instance from an existing - * Weaviate index. - * @param embeddings Embeddings to be used for the Weaviate index. - * @param args Arguments required to create a new `WeaviateStore` instance. - * @returns A new `WeaviateStore` instance. - */ - static async fromExistingIndex( - embeddings: Embeddings, - args: WeaviateLibArgs - ): Promise { - return new this(embeddings, args); - } -} +export * from "@langchain/community/vectorstores/weaviate"; diff --git a/langchain/src/vectorstores/xata.ts b/langchain/src/vectorstores/xata.ts index ccd6089ea4e9..9ec25bef187b 100644 --- a/langchain/src/vectorstores/xata.ts +++ b/langchain/src/vectorstores/xata.ts @@ -1,149 +1 @@ -import { BaseClient } from "@xata.io/client"; -import { VectorStore } from "./base.js"; -import { Embeddings } from "../embeddings/base.js"; -import { Document } from "../document.js"; - -/** - * Interface for the arguments required to create a XataClient. Includes - * the client instance and the table name. - */ -export interface XataClientArgs { - readonly client: XataClient; - readonly table: string; -} - -/** - * Type for the filter object used in Xata database queries. - */ -type XataFilter = object; - -/** - * Class for interacting with a Xata database as a VectorStore. Provides - * methods to add documents and vectors to the database, delete entries, - * and perform similarity searches. 
- */ -export class XataVectorSearch< - XataClient extends BaseClient -> extends VectorStore { - declare FilterType: XataFilter; - - private readonly client: XataClient; - - private readonly table: string; - - _vectorstoreType(): string { - return "xata"; - } - - constructor(embeddings: Embeddings, args: XataClientArgs) { - super(embeddings, args); - - this.client = args.client; - this.table = args.table; - } - - /** - * Method to add documents to the Xata database. Maps the page content of - * each document, embeds the documents using the embeddings, and adds the - * vectors to the database. - * @param documents Array of documents to be added. - * @param options Optional object containing an array of ids. - * @returns Promise resolving to an array of ids of the added documents. - */ - async addDocuments(documents: Document[], options?: { ids?: string[] }) { - const texts = documents.map(({ pageContent }) => pageContent); - return this.addVectors( - await this.embeddings.embedDocuments(texts), - documents, - options - ); - } - - /** - * Method to add vectors to the Xata database. Maps each vector to a row - * with the document's content, embedding, and metadata. Creates or - * replaces these rows in the Xata database. - * @param vectors Array of vectors to be added. - * @param documents Array of documents corresponding to the vectors. - * @param options Optional object containing an array of ids. - * @returns Promise resolving to an array of ids of the added vectors. 
- */ - async addVectors( - vectors: number[][], - documents: Document[], - options?: { ids?: string[] } - ) { - const rows = vectors - .map((embedding, idx) => ({ - content: documents[idx].pageContent, - embedding, - ...documents[idx].metadata, - })) - .map((row, idx) => { - if (options?.ids) { - return { id: options.ids[idx], ...row }; - } - return row; - }); - - const res = await this.client.db[this.table].createOrReplace(rows); - // Since we have an untyped BaseClient, it doesn't know the - // actual return type of the overload. - const results = res as unknown as { id: string }[]; - const returnedIds = results.map((row) => row.id); - return returnedIds; - } - - /** - * Method to delete entries from the Xata database. Deletes the entries - * with the provided ids. - * @param params Object containing an array of ids of the entries to be deleted. - * @returns Promise resolving to void. - */ - async delete(params: { ids: string[] }): Promise { - const { ids } = params; - await this.client.db[this.table].delete(ids); - } - - /** - * Method to perform a similarity search in the Xata database. Returns the - * k most similar documents along with their scores. - * @param query Query vector for the similarity search. - * @param k Number of most similar documents to return. - * @param filter Optional filter for the search. - * @returns Promise resolving to an array of tuples, each containing a Document and its score. 
- */ - async similaritySearchVectorWithScore( - query: number[], - k: number, - filter?: XataFilter | undefined - ): Promise<[Document, number][]> { - const { records } = await this.client.db[this.table].vectorSearch( - "embedding", - query, - { - size: k, - filter, - } - ); - - return ( - // eslint-disable-next-line @typescript-eslint/no-explicit-any - records?.map((record: any) => [ - new Document({ - pageContent: record.content, - metadata: Object.fromEntries( - Object.entries(record).filter( - ([key]) => - key !== "content" && - key !== "embedding" && - key !== "xata" && - key !== "id" - ) - ), - }), - record.xata.score, - ]) ?? [] - ); - } -} +export * from "@langchain/community/vectorstores/xata"; diff --git a/langchain/src/vectorstores/zep.ts b/langchain/src/vectorstores/zep.ts index 63919fa34970..ea9578092dbe 100644 --- a/langchain/src/vectorstores/zep.ts +++ b/langchain/src/vectorstores/zep.ts @@ -1,424 +1 @@ -import { - DocumentCollection, - IDocument, - NotFoundError, - ZepClient, -} from "@getzep/zep-js"; - -import { MaxMarginalRelevanceSearchOptions, VectorStore } from "./base.js"; -import { Embeddings } from "../embeddings/base.js"; -import { Document } from "../document.js"; -import { FakeEmbeddings } from "../embeddings/fake.js"; -import { Callbacks } from "../callbacks/index.js"; -import { maximalMarginalRelevance } from "../util/math.js"; - -/** - * Interface for the arguments required to initialize a ZepVectorStore - * instance. - */ -export interface IZepArgs { - collection: DocumentCollection; -} - -/** - * Interface for the configuration options for a ZepVectorStore instance. - */ -export interface IZepConfig { - apiUrl: string; - apiKey?: string; - collectionName: string; - description?: string; - metadata?: Record; - embeddingDimensions?: number; - isAutoEmbedded?: boolean; -} - -/** - * Interface for the parameters required to delete documents from a - * ZepVectorStore instance. 
- */ -export interface IZepDeleteParams { - uuids: string[]; -} - -/** - * ZepVectorStore is a VectorStore implementation that uses the Zep long-term memory store as a backend. - * - * If the collection does not exist, it will be created automatically. - * - * Requires `zep-js` to be installed: - * ```bash - * npm install @getzep/zep-js - * ``` - * - * @property {ZepClient} client - The ZepClient instance used to interact with Zep's API. - * @property {Promise} initPromise - A promise that resolves when the collection is initialized. - * @property {DocumentCollection} collection - The Zep document collection. - */ -export class ZepVectorStore extends VectorStore { - public client: ZepClient; - - public collection: DocumentCollection; - - private initPromise: Promise; - - private autoEmbed = false; - - constructor(embeddings: Embeddings, args: IZepConfig) { - super(embeddings, args); - - this.embeddings = embeddings; - - // eslint-disable-next-line no-instanceof/no-instanceof - if (this.embeddings instanceof FakeEmbeddings) { - this.autoEmbed = true; - } - - this.initPromise = this.initCollection(args).catch((err) => { - console.error("Error initializing collection:", err); - throw err; - }); - } - - /** - * Initializes the document collection. If the collection does not exist, it creates a new one. - * - * @param {IZepConfig} args - The configuration object for the Zep API. 
- */ - private async initCollection(args: IZepConfig) { - this.client = await ZepClient.init(args.apiUrl, args.apiKey); - try { - this.collection = await this.client.document.getCollection( - args.collectionName - ); - - // If the Embedding passed in is fake, but the collection is not auto embedded, throw an error - // eslint-disable-next-line no-instanceof/no-instanceof - if (!this.collection.is_auto_embedded && this.autoEmbed) { - throw new Error(`You can't pass in FakeEmbeddings when collection ${args.collectionName} - is not set to auto-embed.`); - } - } catch (err) { - // eslint-disable-next-line no-instanceof/no-instanceof - if (err instanceof Error) { - // eslint-disable-next-line no-instanceof/no-instanceof - if (err instanceof NotFoundError || err.name === "NotFoundError") { - await this.createCollection(args); - } else { - throw err; - } - } - } - } - - /** - * Creates a new document collection. - * - * @param {IZepConfig} args - The configuration object for the Zep API. - */ - private async createCollection(args: IZepConfig) { - if (!args.embeddingDimensions) { - throw new Error(`Collection ${args.collectionName} not found. - You can create a new Collection by providing embeddingDimensions.`); - } - - this.collection = await this.client.document.addCollection({ - name: args.collectionName, - description: args.description, - metadata: args.metadata, - embeddingDimensions: args.embeddingDimensions, - isAutoEmbedded: this.autoEmbed, - }); - - console.info("Created new collection:", args.collectionName); - } - - /** - * Adds vectors and corresponding documents to the collection. - * - * @param {number[][]} vectors - The vectors to add. - * @param {Document[]} documents - The corresponding documents to add. - * @returns {Promise} - A promise that resolves with the UUIDs of the added documents. 
- */ - async addVectors( - vectors: number[][], - documents: Document[] - ): Promise { - if (!this.autoEmbed && vectors.length === 0) { - throw new Error(`Vectors must be provided if autoEmbed is false`); - } - if (!this.autoEmbed && vectors.length !== documents.length) { - throw new Error(`Vectors and documents must have the same length`); - } - - const docs: Array = []; - for (let i = 0; i < documents.length; i += 1) { - const doc: IDocument = { - content: documents[i].pageContent, - metadata: documents[i].metadata, - embedding: vectors.length > 0 ? vectors[i] : undefined, - }; - docs.push(doc); - } - // Wait for collection to be initialized - await this.initPromise; - return await this.collection.addDocuments(docs); - } - - /** - * Adds documents to the collection. The documents are first embedded into vectors - * using the provided embedding model. - * - * @param {Document[]} documents - The documents to add. - * @returns {Promise} - A promise that resolves with the UUIDs of the added documents. - */ - async addDocuments(documents: Document[]): Promise { - const texts = documents.map(({ pageContent }) => pageContent); - let vectors: number[][] = []; - if (!this.autoEmbed) { - vectors = await this.embeddings.embedDocuments(texts); - } - return this.addVectors(vectors, documents); - } - - _vectorstoreType(): string { - return "zep"; - } - - /** - * Deletes documents from the collection. - * - * @param {IZepDeleteParams} params - The list of Zep document UUIDs to delete. - * @returns {Promise} - */ - async delete(params: IZepDeleteParams): Promise { - // Wait for collection to be initialized - await this.initPromise; - for (const uuid of params.uuids) { - await this.collection.deleteDocument(uuid); - } - } - - /** - * Performs a similarity search in the collection and returns the results with their scores. - * - * @param {number[]} query - The query vector. - * @param {number} k - The number of results to return. 
- * @param {Record} filter - The filter to apply to the search. Zep only supports Record as filter. - * @returns {Promise<[Document, number][]>} - A promise that resolves with the search results and their scores. - */ - async similaritySearchVectorWithScore( - query: number[], - k: number, - filter?: Record | undefined - ): Promise<[Document, number][]> { - await this.initPromise; - const results = await this.collection.search( - { - embedding: new Float32Array(query), - metadata: assignMetadata(filter), - }, - k - ); - return zepDocsToDocumentsAndScore(results); - } - - async _similaritySearchWithScore( - query: string, - k: number, - filter?: Record | undefined - ): Promise<[Document, number][]> { - await this.initPromise; - const results = await this.collection.search( - { - text: query, - metadata: assignMetadata(filter), - }, - k - ); - return zepDocsToDocumentsAndScore(results); - } - - async similaritySearchWithScore( - query: string, - k = 4, - filter: Record | undefined = undefined, - _callbacks = undefined // implement passing to embedQuery later - ): Promise<[Document, number][]> { - if (this.autoEmbed) { - return this._similaritySearchWithScore(query, k, filter); - } else { - return this.similaritySearchVectorWithScore( - await this.embeddings.embedQuery(query), - k, - filter - ); - } - } - - /** - * Performs a similarity search on the Zep collection. - * - * @param {string} query - The query string to search for. - * @param {number} [k=4] - The number of results to return. Defaults to 4. - * @param {this["FilterType"] | undefined} [filter=undefined] - An optional set of JSONPath filters to apply to the search. - * @param {Callbacks | undefined} [_callbacks=undefined] - Optional callbacks. Currently not implemented. - * @returns {Promise} - A promise that resolves to an array of Documents that are similar to the query. 
- * - * @async - */ - async similaritySearch( - query: string, - k = 4, - filter: this["FilterType"] | undefined = undefined, - _callbacks: Callbacks | undefined = undefined // implement passing to embedQuery later - ): Promise { - await this.initPromise; - - let results: [Document, number][]; - if (this.autoEmbed) { - const zepResults = await this.collection.search( - { text: query, metadata: assignMetadata(filter) }, - k - ); - results = zepDocsToDocumentsAndScore(zepResults); - } else { - results = await this.similaritySearchVectorWithScore( - await this.embeddings.embedQuery(query), - k, - assignMetadata(filter) - ); - } - - return results.map((result) => result[0]); - } - - /** - * Return documents selected using the maximal marginal relevance. - * Maximal marginal relevance optimizes for similarity to the query AND diversity - * among selected documents. - * - * @param {string} query - Text to look up documents similar to. - * @param options - * @param {number} options.k - Number of documents to return. - * @param {number} options.fetchK=20- Number of documents to fetch before passing to the MMR algorithm. - * @param {number} options.lambda=0.5 - Number between 0 and 1 that determines the degree of diversity among the results, - * where 0 corresponds to maximum diversity and 1 to minimum diversity. - * @param {Record} options.filter - Optional Zep JSONPath query to pre-filter on document metadata field - * - * @returns {Promise} - List of documents selected by maximal marginal relevance. 
- */ - async maxMarginalRelevanceSearch( - query: string, - options: MaxMarginalRelevanceSearchOptions - ): Promise { - const { k, fetchK = 20, lambda = 0.5, filter } = options; - - let queryEmbedding: number[]; - let zepResults: IDocument[]; - if (!this.autoEmbed) { - queryEmbedding = await this.embeddings.embedQuery(query); - zepResults = await this.collection.search( - { - embedding: new Float32Array(queryEmbedding), - metadata: assignMetadata(filter), - }, - fetchK - ); - } else { - let queryEmbeddingArray: Float32Array; - [zepResults, queryEmbeddingArray] = - await this.collection.searchReturnQueryVector( - { text: query, metadata: assignMetadata(filter) }, - fetchK - ); - queryEmbedding = Array.from(queryEmbeddingArray); - } - - const results = zepDocsToDocumentsAndScore(zepResults); - - const embeddingList = zepResults.map((doc) => - Array.from(doc.embedding ? doc.embedding : []) - ); - - const mmrIndexes = maximalMarginalRelevance( - queryEmbedding, - embeddingList, - lambda, - k - ); - - return mmrIndexes.filter((idx) => idx !== -1).map((idx) => results[idx][0]); - } - - /** - * Creates a new ZepVectorStore instance from an array of texts. Each text is converted into a Document and added to the collection. - * - * @param {string[]} texts - The texts to convert into Documents. - * @param {object[] | object} metadatas - The metadata to associate with each Document. If an array is provided, each element is associated with the corresponding Document. If an object is provided, it is associated with all Documents. - * @param {Embeddings} embeddings - The embeddings to use for vectorizing the texts. - * @param {IZepConfig} zepConfig - The configuration object for the Zep API. - * @returns {Promise} - A promise that resolves with the new ZepVectorStore instance. 
- */ - static async fromTexts( - texts: string[], - metadatas: object[] | object, - embeddings: Embeddings, - zepConfig: IZepConfig - ): Promise { - const docs: Document[] = []; - for (let i = 0; i < texts.length; i += 1) { - const metadata = Array.isArray(metadatas) ? metadatas[i] : metadatas; - const newDoc = new Document({ - pageContent: texts[i], - metadata, - }); - docs.push(newDoc); - } - return ZepVectorStore.fromDocuments(docs, embeddings, zepConfig); - } - - /** - * Creates a new ZepVectorStore instance from an array of Documents. Each Document is added to a Zep collection. - * - * @param {Document[]} docs - The Documents to add. - * @param {Embeddings} embeddings - The embeddings to use for vectorizing the Document contents. - * @param {IZepConfig} zepConfig - The configuration object for the Zep API. - * @returns {Promise} - A promise that resolves with the new ZepVectorStore instance. - */ - static async fromDocuments( - docs: Document[], - embeddings: Embeddings, - zepConfig: IZepConfig - ): Promise { - const instance = new this(embeddings, zepConfig); - // Wait for collection to be initialized - await instance.initPromise; - await instance.addDocuments(docs); - return instance; - } -} - -function zepDocsToDocumentsAndScore( - results: IDocument[] -): [Document, number][] { - return results.map((d) => [ - new Document({ - pageContent: d.content, - metadata: d.metadata, - }), - d.score ? 
d.score : 0, - ]); -} - -function assignMetadata( - value: string | Record | object | undefined -): Record | undefined { - if (typeof value === "object" && value !== null) { - return value as Record; - } - if (value !== undefined) { - console.warn("Metadata filters must be an object, Record, or undefined."); - } - return undefined; -} +export * from "@langchain/community/vectorstores/zep"; diff --git a/libs/langchain-community/.gitignore b/libs/langchain-community/.gitignore index 7f3a870e6c10..e252ea2ea09f 100644 --- a/libs/langchain-community/.gitignore +++ b/libs/langchain-community/.gitignore @@ -169,6 +169,12 @@ vectorstores/chroma.d.ts vectorstores/clickhouse.cjs vectorstores/clickhouse.js vectorstores/clickhouse.d.ts +vectorstores/closevector/node.cjs +vectorstores/closevector/node.js +vectorstores/closevector/node.d.ts +vectorstores/closevector/web.cjs +vectorstores/closevector/web.js +vectorstores/closevector/web.d.ts vectorstores/cloudflare_vectorize.cjs vectorstores/cloudflare_vectorize.js vectorstores/cloudflare_vectorize.d.ts @@ -178,12 +184,27 @@ vectorstores/convex.d.ts vectorstores/elasticsearch.cjs vectorstores/elasticsearch.js vectorstores/elasticsearch.d.ts +vectorstores/faiss.cjs +vectorstores/faiss.js +vectorstores/faiss.d.ts +vectorstores/googlevertexai.cjs +vectorstores/googlevertexai.js +vectorstores/googlevertexai.d.ts +vectorstores/hnswlib.cjs +vectorstores/hnswlib.js +vectorstores/hnswlib.d.ts vectorstores/lancedb.cjs vectorstores/lancedb.js vectorstores/lancedb.d.ts vectorstores/milvus.cjs vectorstores/milvus.js vectorstores/milvus.d.ts +vectorstores/momento_vector_index.cjs +vectorstores/momento_vector_index.js +vectorstores/momento_vector_index.d.ts +vectorstores/mongodb_atlas.cjs +vectorstores/mongodb_atlas.js +vectorstores/mongodb_atlas.d.ts vectorstores/myscale.cjs vectorstores/myscale.js vectorstores/myscale.d.ts @@ -193,6 +214,12 @@ vectorstores/neo4j_vector.d.ts vectorstores/opensearch.cjs vectorstores/opensearch.js 
vectorstores/opensearch.d.ts +vectorstores/pgvector.cjs +vectorstores/pgvector.js +vectorstores/pgvector.d.ts +vectorstores/pinecone.cjs +vectorstores/pinecone.js +vectorstores/pinecone.d.ts vectorstores/prisma.cjs vectorstores/prisma.js vectorstores/prisma.d.ts @@ -208,6 +235,9 @@ vectorstores/rockset.d.ts vectorstores/singlestore.cjs vectorstores/singlestore.js vectorstores/singlestore.d.ts +vectorstores/supabase.cjs +vectorstores/supabase.js +vectorstores/supabase.d.ts vectorstores/tigris.cjs vectorstores/tigris.js vectorstores/tigris.d.ts @@ -217,6 +247,9 @@ vectorstores/typeorm.d.ts vectorstores/typesense.cjs vectorstores/typesense.js vectorstores/typesense.d.ts +vectorstores/usearch.cjs +vectorstores/usearch.js +vectorstores/usearch.d.ts vectorstores/vectara.cjs vectorstores/vectara.js vectorstores/vectara.d.ts @@ -226,9 +259,15 @@ vectorstores/vercel_postgres.d.ts vectorstores/voy.cjs vectorstores/voy.js vectorstores/voy.d.ts +vectorstores/weaviate.cjs +vectorstores/weaviate.js +vectorstores/weaviate.d.ts vectorstores/xata.cjs vectorstores/xata.js vectorstores/xata.d.ts +vectorstores/zep.cjs +vectorstores/zep.js +vectorstores/zep.d.ts chat_models/baiduwenxin.cjs chat_models/baiduwenxin.js chat_models/baiduwenxin.d.ts @@ -331,6 +370,12 @@ storage/upstash_redis.d.ts storage/vercel_kv.cjs storage/vercel_kv.js storage/vercel_kv.d.ts +stores/doc/base.cjs +stores/doc/base.js +stores/doc/base.d.ts +stores/doc/in_memory.cjs +stores/doc/in_memory.js +stores/doc/in_memory.d.ts stores/message/cassandra.cjs stores/message/cassandra.js stores/message/cassandra.d.ts diff --git a/libs/langchain-community/cache/cloudflare_kv.cjs b/libs/langchain-community/cache/cloudflare_kv.cjs deleted file mode 100644 index b2c7c86e3052..000000000000 --- a/libs/langchain-community/cache/cloudflare_kv.cjs +++ /dev/null @@ -1 +0,0 @@ -module.exports = require('../dist/cache/cloudflare_kv.cjs'); \ No newline at end of file diff --git a/libs/langchain-community/cache/cloudflare_kv.d.ts 
b/libs/langchain-community/cache/cloudflare_kv.d.ts deleted file mode 100644 index 694d0a8efdb2..000000000000 --- a/libs/langchain-community/cache/cloudflare_kv.d.ts +++ /dev/null @@ -1 +0,0 @@ -export * from '../dist/cache/cloudflare_kv.js' \ No newline at end of file diff --git a/libs/langchain-community/cache/cloudflare_kv.js b/libs/langchain-community/cache/cloudflare_kv.js deleted file mode 100644 index 694d0a8efdb2..000000000000 --- a/libs/langchain-community/cache/cloudflare_kv.js +++ /dev/null @@ -1 +0,0 @@ -export * from '../dist/cache/cloudflare_kv.js' \ No newline at end of file diff --git a/libs/langchain-community/cache/momento.cjs b/libs/langchain-community/cache/momento.cjs deleted file mode 100644 index 8720815d3117..000000000000 --- a/libs/langchain-community/cache/momento.cjs +++ /dev/null @@ -1 +0,0 @@ -module.exports = require('../dist/cache/momento.cjs'); \ No newline at end of file diff --git a/libs/langchain-community/cache/momento.d.ts b/libs/langchain-community/cache/momento.d.ts deleted file mode 100644 index 4dc60e32d9e8..000000000000 --- a/libs/langchain-community/cache/momento.d.ts +++ /dev/null @@ -1 +0,0 @@ -export * from '../dist/cache/momento.js' \ No newline at end of file diff --git a/libs/langchain-community/cache/momento.js b/libs/langchain-community/cache/momento.js deleted file mode 100644 index 4dc60e32d9e8..000000000000 --- a/libs/langchain-community/cache/momento.js +++ /dev/null @@ -1 +0,0 @@ -export * from '../dist/cache/momento.js' \ No newline at end of file diff --git a/libs/langchain-community/cache/upstash_redis.cjs b/libs/langchain-community/cache/upstash_redis.cjs deleted file mode 100644 index ac5c1eb58ab0..000000000000 --- a/libs/langchain-community/cache/upstash_redis.cjs +++ /dev/null @@ -1 +0,0 @@ -module.exports = require('../dist/cache/upstash_redis.cjs'); \ No newline at end of file diff --git a/libs/langchain-community/cache/upstash_redis.d.ts b/libs/langchain-community/cache/upstash_redis.d.ts deleted 
file mode 100644 index 7bc892103ff9..000000000000 --- a/libs/langchain-community/cache/upstash_redis.d.ts +++ /dev/null @@ -1 +0,0 @@ -export * from '../dist/cache/upstash_redis.js' \ No newline at end of file diff --git a/libs/langchain-community/cache/upstash_redis.js b/libs/langchain-community/cache/upstash_redis.js deleted file mode 100644 index 7bc892103ff9..000000000000 --- a/libs/langchain-community/cache/upstash_redis.js +++ /dev/null @@ -1 +0,0 @@ -export * from '../dist/cache/upstash_redis.js' \ No newline at end of file diff --git a/libs/langchain-community/package.json b/libs/langchain-community/package.json index 7b9556373417..cb63dc732e11 100644 --- a/libs/langchain-community/package.json +++ b/libs/langchain-community/package.json @@ -33,8 +33,9 @@ "author": "LangChain", "license": "MIT", "dependencies": { - "@langchain/core": "~0.0.9", + "@langchain/core": "~0.0.11-rc.1", "@langchain/openai": "~0.0.1", + "flat": "^5.0.2", "langsmith": "~0.0.48", "uuid": "^9.0.0", "zod": "^3.22.3" @@ -45,17 +46,14 @@ "@aws-sdk/client-dynamodb": "^3.310.0", "@aws-sdk/client-kendra": "^3.352.0", "@aws-sdk/client-lambda": "^3.310.0", - "@aws-sdk/client-s3": "^3.310.0", "@aws-sdk/client-sagemaker-runtime": "^3.414.0", "@aws-sdk/client-sfn": "^3.362.0", "@aws-sdk/credential-provider-node": "^3.388.0", "@aws-sdk/types": "^3.357.0", - "@azure/storage-blob": "^12.15.0", "@clickhouse/client": "^0.2.5", "@cloudflare/ai": "^1.0.12", "@cloudflare/workers-types": "^4.20230922.0", "@elastic/elasticsearch": "^8.4.0", - "@faker-js/faker": "^7.6.0", "@getmetal/metal-sdk": "^4.0.0", "@getzep/zep-js": "^0.9.0", "@gomomento/sdk": "^1.51.1", @@ -86,15 +84,11 @@ "@tensorflow/tfjs-converter": "^3.6.0", "@tensorflow/tfjs-core": "^3.6.0", "@tsconfig/recommended": "^1.0.2", - "@types/d3-dsv": "^2", - "@types/decamelize": "^1.2.0", "@types/flat": "^5.0.2", "@types/html-to-text": "^9", - "@types/js-yaml": "^4", "@types/jsdom": "^21.1.1", "@types/lodash": "^4", "@types/mozilla-readability": 
"^0.2.1", - "@types/pdf-parse": "^1.1.1", "@types/pg": "^8", "@types/pg-copy-streams": "^1.2.2", "@types/uuid": "^9", @@ -108,11 +102,8 @@ "@xata.io/client": "^0.28.0", "@xenova/transformers": "^2.5.4", "@zilliz/milvus2-sdk-node": ">=2.2.11", - "apify-client": "^2.7.1", - "assemblyai": "^2.0.2", "axios": "^0.26.0", "cassandra-driver": "^4.7.2", - "cheerio": "^1.0.0-rc.12", "chromadb": "^1.5.3", "closevector-common": "0.1.0-alpha.1", "closevector-node": "0.1.0-alpha.10", @@ -122,7 +113,6 @@ "d3-dsv": "^2.0.0", "dotenv": "^16.0.3", "dpdm": "^3.12.0", - "epub2": "^3.0.1", "eslint": "^8.33.0", "eslint-config-airbnb-base": "^15.0.0", "eslint-config-prettier": "^8.6.0", @@ -146,31 +136,21 @@ "llmonitor": "^0.5.9", "lodash": "^4.17.21", "mammoth": "^1.5.1", - "ml-matrix": "^6.10.4", "mongodb": "^5.2.0", "mysql2": "^3.3.3", "neo4j-driver": "^5.12.0", "node-llama-cpp": "2.7.3", - "notion-to-md": "^3.1.0", - "officeparser": "^4.0.4", - "pdf-parse": "1.1.1", - "peggy": "^3.0.2", "pg": "^8.11.0", "pg-copy-streams": "^6.0.5", "pickleparser": "^0.2.1", - "playwright": "^1.32.1", "portkey-ai": "^0.1.11", "prettier": "^2.8.3", - "puppeteer": "^19.7.2", "pyodide": "^0.24.1", "redis": "^4.6.6", "release-it": "^15.10.1", "replicate": "^0.18.0", - "rimraf": "^5.0.1", "rollup": "^3.19.1", - "sonix-speech-recognition": "^2.1.1", "sqlite3": "^5.1.4", - "srt-parser-2": "^1.2.2", "ts-jest": "^29.1.0", "typeorm": "^0.3.12", "typescript": "~5.1.6", @@ -179,9 +159,7 @@ "vectordb": "^0.1.4", "voy-search": "0.6.2", "weaviate-ts-client": "^1.4.0", - "web-auth-library": "^1.0.3", - "youtube-transcript": "^1.0.6", - "youtubei.js": "^5.8.0" + "web-auth-library": "^1.0.3" }, "peerDependencies": { "@aws-crypto/sha256-js": "^5.0.0", @@ -189,11 +167,9 @@ "@aws-sdk/client-dynamodb": "^3.310.0", "@aws-sdk/client-kendra": "^3.352.0", "@aws-sdk/client-lambda": "^3.310.0", - "@aws-sdk/client-s3": "^3.310.0", "@aws-sdk/client-sagemaker-runtime": "^3.310.0", "@aws-sdk/client-sfn": "^3.310.0", 
"@aws-sdk/credential-provider-node": "^3.388.0", - "@azure/storage-blob": "^12.15.0", "@clickhouse/client": "^0.2.5", "@cloudflare/ai": "^1.0.12", "@elastic/elasticsearch": "^8.4.0", @@ -230,11 +206,8 @@ "@xata.io/client": "^0.28.0", "@xenova/transformers": "^2.5.4", "@zilliz/milvus2-sdk-node": ">=2.2.7", - "apify-client": "^2.7.1", - "assemblyai": "^2.0.2", "axios": "*", "cassandra-driver": "^4.7.2", - "cheerio": "^1.0.0-rc.12", "chromadb": "*", "closevector-common": "0.1.0-alpha.1", "closevector-node": "0.1.0-alpha.10", @@ -242,7 +215,6 @@ "cohere-ai": ">=6.0.0", "convex": "^1.3.1", "d3-dsv": "^2.0.0", - "epub2": "^3.0.1", "faiss-node": "^0.5.1", "fast-xml-parser": "^4.2.7", "firebase-admin": "^11.9.0", @@ -260,21 +232,13 @@ "mysql2": "^3.3.3", "neo4j-driver": "*", "node-llama-cpp": "*", - "notion-to-md": "^3.1.0", - "officeparser": "^4.0.4", - "pdf-parse": "1.1.1", - "peggy": "^3.0.2", "pg": "^8.11.0", "pg-copy-streams": "^6.0.5", "pickleparser": "^0.2.1", - "playwright": "^1.32.1", "portkey-ai": "^0.1.11", - "puppeteer": "^19.7.2", "pyodide": "^0.24.1", "redis": "^4.6.4", "replicate": "^0.18.0", - "sonix-speech-recognition": "^2.1.1", - "srt-parser-2": "^1.2.2", "typeorm": "^0.3.12", "typesense": "^1.5.3", "usearch": "^1.1.1", @@ -282,9 +246,7 @@ "voy-search": "0.6.2", "weaviate-ts-client": "^1.4.0", "web-auth-library": "^1.0.3", - "ws": "^8.14.2", - "youtube-transcript": "^1.0.6", - "youtubei.js": "^5.8.0" + "ws": "^8.14.2" }, "peerDependenciesMeta": { "@aws-crypto/sha256-js": { @@ -302,9 +264,6 @@ "@aws-sdk/client-lambda": { "optional": true }, - "@aws-sdk/client-s3": { - "optional": true - }, "@aws-sdk/client-sagemaker-runtime": { "optional": true }, @@ -314,9 +273,6 @@ "@aws-sdk/credential-provider-node": { "optional": true }, - "@azure/storage-blob": { - "optional": true - }, "@clickhouse/client": { "optional": true }, @@ -425,21 +381,12 @@ "@zilliz/milvus2-sdk-node": { "optional": true }, - "apify-client": { - "optional": true - }, - "assemblyai": { - 
"optional": true - }, "axios": { "optional": true }, "cassandra-driver": { "optional": true }, - "cheerio": { - "optional": true - }, "chromadb": { "optional": true }, @@ -461,9 +408,6 @@ "d3-dsv": { "optional": true }, - "epub2": { - "optional": true - }, "faiss-node": { "optional": true }, @@ -515,18 +459,6 @@ "node-llama-cpp": { "optional": true }, - "notion-to-md": { - "optional": true - }, - "officeparser": { - "optional": true - }, - "pdf-parse": { - "optional": true - }, - "peggy": { - "optional": true - }, "pg": { "optional": true }, @@ -536,15 +468,9 @@ "pickleparser": { "optional": true }, - "playwright": { - "optional": true - }, "portkey-ai": { "optional": true }, - "puppeteer": { - "optional": true - }, "pyodide": { "optional": true }, @@ -554,12 +480,6 @@ "replicate": { "optional": true }, - "sonix-speech-recognition": { - "optional": true - }, - "srt-parser-2": { - "optional": true - }, "typeorm": { "optional": true }, @@ -583,12 +503,6 @@ }, "ws": { "optional": true - }, - "youtube-transcript": { - "optional": true - }, - "youtubei.js": { - "optional": true } }, "publishConfig": { @@ -880,6 +794,16 @@ "import": "./vectorstores/clickhouse.js", "require": "./vectorstores/clickhouse.cjs" }, + "./vectorstores/closevector/node": { + "types": "./vectorstores/closevector/node.d.ts", + "import": "./vectorstores/closevector/node.js", + "require": "./vectorstores/closevector/node.cjs" + }, + "./vectorstores/closevector/web": { + "types": "./vectorstores/closevector/web.d.ts", + "import": "./vectorstores/closevector/web.js", + "require": "./vectorstores/closevector/web.cjs" + }, "./vectorstores/cloudflare_vectorize": { "types": "./vectorstores/cloudflare_vectorize.d.ts", "import": "./vectorstores/cloudflare_vectorize.js", @@ -895,6 +819,21 @@ "import": "./vectorstores/elasticsearch.js", "require": "./vectorstores/elasticsearch.cjs" }, + "./vectorstores/faiss": { + "types": "./vectorstores/faiss.d.ts", + "import": "./vectorstores/faiss.js", + "require": 
"./vectorstores/faiss.cjs" + }, + "./vectorstores/googlevertexai": { + "types": "./vectorstores/googlevertexai.d.ts", + "import": "./vectorstores/googlevertexai.js", + "require": "./vectorstores/googlevertexai.cjs" + }, + "./vectorstores/hnswlib": { + "types": "./vectorstores/hnswlib.d.ts", + "import": "./vectorstores/hnswlib.js", + "require": "./vectorstores/hnswlib.cjs" + }, "./vectorstores/lancedb": { "types": "./vectorstores/lancedb.d.ts", "import": "./vectorstores/lancedb.js", @@ -905,6 +844,16 @@ "import": "./vectorstores/milvus.js", "require": "./vectorstores/milvus.cjs" }, + "./vectorstores/momento_vector_index": { + "types": "./vectorstores/momento_vector_index.d.ts", + "import": "./vectorstores/momento_vector_index.js", + "require": "./vectorstores/momento_vector_index.cjs" + }, + "./vectorstores/mongodb_atlas": { + "types": "./vectorstores/mongodb_atlas.d.ts", + "import": "./vectorstores/mongodb_atlas.js", + "require": "./vectorstores/mongodb_atlas.cjs" + }, "./vectorstores/myscale": { "types": "./vectorstores/myscale.d.ts", "import": "./vectorstores/myscale.js", @@ -920,6 +869,16 @@ "import": "./vectorstores/opensearch.js", "require": "./vectorstores/opensearch.cjs" }, + "./vectorstores/pgvector": { + "types": "./vectorstores/pgvector.d.ts", + "import": "./vectorstores/pgvector.js", + "require": "./vectorstores/pgvector.cjs" + }, + "./vectorstores/pinecone": { + "types": "./vectorstores/pinecone.d.ts", + "import": "./vectorstores/pinecone.js", + "require": "./vectorstores/pinecone.cjs" + }, "./vectorstores/prisma": { "types": "./vectorstores/prisma.d.ts", "import": "./vectorstores/prisma.js", @@ -945,6 +904,11 @@ "import": "./vectorstores/singlestore.js", "require": "./vectorstores/singlestore.cjs" }, + "./vectorstores/supabase": { + "types": "./vectorstores/supabase.d.ts", + "import": "./vectorstores/supabase.js", + "require": "./vectorstores/supabase.cjs" + }, "./vectorstores/tigris": { "types": "./vectorstores/tigris.d.ts", "import": 
"./vectorstores/tigris.js", @@ -960,6 +924,11 @@ "import": "./vectorstores/typesense.js", "require": "./vectorstores/typesense.cjs" }, + "./vectorstores/usearch": { + "types": "./vectorstores/usearch.d.ts", + "import": "./vectorstores/usearch.js", + "require": "./vectorstores/usearch.cjs" + }, "./vectorstores/vectara": { "types": "./vectorstores/vectara.d.ts", "import": "./vectorstores/vectara.js", @@ -975,11 +944,21 @@ "import": "./vectorstores/voy.js", "require": "./vectorstores/voy.cjs" }, + "./vectorstores/weaviate": { + "types": "./vectorstores/weaviate.d.ts", + "import": "./vectorstores/weaviate.js", + "require": "./vectorstores/weaviate.cjs" + }, "./vectorstores/xata": { "types": "./vectorstores/xata.d.ts", "import": "./vectorstores/xata.js", "require": "./vectorstores/xata.cjs" }, + "./vectorstores/zep": { + "types": "./vectorstores/zep.d.ts", + "import": "./vectorstores/zep.js", + "require": "./vectorstores/zep.cjs" + }, "./chat_models/baiduwenxin": { "types": "./chat_models/baiduwenxin.d.ts", "import": "./chat_models/baiduwenxin.js", @@ -1150,6 +1129,16 @@ "import": "./storage/vercel_kv.js", "require": "./storage/vercel_kv.cjs" }, + "./stores/doc/base": { + "types": "./stores/doc/base.d.ts", + "import": "./stores/doc/base.js", + "require": "./stores/doc/base.cjs" + }, + "./stores/doc/in_memory": { + "types": "./stores/doc/in_memory.d.ts", + "import": "./stores/doc/in_memory.js", + "require": "./stores/doc/in_memory.cjs" + }, "./stores/message/cassandra": { "types": "./stores/message/cassandra.d.ts", "import": "./stores/message/cassandra.js", @@ -1385,6 +1374,12 @@ "vectorstores/clickhouse.cjs", "vectorstores/clickhouse.js", "vectorstores/clickhouse.d.ts", + "vectorstores/closevector/node.cjs", + "vectorstores/closevector/node.js", + "vectorstores/closevector/node.d.ts", + "vectorstores/closevector/web.cjs", + "vectorstores/closevector/web.js", + "vectorstores/closevector/web.d.ts", "vectorstores/cloudflare_vectorize.cjs", 
"vectorstores/cloudflare_vectorize.js", "vectorstores/cloudflare_vectorize.d.ts", @@ -1394,12 +1389,27 @@ "vectorstores/elasticsearch.cjs", "vectorstores/elasticsearch.js", "vectorstores/elasticsearch.d.ts", + "vectorstores/faiss.cjs", + "vectorstores/faiss.js", + "vectorstores/faiss.d.ts", + "vectorstores/googlevertexai.cjs", + "vectorstores/googlevertexai.js", + "vectorstores/googlevertexai.d.ts", + "vectorstores/hnswlib.cjs", + "vectorstores/hnswlib.js", + "vectorstores/hnswlib.d.ts", "vectorstores/lancedb.cjs", "vectorstores/lancedb.js", "vectorstores/lancedb.d.ts", "vectorstores/milvus.cjs", "vectorstores/milvus.js", "vectorstores/milvus.d.ts", + "vectorstores/momento_vector_index.cjs", + "vectorstores/momento_vector_index.js", + "vectorstores/momento_vector_index.d.ts", + "vectorstores/mongodb_atlas.cjs", + "vectorstores/mongodb_atlas.js", + "vectorstores/mongodb_atlas.d.ts", "vectorstores/myscale.cjs", "vectorstores/myscale.js", "vectorstores/myscale.d.ts", @@ -1409,6 +1419,12 @@ "vectorstores/opensearch.cjs", "vectorstores/opensearch.js", "vectorstores/opensearch.d.ts", + "vectorstores/pgvector.cjs", + "vectorstores/pgvector.js", + "vectorstores/pgvector.d.ts", + "vectorstores/pinecone.cjs", + "vectorstores/pinecone.js", + "vectorstores/pinecone.d.ts", "vectorstores/prisma.cjs", "vectorstores/prisma.js", "vectorstores/prisma.d.ts", @@ -1424,6 +1440,9 @@ "vectorstores/singlestore.cjs", "vectorstores/singlestore.js", "vectorstores/singlestore.d.ts", + "vectorstores/supabase.cjs", + "vectorstores/supabase.js", + "vectorstores/supabase.d.ts", "vectorstores/tigris.cjs", "vectorstores/tigris.js", "vectorstores/tigris.d.ts", @@ -1433,6 +1452,9 @@ "vectorstores/typesense.cjs", "vectorstores/typesense.js", "vectorstores/typesense.d.ts", + "vectorstores/usearch.cjs", + "vectorstores/usearch.js", + "vectorstores/usearch.d.ts", "vectorstores/vectara.cjs", "vectorstores/vectara.js", "vectorstores/vectara.d.ts", @@ -1442,9 +1464,15 @@ "vectorstores/voy.cjs", 
"vectorstores/voy.js", "vectorstores/voy.d.ts", + "vectorstores/weaviate.cjs", + "vectorstores/weaviate.js", + "vectorstores/weaviate.d.ts", "vectorstores/xata.cjs", "vectorstores/xata.js", "vectorstores/xata.d.ts", + "vectorstores/zep.cjs", + "vectorstores/zep.js", + "vectorstores/zep.d.ts", "chat_models/baiduwenxin.cjs", "chat_models/baiduwenxin.js", "chat_models/baiduwenxin.d.ts", @@ -1547,6 +1575,12 @@ "storage/vercel_kv.cjs", "storage/vercel_kv.js", "storage/vercel_kv.d.ts", + "stores/doc/base.cjs", + "stores/doc/base.js", + "stores/doc/base.d.ts", + "stores/doc/in_memory.cjs", + "stores/doc/in_memory.js", + "stores/doc/in_memory.d.ts", "stores/message/cassandra.cjs", "stores/message/cassandra.js", "stores/message/cassandra.d.ts", diff --git a/libs/langchain-community/scripts/create-entrypoints.js b/libs/langchain-community/scripts/create-entrypoints.js index b04416657cf9..af6925e9c22b 100644 --- a/libs/langchain-community/scripts/create-entrypoints.js +++ b/libs/langchain-community/scripts/create-entrypoints.js @@ -68,26 +68,39 @@ const entrypoints = { "vectorstores/cassandra": "vectorstores/cassandra", "vectorstores/chroma": "vectorstores/chroma", "vectorstores/clickhouse": "vectorstores/clickhouse", + "vectorstores/closevector/node": "vectorstores/closevector/node", + "vectorstores/closevector/web": "vectorstores/closevector/web", "vectorstores/cloudflare_vectorize": "vectorstores/cloudflare_vectorize", "vectorstores/convex": "vectorstores/convex", "vectorstores/elasticsearch": "vectorstores/elasticsearch", + "vectorstores/faiss": "vectorstores/faiss", + "vectorstores/googlevertexai": "vectorstores/googlevertexai", + "vectorstores/hnswlib": "vectorstores/hnswlib", "vectorstores/lancedb": "vectorstores/lancedb", "vectorstores/milvus": "vectorstores/milvus", + "vectorstores/momento_vector_index": "vectorstores/momento_vector_index", + "vectorstores/mongodb_atlas": "vectorstores/mongodb_atlas", "vectorstores/myscale": "vectorstores/myscale", 
"vectorstores/neo4j_vector": "vectorstores/neo4j_vector", "vectorstores/opensearch": "vectorstores/opensearch", + "vectorstores/pgvector": "vectorstores/pgvector", + "vectorstores/pinecone": "vectorstores/pinecone", "vectorstores/prisma": "vectorstores/prisma", "vectorstores/qdrant": "vectorstores/qdrant", "vectorstores/redis": "vectorstores/redis", "vectorstores/rockset": "vectorstores/rockset", "vectorstores/singlestore": "vectorstores/singlestore", + "vectorstores/supabase": "vectorstores/supabase", "vectorstores/tigris": "vectorstores/tigris", "vectorstores/typeorm": "vectorstores/typeorm", "vectorstores/typesense": "vectorstores/typesense", + "vectorstores/usearch": "vectorstores/usearch", "vectorstores/vectara": "vectorstores/vectara", "vectorstores/vercel_postgres": "vectorstores/vercel_postgres", "vectorstores/voy": "vectorstores/voy", + "vectorstores/weaviate": "vectorstores/weaviate", "vectorstores/xata": "vectorstores/xata", + "vectorstores/zep": "vectorstores/zep", // chat_models "chat_models/baiduwenxin": "chat_models/baiduwenxin", "chat_models/bedrock": "chat_models/bedrock/index", @@ -131,6 +144,8 @@ const entrypoints = { "storage/upstash_redis": "storage/upstash_redis", "storage/vercel_kv": "storage/vercel_kv", // stores + "stores/doc/base": "stores/doc/base", + "stores/doc/in_memory": "stores/doc/in_memory", "stores/message/cassandra": "stores/message/cassandra", "stores/message/cloudflare_d1": "stores/message/cloudflare_d1", "stores/message/convex": "stores/message/convex", @@ -186,23 +201,37 @@ const requiresOptionalDependency = [ "vectorstores/cassandra", "vectorstores/chroma", "vectorstores/clickhouse", + "vectorstores/closevector/node", + "vectorstores/closevector/web", "vectorstores/cloudflare_vectorize", "vectorstores/convex", "vectorstores/elasticsearch", + "vectorstores/faiss", + "vectorstores/googlevertexai", + "vectorstores/hnswlib", "vectorstores/lancedb", "vectorstores/milvus", + "vectorstores/momento_vector_index", + 
"vectorstores/mongodb_atlas", "vectorstores/myscale", "vectorstores/neo4j_vector", "vectorstores/opensearch", + "vectorstores/pgvector", + "vectorstores/pinecone", "vectorstores/qdrant", "vectorstores/redis", "vectorstores/rockset", "vectorstores/singlestore", + "vectorstores/supabase", "vectorstores/tigris", "vectorstores/typeorm", "vectorstores/typesense", + "vectorstores/usearch", "vectorstores/vercel_postgres", "vectorstores/voy", + "vectorstores/weaviate", + "vectorstores/xata", + "vectorstores/zep", "chat_models/bedrock", "chat_models/bedrock/web", "chat_models/googlevertexai", diff --git a/libs/langchain-community/src/chat_models/tests/chatbaiduwenxin.int.test.ts b/libs/langchain-community/src/chat_models/tests/chatbaiduwenxin.int.test.ts deleted file mode 100644 index 228f2c8f7865..000000000000 --- a/libs/langchain-community/src/chat_models/tests/chatbaiduwenxin.int.test.ts +++ /dev/null @@ -1,136 +0,0 @@ -import { test, expect } from "@jest/globals"; -import { SystemMessage, HumanMessage } from "@langchain/core/messages"; -import { ChatBaiduWenxin } from "../baiduwenxin.js"; - -interface TestConfig { - modelName: string | undefined; - config: { - description?: string; - temperature?: number; - topP?: number; - penaltyScore?: number; - streaming?: boolean; - callbacks?: Array<{ - nrNewTokens?: number; - streamedCompletion?: string; - handleLLMNewToken?: (token: string) => Promise; - }>; - }; - system?: string; - message?: string; - shouldThrow?: boolean; -} - -const runTest = async ({ - modelName, - config, - system = "", - message = "Hello!", - shouldThrow = false, -}: TestConfig) => { - const description = `Test ChatBaiduWenxin ${modelName || "default model"} ${ - config.description || "" - }`.trim(); - let nrNewTokens = 0; - let streamedCompletion = ""; - if (config.streaming) { - // eslint-disable-next-line no-param-reassign - config.callbacks = [ - { - async handleLLMNewToken(token: string) { - nrNewTokens += 1; - streamedCompletion += token; - }, - }, 
- ]; - } - test.skip(description, async () => { - const chat = new ChatBaiduWenxin({ - modelName, - ...config, - }); - - const messages = []; - if (system) { - messages.push(new SystemMessage(system)); - } - messages.push(new HumanMessage(message)); - - if (shouldThrow) { - await expect(chat.call(messages)).rejects.toThrow(); - return; - } - - const res = await chat.call(messages); - console.log({ res }); - - if (config.streaming) { - expect(nrNewTokens > 0).toBe(true); - expect(res.text).toBe(streamedCompletion); - } - }); -}; - -const testConfigs: TestConfig[] = [ - { modelName: undefined, config: {} }, - { modelName: "ERNIE-Bot", config: {} }, - { - modelName: "ERNIE-Bot", - config: { description: "with temperature", temperature: 1 }, - }, - { modelName: "ERNIE-Bot", config: { description: "with topP", topP: 1 } }, - { - modelName: "ERNIE-Bot", - config: { description: "with penaltyScore", penaltyScore: 1 }, - }, - { - modelName: "ERNIE-Bot", - config: { - description: "in streaming mode", - streaming: true, - }, - message: "您好,请讲个长笑话", - }, - { - modelName: "ERNIE-Bot", - config: { - description: "illegal input should throw an error", - temperature: 0, - }, - shouldThrow: true, - }, - { - modelName: "ERNIE-Bot", - config: { - description: "illegal input in streaming mode should throw an error", - streaming: true, - temperature: 0, - }, - shouldThrow: true, - }, - { modelName: "ERNIE-Bot-turbo", config: {} }, - { - modelName: "ERNIE-Bot-turbo", - config: { - description: "in streaming mode", - streaming: true, - }, - message: "您好,请讲个长笑话", - }, - { - modelName: "ERNIE-Bot-turbo", - config: { - description: "with system message", - }, - system: "你是一个说文言文的人", - }, - { - modelName: "ERNIE-Bot-4", - config: {}, - }, -]; - -testConfigs.forEach((testConfig) => { - // eslint-disable-next-line no-void - void runTest(testConfig); -}); diff --git a/libs/langchain-community/src/chat_models/tests/chatbedrock.int.test.ts 
b/libs/langchain-community/src/chat_models/tests/chatbedrock.int.test.ts deleted file mode 100644 index 7415d67b9f45..000000000000 --- a/libs/langchain-community/src/chat_models/tests/chatbedrock.int.test.ts +++ /dev/null @@ -1,186 +0,0 @@ -/* eslint-disable no-process-env */ -/* eslint-disable @typescript-eslint/no-non-null-assertion */ - -import { test, expect } from "@jest/globals"; -import { HumanMessage } from "@langchain/core/messages"; -import { BedrockChat } from "../bedrock/web.js"; - -// void testChatModel( -// "Test Bedrock chat model: Llama2 13B v1", -// "us-east-1", -// "meta.llama2-13b-chat-v1", -// "What is your name?" -// ); -// void testChatStreamingModel( -// "Test Bedrock streaming chat model: Llama2 13B v1", -// "us-east-1", -// "meta.llama2-13b-chat-v1", -// "What is your name and something about yourself?" -// ); - -void testChatModel( - "Test Bedrock chat model: Claude-v2", - "us-east-1", - "anthropic.claude-v2", - "What is your name?" -); -void testChatStreamingModel( - "Test Bedrock chat model streaming: Claude-v2", - "us-east-1", - "anthropic.claude-v2", - "What is your name and something about yourself?" -); - -void testChatHandleLLMNewToken( - "Test Bedrock chat model HandleLLMNewToken: Claude-v2", - "us-east-1", - "anthropic.claude-v2", - "What is your name and something about yourself?" -); -// void testChatHandleLLMNewToken( -// "Test Bedrock chat model HandleLLMNewToken: Llama2 13B v1", -// "us-east-1", -// "meta.llama2-13b-chat-v1", -// "What is your name and something about yourself?" 
-// ); - -/** - * Tests a BedrockChat model - * @param title The name of the test to run - * @param defaultRegion The AWS region to default back to if not set via environment - * @param model The model string to test - * @param message The prompt test to send to the LLM - */ -async function testChatModel( - title: string, - defaultRegion: string, - model: string, - message: string -) { - test(title, async () => { - const region = process.env.BEDROCK_AWS_REGION ?? defaultRegion; - - const bedrock = new BedrockChat({ - maxTokens: 20, - region, - model, - maxRetries: 0, - credentials: { - secretAccessKey: process.env.BEDROCK_AWS_SECRET_ACCESS_KEY!, - accessKeyId: process.env.BEDROCK_AWS_ACCESS_KEY_ID!, - sessionToken: process.env.BEDROCK_AWS_SESSION_TOKEN, - }, - }); - - const res = await bedrock.call([new HumanMessage(message)]); - console.log(res); - }); -} -/** - * Tests a BedrockChat model with a streaming response - * @param title The name of the test to run - * @param defaultRegion The AWS region to default back to if not set via environment - * @param model The model string to test - * @param message The prompt test to send to the LLM - */ -async function testChatStreamingModel( - title: string, - defaultRegion: string, - model: string, - message: string -) { - test(title, async () => { - const region = process.env.BEDROCK_AWS_REGION ?? 
defaultRegion; - - const bedrock = new BedrockChat({ - maxTokens: 200, - region, - model, - maxRetries: 0, - credentials: { - secretAccessKey: process.env.BEDROCK_AWS_SECRET_ACCESS_KEY!, - accessKeyId: process.env.BEDROCK_AWS_ACCESS_KEY_ID!, - sessionToken: process.env.BEDROCK_AWS_SESSION_TOKEN, - }, - }); - - const stream = await bedrock.stream([ - new HumanMessage({ - content: message, - }), - ]); - const chunks = []; - for await (const chunk of stream) { - console.log(chunk); - chunks.push(chunk); - } - expect(chunks.length).toBeGreaterThan(1); - }); -} -/** - * Tests a BedrockChat model with a streaming response using a new token callback - * @param title The name of the test to run - * @param defaultRegion The AWS region to default back to if not set via environment - * @param model The model string to test - * @param message The prompt test to send to the LLM - */ -async function testChatHandleLLMNewToken( - title: string, - defaultRegion: string, - model: string, - message: string -) { - test(title, async () => { - const region = process.env.BEDROCK_AWS_REGION ?? defaultRegion; - const tokens: string[] = []; - - const bedrock = new BedrockChat({ - maxTokens: 200, - region, - model, - maxRetries: 0, - credentials: { - secretAccessKey: process.env.BEDROCK_AWS_SECRET_ACCESS_KEY!, - accessKeyId: process.env.BEDROCK_AWS_ACCESS_KEY_ID!, - sessionToken: process.env.BEDROCK_AWS_SESSION_TOKEN, - }, - streaming: true, - callbacks: [ - { - handleLLMNewToken: (token) => { - tokens.push(token); - }, - }, - ], - }); - const stream = await bedrock.call([new HumanMessage(message)]); - expect(tokens.length).toBeGreaterThan(1); - expect(stream.content).toEqual(tokens.join("")); - }); -} - -test.skip.each([ - "amazon.titan-text-express-v1", - // These models should be supported in the future - // "amazon.titan-text-lite-v1", - // "amazon.titan-text-agile-v1", -])("Test Bedrock base chat model: %s", async (model) => { - const region = process.env.BEDROCK_AWS_REGION ?? 
"us-east-1"; - - const bedrock = new BedrockChat({ - region, - model, - maxRetries: 0, - modelKwargs: {}, - credentials: { - secretAccessKey: process.env.BEDROCK_AWS_SECRET_ACCESS_KEY!, - accessKeyId: process.env.BEDROCK_AWS_ACCESS_KEY_ID!, - sessionToken: process.env.BEDROCK_AWS_SESSION_TOKEN, - }, - }); - - const res = await bedrock.call([new HumanMessage("What is your name?")]); - console.log(res); - - expect(res.content.length).toBeGreaterThan(1); -}); diff --git a/libs/langchain-community/src/chat_models/tests/chatcloudflare_workersai.int.test.ts b/libs/langchain-community/src/chat_models/tests/chatcloudflare_workersai.int.test.ts deleted file mode 100644 index 09ecd3ad17f0..000000000000 --- a/libs/langchain-community/src/chat_models/tests/chatcloudflare_workersai.int.test.ts +++ /dev/null @@ -1,131 +0,0 @@ -import { describe, test } from "@jest/globals"; -import { ChatMessage, HumanMessage } from "@langchain/core/messages"; -import { - PromptTemplate, - ChatPromptTemplate, - AIMessagePromptTemplate, - HumanMessagePromptTemplate, - SystemMessagePromptTemplate, -} from "@langchain/core/prompts"; -import { getEnvironmentVariable } from "@langchain/core/utils/env"; -import { ChatCloudflareWorkersAI } from "../cloudflare_workersai.js"; - -describe("ChatCloudflareWorkersAI", () => { - test("call", async () => { - const chat = new ChatCloudflareWorkersAI(); - const message = new HumanMessage("Hello!"); - const res = await chat.call([message]); - console.log({ res }); - }); - - test("generate", async () => { - const chat = new ChatCloudflareWorkersAI(); - const message = new HumanMessage("Hello!"); - const res = await chat.generate([[message]]); - console.log(JSON.stringify(res, null, 2)); - }); - - test("generate with streaming true", async () => { - const chat = new ChatCloudflareWorkersAI({ - streaming: true, - }); - const message = new HumanMessage("What is 2 + 2?"); - const tokens: string[] = []; - const res = await chat.generate([[message]], { - callbacks: [ - 
{ - handleLLMNewToken: (token) => { - tokens.push(token); - }, - }, - ], - }); - expect(tokens.length).toBeGreaterThan(1); - expect(tokens.join("")).toEqual(res.generations[0][0].text); - }); - - test("stream", async () => { - const chat = new ChatCloudflareWorkersAI(); - const message = new HumanMessage("What is 2 + 2?"); - const stream = await chat.stream([message]); - const chunks = []; - for await (const chunk of stream) { - console.log(chunk.content); - chunks.push(chunk); - } - expect(chunks.length).toBeGreaterThan(1); - console.log(chunks.map((chunk) => chunk.content).join("")); - expect( - chunks.map((chunk) => chunk.content).join("").length - ).toBeGreaterThan(1); - }); - - test("custom messages", async () => { - const chat = new ChatCloudflareWorkersAI(); - const res = await chat.call([new ChatMessage("Hello!", "user")]); - console.log(JSON.stringify(res, null, 2)); - }); - - test("prompt templates", async () => { - const chat = new ChatCloudflareWorkersAI(); - - // PaLM doesn't support translation yet - const systemPrompt = PromptTemplate.fromTemplate( - "You are a helpful assistant who must always respond like a {job}." 
- ); - - const chatPrompt = ChatPromptTemplate.fromMessages([ - new SystemMessagePromptTemplate(systemPrompt), - HumanMessagePromptTemplate.fromTemplate("{text}"), - ]); - - const responseA = await chat.generatePrompt([ - await chatPrompt.formatPromptValue({ - job: "pirate", - text: "What would be a good company name a company that makes colorful socks?", - }), - ]); - - console.log(responseA.generations); - }); - - test("longer chain of messages", async () => { - const chat = new ChatCloudflareWorkersAI(); - - const chatPrompt = ChatPromptTemplate.fromMessages([ - HumanMessagePromptTemplate.fromTemplate(`Hi, my name is Joe!`), - AIMessagePromptTemplate.fromTemplate(`Nice to meet you, Joe!`), - HumanMessagePromptTemplate.fromTemplate("{text}"), - ]); - - const responseA = await chat.generatePrompt([ - await chatPrompt.formatPromptValue({ - text: "What did I just say my name was?", - }), - ]); - - console.log(responseA.generations); - }); - - test.skip("custom base url", async () => { - const chat = new ChatCloudflareWorkersAI({ - baseUrl: `https://gateway.ai.cloudflare.com/v1/${getEnvironmentVariable( - "CLOUDFLARE_ACCOUNT_ID" - )}/lang-chainjs/workers-ai/`, - }); - - const chatPrompt = ChatPromptTemplate.fromMessages([ - HumanMessagePromptTemplate.fromTemplate(`Hi, my name is Joe!`), - AIMessagePromptTemplate.fromTemplate(`Nice to meet you, Joe!`), - HumanMessagePromptTemplate.fromTemplate("{text}"), - ]); - - const responseA = await chat.generatePrompt([ - await chatPrompt.formatPromptValue({ - text: "What did I just say my name was?", - }), - ]); - - console.log(responseA.generations); - }); -}); diff --git a/libs/langchain-community/src/chat_models/tests/chatfireworks.int.test.ts b/libs/langchain-community/src/chat_models/tests/chatfireworks.int.test.ts deleted file mode 100644 index fe934e8d856a..000000000000 --- a/libs/langchain-community/src/chat_models/tests/chatfireworks.int.test.ts +++ /dev/null @@ -1,73 +0,0 @@ -import { describe, test } from 
"@jest/globals"; -import { ChatMessage, HumanMessage } from "@langchain/core/messages"; -import { - PromptTemplate, - ChatPromptTemplate, - AIMessagePromptTemplate, - HumanMessagePromptTemplate, - SystemMessagePromptTemplate, -} from "@langchain/core/prompts"; -import { ChatFireworks } from "../fireworks.js"; - -describe("ChatFireworks", () => { - test("call", async () => { - const chat = new ChatFireworks(); - const message = new HumanMessage("Hello!"); - const res = await chat.call([message]); - console.log({ res }); - }); - - test("generate", async () => { - const chat = new ChatFireworks(); - const message = new HumanMessage("Hello!"); - const res = await chat.generate([[message]]); - console.log(JSON.stringify(res, null, 2)); - }); - - test("custom messages", async () => { - const chat = new ChatFireworks(); - const res = await chat.call([new ChatMessage("Hello!", "user")]); - console.log(JSON.stringify(res, null, 2)); - }); - - test("prompt templates", async () => { - const chat = new ChatFireworks(); - - // PaLM doesn't support translation yet - const systemPrompt = PromptTemplate.fromTemplate( - "You are a helpful assistant who must always respond like a {job}." 
- ); - - const chatPrompt = ChatPromptTemplate.fromMessages([ - new SystemMessagePromptTemplate(systemPrompt), - HumanMessagePromptTemplate.fromTemplate("{text}"), - ]); - - const responseA = await chat.generatePrompt([ - await chatPrompt.formatPromptValue({ - job: "pirate", - text: "What would be a good company name a company that makes colorful socks?", - }), - ]); - - console.log(responseA.generations); - }); - - test("longer chain of messages", async () => { - const chat = new ChatFireworks(); - - const chatPrompt = ChatPromptTemplate.fromMessages([ - HumanMessagePromptTemplate.fromTemplate(`Hi, my name is Joe!`), - AIMessagePromptTemplate.fromTemplate(`Nice to meet you, Joe!`), - HumanMessagePromptTemplate.fromTemplate("{text}"), - ]); - - const responseA = await chat.generatePrompt([ - await chatPrompt.formatPromptValue({ - text: "What did I just say my name was?", - }), - ]); - - console.log(responseA.generations); - }); -}); diff --git a/libs/langchain-community/src/chat_models/tests/chatgooglepalm.test.ts b/libs/langchain-community/src/chat_models/tests/chatgooglepalm.test.ts deleted file mode 100644 index 72e9927aa503..000000000000 --- a/libs/langchain-community/src/chat_models/tests/chatgooglepalm.test.ts +++ /dev/null @@ -1,252 +0,0 @@ -import { protos } from "@google-ai/generativelanguage"; -import { expect, test } from "@jest/globals"; -import { - AIMessage, - BaseMessage, - HumanMessage, - SystemMessage, -} from "@langchain/core/messages"; -import { ChatGooglePaLM } from "../googlepalm.js"; - -// Test class extending actual class to test private & protected methods -class ChatGooglePaLMTest extends ChatGooglePaLM { - public _getPalmContextInstruction(messages: BaseMessage[]) { - return super._getPalmContextInstruction(messages); - } - - public _mapBaseMessagesToPalmMessages(messages: BaseMessage[]) { - return super._mapBaseMessagesToPalmMessages(messages); - } - - public _mapPalmMessagesToChatResult( - msgRes: 
protos.google.ai.generativelanguage.v1beta2.IGenerateMessageResponse - ) { - return super._mapPalmMessagesToChatResult(msgRes); - } -} - -test("Google Palm Chat - `temperature` must be in range [0.0,1.0]", async () => { - expect( - () => - new ChatGooglePaLMTest({ - temperature: -1.0, - }) - ).toThrow(); - expect( - () => - new ChatGooglePaLMTest({ - temperature: 1.1, - }) - ).toThrow(); -}); - -test("Google Palm Chat - `topP` must be positive", async () => { - expect( - () => - new ChatGooglePaLMTest({ - topP: -1, - }) - ).toThrow(); -}); - -test("Google Palm Chat - `topK` must be positive", async () => { - expect( - () => - new ChatGooglePaLMTest({ - topK: -1, - }) - ).toThrow(); -}); - -test("Google Palm Chat - gets the Palm prompt context from 'system' messages", async () => { - const messages: BaseMessage[] = [ - new SystemMessage("system-1"), - new AIMessage("ai-1"), - new HumanMessage("human-1"), - new SystemMessage("system-2"), - ]; - const model = new ChatGooglePaLMTest({ - apiKey: "GOOGLE_PALM_API_KEY", - }); - - const context = model._getPalmContextInstruction(messages); - expect(context).toBe("system-1"); -}); - -test("Google Palm Chat - maps `BaseMessage` to Palm message", async () => { - const messages: BaseMessage[] = [ - new SystemMessage("system-1"), - new AIMessage("ai-1"), - new HumanMessage("human-1"), - new AIMessage({ - content: "ai-2", - name: "droid", - additional_kwargs: { - citationSources: [ - { - startIndex: 0, - endIndex: 5, - uri: "https://example.com", - license: "MIT", - }, - ], - }, - }), - new HumanMessage({ - content: "human-2", - name: "skywalker", - }), - ]; - const model = new ChatGooglePaLMTest({ - apiKey: "GOOGLE_PALM_API_KEY", - }); - - const palmMessages = model._mapBaseMessagesToPalmMessages(messages); - expect(palmMessages.length).toEqual(4); - expect(palmMessages[0]).toEqual({ - author: "ai", - content: "ai-1", - citationMetadata: { - citationSources: undefined, - }, - }); - expect(palmMessages[1]).toEqual({ - author: 
"human", - content: "human-1", - citationMetadata: { - citationSources: undefined, - }, - }); - expect(palmMessages[2]).toEqual({ - author: "droid", - content: "ai-2", - citationMetadata: { - citationSources: [ - { - startIndex: 0, - endIndex: 5, - uri: "https://example.com", - license: "MIT", - }, - ], - }, - }); - expect(palmMessages[3]).toEqual({ - author: "skywalker", - content: "human-2", - citationMetadata: { - citationSources: undefined, - }, - }); -}); - -test("Google Palm Chat - removes 'system' messages while mapping `BaseMessage` to Palm message", async () => { - const messages: BaseMessage[] = [ - new SystemMessage("system-1"), - new AIMessage("ai-1"), - new HumanMessage("human-1"), - new SystemMessage("system-2"), - ]; - const model = new ChatGooglePaLMTest({ - apiKey: "GOOGLE_PALM_API_KEY", - }); - - const palmMessages = model._mapBaseMessagesToPalmMessages(messages); - expect(palmMessages.length).toEqual(2); - expect(palmMessages[0].content).toEqual("ai-1"); - expect(palmMessages[1].content).toEqual("human-1"); -}); - -test("Google Palm Chat - throws error for consecutive 'ai'/'human' messages while mapping `BaseMessage` to Palm message", async () => { - const messages: BaseMessage[] = [ - new AIMessage("ai-1"), - new HumanMessage("human-1"), - new AIMessage("ai-2"), - new HumanMessage("human-2"), - new HumanMessage("human-3"), - ]; - const model = new ChatGooglePaLMTest({ - apiKey: "GOOGLE_PALM_API_KEY", - }); - - expect(() => model._mapBaseMessagesToPalmMessages(messages)).toThrow(); -}); - -test("Google Palm Chat - maps Palm generated message to `AIMessage` chat result", async () => { - const generations: protos.google.ai.generativelanguage.v1beta2.IGenerateMessageResponse = - { - candidates: [ - { - author: "droid", - content: "ai-1", - citationMetadata: { - citationSources: [ - { - startIndex: 0, - endIndex: 5, - uri: "https://example.com", - license: "MIT", - }, - ], - }, - }, - ], - filters: [ - { - message: "potential problem", - reason: 
"SAFETY", - }, - ], - }; - const model = new ChatGooglePaLMTest({ - apiKey: "GOOGLE_PALM_API_KEY", - }); - - const chatResult = model._mapPalmMessagesToChatResult(generations); - expect(chatResult.generations.length).toEqual(1); - expect(chatResult.generations[0].text).toBe("ai-1"); - expect(chatResult.generations[0].message._getType()).toBe("ai"); - expect(chatResult.generations[0].message.name).toBe("droid"); - expect(chatResult.generations[0].message.content).toBe("ai-1"); - expect( - chatResult.generations[0].message.additional_kwargs.citationSources - ).toEqual([ - { - startIndex: 0, - endIndex: 5, - uri: "https://example.com", - license: "MIT", - }, - ]); - expect(chatResult.generations[0].message.additional_kwargs.filters).toEqual([ - { - message: "potential problem", - reason: "SAFETY", - }, - ]); -}); - -test("Google Palm Chat - gets empty chat result & reason if generation failed", async () => { - const generations: protos.google.ai.generativelanguage.v1beta2.IGenerateMessageResponse = - { - candidates: [], - filters: [ - { - message: "potential problem", - reason: "SAFETY", - }, - ], - }; - const model = new ChatGooglePaLMTest({ - apiKey: "GOOGLE_PALM_API_KEY", - }); - - const chatResult = model._mapPalmMessagesToChatResult(generations); - expect(chatResult.generations.length).toEqual(0); - expect(chatResult.llmOutput?.filters).toEqual([ - { - message: "potential problem", - reason: "SAFETY", - }, - ]); -}); diff --git a/libs/langchain-community/src/chat_models/tests/chatgooglevertexai.test.ts b/libs/langchain-community/src/chat_models/tests/chatgooglevertexai.test.ts deleted file mode 100644 index 9c3ee527d430..000000000000 --- a/libs/langchain-community/src/chat_models/tests/chatgooglevertexai.test.ts +++ /dev/null @@ -1,116 +0,0 @@ -import { test, expect } from "@jest/globals"; -import { - BaseMessage, - SystemMessage, - HumanMessage, - AIMessage, -} from "@langchain/core/messages"; -import { ChatExample, ChatGoogleVertexAI } from 
"../googlevertexai/index.js"; - -test("Google messages", async () => { - const messages: BaseMessage[] = [ - new HumanMessage("Human1"), - new AIMessage("AI1"), - new HumanMessage("Human2"), - ]; - const model = new ChatGoogleVertexAI(); - const instance = model.createInstance(messages); - expect(instance.context).toBe(""); - expect(instance.messages[0].author).toBe("user"); - expect(instance.messages[1].author).toBe("bot"); -}); - -test("Google messages with a system message", async () => { - const messages: BaseMessage[] = [ - new SystemMessage("System1"), - new HumanMessage("Human1"), - new AIMessage("AI1"), - new HumanMessage("Human2"), - ]; - const model = new ChatGoogleVertexAI(); - const instance = model.createInstance(messages); - expect(instance.context).toBe("System1"); - expect(instance.messages[0].author).toBe("user"); - expect(instance.messages[1].author).toBe("bot"); -}); - -test("Google examples", async () => { - const messages: BaseMessage[] = [ - new SystemMessage("System1"), - new HumanMessage("Human1"), - new AIMessage("AI1"), - new HumanMessage("Human2"), - ]; - const examples: ChatExample[] = [ - { - input: new HumanMessage("Example Human1"), - output: new AIMessage("Example AI1"), - }, - ]; - const model = new ChatGoogleVertexAI({ - examples, - }); - const instance = model.createInstance(messages); - console.log(JSON.stringify(instance, null, 2)); - expect(instance.examples?.[0].input.author).toBe("user"); - expect(instance.examples?.[0].output.author).toBe("bot"); -}); - -test("Google Throw an error for input messages where SystemMessage is not first", async () => { - const messages: BaseMessage[] = [ - new HumanMessage("Human1"), - new SystemMessage("System1"), - new AIMessage("AI1"), - new HumanMessage("Human2"), - ]; - const model = new ChatGoogleVertexAI(); - expect(() => model.createInstance(messages)).toThrow(); -}); - -test("Google Throw an error for input messages where messages the same type of message occurs twice in a row", async 
() => { - const messages: BaseMessage[] = [ - new SystemMessage("System1"), - new HumanMessage("Human1"), - new HumanMessage("Human2"), - new AIMessage("AI1"), - ]; - const model = new ChatGoogleVertexAI(); - expect(() => model.createInstance(messages)).toThrow(); -}); - -test("Google Throw an error for an even number of non-system input messages", async () => { - const messages: BaseMessage[] = [ - new SystemMessage("System1"), - new HumanMessage("Human2"), - new AIMessage("AI1"), - ]; - const model = new ChatGoogleVertexAI(); - expect(() => model.createInstance(messages)).toThrow(); -}); - -test("Google code messages", async () => { - const messages: BaseMessage[] = [ - new HumanMessage("Human1"), - new AIMessage("AI1"), - new HumanMessage("Human2"), - ]; - const model = new ChatGoogleVertexAI({ model: "codechat-bison" }); - const instance = model.createInstance(messages); - expect(instance.context).toBe(""); - expect(instance.messages[0].author).toBe("user"); - expect(instance.messages[1].author).toBe("system"); -}); - -test("Google code messages with a system message", async () => { - const messages: BaseMessage[] = [ - new SystemMessage("System1"), - new HumanMessage("Human1"), - new AIMessage("AI1"), - new HumanMessage("Human2"), - ]; - const model = new ChatGoogleVertexAI({ model: "codechat-bison" }); - const instance = model.createInstance(messages); - expect(instance.context).toBe("System1"); - expect(instance.messages[0].author).toBe("user"); - expect(instance.messages[1].author).toBe("system"); -}); diff --git a/libs/langchain-community/src/chat_models/tests/chatgooglevertexai_web.test.ts b/libs/langchain-community/src/chat_models/tests/chatgooglevertexai_web.test.ts deleted file mode 100644 index 5858e525fd84..000000000000 --- a/libs/langchain-community/src/chat_models/tests/chatgooglevertexai_web.test.ts +++ /dev/null @@ -1,149 +0,0 @@ -import { test, expect } from "@jest/globals"; -import { - BaseMessage, - SystemMessage, - HumanMessage, - AIMessage, 
-} from "@langchain/core/messages"; -import { ChatExample, ChatGoogleVertexAI } from "../googlevertexai/web.js"; - -test("Google messages", async () => { - const messages: BaseMessage[] = [ - new HumanMessage("Human1"), - new AIMessage("AI1"), - new HumanMessage("Human2"), - ]; - const model = new ChatGoogleVertexAI({ - authOptions: { - credentials: "{}", - }, - }); - const instance = model.createInstance(messages); - expect(instance.context).toBe(""); - expect(instance.messages[0].author).toBe("user"); - expect(instance.messages[1].author).toBe("bot"); -}); - -test("Google messages with a system message", async () => { - const messages: BaseMessage[] = [ - new SystemMessage("System1"), - new HumanMessage("Human1"), - new AIMessage("AI1"), - new HumanMessage("Human2"), - ]; - const model = new ChatGoogleVertexAI({ - authOptions: { - credentials: "{}", - }, - }); - const instance = model.createInstance(messages); - expect(instance.context).toBe("System1"); - expect(instance.messages[0].author).toBe("user"); - expect(instance.messages[1].author).toBe("bot"); -}); - -test("Google examples", async () => { - const messages: BaseMessage[] = [ - new SystemMessage("System1"), - new HumanMessage("Human1"), - new AIMessage("AI1"), - new HumanMessage("Human2"), - ]; - const examples: ChatExample[] = [ - { - input: new HumanMessage("Example Human1"), - output: new AIMessage("Example AI1"), - }, - ]; - const model = new ChatGoogleVertexAI({ - examples, - authOptions: { - credentials: "{}", - }, - }); - const instance = model.createInstance(messages); - console.log(JSON.stringify(instance, null, 2)); - expect(instance.examples?.[0].input.author).toBe("user"); - expect(instance.examples?.[0].output.author).toBe("bot"); -}); - -test("Google Throw an error for input messages where SystemMessage is not first", async () => { - const messages: BaseMessage[] = [ - new HumanMessage("Human1"), - new SystemMessage("System1"), - new AIMessage("AI1"), - new HumanMessage("Human2"), - ]; - 
const model = new ChatGoogleVertexAI({ - authOptions: { - credentials: "{}", - }, - }); - expect(() => model.createInstance(messages)).toThrow(); -}); - -test("Google Throw an error for input messages where messages the same type of message occurs twice in a row", async () => { - const messages: BaseMessage[] = [ - new SystemMessage("System1"), - new HumanMessage("Human1"), - new HumanMessage("Human2"), - new AIMessage("AI1"), - ]; - const model = new ChatGoogleVertexAI({ - authOptions: { - credentials: "{}", - }, - }); - expect(() => model.createInstance(messages)).toThrow(); -}); - -test("Google Throw an error for an even number of non-system input messages", async () => { - const messages: BaseMessage[] = [ - new SystemMessage("System1"), - new HumanMessage("Human2"), - new AIMessage("AI1"), - ]; - const model = new ChatGoogleVertexAI({ - authOptions: { - credentials: "{}", - }, - }); - expect(() => model.createInstance(messages)).toThrow(); -}); - -test("Google code messages", async () => { - const messages: BaseMessage[] = [ - new HumanMessage("Human1"), - new AIMessage("AI1"), - new HumanMessage("Human2"), - ]; - const model = new ChatGoogleVertexAI({ - model: "codechat-bison", - authOptions: { - credentials: "{}", - }, - }); - const instance = model.createInstance(messages); - expect(instance.context).toBe(""); - expect(instance.messages[0].author).toBe("user"); - expect(instance.messages[1].author).toBe("system"); -}); - -test("Google code messages with a system message", async () => { - const messages: BaseMessage[] = [ - new SystemMessage("System1"), - new HumanMessage("Human1"), - new AIMessage("AI1"), - new HumanMessage("Human2"), - ]; - const model = new ChatGoogleVertexAI({ - model: "codechat-bison", - authOptions: { - credentials: "{}", - }, - }); - const instance = model.createInstance(messages); - expect(instance.context).toBe("System1"); - expect(instance.messages[0].author).toBe("user"); - expect(instance.messages[1].author).toBe("system"); -}); 
diff --git a/libs/langchain-community/src/chat_models/tests/chatiflytekxinghuo.int.test.ts b/libs/langchain-community/src/chat_models/tests/chatiflytekxinghuo.int.test.ts deleted file mode 100644 index bb62d736d10e..000000000000 --- a/libs/langchain-community/src/chat_models/tests/chatiflytekxinghuo.int.test.ts +++ /dev/null @@ -1,12 +0,0 @@ -import { HumanMessage } from "@langchain/core/messages"; -import { ChatIflytekXinghuo } from "../iflytek_xinghuo/index.js"; - -test.skip("Iflytek Xinghuo Call", async () => { - const model = new ChatIflytekXinghuo({ - iflytekAppid: "", - iflytekApiKey: "", - iflytekApiSecret: "", - }); - const messages = [new HumanMessage("Nice to meet you!")]; - await model.call(messages); -}); diff --git a/libs/langchain-community/src/chat_models/tests/minimax.int.test.ts b/libs/langchain-community/src/chat_models/tests/minimax.int.test.ts deleted file mode 100644 index 8ad6f4eee6d8..000000000000 --- a/libs/langchain-community/src/chat_models/tests/minimax.int.test.ts +++ /dev/null @@ -1,341 +0,0 @@ -import { expect, test } from "@jest/globals"; -import { - ChatMessage, - HumanMessage, - SystemMessage, -} from "@langchain/core/messages"; -import { LLMResult } from "@langchain/core/outputs"; -import { CallbackManager } from "@langchain/core/callbacks/manager"; -import { - ChatPromptTemplate, - HumanMessagePromptTemplate, - PromptTemplate, - SystemMessagePromptTemplate, -} from "@langchain/core/prompts"; -import { ChatMinimax } from "../minimax.js"; - -test.skip("Test ChatMinimax", async () => { - const chat = new ChatMinimax({ - modelName: "abab5.5-chat", - botSetting: [ - { - bot_name: "MM Assistant", - content: "MM Assistant is an AI Assistant developed by minimax.", - }, - ], - }); - const message = new HumanMessage("Hello!"); - const res = await chat.call([message]); - console.log({ res }); -}); - -test.skip("Test ChatMinimax with SystemChatMessage", async () => { - const chat = new ChatMinimax(); - const system_message = new 
SystemMessage("You are to chat with a user."); - const message = new HumanMessage("Hello!"); - const res = await chat.call([system_message, message]); - console.log({ res }); -}); - -test.skip("Test ChatMinimax Generate", async () => { - const chat = new ChatMinimax({ - botSetting: [ - { - bot_name: "MM Assistant", - content: "MM Assistant is an AI Assistant developed by minimax.", - }, - ], - }); - const message = new HumanMessage("Hello!"); - const res = await chat.generate([[message], [message]]); - expect(res.generations.length).toBe(2); - for (const generation of res.generations) { - expect(generation.length).toBe(1); - for (const message of generation) { - console.log(message.text); - expect(typeof message.text).toBe("string"); - } - } - console.log({ res }); -}); - -test.skip("Test ChatMinimax Generate throws when one of the calls fails", async () => { - const chat = new ChatMinimax({ - botSetting: [ - { - bot_name: "MM Assistant", - content: "MM Assistant is an AI Assistant developed by minimax.", - }, - ], - }); - const message = new HumanMessage("Hello!"); - await expect(() => - chat.generate([[message], [message]], { - signal: AbortSignal.timeout(10), - }) - ).rejects.toThrow("TimeoutError: The operation was aborted due to timeout"); -}); - -test.skip("Test ChatMinimax tokenUsage", async () => { - let tokenUsage = { - totalTokens: 0, - }; - - const model = new ChatMinimax({ - botSetting: [ - { - bot_name: "MM Assistant", - content: "MM Assistant is an AI Assistant developed by minimax.", - }, - ], - callbackManager: CallbackManager.fromHandlers({ - async handleLLMEnd(output: LLMResult) { - tokenUsage = output.llmOutput?.tokenUsage; - }, - }), - }); - const message = new HumanMessage("Hello"); - const res = await model.call([message]); - console.log({ res }); - - expect(tokenUsage.totalTokens).toBeGreaterThan(0); -}); - -test.skip("Test ChatMinimax tokenUsage with a batch", async () => { - let tokenUsage = { - totalTokens: 0, - }; - - const model = new 
ChatMinimax({ - temperature: 0.01, - botSetting: [ - { - bot_name: "MM Assistant", - content: "MM Assistant is an AI Assistant developed by minimax.", - }, - ], - callbackManager: CallbackManager.fromHandlers({ - async handleLLMEnd(output: LLMResult) { - tokenUsage = output.llmOutput?.tokenUsage; - }, - }), - }); - const res = await model.generate([ - [new HumanMessage("Hello")], - [new HumanMessage("Hi")], - ]); - console.log({ tokenUsage }); - console.log(res); - - expect(tokenUsage.totalTokens).toBeGreaterThan(0); -}); - -test.skip("Test ChatMinimax in streaming mode", async () => { - let nrNewTokens = 0; - let streamedCompletion = ""; - - const model = new ChatMinimax({ - streaming: true, - tokensToGenerate: 10, - botSetting: [ - { - bot_name: "MM Assistant", - content: "MM Assistant is an AI Assistant developed by minimax.", - }, - ], - callbacks: [ - { - async handleLLMNewToken(token: string) { - nrNewTokens += 1; - streamedCompletion += token; - }, - }, - ], - }); - const message = new HumanMessage("Hello!"); - const result = await model.call([message]); - console.log(result); - - expect(nrNewTokens > 0).toBe(true); - expect(result.content).toBe(streamedCompletion); -}, 10000); - -test.skip("OpenAI Chat, docs, prompt templates", async () => { - const chat = new ChatMinimax({ - temperature: 0.01, - tokensToGenerate: 10, - }); - - const systemPrompt = PromptTemplate.fromTemplate( - "You are a helpful assistant that translates {input_language} to {output_language}." 
- ); - - const chatPrompt = ChatPromptTemplate.fromMessages([ - new SystemMessagePromptTemplate(systemPrompt), - HumanMessagePromptTemplate.fromTemplate("{text}"), - ]); - - const responseA = await chat.generatePrompt([ - await chatPrompt.formatPromptValue({ - input_language: "English", - output_language: "French", - text: "I love programming.", - }), - ]); - - console.log(responseA.generations); -}, 5000); - -test.skip("Test OpenAI with signal in call options", async () => { - const model = new ChatMinimax({ tokensToGenerate: 5 }); - const controller = new AbortController(); - await expect(() => { - const ret = model.call([new HumanMessage("Print hello world")], { - signal: controller.signal, - }); - - controller.abort(); - - return ret; - }).rejects.toThrow(); -}, 5000); - -test.skip("Test OpenAI with specific roles in ChatMessage", async () => { - const chat = new ChatMinimax({ tokensToGenerate: 10 }); - const system_message = new ChatMessage( - "You are to chat with a user.", - "system" - ); - const user_message = new ChatMessage("Hello!", "user"); - const res = await chat.call([system_message, user_message]); - console.log({ res }); -}); - -test.skip("Function calling ", async () => { - const weatherFunction = { - name: "get_weather", - description: " Get weather information.", - parameters: { - type: "object", - properties: { - location: { - type: "string", - description: " The location to get the weather", - }, - }, - required: ["location"], - }, - }; - - const model = new ChatMinimax({ - botSetting: [ - { - bot_name: "MM Assistant", - content: "MM Assistant is an AI Assistant developed by minimax.", - }, - ], - }).bind({ - functions: [weatherFunction], - }); - - const result = await model.invoke([ - new HumanMessage({ - content: " What is the weather like in NewYork tomorrow?", - name: "I", - }), - ]); - - console.log(result); - expect(result.additional_kwargs.function_call?.name).toBe("get_weather"); -}); -test.skip("Test ChatMinimax Function calling ", 
async () => { - const weatherFunction = { - name: "get_weather", - description: " Get weather information.", - parameters: { - type: "object", - properties: { - location: { - type: "string", - description: " The location to get the weather", - }, - }, - required: ["location"], - }, - }; - - const model = new ChatMinimax({ - botSetting: [ - { - bot_name: "MM Assistant", - content: "MM Assistant is an AI Assistant developed by minimax.", - }, - ], - }).bind({ - functions: [weatherFunction], - }); - - const result = await model.invoke([ - new HumanMessage({ - content: " What is the weather like in NewYork tomorrow?", - name: "I", - }), - ]); - - console.log(result); - expect(result.additional_kwargs.function_call?.name).toBe("get_weather"); -}); - -test.skip("Test ChatMinimax Glyph", async () => { - const model = new ChatMinimax({ - modelName: "abab5.5-chat", - botSetting: [ - { - bot_name: "MM Assistant", - content: "MM Assistant is an AI Assistant developed by minimax.", - }, - ], - }).bind({ - replyConstraints: { - sender_type: "BOT", - sender_name: "MM Assistant", - glyph: { - type: "raw", - raw_glyph: "The translated text:{{gen 'content'}}", - }, - }, - }); - - const messagesTemplate = ChatPromptTemplate.fromMessages([ - HumanMessagePromptTemplate.fromTemplate( - " Please help me translate the following sentence in English: {text}" - ), - ]); - - const messages = await messagesTemplate.formatMessages({ text: "你好" }); - const result = await model.invoke(messages); - - console.log(result); - expect(result.content).toMatch(/The translated text:.*/); -}); -test.skip("Test ChatMinimax Plugins", async () => { - const model = new ChatMinimax({ - modelName: "abab5.5-chat", - botSetting: [ - { - bot_name: "MM Assistant", - content: "MM Assistant is an AI Assistant developed by minimax.", - }, - ], - }).bind({ - plugins: ["plugin_web_search"], - }); - - const result = await model.invoke([ - new HumanMessage({ - content: " What is the weather like in NewYork tomorrow?", - 
}), - ]); - - console.log(result); -}); diff --git a/libs/langchain-community/src/document_transformers/html_to_text.ts b/libs/langchain-community/src/document_transformers/html_to_text.ts index 9021ba68976a..8676517b65a9 100644 --- a/libs/langchain-community/src/document_transformers/html_to_text.ts +++ b/libs/langchain-community/src/document_transformers/html_to_text.ts @@ -1,7 +1,8 @@ -import { htmlToText } from "html-to-text"; -import type { HtmlToTextOptions } from "html-to-text"; -import { Document } from "@langchain/core/documents"; -import { MappingDocumentTransformer } from "@langchain/core/documents"; +import { htmlToText, type HtmlToTextOptions } from "html-to-text"; +import { + MappingDocumentTransformer, + Document, +} from "@langchain/core/documents"; /** * A transformer that converts HTML content to plain text. diff --git a/libs/langchain-community/src/document_transformers/mozilla_readability.ts b/libs/langchain-community/src/document_transformers/mozilla_readability.ts index 5d295ac85ddf..a26b42a6d6c7 100644 --- a/libs/langchain-community/src/document_transformers/mozilla_readability.ts +++ b/libs/langchain-community/src/document_transformers/mozilla_readability.ts @@ -1,8 +1,10 @@ import { Readability } from "@mozilla/readability"; import { JSDOM } from "jsdom"; -import { Options } from "mozilla-readability"; -import { Document } from "@langchain/core/documents"; -import { MappingDocumentTransformer } from "@langchain/core/documents"; +import type { Options } from "mozilla-readability"; +import { + MappingDocumentTransformer, + Document, +} from "@langchain/core/documents"; /** * A transformer that uses the Mozilla Readability library to extract the diff --git a/libs/langchain-community/src/graphs/tests/neo4j_graph.int.test.ts b/libs/langchain-community/src/graphs/tests/neo4j_graph.int.test.ts deleted file mode 100644 index 3b47800fc323..000000000000 --- a/libs/langchain-community/src/graphs/tests/neo4j_graph.int.test.ts +++ /dev/null @@ -1,56 
+0,0 @@ -/* eslint-disable no-process-env */ - -import { test } from "@jest/globals"; -import { Neo4jGraph } from "../neo4j_graph.js"; - -describe.skip("Neo4j Graph Tests", () => { - const url = process.env.NEO4J_URI as string; - const username = process.env.NEO4J_USERNAME as string; - const password = process.env.NEO4J_PASSWORD as string; - let graph: Neo4jGraph; - - beforeEach(async () => { - graph = await Neo4jGraph.initialize({ url, username, password }); - }); - afterEach(async () => { - await graph.close(); - }); - - test("Schema generation works correctly", async () => { - expect(url).toBeDefined(); - expect(username).toBeDefined(); - expect(password).toBeDefined(); - - // Clear the database - await graph.query("MATCH (n) DETACH DELETE n"); - - await graph.query( - "CREATE (a:Actor {name:'Bruce Willis'})" + - "-[:ACTED_IN {roles: ['Butch Coolidge']}]->(:Movie {title: 'Pulp Fiction'})" - ); - - await graph.refreshSchema(); - console.log(graph.getSchema()); - - // expect(graph.getSchema()).toMatchInlineSnapshot(` - // "Node properties are the following: - // Actor {name: STRING}, Movie {title: STRING} - // Relationship properties are the following: - // ACTED_IN {roles: LIST} - // The relationships are the following: - // (:Actor)-[:ACTED_IN]->(:Movie)" - // `); - }); - - test("Test that Neo4j database is correctly instantiated and connected", async () => { - expect(url).toBeDefined(); - expect(username).toBeDefined(); - expect(password).toBeDefined(); - - // Integers are casted to strings in the output - const expectedOutput = [{ output: { str: "test", int: "1" } }]; - const res = await graph.query('RETURN {str: "test", int: 1} AS output'); - await graph.close(); - expect(res).toEqual(expectedOutput); - }); -}); diff --git a/libs/langchain-community/src/llms/tests/ai21.int.test.ts b/libs/langchain-community/src/llms/tests/ai21.int.test.ts deleted file mode 100644 index a922bb68a33b..000000000000 --- a/libs/langchain-community/src/llms/tests/ai21.int.test.ts 
+++ /dev/null @@ -1,51 +0,0 @@ -import { test, describe, expect } from "@jest/globals"; -import { AI21 } from "../ai21.js"; - -describe.skip("AI21", () => { - test("test call", async () => { - const ai21 = new AI21({}); - const result = await ai21.call( - "What is a good name for a company that makes colorful socks?" - ); - console.log({ result }); - }); - - test("test translation call", async () => { - const ai21 = new AI21({}); - const result = await ai21.call( - `Translate "I love programming" into German.` - ); - console.log({ result }); - }); - - test("test JSON output call", async () => { - const ai21 = new AI21({}); - const result = await ai21.call( - `Output a JSON object with three string fields: "name", "birthplace", "bio".` - ); - console.log({ result }); - }); - - test("should abort the request", async () => { - const ai21 = new AI21({}); - const controller = new AbortController(); - - await expect(() => { - const ret = ai21.call("Respond with an extremely verbose response", { - signal: controller.signal, - }); - controller.abort(); - return ret; - }).rejects.toThrow("AbortError: This operation was aborted"); - }); - - test("throws an error when response status is not ok", async () => { - const ai21 = new AI21({ - ai21ApiKey: "BAD_KEY", - }); - - await expect(ai21.call("Test prompt")).rejects.toThrow( - "AI21 call failed with status code 401" - ); - }); -}); diff --git a/libs/langchain-community/src/llms/tests/aleph_alpha.int.test.ts b/libs/langchain-community/src/llms/tests/aleph_alpha.int.test.ts deleted file mode 100644 index 203e5ca3a12f..000000000000 --- a/libs/langchain-community/src/llms/tests/aleph_alpha.int.test.ts +++ /dev/null @@ -1,54 +0,0 @@ -import { test, describe, expect } from "@jest/globals"; -import { AlephAlpha } from "../aleph_alpha.js"; - -describe("Aleph Alpha", () => { - test("test call", async () => { - const aleph_alpha = new AlephAlpha({}); - const result = await aleph_alpha.call( - "What is a good name for a company that 
makes colorful socks?" - ); - console.log({ result }); - }); - - test("test translation call", async () => { - const aleph_alpha = new AlephAlpha({}); - const result = await aleph_alpha.call( - `Translate "I love programming" into German.` - ); - console.log({ result }); - }); - - test("test JSON output call", async () => { - const aleph_alpha = new AlephAlpha({}); - const result = await aleph_alpha.call( - `Output a JSON object with three string fields: "name", "birthplace", "bio".` - ); - console.log({ result }); - }); - - test("should abort the request", async () => { - const aleph_alpha = new AlephAlpha({}); - const controller = new AbortController(); - - await expect(() => { - const ret = aleph_alpha.call( - "Respond with an extremely verbose response", - { - signal: controller.signal, - } - ); - controller.abort(); - return ret; - }).rejects.toThrow("AbortError: This operation was aborted"); - }); - - test("throws an error when response status is not ok", async () => { - const aleph_alpha = new AlephAlpha({ - aleph_alpha_api_key: "BAD_KEY", - }); - - await expect(aleph_alpha.call("Test prompt")).rejects.toThrow( - 'Aleph Alpha call failed with status 401 and body {"error":"InvalidToken","code":"UNAUTHENTICATED"}' - ); - }); -}); diff --git a/libs/langchain-community/src/llms/tests/bedrock.int.test.ts b/libs/langchain-community/src/llms/tests/bedrock.int.test.ts deleted file mode 100644 index feb2e133028a..000000000000 --- a/libs/langchain-community/src/llms/tests/bedrock.int.test.ts +++ /dev/null @@ -1,182 +0,0 @@ -/* eslint-disable no-process-env */ -/* eslint-disable @typescript-eslint/no-non-null-assertion */ - -import { test, expect } from "@jest/globals"; -import { Bedrock } from "../bedrock/index.js"; - -test("Test Bedrock LLM: AI21", async () => { - const region = process.env.BEDROCK_AWS_REGION!; - const model = "ai21.j2-grande-instruct"; - const prompt = "Human: What is your name?"; - - const bedrock = new Bedrock({ - maxTokens: 20, - region, - model, 
- maxRetries: 0, - credentials: { - accessKeyId: process.env.BEDROCK_AWS_ACCESS_KEY_ID!, - secretAccessKey: process.env.BEDROCK_AWS_SECRET_ACCESS_KEY!, - sessionToken: process.env.BEDROCK_AWS_SESSION_TOKEN, - }, - }); - - const res = await bedrock.call(prompt); - expect(typeof res).toBe("string"); - - console.log(res); -}); - -test.skip("Test Bedrock LLM: Meta Llama2", async () => { - const region = process.env.BEDROCK_AWS_REGION!; - const model = "meta.llama2-13b-chat-v1"; - const prompt = "Human: What is your name?"; - - const bedrock = new Bedrock({ - maxTokens: 20, - region, - model, - maxRetries: 0, - credentials: { - accessKeyId: process.env.BEDROCK_AWS_ACCESS_KEY_ID!, - secretAccessKey: process.env.BEDROCK_AWS_SECRET_ACCESS_KEY!, - sessionToken: process.env.BEDROCK_AWS_SESSION_TOKEN, - }, - }); - - const res = await bedrock.call(prompt); - expect(typeof res).toBe("string"); - - console.log(res); -}); - -test.skip("Test Bedrock LLM streaming: Meta Llama2", async () => { - const region = process.env.BEDROCK_AWS_REGION!; - const model = "meta.llama2-13b-chat-v1"; - const prompt = "What is your name?"; - - const bedrock = new Bedrock({ - maxTokens: 20, - region, - model, - maxRetries: 0, - credentials: { - accessKeyId: process.env.BEDROCK_AWS_ACCESS_KEY_ID!, - secretAccessKey: process.env.BEDROCK_AWS_SECRET_ACCESS_KEY!, - sessionToken: process.env.BEDROCK_AWS_SESSION_TOKEN, - }, - }); - - const stream = await bedrock.stream(prompt); - const chunks = []; - for await (const chunk of stream) { - console.log(chunk); - chunks.push(chunk); - } - expect(chunks.length).toBeGreaterThan(1); -}); - -test("Test Bedrock LLM: Claude-v2", async () => { - const region = process.env.BEDROCK_AWS_REGION!; - const model = "anthropic.claude-v2"; - const prompt = "Human: What is your name?\n\nAssistant:"; - - const bedrock = new Bedrock({ - maxTokens: 20, - region, - model, - maxRetries: 0, - credentials: { - accessKeyId: process.env.BEDROCK_AWS_ACCESS_KEY_ID!, - secretAccessKey: 
process.env.BEDROCK_AWS_SECRET_ACCESS_KEY!, - sessionToken: process.env.BEDROCK_AWS_SESSION_TOKEN, - }, - }); - - const res = await bedrock.call(prompt); - expect(typeof res).toBe("string"); - console.log(res); -}); - -test("Test Bedrock LLM streaming: AI21", async () => { - const region = process.env.BEDROCK_AWS_REGION!; - const model = "ai21.j2-grande-instruct"; - const prompt = "Human: What is your name?"; - - const bedrock = new Bedrock({ - maxTokens: 20, - region, - model, - maxRetries: 0, - credentials: { - accessKeyId: process.env.BEDROCK_AWS_ACCESS_KEY_ID!, - secretAccessKey: process.env.BEDROCK_AWS_SECRET_ACCESS_KEY!, - sessionToken: process.env.BEDROCK_AWS_SESSION_TOKEN, - }, - }); - - const stream = await bedrock.stream(prompt); - const chunks = []; - for await (const chunk of stream) { - console.log(chunk); - chunks.push(chunk); - } - expect(chunks.length).toEqual(1); -}); - -test("Test Bedrock LLM handleLLMNewToken: Claude-v2", async () => { - const region = process.env.BEDROCK_AWS_REGION!; - const model = "anthropic.claude-v2"; - const prompt = "Human: What is your name?\n\nAssistant:"; - const tokens: string[] = []; - - const bedrock = new Bedrock({ - maxTokens: 20, - region, - model, - maxRetries: 0, - credentials: { - accessKeyId: process.env.BEDROCK_AWS_ACCESS_KEY_ID!, - secretAccessKey: process.env.BEDROCK_AWS_SECRET_ACCESS_KEY!, - sessionToken: process.env.BEDROCK_AWS_SESSION_TOKEN, - }, - streaming: true, - callbacks: [ - { - handleLLMNewToken(token) { - tokens.push(token); - }, - }, - ], - }); - - const stream = await bedrock.call(prompt); - expect(tokens.length).toBeGreaterThan(1); - expect(stream).toEqual(tokens.join("")); -}); - -test("Test Bedrock LLM streaming: Claude-v2", async () => { - const region = process.env.BEDROCK_AWS_REGION!; - const model = "anthropic.claude-v2"; - const prompt = "Human: What is your name?\n\nAssistant:"; - - const bedrock = new Bedrock({ - maxTokens: 20, - region, - model, - maxRetries: 0, - credentials: { - 
accessKeyId: process.env.BEDROCK_AWS_ACCESS_KEY_ID!, - secretAccessKey: process.env.BEDROCK_AWS_SECRET_ACCESS_KEY!, - sessionToken: process.env.BEDROCK_AWS_SESSION_TOKEN, - }, - }); - - const stream = await bedrock.stream(prompt); - const chunks = []; - for await (const chunk of stream) { - console.log(chunk); - chunks.push(chunk); - } - expect(chunks.length).toBeGreaterThan(1); -}); diff --git a/libs/langchain-community/src/llms/tests/cloudflare_workersai.int.test.ts b/libs/langchain-community/src/llms/tests/cloudflare_workersai.int.test.ts deleted file mode 100644 index e43b953cee0c..000000000000 --- a/libs/langchain-community/src/llms/tests/cloudflare_workersai.int.test.ts +++ /dev/null @@ -1,50 +0,0 @@ -import { test } from "@jest/globals"; -import { getEnvironmentVariable } from "@langchain/core/utils/env"; -import { CloudflareWorkersAI } from "../cloudflare_workersai.js"; - -test("Test CloudflareWorkersAI", async () => { - const model = new CloudflareWorkersAI({}); - const res = await model.call("1 + 1 ="); - console.log(res); -}, 50000); - -test("generate with streaming true", async () => { - const model = new CloudflareWorkersAI({ - streaming: true, - }); - const tokens: string[] = []; - const res = await model.call("What is 2 + 2?", { - callbacks: [ - { - handleLLMNewToken: (token) => { - console.log(token); - tokens.push(token); - }, - }, - ], - }); - expect(tokens.length).toBeGreaterThan(1); - expect(tokens.join("")).toEqual(res); -}); - -test("Test CloudflareWorkersAI streaming", async () => { - const model = new CloudflareWorkersAI({}); - const stream = await model.stream("What is 2 + 2?"); - const chunks = []; - for await (const chunk of stream) { - chunks.push(chunk); - console.log(chunk); - } - expect(chunks.length).toBeGreaterThan(1); - console.log(chunks.join("")); -}, 50000); - -test.skip("Test custom base url", async () => { - const model = new CloudflareWorkersAI({ - baseUrl: `https://gateway.ai.cloudflare.com/v1/${getEnvironmentVariable( - 
"CLOUDFLARE_ACCOUNT_ID" - )}/lang-chainjs/workers-ai/`, - }); - const res = await model.call("1 + 1 ="); - console.log(res); -}); diff --git a/libs/langchain-community/src/llms/tests/cohere.int.test.ts b/libs/langchain-community/src/llms/tests/cohere.int.test.ts deleted file mode 100644 index 2964d63804e2..000000000000 --- a/libs/langchain-community/src/llms/tests/cohere.int.test.ts +++ /dev/null @@ -1,8 +0,0 @@ -import { test } from "@jest/globals"; -import { Cohere } from "../cohere.js"; - -test("Test Cohere", async () => { - const model = new Cohere({ maxTokens: 20 }); - const res = await model.call("1 + 1 ="); - console.log(res); -}, 50000); diff --git a/libs/langchain-community/src/llms/tests/fireworks.int.test.ts b/libs/langchain-community/src/llms/tests/fireworks.int.test.ts deleted file mode 100644 index 9eb2f604288f..000000000000 --- a/libs/langchain-community/src/llms/tests/fireworks.int.test.ts +++ /dev/null @@ -1,20 +0,0 @@ -import { test, expect } from "@jest/globals"; -import { Fireworks } from "../fireworks.js"; - -describe("Fireworks", () => { - test("call", async () => { - const model = new Fireworks({ maxTokens: 50 }); - const res = await model.call("1 + 1 = "); - console.log({ res }); - }); - - test("generate", async () => { - const model = new Fireworks({ maxTokens: 50 }); - const res = await model.generate(["1 + 1 = "]); - console.log(JSON.stringify(res, null, 2)); - - await expect( - async () => await model.generate(["1 + 1 = ", "2 + 2 = "]) - ).rejects.toThrow(); - }); -}); diff --git a/libs/langchain-community/src/llms/tests/googlepalm.int.test.ts b/libs/langchain-community/src/llms/tests/googlepalm.int.test.ts deleted file mode 100644 index 653d955db68a..000000000000 --- a/libs/langchain-community/src/llms/tests/googlepalm.int.test.ts +++ /dev/null @@ -1,32 +0,0 @@ -import { test } from "@jest/globals"; -import { GooglePaLM } from "../googlepalm.js"; - -test.skip("Test Google Palm", async () => { - const model = new GooglePaLM(); - const 
res = await model.call("what is 1 + 1?"); - console.log({ res }); - expect(res).toBeTruthy(); -}); - -test.skip("Test Google Palm generation", async () => { - const model = new GooglePaLM(); - const res = await model.generate(["what is 1 + 1?"]); - console.log(JSON.stringify(res, null, 2)); - expect(res).toBeTruthy(); -}); - -test.skip("Test Google Palm generation", async () => { - const model = new GooglePaLM(); - const res = await model.generate(["Print hello world."]); - console.log(JSON.stringify(res, null, 2)); - expect(res).toBeTruthy(); -}); - -test.skip("Test Google Palm generation", async () => { - const model = new GooglePaLM(); - const res = await model.generate([ - `Translate "I love programming" into Korean.`, - ]); - console.log(JSON.stringify(res, null, 2)); - expect(res).toBeTruthy(); -}); diff --git a/libs/langchain-community/src/llms/tests/googlepalm.test.ts b/libs/langchain-community/src/llms/tests/googlepalm.test.ts deleted file mode 100644 index ac28f763b40a..000000000000 --- a/libs/langchain-community/src/llms/tests/googlepalm.test.ts +++ /dev/null @@ -1,75 +0,0 @@ -import { test } from "@jest/globals"; -import { GooglePaLM } from "../googlepalm.js"; - -test("Google Palm - `temperature` must be in range [0.0,1.0]", async () => { - expect( - () => - new GooglePaLM({ - temperature: -1.0, - }) - ).toThrow(); - expect( - () => - new GooglePaLM({ - temperature: 1.1, - }) - ).toThrow(); -}); - -test("Google Palm - `maxOutputTokens` must be positive", async () => { - expect( - () => - new GooglePaLM({ - maxOutputTokens: -1, - }) - ).toThrow(); -}); - -test("Google Palm - `topP` must be positive", async () => { - expect( - () => - new GooglePaLM({ - topP: -1, - }) - ).toThrow(); -}); - -test("Google Palm - `topP` must be in the range [0,1]", async () => { - expect( - () => - new GooglePaLM({ - topP: 3, - }) - ).toThrow(); -}); - -test("Google Palm - `topK` must be positive", async () => { - expect( - () => - new GooglePaLM({ - topK: -1, - }) - 
).toThrow(); -}); - -test("Google Palm - `safetySettings` category array must be unique", async () => { - expect( - () => - new GooglePaLM({ - safetySettings: [ - { - category: "HARM_CATEGORY_DANGEROUS", - threshold: 1, - }, - { - category: "HARM_CATEGORY_DANGEROUS", - threshold: 2, - }, - { - category: "HARM_CATEGORY_DEROGATORY", - threshold: 1, - }, - ], - }) - ).toThrow(); -}); diff --git a/libs/langchain-community/src/llms/tests/googlevertexai.int.test.ts b/libs/langchain-community/src/llms/tests/googlevertexai.int.test.ts deleted file mode 100644 index 8d601101d290..000000000000 --- a/libs/langchain-community/src/llms/tests/googlevertexai.int.test.ts +++ /dev/null @@ -1,78 +0,0 @@ -import { expect, test } from "@jest/globals"; -import { GoogleVertexAI } from "../googlevertexai/index.js"; - -describe("Vertex AI", () => { - test("Test Google Vertex", async () => { - const model = new GoogleVertexAI({ maxOutputTokens: 50 }); - const res = await model.call("1 + 1 = "); - console.log({ res }); - }); - - test("Test Google Vertex generation", async () => { - const model = new GoogleVertexAI({ maxOutputTokens: 50 }); - const res = await model.generate(["1 + 1 = "]); - console.log(JSON.stringify(res, null, 2)); - }); - - test("Test Google Vertex generation", async () => { - const model = new GoogleVertexAI({ maxOutputTokens: 50 }); - const res = await model.generate(["Print hello world."]); - console.log(JSON.stringify(res, null, 2)); - }); - - test("Test Google Vertex generation", async () => { - const model = new GoogleVertexAI({ maxOutputTokens: 50 }); - const res = await model.generate([ - `Translate "I love programming" into Korean.`, - ]); - console.log(JSON.stringify(res, null, 2)); - }); - - test("Test Google Vertex Codey gecko model", async () => { - const model = new GoogleVertexAI({ model: "code-gecko" }); - expect(model.model).toEqual("code-gecko"); - expect(model.temperature).toEqual(0.2); - expect(model.maxOutputTokens).toEqual(64); - - const res = await 
model.call("for( let co = 0"); - console.log(res); - }); - - test("Test Google Vertex Codey bison model", async () => { - const model = new GoogleVertexAI({ - model: "code-bison", - maxOutputTokens: 2048, - }); - expect(model.model).toEqual("code-bison"); - - const res = await model.call("Count to 10 in JavaScript."); - console.log(res); - }); - - test("Test Google Vertex bison-32k model", async () => { - const model = new GoogleVertexAI({ - model: "text-bison-32k", - maxOutputTokens: 50, - }); - const res = await model.call("1 + 1 = "); - console.log({ res }); - }); - - test("streaming text", async () => { - const model = new GoogleVertexAI({ - model: "text-bison", - maxOutputTokens: 2048, - }); - - const stream = await model.stream( - "What is the answer to life, the universe, and everything. Be Verbose." - ); - const chunks = []; - for await (const chunk of stream) { - chunks.push(chunk); - console.log("chunk", chunk); - } - expect(chunks.length).toBeGreaterThan(1); - expect(chunks[chunks.length - 1]).toEqual(""); - }); -}); diff --git a/libs/langchain-community/src/llms/tests/googlevertexai_web.int.test.ts b/libs/langchain-community/src/llms/tests/googlevertexai_web.int.test.ts deleted file mode 100644 index 553c47d75106..000000000000 --- a/libs/langchain-community/src/llms/tests/googlevertexai_web.int.test.ts +++ /dev/null @@ -1,77 +0,0 @@ -import { expect, test } from "@jest/globals"; -import { GoogleVertexAI } from "../googlevertexai/web.js"; - -describe("Web Vertex AI", () => { - test("Test Google Vertex", async () => { - const model = new GoogleVertexAI({ maxOutputTokens: 50 }); - const res = await model.call("1 + 1 = "); - console.log({ res }); - }); - - test("Test Google Vertex generation", async () => { - const model = new GoogleVertexAI({ maxOutputTokens: 50 }); - const res = await model.generate(["1 + 1 = "]); - console.log(JSON.stringify(res, null, 2)); - }); - - test("Test Google Vertex generation", async () => { - const model = new GoogleVertexAI({ 
maxOutputTokens: 50 }); - const res = await model.generate(["Print hello world."]); - console.log(JSON.stringify(res, null, 2)); - }); - - test("Test Google Vertex generation", async () => { - const model = new GoogleVertexAI({ maxOutputTokens: 50 }); - const res = await model.generate([ - `Translate "I love programming" into Korean.`, - ]); - console.log(JSON.stringify(res, null, 2)); - }); - - test("Test Google Vertex Codey gecko model", async () => { - const model = new GoogleVertexAI({ model: "code-gecko" }); - expect(model.model).toEqual("code-gecko"); - expect(model.temperature).toEqual(0.2); - expect(model.maxOutputTokens).toEqual(64); - - const res = await model.call("for( let co = 0"); - console.log(res); - }); - - test("Test Google Vertex Codey bison model", async () => { - const model = new GoogleVertexAI({ - model: "code-bison", - maxOutputTokens: 2048, - }); - expect(model.model).toEqual("code-bison"); - - const res = await model.call("Count to 10 in JavaScript."); - console.log(res); - }); - - test("Test Google Vertex bison-32k model", async () => { - const model = new GoogleVertexAI({ - model: "text-bison-32k", - maxOutputTokens: 50, - }); - const res = await model.call("1 + 1 = "); - console.log({ res }); - }); - - test("Test Google Vertex stream returns one chunk", async () => { - const model = new GoogleVertexAI({ - model: "text-bison", - maxOutputTokens: 2048, - }); - - const stream = await model.stream( - "What is the answer to life, the universe, and everything?" 
- ); - const chunks = []; - for await (const chunk of stream) { - chunks.push(chunk); - console.log(chunk); - } - expect(chunks.length).toBeGreaterThan(1); - }); -}); diff --git a/libs/langchain-community/src/llms/tests/huggingface_hub.int.test.ts b/libs/langchain-community/src/llms/tests/huggingface_hub.int.test.ts deleted file mode 100644 index f2d061bb339b..000000000000 --- a/libs/langchain-community/src/llms/tests/huggingface_hub.int.test.ts +++ /dev/null @@ -1,8 +0,0 @@ -import { test } from "@jest/globals"; -import { HuggingFaceInference } from "../hf.js"; - -test("Test HuggingFace", async () => { - const model = new HuggingFaceInference({ temperature: 0.1, topP: 0.5 }); - const res = await model.call("1 + 1 ="); - console.log(res); -}, 50000); diff --git a/libs/langchain-community/src/llms/tests/llama_cpp.int.test.ts b/libs/langchain-community/src/llms/tests/llama_cpp.int.test.ts deleted file mode 100644 index a1686ae0814f..000000000000 --- a/libs/langchain-community/src/llms/tests/llama_cpp.int.test.ts +++ /dev/null @@ -1,47 +0,0 @@ -/* eslint-disable @typescript-eslint/no-non-null-assertion */ -import { test } from "@jest/globals"; -import { getEnvironmentVariable } from "@langchain/core/utils/env"; -import { LlamaCpp } from "../llama_cpp.js"; - -const llamaPath = getEnvironmentVariable("LLAMA_PATH")!; - -test.skip("Test Llama_CPP", async () => { - const model = new LlamaCpp({ modelPath: llamaPath }); - const res = await model.call("Where do Llamas live?"); - console.log(res); -}, 100000); - -test.skip("Test Llama_CPP", async () => { - const model = new LlamaCpp({ modelPath: llamaPath }); - const res = await model.call("Where do Pandas live?"); - console.log(res); -}, 100000); - -test.skip("Test Llama_CPP", async () => { - const model = new LlamaCpp({ modelPath: llamaPath }); - - // Attempt to make several queries and make sure that the system prompt - // is not returned as part of any follow-on query. 
- for (let i = 0; i < 5; i += 1) { - const res = await model.call("Where do Pandas live?"); - expect(res).not.toContain( - "You are a helpful, respectful and honest assistant." - ); - } -}, 100000); - -test.skip("Test Llama_CPP", async () => { - const model = new LlamaCpp({ modelPath: llamaPath, temperature: 0.7 }); - - const stream = await model.stream( - "Tell me a short story about a happy Llama." - ); - - const chunks = []; - for await (const chunk of stream) { - chunks.push(chunk); - process.stdout.write(chunks.join("")); - } - - expect(chunks.length).toBeGreaterThan(1); -}); diff --git a/libs/langchain-community/src/llms/tests/ollama.int.test.ts b/libs/langchain-community/src/llms/tests/ollama.int.test.ts deleted file mode 100644 index b4c3d66118b4..000000000000 --- a/libs/langchain-community/src/llms/tests/ollama.int.test.ts +++ /dev/null @@ -1,113 +0,0 @@ -import { test } from "@jest/globals"; -import { PromptTemplate } from "@langchain/core/prompts"; -import { - BytesOutputParser, - StringOutputParser, -} from "@langchain/core/output_parsers"; -import { Ollama } from "../ollama.js"; - -test.skip("test call", async () => { - const ollama = new Ollama({}); - const result = await ollama.call( - "What is a good name for a company that makes colorful socks?" 
- ); - console.log({ result }); -}); - -test.skip("test call with callback", async () => { - const ollama = new Ollama({ - baseUrl: "http://localhost:11434", - }); - const tokens: string[] = []; - const result = await ollama.predict( - "What is a good name for a company that makes colorful socks?", - { - callbacks: [ - { - handleLLMNewToken(token) { - tokens.push(token); - }, - }, - ], - } - ); - expect(tokens.length).toBeGreaterThan(1); - expect(result).toEqual(tokens.join("")); -}); - -test.skip("test streaming call", async () => { - const ollama = new Ollama({ - baseUrl: "http://localhost:11434", - }); - const stream = await ollama.stream( - `Translate "I love programming" into German.` - ); - const chunks = []; - for await (const chunk of stream) { - chunks.push(chunk); - } - console.log(chunks.join("")); - expect(chunks.length).toBeGreaterThan(1); -}); - -test.skip("should abort the request", async () => { - const ollama = new Ollama({ - baseUrl: "http://localhost:11434", - }); - const controller = new AbortController(); - - await expect(() => { - const ret = ollama.call("Respond with an extremely verbose response", { - signal: controller.signal, - }); - controller.abort(); - return ret; - }).rejects.toThrow("This operation was aborted"); -}); - -test.skip("should stream through with a bytes output parser", async () => { - const TEMPLATE = `You are a pirate named Patchy. All responses must be extremely verbose and in pirate dialect. 
- - User: {input} - AI:`; - - const prompt = PromptTemplate.fromTemplate(TEMPLATE); - - const ollama = new Ollama({ - model: "llama2", - baseUrl: "http://127.0.0.1:11434", - }); - const outputParser = new BytesOutputParser(); - const chain = prompt.pipe(ollama).pipe(outputParser); - const stream = await chain.stream({ - input: `Translate "I love programming" into German.`, - }); - const chunks = []; - for await (const chunk of stream) { - chunks.push(chunk); - } - console.log(chunks.join("")); - expect(chunks.length).toBeGreaterThan(1); -}); - -test.skip("JSON mode", async () => { - const TEMPLATE = `You are a pirate named Patchy. All responses must be in pirate dialect and in JSON format, with a property named "response" followed by the value. - - User: {input} - AI:`; - - // Infer the input variables from the template - const prompt = PromptTemplate.fromTemplate(TEMPLATE); - - const ollama = new Ollama({ - model: "llama2", - baseUrl: "http://127.0.0.1:11434", - format: "json", - }); - const outputParser = new StringOutputParser(); - const chain = prompt.pipe(ollama).pipe(outputParser); - const res = await chain.invoke({ - input: `Translate "I love programming" into German.`, - }); - expect(JSON.parse(res).response).toBeDefined(); -}); diff --git a/libs/langchain-community/src/llms/tests/replicate.int.test.ts b/libs/langchain-community/src/llms/tests/replicate.int.test.ts deleted file mode 100644 index c4c389277a4e..000000000000 --- a/libs/langchain-community/src/llms/tests/replicate.int.test.ts +++ /dev/null @@ -1,57 +0,0 @@ -import { test, expect } from "@jest/globals"; -import { Replicate } from "../replicate.js"; - -// Test skipped because Replicate appears to be timing out often when called -test("Test Replicate", async () => { - const model = new Replicate({ - model: - "a16z-infra/llama13b-v2-chat:df7690f1994d94e96ad9d568eac121aecf50684a0b0963b25a41cc40061269e5", - input: { - max_length: 10, - }, - }); - - const res = await model.call("Hello, my name is "); 
- - console.log({ res }); - - expect(typeof res).toBe("string"); -}); - -test("Serialise Replicate", () => { - const model = new Replicate({ - model: - "a16z-infra/llama13b-v2-chat:df7690f1994d94e96ad9d568eac121aecf50684a0b0963b25a41cc40061269e5", - input: { - max_length: 10, - }, - }); - - const serialised = JSON.stringify(model.toJSON()); - - expect(JSON.parse(serialised)).toMatchInlineSnapshot(` - { - "id": [ - "langchain", - "llms", - "replicate", - "Replicate", - ], - "kwargs": { - "api_key": { - "id": [ - "REPLICATE_API_TOKEN", - ], - "lc": 1, - "type": "secret", - }, - "input": { - "max_length": 10, - }, - "model": "a16z-infra/llama13b-v2-chat:df7690f1994d94e96ad9d568eac121aecf50684a0b0963b25a41cc40061269e5", - }, - "lc": 1, - "type": "constructor", - } - `); -}); diff --git a/libs/langchain-community/src/llms/tests/sagemaker_endpoint.int.test.ts b/libs/langchain-community/src/llms/tests/sagemaker_endpoint.int.test.ts deleted file mode 100644 index ff7d4dd8a96c..000000000000 --- a/libs/langchain-community/src/llms/tests/sagemaker_endpoint.int.test.ts +++ /dev/null @@ -1,133 +0,0 @@ -/* eslint-disable no-process-env */ -/* eslint-disable @typescript-eslint/no-non-null-assertion */ -import { expect, test } from "@jest/globals"; -import { - SageMakerEndpoint, - SageMakerLLMContentHandler, -} from "../sagemaker_endpoint.js"; - -// yarn test:single /{path_to}/langchain/src/llms/tests/sagemaker.int.test.ts -describe.skip("Test SageMaker LLM", () => { - test("without streaming", async () => { - interface ResponseJsonInterface { - generation: { - content: string; - }; - } - - class LLama213BHandler implements SageMakerLLMContentHandler { - contentType = "application/json"; - - accepts = "application/json"; - - async transformInput( - prompt: string, - modelKwargs: Record - ): Promise { - const payload = { - inputs: [[{ role: "user", content: prompt }]], - parameters: modelKwargs, - }; - - const input_str = JSON.stringify(payload); - - return new 
TextEncoder().encode(input_str); - } - - async transformOutput(output: Uint8Array): Promise { - const response_json = JSON.parse( - new TextDecoder("utf-8").decode(output) - ) as ResponseJsonInterface[]; - const content = response_json[0]?.generation.content ?? ""; - return content; - } - } - - const contentHandler = new LLama213BHandler(); - const model = new SageMakerEndpoint({ - endpointName: "aws-productbot-ai-dev-llama-2-13b-chat", - streaming: false, - modelKwargs: { - temperature: 0.5, - max_new_tokens: 700, - top_p: 0.9, - }, - endpointKwargs: { - CustomAttributes: "accept_eula=true", - }, - contentHandler, - clientOptions: { - region: "us-east-1", - credentials: { - accessKeyId: process.env.AWS_ACCESS_KEY_ID!, - secretAccessKey: process.env.AWS_SECRET_ACCESS_KEY!, - }, - }, - }); - - const response = await model.call( - "hello, my name is John Doe, tell me a fun story about llamas." - ); - - expect(response.length).toBeGreaterThan(0); - }); - - test("with streaming", async () => { - class LLama213BHandler implements SageMakerLLMContentHandler { - contentType = "application/json"; - - accepts = "application/json"; - - async transformInput( - prompt: string, - modelKwargs: Record - ): Promise { - const payload = { - inputs: [[{ role: "user", content: prompt }]], - parameters: modelKwargs, - }; - - const input_str = JSON.stringify(payload); - - return new TextEncoder().encode(input_str); - } - - async transformOutput(output: Uint8Array): Promise { - return new TextDecoder("utf-8").decode(output); - } - } - - const contentHandler = new LLama213BHandler(); - const model = new SageMakerEndpoint({ - endpointName: "aws-productbot-ai-dev-llama-2-13b-chat", - streaming: true, // specify streaming - modelKwargs: { - temperature: 0.5, - max_new_tokens: 700, - top_p: 0.9, - }, - endpointKwargs: { - CustomAttributes: "accept_eula=true", - }, - contentHandler, - clientOptions: { - region: "us-east-1", - credentials: { - accessKeyId: process.env.AWS_ACCESS_KEY_ID!, - 
secretAccessKey: process.env.AWS_SECRET_ACCESS_KEY!, - }, - }, - }); - - const response = await model.call( - "hello, my name is John Doe, tell me a fun story about llamas in 3 paragraphs" - ); - - const chunks = []; - for await (const chunk of response) { - chunks.push(chunk); - } - - expect(response.length).toBeGreaterThan(0); - }); -}); diff --git a/libs/langchain-community/src/llms/tests/writer.int.test.ts b/libs/langchain-community/src/llms/tests/writer.int.test.ts deleted file mode 100644 index 9c45c789aafe..000000000000 --- a/libs/langchain-community/src/llms/tests/writer.int.test.ts +++ /dev/null @@ -1,8 +0,0 @@ -import { test } from "@jest/globals"; -import { Writer } from "../writer.js"; - -test.skip("Test Writer", async () => { - const model = new Writer({ maxTokens: 20 }); - const res = await model.invoke("1 + 1 ="); - console.log(res); -}, 50000); diff --git a/libs/langchain-community/src/load/import_constants.ts b/libs/langchain-community/src/load/import_constants.ts index ed5bdb480ebb..9be7c339c8ef 100644 --- a/libs/langchain-community/src/load/import_constants.ts +++ b/libs/langchain-community/src/load/import_constants.ts @@ -32,23 +32,37 @@ export const optionalImportEntrypoints = [ "langchain_community/vectorstores/cassandra", "langchain_community/vectorstores/chroma", "langchain_community/vectorstores/clickhouse", + "langchain_community/vectorstores/closevector/node", + "langchain_community/vectorstores/closevector/web", "langchain_community/vectorstores/cloudflare_vectorize", "langchain_community/vectorstores/convex", "langchain_community/vectorstores/elasticsearch", + "langchain_community/vectorstores/faiss", + "langchain_community/vectorstores/googlevertexai", + "langchain_community/vectorstores/hnswlib", "langchain_community/vectorstores/lancedb", "langchain_community/vectorstores/milvus", + "langchain_community/vectorstores/momento_vector_index", + "langchain_community/vectorstores/mongodb_atlas", "langchain_community/vectorstores/myscale", 
"langchain_community/vectorstores/neo4j_vector", "langchain_community/vectorstores/opensearch", + "langchain_community/vectorstores/pgvector", + "langchain_community/vectorstores/pinecone", "langchain_community/vectorstores/qdrant", "langchain_community/vectorstores/redis", "langchain_community/vectorstores/rockset", "langchain_community/vectorstores/singlestore", + "langchain_community/vectorstores/supabase", "langchain_community/vectorstores/tigris", "langchain_community/vectorstores/typeorm", "langchain_community/vectorstores/typesense", + "langchain_community/vectorstores/usearch", "langchain_community/vectorstores/vercel_postgres", "langchain_community/vectorstores/voy", + "langchain_community/vectorstores/weaviate", + "langchain_community/vectorstores/xata", + "langchain_community/vectorstores/zep", "langchain_community/chat_models/bedrock", "langchain_community/chat_models/bedrock/web", "langchain_community/chat_models/googlevertexai", diff --git a/libs/langchain-community/src/load/import_map.ts b/libs/langchain-community/src/load/import_map.ts index 67f965138d37..4d41c56fcf5f 100644 --- a/libs/langchain-community/src/load/import_map.ts +++ b/libs/langchain-community/src/load/import_map.ts @@ -27,7 +27,6 @@ export * as llms__ollama from "../llms/ollama.js"; export * as llms__yandex from "../llms/yandex.js"; export * as vectorstores__prisma from "../vectorstores/prisma.js"; export * as vectorstores__vectara from "../vectorstores/vectara.js"; -export * as vectorstores__xata from "../vectorstores/xata.js"; export * as chat_models__baiduwenxin from "../chat_models/baiduwenxin.js"; export * as chat_models__cloudflare_workersai from "../chat_models/cloudflare_workersai.js"; export * as chat_models__fireworks from "../chat_models/fireworks.js"; @@ -41,3 +40,5 @@ export * as caches__cloudflare_kv from "../caches/cloudflare_kv.js"; export * as caches__momento from "../caches/momento.js"; export * as caches__upstash_redis from "../caches/upstash_redis.js"; export * as 
utils__event_source_parse from "../utils/event_source_parse.js"; +export * as stores__doc__base from "../stores/doc/base.js"; +export * as stores__doc__in_memory from "../stores/doc/in_memory.js"; diff --git a/libs/langchain-community/src/load/import_type.d.ts b/libs/langchain-community/src/load/import_type.d.ts index 71530d0e8030..01637a94f0b2 100644 --- a/libs/langchain-community/src/load/import_type.d.ts +++ b/libs/langchain-community/src/load/import_type.d.ts @@ -94,6 +94,12 @@ export interface OptionalImportMap { "@langchain/community/vectorstores/clickhouse"?: | typeof import("../vectorstores/clickhouse.js") | Promise; + "@langchain/community/vectorstores/closevector/node"?: + | typeof import("../vectorstores/closevector/node.js") + | Promise; + "@langchain/community/vectorstores/closevector/web"?: + | typeof import("../vectorstores/closevector/web.js") + | Promise; "@langchain/community/vectorstores/cloudflare_vectorize"?: | typeof import("../vectorstores/cloudflare_vectorize.js") | Promise; @@ -103,12 +109,27 @@ export interface OptionalImportMap { "@langchain/community/vectorstores/elasticsearch"?: | typeof import("../vectorstores/elasticsearch.js") | Promise; + "@langchain/community/vectorstores/faiss"?: + | typeof import("../vectorstores/faiss.js") + | Promise; + "@langchain/community/vectorstores/googlevertexai"?: + | typeof import("../vectorstores/googlevertexai.js") + | Promise; + "@langchain/community/vectorstores/hnswlib"?: + | typeof import("../vectorstores/hnswlib.js") + | Promise; "@langchain/community/vectorstores/lancedb"?: | typeof import("../vectorstores/lancedb.js") | Promise; "@langchain/community/vectorstores/milvus"?: | typeof import("../vectorstores/milvus.js") | Promise; + "@langchain/community/vectorstores/momento_vector_index"?: + | typeof import("../vectorstores/momento_vector_index.js") + | Promise; + "@langchain/community/vectorstores/mongodb_atlas"?: + | typeof import("../vectorstores/mongodb_atlas.js") + | Promise; 
"@langchain/community/vectorstores/myscale"?: | typeof import("../vectorstores/myscale.js") | Promise; @@ -118,6 +139,12 @@ export interface OptionalImportMap { "@langchain/community/vectorstores/opensearch"?: | typeof import("../vectorstores/opensearch.js") | Promise; + "@langchain/community/vectorstores/pgvector"?: + | typeof import("../vectorstores/pgvector.js") + | Promise; + "@langchain/community/vectorstores/pinecone"?: + | typeof import("../vectorstores/pinecone.js") + | Promise; "@langchain/community/vectorstores/qdrant"?: | typeof import("../vectorstores/qdrant.js") | Promise; @@ -130,6 +157,9 @@ export interface OptionalImportMap { "@langchain/community/vectorstores/singlestore"?: | typeof import("../vectorstores/singlestore.js") | Promise; + "@langchain/community/vectorstores/supabase"?: + | typeof import("../vectorstores/supabase.js") + | Promise; "@langchain/community/vectorstores/tigris"?: | typeof import("../vectorstores/tigris.js") | Promise; @@ -139,12 +169,24 @@ export interface OptionalImportMap { "@langchain/community/vectorstores/typesense"?: | typeof import("../vectorstores/typesense.js") | Promise; + "@langchain/community/vectorstores/usearch"?: + | typeof import("../vectorstores/usearch.js") + | Promise; "@langchain/community/vectorstores/vercel_postgres"?: | typeof import("../vectorstores/vercel_postgres.js") | Promise; "@langchain/community/vectorstores/voy"?: | typeof import("../vectorstores/voy.js") | Promise; + "@langchain/community/vectorstores/weaviate"?: + | typeof import("../vectorstores/weaviate.js") + | Promise; + "@langchain/community/vectorstores/xata"?: + | typeof import("../vectorstores/xata.js") + | Promise; + "@langchain/community/vectorstores/zep"?: + | typeof import("../vectorstores/zep.js") + | Promise; "@langchain/community/chat_models/bedrock"?: | typeof import("../chat_models/bedrock/index.js") | Promise; diff --git a/libs/langchain-community/src/retrievers/tests/amazon_kendra.int.test.ts 
b/libs/langchain-community/src/retrievers/tests/amazon_kendra.int.test.ts deleted file mode 100644 index be3f1b3d0701..000000000000 --- a/libs/langchain-community/src/retrievers/tests/amazon_kendra.int.test.ts +++ /dev/null @@ -1,22 +0,0 @@ -/* eslint-disable no-process-env */ -/* eslint-disable @typescript-eslint/no-non-null-assertion */ -import { test } from "@jest/globals"; -import { AmazonKendraRetriever } from "../amazon_kendra.js"; - -test.skip("AmazonKendraRetriever", async () => { - const retriever = new AmazonKendraRetriever({ - topK: 10, - indexId: "5c0fcb10-9573-42df-8846-e30d69004ec5", - region: "us-east-2", - clientOptions: { - credentials: { - accessKeyId: process.env.AWS_ACCESS_KEY_ID!, - secretAccessKey: process.env.AWS_SECRET_ACCESS_KEY!, - }, - }, - }); - - const docs = await retriever.getRelevantDocuments("How are clouds formed?"); - - console.log(docs); -}); diff --git a/libs/langchain-community/src/retrievers/tests/metal.int.test.ts b/libs/langchain-community/src/retrievers/tests/metal.int.test.ts deleted file mode 100644 index f462984ba66f..000000000000 --- a/libs/langchain-community/src/retrievers/tests/metal.int.test.ts +++ /dev/null @@ -1,22 +0,0 @@ -/* eslint-disable no-process-env */ -/* eslint-disable @typescript-eslint/no-non-null-assertion */ -import { test, expect } from "@jest/globals"; -import Metal from "@getmetal/metal-sdk"; - -import { MetalRetriever } from "../metal.js"; - -test("MetalRetriever", async () => { - const MetalSDK = Metal; - const client = new MetalSDK( - process.env.METAL_API_KEY!, - process.env.METAL_CLIENT_ID!, - process.env.METAL_INDEX_ID - ); - const retriever = new MetalRetriever({ client }); - - const docs = await retriever.getRelevantDocuments("hello"); - - expect(docs.length).toBeGreaterThan(0); - - console.log(docs); -}); diff --git a/libs/langchain-community/src/retrievers/tests/supabase.int.test.ts b/libs/langchain-community/src/retrievers/tests/supabase.int.test.ts deleted file mode 100644 index 
a6ff76833074..000000000000 --- a/libs/langchain-community/src/retrievers/tests/supabase.int.test.ts +++ /dev/null @@ -1,27 +0,0 @@ -/* eslint-disable no-process-env */ -/* eslint-disable @typescript-eslint/no-non-null-assertion */ -import { test, expect } from "@jest/globals"; -import { createClient } from "@supabase/supabase-js"; -import { OpenAIEmbeddings } from "@langchain/openai"; -import { SupabaseHybridSearch } from "../supabase.js"; - -test("Supabase hybrid keyword search", async () => { - const client = createClient( - process.env.SUPABASE_URL!, - process.env.SUPABASE_PRIVATE_KEY! - ); - - const embeddings = new OpenAIEmbeddings(); - - const retriever = new SupabaseHybridSearch(embeddings, { - client, - similarityK: 2, - keywordK: 2, - }); - - expect(retriever).toBeDefined(); - - const results = await retriever.getRelevantDocuments("hello bye"); - - expect(results.length).toBeGreaterThan(0); -}); diff --git a/libs/langchain-community/src/retrievers/tests/tavily_search_api.int.test.ts b/libs/langchain-community/src/retrievers/tests/tavily_search_api.int.test.ts deleted file mode 100644 index fd71faed7ee3..000000000000 --- a/libs/langchain-community/src/retrievers/tests/tavily_search_api.int.test.ts +++ /dev/null @@ -1,17 +0,0 @@ -/* eslint-disable no-process-env */ -/* eslint-disable @typescript-eslint/no-non-null-assertion */ -import { test, expect } from "@jest/globals"; - -import { TavilySearchAPIRetriever } from "../tavily_search_api.js"; - -test.skip("TavilySearchAPIRetriever", async () => { - const retriever = new TavilySearchAPIRetriever({ - includeImages: true, - includeRawContent: true, - }); - - const docs = await retriever.getRelevantDocuments("what bear is best?"); - expect(docs.length).toBeGreaterThan(0); - - console.log(docs); -}); diff --git a/libs/langchain-community/src/stores/doc/base.ts b/libs/langchain-community/src/stores/doc/base.ts new file mode 100644 index 000000000000..17b0c354c0f0 --- /dev/null +++ 
b/libs/langchain-community/src/stores/doc/base.ts @@ -0,0 +1,11 @@ +import { Document } from "@langchain/core/documents"; + +/** + * Abstract class for a document store. All document stores should extend + * this class. + */ +export abstract class Docstore { + abstract search(search: string): Promise; + + abstract add(texts: Record): Promise; +} diff --git a/libs/langchain-community/src/stores/doc/in_memory.ts b/libs/langchain-community/src/stores/doc/in_memory.ts new file mode 100644 index 000000000000..3a5be2949ec4 --- /dev/null +++ b/libs/langchain-community/src/stores/doc/in_memory.ts @@ -0,0 +1,113 @@ +import { Document } from "@langchain/core/documents"; +import { BaseStoreInterface } from "@langchain/core/stores"; +import { Docstore } from "./base.js"; + +/** + * Class for storing and retrieving documents in memory asynchronously. + * Extends the Docstore class. + */ +export class InMemoryDocstore + extends Docstore + implements BaseStoreInterface +{ + _docs: Map; + + constructor(docs?: Map) { + super(); + this._docs = docs ?? new Map(); + } + + /** + * Searches for a document in the store based on its ID. + * @param search The ID of the document to search for. + * @returns The document with the given ID. + */ + async search(search: string): Promise { + const result = this._docs.get(search); + if (!result) { + throw new Error(`ID ${search} not found.`); + } else { + return result; + } + } + + /** + * Adds new documents to the store. + * @param texts An object where the keys are document IDs and the values are the documents themselves. 
+ * @returns Void + */ + async add(texts: Record): Promise { + const keys = [...this._docs.keys()]; + const overlapping = Object.keys(texts).filter((x) => keys.includes(x)); + + if (overlapping.length > 0) { + throw new Error(`Tried to add ids that already exist: ${overlapping}`); + } + + for (const [key, value] of Object.entries(texts)) { + this._docs.set(key, value); + } + } + + async mget(keys: string[]): Promise { + return Promise.all(keys.map((key) => this.search(key))); + } + + async mset(keyValuePairs: [string, Document][]): Promise { + await Promise.all( + keyValuePairs.map(([key, value]) => this.add({ [key]: value })) + ); + } + + async mdelete(_keys: string[]): Promise { + throw new Error("Not implemented."); + } + + // eslint-disable-next-line require-yield + async *yieldKeys(_prefix?: string): AsyncGenerator { + throw new Error("Not implemented"); + } +} + +/** + * Class for storing and retrieving documents in memory synchronously. + */ +export class SynchronousInMemoryDocstore { + _docs: Map; + + constructor(docs?: Map) { + this._docs = docs ?? new Map(); + } + + /** + * Searches for a document in the store based on its ID. + * @param search The ID of the document to search for. + * @returns The document with the given ID. + */ + search(search: string): Document { + const result = this._docs.get(search); + if (!result) { + throw new Error(`ID ${search} not found.`); + } else { + return result; + } + } + + /** + * Adds new documents to the store. + * @param texts An object where the keys are document IDs and the values are the documents themselves. 
+ * @returns Void + */ + add(texts: Record): void { + const keys = [...this._docs.keys()]; + const overlapping = Object.keys(texts).filter((x) => keys.includes(x)); + + if (overlapping.length > 0) { + throw new Error(`Tried to add ids that already exist: ${overlapping}`); + } + + for (const [key, value] of Object.entries(texts)) { + this._docs.set(key, value); + } + } +} diff --git a/libs/langchain-community/src/stores/message/cassandra.ts b/libs/langchain-community/src/stores/message/cassandra.ts index 32dab99e166e..c648d43dd42a 100644 --- a/libs/langchain-community/src/stores/message/cassandra.ts +++ b/libs/langchain-community/src/stores/message/cassandra.ts @@ -1,9 +1,11 @@ import { Client, DseClientOptions } from "cassandra-driver"; +import { BaseListChatMessageHistory } from "@langchain/core/chat_history"; import { - BaseListChatMessageHistory -} from "@langchain/core/chat_history"; -import { BaseMessage, StoredMessage, mapChatMessagesToStoredMessages, - mapStoredMessagesToChatMessages, } from "@langchain/core/messages"; + BaseMessage, + StoredMessage, + mapChatMessagesToStoredMessages, + mapStoredMessagesToChatMessages, +} from "@langchain/core/messages"; export interface CassandraChatMessageHistoryOptions extends DseClientOptions { keyspace: string; diff --git a/libs/langchain-community/src/stores/message/cloudflare_d1.ts b/libs/langchain-community/src/stores/message/cloudflare_d1.ts index 654c90f594b0..4b8a99ff1904 100644 --- a/libs/langchain-community/src/stores/message/cloudflare_d1.ts +++ b/libs/langchain-community/src/stores/message/cloudflare_d1.ts @@ -1,10 +1,13 @@ import { v4 } from "uuid"; import type { D1Database } from "@cloudflare/workers-types"; +import { BaseListChatMessageHistory } from "@langchain/core/chat_history"; import { - BaseListChatMessageHistory -} from "@langchain/core/chat_history"; -import { BaseMessage, StoredMessage, StoredMessageData, mapChatMessagesToStoredMessages, - mapStoredMessagesToChatMessages, } from 
"@langchain/core/messages"; + BaseMessage, + StoredMessage, + StoredMessageData, + mapChatMessagesToStoredMessages, + mapStoredMessagesToChatMessages, +} from "@langchain/core/messages"; /** * Type definition for the input parameters required when instantiating a * CloudflareD1MessageHistory object. diff --git a/libs/langchain-community/src/stores/message/convex.ts b/libs/langchain-community/src/stores/message/convex.ts index f0433c2a69a7..9a7f5d120e1c 100644 --- a/libs/langchain-community/src/stores/message/convex.ts +++ b/libs/langchain-community/src/stores/message/convex.ts @@ -13,11 +13,12 @@ import { IndexNames, makeFunctionReference, } from "convex/server"; +import { BaseListChatMessageHistory } from "@langchain/core/chat_history"; import { - BaseListChatMessageHistory -} from "@langchain/core/chat_history"; -import { BaseMessage, mapChatMessagesToStoredMessages, - mapStoredMessagesToChatMessages, } from "@langchain/core/messages"; + BaseMessage, + mapChatMessagesToStoredMessages, + mapStoredMessagesToChatMessages, +} from "@langchain/core/messages"; /** * Type that defines the config required to initialize the diff --git a/libs/langchain-community/src/stores/message/dynamodb.ts b/libs/langchain-community/src/stores/message/dynamodb.ts index b9b257bf8f75..6bef93aeb88f 100644 --- a/libs/langchain-community/src/stores/message/dynamodb.ts +++ b/libs/langchain-community/src/stores/message/dynamodb.ts @@ -10,11 +10,13 @@ import { AttributeValue, } from "@aws-sdk/client-dynamodb"; +import { BaseListChatMessageHistory } from "@langchain/core/chat_history"; import { - BaseListChatMessageHistory -} from "@langchain/core/chat_history"; -import { BaseMessage, StoredMessage, mapChatMessagesToStoredMessages, - mapStoredMessagesToChatMessages, } from "@langchain/core/messages"; + BaseMessage, + StoredMessage, + mapChatMessagesToStoredMessages, + mapStoredMessagesToChatMessages, +} from "@langchain/core/messages"; /** * Interface defining the fields required to create an 
instance of diff --git a/libs/langchain-community/src/stores/message/firestore.ts b/libs/langchain-community/src/stores/message/firestore.ts index 52f4f14eb999..24d4af56c99c 100644 --- a/libs/langchain-community/src/stores/message/firestore.ts +++ b/libs/langchain-community/src/stores/message/firestore.ts @@ -8,11 +8,13 @@ import { FieldValue, } from "firebase-admin/firestore"; +import { BaseListChatMessageHistory } from "@langchain/core/chat_history"; import { - BaseListChatMessageHistory -} from "@langchain/core/chat_history"; -import { BaseMessage, StoredMessage, mapChatMessagesToStoredMessages, - mapStoredMessagesToChatMessages, } from "@langchain/core/messages"; + BaseMessage, + StoredMessage, + mapChatMessagesToStoredMessages, + mapStoredMessagesToChatMessages, +} from "@langchain/core/messages"; /** * Interface for FirestoreDBChatMessageHistory. It includes the collection diff --git a/libs/langchain-community/src/stores/message/ioredis.ts b/libs/langchain-community/src/stores/message/ioredis.ts index 9b634aa87071..3772250ee2ac 100644 --- a/libs/langchain-community/src/stores/message/ioredis.ts +++ b/libs/langchain-community/src/stores/message/ioredis.ts @@ -1,9 +1,10 @@ import { Redis, RedisOptions } from "ioredis"; +import { BaseListChatMessageHistory } from "@langchain/core/chat_history"; import { - BaseListChatMessageHistory -} from "@langchain/core/chat_history"; -import { BaseMessage, mapChatMessagesToStoredMessages, - mapStoredMessagesToChatMessages, } from "@langchain/core/messages"; + BaseMessage, + mapChatMessagesToStoredMessages, + mapStoredMessagesToChatMessages, +} from "@langchain/core/messages"; /** * Type for the input parameter of the RedisChatMessageHistory diff --git a/libs/langchain-community/src/stores/message/momento.ts b/libs/langchain-community/src/stores/message/momento.ts index 92473c98cfa9..a5c9fe323b3f 100644 --- a/libs/langchain-community/src/stores/message/momento.ts +++ b/libs/langchain-community/src/stores/message/momento.ts @@ 
-7,11 +7,13 @@ import { InvalidArgumentError, CollectionTtl, } from "@gomomento/sdk-core"; +import { BaseListChatMessageHistory } from "@langchain/core/chat_history"; import { - BaseListChatMessageHistory -} from "@langchain/core/chat_history"; -import { BaseMessage, StoredMessage, mapChatMessagesToStoredMessages, - mapStoredMessagesToChatMessages, } from "@langchain/core/messages"; + BaseMessage, + StoredMessage, + mapChatMessagesToStoredMessages, + mapStoredMessagesToChatMessages, +} from "@langchain/core/messages"; import { ensureCacheExists } from "../../utils/momento.js"; /** diff --git a/libs/langchain-community/src/stores/message/mongodb.ts b/libs/langchain-community/src/stores/message/mongodb.ts index 8addfb842c7f..e68ff65f4121 100644 --- a/libs/langchain-community/src/stores/message/mongodb.ts +++ b/libs/langchain-community/src/stores/message/mongodb.ts @@ -1,9 +1,10 @@ import { Collection, Document as MongoDBDocument, ObjectId } from "mongodb"; +import { BaseListChatMessageHistory } from "@langchain/core/chat_history"; import { - BaseListChatMessageHistory -} from "@langchain/core/chat_history"; -import { BaseMessage, mapChatMessagesToStoredMessages, - mapStoredMessagesToChatMessages, } from "@langchain/core/messages"; + BaseMessage, + mapChatMessagesToStoredMessages, + mapStoredMessagesToChatMessages, +} from "@langchain/core/messages"; export interface MongoDBChatMessageHistoryInput { collection: Collection; diff --git a/libs/langchain-community/src/stores/message/planetscale.ts b/libs/langchain-community/src/stores/message/planetscale.ts index a18a5dbc338d..ef72166ab6d7 100644 --- a/libs/langchain-community/src/stores/message/planetscale.ts +++ b/libs/langchain-community/src/stores/message/planetscale.ts @@ -3,11 +3,14 @@ import { Config as PlanetScaleConfig, Connection as PlanetScaleConnection, } from "@planetscale/database"; +import { BaseListChatMessageHistory } from "@langchain/core/chat_history"; import { - BaseListChatMessageHistory -} from 
"@langchain/core/chat_history"; -import { BaseMessage, StoredMessage, StoredMessageData, mapChatMessagesToStoredMessages, - mapStoredMessagesToChatMessages, } from "@langchain/core/messages"; + BaseMessage, + StoredMessage, + StoredMessageData, + mapChatMessagesToStoredMessages, + mapStoredMessagesToChatMessages, +} from "@langchain/core/messages"; /** * Type definition for the input parameters required when instantiating a diff --git a/libs/langchain-community/src/stores/message/redis.ts b/libs/langchain-community/src/stores/message/redis.ts index 5bfab7222262..8aad2b248ae3 100644 --- a/libs/langchain-community/src/stores/message/redis.ts +++ b/libs/langchain-community/src/stores/message/redis.ts @@ -7,11 +7,12 @@ import { RedisFunctions, RedisScripts, } from "redis"; +import { BaseListChatMessageHistory } from "@langchain/core/chat_history"; import { - BaseListChatMessageHistory -} from "@langchain/core/chat_history"; -import { BaseMessage, mapChatMessagesToStoredMessages, - mapStoredMessagesToChatMessages, } from "@langchain/core/messages"; + BaseMessage, + mapChatMessagesToStoredMessages, + mapStoredMessagesToChatMessages, +} from "@langchain/core/messages"; /** * Type for the input to the `RedisChatMessageHistory` constructor. 
diff --git a/libs/langchain-community/src/stores/message/upstash_redis.ts b/libs/langchain-community/src/stores/message/upstash_redis.ts index 591921237b77..b1ca897a0df0 100644 --- a/libs/langchain-community/src/stores/message/upstash_redis.ts +++ b/libs/langchain-community/src/stores/message/upstash_redis.ts @@ -1,9 +1,11 @@ import { Redis, type RedisConfigNodejs } from "@upstash/redis"; +import { BaseListChatMessageHistory } from "@langchain/core/chat_history"; import { - BaseListChatMessageHistory -} from "@langchain/core/chat_history"; -import { BaseMessage, StoredMessage, mapChatMessagesToStoredMessages, - mapStoredMessagesToChatMessages, } from "@langchain/core/messages"; + BaseMessage, + StoredMessage, + mapChatMessagesToStoredMessages, + mapStoredMessagesToChatMessages, +} from "@langchain/core/messages"; /** * Type definition for the input parameters required to initialize an diff --git a/libs/langchain-community/src/stores/message/xata.ts b/libs/langchain-community/src/stores/message/xata.ts index 6ce75337a3a9..705282ef049a 100644 --- a/libs/langchain-community/src/stores/message/xata.ts +++ b/libs/langchain-community/src/stores/message/xata.ts @@ -6,11 +6,14 @@ import { XataApiClient, parseWorkspacesUrlParts, } from "@xata.io/client"; +import { BaseListChatMessageHistory } from "@langchain/core/chat_history"; import { - BaseListChatMessageHistory -} from "@langchain/core/chat_history"; -import { BaseMessage, StoredMessage, StoredMessageData, mapChatMessagesToStoredMessages, - mapStoredMessagesToChatMessages, } from "@langchain/core/messages"; + BaseMessage, + StoredMessage, + StoredMessageData, + mapChatMessagesToStoredMessages, + mapStoredMessagesToChatMessages, +} from "@langchain/core/messages"; /** * An object type that represents the input for the XataChatMessageHistory diff --git a/libs/langchain-community/src/types/assemblyai-types.ts b/libs/langchain-community/src/types/assemblyai-types.ts deleted file mode 100644 index 583248940a1d..000000000000 
--- a/libs/langchain-community/src/types/assemblyai-types.ts +++ /dev/null @@ -1,6 +0,0 @@ -import { BaseServiceParams } from "assemblyai"; -import { Optional } from "./type-utils.js"; - -export type * from "assemblyai"; - -export type AssemblyAIOptions = Optional; diff --git a/langchain/src/vectorstores/closevector/common.ts b/libs/langchain-community/src/vectorstores/closevector/common.ts similarity index 96% rename from langchain/src/vectorstores/closevector/common.ts rename to libs/langchain-community/src/vectorstores/closevector/common.ts index 8afef62324cc..ec62f28b7987 100644 --- a/langchain/src/vectorstores/closevector/common.ts +++ b/libs/langchain-community/src/vectorstores/closevector/common.ts @@ -1,8 +1,8 @@ import type { CloseVectorSaveableVectorStore } from "closevector-common"; -import { Embeddings } from "../../embeddings/base.js"; -import { Document } from "../../document.js"; -import { SaveableVectorStore } from "../base.js"; +import { Embeddings } from "@langchain/core/embeddings"; +import { Document } from "@langchain/core/documents"; +import { SaveableVectorStore } from "@langchain/core/vectorstores"; type CloseVectorCredentials = { key?: string; diff --git a/libs/langchain-community/src/vectorstores/closevector/node.ts b/libs/langchain-community/src/vectorstores/closevector/node.ts new file mode 100644 index 000000000000..c45bf012bcdc --- /dev/null +++ b/libs/langchain-community/src/vectorstores/closevector/node.ts @@ -0,0 +1,182 @@ +import { + CloseVectorHNSWNode, + HierarchicalNSWT, + CloseVectorHNSWLibArgs, + CloseVectorCredentials, +} from "closevector-node"; + +import { Embeddings } from "@langchain/core/embeddings"; +import { Document } from "@langchain/core/documents"; + +import { CloseVector } from "./common.js"; + +/** + * package closevector-node is largely based on hnswlib.ts in the current folder with the following exceptions: + * 1. 
It uses a modified version of hnswlib-node to ensure the generated index can be loaded by closevector_web.ts. + * 2. It adds features to upload and download the index to/from the CDN provided by CloseVector. + * + * For more information, check out https://closevector-docs.getmegaportal.com/ + */ + +/** + * Arguments for creating a CloseVectorNode instance, extending CloseVectorHNSWLibArgs. + */ +export interface CloseVectorNodeArgs + extends CloseVectorHNSWLibArgs { + instance?: CloseVectorHNSWNode; +} + +/** + * Class that implements a vector store using Hierarchical Navigable Small + * World (HNSW) graphs. It extends the SaveableVectorStore class and + * provides methods for adding documents and vectors, performing + * similarity searches, and saving and loading the vector store. + */ +export class CloseVectorNode extends CloseVector { + declare FilterType: (doc: Document) => boolean; + + constructor( + embeddings: Embeddings, + args: CloseVectorNodeArgs, + credentials?: CloseVectorCredentials + ) { + super(embeddings, args, credentials); + if (args.instance) { + this.instance = args.instance; + } else { + this.instance = new CloseVectorHNSWNode(embeddings, args); + } + if (this.credentials?.key) { + this.instance.accessKey = this.credentials.key; + } + if (this.credentials?.secret) { + this.instance.secret = this.credentials.secret; + } + } + + /** + * Method to save the index to the CloseVector CDN. + * @param options + * @param options.description A description of the index. + * @param options.public Whether the index should be public or private. Defaults to false. + * @param options.uuid A UUID for the index. If not provided, a new index will be created. + * @param options.onProgress A callback function that will be called with the progress of the upload. + */ + async saveToCloud( + options: Parameters[0] + ) { + await this.instance.saveToCloud(options); + } + + /** + * Method to load the index from the CloseVector CDN. 
+ * @param options + * @param options.uuid The UUID of the index to be downloaded. + * @param options.credentials The credentials to be used by the CloseVectorNode instance. + * @param options.embeddings The embeddings to be used by the CloseVectorNode instance. + * @param options.onProgress A callback function that will be called with the progress of the download. + */ + static async loadFromCloud( + options: Omit< + Parameters<(typeof CloseVectorHNSWNode)["loadFromCloud"]>[0] & { + embeddings: Embeddings; + credentials: CloseVectorCredentials; + }, + "accessKey" | "secret" + > + ) { + if (!options.credentials.key || !options.credentials.secret) { + throw new Error("key and secret must be provided"); + } + const instance = await CloseVectorHNSWNode.loadFromCloud({ + ...options, + accessKey: options.credentials.key, + secret: options.credentials.secret, + }); + const vectorstore = new this( + options.embeddings, + instance.args, + options.credentials + ); + return vectorstore; + } + + /** + * Static method to load a vector store from a directory. It reads the + * HNSW index, the arguments, and the document store from the directory, + * then creates a new HNSWLib instance with these values. + * @param directory The directory from which to load the vector store. + * @param embeddings The embeddings to be used by the CloseVectorNode instance. + * @returns A Promise that resolves to a new CloseVectorNode instance. + */ + static async load( + directory: string, + embeddings: Embeddings, + credentials?: CloseVectorCredentials + ) { + const instance = await CloseVectorHNSWNode.load(directory, embeddings); + const vectorstore = new this(embeddings, instance.args, credentials); + return vectorstore; + } + + /** + * Static method to create a new CloseVectorWeb instance from texts and metadata. + * It creates a new Document instance for each text and metadata, then + * calls the fromDocuments method to create the CloseVectorWeb instance. 
+ * @param texts The texts to be used to create the documents. + * @param metadatas The metadata to be used to create the documents. + * @param embeddings The embeddings to be used by the CloseVectorWeb instance. + * @param args An optional configuration object for the CloseVectorWeb instance. + * @param credential An optional credential object for the CloseVector API. + * @returns A Promise that resolves to a new CloseVectorWeb instance. + */ + static async fromTexts( + texts: string[], + metadatas: object[] | object, + embeddings: Embeddings, + args?: Record, + credential?: CloseVectorCredentials + ): Promise { + const docs = CloseVector.textsToDocuments(texts, metadatas); + return await CloseVectorNode.fromDocuments( + docs, + embeddings, + args, + credential + ); + } + + /** + * Static method to create a new CloseVectorNode instance from documents. It + * creates a new CloseVectorNode instance, adds the documents to it, then returns + * the instance. + * @param docs The documents to be added to the HNSWLib instance. + * @param embeddings The embeddings to be used by the HNSWLib instance. + * @param args An optional configuration object for the HNSWLib instance. + * @param credentials An optional credential object for the CloseVector API. + * @returns A Promise that resolves to a new CloseVectorNode instance. 
+ */ + static async fromDocuments( + docs: Document[], + embeddings: Embeddings, + args?: Record, + credentials?: CloseVectorCredentials + ): Promise { + const _args: Record = args || { + space: "cosine", + }; + const instance = new this( + embeddings, + _args as unknown as CloseVectorNodeArgs, + credentials + ); + await instance.addDocuments(docs); + return instance; + } + + static async imports(): Promise<{ + HierarchicalNSW: typeof HierarchicalNSWT; + }> { + return CloseVectorHNSWNode.imports(); + } +} diff --git a/libs/langchain-community/src/vectorstores/closevector/web.ts b/libs/langchain-community/src/vectorstores/closevector/web.ts new file mode 100644 index 000000000000..9a0896041df5 --- /dev/null +++ b/libs/langchain-community/src/vectorstores/closevector/web.ts @@ -0,0 +1,179 @@ +import { + CloseVectorHNSWWeb, + HierarchicalNSWT, + CloseVectorHNSWLibArgs, + CloseVectorCredentials, + HnswlibModule, +} from "closevector-web"; + +import { Embeddings } from "@langchain/core/embeddings"; +import { Document } from "@langchain/core/documents"; + +import { CloseVector } from "./common.js"; + +/** + * package closevector-node is largely based on hnswlib.ts in the current folder with the following exceptions: + * 1. It uses a modified version of hnswlib-node to ensure the generated index can be loaded by closevector_web.ts. + * 2. It adds features to upload and download the index to/from the CDN provided by CloseVector. + * + * For more information, check out https://closevector-docs.getmegaportal.com/ + */ + +/** + * Arguments for creating a CloseVectorWeb instance, extending CloseVectorHNSWLibArgs. + */ +export interface CloseVectorWebArgs + extends CloseVectorHNSWLibArgs { + instance?: CloseVectorHNSWWeb; +} + +/** + * Class that implements a vector store using CloseVector, It extends the SaveableVectorStore class and + * provides methods for adding documents and vectors, performing + * similarity searches, and saving and loading the vector store. 
+ */ +export class CloseVectorWeb extends CloseVector { + declare FilterType: (doc: Document) => boolean; + + constructor( + embeddings: Embeddings, + args: CloseVectorWebArgs, + credentials?: CloseVectorCredentials + ) { + super(embeddings, args, credentials); + if (args.instance) { + this.instance = args.instance; + } else { + this.instance = new CloseVectorHNSWWeb(embeddings, args); + } + } + + /** + * Method to save the index to the CloseVector CDN. + * @param options + * @param options.url the upload url generated by the CloseVector API: https://closevector-docs.getmegaportal.com/docs/api/http-api/file-url + * @param options.onProgress a callback function to track the upload progress + */ + async saveToCloud( + options: Parameters[0] & { + uuid?: string; + } + ) { + if (!this.instance.uuid && !options.uuid) { + throw new Error("No uuid provided"); + } + if (!this.instance.uuid) { + this.instance._uuid = options.uuid; + } + await this.save(this.instance.uuid); + await this.instance.saveToCloud(options); + } + + /** + * Method to load the index from the CloseVector CDN. + * @param options + * @param options.url the upload url generated by the CloseVector API: https://closevector-docs.getmegaportal.com/docs/api/http-api/file-url + * @param options.onProgress a callback function to track the upload progress + * @param options.uuid the uuid of the index to be downloaded + * @param options.embeddings the embeddings to be used by the CloseVectorWeb instance + */ + static async loadFromCloud( + options: Parameters[0] & { + embeddings: Embeddings; + credentials?: CloseVectorCredentials; + } + ) { + const instance = await CloseVectorHNSWWeb.loadFromCloud(options); + const vectorstore = new this( + options.embeddings, + instance.args, + options.credentials + ); + return vectorstore; + } + + /** + * Static method to load a vector store from a directory. 
It reads the + * HNSW index, the arguments, and the document store from the directory, + * then creates a new CloseVectorWeb instance with these values. + * @param directory The directory from which to load the vector store. + * @param embeddings The embeddings to be used by the CloseVectorWeb instance. + * @returns A Promise that resolves to a new CloseVectorWeb instance. + */ + static async load( + directory: string, + embeddings: Embeddings, + credentials?: CloseVectorCredentials + ) { + const instance = await CloseVectorHNSWWeb.load(directory, embeddings); + const vectorstore = new this(embeddings, instance.args, credentials); + return vectorstore; + } + + /** + * Static method to create a new CloseVectorWeb instance from texts and metadata. + * It creates a new Document instance for each text and metadata, then + * calls the fromDocuments method to create the CloseVectorWeb instance. + * @param texts The texts to be used to create the documents. + * @param metadatas The metadata to be used to create the documents. + * @param embeddings The embeddings to be used by the CloseVectorWeb instance. + * @param args An optional configuration object for the CloseVectorWeb instance. + * @param credential An optional credential object for the CloseVector API. + * @returns A Promise that resolves to a new CloseVectorWeb instance. + */ + static async fromTexts( + texts: string[], + metadatas: object[] | object, + embeddings: Embeddings, + args?: Record, + credential?: CloseVectorCredentials + ): Promise { + const docs: Document[] = []; + for (let i = 0; i < texts.length; i += 1) { + const metadata = Array.isArray(metadatas) ? metadatas[i] : metadatas; + const newDoc = new Document({ + pageContent: texts[i], + metadata, + }); + docs.push(newDoc); + } + return await CloseVectorWeb.fromDocuments( + docs, + embeddings, + args, + credential + ); + } + + /** + * Static method to create a new CloseVectorWeb instance from documents. 
It + * creates a new CloseVectorWeb instance, adds the documents to it, then returns + * the instance. + * @param docs The documents to be added to the CloseVectorWeb instance. + * @param embeddings The embeddings to be used by the CloseVectorWeb instance. + * @param args An optional configuration object for the CloseVectorWeb instance. + * @param credentials An optional credential object for the CloseVector API. + * @returns A Promise that resolves to a new CloseVectorWeb instance. + */ + static async fromDocuments( + docs: Document[], + embeddings: Embeddings, + args?: Record, + credentials?: CloseVectorCredentials + ): Promise { + const _args: Record = args || { + space: "cosine", + }; + const instance = new this( + embeddings, + _args as unknown as CloseVectorWebArgs, + credentials + ); + await instance.addDocuments(docs); + return instance; + } + + static async imports(): Promise { + return CloseVectorHNSWWeb.imports(); + } +} diff --git a/libs/langchain-community/src/vectorstores/faiss.ts b/libs/langchain-community/src/vectorstores/faiss.ts new file mode 100644 index 000000000000..0403aa89c726 --- /dev/null +++ b/libs/langchain-community/src/vectorstores/faiss.ts @@ -0,0 +1,461 @@ +import type { IndexFlatL2 } from "faiss-node"; +import type { NameRegistry, Parser } from "pickleparser"; +import * as uuid from "uuid"; +import { Embeddings } from "@langchain/core/embeddings"; +import { SaveableVectorStore } from "@langchain/core/vectorstores"; +import { Document } from "@langchain/core/documents"; +import { SynchronousInMemoryDocstore } from "../stores/doc/in_memory.js"; + +/** + * Interface for the arguments required to initialize a FaissStore + * instance. + */ +export interface FaissLibArgs { + docstore?: SynchronousInMemoryDocstore; + index?: IndexFlatL2; + mapping?: Record; +} + +/** + * A class that wraps the FAISS (Facebook AI Similarity Search) vector + * database for efficient similarity search and clustering of dense + * vectors. 
+ */ +export class FaissStore extends SaveableVectorStore { + _index?: IndexFlatL2; + + _mapping: Record; + + docstore: SynchronousInMemoryDocstore; + + args: FaissLibArgs; + + _vectorstoreType(): string { + return "faiss"; + } + + getMapping(): Record { + return this._mapping; + } + + getDocstore(): SynchronousInMemoryDocstore { + return this.docstore; + } + + constructor(embeddings: Embeddings, args: FaissLibArgs) { + super(embeddings, args); + this.args = args; + this._index = args.index; + this._mapping = args.mapping ?? {}; + this.embeddings = embeddings; + this.docstore = args?.docstore ?? new SynchronousInMemoryDocstore(); + } + + /** + * Adds an array of Document objects to the store. + * @param documents An array of Document objects. + * @returns A Promise that resolves when the documents have been added. + */ + async addDocuments(documents: Document[], options?: { ids?: string[] }) { + const texts = documents.map(({ pageContent }) => pageContent); + return this.addVectors( + await this.embeddings.embedDocuments(texts), + documents, + options + ); + } + + public get index(): IndexFlatL2 { + if (!this._index) { + throw new Error( + "Vector store not initialised yet. Try calling `fromTexts`, `fromDocuments` or `fromIndex` first." + ); + } + return this._index; + } + + private set index(index: IndexFlatL2) { + this._index = index; + } + + /** + * Adds an array of vectors and their corresponding Document objects to + * the store. + * @param vectors An array of vectors. + * @param documents An array of Document objects corresponding to the vectors. + * @returns A Promise that resolves with an array of document IDs when the vectors and documents have been added. 
+ */ + async addVectors( + vectors: number[][], + documents: Document[], + options?: { ids?: string[] } + ) { + if (vectors.length === 0) { + return []; + } + if (vectors.length !== documents.length) { + throw new Error(`Vectors and documents must have the same length`); + } + const dv = vectors[0].length; + if (!this._index) { + const { IndexFlatL2 } = await FaissStore.importFaiss(); + this._index = new IndexFlatL2(dv); + } + const d = this.index.getDimension(); + if (dv !== d) { + throw new Error( + `Vectors must have the same length as the number of dimensions (${d})` + ); + } + + const docstoreSize = this.index.ntotal(); + const documentIds = options?.ids ?? documents.map(() => uuid.v4()); + for (let i = 0; i < vectors.length; i += 1) { + const documentId = documentIds[i]; + const id = docstoreSize + i; + this.index.add(vectors[i]); + this._mapping[id] = documentId; + this.docstore.add({ [documentId]: documents[i] }); + } + return documentIds; + } + + /** + * Performs a similarity search in the vector store using a query vector + * and returns the top k results along with their scores. + * @param query A query vector. + * @param k The number of top results to return. + * @returns A Promise that resolves with an array of tuples, each containing a Document and its corresponding score. 
+ */ + async similaritySearchVectorWithScore(query: number[], k: number) { + const d = this.index.getDimension(); + if (query.length !== d) { + throw new Error( + `Query vector must have the same length as the number of dimensions (${d})` + ); + } + if (k > this.index.ntotal()) { + const total = this.index.ntotal(); + console.warn( + `k (${k}) is greater than the number of elements in the index (${total}), setting k to ${total}` + ); + // eslint-disable-next-line no-param-reassign + k = total; + } + const result = this.index.search(query, k); + return result.labels.map((id, index) => { + const uuid = this._mapping[id]; + return [this.docstore.search(uuid), result.distances[index]] as [ + Document, + number + ]; + }); + } + + /** + * Saves the current state of the FaissStore to a specified directory. + * @param directory The directory to save the state to. + * @returns A Promise that resolves when the state has been saved. + */ + async save(directory: string) { + const fs = await import("node:fs/promises"); + const path = await import("node:path"); + await fs.mkdir(directory, { recursive: true }); + await Promise.all([ + this.index.write(path.join(directory, "faiss.index")), + await fs.writeFile( + path.join(directory, "docstore.json"), + JSON.stringify([ + Array.from(this.docstore._docs.entries()), + this._mapping, + ]) + ), + ]); + } + + /** + * Method to delete documents. + * @param params Object containing the IDs of the documents to delete. + * @returns A promise that resolves when the deletion is complete. 
+ */ + async delete(params: { ids: string[] }) { + const documentIds = params.ids; + if (documentIds == null) { + throw new Error("No documentIds provided to delete."); + } + + const mappings = new Map( + Object.entries(this._mapping).map(([key, value]) => [ + parseInt(key, 10), + value, + ]) + ); + const reversedMappings = new Map( + Array.from(mappings, (entry) => [entry[1], entry[0]]) + ); + + const missingIds = new Set( + documentIds.filter((id) => !reversedMappings.has(id)) + ); + if (missingIds.size > 0) { + throw new Error( + `Some specified documentIds do not exist in the current store. DocumentIds not found: ${Array.from( + missingIds + ).join(", ")}` + ); + } + + // eslint-disable-next-line @typescript-eslint/no-non-null-assertion + const indexIdToDelete = documentIds.map((id) => reversedMappings.get(id)!); + + // remove from index + this.index.removeIds(indexIdToDelete); + // remove from docstore + documentIds.forEach((id) => { + this.docstore._docs.delete(id); + }); + // remove from mappings + indexIdToDelete.forEach((id) => { + mappings.delete(id); + }); + + this._mapping = { ...Array.from(mappings.values()) }; + } + + /** + * Merges the current FaissStore with another FaissStore. + * @param targetIndex The FaissStore to merge with. + * @returns A Promise that resolves with an array of document IDs when the merge is complete. 
+ */ + async mergeFrom(targetIndex: FaissStore) { + const targetIndexDimensions = targetIndex.index.getDimension(); + if (!this._index) { + const { IndexFlatL2 } = await FaissStore.importFaiss(); + this._index = new IndexFlatL2(targetIndexDimensions); + } + const d = this.index.getDimension(); + if (targetIndexDimensions !== d) { + throw new Error("Cannot merge indexes with different dimensions."); + } + const targetMapping = targetIndex.getMapping(); + const targetDocstore = targetIndex.getDocstore(); + const targetSize = targetIndex.index.ntotal(); + const documentIds = []; + const currentDocstoreSize = this.index.ntotal(); + for (let i = 0; i < targetSize; i += 1) { + const targetId = targetMapping[i]; + documentIds.push(targetId); + const targetDocument = targetDocstore.search(targetId); + const id = currentDocstoreSize + i; + this._mapping[id] = targetId; + this.docstore.add({ [targetId]: targetDocument }); + } + this.index.mergeFrom(targetIndex.index); + return documentIds; + } + + /** + * Loads a FaissStore from a specified directory. + * @param directory The directory to load the FaissStore from. + * @param embeddings An Embeddings object. + * @returns A Promise that resolves with a new FaissStore instance. 
+ */ + static async load(directory: string, embeddings: Embeddings) { + const fs = await import("node:fs/promises"); + const path = await import("node:path"); + const readStore = (directory: string) => + fs + .readFile(path.join(directory, "docstore.json"), "utf8") + .then(JSON.parse) as Promise< + [Map, Record] + >; + const readIndex = async (directory: string) => { + const { IndexFlatL2 } = await this.importFaiss(); + return IndexFlatL2.read(path.join(directory, "faiss.index")); + }; + const [[docstoreFiles, mapping], index] = await Promise.all([ + readStore(directory), + readIndex(directory), + ]); + const docstore = new SynchronousInMemoryDocstore(new Map(docstoreFiles)); + return new this(embeddings, { docstore, index, mapping }); + } + + static async loadFromPython(directory: string, embeddings: Embeddings) { + const fs = await import("node:fs/promises"); + const path = await import("node:path"); + const { Parser, NameRegistry } = await this.importPickleparser(); + + class PyDocument extends Map { + toDocument(): Document { + return new Document({ + pageContent: this.get("page_content"), + metadata: this.get("metadata"), + }); + } + } + + class PyInMemoryDocstore { + _dict: Map; + + toInMemoryDocstore(): SynchronousInMemoryDocstore { + const s = new SynchronousInMemoryDocstore(); + for (const [key, value] of Object.entries(this._dict)) { + s._docs.set(key, value.toDocument()); + } + return s; + } + } + + const readStore = async (directory: string) => { + const pkl = await fs.readFile( + path.join(directory, "index.pkl"), + "binary" + ); + const buffer = Buffer.from(pkl, "binary"); + + const registry = new NameRegistry() + .register( + "langchain.docstore.in_memory", + "InMemoryDocstore", + PyInMemoryDocstore + ) + .register("langchain.schema", "Document", PyDocument) + .register("langchain.docstore.document", "Document", PyDocument) + .register("langchain.schema.document", "Document", PyDocument) + .register("pathlib", "WindowsPath", (...args) => 
args.join("\\")) + .register("pathlib", "PosixPath", (...args) => args.join("/")); + + const pickleparser = new Parser({ + nameResolver: registry, + }); + const [rawStore, mapping] = + pickleparser.parse<[PyInMemoryDocstore, Record]>( + buffer + ); + const store = rawStore.toInMemoryDocstore(); + return { store, mapping }; + }; + const readIndex = async (directory: string) => { + const { IndexFlatL2 } = await this.importFaiss(); + return IndexFlatL2.read(path.join(directory, "index.faiss")); + }; + const [store, index] = await Promise.all([ + readStore(directory), + readIndex(directory), + ]); + return new this(embeddings, { + docstore: store.store, + index, + mapping: store.mapping, + }); + } + + /** + * Creates a new FaissStore from an array of texts, their corresponding + * metadata, and an Embeddings object. + * @param texts An array of texts. + * @param metadatas An array of metadata corresponding to the texts, or a single metadata object to be used for all texts. + * @param embeddings An Embeddings object. + * @param dbConfig An optional configuration object for the document store. + * @returns A Promise that resolves with a new FaissStore instance. + */ + static async fromTexts( + texts: string[], + metadatas: object[] | object, + embeddings: Embeddings, + dbConfig?: { + docstore?: SynchronousInMemoryDocstore; + } + ): Promise { + const docs: Document[] = []; + for (let i = 0; i < texts.length; i += 1) { + const metadata = Array.isArray(metadatas) ? metadatas[i] : metadatas; + const newDoc = new Document({ + pageContent: texts[i], + metadata, + }); + docs.push(newDoc); + } + return this.fromDocuments(docs, embeddings, dbConfig); + } + + /** + * Creates a new FaissStore from an array of Document objects and an + * Embeddings object. + * @param docs An array of Document objects. + * @param embeddings An Embeddings object. + * @param dbConfig An optional configuration object for the document store. 
+ * @returns A Promise that resolves with a new FaissStore instance. + */ + static async fromDocuments( + docs: Document[], + embeddings: Embeddings, + dbConfig?: { + docstore?: SynchronousInMemoryDocstore; + } + ): Promise { + const args: FaissLibArgs = { + docstore: dbConfig?.docstore, + }; + const instance = new this(embeddings, args); + await instance.addDocuments(docs); + return instance; + } + + /** + * Creates a new FaissStore from an existing FaissStore and an Embeddings + * object. + * @param targetIndex An existing FaissStore. + * @param embeddings An Embeddings object. + * @param dbConfig An optional configuration object for the document store. + * @returns A Promise that resolves with a new FaissStore instance. + */ + static async fromIndex( + targetIndex: FaissStore, + embeddings: Embeddings, + dbConfig?: { + docstore?: SynchronousInMemoryDocstore; + } + ): Promise { + const args: FaissLibArgs = { + docstore: dbConfig?.docstore, + }; + const instance = new this(embeddings, args); + await instance.mergeFrom(targetIndex); + return instance; + } + + static async importFaiss(): Promise<{ IndexFlatL2: typeof IndexFlatL2 }> { + try { + const { + default: { IndexFlatL2 }, + } = await import("faiss-node"); + + return { IndexFlatL2 }; + // eslint-disable-next-line @typescript-eslint/no-explicit-any + } catch (err: any) { + throw new Error( + `Could not import faiss-node. Please install faiss-node as a dependency with, e.g. \`npm install -S faiss-node\`.\n\nError: ${err?.message}` + ); + } + } + + static async importPickleparser(): Promise<{ + Parser: typeof Parser; + NameRegistry: typeof NameRegistry; + }> { + try { + const { + default: { Parser, NameRegistry }, + } = await import("pickleparser"); + + return { Parser, NameRegistry }; + // eslint-disable-next-line @typescript-eslint/no-explicit-any + } catch (err: any) { + throw new Error( + `Could not import pickleparser. Please install pickleparser as a dependency with, e.g. 
\`npm install -S pickleparser\`.\n\nError: ${err?.message}` + ); + } + } +} diff --git a/libs/langchain-community/src/vectorstores/googlevertexai.ts b/libs/langchain-community/src/vectorstores/googlevertexai.ts new file mode 100644 index 000000000000..d693f460f30d --- /dev/null +++ b/libs/langchain-community/src/vectorstores/googlevertexai.ts @@ -0,0 +1,738 @@ +import * as uuid from "uuid"; +import flatten from "flat"; +import { GoogleAuth, GoogleAuthOptions } from "google-auth-library"; +import { VectorStore } from "@langchain/core/vectorstores"; +import { Embeddings } from "@langchain/core/embeddings"; +import { Document, DocumentInput } from "@langchain/core/documents"; +import { + AsyncCaller, + AsyncCallerCallOptions, + AsyncCallerParams, +} from "@langchain/core/utils/async_caller"; + +import { GoogleVertexAIConnection } from "../utils/googlevertexai-connection.js"; +import { Docstore } from "../stores/doc/base.js"; +import { + GoogleVertexAIConnectionParams, + GoogleResponse, + GoogleAbstractedClientOpsMethod, +} from "../types/googlevertexai-types.js"; + +/** + * Allows us to create IdDocument classes that contain the ID. + */ +export interface IdDocumentInput extends DocumentInput { + id?: string; +} + +/** + * A Document that optionally includes the ID of the document. 
+ */ +export class IdDocument extends Document implements IdDocumentInput { + id?: string; + + constructor(fields: IdDocumentInput) { + super(fields); + this.id = fields.id; + } +} + +interface IndexEndpointConnectionParams + extends GoogleVertexAIConnectionParams { + indexEndpoint: string; +} + +interface DeployedIndex { + id: string; + index: string; + // There are other attributes, but we don't care about them right now +} + +interface IndexEndpointResponse extends GoogleResponse { + data: { + deployedIndexes: DeployedIndex[]; + publicEndpointDomainName: string; + // There are other attributes, but we don't care about them right now + }; +} + +class IndexEndpointConnection extends GoogleVertexAIConnection< + AsyncCallerCallOptions, + IndexEndpointResponse, + GoogleAuthOptions +> { + indexEndpoint: string; + + constructor(fields: IndexEndpointConnectionParams, caller: AsyncCaller) { + super(fields, caller, new GoogleAuth(fields.authOptions)); + + this.indexEndpoint = fields.indexEndpoint; + } + + async buildUrl(): Promise { + const projectId = await this.client.getProjectId(); + const url = `https://${this.endpoint}/${this.apiVersion}/projects/${projectId}/locations/${this.location}/indexEndpoints/${this.indexEndpoint}`; + return url; + } + + buildMethod(): GoogleAbstractedClientOpsMethod { + return "GET"; + } + + async request( + options: AsyncCallerCallOptions + ): Promise { + return this._request(undefined, options); + } +} + +/** + * Used to represent parameters that are necessary to delete documents + * from the matching engine. 
These must be a list of string IDs + */ +export interface MatchingEngineDeleteParams { + ids: string[]; +} + +interface RemoveDatapointParams + extends GoogleVertexAIConnectionParams { + index: string; +} + +interface RemoveDatapointRequest { + datapointIds: string[]; +} + +interface RemoveDatapointResponse extends GoogleResponse { + // Should be empty +} + +class RemoveDatapointConnection extends GoogleVertexAIConnection< + AsyncCallerCallOptions, + RemoveDatapointResponse, + GoogleAuthOptions +> { + index: string; + + constructor(fields: RemoveDatapointParams, caller: AsyncCaller) { + super(fields, caller, new GoogleAuth(fields.authOptions)); + + this.index = fields.index; + } + + async buildUrl(): Promise { + const projectId = await this.client.getProjectId(); + const url = `https://${this.endpoint}/${this.apiVersion}/projects/${projectId}/locations/${this.location}/indexes/${this.index}:removeDatapoints`; + return url; + } + + buildMethod(): GoogleAbstractedClientOpsMethod { + return "POST"; + } + + async request( + datapointIds: string[], + options: AsyncCallerCallOptions + ): Promise { + const data: RemoveDatapointRequest = { + datapointIds, + }; + return this._request(data, options); + } +} + +interface UpsertDatapointParams + extends GoogleVertexAIConnectionParams { + index: string; +} + +export interface Restriction { + namespace: string; + allowList?: string[]; + denyList?: string[]; +} + +interface CrowdingTag { + crowdingAttribute: string; +} + +interface IndexDatapoint { + datapointId: string; + featureVector: number[]; + restricts?: Restriction[]; + crowdingTag?: CrowdingTag; +} + +interface UpsertDatapointRequest { + datapoints: IndexDatapoint[]; +} + +interface UpsertDatapointResponse extends GoogleResponse { + // Should be empty +} + +class UpsertDatapointConnection extends GoogleVertexAIConnection< + AsyncCallerCallOptions, + UpsertDatapointResponse, + GoogleAuthOptions +> { + index: string; + + constructor(fields: UpsertDatapointParams, caller: 
AsyncCaller) { + super(fields, caller, new GoogleAuth(fields.authOptions)); + + this.index = fields.index; + } + + async buildUrl(): Promise { + const projectId = await this.client.getProjectId(); + const url = `https://${this.endpoint}/${this.apiVersion}/projects/${projectId}/locations/${this.location}/indexes/${this.index}:upsertDatapoints`; + return url; + } + + buildMethod(): GoogleAbstractedClientOpsMethod { + return "POST"; + } + + async request( + datapoints: IndexDatapoint[], + options: AsyncCallerCallOptions + ): Promise { + const data: UpsertDatapointRequest = { + datapoints, + }; + return this._request(data, options); + } +} + +interface FindNeighborsConnectionParams + extends GoogleVertexAIConnectionParams { + indexEndpoint: string; + + deployedIndexId: string; +} + +interface FindNeighborsRequestQuery { + datapoint: { + datapointId: string; + featureVector: number[]; + restricts?: Restriction[]; + }; + neighborCount: number; +} + +interface FindNeighborsRequest { + deployedIndexId: string; + queries: FindNeighborsRequestQuery[]; +} + +interface FindNeighborsResponseNeighbor { + datapoint: { + datapointId: string; + crowdingTag: { + crowdingTagAttribute: string; + }; + }; + distance: number; +} + +interface FindNeighborsResponseNearestNeighbor { + id: string; + neighbors: FindNeighborsResponseNeighbor[]; +} + +interface FindNeighborsResponse extends GoogleResponse { + data: { + nearestNeighbors: FindNeighborsResponseNearestNeighbor[]; + }; +} + +class FindNeighborsConnection + extends GoogleVertexAIConnection< + AsyncCallerCallOptions, + FindNeighborsResponse, + GoogleAuthOptions + > + implements FindNeighborsConnectionParams +{ + indexEndpoint: string; + + deployedIndexId: string; + + constructor(params: FindNeighborsConnectionParams, caller: AsyncCaller) { + super(params, caller, new GoogleAuth(params.authOptions)); + + this.indexEndpoint = params.indexEndpoint; + this.deployedIndexId = params.deployedIndexId; + } + + async buildUrl(): Promise { + 
const projectId = await this.client.getProjectId(); + const url = `https://${this.endpoint}/${this.apiVersion}/projects/${projectId}/locations/${this.location}/indexEndpoints/${this.indexEndpoint}:findNeighbors`; + return url; + } + + buildMethod(): GoogleAbstractedClientOpsMethod { + return "POST"; + } + + async request( + request: FindNeighborsRequest, + options: AsyncCallerCallOptions + ): Promise { + return this._request(request, options); + } +} + +/** + * Information about the Matching Engine public API endpoint. + * Primarily exported to allow for testing. + */ +export interface PublicAPIEndpointInfo { + apiEndpoint?: string; + + deployedIndexId?: string; +} + +/** + * Parameters necessary to configure the Matching Engine. + */ +export interface MatchingEngineArgs + extends GoogleVertexAIConnectionParams, + IndexEndpointConnectionParams, + UpsertDatapointParams { + docstore: Docstore; + + callerParams?: AsyncCallerParams; + + callerOptions?: AsyncCallerCallOptions; + + apiEndpoint?: string; + + deployedIndexId?: string; +} + +/** + * A class that represents a connection to a Google Vertex AI Matching Engine + * instance. + */ +export class MatchingEngine extends VectorStore implements MatchingEngineArgs { + declare FilterType: Restriction[]; + + /** + * Docstore that retains the document, stored by ID + */ + docstore: Docstore; + + /** + * The host to connect to for queries and upserts. 
+ */ + apiEndpoint: string; + + apiVersion = "v1"; + + endpoint = "us-central1-aiplatform.googleapis.com"; + + location = "us-central1"; + + /** + * The id for the index endpoint + */ + indexEndpoint: string; + + /** + * The id for the index + */ + index: string; + + /** + * The id for the "deployed index", which is an identifier in the + * index endpoint that references the index (but is not the index id) + */ + deployedIndexId: string; + + callerParams: AsyncCallerParams; + + callerOptions: AsyncCallerCallOptions; + + caller: AsyncCaller; + + indexEndpointClient: IndexEndpointConnection; + + removeDatapointClient: RemoveDatapointConnection; + + upsertDatapointClient: UpsertDatapointConnection; + + constructor(embeddings: Embeddings, args: MatchingEngineArgs) { + super(embeddings, args); + + this.embeddings = embeddings; + this.docstore = args.docstore; + + this.apiEndpoint = args.apiEndpoint ?? this.apiEndpoint; + this.deployedIndexId = args.deployedIndexId ?? this.deployedIndexId; + + this.apiVersion = args.apiVersion ?? this.apiVersion; + this.endpoint = args.endpoint ?? this.endpoint; + this.location = args.location ?? this.location; + this.indexEndpoint = args.indexEndpoint ?? this.indexEndpoint; + this.index = args.index ?? this.index; + + this.callerParams = args.callerParams ?? this.callerParams; + this.callerOptions = args.callerOptions ?? 
this.callerOptions; + this.caller = new AsyncCaller(this.callerParams || {}); + + const indexClientParams: IndexEndpointConnectionParams = { + endpoint: this.endpoint, + location: this.location, + apiVersion: this.apiVersion, + indexEndpoint: this.indexEndpoint, + }; + this.indexEndpointClient = new IndexEndpointConnection( + indexClientParams, + this.caller + ); + + const removeClientParams: RemoveDatapointParams = { + endpoint: this.endpoint, + location: this.location, + apiVersion: this.apiVersion, + index: this.index, + }; + this.removeDatapointClient = new RemoveDatapointConnection( + removeClientParams, + this.caller + ); + + const upsertClientParams: UpsertDatapointParams = { + endpoint: this.endpoint, + location: this.location, + apiVersion: this.apiVersion, + index: this.index, + }; + this.upsertDatapointClient = new UpsertDatapointConnection( + upsertClientParams, + this.caller + ); + } + + _vectorstoreType(): string { + return "googlevertexai"; + } + + async addDocuments(documents: Document[]): Promise { + const texts: string[] = documents.map((doc) => doc.pageContent); + const vectors: number[][] = await this.embeddings.embedDocuments(texts); + return this.addVectors(vectors, documents); + } + + async addVectors(vectors: number[][], documents: Document[]): Promise { + if (vectors.length !== documents.length) { + throw new Error(`Vectors and metadata must have the same length`); + } + const datapoints: IndexDatapoint[] = vectors.map((vector, idx) => + this.buildDatapoint(vector, documents[idx]) + ); + const options = {}; + const response = await this.upsertDatapointClient.request( + datapoints, + options + ); + if (Object.keys(response?.data ?? 
{}).length === 0) { + // Nothing in the response in the body means we saved it ok + const idDoc = documents as IdDocument[]; + const docsToStore: Record = {}; + idDoc.forEach((doc) => { + if (doc.id) { + docsToStore[doc.id] = doc; + } + }); + await this.docstore.add(docsToStore); + } + } + + // TODO: Refactor this into a utility type and use with pinecone as well? + // eslint-disable-next-line @typescript-eslint/no-explicit-any + cleanMetadata(documentMetadata: Record): { + [key: string]: string | number | boolean | string[] | null; + } { + type metadataType = { + [key: string]: string | number | boolean | string[] | null; + }; + + function getStringArrays( + prefix: string, + // eslint-disable-next-line @typescript-eslint/no-explicit-any + m: Record + ): Record { + let ret: Record = {}; + + Object.keys(m).forEach((key) => { + const newPrefix = prefix.length > 0 ? `${prefix}.${key}` : key; + const val = m[key]; + if (!val) { + // Ignore it + } else if (Array.isArray(val)) { + // Make sure everything in the array is a string + ret[newPrefix] = val.map((v) => `${v}`); + } else if (typeof val === "object") { + const subArrays = getStringArrays(newPrefix, val); + ret = { ...ret, ...subArrays }; + } + }); + + return ret; + } + + const stringArrays: Record = getStringArrays( + "", + documentMetadata + ); + + const flatMetadata: metadataType = flatten(documentMetadata); + Object.keys(flatMetadata).forEach((key) => { + Object.keys(stringArrays).forEach((arrayKey) => { + const matchKey = `${arrayKey}.`; + if (key.startsWith(matchKey)) { + delete flatMetadata[key]; + } + }); + }); + + const metadata: metadataType = { + ...flatMetadata, + ...stringArrays, + }; + return metadata; + } + + /** + * Given the metadata from a document, convert it to an array of Restriction + * objects that may be passed to the Matching Engine and stored. + * The default implementation flattens any metadata and includes it as + * an "allowList". 
Subclasses can choose to convert some of these to + * "denyList" items or to add additional restrictions (for example, to format + * dates into a different structure or to add additional restrictions + * based on the date). + * @param documentMetadata - The metadata from a document + * @returns a Restriction[] (or an array of a subclass, from the FilterType) + */ + metadataToRestrictions( + // eslint-disable-next-line @typescript-eslint/no-explicit-any + documentMetadata: Record + ): this["FilterType"] { + const metadata = this.cleanMetadata(documentMetadata); + + const restrictions: this["FilterType"] = []; + for (const key of Object.keys(metadata)) { + // Make sure the value is an array (or that we'll ignore it) + let valArray; + const val = metadata[key]; + if (val === null) { + valArray = null; + } else if (Array.isArray(val) && val.length > 0) { + valArray = val; + } else { + valArray = [`${val}`]; + } + + // Add to the restrictions if we do have a valid value + if (valArray) { + // Determine if this key is for the allowList or denyList + // TODO: get which ones should be on the deny list + const listType = "allowList"; + + // Create the restriction + const restriction: Restriction = { + namespace: key, + [listType]: valArray, + }; + + // Add it to the restriction list + restrictions.push(restriction); + } + } + return restrictions; + } + + /** + * Create an index datapoint for the vector and document id. + * If an id does not exist, create it and set the document to its value. 
+ * @param vector + * @param document + */ + buildDatapoint(vector: number[], document: IdDocument): IndexDatapoint { + if (!document.id) { + // eslint-disable-next-line no-param-reassign + document.id = uuid.v4(); + } + const ret: IndexDatapoint = { + datapointId: document.id, + featureVector: vector, + }; + const restrictions = this.metadataToRestrictions(document.metadata); + if (restrictions?.length > 0) { + ret.restricts = restrictions; + } + return ret; + } + + async delete(params: MatchingEngineDeleteParams): Promise { + const options = {}; + await this.removeDatapointClient.request(params.ids, options); + } + + async similaritySearchVectorWithScore( + query: number[], + k: number, + filter?: this["FilterType"] + ): Promise<[Document, number][]> { + // Format the query into the request + const deployedIndexId = await this.getDeployedIndexId(); + const requestQuery: FindNeighborsRequestQuery = { + neighborCount: k, + datapoint: { + datapointId: `0`, + featureVector: query, + }, + }; + if (filter) { + requestQuery.datapoint.restricts = filter; + } + const request: FindNeighborsRequest = { + deployedIndexId, + queries: [requestQuery], + }; + + // Build the connection. + // Has to be done here, since we defer getting the endpoint until + // we need it. + const apiEndpoint = await this.getPublicAPIEndpoint(); + const findNeighborsParams: FindNeighborsConnectionParams = { + endpoint: apiEndpoint, + indexEndpoint: this.indexEndpoint, + apiVersion: this.apiVersion, + location: this.location, + deployedIndexId, + }; + const connection = new FindNeighborsConnection( + findNeighborsParams, + this.caller + ); + + // Make the call + const options = {}; + const response = await connection.request(request, options); + + // Get the document for each datapoint id and return them + const nearestNeighbors = response?.data?.nearestNeighbors ?? []; + const nearestNeighbor = nearestNeighbors[0]; + const neighbors = nearestNeighbor?.neighbors ?? 
[]; + const ret: [Document, number][] = await Promise.all( + neighbors.map(async (neighbor) => { + const id = neighbor?.datapoint?.datapointId; + const distance = neighbor?.distance; + let doc: IdDocument; + try { + doc = await this.docstore.search(id); + } catch (xx) { + // Documents that are in the index are returned, even if they + // are not in the document store, to allow for some way to get + // the id so they can be deleted. + console.error(xx); + console.warn( + [ + `Document with id "${id}" is missing from the backing docstore.`, + `This can occur if you clear the docstore without deleting from the corresponding Matching Engine index.`, + `To resolve this, you should call .delete() with this id as part of the "ids" parameter.`, + ].join("\n") + ); + doc = new Document({ pageContent: `Missing document ${id}` }); + } + doc.id ??= id; + return [doc, distance]; + }) + ); + + return ret; + } + + /** + * For this index endpoint, figure out what API Endpoint URL and deployed + * index ID should be used to do upserts and queries. + * Also sets the `apiEndpoint` and `deployedIndexId` property for future use. + * @return The URL + */ + async determinePublicAPIEndpoint(): Promise { + const response: IndexEndpointResponse = + await this.indexEndpointClient.request(this.callerOptions); + + // Get the endpoint + const publicEndpointDomainName = response?.data?.publicEndpointDomainName; + this.apiEndpoint = publicEndpointDomainName; + + // Determine which of the deployed indexes match the index id + // and get the deployed index id. The list of deployed index ids + // contain the "index name" or path, but not the index id by itself, + // so we need to extract it from the name + const indexPathPattern = /projects\/.+\/locations\/.+\/indexes\/(.+)$/; + const deployedIndexes = response?.data?.deployedIndexes ?? 
[]; + const deployedIndex = deployedIndexes.find((index) => { + const deployedIndexPath = index.index; + const match = deployedIndexPath.match(indexPathPattern); + if (match) { + const [, potentialIndexId] = match; + if (potentialIndexId === this.index) { + return true; + } + } + return false; + }); + if (deployedIndex) { + this.deployedIndexId = deployedIndex.id; + } + + return { + apiEndpoint: this.apiEndpoint, + deployedIndexId: this.deployedIndexId, + }; + } + + async getPublicAPIEndpoint(): Promise { + return ( + this.apiEndpoint ?? (await this.determinePublicAPIEndpoint()).apiEndpoint + ); + } + + async getDeployedIndexId(): Promise { + return ( + this.deployedIndexId ?? + (await this.determinePublicAPIEndpoint()).deployedIndexId + ); + } + + static async fromTexts( + texts: string[], + metadatas: object[] | object, + embeddings: Embeddings, + dbConfig: MatchingEngineArgs + ): Promise { + const docs: Document[] = texts.map( + (text, index): Document => ({ + pageContent: text, + metadata: Array.isArray(metadatas) ? 
metadatas[index] : metadatas, + }) + ); + return this.fromDocuments(docs, embeddings, dbConfig); + } + + static async fromDocuments( + docs: Document[], + embeddings: Embeddings, + dbConfig: MatchingEngineArgs + ): Promise { + const ret = new MatchingEngine(embeddings, dbConfig); + await ret.addDocuments(docs); + return ret; + } +} diff --git a/libs/langchain-community/src/vectorstores/hnswlib.ts b/libs/langchain-community/src/vectorstores/hnswlib.ts new file mode 100644 index 000000000000..86d896a70566 --- /dev/null +++ b/libs/langchain-community/src/vectorstores/hnswlib.ts @@ -0,0 +1,354 @@ +import type { + HierarchicalNSW as HierarchicalNSWT, + SpaceName, +} from "hnswlib-node"; +import { Embeddings } from "@langchain/core/embeddings"; +import { SaveableVectorStore } from "@langchain/core/vectorstores"; +import { Document } from "@langchain/core/documents"; +import { SynchronousInMemoryDocstore } from "../stores/doc/in_memory.js"; + +/** + * Interface for the base configuration of HNSWLib. It includes the space + * name and the number of dimensions. + */ +export interface HNSWLibBase { + space: SpaceName; + numDimensions?: number; +} + +/** + * Interface for the arguments that can be passed to the HNSWLib + * constructor. It extends HNSWLibBase and includes properties for the + * document store and HNSW index. + */ +export interface HNSWLibArgs extends HNSWLibBase { + docstore?: SynchronousInMemoryDocstore; + index?: HierarchicalNSWT; +} + +/** + * Class that implements a vector store using Hierarchical Navigable Small + * World (HNSW) graphs. It extends the SaveableVectorStore class and + * provides methods for adding documents and vectors, performing + * similarity searches, and saving and loading the vector store. 
+ */ +export class HNSWLib extends SaveableVectorStore { + declare FilterType: (doc: Document) => boolean; + + _index?: HierarchicalNSWT; + + docstore: SynchronousInMemoryDocstore; + + args: HNSWLibBase; + + _vectorstoreType(): string { + return "hnswlib"; + } + + constructor(embeddings: Embeddings, args: HNSWLibArgs) { + super(embeddings, args); + this._index = args.index; + this.args = args; + this.embeddings = embeddings; + this.docstore = args?.docstore ?? new SynchronousInMemoryDocstore(); + } + + /** + * Method to add documents to the vector store. It first converts the + * documents to vectors using the embeddings, then adds the vectors to the + * vector store. + * @param documents The documents to be added to the vector store. + * @returns A Promise that resolves when the documents have been added. + */ + async addDocuments(documents: Document[]): Promise { + const texts = documents.map(({ pageContent }) => pageContent); + return this.addVectors( + await this.embeddings.embedDocuments(texts), + documents + ); + } + + private static async getHierarchicalNSW(args: HNSWLibBase) { + const { HierarchicalNSW } = await HNSWLib.imports(); + if (!args.space) { + throw new Error("hnswlib-node requires a space argument"); + } + if (args.numDimensions === undefined) { + throw new Error("hnswlib-node requires a numDimensions argument"); + } + return new HierarchicalNSW(args.space, args.numDimensions); + } + + private async initIndex(vectors: number[][]) { + if (!this._index) { + if (this.args.numDimensions === undefined) { + this.args.numDimensions = vectors[0].length; + } + this.index = await HNSWLib.getHierarchicalNSW(this.args); + } + if (!this.index.getCurrentCount()) { + this.index.initIndex(vectors.length); + } + } + + public get index(): HierarchicalNSWT { + if (!this._index) { + throw new Error( + "Vector store not initialised yet. Try calling `addTexts` first." 
+ ); + } + return this._index; + } + + private set index(index: HierarchicalNSWT) { + this._index = index; + } + + /** + * Method to add vectors to the vector store. It first initializes the + * index if it hasn't been initialized yet, then adds the vectors to the + * index and the documents to the document store. + * @param vectors The vectors to be added to the vector store. + * @param documents The documents corresponding to the vectors. + * @returns A Promise that resolves when the vectors and documents have been added. + */ + async addVectors(vectors: number[][], documents: Document[]) { + if (vectors.length === 0) { + return; + } + await this.initIndex(vectors); + + // TODO here we could optionally normalise the vectors to unit length + // so that dot product is equivalent to cosine similarity, like this + // https://github.com/nmslib/hnswlib/issues/384#issuecomment-1155737730 + // While we only support OpenAI embeddings this isn't necessary + if (vectors.length !== documents.length) { + throw new Error(`Vectors and metadatas must have the same length`); + } + if (vectors[0].length !== this.args.numDimensions) { + throw new Error( + `Vectors must have the same length as the number of dimensions (${this.args.numDimensions})` + ); + } + const capacity = this.index.getMaxElements(); + const needed = this.index.getCurrentCount() + vectors.length; + if (needed > capacity) { + this.index.resizeIndex(needed); + } + const docstoreSize = this.index.getCurrentCount(); + const toSave: Record = {}; + for (let i = 0; i < vectors.length; i += 1) { + this.index.addPoint(vectors[i], docstoreSize + i); + toSave[docstoreSize + i] = documents[i]; + } + this.docstore.add(toSave); + } + + /** + * Method to perform a similarity search in the vector store using a query + * vector. It returns the k most similar documents along with their + * similarity scores. An optional filter function can be provided to + * filter the documents. + * @param query The query vector. 
+ * @param k The number of most similar documents to return. + * @param filter An optional filter function to filter the documents. + * @returns A Promise that resolves to an array of tuples, where each tuple contains a document and its similarity score. + */ + async similaritySearchVectorWithScore( + query: number[], + k: number, + filter?: this["FilterType"] + ) { + if (this.args.numDimensions && !this._index) { + await this.initIndex([[]]); + } + if (query.length !== this.args.numDimensions) { + throw new Error( + `Query vector must have the same length as the number of dimensions (${this.args.numDimensions})` + ); + } + if (k > this.index.getCurrentCount()) { + const total = this.index.getCurrentCount(); + console.warn( + `k (${k}) is greater than the number of elements in the index (${total}), setting k to ${total}` + ); + // eslint-disable-next-line no-param-reassign + k = total; + } + const filterFunction = (label: number): boolean => { + if (!filter) { + return true; + } + const document = this.docstore.search(String(label)); + // eslint-disable-next-line no-instanceof/no-instanceof + if (typeof document !== "string") { + return filter(document); + } + return false; + }; + const result = this.index.searchKnn( + query, + k, + filter ? filterFunction : undefined + ); + return result.neighbors.map( + (docIndex, resultIndex) => + [ + this.docstore.search(String(docIndex)), + result.distances[resultIndex], + ] as [Document, number] + ); + } + + /** + * Method to delete the vector store from a directory. It deletes the + * hnswlib.index file, the docstore.json file, and the args.json file from + * the directory. + * @param params An object with a directory property that specifies the directory from which to delete the vector store. + * @returns A Promise that resolves when the vector store has been deleted. 
+ */ + async delete(params: { directory: string }) { + const fs = await import("node:fs/promises"); + const path = await import("node:path"); + try { + await fs.access(path.join(params.directory, "hnswlib.index")); + } catch (err) { + throw new Error( + `Directory ${params.directory} does not contain a hnswlib.index file.` + ); + } + + await Promise.all([ + await fs.rm(path.join(params.directory, "hnswlib.index"), { + force: true, + }), + await fs.rm(path.join(params.directory, "docstore.json"), { + force: true, + }), + await fs.rm(path.join(params.directory, "args.json"), { force: true }), + ]); + } + + /** + * Method to save the vector store to a directory. It saves the HNSW + * index, the arguments, and the document store to the directory. + * @param directory The directory to which to save the vector store. + * @returns A Promise that resolves when the vector store has been saved. + */ + async save(directory: string) { + const fs = await import("node:fs/promises"); + const path = await import("node:path"); + await fs.mkdir(directory, { recursive: true }); + await Promise.all([ + this.index.writeIndex(path.join(directory, "hnswlib.index")), + await fs.writeFile( + path.join(directory, "args.json"), + JSON.stringify(this.args) + ), + await fs.writeFile( + path.join(directory, "docstore.json"), + JSON.stringify(Array.from(this.docstore._docs.entries())) + ), + ]); + } + + /** + * Static method to load a vector store from a directory. It reads the + * HNSW index, the arguments, and the document store from the directory, + * then creates a new HNSWLib instance with these values. + * @param directory The directory from which to load the vector store. + * @param embeddings The embeddings to be used by the HNSWLib instance. + * @returns A Promise that resolves to a new HNSWLib instance. 
+ */ + static async load(directory: string, embeddings: Embeddings) { + const fs = await import("node:fs/promises"); + const path = await import("node:path"); + const args = JSON.parse( + await fs.readFile(path.join(directory, "args.json"), "utf8") + ); + const index = await HNSWLib.getHierarchicalNSW(args); + const [docstoreFiles] = await Promise.all([ + fs + .readFile(path.join(directory, "docstore.json"), "utf8") + .then(JSON.parse), + index.readIndex(path.join(directory, "hnswlib.index")), + ]); + args.docstore = new SynchronousInMemoryDocstore(new Map(docstoreFiles)); + + args.index = index; + + return new HNSWLib(embeddings, args); + } + + /** + * Static method to create a new HNSWLib instance from texts and metadata. + * It creates a new Document instance for each text and metadata, then + * calls the fromDocuments method to create the HNSWLib instance. + * @param texts The texts to be used to create the documents. + * @param metadatas The metadata to be used to create the documents. + * @param embeddings The embeddings to be used by the HNSWLib instance. + * @param dbConfig An optional configuration object for the document store. + * @returns A Promise that resolves to a new HNSWLib instance. + */ + static async fromTexts( + texts: string[], + metadatas: object[] | object, + embeddings: Embeddings, + dbConfig?: { + docstore?: SynchronousInMemoryDocstore; + } + ): Promise { + const docs: Document[] = []; + for (let i = 0; i < texts.length; i += 1) { + const metadata = Array.isArray(metadatas) ? metadatas[i] : metadatas; + const newDoc = new Document({ + pageContent: texts[i], + metadata, + }); + docs.push(newDoc); + } + return HNSWLib.fromDocuments(docs, embeddings, dbConfig); + } + + /** + * Static method to create a new HNSWLib instance from documents. It + * creates a new HNSWLib instance, adds the documents to it, then returns + * the instance. + * @param docs The documents to be added to the HNSWLib instance. 
+ * @param embeddings The embeddings to be used by the HNSWLib instance. + * @param dbConfig An optional configuration object for the document store. + * @returns A Promise that resolves to a new HNSWLib instance. + */ + static async fromDocuments( + docs: Document[], + embeddings: Embeddings, + dbConfig?: { + docstore?: SynchronousInMemoryDocstore; + } + ): Promise { + const args: HNSWLibArgs = { + docstore: dbConfig?.docstore, + space: "cosine", + }; + const instance = new this(embeddings, args); + await instance.addDocuments(docs); + return instance; + } + + static async imports(): Promise<{ + HierarchicalNSW: typeof HierarchicalNSWT; + }> { + try { + const { + default: { HierarchicalNSW }, + } = await import("hnswlib-node"); + + return { HierarchicalNSW }; + // eslint-disable-next-line @typescript-eslint/no-explicit-any + } catch (err: any) { + throw new Error( + `Could not import hnswlib-node. Please install hnswlib-node as a dependency with, e.g. \`npm install -S hnswlib-node\`.\n\nError: ${err?.message}` + ); + } + } +} diff --git a/libs/langchain-community/src/vectorstores/momento_vector_index.ts b/libs/langchain-community/src/vectorstores/momento_vector_index.ts new file mode 100644 index 000000000000..e9623ee1a131 --- /dev/null +++ b/libs/langchain-community/src/vectorstores/momento_vector_index.ts @@ -0,0 +1,402 @@ +/* eslint-disable no-instanceof/no-instanceof */ +/* eslint-disable @typescript-eslint/no-explicit-any */ +import { + ALL_VECTOR_METADATA, + IVectorIndexClient, + VectorIndexItem, + CreateVectorIndex, + VectorUpsertItemBatch, + VectorDeleteItemBatch, + VectorSearch, + VectorSearchAndFetchVectors, +} from "@gomomento/sdk-core"; +import * as uuid from "uuid"; +import { Document } from "@langchain/core/documents"; +import { Embeddings } from "@langchain/core/embeddings"; +import { + MaxMarginalRelevanceSearchOptions, + VectorStore, +} from "@langchain/core/vectorstores"; +import { maximalMarginalRelevance } from "@langchain/core/utils/math"; + 
+export interface DocumentProps { + ids: string[]; +} + +export interface MomentoVectorIndexLibArgs { + /** + * The Momento Vector Index client. + */ + client: IVectorIndexClient; + /** + * The name of the index to use to store the data. + * Defaults to "default". + */ + indexName?: string; + /** + * The name of the metadata field to use to store the text of the document. + * Defaults to "text". + */ + textField?: string; + /** + * Whether to create the index if it does not already exist. + * Defaults to true. + */ + ensureIndexExists?: boolean; +} + +export interface DeleteProps { + /** + * The ids of the documents to delete. + */ + ids: string[]; +} + +/** + * A vector store that uses the Momento Vector Index. + * + * @remarks + * To sign up for a free Momento account, visit https://console.gomomento.com. + */ +export class MomentoVectorIndex extends VectorStore { + private client: IVectorIndexClient; + + private indexName: string; + + private textField: string; + + private _ensureIndexExists: boolean; + + _vectorstoreType(): string { + return "momento"; + } + + /** + * Creates a new `MomentoVectorIndex` instance. + * @param embeddings The embeddings instance to use to generate embeddings from documents. + * @param args The arguments to use to configure the vector store. + */ + constructor(embeddings: Embeddings, args: MomentoVectorIndexLibArgs) { + super(embeddings, args); + + this.embeddings = embeddings; + this.client = args.client; + this.indexName = args.indexName ?? "default"; + this.textField = args.textField ?? "text"; + this._ensureIndexExists = args.ensureIndexExists ?? true; + } + + /** + * Returns the Momento Vector Index client. + * @returns The Momento Vector Index client. + */ + public getClient(): IVectorIndexClient { + return this.client; + } + + /** + * Creates the index if it does not already exist. + * @param numDimensions The number of dimensions of the vectors to be stored in the index. 
+ * @returns Promise that resolves to true if the index was created, false if it already existed. + */ + private async ensureIndexExists(numDimensions: number): Promise { + const response = await this.client.createIndex( + this.indexName, + numDimensions + ); + if (response instanceof CreateVectorIndex.Success) { + return true; + } else if (response instanceof CreateVectorIndex.AlreadyExists) { + return false; + } else if (response instanceof CreateVectorIndex.Error) { + throw new Error(response.toString()); + } else { + throw new Error(`Unknown response type: ${response.toString()}`); + } + } + + /** + * Converts the documents to a format that can be stored in the index. + * + * This is necessary because the Momento Vector Index requires that the metadata + * be a map of strings to strings. + * @param vectors The vectors to convert. + * @param documents The documents to convert. + * @param ids The ids to convert. + * @returns The converted documents. + */ + private prepareItemBatch( + vectors: number[][], + documents: Document>[], + ids: string[] + ): VectorIndexItem[] { + return vectors.map((vector, idx) => ({ + id: ids[idx], + vector, + metadata: { + ...documents[idx].metadata, + [this.textField]: documents[idx].pageContent, + }, + })); + } + + /** + * Adds vectors to the index. + * + * @remarks If the index does not already exist, it will be created if `ensureIndexExists` is true. + * @param vectors The vectors to add to the index. + * @param documents The documents to add to the index. + * @param documentProps The properties of the documents to add to the index, specifically the ids. + * @returns Promise that resolves when the vectors have been added to the index. Also returns the ids of the + * documents that were added. 
+ */ + public async addVectors( + vectors: number[][], + documents: Document>[], + documentProps?: DocumentProps + ): Promise { + if (vectors.length === 0) { + return; + } + + if (documents.length !== vectors.length) { + throw new Error( + `Number of vectors (${vectors.length}) does not equal number of documents (${documents.length})` + ); + } + + if (vectors.some((v) => v.length !== vectors[0].length)) { + throw new Error("All vectors must have the same length"); + } + + if ( + documentProps?.ids !== undefined && + documentProps.ids.length !== vectors.length + ) { + throw new Error( + `Number of ids (${ + documentProps?.ids?.length || "null" + }) does not equal number of vectors (${vectors.length})` + ); + } + + if (this._ensureIndexExists) { + await this.ensureIndexExists(vectors[0].length); + } + const documentIds = documentProps?.ids ?? documents.map(() => uuid.v4()); + + const batchSize = 128; + const numBatches = Math.ceil(vectors.length / batchSize); + + // Add each batch of vectors to the index + for (let i = 0; i < numBatches; i += 1) { + const [startIndex, endIndex] = [ + i * batchSize, + Math.min((i + 1) * batchSize, vectors.length), + ]; + + const batchVectors = vectors.slice(startIndex, endIndex); + const batchDocuments = documents.slice(startIndex, endIndex); + const batchDocumentIds = documentIds.slice(startIndex, endIndex); + + // Insert the items to the index + const response = await this.client.upsertItemBatch( + this.indexName, + this.prepareItemBatch(batchVectors, batchDocuments, batchDocumentIds) + ); + if (response instanceof VectorUpsertItemBatch.Success) { + // eslint-disable-next-line no-continue + continue; + } else if (response instanceof VectorUpsertItemBatch.Error) { + throw new Error(response.toString()); + } else { + throw new Error(`Unknown response type: ${response.toString()}`); + } + } + } + + /** + * Adds vectors to the index. Generates embeddings from the documents + * using the `Embeddings` instance passed to the constructor. 
+ * @param documents Array of `Document` instances to be added to the index. + * @returns Promise that resolves when the documents have been added to the index. + */ + async addDocuments( + documents: Document[], + documentProps?: DocumentProps + ): Promise { + const texts = documents.map(({ pageContent }) => pageContent); + await this.addVectors( + await this.embeddings.embedDocuments(texts), + documents, + documentProps + ); + } + + /** + * Deletes vectors from the index by id. + * @param params The parameters to use to delete the vectors, specifically the ids. + */ + public async delete(params: DeleteProps): Promise { + const response = await this.client.deleteItemBatch( + this.indexName, + params.ids + ); + if (response instanceof VectorDeleteItemBatch.Success) { + // pass + } else if (response instanceof VectorDeleteItemBatch.Error) { + throw new Error(response.toString()); + } else { + throw new Error(`Unknown response type: ${response.toString()}`); + } + } + + /** + * Searches the index for the most similar vectors to the query vector. + * @param query The query vector. + * @param k The number of results to return. + * @returns Promise that resolves to the documents of the most similar vectors + * to the query vector. + */ + public async similaritySearchVectorWithScore( + query: number[], + k: number + ): Promise<[Document>, number][]> { + const response = await this.client.search(this.indexName, query, { + topK: k, + metadataFields: ALL_VECTOR_METADATA, + }); + if (response instanceof VectorSearch.Success) { + if (response.hits === undefined) { + return []; + } + + return response.hits().map((hit) => [ + new Document({ + pageContent: hit.metadata[this.textField]?.toString() ?? 
"", + metadata: Object.fromEntries( + Object.entries(hit.metadata).filter( + ([key]) => key !== this.textField + ) + ), + }), + hit.score, + ]); + } else if (response instanceof VectorSearch.Error) { + throw new Error(response.toString()); + } else { + throw new Error(`Unknown response type: ${response.toString()}`); + } + } + + /** + * Return documents selected using the maximal marginal relevance. + * Maximal marginal relevance optimizes for similarity to the query AND diversity + * among selected documents. + * + * @param {string} query - Text to look up documents similar to. + * @param {number} options.k - Number of documents to return. + * @param {number} options.fetchK - Number of documents to fetch before passing to the MMR algorithm. + * @param {number} options.lambda - Number between 0 and 1 that determines the degree of diversity among the results, + * where 0 corresponds to maximum diversity and 1 to minimum diversity. + * @param {this["FilterType"]} options.filter - Optional filter + * @param _callbacks + * + * @returns {Promise} - List of documents selected by maximal marginal relevance. + */ + async maxMarginalRelevanceSearch( + query: string, + options: MaxMarginalRelevanceSearchOptions + ): Promise { + const queryEmbedding = await this.embeddings.embedQuery(query); + const response = await this.client.searchAndFetchVectors( + this.indexName, + queryEmbedding, + { topK: options.fetchK ?? 
20, metadataFields: ALL_VECTOR_METADATA } + ); + + if (response instanceof VectorSearchAndFetchVectors.Success) { + const hits = response.hits(); + + // Gather the embeddings of the search results + const embeddingList = hits.map((hit) => hit.vector); + + // Gather the ids of the most relevant results when applying MMR + const mmrIndexes = maximalMarginalRelevance( + queryEmbedding, + embeddingList, + options.lambda, + options.k + ); + + const finalResult = mmrIndexes.map((index) => { + const hit = hits[index]; + const { [this.textField]: pageContent, ...metadata } = hit.metadata; + return new Document({ metadata, pageContent: pageContent as string }); + }); + return finalResult; + } else if (response instanceof VectorSearchAndFetchVectors.Error) { + throw new Error(response.toString()); + } else { + throw new Error(`Unknown response type: ${response.toString()}`); + } + } + + /** + * Stores the documents in the index. + * + * Converts the documents to vectors using the `Embeddings` instance passed. + * @param texts The texts to store in the index. + * @param metadatas The metadata to store in the index. + * @param embeddings The embeddings instance to use to generate embeddings from the documents. + * @param dbConfig The configuration to use to instantiate the vector store. + * @param documentProps The properties of the documents to add to the index, specifically the ids. + * @returns Promise that resolves to the vector store. 
+ */ + public static async fromTexts( + texts: string[], + metadatas: object[] | object, + embeddings: Embeddings, + dbConfig: MomentoVectorIndexLibArgs, + documentProps?: DocumentProps + ): Promise { + if (Array.isArray(metadatas) && texts.length !== metadatas.length) { + throw new Error( + `Number of texts (${texts.length}) does not equal number of metadatas (${metadatas.length})` + ); + } + + const docs: Document[] = []; + for (let i = 0; i < texts.length; i += 1) { + // eslint-disable-next-line @typescript-eslint/no-unsafe-assignment + const metadata: object = Array.isArray(metadatas) + ? metadatas[i] + : metadatas; + const newDoc = new Document({ + pageContent: texts[i], + metadata, + }); + docs.push(newDoc); + } + return await this.fromDocuments(docs, embeddings, dbConfig, documentProps); + } + + /** + * Stores the documents in the index. + * @param docs The documents to store in the index. + * @param embeddings The embeddings instance to use to generate embeddings from the documents. + * @param dbConfig The configuration to use to instantiate the vector store. + * @param documentProps The properties of the documents to add to the index, specifically the ids. + * @returns Promise that resolves to the vector store. 
 */
  public static async fromDocuments(
    docs: Document[],
    embeddings: Embeddings,
    dbConfig: MomentoVectorIndexLibArgs,
    documentProps?: DocumentProps
  ): Promise<MomentoVectorIndex> {
    // Build the store, then delegate embedding + upsert to addDocuments.
    const vectorStore = new MomentoVectorIndex(embeddings, dbConfig);
    await vectorStore.addDocuments(docs, documentProps);
    return vectorStore;
  }
}
diff --git a/libs/langchain-community/src/vectorstores/mongodb_atlas.ts b/libs/langchain-community/src/vectorstores/mongodb_atlas.ts
new file mode 100755
index 000000000000..10a56dc3f7df
--- /dev/null
+++ b/libs/langchain-community/src/vectorstores/mongodb_atlas.ts
@@ -0,0 +1,282 @@
import type { Collection, Document as MongoDBDocument } from "mongodb";
import {
  MaxMarginalRelevanceSearchOptions,
  VectorStore,
} from "@langchain/core/vectorstores";
import { Embeddings } from "@langchain/core/embeddings";
import { Document } from "@langchain/core/documents";
import { maximalMarginalRelevance } from "@langchain/core/utils/math";

/**
 * Type that defines the arguments required to initialize the
 * MongoDBAtlasVectorSearch class. It includes the MongoDB collection,
 * index name, text key, and embedding key.
 */
export type MongoDBAtlasVectorSearchLibArgs = {
  readonly collection: Collection;
  readonly indexName?: string;
  readonly textKey?: string;
  readonly embeddingKey?: string;
};

/**
 * Type that defines the filter used in the
 * similaritySearchVectorWithScore and maxMarginalRelevanceSearch methods.
 * It includes pre-filter, post-filter pipeline, and a flag to include
 * embeddings.
 */
type MongoDBAtlasFilter = {
  preFilter?: MongoDBDocument;
  postFilterPipeline?: MongoDBDocument[];
  includeEmbeddings?: boolean;
} & MongoDBDocument;

/**
 * Class that is a wrapper around MongoDB Atlas Vector Search. It is used
 * to store embeddings in MongoDB documents, create a vector search index,
 * and perform K-Nearest Neighbors (KNN) search with an approximate
 * nearest neighbor algorithm.
+ */ +export class MongoDBAtlasVectorSearch extends VectorStore { + declare FilterType: MongoDBAtlasFilter; + + private readonly collection: Collection; + + private readonly indexName: string; + + private readonly textKey: string; + + private readonly embeddingKey: string; + + _vectorstoreType(): string { + return "mongodb_atlas"; + } + + constructor(embeddings: Embeddings, args: MongoDBAtlasVectorSearchLibArgs) { + super(embeddings, args); + this.collection = args.collection; + this.indexName = args.indexName ?? "default"; + this.textKey = args.textKey ?? "text"; + this.embeddingKey = args.embeddingKey ?? "embedding"; + } + + /** + * Method to add vectors and their corresponding documents to the MongoDB + * collection. + * @param vectors Vectors to be added. + * @param documents Corresponding documents to be added. + * @returns Promise that resolves when the vectors and documents have been added. + */ + async addVectors(vectors: number[][], documents: Document[]): Promise { + const docs = vectors.map((embedding, idx) => ({ + [this.textKey]: documents[idx].pageContent, + [this.embeddingKey]: embedding, + ...documents[idx].metadata, + })); + await this.collection.insertMany(docs); + } + + /** + * Method to add documents to the MongoDB collection. It first converts + * the documents to vectors using the embeddings and then calls the + * addVectors method. + * @param documents Documents to be added. + * @returns Promise that resolves when the documents have been added. + */ + async addDocuments(documents: Document[]): Promise { + const texts = documents.map(({ pageContent }) => pageContent); + return this.addVectors( + await this.embeddings.embedDocuments(texts), + documents + ); + } + + /** + * Method that performs a similarity search on the vectors stored in the + * MongoDB collection. It returns a list of documents and their + * corresponding similarity scores. + * @param query Query vector for the similarity search. 
   * @param k Number of nearest neighbors to return.
   * @param filter Optional filter to be applied.
   * @returns Promise that resolves to a list of documents and their corresponding similarity scores.
   */
  async similaritySearchVectorWithScore(
    query: number[],
    k: number,
    filter?: MongoDBAtlasFilter
  ): Promise<[Document, number][]> {
    const postFilterPipeline = filter?.postFilterPipeline ?? [];
    // If the caller used any reserved key (preFilter / postFilterPipeline /
    // includeEmbeddings) the $vectorSearch filter is filter.preFilter;
    // otherwise the whole filter object itself is treated as the pre-filter.
    // Note precedence: `a || b || c ? x : y` parses as `(a || b || c) ? x : y`.
    const preFilter: MongoDBDocument | undefined =
      filter?.preFilter ||
      filter?.postFilterPipeline ||
      filter?.includeEmbeddings
        ? filter.preFilter
        : filter;
    // Unless embeddings were explicitly requested, project them out of the
    // results to keep response payloads small.
    const removeEmbeddingsPipeline = !filter?.includeEmbeddings
      ? [
          {
            $project: {
              [this.embeddingKey]: 0,
            },
          },
        ]
      : [];

    const pipeline: MongoDBDocument[] = [
      {
        $vectorSearch: {
          queryVector: MongoDBAtlasVectorSearch.fixArrayPrecision(query),
          index: this.indexName,
          path: this.embeddingKey,
          limit: k,
          // Oversample candidates (10x) for better approximate-NN recall.
          numCandidates: 10 * k,
          ...(preFilter && { filter: preFilter }),
        },
      },
      {
        $set: {
          score: { $meta: "vectorSearchScore" },
        },
      },
      ...removeEmbeddingsPipeline,
      ...postFilterPipeline,
    ];

    const results = this.collection
      .aggregate(pipeline)
      .map<[Document, number]>((result) => {
        const { score, [this.textKey]: text, ...metadata } = result;
        return [new Document({ pageContent: text, metadata }), score];
      });

    return results.toArray();
  }

  /**
   * Return documents selected using the maximal marginal relevance.
   * Maximal marginal relevance optimizes for similarity to the query AND diversity
   * among selected documents.
   *
   * @param {string} query - Text to look up documents similar to.
   * @param {number} options.k - Number of documents to return.
   * @param {number} options.fetchK=20 - Number of documents to fetch before passing to the MMR algorithm.
   * @param {number} options.lambda=0.5 - Number between 0 and 1 that determines the degree of diversity among the results,
   * where 0 corresponds to maximum diversity and 1 to minimum diversity.
+ * @param {MongoDBAtlasFilter} options.filter - Optional Atlas Search operator to pre-filter on document fields + * or post-filter following the knnBeta search. + * + * @returns {Promise} - List of documents selected by maximal marginal relevance. + */ + async maxMarginalRelevanceSearch( + query: string, + options: MaxMarginalRelevanceSearchOptions + ): Promise { + const { k, fetchK = 20, lambda = 0.5, filter } = options; + + const queryEmbedding = await this.embeddings.embedQuery(query); + + // preserve the original value of includeEmbeddings + const includeEmbeddingsFlag = options.filter?.includeEmbeddings || false; + + // update filter to include embeddings, as they will be used in MMR + const includeEmbeddingsFilter = { + ...filter, + includeEmbeddings: true, + }; + + const resultDocs = await this.similaritySearchVectorWithScore( + MongoDBAtlasVectorSearch.fixArrayPrecision(queryEmbedding), + fetchK, + includeEmbeddingsFilter + ); + + const embeddingList = resultDocs.map( + (doc) => doc[0].metadata[this.embeddingKey] + ); + + const mmrIndexes = maximalMarginalRelevance( + queryEmbedding, + embeddingList, + lambda, + k + ); + + return mmrIndexes.map((idx) => { + const doc = resultDocs[idx][0]; + + // remove embeddings if they were not requested originally + if (!includeEmbeddingsFlag) { + delete doc.metadata[this.embeddingKey]; + } + return doc; + }); + } + + /** + * Static method to create an instance of MongoDBAtlasVectorSearch from a + * list of texts. It first converts the texts to vectors and then adds + * them to the MongoDB collection. + * @param texts List of texts to be converted to vectors. + * @param metadatas Metadata for the texts. + * @param embeddings Embeddings to be used for conversion. + * @param dbConfig Database configuration for MongoDB Atlas. + * @returns Promise that resolves to a new instance of MongoDBAtlasVectorSearch. 
+ */ + static async fromTexts( + texts: string[], + metadatas: object[] | object, + embeddings: Embeddings, + dbConfig: MongoDBAtlasVectorSearchLibArgs + ): Promise { + const docs: Document[] = []; + for (let i = 0; i < texts.length; i += 1) { + const metadata = Array.isArray(metadatas) ? metadatas[i] : metadatas; + const newDoc = new Document({ + pageContent: texts[i], + metadata, + }); + docs.push(newDoc); + } + return MongoDBAtlasVectorSearch.fromDocuments(docs, embeddings, dbConfig); + } + + /** + * Static method to create an instance of MongoDBAtlasVectorSearch from a + * list of documents. It first converts the documents to vectors and then + * adds them to the MongoDB collection. + * @param docs List of documents to be converted to vectors. + * @param embeddings Embeddings to be used for conversion. + * @param dbConfig Database configuration for MongoDB Atlas. + * @returns Promise that resolves to a new instance of MongoDBAtlasVectorSearch. + */ + static async fromDocuments( + docs: Document[], + embeddings: Embeddings, + dbConfig: MongoDBAtlasVectorSearchLibArgs + ): Promise { + const instance = new this(embeddings, dbConfig); + await instance.addDocuments(docs); + return instance; + } + + /** + * Static method to fix the precision of the array that ensures that + * every number in this array is always float when casted to other types. + * This is needed since MongoDB Atlas Vector Search does not cast integer + * inside vector search to float automatically. + * This method shall introduce a hint of error but should be safe to use + * since introduced error is very small, only applies to integer numbers + * returned by embeddings, and most embeddings shall not have precision + * as high as 15 decimal places. + * @param array Array of number to be fixed. 
   * @returns The input array with every integer entry nudged to a float.
   */
  static fixArrayPrecision(array: number[]) {
    return array.map((value) => {
      if (Number.isInteger(value)) {
        // Tiny epsilon (1e-15) forces float serialization without
        // meaningfully changing the embedding value.
        return value + 0.000000000000001;
      }
      return value;
    });
  }
}
diff --git a/libs/langchain-community/src/vectorstores/pgvector.ts b/libs/langchain-community/src/vectorstores/pgvector.ts
new file mode 100644
index 000000000000..6755e365c75c
--- /dev/null
+++ b/libs/langchain-community/src/vectorstores/pgvector.ts
@@ -0,0 +1,440 @@
import pg, { type Pool, type PoolClient, type PoolConfig } from "pg";
import { VectorStore } from "@langchain/core/vectorstores";
import { Embeddings } from "@langchain/core/embeddings";
import { Document } from "@langchain/core/documents";
import { getEnvironmentVariable } from "@langchain/core/utils/env";

// NOTE(review): the generic argument was garbled in transit; metadata rows
// are plain JSON objects — confirm `Record<string, unknown>` against upstream.
type Metadata = Record<string, unknown>;

/**
 * Interface that defines the arguments required to create a
 * `PGVectorStore` instance. It includes Postgres connection options,
 * table name, filter, and verbosity level.
 */
export interface PGVectorStoreArgs {
  postgresConnectionOptions: PoolConfig;
  tableName: string;
  collectionTableName?: string;
  collectionName?: string;
  collectionMetadata?: Metadata | null;
  columns?: {
    idColumnName?: string;
    vectorColumnName?: string;
    contentColumnName?: string;
    metadataColumnName?: string;
  };
  filter?: Metadata;
  verbose?: boolean;
  /**
   * The amount of documents to chunk by when
   * adding vectors.
   * @default 500
   */
  chunkSize?: number;
}

/**
 * Class that provides an interface to a Postgres vector database. It
 * extends the `VectorStore` base class and implements methods for adding
 * documents and vectors, performing similarity searches, and ensuring the
 * existence of a table in the database.
+ */ +export class PGVectorStore extends VectorStore { + declare FilterType: Metadata; + + tableName: string; + + collectionTableName?: string; + + collectionName = "langchain"; + + collectionMetadata: Metadata | null; + + idColumnName: string; + + vectorColumnName: string; + + contentColumnName: string; + + metadataColumnName: string; + + filter?: Metadata; + + _verbose?: boolean; + + pool: Pool; + + client?: PoolClient; + + chunkSize = 500; + + _vectorstoreType(): string { + return "pgvector"; + } + + private constructor(embeddings: Embeddings, config: PGVectorStoreArgs) { + super(embeddings, config); + this.tableName = config.tableName; + this.collectionTableName = config.collectionTableName; + this.collectionName = config.collectionName ?? "langchain"; + this.collectionMetadata = config.collectionMetadata ?? null; + this.filter = config.filter; + + this.vectorColumnName = config.columns?.vectorColumnName ?? "embedding"; + this.contentColumnName = config.columns?.contentColumnName ?? "text"; + this.idColumnName = config.columns?.idColumnName ?? "id"; + this.metadataColumnName = config.columns?.metadataColumnName ?? "metadata"; + + const pool = new pg.Pool(config.postgresConnectionOptions); + this.pool = pool; + this.chunkSize = config.chunkSize ?? 500; + + this._verbose = + getEnvironmentVariable("LANGCHAIN_VERBOSE") === "true" ?? + !!config.verbose; + } + + /** + * Static method to create a new `PGVectorStore` instance from a + * connection. It creates a table if one does not exist, and calls + * `connect` to return a new instance of `PGVectorStore`. + * + * @param embeddings - Embeddings instance. + * @param fields - `PGVectorStoreArgs` instance. + * @returns A new instance of `PGVectorStore`. 
 */
  static async initialize(
    embeddings: Embeddings,
    config: PGVectorStoreArgs
  ): Promise<PGVectorStore> {
    const postgresqlVectorStore = new PGVectorStore(embeddings, config);

    // Acquire a client and make sure the backing tables exist before the
    // store is handed to the caller.
    await postgresqlVectorStore._initializeClient();
    await postgresqlVectorStore.ensureTableInDatabase();
    if (postgresqlVectorStore.collectionTableName) {
      await postgresqlVectorStore.ensureCollectionTableInDatabase();
    }

    return postgresqlVectorStore;
  }

  protected async _initializeClient() {
    this.client = await this.pool.connect();
  }

  /**
   * Method to add documents to the vector store. It converts the documents into
   * vectors, and adds them to the store.
   *
   * @param documents - Array of `Document` instances.
   * @returns Promise that resolves when the documents have been added.
   */
  async addDocuments(documents: Document[]): Promise<void> {
    const texts = documents.map(({ pageContent }) => pageContent);

    return this.addVectors(
      await this.embeddings.embedDocuments(texts),
      documents
    );
  }

  /**
   * Inserts a row for the collectionName provided at initialization if it does not
   * exist and returns the collectionId.
   *
   * NOTE(review): table names are interpolated directly into the SQL string
   * throughout this class. They come from constructor config, not user input —
   * confirm callers never pass untrusted identifiers.
   *
   * @returns The collectionId for the given collectionName.
   */
  async getOrCreateCollection(): Promise<string> {
    const queryString = `
      SELECT uuid from ${this.collectionTableName}
      WHERE name = $1;
    `;
    const queryResult = await this.pool.query(queryString, [
      this.collectionName,
    ]);
    let collectionId = queryResult.rows[0]?.uuid;

    if (!collectionId) {
      const insertString = `
        INSERT INTO ${this.collectionTableName}(
          uuid,
          name,
          cmetadata
        )
        VALUES (
          uuid_generate_v4(),
          $1,
          $2
        )
        RETURNING uuid;
      `;
      const insertResult = await this.pool.query(insertString, [
        this.collectionName,
        this.collectionMetadata,
      ]);
      collectionId = insertResult.rows[0]?.uuid;
    }

    return collectionId;
  }

  /**
   * Generates the SQL placeholders for a specific row at the provided index.
   *
   * @param index - The index of the row for which placeholders need to be generated.
   * @param numOfColumns - The number of columns we are inserting data into.
   * @returns The SQL placeholders for the row values.
   */
  private generatePlaceholderForRowAt(
    index: number,
    numOfColumns: number
  ): string {
    const placeholders = [];
    for (let i = 0; i < numOfColumns; i += 1) {
      // Postgres placeholders are 1-based and global across the statement,
      // e.g. row index 1 with 3 columns yields ($4, $5, $6).
      placeholders.push(`$${index * numOfColumns + i + 1}`);
    }
    return `(${placeholders.join(", ")})`;
  }

  /**
   * Constructs the SQL query for inserting rows into the specified table.
   *
   * @param rows - The rows of data to be inserted, consisting of values and records.
   * @returns The complete SQL INSERT INTO query string.
   */
  private async buildInsertQuery(
    rows: (string | Record<string, unknown>)[][]
  ) {
    let collectionId;
    if (this.collectionTableName) {
      collectionId = await this.getOrCreateCollection();
    }

    const columns = [
      this.contentColumnName,
      this.vectorColumnName,
      this.metadataColumnName,
    ];

    // Each row gains a collection_id column when a collection table is used.
    if (collectionId) {
      columns.push("collection_id");
    }

    const valuesPlaceholders = rows
      .map((_, j) => this.generatePlaceholderForRowAt(j, columns.length))
      .join(", ");

    // `${columns}` relies on Array#toString, which joins with commas.
    const text = `
      INSERT INTO ${this.tableName}(
        ${columns}
      )
      VALUES ${valuesPlaceholders}
    `;
    return text;
  }

  /**
   * Method to add vectors to the vector store. It converts the vectors into
   * rows and inserts them into the database.
   *
   * @param vectors - Array of vectors.
   * @param documents - Array of `Document` instances.
   * @returns Promise that resolves when the vectors have been added.
 */
  async addVectors(vectors: number[][], documents: Document[]): Promise<void> {
    const rows = [];
    let collectionId;
    if (this.collectionTableName) {
      collectionId = await this.getOrCreateCollection();
    }

    // Build one flat value list per row: content, vector literal, metadata,
    // and (optionally) the collection id — matching buildInsertQuery's order.
    for (let i = 0; i < vectors.length; i += 1) {
      const values = [];
      const embedding = vectors[i];
      // pgvector accepts a bracketed, comma-separated string literal.
      const embeddingString = `[${embedding.join(",")}]`;
      values.push(
        documents[i].pageContent,
        embeddingString,
        documents[i].metadata
      );
      if (collectionId) {
        values.push(collectionId);
      }
      rows.push(values);
    }

    // Insert in chunks to keep each statement's parameter count bounded.
    for (let i = 0; i < rows.length; i += this.chunkSize) {
      const chunk = rows.slice(i, i + this.chunkSize);
      const insertQuery = await this.buildInsertQuery(chunk);
      const flatValues = chunk.flat();
      try {
        await this.pool.query(insertQuery, flatValues);
      } catch (e) {
        console.error(e);
        throw new Error(`Error inserting: ${(e as Error).message}`);
      }
    }
  }

  /**
   * Method to perform a similarity search in the vector store. It returns
   * the `k` most similar documents to the query vector, along with their
   * similarity scores.
   *
   * @param query - Query vector.
   * @param k - Number of most similar documents to return.
   * @param filter - Optional filter to apply to the search.
   * @returns Promise that resolves with an array of tuples, each containing a `Document` and its similarity score.
   */
  async similaritySearchVectorWithScore(
    query: number[],
    k: number,
    filter?: this["FilterType"]
  ): Promise<[Document, number][]> {
    const embeddingString = `[${query.join(",")}]`;
    // "{}" is an empty jsonb object, so the @> containment check matches all.
    const _filter = filter ?? "{}";
    let collectionId;
    if (this.collectionTableName) {
      collectionId = await this.getOrCreateCollection();
    }

    // Parameter order is fixed: $1 vector, $2 filter, $3 limit, $4 collection.
    const parameters = [embeddingString, _filter, k];
    if (collectionId) {
      parameters.push(collectionId);
    }

    // <=> is pgvector's cosine-distance operator (smaller = more similar).
    const queryString = `
      SELECT *, ${this.vectorColumnName} <=> $1 as "_distance"
      FROM ${this.tableName}
      WHERE ${this.metadataColumnName}::jsonb @> $2
      ${collectionId ? "AND collection_id = $4" : ""}
      ORDER BY "_distance" ASC
      LIMIT $3;
    `;

    const documents = (await this.pool.query(queryString, parameters)).rows;

    const results = [] as [Document, number][];
    for (const doc of documents) {
      // Skip rows with no distance or no content.
      if (doc._distance != null && doc[this.contentColumnName] != null) {
        const document = new Document({
          pageContent: doc[this.contentColumnName],
          metadata: doc[this.metadataColumnName],
        });
        results.push([document, doc._distance]);
      }
    }
    return results;
  }

  /**
   * Method to ensure the existence of the table in the database. It creates
   * the table if it does not already exist.
   *
   * @returns Promise that resolves when the table has been ensured.
   */
  async ensureTableInDatabase(): Promise<void> {
    // vector: pgvector extension; uuid-ossp: uuid_generate_v4() for ids.
    await this.pool.query("CREATE EXTENSION IF NOT EXISTS vector;");
    await this.pool.query('CREATE EXTENSION IF NOT EXISTS "uuid-ossp";');

    await this.pool.query(`
      CREATE TABLE IF NOT EXISTS ${this.tableName} (
        "${this.idColumnName}" uuid NOT NULL DEFAULT uuid_generate_v4() PRIMARY KEY,
        "${this.contentColumnName}" text,
        "${this.metadataColumnName}" jsonb,
        "${this.vectorColumnName}" vector
      );
    `);
  }

  /**
   * Method to ensure the existence of the collection table in the database.
   * It creates the table if it does not already exist.
   *
   * @returns Promise that resolves when the collection table has been ensured.
 */
  async ensureCollectionTableInDatabase(): Promise<void> {
    try {
      await this.pool.query(`
        CREATE TABLE IF NOT EXISTS ${this.collectionTableName} (
          uuid uuid NOT NULL DEFAULT uuid_generate_v4() PRIMARY KEY,
          name character varying,
          cmetadata jsonb
        );

        ALTER TABLE ${this.tableName}
          ADD COLUMN collection_id uuid;

        ALTER TABLE ${this.tableName}
          ADD CONSTRAINT ${this.tableName}_collection_id_fkey
          FOREIGN KEY (collection_id)
          REFERENCES ${this.collectionTableName}(uuid)
          ON DELETE CASCADE;
      `);
    } catch (e) {
      // The ALTER TABLE statements have no IF NOT EXISTS guard, so reruns
      // surface as "already exists" errors; only other failures are fatal.
      if (!(e as Error).message.includes("already exists")) {
        console.error(e);
        throw new Error(`Error adding column: ${(e as Error).message}`);
      }
    }
  }

  /**
   * Static method to create a new `PGVectorStore` instance from an
   * array of texts and their metadata. It converts the texts into
   * `Document` instances and adds them to the store.
   *
   * @param texts - Array of texts.
   * @param metadatas - Array of metadata objects or a single metadata object.
   * @param embeddings - Embeddings instance.
   * @param dbConfig - `PGVectorStoreArgs` instance.
   * @returns Promise that resolves with a new instance of `PGVectorStore`.
   */
  static async fromTexts(
    texts: string[],
    metadatas: object[] | object,
    embeddings: Embeddings,
    dbConfig: PGVectorStoreArgs
  ): Promise<PGVectorStore> {
    const docs = [];
    for (let i = 0; i < texts.length; i += 1) {
      // A single metadata object is shared; an array is consumed by position.
      const metadata = Array.isArray(metadatas) ? metadatas[i] : metadatas;
      const newDoc = new Document({
        pageContent: texts[i],
        metadata,
      });
      docs.push(newDoc);
    }

    return PGVectorStore.fromDocuments(docs, embeddings, dbConfig);
  }

  /**
   * Static method to create a new `PGVectorStore` instance from an
   * array of `Document` instances. It adds the documents to the store.
   *
   * @param docs - Array of `Document` instances.
   * @param embeddings - Embeddings instance.
   * @param dbConfig - `PGVectorStoreArgs` instance.
   * @returns Promise that resolves with a new instance of `PGVectorStore`.
   */
  static async fromDocuments(
    docs: Document[],
    embeddings: Embeddings,
    dbConfig: PGVectorStoreArgs
  ): Promise<PGVectorStore> {
    const instance = await PGVectorStore.initialize(embeddings, dbConfig);
    await instance.addDocuments(docs);

    return instance;
  }

  /**
   * Closes all the clients in the pool and terminates the pool.
   *
   * @returns Promise that resolves when all clients are closed and the pool is terminated.
   */
  async end(): Promise<void> {
    // Release the dedicated client acquired in _initializeClient (if any)
    // before shutting the pool down.
    this.client?.release();
    return this.pool.end();
  }
}
diff --git a/libs/langchain-community/src/vectorstores/pinecone.ts b/libs/langchain-community/src/vectorstores/pinecone.ts
new file mode 100644
index 000000000000..e368fe1670e9
--- /dev/null
+++ b/libs/langchain-community/src/vectorstores/pinecone.ts
@@ -0,0 +1,366 @@
/* eslint-disable no-process-env */
import * as uuid from "uuid";
import flatten from "flat";

import {
  RecordMetadata,
  PineconeRecord,
  Index as PineconeIndex,
} from "@pinecone-database/pinecone";

import {
  MaxMarginalRelevanceSearchOptions,
  VectorStore,
} from "@langchain/core/vectorstores";
import { Embeddings } from "@langchain/core/embeddings";
import { Document } from "@langchain/core/documents";
import {
  AsyncCaller,
  AsyncCallerParams,
} from "@langchain/core/utils/async_caller";
import { maximalMarginalRelevance } from "@langchain/core/utils/math";
import { chunkArray } from "../utils/chunk.js";

// eslint-disable-next-line @typescript-eslint/ban-types, @typescript-eslint/no-explicit-any
type PineconeMetadata = Record<string, any>;

export interface PineconeLibArgs extends AsyncCallerParams {
  pineconeIndex: PineconeIndex;
  textKey?: string;
  namespace?: string;
  filter?: PineconeMetadata;
}

/**
 * Type that defines the parameters for the delete operation in the
 * PineconeStore class. It includes ids, filter, deleteAll flag, and namespace.
 */
export type PineconeDeleteParams = {
  ids?: string[];
  deleteAll?: boolean;
  filter?: object;
  namespace?: string;
};

/**
 * Class that extends the VectorStore class and provides methods to
 * interact with the Pinecone vector database.
 */
export class PineconeStore extends VectorStore {
  declare FilterType: PineconeMetadata;

  textKey: string;

  namespace?: string;

  pineconeIndex: PineconeIndex;

  filter?: PineconeMetadata;

  caller: AsyncCaller;

  _vectorstoreType(): string {
    return "pinecone";
  }

  constructor(embeddings: Embeddings, args: PineconeLibArgs) {
    super(embeddings, args);

    // NOTE(review): redundant — the VectorStore base constructor already
    // assigns this.embeddings; harmless but could be dropped.
    this.embeddings = embeddings;
    const { namespace, pineconeIndex, textKey, filter, ...asyncCallerArgs } =
      args;
    this.namespace = namespace;
    this.pineconeIndex = pineconeIndex;
    this.textKey = textKey ?? "text";
    this.filter = filter;
    this.caller = new AsyncCaller(asyncCallerArgs);
  }

  /**
   * Method that adds documents to the Pinecone database.
   * @param documents Array of documents to add to the Pinecone database.
   * @param options Optional ids for the documents.
   * @returns Promise that resolves with the ids of the added documents.
   */
  async addDocuments(
    documents: Document[],
    options?: { ids?: string[] } | string[]
  ) {
    const texts = documents.map(({ pageContent }) => pageContent);
    return this.addVectors(
      await this.embeddings.embedDocuments(texts),
      documents,
      options
    );
  }

  /**
   * Method that adds vectors to the Pinecone database.
   * @param vectors Array of vectors to add to the Pinecone database.
   * @param documents Array of documents associated with the vectors.
   * @param options Optional ids for the vectors.
   * @returns Promise that resolves with the ids of the added vectors.
   */
  async addVectors(
    vectors: number[][],
    documents: Document[],
    options?: { ids?: string[] } | string[]
  ) {
    // `options` may be a bare id array (legacy form) or an options object.
    const ids = Array.isArray(options) ? options : options?.ids;
    const documentIds = ids == null ? documents.map(() => uuid.v4()) : ids;
    const pineconeVectors = vectors.map((values, idx) => {
      // Pinecone doesn't support nested objects, so we flatten them
      const documentMetadata = { ...documents[idx].metadata };
      // preserve string arrays which are allowed
      const stringArrays: Record<string, string[]> = {};
      for (const key of Object.keys(documentMetadata)) {
        if (
          Array.isArray(documentMetadata[key]) &&
          // eslint-disable-next-line @typescript-eslint/ban-types, @typescript-eslint/no-explicit-any
          documentMetadata[key].every((el: any) => typeof el === "string")
        ) {
          stringArrays[key] = documentMetadata[key];
          delete documentMetadata[key];
        }
      }
      const metadata: {
        [key: string]: string | number | boolean | string[] | null;
      } = {
        ...flatten(documentMetadata),
        ...stringArrays,
        [this.textKey]: documents[idx].pageContent,
      };
      // Pinecone doesn't support null values, so we remove them
      for (const key of Object.keys(metadata)) {
        if (metadata[key] == null) {
          delete metadata[key];
        } else if (
          typeof metadata[key] === "object" &&
          Object.keys(metadata[key] as unknown as object).length === 0
        ) {
          // Empty objects (left over from flattening) are also rejected.
          delete metadata[key];
        }
      }

      return {
        id: documentIds[idx],
        metadata,
        values,
      } as PineconeRecord;
    });

    const namespace = this.pineconeIndex.namespace(this.namespace ?? "");
    // Pinecone recommends a limit of 100 vectors per upsert request
    const chunkSize = 100;
    const chunkedVectors = chunkArray(pineconeVectors, chunkSize);
    const batchRequests = chunkedVectors.map((chunk) =>
      this.caller.call(async () => namespace.upsert(chunk))
    );

    await Promise.all(batchRequests);

    return documentIds;
  }

  /**
   * Method that deletes vectors from the Pinecone database.
   * @param params Parameters for the delete operation.
   * @returns Promise that resolves when the delete operation is complete.
+ */ + async delete(params: PineconeDeleteParams): Promise { + const { deleteAll, ids, filter } = params; + const namespace = this.pineconeIndex.namespace(this.namespace ?? ""); + + if (deleteAll) { + await namespace.deleteAll(); + } else if (ids) { + const batchSize = 1000; + for (let i = 0; i < ids.length; i += batchSize) { + const batchIds = ids.slice(i, i + batchSize); + await namespace.deleteMany(batchIds); + } + } else if (filter) { + await namespace.deleteMany(filter); + } else { + throw new Error("Either ids or delete_all must be provided."); + } + } + + protected async _runPineconeQuery( + query: number[], + k: number, + filter?: PineconeMetadata, + options?: { includeValues: boolean } + ) { + if (filter && this.filter) { + throw new Error("cannot provide both `filter` and `this.filter`"); + } + const _filter = filter ?? this.filter; + const namespace = this.pineconeIndex.namespace(this.namespace ?? ""); + + const results = await namespace.query({ + includeMetadata: true, + topK: k, + vector: query, + filter: _filter, + ...options, + }); + + return results; + } + + /** + * Method that performs a similarity search in the Pinecone database and + * returns the results along with their scores. + * @param query Query vector for the similarity search. + * @param k Number of top results to return. + * @param filter Optional filter to apply to the search. + * @returns Promise that resolves with an array of documents and their scores. + */ + async similaritySearchVectorWithScore( + query: number[], + k: number, + filter?: PineconeMetadata + ): Promise<[Document, number][]> { + const results = await this._runPineconeQuery(query, k, filter); + const result: [Document, number][] = []; + + if (results.matches) { + for (const res of results.matches) { + const { [this.textKey]: pageContent, ...metadata } = (res.metadata ?? 
+ {}) as PineconeMetadata; + if (res.score) { + result.push([new Document({ metadata, pageContent }), res.score]); + } + } + } + + return result; + } + + /** + * Return documents selected using the maximal marginal relevance. + * Maximal marginal relevance optimizes for similarity to the query AND diversity + * among selected documents. + * + * @param {string} query - Text to look up documents similar to. + * @param {number} options.k - Number of documents to return. + * @param {number} options.fetchK=20 - Number of documents to fetch before passing to the MMR algorithm. + * @param {number} options.lambda=0.5 - Number between 0 and 1 that determines the degree of diversity among the results, + * where 0 corresponds to maximum diversity and 1 to minimum diversity. + * @param {PineconeMetadata} options.filter - Optional filter to apply to the search. + * + * @returns {Promise} - List of documents selected by maximal marginal relevance. + */ + async maxMarginalRelevanceSearch( + query: string, + options: MaxMarginalRelevanceSearchOptions + ): Promise { + const queryEmbedding = await this.embeddings.embedQuery(query); + + const results = await this._runPineconeQuery( + queryEmbedding, + options.fetchK ?? 20, + options.filter, + { includeValues: true } + ); + + const matches = results?.matches ?? []; + const embeddingList = matches.map((match) => match.values); + + const mmrIndexes = maximalMarginalRelevance( + queryEmbedding, + embeddingList, + options.lambda, + options.k + ); + + const topMmrMatches = mmrIndexes.map((idx) => matches[idx]); + + const finalResult: Document[] = []; + for (const res of topMmrMatches) { + const { [this.textKey]: pageContent, ...metadata } = (res.metadata ?? + {}) as PineconeMetadata; + if (res.score) { + finalResult.push(new Document({ metadata, pageContent })); + } + } + + return finalResult; + } + + /** + * Static method that creates a new instance of the PineconeStore class + * from texts. 
+ * @param texts Array of texts to add to the Pinecone database. + * @param metadatas Metadata associated with the texts. + * @param embeddings Embeddings to use for the texts. + * @param dbConfig Configuration for the Pinecone database. + * @returns Promise that resolves with a new instance of the PineconeStore class. + */ + static async fromTexts( + texts: string[], + metadatas: object[] | object, + embeddings: Embeddings, + dbConfig: + | { + pineconeIndex: PineconeIndex; + textKey?: string; + namespace?: string | undefined; + } + | PineconeLibArgs + ): Promise { + const docs: Document[] = []; + for (let i = 0; i < texts.length; i += 1) { + const metadata = Array.isArray(metadatas) ? metadatas[i] : metadatas; + const newDoc = new Document({ + pageContent: texts[i], + metadata, + }); + docs.push(newDoc); + } + + const args: PineconeLibArgs = { + pineconeIndex: dbConfig.pineconeIndex, + textKey: dbConfig.textKey, + namespace: dbConfig.namespace, + }; + return PineconeStore.fromDocuments(docs, embeddings, args); + } + + /** + * Static method that creates a new instance of the PineconeStore class + * from documents. + * @param docs Array of documents to add to the Pinecone database. + * @param embeddings Embeddings to use for the documents. + * @param dbConfig Configuration for the Pinecone database. + * @returns Promise that resolves with a new instance of the PineconeStore class. + */ + static async fromDocuments( + docs: Document[], + embeddings: Embeddings, + dbConfig: PineconeLibArgs + ): Promise { + const args = dbConfig; + args.textKey = dbConfig.textKey ?? "text"; + + const instance = new this(embeddings, args); + await instance.addDocuments(docs); + return instance; + } + + /** + * Static method that creates a new instance of the PineconeStore class + * from an existing index. + * @param embeddings Embeddings to use for the documents. + * @param dbConfig Configuration for the Pinecone database. 
+ * @returns Promise that resolves with a new instance of the PineconeStore class. + */ + static async fromExistingIndex( + embeddings: Embeddings, + dbConfig: PineconeLibArgs + ): Promise { + const instance = new this(embeddings, dbConfig); + return instance; + } +} diff --git a/libs/langchain-community/src/vectorstores/supabase.ts b/libs/langchain-community/src/vectorstores/supabase.ts new file mode 100644 index 000000000000..5d8e9a5d67cb --- /dev/null +++ b/libs/langchain-community/src/vectorstores/supabase.ts @@ -0,0 +1,313 @@ +import type { SupabaseClient } from "@supabase/supabase-js"; +import type { PostgrestFilterBuilder } from "@supabase/postgrest-js"; +import { + MaxMarginalRelevanceSearchOptions, + VectorStore, +} from "@langchain/core/vectorstores"; +import { Embeddings } from "@langchain/core/embeddings"; +import { Document } from "@langchain/core/documents"; +import { maximalMarginalRelevance } from "@langchain/core/utils/math"; + +/** + * Interface for the parameters required for searching embeddings. + */ +interface SearchEmbeddingsParams { + query_embedding: number[]; + match_count: number; // int + filter?: SupabaseMetadata | SupabaseFilterRPCCall; +} + +// eslint-disable-next-line @typescript-eslint/ban-types, @typescript-eslint/no-explicit-any +export type SupabaseMetadata = Record; +// eslint-disable-next-line @typescript-eslint/ban-types, @typescript-eslint/no-explicit-any +export type SupabaseFilter = PostgrestFilterBuilder; +export type SupabaseFilterRPCCall = (rpcCall: SupabaseFilter) => SupabaseFilter; + +/** + * Interface for the response returned when searching embeddings. + */ +interface SearchEmbeddingsResponse { + id: number; + content: string; + metadata: object; + embedding: number[]; + similarity: number; +} + +/** + * Interface for the arguments required to initialize a Supabase library. 
+ */ +export interface SupabaseLibArgs { + client: SupabaseClient; + tableName?: string; + queryName?: string; + filter?: SupabaseMetadata | SupabaseFilterRPCCall; + upsertBatchSize?: number; +} + +/** + * Class for interacting with a Supabase database to store and manage + * vectors. + */ +export class SupabaseVectorStore extends VectorStore { + declare FilterType: SupabaseMetadata | SupabaseFilterRPCCall; + + client: SupabaseClient; + + tableName: string; + + queryName: string; + + filter?: SupabaseMetadata | SupabaseFilterRPCCall; + + upsertBatchSize = 500; + + _vectorstoreType(): string { + return "supabase"; + } + + constructor(embeddings: Embeddings, args: SupabaseLibArgs) { + super(embeddings, args); + + this.client = args.client; + this.tableName = args.tableName || "documents"; + this.queryName = args.queryName || "match_documents"; + this.filter = args.filter; + this.upsertBatchSize = args.upsertBatchSize ?? this.upsertBatchSize; + } + + /** + * Adds documents to the vector store. + * @param documents The documents to add. + * @param options Optional parameters for adding the documents. + * @returns A promise that resolves when the documents have been added. + */ + async addDocuments( + documents: Document[], + options?: { ids?: string[] | number[] } + ) { + const texts = documents.map(({ pageContent }) => pageContent); + return this.addVectors( + await this.embeddings.embedDocuments(texts), + documents, + options + ); + } + + /** + * Adds vectors to the vector store. + * @param vectors The vectors to add. + * @param documents The documents associated with the vectors. + * @param options Optional parameters for adding the vectors. + * @returns A promise that resolves with the IDs of the added vectors when the vectors have been added. 
+ */ + async addVectors( + vectors: number[][], + documents: Document[], + options?: { ids?: string[] | number[] } + ) { + const rows = vectors.map((embedding, idx) => ({ + content: documents[idx].pageContent, + embedding, + metadata: documents[idx].metadata, + })); + + // upsert returns 500/502/504 (yes really any of them) if given too many rows/characters + // ~2000 trips it, but my data is probably smaller than average pageContent and metadata + let returnedIds: string[] = []; + for (let i = 0; i < rows.length; i += this.upsertBatchSize) { + const chunk = rows.slice(i, i + this.upsertBatchSize).map((row, j) => { + if (options?.ids) { + return { id: options.ids[i + j], ...row }; + } + return row; + }); + + const res = await this.client.from(this.tableName).upsert(chunk).select(); + if (res.error) { + throw new Error( + `Error inserting: ${res.error.message} ${res.status} ${res.statusText}` + ); + } + if (res.data) { + returnedIds = returnedIds.concat(res.data.map((row) => row.id)); + } + } + return returnedIds; + } + + /** + * Deletes vectors from the vector store. + * @param params The parameters for deleting vectors. + * @returns A promise that resolves when the vectors have been deleted. + */ + async delete(params: { ids: string[] | number[] }): Promise { + const { ids } = params; + for (const id of ids) { + await this.client.from(this.tableName).delete().eq("id", id); + } + } + + protected async _searchSupabase( + query: number[], + k: number, + filter?: this["FilterType"] + ): Promise { + if (filter && this.filter) { + throw new Error("cannot provide both `filter` and `this.filter`"); + } + const _filter = filter ?? this.filter ?? 
{}; + const matchDocumentsParams: Partial = { + query_embedding: query, + }; + + let filterFunction: SupabaseFilterRPCCall; + + if (typeof _filter === "function") { + filterFunction = (rpcCall) => _filter(rpcCall).limit(k); + } else if (typeof _filter === "object") { + matchDocumentsParams.filter = _filter; + matchDocumentsParams.match_count = k; + filterFunction = (rpcCall) => rpcCall; + } else { + throw new Error("invalid filter type"); + } + + const rpcCall = this.client.rpc(this.queryName, matchDocumentsParams); + + const { data: searches, error } = await filterFunction(rpcCall); + + if (error) { + throw new Error( + `Error searching for documents: ${error.code} ${error.message} ${error.details}` + ); + } + + return searches; + } + + /** + * Performs a similarity search on the vector store. + * @param query The query vector. + * @param k The number of results to return. + * @param filter Optional filter to apply to the search. + * @returns A promise that resolves with the search results when the search is complete. + */ + async similaritySearchVectorWithScore( + query: number[], + k: number, + filter?: this["FilterType"] + ): Promise<[Document, number][]> { + const searches = await this._searchSupabase(query, k, filter); + const result: [Document, number][] = searches.map((resp) => [ + new Document({ + metadata: resp.metadata, + pageContent: resp.content, + }), + resp.similarity, + ]); + + return result; + } + + /** + * Return documents selected using the maximal marginal relevance. + * Maximal marginal relevance optimizes for similarity to the query AND diversity + * among selected documents. + * + * @param {string} query - Text to look up documents similar to. + * @param {number} options.k - Number of documents to return. + * @param {number} options.fetchK=20- Number of documents to fetch before passing to the MMR algorithm. 
+ * @param {number} options.lambda=0.5 - Number between 0 and 1 that determines the degree of diversity among the results, + * where 0 corresponds to maximum diversity and 1 to minimum diversity. + * @param {SupabaseLibArgs} options.filter - Optional filter to apply to the search. + * + * @returns {Promise} - List of documents selected by maximal marginal relevance. + */ + async maxMarginalRelevanceSearch( + query: string, + options: MaxMarginalRelevanceSearchOptions + ): Promise { + const queryEmbedding = await this.embeddings.embedQuery(query); + + const searches = await this._searchSupabase( + queryEmbedding, + options.fetchK ?? 20, + options.filter + ); + + const embeddingList = searches.map((searchResp) => searchResp.embedding); + + const mmrIndexes = maximalMarginalRelevance( + queryEmbedding, + embeddingList, + options.lambda, + options.k + ); + + return mmrIndexes.map( + (idx) => + new Document({ + metadata: searches[idx].metadata, + pageContent: searches[idx].content, + }) + ); + } + + /** + * Creates a new SupabaseVectorStore instance from an array of texts. + * @param texts The texts to create documents from. + * @param metadatas The metadata for the documents. + * @param embeddings The embeddings to use. + * @param dbConfig The configuration for the Supabase database. + * @returns A promise that resolves with a new SupabaseVectorStore instance when the instance has been created. + */ + static async fromTexts( + texts: string[], + metadatas: object[] | object, + embeddings: Embeddings, + dbConfig: SupabaseLibArgs + ): Promise { + const docs: Document[] = []; + for (let i = 0; i < texts.length; i += 1) { + const metadata = Array.isArray(metadatas) ? metadatas[i] : metadatas; + const newDoc = new Document({ + pageContent: texts[i], + metadata, + }); + docs.push(newDoc); + } + return SupabaseVectorStore.fromDocuments(docs, embeddings, dbConfig); + } + + /** + * Creates a new SupabaseVectorStore instance from an array of documents. 
+ * @param docs The documents to create the instance from. + * @param embeddings The embeddings to use. + * @param dbConfig The configuration for the Supabase database. + * @returns A promise that resolves with a new SupabaseVectorStore instance when the instance has been created. + */ + static async fromDocuments( + docs: Document[], + embeddings: Embeddings, + dbConfig: SupabaseLibArgs + ): Promise { + const instance = new this(embeddings, dbConfig); + await instance.addDocuments(docs); + return instance; + } + + /** + * Creates a new SupabaseVectorStore instance from an existing index. + * @param embeddings The embeddings to use. + * @param dbConfig The configuration for the Supabase database. + * @returns A promise that resolves with a new SupabaseVectorStore instance when the instance has been created. + */ + static async fromExistingIndex( + embeddings: Embeddings, + dbConfig: SupabaseLibArgs + ): Promise { + const instance = new this(embeddings, dbConfig); + return instance; + } +} diff --git a/langchain/src/vectorstores/tests/analyticdb.int.test.ts b/libs/langchain-community/src/vectorstores/tests/analyticdb.int.test.ts similarity index 97% rename from langchain/src/vectorstores/tests/analyticdb.int.test.ts rename to libs/langchain-community/src/vectorstores/tests/analyticdb.int.test.ts index 4607dc83ec8b..d472ae18136d 100644 --- a/langchain/src/vectorstores/tests/analyticdb.int.test.ts +++ b/libs/langchain-community/src/vectorstores/tests/analyticdb.int.test.ts @@ -3,8 +3,8 @@ import { test } from "@jest/globals"; import { AnalyticDBVectorStore } from "../analyticdb.js"; -import { Document } from "../../document.js"; -import { OpenAIEmbeddings } from "../../embeddings/openai.js"; +import { Document } from "@langchain/core/documents"; +import { OpenAIEmbeddings } from "@langchain/openai"; const connectionOptions = { host: process.env.ANALYTICDB_HOST || "localhost", diff --git a/langchain/src/vectorstores/tests/cassandra.int.test.ts 
b/libs/langchain-community/src/vectorstores/tests/cassandra.int.test.ts similarity index 98% rename from langchain/src/vectorstores/tests/cassandra.int.test.ts rename to libs/langchain-community/src/vectorstores/tests/cassandra.int.test.ts index 66ceb4198c52..d9d4df2d3a3d 100644 --- a/langchain/src/vectorstores/tests/cassandra.int.test.ts +++ b/libs/langchain-community/src/vectorstores/tests/cassandra.int.test.ts @@ -2,9 +2,9 @@ import { test, expect, describe } from "@jest/globals"; import { Client } from "cassandra-driver"; +import { OpenAIEmbeddings } from "@langchain/openai"; +import { Document } from "@langchain/core/documents"; import { CassandraStore } from "../cassandra.js"; -import { OpenAIEmbeddings } from "../../embeddings/openai.js"; -import { Document } from "../../document.js"; const cassandraConfig = { cloud: { diff --git a/langchain/src/vectorstores/tests/chroma.int.test.ts b/libs/langchain-community/src/vectorstores/tests/chroma.int.test.ts similarity index 97% rename from langchain/src/vectorstores/tests/chroma.int.test.ts rename to libs/langchain-community/src/vectorstores/tests/chroma.int.test.ts index 7c440c1a1095..af9da7661dc6 100644 --- a/langchain/src/vectorstores/tests/chroma.int.test.ts +++ b/libs/langchain-community/src/vectorstores/tests/chroma.int.test.ts @@ -4,9 +4,9 @@ import { beforeEach, describe, expect, test } from "@jest/globals"; import { ChromaClient } from "chromadb"; import { faker } from "@faker-js/faker"; import * as uuid from "uuid"; -import { Document } from "../../document.js"; +import { Document } from "@langchain/core/documents"; +import { OpenAIEmbeddings } from "@langchain/openai"; import { Chroma } from "../chroma.js"; -import { OpenAIEmbeddings } from "../../embeddings/openai.js"; describe.skip("Chroma", () => { let chromaStore: Chroma; diff --git a/langchain/src/vectorstores/tests/chroma.test.ts b/libs/langchain-community/src/vectorstores/tests/chroma.test.ts similarity index 98% rename from 
langchain/src/vectorstores/tests/chroma.test.ts rename to libs/langchain-community/src/vectorstores/tests/chroma.test.ts index 3ac187331015..25b78e436c0a 100644 --- a/langchain/src/vectorstores/tests/chroma.test.ts +++ b/libs/langchain-community/src/vectorstores/tests/chroma.test.ts @@ -3,7 +3,7 @@ import { jest, test, expect } from "@jest/globals"; import { type Collection } from "chromadb"; import { Chroma } from "../chroma.js"; -import { FakeEmbeddings } from "../../embeddings/fake.js"; +import { FakeEmbeddings } from "../../utils/testing.js"; const mockCollection = { count: jest.fn().mockResolvedValue(5), diff --git a/langchain/src/vectorstores/tests/clickhouse.int.test.ts b/libs/langchain-community/src/vectorstores/tests/clickhouse.int.test.ts similarity index 98% rename from langchain/src/vectorstores/tests/clickhouse.int.test.ts rename to libs/langchain-community/src/vectorstores/tests/clickhouse.int.test.ts index 36c68d275940..ba3e51ad3669 100644 --- a/langchain/src/vectorstores/tests/clickhouse.int.test.ts +++ b/libs/langchain-community/src/vectorstores/tests/clickhouse.int.test.ts @@ -1,10 +1,10 @@ /* eslint-disable no-process-env */ import { test, expect } from "@jest/globals"; +import { Document } from "@langchain/core/documents"; import { ClickHouseStore } from "../clickhouse.js"; // Import OpenAIEmbeddings if you have a valid OpenAI API key import { HuggingFaceInferenceEmbeddings } from "../../embeddings/hf.js"; -import { Document } from "../../document.js"; test.skip("ClickHouseStore.fromText", async () => { const vectorStore = await ClickHouseStore.fromTexts( diff --git a/langchain/src/vectorstores/tests/closevector_node.int.test.ts b/libs/langchain-community/src/vectorstores/tests/closevector_node.int.test.ts similarity index 89% rename from langchain/src/vectorstores/tests/closevector_node.int.test.ts rename to libs/langchain-community/src/vectorstores/tests/closevector_node.int.test.ts index b5718dc34015..6c7d05d30c2b 100644 --- 
a/langchain/src/vectorstores/tests/closevector_node.int.test.ts +++ b/libs/langchain-community/src/vectorstores/tests/closevector_node.int.test.ts @@ -1,7 +1,7 @@ import { test, expect } from "@jest/globals"; +import { OpenAIEmbeddings } from "@langchain/openai"; +import { getEnvironmentVariable } from "@langchain/core/utils/env"; import { CloseVectorNode } from "../closevector/node.js"; -import { OpenAIEmbeddings } from "../../embeddings/openai.js"; -import { getEnvironmentVariable } from "../../util/env.js"; test.skip("Test CloseVectorNode.fromTexts + addVectors", async () => { const key = getEnvironmentVariable("CLOSEVECTOR_API_KEY"); diff --git a/langchain/src/vectorstores/tests/closevector_node.test.ts b/libs/langchain-community/src/vectorstores/tests/closevector_node.test.ts similarity index 93% rename from langchain/src/vectorstores/tests/closevector_node.test.ts rename to libs/langchain-community/src/vectorstores/tests/closevector_node.test.ts index 207b9ea4879d..b40ea928d2fc 100644 --- a/langchain/src/vectorstores/tests/closevector_node.test.ts +++ b/libs/langchain-community/src/vectorstores/tests/closevector_node.test.ts @@ -1,7 +1,7 @@ import { test, expect } from "@jest/globals"; +import { Document } from "@langchain/core/documents"; +import { FakeEmbeddings } from "../../utils/testing.js"; import { CloseVectorNode } from "../closevector/node.js"; -import { Document } from "../../document.js"; -import { FakeEmbeddings } from "../../embeddings/fake.js"; test("Test CloseVectorNode.fromTexts + addVectors", async () => { const vectorStore = await CloseVectorNode.fromTexts( diff --git a/langchain/src/vectorstores/tests/convex.int.test.ts b/libs/langchain-community/src/vectorstores/tests/convex.int.test.ts similarity index 100% rename from langchain/src/vectorstores/tests/convex.int.test.ts rename to libs/langchain-community/src/vectorstores/tests/convex.int.test.ts diff --git a/langchain/src/vectorstores/tests/convex/convex/_generated/api.d.ts 
b/libs/langchain-community/src/vectorstores/tests/convex/convex/_generated/api.d.ts similarity index 100% rename from langchain/src/vectorstores/tests/convex/convex/_generated/api.d.ts rename to libs/langchain-community/src/vectorstores/tests/convex/convex/_generated/api.d.ts diff --git a/langchain/src/vectorstores/tests/convex/convex/_generated/api.js b/libs/langchain-community/src/vectorstores/tests/convex/convex/_generated/api.js similarity index 100% rename from langchain/src/vectorstores/tests/convex/convex/_generated/api.js rename to libs/langchain-community/src/vectorstores/tests/convex/convex/_generated/api.js diff --git a/langchain/src/vectorstores/tests/convex/convex/_generated/dataModel.d.ts b/libs/langchain-community/src/vectorstores/tests/convex/convex/_generated/dataModel.d.ts similarity index 100% rename from langchain/src/vectorstores/tests/convex/convex/_generated/dataModel.d.ts rename to libs/langchain-community/src/vectorstores/tests/convex/convex/_generated/dataModel.d.ts diff --git a/langchain/src/vectorstores/tests/convex/convex/_generated/server.d.ts b/libs/langchain-community/src/vectorstores/tests/convex/convex/_generated/server.d.ts similarity index 100% rename from langchain/src/vectorstores/tests/convex/convex/_generated/server.d.ts rename to libs/langchain-community/src/vectorstores/tests/convex/convex/_generated/server.d.ts diff --git a/langchain/src/vectorstores/tests/convex/convex/_generated/server.js b/libs/langchain-community/src/vectorstores/tests/convex/convex/_generated/server.js similarity index 100% rename from langchain/src/vectorstores/tests/convex/convex/_generated/server.js rename to libs/langchain-community/src/vectorstores/tests/convex/convex/_generated/server.js diff --git a/libs/langchain-community/src/vectorstores/tests/convex/convex/langchain/db.ts b/libs/langchain-community/src/vectorstores/tests/convex/convex/langchain/db.ts new file mode 100644 index 000000000000..02d53f0c4aff --- /dev/null +++ 
b/libs/langchain-community/src/vectorstores/tests/convex/convex/langchain/db.ts @@ -0,0 +1 @@ +export * from "../../../../../utils/convex.js"; diff --git a/langchain/src/vectorstores/tests/convex/convex/lib.ts b/libs/langchain-community/src/vectorstores/tests/convex/convex/lib.ts similarity index 94% rename from langchain/src/vectorstores/tests/convex/convex/lib.ts rename to libs/langchain-community/src/vectorstores/tests/convex/convex/lib.ts index d0cdbb922acc..a47d55a4dce4 100644 --- a/langchain/src/vectorstores/tests/convex/convex/lib.ts +++ b/libs/langchain-community/src/vectorstores/tests/convex/convex/lib.ts @@ -1,6 +1,6 @@ // eslint-disable-next-line import/no-extraneous-dependencies import { v } from "convex/values"; -import { OpenAIEmbeddings } from "../../../../embeddings/openai.js"; +import { OpenAIEmbeddings } from "@langchain/openai"; import { ConvexVectorStore } from "../../../convex.js"; import { action, mutation } from "./_generated/server.js"; diff --git a/langchain/src/vectorstores/tests/convex/convex/schema.ts b/libs/langchain-community/src/vectorstores/tests/convex/convex/schema.ts similarity index 100% rename from langchain/src/vectorstores/tests/convex/convex/schema.ts rename to libs/langchain-community/src/vectorstores/tests/convex/convex/schema.ts diff --git a/langchain/src/vectorstores/tests/convex/package.json b/libs/langchain-community/src/vectorstores/tests/convex/package.json similarity index 100% rename from langchain/src/vectorstores/tests/convex/package.json rename to libs/langchain-community/src/vectorstores/tests/convex/package.json diff --git a/langchain/src/vectorstores/tests/elasticsearch.int.test.ts b/libs/langchain-community/src/vectorstores/tests/elasticsearch.int.test.ts similarity index 97% rename from langchain/src/vectorstores/tests/elasticsearch.int.test.ts rename to libs/langchain-community/src/vectorstores/tests/elasticsearch.int.test.ts index 4aa3be383bfc..7fc968331b9a 100644 --- 
a/langchain/src/vectorstores/tests/elasticsearch.int.test.ts +++ b/libs/langchain-community/src/vectorstores/tests/elasticsearch.int.test.ts @@ -1,9 +1,9 @@ /* eslint-disable no-process-env */ import { test, expect } from "@jest/globals"; import { Client, ClientOptions } from "@elastic/elasticsearch"; -import { OpenAIEmbeddings } from "../../embeddings/openai.js"; +import { OpenAIEmbeddings } from "@langchain/openai"; +import { Document } from "@langchain/core/documents"; import { ElasticVectorSearch } from "../elasticsearch.js"; -import { Document } from "../../document.js"; describe("ElasticVectorSearch", () => { let store: ElasticVectorSearch; diff --git a/langchain/src/vectorstores/tests/faiss.int.test.data/faiss.int.test.py b/libs/langchain-community/src/vectorstores/tests/faiss.int.test.data/faiss.int.test.py similarity index 100% rename from langchain/src/vectorstores/tests/faiss.int.test.data/faiss.int.test.py rename to libs/langchain-community/src/vectorstores/tests/faiss.int.test.data/faiss.int.test.py diff --git a/langchain/src/vectorstores/tests/faiss.int.test.data/faiss_index/index.faiss b/libs/langchain-community/src/vectorstores/tests/faiss.int.test.data/faiss_index/index.faiss similarity index 100% rename from langchain/src/vectorstores/tests/faiss.int.test.data/faiss_index/index.faiss rename to libs/langchain-community/src/vectorstores/tests/faiss.int.test.data/faiss_index/index.faiss diff --git a/langchain/src/vectorstores/tests/faiss.int.test.data/faiss_index/index.pkl b/libs/langchain-community/src/vectorstores/tests/faiss.int.test.data/faiss_index/index.pkl similarity index 100% rename from langchain/src/vectorstores/tests/faiss.int.test.data/faiss_index/index.pkl rename to libs/langchain-community/src/vectorstores/tests/faiss.int.test.data/faiss_index/index.pkl diff --git a/langchain/src/vectorstores/tests/faiss.int.test.data/requirements.txt b/libs/langchain-community/src/vectorstores/tests/faiss.int.test.data/requirements.txt similarity 
index 100% rename from langchain/src/vectorstores/tests/faiss.int.test.data/requirements.txt rename to libs/langchain-community/src/vectorstores/tests/faiss.int.test.data/requirements.txt diff --git a/langchain/src/vectorstores/tests/faiss.int.test.ts b/libs/langchain-community/src/vectorstores/tests/faiss.int.test.ts similarity index 98% rename from langchain/src/vectorstores/tests/faiss.int.test.ts rename to libs/langchain-community/src/vectorstores/tests/faiss.int.test.ts index c136d5ed3dc1..cf3fc3e894a8 100644 --- a/langchain/src/vectorstores/tests/faiss.int.test.ts +++ b/libs/langchain-community/src/vectorstores/tests/faiss.int.test.ts @@ -4,9 +4,9 @@ import * as path from "node:path"; import * as os from "node:os"; import { fileURLToPath } from "node:url"; +import { OpenAIEmbeddings } from "@langchain/openai"; +import { Document } from "@langchain/core/documents"; import { FaissStore } from "../faiss.js"; -import { OpenAIEmbeddings } from "../../embeddings/openai.js"; -import { Document } from "../../document.js"; test("Test FaissStore.fromTexts", async () => { const vectorStore = await FaissStore.fromTexts( diff --git a/langchain/src/vectorstores/tests/faiss.test.ts b/libs/langchain-community/src/vectorstores/tests/faiss.test.ts similarity index 98% rename from langchain/src/vectorstores/tests/faiss.test.ts rename to libs/langchain-community/src/vectorstores/tests/faiss.test.ts index e2708f40d582..bcc0afc347fc 100644 --- a/langchain/src/vectorstores/tests/faiss.test.ts +++ b/libs/langchain-community/src/vectorstores/tests/faiss.test.ts @@ -1,7 +1,7 @@ import { test, expect } from "@jest/globals"; +import { Document } from "@langchain/core/documents"; import { FaissStore } from "../faiss.js"; -import { Document } from "../../document.js"; -import { FakeEmbeddings } from "../../embeddings/fake.js"; +import { FakeEmbeddings } from "../../utils/testing.js"; test("Test FaissStore.fromTexts + addVectors", async () => { const vectorStore = await 
FaissStore.fromTexts( diff --git a/langchain/src/vectorstores/tests/googlevertexai.int.test.ts b/libs/langchain-community/src/vectorstores/tests/googlevertexai.int.test.ts similarity index 96% rename from langchain/src/vectorstores/tests/googlevertexai.int.test.ts rename to libs/langchain-community/src/vectorstores/tests/googlevertexai.int.test.ts index 681379814149..7004c193069e 100644 --- a/langchain/src/vectorstores/tests/googlevertexai.int.test.ts +++ b/libs/langchain-community/src/vectorstores/tests/googlevertexai.int.test.ts @@ -1,16 +1,16 @@ /* eslint-disable no-process-env */ /* eslint-disable @typescript-eslint/no-non-null-assertion */ import { beforeAll, expect, test } from "@jest/globals"; -import { SyntheticEmbeddings } from "../../embeddings/fake.js"; +import { Document } from "@langchain/core/documents"; +import { SyntheticEmbeddings } from "../../utils/testing.js"; import { InMemoryDocstore } from "../../stores/doc/in_memory.js"; -import { Document } from "../../document.js"; import { MatchingEngineArgs, MatchingEngine, IdDocument, Restriction, } from "../googlevertexai.js"; -import { Embeddings } from "../../embeddings/base.js"; +import { Embeddings } from "@langchain/core/embeddings"; describe("Vertex AI matching", () => { let embeddings: Embeddings; diff --git a/langchain/src/vectorstores/tests/googlevertexai.test.ts b/libs/langchain-community/src/vectorstores/tests/googlevertexai.test.ts similarity index 95% rename from langchain/src/vectorstores/tests/googlevertexai.test.ts rename to libs/langchain-community/src/vectorstores/tests/googlevertexai.test.ts index fab9e981c070..07b2acbe2d17 100644 --- a/langchain/src/vectorstores/tests/googlevertexai.test.ts +++ b/libs/langchain-community/src/vectorstores/tests/googlevertexai.test.ts @@ -1,10 +1,10 @@ /* eslint-disable no-process-env */ /* eslint-disable @typescript-eslint/no-non-null-assertion */ import { beforeEach, expect, test } from "@jest/globals"; -import { SyntheticEmbeddings } from 
"../../embeddings/fake.js"; +import { Embeddings } from "@langchain/core/embeddings"; +import { SyntheticEmbeddings } from "../../utils/testing.js"; import { InMemoryDocstore } from "../../stores/doc/in_memory.js"; import { MatchingEngineArgs, MatchingEngine } from "../googlevertexai.js"; -import { Embeddings } from "../../embeddings/base.js"; describe("Vertex AI matching", () => { let embeddings: Embeddings; diff --git a/langchain/src/vectorstores/tests/hnswlib.int.test.ts b/libs/langchain-community/src/vectorstores/tests/hnswlib.int.test.ts similarity index 96% rename from langchain/src/vectorstores/tests/hnswlib.int.test.ts rename to libs/langchain-community/src/vectorstores/tests/hnswlib.int.test.ts index bc5ba7019fbe..79872e8cf474 100644 --- a/langchain/src/vectorstores/tests/hnswlib.int.test.ts +++ b/libs/langchain-community/src/vectorstores/tests/hnswlib.int.test.ts @@ -3,9 +3,10 @@ import * as fs from "node:fs/promises"; import * as path from "node:path"; import * as os from "node:os"; +import { OpenAIEmbeddings } from "@langchain/openai"; +import { Document } from "@langchain/core/documents"; + import { HNSWLib } from "../hnswlib.js"; -import { OpenAIEmbeddings } from "../../embeddings/openai.js"; -import { Document } from "../../document.js"; test("Test HNSWLib.fromTexts", async () => { const vectorStore = await HNSWLib.fromTexts( diff --git a/langchain/src/vectorstores/tests/hnswlib.test.ts b/libs/langchain-community/src/vectorstores/tests/hnswlib.test.ts similarity index 93% rename from langchain/src/vectorstores/tests/hnswlib.test.ts rename to libs/langchain-community/src/vectorstores/tests/hnswlib.test.ts index b191c7bfd74d..dab197901c42 100644 --- a/langchain/src/vectorstores/tests/hnswlib.test.ts +++ b/libs/langchain-community/src/vectorstores/tests/hnswlib.test.ts @@ -1,7 +1,8 @@ import { test, expect } from "@jest/globals"; +import { Document } from "@langchain/core/documents"; + import { HNSWLib } from "../hnswlib.js"; -import { Document } from 
"../../document.js"; -import { FakeEmbeddings } from "../../embeddings/fake.js"; +import { FakeEmbeddings } from "../../utils/testing.js"; test("Test HNSWLib.fromTexts + addVectors", async () => { const vectorStore = await HNSWLib.fromTexts( diff --git a/langchain/src/vectorstores/tests/lancedb.int.test.ts b/libs/langchain-community/src/vectorstores/tests/lancedb.int.test.ts similarity index 91% rename from langchain/src/vectorstores/tests/lancedb.int.test.ts rename to libs/langchain-community/src/vectorstores/tests/lancedb.int.test.ts index 7ca3bdab9d0d..ec9bb2bb566e 100644 --- a/langchain/src/vectorstores/tests/lancedb.int.test.ts +++ b/libs/langchain-community/src/vectorstores/tests/lancedb.int.test.ts @@ -3,9 +3,11 @@ import * as fs from "node:fs/promises"; import * as path from "node:path"; import * as os from "node:os"; import { connect, Table } from "vectordb"; + +import { OpenAIEmbeddings } from "@langchain/openai"; +import { Document } from "@langchain/core/documents"; + import { LanceDB } from "../lancedb.js"; -import { OpenAIEmbeddings } from "../../embeddings/openai.js"; -import { Document } from "../../document.js"; describe("LanceDB", () => { let lanceDBTable: Table; diff --git a/langchain/src/vectorstores/tests/milvus.int.test.ts b/libs/langchain-community/src/vectorstores/tests/milvus.int.test.ts similarity index 99% rename from langchain/src/vectorstores/tests/milvus.int.test.ts rename to libs/langchain-community/src/vectorstores/tests/milvus.int.test.ts index c7398e250526..bc328304ddbf 100644 --- a/langchain/src/vectorstores/tests/milvus.int.test.ts +++ b/libs/langchain-community/src/vectorstores/tests/milvus.int.test.ts @@ -1,7 +1,7 @@ import { test, expect, afterAll, beforeAll } from "@jest/globals"; import { ErrorCode, MilvusClient } from "@zilliz/milvus2-sdk-node"; +import { OpenAIEmbeddings } from "@langchain/openai"; import { Milvus } from "../milvus.js"; -import { OpenAIEmbeddings } from "../../embeddings/openai.js"; let collectionName: 
string; let embeddings: OpenAIEmbeddings; diff --git a/langchain/src/vectorstores/tests/momento_vector_index.int.test.ts b/libs/langchain-community/src/vectorstores/tests/momento_vector_index.int.test.ts similarity index 97% rename from langchain/src/vectorstores/tests/momento_vector_index.int.test.ts rename to libs/langchain-community/src/vectorstores/tests/momento_vector_index.int.test.ts index 79e598111628..0af54f2f92d0 100644 --- a/langchain/src/vectorstores/tests/momento_vector_index.int.test.ts +++ b/libs/langchain-community/src/vectorstores/tests/momento_vector_index.int.test.ts @@ -8,10 +8,11 @@ import { CredentialProvider, } from "@gomomento/sdk"; import * as uuid from "uuid"; -import { Document } from "../../document.js"; -import { OpenAIEmbeddings } from "../../embeddings/openai.js"; + +import { OpenAIEmbeddings } from "@langchain/openai"; +import { Document } from "@langchain/core/documents"; +import { sleep } from "../../utils/time.js"; import { MomentoVectorIndex } from "../momento_vector_index.js"; -import { sleep } from "../../util/time.js"; async function withVectorStore( block: (vectorStore: MomentoVectorIndex) => Promise diff --git a/langchain/src/vectorstores/tests/mongodb_atlas.int.test.ts b/libs/langchain-community/src/vectorstores/tests/mongodb_atlas.int.test.ts similarity index 97% rename from langchain/src/vectorstores/tests/mongodb_atlas.int.test.ts rename to libs/langchain-community/src/vectorstores/tests/mongodb_atlas.int.test.ts index 26b254fd495c..c27ae96c70f1 100755 --- a/langchain/src/vectorstores/tests/mongodb_atlas.int.test.ts +++ b/libs/langchain-community/src/vectorstores/tests/mongodb_atlas.int.test.ts @@ -6,8 +6,8 @@ import { MongoClient } from "mongodb"; import { setTimeout } from "timers/promises"; import { MongoDBAtlasVectorSearch } from "../mongodb_atlas.js"; -import { Document } from "../../document.js"; -import { OpenAIEmbeddings } from "../../embeddings/openai.js"; +import { OpenAIEmbeddings } from "@langchain/openai"; 
+import { Document } from "@langchain/core/documents"; /** * The following json can be used to create an index in atlas for Cohere embeddings. diff --git a/langchain/src/vectorstores/tests/myscale.int.test.ts b/libs/langchain-community/src/vectorstores/tests/myscale.int.test.ts similarity index 95% rename from langchain/src/vectorstores/tests/myscale.int.test.ts rename to libs/langchain-community/src/vectorstores/tests/myscale.int.test.ts index 294e120581c7..4315ab47a215 100644 --- a/langchain/src/vectorstores/tests/myscale.int.test.ts +++ b/libs/langchain-community/src/vectorstores/tests/myscale.int.test.ts @@ -2,8 +2,8 @@ import { test, expect } from "@jest/globals"; import { MyScaleStore } from "../myscale.js"; -import { OpenAIEmbeddings } from "../../embeddings/openai.js"; -import { Document } from "../../document.js"; +import { OpenAIEmbeddings } from "@langchain/openai"; +import { Document } from "@langchain/core/documents"; test.skip("MyScaleStore.fromText", async () => { const vectorStore = await MyScaleStore.fromTexts( diff --git a/langchain/src/vectorstores/tests/neo4j_vector.int.test.ts b/libs/langchain-community/src/vectorstores/tests/neo4j_vector.int.test.ts similarity index 99% rename from langchain/src/vectorstores/tests/neo4j_vector.int.test.ts rename to libs/langchain-community/src/vectorstores/tests/neo4j_vector.int.test.ts index 5bdfbf02c9ce..7aed797d4cc3 100644 --- a/langchain/src/vectorstores/tests/neo4j_vector.int.test.ts +++ b/libs/langchain-community/src/vectorstores/tests/neo4j_vector.int.test.ts @@ -1,7 +1,7 @@ /* eslint-disable no-process-env */ -import { FakeEmbeddings } from "../../embeddings/fake.js"; +import { Document } from "@langchain/core/documents"; +import { FakeEmbeddings } from "../../utils/testing.js"; import { Neo4jVectorStore } from "../neo4j_vector.js"; -import { Document } from "../../document.js"; const OS_TOKEN_COUNT = 1536; diff --git a/langchain/src/vectorstores/tests/opensearch.int.test.ts 
b/libs/langchain-community/src/vectorstores/tests/opensearch.int.test.ts similarity index 91% rename from langchain/src/vectorstores/tests/opensearch.int.test.ts rename to libs/langchain-community/src/vectorstores/tests/opensearch.int.test.ts index 5f04262928b8..f3a497dc5b9d 100644 --- a/langchain/src/vectorstores/tests/opensearch.int.test.ts +++ b/libs/langchain-community/src/vectorstores/tests/opensearch.int.test.ts @@ -1,9 +1,9 @@ /* eslint-disable no-process-env */ import { test, expect } from "@jest/globals"; import { Client } from "@opensearch-project/opensearch"; -import { OpenAIEmbeddings } from "../../embeddings/openai.js"; +import { OpenAIEmbeddings } from "@langchain/openai"; +import { Document } from "@langchain/core/documents"; import { OpenSearchVectorStore } from "../opensearch.js"; -import { Document } from "../../document.js"; test.skip("OpenSearchVectorStore integration", async () => { if (!process.env.OPENSEARCH_URL) { diff --git a/langchain/src/vectorstores/tests/pgvector.int.test.ts b/libs/langchain-community/src/vectorstores/tests/pgvector.int.test.ts similarity index 97% rename from langchain/src/vectorstores/tests/pgvector.int.test.ts rename to libs/langchain-community/src/vectorstores/tests/pgvector.int.test.ts index f70a777a41db..5d3ae78bd0fc 100644 --- a/langchain/src/vectorstores/tests/pgvector.int.test.ts +++ b/libs/langchain-community/src/vectorstores/tests/pgvector.int.test.ts @@ -1,6 +1,6 @@ import { expect, test } from "@jest/globals"; import type { PoolConfig } from "pg"; -import { OpenAIEmbeddings } from "../../embeddings/openai.js"; +import { OpenAIEmbeddings } from "@langchain/openai"; import { PGVectorStore } from "../pgvector.js"; describe("PGVectorStore", () => { diff --git a/langchain/src/vectorstores/tests/pinecone.int.test.ts b/libs/langchain-community/src/vectorstores/tests/pinecone.int.test.ts similarity index 97% rename from langchain/src/vectorstores/tests/pinecone.int.test.ts rename to 
libs/langchain-community/src/vectorstores/tests/pinecone.int.test.ts index f9c6894b15b5..744c6e73ae31 100644 --- a/langchain/src/vectorstores/tests/pinecone.int.test.ts +++ b/libs/langchain-community/src/vectorstores/tests/pinecone.int.test.ts @@ -5,8 +5,8 @@ import { describe, expect, test } from "@jest/globals"; import { faker } from "@faker-js/faker"; import { Pinecone } from "@pinecone-database/pinecone"; import * as uuid from "uuid"; -import { Document } from "../../document.js"; -import { OpenAIEmbeddings } from "../../embeddings/openai.js"; +import { OpenAIEmbeddings } from "@langchain/openai"; +import { Document } from "@langchain/core/documents"; import { PineconeLibArgs, PineconeStore } from "../pinecone.js"; describe("PineconeStore", () => { diff --git a/langchain/src/vectorstores/tests/pinecone.test.ts b/libs/langchain-community/src/vectorstores/tests/pinecone.test.ts similarity index 97% rename from langchain/src/vectorstores/tests/pinecone.test.ts rename to libs/langchain-community/src/vectorstores/tests/pinecone.test.ts index 8e4dd19439c4..dbd50faf1ef1 100644 --- a/langchain/src/vectorstores/tests/pinecone.test.ts +++ b/libs/langchain-community/src/vectorstores/tests/pinecone.test.ts @@ -1,6 +1,6 @@ /* eslint-disable @typescript-eslint/no-explicit-any */ import { jest, test, expect } from "@jest/globals"; -import { FakeEmbeddings } from "../../embeddings/fake.js"; +import { FakeEmbeddings } from "../../utils/testing.js"; import { PineconeStore } from "../pinecone.js"; test("PineconeStore with external ids", async () => { diff --git a/langchain/src/vectorstores/tests/qdrant.int.test.ts b/libs/langchain-community/src/vectorstores/tests/qdrant.int.test.ts similarity index 94% rename from langchain/src/vectorstores/tests/qdrant.int.test.ts rename to libs/langchain-community/src/vectorstores/tests/qdrant.int.test.ts index 5779d1acdda7..4b7c0b1208a2 100644 --- a/langchain/src/vectorstores/tests/qdrant.int.test.ts +++ 
b/libs/langchain-community/src/vectorstores/tests/qdrant.int.test.ts @@ -3,8 +3,8 @@ import { describe, expect, test } from "@jest/globals"; import { QdrantClient } from "@qdrant/js-client-rest"; import { faker } from "@faker-js/faker"; -import { Document } from "../../document.js"; -import { OpenAIEmbeddings } from "../../embeddings/openai.js"; +import { OpenAIEmbeddings } from "@langchain/openai"; +import { Document } from "@langchain/core/documents"; import { QdrantVectorStore } from "../qdrant.js"; import { OllamaEmbeddings } from "../../embeddings/ollama.js"; diff --git a/langchain/src/vectorstores/tests/qdrant.test.ts b/libs/langchain-community/src/vectorstores/tests/qdrant.test.ts similarity index 93% rename from langchain/src/vectorstores/tests/qdrant.test.ts rename to libs/langchain-community/src/vectorstores/tests/qdrant.test.ts index 38d5a8c1c85e..7e3aaf0eab2b 100644 --- a/langchain/src/vectorstores/tests/qdrant.test.ts +++ b/libs/langchain-community/src/vectorstores/tests/qdrant.test.ts @@ -1,6 +1,6 @@ /* eslint-disable @typescript-eslint/no-explicit-any */ import { jest, test, expect } from "@jest/globals"; -import { FakeEmbeddings } from "../../embeddings/fake.js"; +import { FakeEmbeddings } from "../../utils/testing.js"; import { QdrantVectorStore } from "../qdrant.js"; diff --git a/langchain/src/vectorstores/tests/redis.int.test.ts b/libs/langchain-community/src/vectorstores/tests/redis.int.test.ts similarity index 95% rename from langchain/src/vectorstores/tests/redis.int.test.ts rename to libs/langchain-community/src/vectorstores/tests/redis.int.test.ts index 92249db7d50a..f44f52b8d273 100644 --- a/langchain/src/vectorstores/tests/redis.int.test.ts +++ b/libs/langchain-community/src/vectorstores/tests/redis.int.test.ts @@ -5,9 +5,9 @@ import { RedisClientType, createClient } from "redis"; import { v4 as uuidv4 } from "uuid"; import { test, expect } from "@jest/globals"; import { faker } from "@faker-js/faker"; -import { OpenAIEmbeddings } from 
"../../embeddings/openai.js"; +import { OpenAIEmbeddings } from "@langchain/openai"; +import { Document } from "@langchain/core/documents"; import { RedisVectorStore } from "../redis.js"; -import { Document } from "../../document.js"; describe("RedisVectorStore", () => { let vectorStore: RedisVectorStore; diff --git a/langchain/src/vectorstores/tests/redis.test.ts b/libs/langchain-community/src/vectorstores/tests/redis.test.ts similarity index 98% rename from langchain/src/vectorstores/tests/redis.test.ts rename to libs/langchain-community/src/vectorstores/tests/redis.test.ts index 550c4aff7ccb..681e63c67685 100644 --- a/langchain/src/vectorstores/tests/redis.test.ts +++ b/libs/langchain-community/src/vectorstores/tests/redis.test.ts @@ -1,6 +1,6 @@ /* eslint-disable @typescript-eslint/no-explicit-any */ import { jest, test, expect, describe } from "@jest/globals"; -import { FakeEmbeddings } from "../../embeddings/fake.js"; +import { FakeEmbeddings } from "../../utils/testing.js"; import { RedisVectorStore } from "../redis.js"; diff --git a/langchain/src/vectorstores/tests/rockset.int.test.ts b/libs/langchain-community/src/vectorstores/tests/rockset.int.test.ts similarity index 92% rename from langchain/src/vectorstores/tests/rockset.int.test.ts rename to libs/langchain-community/src/vectorstores/tests/rockset.int.test.ts index 88d93bac8283..6891aa921a8d 100644 --- a/langchain/src/vectorstores/tests/rockset.int.test.ts +++ b/libs/langchain-community/src/vectorstores/tests/rockset.int.test.ts @@ -2,12 +2,12 @@ /* eslint-disable @typescript-eslint/no-non-null-assertion */ import rockset from "@rockset/client"; import { test, expect } from "@jest/globals"; -import { OpenAIEmbeddings } from "../../embeddings/openai.js"; +import { OpenAIEmbeddings } from "@langchain/openai"; +import { Document } from "@langchain/core/documents"; import { RocksetStore, SimilarityMetric } from "../rockset.js"; -import { Document } from "../../document.js"; -import { 
formatDocumentsAsString } from "../../util/document.js"; -const getPageContents = formatDocumentsAsString; +const getPageContents = (documents: Document[]) => + documents.map((document) => document.pageContent); const embeddings = new OpenAIEmbeddings(); let store: RocksetStore | undefined; diff --git a/langchain/src/vectorstores/tests/singlestore.int.test.ts b/libs/langchain-community/src/vectorstores/tests/singlestore.int.test.ts similarity index 97% rename from langchain/src/vectorstores/tests/singlestore.int.test.ts rename to libs/langchain-community/src/vectorstores/tests/singlestore.int.test.ts index c07f3e6d38dd..aacfd5dd71b4 100644 --- a/langchain/src/vectorstores/tests/singlestore.int.test.ts +++ b/libs/langchain-community/src/vectorstores/tests/singlestore.int.test.ts @@ -1,9 +1,9 @@ /* eslint-disable no-process-env */ /* eslint-disable import/no-extraneous-dependencies */ import { test, expect } from "@jest/globals"; -import { OpenAIEmbeddings } from "../../embeddings/openai.js"; +import { OpenAIEmbeddings } from "@langchain/openai"; +import { Document } from "@langchain/core/documents"; import { SingleStoreVectorStore } from "../singlestore.js"; -import { Document } from "../../document.js"; test.skip("SingleStoreVectorStore", async () => { expect(process.env.SINGLESTORE_HOST).toBeDefined(); diff --git a/langchain/src/vectorstores/tests/supabase.int.test.ts b/libs/langchain-community/src/vectorstores/tests/supabase.int.test.ts similarity index 99% rename from langchain/src/vectorstores/tests/supabase.int.test.ts rename to libs/langchain-community/src/vectorstores/tests/supabase.int.test.ts index 04739a143767..cc785d39a06b 100644 --- a/langchain/src/vectorstores/tests/supabase.int.test.ts +++ b/libs/langchain-community/src/vectorstores/tests/supabase.int.test.ts @@ -3,8 +3,8 @@ import { test, expect } from "@jest/globals"; import { createClient } from "@supabase/supabase-js"; -import { OpenAIEmbeddings } from "../../embeddings/openai.js"; -import { 
Document } from "../../document.js"; +import { OpenAIEmbeddings } from "@langchain/openai"; +import { Document } from "@langchain/core/documents"; import { SupabaseVectorStore, SupabaseFilterRPCCall } from "../supabase.js"; test("SupabaseVectorStore with external ids", async () => { diff --git a/langchain/src/vectorstores/tests/supabase.test.ts b/libs/langchain-community/src/vectorstores/tests/supabase.test.ts similarity index 96% rename from langchain/src/vectorstores/tests/supabase.test.ts rename to libs/langchain-community/src/vectorstores/tests/supabase.test.ts index 76ed734f1f66..0e82073da3d0 100644 --- a/langchain/src/vectorstores/tests/supabase.test.ts +++ b/libs/langchain-community/src/vectorstores/tests/supabase.test.ts @@ -3,7 +3,7 @@ import { SupabaseClient } from "@supabase/supabase-js"; import { SupabaseVectorStore } from "../supabase.js"; -import { FakeEmbeddings } from "../../embeddings/fake.js"; +import { FakeEmbeddings } from "../../utils/testing.js"; test("similaritySearchVectorWithScore should call RPC with the vectorstore filters", async () => { const supabaseClientMock = { diff --git a/langchain/src/vectorstores/tests/tigris.test.ts b/libs/langchain-community/src/vectorstores/tests/tigris.test.ts similarity index 96% rename from langchain/src/vectorstores/tests/tigris.test.ts rename to libs/langchain-community/src/vectorstores/tests/tigris.test.ts index 0e722572e7dc..f838ff10fdc0 100644 --- a/langchain/src/vectorstores/tests/tigris.test.ts +++ b/libs/langchain-community/src/vectorstores/tests/tigris.test.ts @@ -1,6 +1,6 @@ /* eslint-disable @typescript-eslint/no-explicit-any */ import { jest, test, expect } from "@jest/globals"; -import { FakeEmbeddings } from "../../embeddings/fake.js"; +import { FakeEmbeddings } from "../../utils/testing.js"; import { TigrisVectorStore } from "../tigris.js"; diff --git a/langchain/src/vectorstores/tests/typeorm.int.test.ts b/libs/langchain-community/src/vectorstores/tests/typeorm.int.test.ts similarity index 
95% rename from langchain/src/vectorstores/tests/typeorm.int.test.ts rename to libs/langchain-community/src/vectorstores/tests/typeorm.int.test.ts index be1068437883..1a59fde00409 100644 --- a/langchain/src/vectorstores/tests/typeorm.int.test.ts +++ b/libs/langchain-community/src/vectorstores/tests/typeorm.int.test.ts @@ -1,6 +1,6 @@ import { expect, test } from "@jest/globals"; import { DataSourceOptions } from "typeorm"; -import { OpenAIEmbeddings } from "../../embeddings/openai.js"; +import { OpenAIEmbeddings } from "@langchain/openai"; import { TypeORMVectorStore } from "../typeorm.js"; test.skip("Test embeddings creation", async () => { diff --git a/langchain/src/vectorstores/tests/typesense.test.ts b/libs/langchain-community/src/vectorstores/tests/typesense.test.ts similarity index 96% rename from langchain/src/vectorstores/tests/typesense.test.ts rename to libs/langchain-community/src/vectorstores/tests/typesense.test.ts index 3ae95690bde0..08c872528001 100644 --- a/langchain/src/vectorstores/tests/typesense.test.ts +++ b/libs/langchain-community/src/vectorstores/tests/typesense.test.ts @@ -1,6 +1,6 @@ import { Client } from "typesense"; -import { Document } from "../../document.js"; -import { FakeEmbeddings } from "../../embeddings/fake.js"; +import { Document } from "@langchain/core/documents"; +import { FakeEmbeddings } from "../../utils/testing.js"; import { Typesense } from "../typesense.js"; test("documentsToTypesenseRecords should return the correct typesense records", async () => { diff --git a/langchain/src/vectorstores/tests/usearch.int.test.ts b/libs/langchain-community/src/vectorstores/tests/usearch.int.test.ts similarity index 94% rename from langchain/src/vectorstores/tests/usearch.int.test.ts rename to libs/langchain-community/src/vectorstores/tests/usearch.int.test.ts index 08fe7836a51f..88ecf2e3a955 100644 --- a/langchain/src/vectorstores/tests/usearch.int.test.ts +++ b/libs/langchain-community/src/vectorstores/tests/usearch.int.test.ts @@ 
-1,7 +1,7 @@ import { test, expect } from "@jest/globals"; +import { Document } from "@langchain/core/documents"; import { USearch } from "../usearch.js"; -import { Document } from "../../document.js"; -import { FakeEmbeddings } from "../../embeddings/fake.js"; +import { FakeEmbeddings } from "../../utils/testing.js"; test("Test USearch.fromTexts + addVectors", async () => { const vectorStore = await USearch.fromTexts( diff --git a/langchain/src/vectorstores/tests/vectara.int.test.ts b/libs/langchain-community/src/vectorstores/tests/vectara.int.test.ts similarity index 98% rename from langchain/src/vectorstores/tests/vectara.int.test.ts rename to libs/langchain-community/src/vectorstores/tests/vectara.int.test.ts index c29ac59a8de6..e8bde2c1bbf5 100644 --- a/langchain/src/vectorstores/tests/vectara.int.test.ts +++ b/libs/langchain-community/src/vectorstores/tests/vectara.int.test.ts @@ -3,8 +3,8 @@ import fs from "fs"; import { expect, beforeAll } from "@jest/globals"; import { insecureHash } from "@langchain/core/utils/hash"; -import { FakeEmbeddings } from "../../embeddings/fake.js"; -import { Document } from "../../document.js"; +import { Document } from "@langchain/core/documents"; +import { FakeEmbeddings } from "../../utils/testing.js"; import { VectaraFile, VectaraLibArgs, VectaraStore } from "../vectara.js"; const getDocs = (): Document[] => { diff --git a/langchain/src/vectorstores/tests/vercel_postgres.int.test.ts b/libs/langchain-community/src/vectorstores/tests/vercel_postgres.int.test.ts similarity index 98% rename from langchain/src/vectorstores/tests/vercel_postgres.int.test.ts rename to libs/langchain-community/src/vectorstores/tests/vercel_postgres.int.test.ts index 16ce499cd24d..a7a8a43230e4 100644 --- a/langchain/src/vectorstores/tests/vercel_postgres.int.test.ts +++ b/libs/langchain-community/src/vectorstores/tests/vercel_postgres.int.test.ts @@ -1,5 +1,5 @@ import { expect, test } from "@jest/globals"; -import { OpenAIEmbeddings } from 
"../../embeddings/openai.js"; +import { OpenAIEmbeddings } from "@langchain/openai"; import { VercelPostgres } from "../vercel_postgres.js"; let vercelPostgresStore: VercelPostgres; diff --git a/langchain/src/vectorstores/tests/voy.int.test.ts b/libs/langchain-community/src/vectorstores/tests/voy.int.test.ts similarity index 92% rename from langchain/src/vectorstores/tests/voy.int.test.ts rename to libs/langchain-community/src/vectorstores/tests/voy.int.test.ts index 5c4abfb357af..d8d221100aa3 100644 --- a/langchain/src/vectorstores/tests/voy.int.test.ts +++ b/libs/langchain-community/src/vectorstores/tests/voy.int.test.ts @@ -1,7 +1,7 @@ import { expect, test } from "@jest/globals"; import { Voy as VoyOriginClient } from "voy-search"; -import { Document } from "../../document.js"; -import { OpenAIEmbeddings } from "../../embeddings/openai.js"; +import { OpenAIEmbeddings } from "@langchain/openai"; +import { Document } from "@langchain/core/documents"; import { VoyVectorStore } from "../voy.js"; const client = new VoyOriginClient(); diff --git a/langchain/src/vectorstores/tests/voy.test.ts b/libs/langchain-community/src/vectorstores/tests/voy.test.ts similarity index 92% rename from langchain/src/vectorstores/tests/voy.test.ts rename to libs/langchain-community/src/vectorstores/tests/voy.test.ts index 5f05a789a2cf..4d8cfcb473a5 100644 --- a/langchain/src/vectorstores/tests/voy.test.ts +++ b/libs/langchain-community/src/vectorstores/tests/voy.test.ts @@ -1,6 +1,6 @@ import { test, expect } from "@jest/globals"; -import { Document } from "../../document.js"; -import { FakeEmbeddings } from "../../embeddings/fake.js"; +import { Document } from "@langchain/core/documents"; +import { FakeEmbeddings } from "../../utils/testing.js"; import { VoyVectorStore, VoyClient } from "../voy.js"; const fakeClient: VoyClient = { diff --git a/langchain/src/vectorstores/tests/weaviate.int.test.ts b/libs/langchain-community/src/vectorstores/tests/weaviate.int.test.ts similarity index 
98% rename from langchain/src/vectorstores/tests/weaviate.int.test.ts rename to libs/langchain-community/src/vectorstores/tests/weaviate.int.test.ts index c12e4e53129c..945bcc7dadbd 100644 --- a/langchain/src/vectorstores/tests/weaviate.int.test.ts +++ b/libs/langchain-community/src/vectorstores/tests/weaviate.int.test.ts @@ -1,9 +1,9 @@ /* eslint-disable no-process-env */ import { test, expect } from "@jest/globals"; import weaviate from "weaviate-ts-client"; +import { OpenAIEmbeddings } from "@langchain/openai"; +import { Document } from "@langchain/core/documents"; import { WeaviateStore } from "../weaviate.js"; -import { OpenAIEmbeddings } from "../../embeddings/openai.js"; -import { Document } from "../../document.js"; test("WeaviateStore", async () => { // Something wrong with the weaviate-ts-client types, so we need to disable diff --git a/langchain/src/vectorstores/tests/weaviate.test.ts b/libs/langchain-community/src/vectorstores/tests/weaviate.test.ts similarity index 100% rename from langchain/src/vectorstores/tests/weaviate.test.ts rename to libs/langchain-community/src/vectorstores/tests/weaviate.test.ts diff --git a/langchain/src/vectorstores/tests/xata.int.test.ts b/libs/langchain-community/src/vectorstores/tests/xata.int.test.ts similarity index 97% rename from langchain/src/vectorstores/tests/xata.int.test.ts rename to libs/langchain-community/src/vectorstores/tests/xata.int.test.ts index 977754d0189c..572f61b83c7b 100644 --- a/langchain/src/vectorstores/tests/xata.int.test.ts +++ b/libs/langchain-community/src/vectorstores/tests/xata.int.test.ts @@ -3,8 +3,8 @@ import { BaseClient } from "@xata.io/client"; import { XataVectorSearch } from "../xata.js"; -import { OpenAIEmbeddings } from "../../embeddings/openai.js"; -import { Document } from "../../document.js"; +import { OpenAIEmbeddings } from "@langchain/openai"; +import { Document } from "@langchain/core/documents"; // Tests require a DB with a table called "docs" with: // * a column name 
content of type Text diff --git a/langchain/src/vectorstores/tests/zep.test.ts b/libs/langchain-community/src/vectorstores/tests/zep.test.ts similarity index 98% rename from langchain/src/vectorstores/tests/zep.test.ts rename to libs/langchain-community/src/vectorstores/tests/zep.test.ts index 5468cc9dab9c..1b6109d64fa6 100644 --- a/langchain/src/vectorstores/tests/zep.test.ts +++ b/libs/langchain-community/src/vectorstores/tests/zep.test.ts @@ -8,10 +8,10 @@ import { NotFoundError, ZepClient, } from "@getzep/zep-js"; -import { Document } from "../../document.js"; +import { Embeddings } from "@langchain/core/embeddings"; +import { Document } from "@langchain/core/documents"; import { IZepConfig, ZepVectorStore } from "../zep.js"; -import { Embeddings } from "../../embeddings/base.js"; -import { FakeEmbeddings } from "../../embeddings/fake.js"; +import { FakeEmbeddings } from "../../utils/testing.js"; jest.mock("@getzep/zep-js"); diff --git a/libs/langchain-community/src/vectorstores/usearch.ts b/libs/langchain-community/src/vectorstores/usearch.ts new file mode 100644 index 000000000000..69711f75e877 --- /dev/null +++ b/libs/langchain-community/src/vectorstores/usearch.ts @@ -0,0 +1,223 @@ +import usearch from "usearch"; +import * as uuid from "uuid"; +import { Embeddings } from "@langchain/core/embeddings"; +import { SaveableVectorStore } from "@langchain/core/vectorstores"; +import { Document } from "@langchain/core/documents"; +import { SynchronousInMemoryDocstore } from "../stores/doc/in_memory.js"; + +/** + * Interface that defines the arguments that can be passed to the + * `USearch` constructor. It includes optional properties for a + * `docstore`, `index`, and `mapping`. 
+ */ +export interface USearchArgs { + docstore?: SynchronousInMemoryDocstore; + index?: usearch.Index; + mapping?: Record<number, string>; +} + +/** + * Class that extends `SaveableVectorStore` and provides methods for + * adding documents and vectors to a `usearch` index, performing + * similarity searches, and saving the index. + */ +export class USearch extends SaveableVectorStore { + _index?: usearch.Index; + + _mapping: Record<number, string>; + + docstore: SynchronousInMemoryDocstore; + + args: USearchArgs; + + _vectorstoreType(): string { + return "usearch"; + } + + constructor(embeddings: Embeddings, args: USearchArgs) { + super(embeddings, args); + this.args = args; + this._index = args.index; + this._mapping = args.mapping ?? {}; + this.embeddings = embeddings; + this.docstore = args?.docstore ?? new SynchronousInMemoryDocstore(); + } + + /** + * Method that adds documents to the `usearch` index. It generates + * embeddings for the documents and adds them to the index. + * @param documents An array of `Document` instances to be added to the index. + * @returns A promise that resolves with an array of document IDs. + */ + async addDocuments(documents: Document[]) { + const texts = documents.map(({ pageContent }) => pageContent); + return this.addVectors( + await this.embeddings.embedDocuments(texts), + documents + ); + } + + public get index(): usearch.Index { + if (!this._index) { + throw new Error( + "Vector store not initialised yet. Try calling `fromTexts` or `fromDocuments` first." + ); + } + return this._index; + } + + private set index(index: usearch.Index) { + this._index = index; + } + + /** + * Method that adds vectors to the `usearch` index. It also updates the + * mapping between vector IDs and document IDs. + * @param vectors An array of vectors to be added to the index. + * @param documents An array of `Document` instances corresponding to the vectors. + * @returns A promise that resolves with an array of document IDs. 
+ */ + async addVectors(vectors: number[][], documents: Document[]) { + if (vectors.length === 0) { + return []; + } + if (vectors.length !== documents.length) { + throw new Error(`Vectors and documents must have the same length`); + } + const dv = vectors[0].length; + if (!this._index) { + this._index = new usearch.Index({ + metric: "l2sq", + connectivity: BigInt(16), + dimensions: BigInt(dv), + }); + } + const d = this.index.dimensions(); + if (BigInt(dv) !== d) { + throw new Error( + `Vectors must have the same length as the number of dimensions (${d})` + ); + } + + const docstoreSize = this.index.size(); + const documentIds = []; + for (let i = 0; i < vectors.length; i += 1) { + const documentId = uuid.v4(); + documentIds.push(documentId); + const id = Number(docstoreSize) + i; + this.index.add(BigInt(id), new Float32Array(vectors[i])); + this._mapping[id] = documentId; + this.docstore.add({ [documentId]: documents[i] }); + } + return documentIds; + } + + /** + * Method that performs a similarity search in the `usearch` index. It + * returns the `k` most similar documents to a given query vector, along + * with their similarity scores. + * @param query The query vector. + * @param k The number of most similar documents to return. + * @returns A promise that resolves with an array of tuples, each containing a `Document` and its similarity score. 
+ */ + async similaritySearchVectorWithScore(query: number[], k: number) { + const d = this.index.dimensions(); + if (BigInt(query.length) !== d) { + throw new Error( + `Query vector must have the same length as the number of dimensions (${d})` + ); + } + if (k > this.index.size()) { + const total = this.index.size(); + console.warn( + `k (${k}) is greater than the number of elements in the index (${total}), setting k to ${total}` + ); + // eslint-disable-next-line no-param-reassign + k = Number(total); + } + const result = this.index.search(new Float32Array(query), BigInt(k)); + + const return_list: [Document, number][] = []; + for (let i = 0; i < result.count; i += 1) { + const uuid = this._mapping[Number(result.keys[i])]; + return_list.push([this.docstore.search(uuid), result.distances[i]]); + } + + return return_list; + } + + /** + * Method that saves the `usearch` index and the document store to disk. + * @param directory The directory where the index and document store should be saved. + * @returns A promise that resolves when the save operation is complete. + */ + async save(directory: string) { + const fs = await import("node:fs/promises"); + const path = await import("node:path"); + await fs.mkdir(directory, { recursive: true }); + await Promise.all([ + this.index.save(path.join(directory, "usearch.index")), + await fs.writeFile( + path.join(directory, "docstore.json"), + JSON.stringify([ + Array.from(this.docstore._docs.entries()), + this._mapping, + ]) + ), + ]); + } + + /** + * Static method that creates a new `USearch` instance from a list of + * texts. It generates embeddings for the texts and adds them to the + * `usearch` index. + * @param texts An array of texts to be added to the index. + * @param metadatas Metadata associated with the texts. + * @param embeddings An instance of `Embeddings` used to generate embeddings for the texts. + * @param dbConfig Optional configuration for the document store. 
+ * @returns A promise that resolves with a new `USearch` instance. + */ + static async fromTexts( + texts: string[], + metadatas: object[] | object, + embeddings: Embeddings, + dbConfig?: { + docstore?: SynchronousInMemoryDocstore; + } + ): Promise { + const docs: Document[] = []; + for (let i = 0; i < texts.length; i += 1) { + const metadata = Array.isArray(metadatas) ? metadatas[i] : metadatas; + const newDoc = new Document({ + pageContent: texts[i], + metadata, + }); + docs.push(newDoc); + } + return this.fromDocuments(docs, embeddings, dbConfig); + } + + /** + * Static method that creates a new `USearch` instance from a list of + * documents. It generates embeddings for the documents and adds them to + * the `usearch` index. + * @param docs An array of `Document` instances to be added to the index. + * @param embeddings An instance of `Embeddings` used to generate embeddings for the documents. + * @param dbConfig Optional configuration for the document store. + * @returns A promise that resolves with a new `USearch` instance. 
+ */ + static async fromDocuments( + docs: Document[], + embeddings: Embeddings, + dbConfig?: { + docstore?: SynchronousInMemoryDocstore; + } + ): Promise { + const args: USearchArgs = { + docstore: dbConfig?.docstore, + }; + const instance = new this(embeddings, args); + await instance.addDocuments(docs); + return instance; + } +} diff --git a/libs/langchain-community/src/vectorstores/weaviate.ts b/libs/langchain-community/src/vectorstores/weaviate.ts new file mode 100644 index 000000000000..cf45fcdda489 --- /dev/null +++ b/libs/langchain-community/src/vectorstores/weaviate.ts @@ -0,0 +1,435 @@ +import * as uuid from "uuid"; +import type { + WeaviateClient, + WeaviateObject, + WhereFilter, +} from "weaviate-ts-client"; +import { + MaxMarginalRelevanceSearchOptions, + VectorStore, +} from "@langchain/core/vectorstores"; +import { Embeddings } from "@langchain/core/embeddings"; +import { Document } from "@langchain/core/documents"; +import { maximalMarginalRelevance } from "@langchain/core/utils/math"; + +// Note this function is not generic, it is designed specifically for Weaviate +// https://weaviate.io/developers/weaviate/config-refs/datatypes#introduction +export const flattenObjectForWeaviate = ( + // eslint-disable-next-line @typescript-eslint/no-explicit-any + obj: Record +) => { + // eslint-disable-next-line @typescript-eslint/no-explicit-any + const flattenedObject: Record = {}; + + for (const key in obj) { + if (!Object.hasOwn(obj, key)) { + continue; + } + const value = obj[key]; + if (typeof obj[key] === "object" && !Array.isArray(value)) { + const recursiveResult = flattenObjectForWeaviate(value); + + for (const deepKey in recursiveResult) { + if (Object.hasOwn(obj, key)) { + flattenedObject[`${key}_${deepKey}`] = recursiveResult[deepKey]; + } + } + } else if (Array.isArray(value)) { + if ( + value.length > 0 && + typeof value[0] !== "object" && + // eslint-disable-next-line @typescript-eslint/no-explicit-any + value.every((el: any) => typeof el === 
typeof value[0]) + ) { + // Weaviate only supports arrays of primitive types, + // where all elements are of the same type + flattenedObject[key] = value; + } + } else { + flattenedObject[key] = value; + } + } + + return flattenedObject; +}; + +/** + * Interface that defines the arguments required to create a new instance + * of the `WeaviateStore` class. It includes the Weaviate client, the name + * of the class in Weaviate, and optional keys for text and metadata. + */ +export interface WeaviateLibArgs { + client: WeaviateClient; + /** + * The name of the class in Weaviate. Must start with a capital letter. + */ + indexName: string; + textKey?: string; + metadataKeys?: string[]; + tenant?: string; +} + +interface ResultRow { + // eslint-disable-next-line @typescript-eslint/no-explicit-any + [key: string]: any; +} + +/** + * Interface that defines a filter for querying data from Weaviate. It + * includes a distance and a `WhereFilter`. + */ +export interface WeaviateFilter { + distance?: number; + where: WhereFilter; +} + +/** + * Class that extends the `VectorStore` base class. It provides methods to + * interact with a Weaviate index, including adding vectors and documents, + * deleting data, and performing similarity searches. 
+ */ +export class WeaviateStore extends VectorStore { + declare FilterType: WeaviateFilter; + + private client: WeaviateClient; + + private indexName: string; + + private textKey: string; + + private queryAttrs: string[]; + + private tenant?: string; + + _vectorstoreType(): string { + return "weaviate"; + } + + constructor(public embeddings: Embeddings, args: WeaviateLibArgs) { + super(embeddings, args); + + this.client = args.client; + this.indexName = args.indexName; + this.textKey = args.textKey || "text"; + this.queryAttrs = [this.textKey]; + this.tenant = args.tenant; + + if (args.metadataKeys) { + this.queryAttrs = [ + ...new Set([ + ...this.queryAttrs, + ...args.metadataKeys.filter((k) => { + // https://spec.graphql.org/June2018/#sec-Names + // queryAttrs need to be valid GraphQL Names + const keyIsValid = /^[_A-Za-z][_0-9A-Za-z]*$/.test(k); + if (!keyIsValid) { + console.warn( + `Skipping metadata key ${k} as it is not a valid GraphQL Name` + ); + } + return keyIsValid; + }), + ]), + ]; + } + } + + /** + * Method to add vectors and corresponding documents to the Weaviate + * index. + * @param vectors Array of vectors to be added. + * @param documents Array of documents corresponding to the vectors. + * @param options Optional parameter that can include specific IDs for the documents. + * @returns An array of document IDs. + */ + async addVectors( + vectors: number[][], + documents: Document[], + options?: { ids?: string[] } + ) { + const documentIds = options?.ids ?? documents.map((_) => uuid.v4()); + const batch: WeaviateObject[] = documents.map((document, index) => { + if (Object.hasOwn(document.metadata, "id")) + throw new Error( + "Document inserted to Weaviate vectorstore should not have `id` in their metadata." + ); + + const flattenedMetadata = flattenObjectForWeaviate(document.metadata); + return { + ...(this.tenant ? 
{ tenant: this.tenant } : {}), + class: this.indexName, + id: documentIds[index], + vector: vectors[index], + properties: { + [this.textKey]: document.pageContent, + ...flattenedMetadata, + }, + }; + }); + + try { + const responses = await this.client.batch + .objectsBatcher() + .withObjects(...batch) + .do(); + // if storing vectors fails, we need to know why + const errorMessages: string[] = []; + responses.forEach((response) => { + if (response?.result?.errors?.error) { + errorMessages.push( + ...response.result.errors.error.map( + (err) => + err.message ?? + "!! Unfortunately no error message was presented in the API response !!" + ) + ); + } + }); + if (errorMessages.length > 0) { + throw new Error(errorMessages.join("\n")); + } + } catch (e) { + throw Error(`Error adding vectors: ${e}`); + } + return documentIds; + } + + /** + * Method to add documents to the Weaviate index. It first generates + * vectors for the documents using the embeddings, then adds the vectors + * and documents to the index. + * @param documents Array of documents to be added. + * @param options Optional parameter that can include specific IDs for the documents. + * @returns An array of document IDs. + */ + async addDocuments(documents: Document[], options?: { ids?: string[] }) { + return this.addVectors( + await this.embeddings.embedDocuments(documents.map((d) => d.pageContent)), + documents, + options + ); + } + + /** + * Method to delete data from the Weaviate index. It can delete data based + * on specific IDs or a filter. + * @param params Object that includes either an array of IDs or a filter for the data to be deleted. + * @returns Promise that resolves when the deletion is complete. 
+ */ + async delete(params: { + ids?: string[]; + filter?: WeaviateFilter; + }): Promise { + const { ids, filter } = params; + + if (ids && ids.length > 0) { + for (const id of ids) { + let deleter = this.client.data + .deleter() + .withClassName(this.indexName) + .withId(id); + + if (this.tenant) { + deleter = deleter.withTenant(this.tenant); + } + + await deleter.do(); + } + } else if (filter) { + let batchDeleter = this.client.batch + .objectsBatchDeleter() + .withClassName(this.indexName) + .withWhere(filter.where); + + if (this.tenant) { + batchDeleter = batchDeleter.withTenant(this.tenant); + } + + await batchDeleter.do(); + } else { + throw new Error( + `This method requires either "ids" or "filter" to be set in the input object` + ); + } + } + + /** + * Method to perform a similarity search on the stored vectors in the + * Weaviate index. It returns the top k most similar documents and their + * similarity scores. + * @param query The query vector. + * @param k The number of most similar documents to return. + * @param filter Optional filter to apply to the search. + * @returns An array of tuples, where each tuple contains a document and its similarity score. + */ + async similaritySearchVectorWithScore( + query: number[], + k: number, + filter?: WeaviateFilter + ): Promise<[Document, number][]> { + const resultsWithEmbedding = + await this.similaritySearchVectorWithScoreAndEmbedding(query, k, filter); + return resultsWithEmbedding.map(([document, score, _embedding]) => [ + document, + score, + ]); + } + + /** + * Method to perform a similarity search on the stored vectors in the + * Weaviate index. It returns the top k most similar documents, their + * similarity scores and embedding vectors. + * @param query The query vector. + * @param k The number of most similar documents to return. + * @param filter Optional filter to apply to the search. 
+ * @returns An array of tuples, where each tuple contains a document, its similarity score and its embedding vector. + */ + async similaritySearchVectorWithScoreAndEmbedding( + query: number[], + k: number, + filter?: WeaviateFilter + ): Promise<[Document, number, number[]][]> { + try { + let builder = this.client.graphql + .get() + .withClassName(this.indexName) + .withFields( + `${this.queryAttrs.join(" ")} _additional { distance vector }` + ) + .withNearVector({ + vector: query, + distance: filter?.distance, + }) + .withLimit(k); + + if (this.tenant) { + builder = builder.withTenant(this.tenant); + } + + if (filter?.where) { + builder = builder.withWhere(filter.where); + } + + const result = await builder.do(); + + const documents: [Document, number, number[]][] = []; + for (const data of result.data.Get[this.indexName]) { + const { [this.textKey]: text, _additional, ...rest }: ResultRow = data; + + documents.push([ + new Document({ + pageContent: text, + metadata: rest, + }), + _additional.distance, + _additional.vector, + ]); + } + return documents; + } catch (e) { + throw Error(`'Error in similaritySearch' ${e}`); + } + } + + /** + * Return documents selected using the maximal marginal relevance. + * Maximal marginal relevance optimizes for similarity to the query AND diversity + * among selected documents. + * + * @param {string} query - Text to look up documents similar to. + * @param {number} options.k - Number of documents to return. + * @param {number} options.fetchK - Number of documents to fetch before passing to the MMR algorithm. + * @param {number} options.lambda - Number between 0 and 1 that determines the degree of diversity among the results, + * where 0 corresponds to maximum diversity and 1 to minimum diversity. + * @param {this["FilterType"]} options.filter - Optional filter + * @param _callbacks + * + * @returns {Promise} - List of documents selected by maximal marginal relevance. 
+ */ + override async maxMarginalRelevanceSearch( + query: string, + options: MaxMarginalRelevanceSearchOptions, + _callbacks?: undefined + ): Promise { + const { k, fetchK = 20, lambda = 0.5, filter } = options; + const queryEmbedding: number[] = await this.embeddings.embedQuery(query); + const allResults: [Document, number, number[]][] = + await this.similaritySearchVectorWithScoreAndEmbedding( + queryEmbedding, + fetchK, + filter + ); + const embeddingList = allResults.map( + ([_doc, _score, embedding]) => embedding + ); + const mmrIndexes = maximalMarginalRelevance( + queryEmbedding, + embeddingList, + lambda, + k + ); + return mmrIndexes + .filter((idx) => idx !== -1) + .map((idx) => allResults[idx][0]); + } + + /** + * Static method to create a new `WeaviateStore` instance from a list of + * texts. It first creates documents from the texts and metadata, then + * adds the documents to the Weaviate index. + * @param texts Array of texts. + * @param metadatas Metadata for the texts. Can be a single object or an array of objects. + * @param embeddings Embeddings to be used for the texts. + * @param args Arguments required to create a new `WeaviateStore` instance. + * @returns A new `WeaviateStore` instance. + */ + static fromTexts( + texts: string[], + metadatas: object | object[], + embeddings: Embeddings, + args: WeaviateLibArgs + ): Promise { + const docs: Document[] = []; + for (let i = 0; i < texts.length; i += 1) { + const metadata = Array.isArray(metadatas) ? metadatas[i] : metadatas; + const newDoc = new Document({ + pageContent: texts[i], + metadata, + }); + docs.push(newDoc); + } + return WeaviateStore.fromDocuments(docs, embeddings, args); + } + + /** + * Static method to create a new `WeaviateStore` instance from a list of + * documents. It adds the documents to the Weaviate index. + * @param docs Array of documents. + * @param embeddings Embeddings to be used for the documents. 
+ * @param args Arguments required to create a new `WeaviateStore` instance. + * @returns A new `WeaviateStore` instance. + */ + static async fromDocuments( + docs: Document[], + embeddings: Embeddings, + args: WeaviateLibArgs + ): Promise { + const instance = new this(embeddings, args); + await instance.addDocuments(docs); + return instance; + } + + /** + * Static method to create a new `WeaviateStore` instance from an existing + * Weaviate index. + * @param embeddings Embeddings to be used for the Weaviate index. + * @param args Arguments required to create a new `WeaviateStore` instance. + * @returns A new `WeaviateStore` instance. + */ + static async fromExistingIndex( + embeddings: Embeddings, + args: WeaviateLibArgs + ): Promise { + return new this(embeddings, args); + } +} diff --git a/libs/langchain-community/src/vectorstores/zep.ts b/libs/langchain-community/src/vectorstores/zep.ts new file mode 100644 index 000000000000..0606c3437495 --- /dev/null +++ b/libs/langchain-community/src/vectorstores/zep.ts @@ -0,0 +1,427 @@ +import { + DocumentCollection, + IDocument, + NotFoundError, + ZepClient, +} from "@getzep/zep-js"; + +import { + MaxMarginalRelevanceSearchOptions, + VectorStore, +} from "@langchain/core/vectorstores"; +import { Embeddings } from "@langchain/core/embeddings"; +import { Document } from "@langchain/core/documents"; +import { Callbacks } from "@langchain/core/callbacks/manager"; +import { maximalMarginalRelevance } from "@langchain/core/utils/math"; +import { FakeEmbeddings } from "../utils/testing.js"; + +/** + * Interface for the arguments required to initialize a ZepVectorStore + * instance. + */ +export interface IZepArgs { + collection: DocumentCollection; +} + +/** + * Interface for the configuration options for a ZepVectorStore instance. 
+ */ +export interface IZepConfig { + apiUrl: string; + apiKey?: string; + collectionName: string; + description?: string; + metadata?: Record; + embeddingDimensions?: number; + isAutoEmbedded?: boolean; +} + +/** + * Interface for the parameters required to delete documents from a + * ZepVectorStore instance. + */ +export interface IZepDeleteParams { + uuids: string[]; +} + +/** + * ZepVectorStore is a VectorStore implementation that uses the Zep long-term memory store as a backend. + * + * If the collection does not exist, it will be created automatically. + * + * Requires `zep-js` to be installed: + * ```bash + * npm install @getzep/zep-js + * ``` + * + * @property {ZepClient} client - The ZepClient instance used to interact with Zep's API. + * @property {Promise} initPromise - A promise that resolves when the collection is initialized. + * @property {DocumentCollection} collection - The Zep document collection. + */ +export class ZepVectorStore extends VectorStore { + public client: ZepClient; + + public collection: DocumentCollection; + + private initPromise: Promise; + + private autoEmbed = false; + + constructor(embeddings: Embeddings, args: IZepConfig) { + super(embeddings, args); + + this.embeddings = embeddings; + + // eslint-disable-next-line no-instanceof/no-instanceof + if (this.embeddings instanceof FakeEmbeddings) { + this.autoEmbed = true; + } + + this.initPromise = this.initCollection(args).catch((err) => { + console.error("Error initializing collection:", err); + throw err; + }); + } + + /** + * Initializes the document collection. If the collection does not exist, it creates a new one. + * + * @param {IZepConfig} args - The configuration object for the Zep API. 
+ */ + private async initCollection(args: IZepConfig) { + this.client = await ZepClient.init(args.apiUrl, args.apiKey); + try { + this.collection = await this.client.document.getCollection( + args.collectionName + ); + + // If the Embedding passed in is fake, but the collection is not auto embedded, throw an error + // eslint-disable-next-line no-instanceof/no-instanceof + if (!this.collection.is_auto_embedded && this.autoEmbed) { + throw new Error(`You can't pass in FakeEmbeddings when collection ${args.collectionName} + is not set to auto-embed.`); + } + } catch (err) { + // eslint-disable-next-line no-instanceof/no-instanceof + if (err instanceof Error) { + // eslint-disable-next-line no-instanceof/no-instanceof + if (err instanceof NotFoundError || err.name === "NotFoundError") { + await this.createCollection(args); + } else { + throw err; + } + } + } + } + + /** + * Creates a new document collection. + * + * @param {IZepConfig} args - The configuration object for the Zep API. + */ + private async createCollection(args: IZepConfig) { + if (!args.embeddingDimensions) { + throw new Error(`Collection ${args.collectionName} not found. + You can create a new Collection by providing embeddingDimensions.`); + } + + this.collection = await this.client.document.addCollection({ + name: args.collectionName, + description: args.description, + metadata: args.metadata, + embeddingDimensions: args.embeddingDimensions, + isAutoEmbedded: this.autoEmbed, + }); + + console.info("Created new collection:", args.collectionName); + } + + /** + * Adds vectors and corresponding documents to the collection. + * + * @param {number[][]} vectors - The vectors to add. + * @param {Document[]} documents - The corresponding documents to add. + * @returns {Promise} - A promise that resolves with the UUIDs of the added documents. 
+ */ + async addVectors( + vectors: number[][], + documents: Document[] + ): Promise { + if (!this.autoEmbed && vectors.length === 0) { + throw new Error(`Vectors must be provided if autoEmbed is false`); + } + if (!this.autoEmbed && vectors.length !== documents.length) { + throw new Error(`Vectors and documents must have the same length`); + } + + const docs: Array = []; + for (let i = 0; i < documents.length; i += 1) { + const doc: IDocument = { + content: documents[i].pageContent, + metadata: documents[i].metadata, + embedding: vectors.length > 0 ? vectors[i] : undefined, + }; + docs.push(doc); + } + // Wait for collection to be initialized + await this.initPromise; + return await this.collection.addDocuments(docs); + } + + /** + * Adds documents to the collection. The documents are first embedded into vectors + * using the provided embedding model. + * + * @param {Document[]} documents - The documents to add. + * @returns {Promise} - A promise that resolves with the UUIDs of the added documents. + */ + async addDocuments(documents: Document[]): Promise { + const texts = documents.map(({ pageContent }) => pageContent); + let vectors: number[][] = []; + if (!this.autoEmbed) { + vectors = await this.embeddings.embedDocuments(texts); + } + return this.addVectors(vectors, documents); + } + + _vectorstoreType(): string { + return "zep"; + } + + /** + * Deletes documents from the collection. + * + * @param {IZepDeleteParams} params - The list of Zep document UUIDs to delete. + * @returns {Promise} + */ + async delete(params: IZepDeleteParams): Promise { + // Wait for collection to be initialized + await this.initPromise; + for (const uuid of params.uuids) { + await this.collection.deleteDocument(uuid); + } + } + + /** + * Performs a similarity search in the collection and returns the results with their scores. + * + * @param {number[]} query - The query vector. + * @param {number} k - The number of results to return. 
+ * @param {Record} filter - The filter to apply to the search. Zep only supports Record as filter. + * @returns {Promise<[Document, number][]>} - A promise that resolves with the search results and their scores. + */ + async similaritySearchVectorWithScore( + query: number[], + k: number, + filter?: Record | undefined + ): Promise<[Document, number][]> { + await this.initPromise; + const results = await this.collection.search( + { + embedding: new Float32Array(query), + metadata: assignMetadata(filter), + }, + k + ); + return zepDocsToDocumentsAndScore(results); + } + + async _similaritySearchWithScore( + query: string, + k: number, + filter?: Record | undefined + ): Promise<[Document, number][]> { + await this.initPromise; + const results = await this.collection.search( + { + text: query, + metadata: assignMetadata(filter), + }, + k + ); + return zepDocsToDocumentsAndScore(results); + } + + async similaritySearchWithScore( + query: string, + k = 4, + filter: Record | undefined = undefined, + _callbacks = undefined // implement passing to embedQuery later + ): Promise<[Document, number][]> { + if (this.autoEmbed) { + return this._similaritySearchWithScore(query, k, filter); + } else { + return this.similaritySearchVectorWithScore( + await this.embeddings.embedQuery(query), + k, + filter + ); + } + } + + /** + * Performs a similarity search on the Zep collection. + * + * @param {string} query - The query string to search for. + * @param {number} [k=4] - The number of results to return. Defaults to 4. + * @param {this["FilterType"] | undefined} [filter=undefined] - An optional set of JSONPath filters to apply to the search. + * @param {Callbacks | undefined} [_callbacks=undefined] - Optional callbacks. Currently not implemented. + * @returns {Promise} - A promise that resolves to an array of Documents that are similar to the query. 
+ * + * @async + */ + async similaritySearch( + query: string, + k = 4, + filter: this["FilterType"] | undefined = undefined, + _callbacks: Callbacks | undefined = undefined // implement passing to embedQuery later + ): Promise { + await this.initPromise; + + let results: [Document, number][]; + if (this.autoEmbed) { + const zepResults = await this.collection.search( + { text: query, metadata: assignMetadata(filter) }, + k + ); + results = zepDocsToDocumentsAndScore(zepResults); + } else { + results = await this.similaritySearchVectorWithScore( + await this.embeddings.embedQuery(query), + k, + assignMetadata(filter) + ); + } + + return results.map((result) => result[0]); + } + + /** + * Return documents selected using the maximal marginal relevance. + * Maximal marginal relevance optimizes for similarity to the query AND diversity + * among selected documents. + * + * @param {string} query - Text to look up documents similar to. + * @param options + * @param {number} options.k - Number of documents to return. + * @param {number} options.fetchK=20- Number of documents to fetch before passing to the MMR algorithm. + * @param {number} options.lambda=0.5 - Number between 0 and 1 that determines the degree of diversity among the results, + * where 0 corresponds to maximum diversity and 1 to minimum diversity. + * @param {Record} options.filter - Optional Zep JSONPath query to pre-filter on document metadata field + * + * @returns {Promise} - List of documents selected by maximal marginal relevance. 
+ */ + async maxMarginalRelevanceSearch( + query: string, + options: MaxMarginalRelevanceSearchOptions + ): Promise { + const { k, fetchK = 20, lambda = 0.5, filter } = options; + + let queryEmbedding: number[]; + let zepResults: IDocument[]; + if (!this.autoEmbed) { + queryEmbedding = await this.embeddings.embedQuery(query); + zepResults = await this.collection.search( + { + embedding: new Float32Array(queryEmbedding), + metadata: assignMetadata(filter), + }, + fetchK + ); + } else { + let queryEmbeddingArray: Float32Array; + [zepResults, queryEmbeddingArray] = + await this.collection.searchReturnQueryVector( + { text: query, metadata: assignMetadata(filter) }, + fetchK + ); + queryEmbedding = Array.from(queryEmbeddingArray); + } + + const results = zepDocsToDocumentsAndScore(zepResults); + + const embeddingList = zepResults.map((doc) => + Array.from(doc.embedding ? doc.embedding : []) + ); + + const mmrIndexes = maximalMarginalRelevance( + queryEmbedding, + embeddingList, + lambda, + k + ); + + return mmrIndexes.filter((idx) => idx !== -1).map((idx) => results[idx][0]); + } + + /** + * Creates a new ZepVectorStore instance from an array of texts. Each text is converted into a Document and added to the collection. + * + * @param {string[]} texts - The texts to convert into Documents. + * @param {object[] | object} metadatas - The metadata to associate with each Document. If an array is provided, each element is associated with the corresponding Document. If an object is provided, it is associated with all Documents. + * @param {Embeddings} embeddings - The embeddings to use for vectorizing the texts. + * @param {IZepConfig} zepConfig - The configuration object for the Zep API. + * @returns {Promise} - A promise that resolves with the new ZepVectorStore instance. 
+ */ + static async fromTexts( + texts: string[], + metadatas: object[] | object, + embeddings: Embeddings, + zepConfig: IZepConfig + ): Promise { + const docs: Document[] = []; + for (let i = 0; i < texts.length; i += 1) { + const metadata = Array.isArray(metadatas) ? metadatas[i] : metadatas; + const newDoc = new Document({ + pageContent: texts[i], + metadata, + }); + docs.push(newDoc); + } + return ZepVectorStore.fromDocuments(docs, embeddings, zepConfig); + } + + /** + * Creates a new ZepVectorStore instance from an array of Documents. Each Document is added to a Zep collection. + * + * @param {Document[]} docs - The Documents to add. + * @param {Embeddings} embeddings - The embeddings to use for vectorizing the Document contents. + * @param {IZepConfig} zepConfig - The configuration object for the Zep API. + * @returns {Promise} - A promise that resolves with the new ZepVectorStore instance. + */ + static async fromDocuments( + docs: Document[], + embeddings: Embeddings, + zepConfig: IZepConfig + ): Promise { + const instance = new this(embeddings, zepConfig); + // Wait for collection to be initialized + await instance.initPromise; + await instance.addDocuments(docs); + return instance; + } +} + +function zepDocsToDocumentsAndScore( + results: IDocument[] +): [Document, number][] { + return results.map((d) => [ + new Document({ + pageContent: d.content, + metadata: d.metadata, + }), + d.score ? 
d.score : 0, + ]); +} + +function assignMetadata( + value: string | Record | object | undefined +): Record | undefined { + if (typeof value === "object" && value !== null) { + return value as Record; + } + if (value !== undefined) { + console.warn("Metadata filters must be an object, Record, or undefined."); + } + return undefined; +} diff --git a/yarn.lock b/yarn.lock index d4b4909be14f..b89ffcdb2e65 100644 --- a/yarn.lock +++ b/yarn.lock @@ -8006,17 +8006,14 @@ __metadata: "@aws-sdk/client-dynamodb": ^3.310.0 "@aws-sdk/client-kendra": ^3.352.0 "@aws-sdk/client-lambda": ^3.310.0 - "@aws-sdk/client-s3": ^3.310.0 "@aws-sdk/client-sagemaker-runtime": ^3.414.0 "@aws-sdk/client-sfn": ^3.362.0 "@aws-sdk/credential-provider-node": ^3.388.0 "@aws-sdk/types": ^3.357.0 - "@azure/storage-blob": ^12.15.0 "@clickhouse/client": ^0.2.5 "@cloudflare/ai": ^1.0.12 "@cloudflare/workers-types": ^4.20230922.0 "@elastic/elasticsearch": ^8.4.0 - "@faker-js/faker": ^7.6.0 "@getmetal/metal-sdk": ^4.0.0 "@getzep/zep-js": ^0.9.0 "@gomomento/sdk": ^1.51.1 @@ -8026,7 +8023,7 @@ __metadata: "@gradientai/nodejs-sdk": ^1.2.0 "@huggingface/inference": ^2.6.4 "@jest/globals": ^29.5.0 - "@langchain/core": ~0.0.9 + "@langchain/core": ~0.0.11-rc.1 "@langchain/openai": ~0.0.1 "@mozilla/readability": ^0.4.4 "@notionhq/client": ^2.2.10 @@ -8049,15 +8046,11 @@ __metadata: "@tensorflow/tfjs-converter": ^3.6.0 "@tensorflow/tfjs-core": ^3.6.0 "@tsconfig/recommended": ^1.0.2 - "@types/d3-dsv": ^2 - "@types/decamelize": ^1.2.0 "@types/flat": ^5.0.2 "@types/html-to-text": ^9 - "@types/js-yaml": ^4 "@types/jsdom": ^21.1.1 "@types/lodash": ^4 "@types/mozilla-readability": ^0.2.1 - "@types/pdf-parse": ^1.1.1 "@types/pg": ^8 "@types/pg-copy-streams": ^1.2.2 "@types/uuid": ^9 @@ -8071,11 +8064,8 @@ __metadata: "@xata.io/client": ^0.28.0 "@xenova/transformers": ^2.5.4 "@zilliz/milvus2-sdk-node": ">=2.2.11" - apify-client: ^2.7.1 - assemblyai: ^2.0.2 axios: ^0.26.0 cassandra-driver: ^4.7.2 - cheerio: ^1.0.0-rc.12 
chromadb: ^1.5.3 closevector-common: 0.1.0-alpha.1 closevector-node: 0.1.0-alpha.10 @@ -8085,7 +8075,6 @@ __metadata: d3-dsv: ^2.0.0 dotenv: ^16.0.3 dpdm: ^3.12.0 - epub2: ^3.0.1 eslint: ^8.33.0 eslint-config-airbnb-base: ^15.0.0 eslint-config-prettier: ^8.6.0 @@ -8096,6 +8085,7 @@ __metadata: faiss-node: ^0.5.1 fast-xml-parser: ^4.2.7 firebase-admin: ^11.9.0 + flat: ^5.0.2 google-auth-library: ^8.9.0 googleapis: ^126.0.1 graphql: ^16.6.0 @@ -8110,31 +8100,21 @@ __metadata: llmonitor: ^0.5.9 lodash: ^4.17.21 mammoth: ^1.5.1 - ml-matrix: ^6.10.4 mongodb: ^5.2.0 mysql2: ^3.3.3 neo4j-driver: ^5.12.0 node-llama-cpp: 2.7.3 - notion-to-md: ^3.1.0 - officeparser: ^4.0.4 - pdf-parse: 1.1.1 - peggy: ^3.0.2 pg: ^8.11.0 pg-copy-streams: ^6.0.5 pickleparser: ^0.2.1 - playwright: ^1.32.1 portkey-ai: ^0.1.11 prettier: ^2.8.3 - puppeteer: ^19.7.2 pyodide: ^0.24.1 redis: ^4.6.6 release-it: ^15.10.1 replicate: ^0.18.0 - rimraf: ^5.0.1 rollup: ^3.19.1 - sonix-speech-recognition: ^2.1.1 sqlite3: ^5.1.4 - srt-parser-2: ^1.2.2 ts-jest: ^29.1.0 typeorm: ^0.3.12 typescript: ~5.1.6 @@ -8145,8 +8125,6 @@ __metadata: voy-search: 0.6.2 weaviate-ts-client: ^1.4.0 web-auth-library: ^1.0.3 - youtube-transcript: ^1.0.6 - youtubei.js: ^5.8.0 zod: ^3.22.3 peerDependencies: "@aws-crypto/sha256-js": ^5.0.0 @@ -8154,11 +8132,9 @@ __metadata: "@aws-sdk/client-dynamodb": ^3.310.0 "@aws-sdk/client-kendra": ^3.352.0 "@aws-sdk/client-lambda": ^3.310.0 - "@aws-sdk/client-s3": ^3.310.0 "@aws-sdk/client-sagemaker-runtime": ^3.310.0 "@aws-sdk/client-sfn": ^3.310.0 "@aws-sdk/credential-provider-node": ^3.388.0 - "@azure/storage-blob": ^12.15.0 "@clickhouse/client": ^0.2.5 "@cloudflare/ai": ^1.0.12 "@elastic/elasticsearch": ^8.4.0 @@ -8195,11 +8171,8 @@ __metadata: "@xata.io/client": ^0.28.0 "@xenova/transformers": ^2.5.4 "@zilliz/milvus2-sdk-node": ">=2.2.7" - apify-client: ^2.7.1 - assemblyai: ^2.0.2 axios: "*" cassandra-driver: ^4.7.2 - cheerio: ^1.0.0-rc.12 chromadb: "*" closevector-common: 0.1.0-alpha.1 
closevector-node: 0.1.0-alpha.10 @@ -8207,7 +8180,6 @@ __metadata: cohere-ai: ">=6.0.0" convex: ^1.3.1 d3-dsv: ^2.0.0 - epub2: ^3.0.1 faiss-node: ^0.5.1 fast-xml-parser: ^4.2.7 firebase-admin: ^11.9.0 @@ -8225,21 +8197,13 @@ __metadata: mysql2: ^3.3.3 neo4j-driver: "*" node-llama-cpp: "*" - notion-to-md: ^3.1.0 - officeparser: ^4.0.4 - pdf-parse: 1.1.1 - peggy: ^3.0.2 pg: ^8.11.0 pg-copy-streams: ^6.0.5 pickleparser: ^0.2.1 - playwright: ^1.32.1 portkey-ai: ^0.1.11 - puppeteer: ^19.7.2 pyodide: ^0.24.1 redis: ^4.6.4 replicate: ^0.18.0 - sonix-speech-recognition: ^2.1.1 - srt-parser-2: ^1.2.2 typeorm: ^0.3.12 typesense: ^1.5.3 usearch: ^1.1.1 @@ -8248,8 +8212,6 @@ __metadata: weaviate-ts-client: ^1.4.0 web-auth-library: ^1.0.3 ws: ^8.14.2 - youtube-transcript: ^1.0.6 - youtubei.js: ^5.8.0 peerDependenciesMeta: "@aws-crypto/sha256-js": optional: true @@ -8261,16 +8223,12 @@ __metadata: optional: true "@aws-sdk/client-lambda": optional: true - "@aws-sdk/client-s3": - optional: true "@aws-sdk/client-sagemaker-runtime": optional: true "@aws-sdk/client-sfn": optional: true "@aws-sdk/credential-provider-node": optional: true - "@azure/storage-blob": - optional: true "@clickhouse/client": optional: true "@cloudflare/ai": @@ -8343,16 +8301,10 @@ __metadata: optional: true "@zilliz/milvus2-sdk-node": optional: true - apify-client: - optional: true - assemblyai: - optional: true axios: optional: true cassandra-driver: optional: true - cheerio: - optional: true chromadb: optional: true closevector-common: @@ -8367,8 +8319,6 @@ __metadata: optional: true d3-dsv: optional: true - epub2: - optional: true faiss-node: optional: true fast-xml-parser: @@ -8403,36 +8353,20 @@ __metadata: optional: true node-llama-cpp: optional: true - notion-to-md: - optional: true - officeparser: - optional: true - pdf-parse: - optional: true - peggy: - optional: true pg: optional: true pg-copy-streams: optional: true pickleparser: optional: true - playwright: - optional: true portkey-ai: optional: 
true - puppeteer: - optional: true pyodide: optional: true redis: optional: true replicate: optional: true - sonix-speech-recognition: - optional: true - srt-parser-2: - optional: true typeorm: optional: true typesense: @@ -8449,14 +8383,10 @@ __metadata: optional: true ws: optional: true - youtube-transcript: - optional: true - youtubei.js: - optional: true languageName: unknown linkType: soft -"@langchain/core@workspace:*, @langchain/core@workspace:langchain-core, @langchain/core@~0.0.10, @langchain/core@~0.0.9": +"@langchain/core@workspace:*, @langchain/core@workspace:langchain-core, @langchain/core@~0.0.11-rc.1": version: 0.0.0-use.local resolution: "@langchain/core@workspace:langchain-core" dependencies: @@ -8478,6 +8408,8 @@ __metadata: jest-environment-node: ^29.6.4 js-tiktoken: ^1.0.8 langsmith: ~0.0.48 + ml-distance: ^4.0.0 + ml-matrix: ^6.10.4 p-queue: ^6.6.2 p-retry: 4 prettier: ^2.8.3 @@ -23109,7 +23041,7 @@ __metadata: "@huggingface/inference": ^2.6.4 "@jest/globals": ^29.5.0 "@langchain/community": ~0.0.0 - "@langchain/core": ~0.0.10 + "@langchain/core": ~0.0.11-rc.1 "@mozilla/readability": ^0.4.4 "@notionhq/client": ^2.2.10 "@opensearch-project/opensearch": ^2.2.0 @@ -23133,7 +23065,6 @@ __metadata: "@tsconfig/recommended": ^1.0.2 "@types/d3-dsv": ^2 "@types/decamelize": ^1.2.0 - "@types/flat": ^5.0.2 "@types/html-to-text": ^9 "@types/js-yaml": ^4 "@types/jsdom": ^21.1.1 @@ -23180,7 +23111,6 @@ __metadata: faiss-node: ^0.5.1 fast-xml-parser: ^4.2.7 firebase-admin: ^11.9.0 - flat: ^5.0.2 google-auth-library: ^8.9.0 googleapis: ^126.0.1 graphql: ^16.6.0 @@ -23200,7 +23130,6 @@ __metadata: lodash: ^4.17.21 mammoth: ^1.5.1 ml-distance: ^4.0.0 - ml-matrix: ^6.10.4 mongodb: ^5.2.0 mysql2: ^3.3.3 neo4j-driver: ^5.12.0 From 444320bb471566c76812c0337af7356bb798e023 Mon Sep 17 00:00:00 2001 From: jacoblee93 Date: Thu, 7 Dec 2023 16:20:47 -0800 Subject: [PATCH 15/22] Move more testsg --- langchain/package.json | 20 -- langchain/src/vectorstores/memory.ts | 193 
+----------------- libs/langchain-community/.gitignore | 3 + libs/langchain-community/package.json | 10 + .../scripts/create-entrypoints.js | 1 + .../src/embeddings/tests/bedrock.int.test.ts | 0 .../src/embeddings/tests/cohere.int.test.ts | 0 .../embeddings/tests/googlepalm.int.test.ts | 0 .../tests/googlevertexai.int.test.ts | 0 .../src/embeddings/tests/hf.int.test.ts | 2 +- .../tests/hf_transformers.int.test.ts | 2 +- .../embeddings/tests/llama_cpp.int.test.ts | 2 +- .../src/embeddings/tests/minimax.int.test.ts | 0 .../src/embeddings/tests/ollama.int.test.ts | 0 .../embeddings/tests/tensorflow.int.test.ts | 2 +- .../src/embeddings/tests/voyage.int.test.ts | 0 .../src/load/import_map.ts | 1 + .../src/vectorstores/memory.ts | 192 +++++++++++++++++ yarn.lock | 10 +- 19 files changed, 214 insertions(+), 224 deletions(-) rename {langchain => libs/langchain-community}/src/embeddings/tests/bedrock.int.test.ts (100%) rename {langchain => libs/langchain-community}/src/embeddings/tests/cohere.int.test.ts (100%) rename {langchain => libs/langchain-community}/src/embeddings/tests/googlepalm.int.test.ts (100%) rename {langchain => libs/langchain-community}/src/embeddings/tests/googlevertexai.int.test.ts (100%) rename {langchain => libs/langchain-community}/src/embeddings/tests/hf.int.test.ts (95%) rename {langchain => libs/langchain-community}/src/embeddings/tests/hf_transformers.int.test.ts (95%) rename {langchain => libs/langchain-community}/src/embeddings/tests/llama_cpp.int.test.ts (94%) rename {langchain => libs/langchain-community}/src/embeddings/tests/minimax.int.test.ts (100%) rename {langchain => libs/langchain-community}/src/embeddings/tests/ollama.int.test.ts (100%) rename {langchain => libs/langchain-community}/src/embeddings/tests/tensorflow.int.test.ts (95%) rename {langchain => libs/langchain-community}/src/embeddings/tests/voyage.int.test.ts (100%) create mode 100644 libs/langchain-community/src/vectorstores/memory.ts diff --git a/langchain/package.json 
b/langchain/package.json index 2c73a180a33a..a08340faae16 100644 --- a/langchain/package.json +++ b/langchain/package.json @@ -882,10 +882,6 @@ "author": "LangChain", "license": "MIT", "devDependencies": { - "@aws-crypto/sha256-js": "^5.0.0", - "@aws-sdk/client-bedrock-runtime": "^3.422.0", - "@aws-sdk/client-dynamodb": "^3.310.0", - "@aws-sdk/client-kendra": "^3.352.0", "@aws-sdk/client-lambda": "^3.310.0", "@aws-sdk/client-s3": "^3.310.0", "@aws-sdk/client-sagemaker-runtime": "^3.414.0", @@ -1024,10 +1020,6 @@ "youtubei.js": "^5.8.0" }, "peerDependencies": { - "@aws-crypto/sha256-js": "^5.0.0", - "@aws-sdk/client-bedrock-runtime": "^3.422.0", - "@aws-sdk/client-dynamodb": "^3.310.0", - "@aws-sdk/client-kendra": "^3.352.0", "@aws-sdk/client-lambda": "^3.310.0", "@aws-sdk/client-s3": "^3.310.0", "@aws-sdk/client-sagemaker-runtime": "^3.310.0", @@ -1127,18 +1119,6 @@ "youtubei.js": "^5.8.0" }, "peerDependenciesMeta": { - "@aws-crypto/sha256-js": { - "optional": true - }, - "@aws-sdk/client-bedrock-runtime": { - "optional": true - }, - "@aws-sdk/client-dynamodb": { - "optional": true - }, - "@aws-sdk/client-kendra": { - "optional": true - }, "@aws-sdk/client-lambda": { "optional": true }, diff --git a/langchain/src/vectorstores/memory.ts b/langchain/src/vectorstores/memory.ts index c2e396980293..917d18c6b806 100644 --- a/langchain/src/vectorstores/memory.ts +++ b/langchain/src/vectorstores/memory.ts @@ -1,192 +1 @@ -import { similarity as ml_distance_similarity } from "ml-distance"; -import { VectorStore } from "./base.js"; -import { Embeddings } from "../embeddings/base.js"; -import { Document } from "../document.js"; - -/** - * Interface representing a vector in memory. It includes the content - * (text), the corresponding embedding (vector), and any associated - * metadata. 
- */ -interface MemoryVector { - content: string; - embedding: number[]; - // eslint-disable-next-line @typescript-eslint/no-explicit-any - metadata: Record; -} - -/** - * Interface for the arguments that can be passed to the - * `MemoryVectorStore` constructor. It includes an optional `similarity` - * function. - */ -export interface MemoryVectorStoreArgs { - similarity?: typeof ml_distance_similarity.cosine; -} - -/** - * Class that extends `VectorStore` to store vectors in memory. Provides - * methods for adding documents, performing similarity searches, and - * creating instances from texts, documents, or an existing index. - */ -export class MemoryVectorStore extends VectorStore { - declare FilterType: (doc: Document) => boolean; - - memoryVectors: MemoryVector[] = []; - - similarity: typeof ml_distance_similarity.cosine; - - _vectorstoreType(): string { - return "memory"; - } - - constructor( - embeddings: Embeddings, - { similarity, ...rest }: MemoryVectorStoreArgs = {} - ) { - super(embeddings, rest); - - this.similarity = similarity ?? ml_distance_similarity.cosine; - } - - /** - * Method to add documents to the memory vector store. It extracts the - * text from each document, generates embeddings for them, and adds the - * resulting vectors to the store. - * @param documents Array of `Document` instances to be added to the store. - * @returns Promise that resolves when all documents have been added. - */ - async addDocuments(documents: Document[]): Promise { - const texts = documents.map(({ pageContent }) => pageContent); - return this.addVectors( - await this.embeddings.embedDocuments(texts), - documents - ); - } - - /** - * Method to add vectors to the memory vector store. It creates - * `MemoryVector` instances for each vector and document pair and adds - * them to the store. - * @param vectors Array of vectors to be added to the store. - * @param documents Array of `Document` instances corresponding to the vectors. 
- * @returns Promise that resolves when all vectors have been added. - */ - async addVectors(vectors: number[][], documents: Document[]): Promise { - const memoryVectors = vectors.map((embedding, idx) => ({ - content: documents[idx].pageContent, - embedding, - metadata: documents[idx].metadata, - })); - - this.memoryVectors = this.memoryVectors.concat(memoryVectors); - } - - /** - * Method to perform a similarity search in the memory vector store. It - * calculates the similarity between the query vector and each vector in - * the store, sorts the results by similarity, and returns the top `k` - * results along with their scores. - * @param query Query vector to compare against the vectors in the store. - * @param k Number of top results to return. - * @param filter Optional filter function to apply to the vectors before performing the search. - * @returns Promise that resolves with an array of tuples, each containing a `Document` and its similarity score. - */ - async similaritySearchVectorWithScore( - query: number[], - k: number, - filter?: this["FilterType"] - ): Promise<[Document, number][]> { - const filterFunction = (memoryVector: MemoryVector) => { - if (!filter) { - return true; - } - - const doc = new Document({ - metadata: memoryVector.metadata, - pageContent: memoryVector.content, - }); - return filter(doc); - }; - const filteredMemoryVectors = this.memoryVectors.filter(filterFunction); - const searches = filteredMemoryVectors - .map((vector, index) => ({ - similarity: this.similarity(query, vector.embedding), - index, - })) - .sort((a, b) => (a.similarity > b.similarity ? -1 : 0)) - .slice(0, k); - - const result: [Document, number][] = searches.map((search) => [ - new Document({ - metadata: filteredMemoryVectors[search.index].metadata, - pageContent: filteredMemoryVectors[search.index].content, - }), - search.similarity, - ]); - - return result; - } - - /** - * Static method to create a `MemoryVectorStore` instance from an array of - * texts. 
It creates a `Document` for each text and metadata pair, and - * adds them to the store. - * @param texts Array of texts to be added to the store. - * @param metadatas Array or single object of metadata corresponding to the texts. - * @param embeddings `Embeddings` instance used to generate embeddings for the texts. - * @param dbConfig Optional `MemoryVectorStoreArgs` to configure the `MemoryVectorStore` instance. - * @returns Promise that resolves with a new `MemoryVectorStore` instance. - */ - static async fromTexts( - texts: string[], - metadatas: object[] | object, - embeddings: Embeddings, - dbConfig?: MemoryVectorStoreArgs - ): Promise { - const docs: Document[] = []; - for (let i = 0; i < texts.length; i += 1) { - const metadata = Array.isArray(metadatas) ? metadatas[i] : metadatas; - const newDoc = new Document({ - pageContent: texts[i], - metadata, - }); - docs.push(newDoc); - } - return MemoryVectorStore.fromDocuments(docs, embeddings, dbConfig); - } - - /** - * Static method to create a `MemoryVectorStore` instance from an array of - * `Document` instances. It adds the documents to the store. - * @param docs Array of `Document` instances to be added to the store. - * @param embeddings `Embeddings` instance used to generate embeddings for the documents. - * @param dbConfig Optional `MemoryVectorStoreArgs` to configure the `MemoryVectorStore` instance. - * @returns Promise that resolves with a new `MemoryVectorStore` instance. - */ - static async fromDocuments( - docs: Document[], - embeddings: Embeddings, - dbConfig?: MemoryVectorStoreArgs - ): Promise { - const instance = new this(embeddings, dbConfig); - await instance.addDocuments(docs); - return instance; - } - - /** - * Static method to create a `MemoryVectorStore` instance from an existing - * index. It creates a new `MemoryVectorStore` instance without adding any - * documents or vectors. - * @param embeddings `Embeddings` instance used to generate embeddings for the documents. 
- * @param dbConfig Optional `MemoryVectorStoreArgs` to configure the `MemoryVectorStore` instance. - * @returns Promise that resolves with a new `MemoryVectorStore` instance. - */ - static async fromExistingIndex( - embeddings: Embeddings, - dbConfig?: MemoryVectorStoreArgs - ): Promise { - const instance = new this(embeddings, dbConfig); - return instance; - } -} +export * from "@langchain/community/vectorstores/memory"; \ No newline at end of file diff --git a/libs/langchain-community/.gitignore b/libs/langchain-community/.gitignore index e252ea2ea09f..9e044f8930e5 100644 --- a/libs/langchain-community/.gitignore +++ b/libs/langchain-community/.gitignore @@ -196,6 +196,9 @@ vectorstores/hnswlib.d.ts vectorstores/lancedb.cjs vectorstores/lancedb.js vectorstores/lancedb.d.ts +vectorstores/memory.cjs +vectorstores/memory.js +vectorstores/memory.d.ts vectorstores/milvus.cjs vectorstores/milvus.js vectorstores/milvus.d.ts diff --git a/libs/langchain-community/package.json b/libs/langchain-community/package.json index cb63dc732e11..37c14ac54263 100644 --- a/libs/langchain-community/package.json +++ b/libs/langchain-community/package.json @@ -37,6 +37,7 @@ "@langchain/openai": "~0.0.1", "flat": "^5.0.2", "langsmith": "~0.0.48", + "ml-distance": "^4.0.0", "uuid": "^9.0.0", "zod": "^3.22.3" }, @@ -173,6 +174,7 @@ "@clickhouse/client": "^0.2.5", "@cloudflare/ai": "^1.0.12", "@elastic/elasticsearch": "^8.4.0", + "@faker-js/faker": "^7.6.0", "@getmetal/metal-sdk": "*", "@getzep/zep-js": "^0.9.0", "@gomomento/sdk": "^1.51.1", @@ -839,6 +841,11 @@ "import": "./vectorstores/lancedb.js", "require": "./vectorstores/lancedb.cjs" }, + "./vectorstores/memory": { + "types": "./vectorstores/memory.d.ts", + "import": "./vectorstores/memory.js", + "require": "./vectorstores/memory.cjs" + }, "./vectorstores/milvus": { "types": "./vectorstores/milvus.d.ts", "import": "./vectorstores/milvus.js", @@ -1401,6 +1408,9 @@ "vectorstores/lancedb.cjs", "vectorstores/lancedb.js", 
"vectorstores/lancedb.d.ts", + "vectorstores/memory.cjs", + "vectorstores/memory.js", + "vectorstores/memory.d.ts", "vectorstores/milvus.cjs", "vectorstores/milvus.js", "vectorstores/milvus.d.ts", diff --git a/libs/langchain-community/scripts/create-entrypoints.js b/libs/langchain-community/scripts/create-entrypoints.js index af6925e9c22b..6d371face720 100644 --- a/libs/langchain-community/scripts/create-entrypoints.js +++ b/libs/langchain-community/scripts/create-entrypoints.js @@ -77,6 +77,7 @@ const entrypoints = { "vectorstores/googlevertexai": "vectorstores/googlevertexai", "vectorstores/hnswlib": "vectorstores/hnswlib", "vectorstores/lancedb": "vectorstores/lancedb", + "vectorstores/memory": "vectorstores/memory", "vectorstores/milvus": "vectorstores/milvus", "vectorstores/momento_vector_index": "vectorstores/momento_vector_index", "vectorstores/mongodb_atlas": "vectorstores/mongodb_atlas", diff --git a/langchain/src/embeddings/tests/bedrock.int.test.ts b/libs/langchain-community/src/embeddings/tests/bedrock.int.test.ts similarity index 100% rename from langchain/src/embeddings/tests/bedrock.int.test.ts rename to libs/langchain-community/src/embeddings/tests/bedrock.int.test.ts diff --git a/langchain/src/embeddings/tests/cohere.int.test.ts b/libs/langchain-community/src/embeddings/tests/cohere.int.test.ts similarity index 100% rename from langchain/src/embeddings/tests/cohere.int.test.ts rename to libs/langchain-community/src/embeddings/tests/cohere.int.test.ts diff --git a/langchain/src/embeddings/tests/googlepalm.int.test.ts b/libs/langchain-community/src/embeddings/tests/googlepalm.int.test.ts similarity index 100% rename from langchain/src/embeddings/tests/googlepalm.int.test.ts rename to libs/langchain-community/src/embeddings/tests/googlepalm.int.test.ts diff --git a/langchain/src/embeddings/tests/googlevertexai.int.test.ts b/libs/langchain-community/src/embeddings/tests/googlevertexai.int.test.ts similarity index 100% rename from 
langchain/src/embeddings/tests/googlevertexai.int.test.ts rename to libs/langchain-community/src/embeddings/tests/googlevertexai.int.test.ts diff --git a/langchain/src/embeddings/tests/hf.int.test.ts b/libs/langchain-community/src/embeddings/tests/hf.int.test.ts similarity index 95% rename from langchain/src/embeddings/tests/hf.int.test.ts rename to libs/langchain-community/src/embeddings/tests/hf.int.test.ts index 7de4c63dd40d..24abd5784abe 100644 --- a/langchain/src/embeddings/tests/hf.int.test.ts +++ b/libs/langchain-community/src/embeddings/tests/hf.int.test.ts @@ -1,7 +1,7 @@ import { test, expect } from "@jest/globals"; +import { Document } from "@langchain/core/documents"; import { HuggingFaceInferenceEmbeddings } from "../hf.js"; import { MemoryVectorStore } from "../../vectorstores/memory.js"; -import { Document } from "../../document.js"; test("HuggingFaceInferenceEmbeddings", async () => { const embeddings = new HuggingFaceInferenceEmbeddings(); diff --git a/langchain/src/embeddings/tests/hf_transformers.int.test.ts b/libs/langchain-community/src/embeddings/tests/hf_transformers.int.test.ts similarity index 95% rename from langchain/src/embeddings/tests/hf_transformers.int.test.ts rename to libs/langchain-community/src/embeddings/tests/hf_transformers.int.test.ts index 0a15a8e1d130..5948d2f0bf90 100644 --- a/langchain/src/embeddings/tests/hf_transformers.int.test.ts +++ b/libs/langchain-community/src/embeddings/tests/hf_transformers.int.test.ts @@ -1,7 +1,7 @@ import { test, expect } from "@jest/globals"; +import { Document } from "@langchain/core/documents"; import { HuggingFaceTransformersEmbeddings } from "../hf_transformers.js"; import { MemoryVectorStore } from "../../vectorstores/memory.js"; -import { Document } from "../../document.js"; test("HuggingFaceTransformersEmbeddings", async () => { const embeddings = new HuggingFaceTransformersEmbeddings(); diff --git a/langchain/src/embeddings/tests/llama_cpp.int.test.ts 
b/libs/langchain-community/src/embeddings/tests/llama_cpp.int.test.ts similarity index 94% rename from langchain/src/embeddings/tests/llama_cpp.int.test.ts rename to libs/langchain-community/src/embeddings/tests/llama_cpp.int.test.ts index 5ec6e33d4d6d..b1819f943a21 100644 --- a/langchain/src/embeddings/tests/llama_cpp.int.test.ts +++ b/libs/langchain-community/src/embeddings/tests/llama_cpp.int.test.ts @@ -1,7 +1,7 @@ /* eslint-disable @typescript-eslint/no-non-null-assertion */ import { test, expect } from "@jest/globals"; -import { getEnvironmentVariable } from "../../util/env.js"; +import { getEnvironmentVariable } from "@langchain/core/utils/env"; import { LlamaCppEmbeddings } from "../llama_cpp.js"; const llamaPath = getEnvironmentVariable("LLAMA_PATH")!; diff --git a/langchain/src/embeddings/tests/minimax.int.test.ts b/libs/langchain-community/src/embeddings/tests/minimax.int.test.ts similarity index 100% rename from langchain/src/embeddings/tests/minimax.int.test.ts rename to libs/langchain-community/src/embeddings/tests/minimax.int.test.ts diff --git a/langchain/src/embeddings/tests/ollama.int.test.ts b/libs/langchain-community/src/embeddings/tests/ollama.int.test.ts similarity index 100% rename from langchain/src/embeddings/tests/ollama.int.test.ts rename to libs/langchain-community/src/embeddings/tests/ollama.int.test.ts diff --git a/langchain/src/embeddings/tests/tensorflow.int.test.ts b/libs/langchain-community/src/embeddings/tests/tensorflow.int.test.ts similarity index 95% rename from langchain/src/embeddings/tests/tensorflow.int.test.ts rename to libs/langchain-community/src/embeddings/tests/tensorflow.int.test.ts index 4c51ef04a3a1..a471147572d4 100644 --- a/langchain/src/embeddings/tests/tensorflow.int.test.ts +++ b/libs/langchain-community/src/embeddings/tests/tensorflow.int.test.ts @@ -1,8 +1,8 @@ import { test, expect } from "@jest/globals"; import "@tensorflow/tfjs-backend-cpu"; +import { Document } from "@langchain/core/documents"; import { 
TensorFlowEmbeddings } from "../tensorflow.js"; import { MemoryVectorStore } from "../../vectorstores/memory.js"; -import { Document } from "../../document.js"; test("TensorflowEmbeddings", async () => { const embeddings = new TensorFlowEmbeddings(); diff --git a/langchain/src/embeddings/tests/voyage.int.test.ts b/libs/langchain-community/src/embeddings/tests/voyage.int.test.ts similarity index 100% rename from langchain/src/embeddings/tests/voyage.int.test.ts rename to libs/langchain-community/src/embeddings/tests/voyage.int.test.ts diff --git a/libs/langchain-community/src/load/import_map.ts b/libs/langchain-community/src/load/import_map.ts index 4d41c56fcf5f..79e48309c105 100644 --- a/libs/langchain-community/src/load/import_map.ts +++ b/libs/langchain-community/src/load/import_map.ts @@ -25,6 +25,7 @@ export * as llms__cloudflare_workersai from "../llms/cloudflare_workersai.js"; export * as llms__fireworks from "../llms/fireworks.js"; export * as llms__ollama from "../llms/ollama.js"; export * as llms__yandex from "../llms/yandex.js"; +export * as vectorstores__memory from "../vectorstores/memory.js"; export * as vectorstores__prisma from "../vectorstores/prisma.js"; export * as vectorstores__vectara from "../vectorstores/vectara.js"; export * as chat_models__baiduwenxin from "../chat_models/baiduwenxin.js"; diff --git a/libs/langchain-community/src/vectorstores/memory.ts b/libs/langchain-community/src/vectorstores/memory.ts new file mode 100644 index 000000000000..9ea9e4aeeeb4 --- /dev/null +++ b/libs/langchain-community/src/vectorstores/memory.ts @@ -0,0 +1,192 @@ +import { similarity as ml_distance_similarity } from "ml-distance"; +import { VectorStore } from "@langchain/core/vectorstores"; +import { Embeddings } from "@langchain/core/embeddings"; +import { Document } from "@langchain/core/documents"; + +/** + * Interface representing a vector in memory. 
It includes the content + * (text), the corresponding embedding (vector), and any associated + * metadata. + */ +interface MemoryVector { + content: string; + embedding: number[]; + // eslint-disable-next-line @typescript-eslint/no-explicit-any + metadata: Record; +} + +/** + * Interface for the arguments that can be passed to the + * `MemoryVectorStore` constructor. It includes an optional `similarity` + * function. + */ +export interface MemoryVectorStoreArgs { + similarity?: typeof ml_distance_similarity.cosine; +} + +/** + * Class that extends `VectorStore` to store vectors in memory. Provides + * methods for adding documents, performing similarity searches, and + * creating instances from texts, documents, or an existing index. + */ +export class MemoryVectorStore extends VectorStore { + declare FilterType: (doc: Document) => boolean; + + memoryVectors: MemoryVector[] = []; + + similarity: typeof ml_distance_similarity.cosine; + + _vectorstoreType(): string { + return "memory"; + } + + constructor( + embeddings: Embeddings, + { similarity, ...rest }: MemoryVectorStoreArgs = {} + ) { + super(embeddings, rest); + + this.similarity = similarity ?? ml_distance_similarity.cosine; + } + + /** + * Method to add documents to the memory vector store. It extracts the + * text from each document, generates embeddings for them, and adds the + * resulting vectors to the store. + * @param documents Array of `Document` instances to be added to the store. + * @returns Promise that resolves when all documents have been added. + */ + async addDocuments(documents: Document[]): Promise { + const texts = documents.map(({ pageContent }) => pageContent); + return this.addVectors( + await this.embeddings.embedDocuments(texts), + documents + ); + } + + /** + * Method to add vectors to the memory vector store. It creates + * `MemoryVector` instances for each vector and document pair and adds + * them to the store. + * @param vectors Array of vectors to be added to the store. 
+ * @param documents Array of `Document` instances corresponding to the vectors. + * @returns Promise that resolves when all vectors have been added. + */ + async addVectors(vectors: number[][], documents: Document[]): Promise { + const memoryVectors = vectors.map((embedding, idx) => ({ + content: documents[idx].pageContent, + embedding, + metadata: documents[idx].metadata, + })); + + this.memoryVectors = this.memoryVectors.concat(memoryVectors); + } + + /** + * Method to perform a similarity search in the memory vector store. It + * calculates the similarity between the query vector and each vector in + * the store, sorts the results by similarity, and returns the top `k` + * results along with their scores. + * @param query Query vector to compare against the vectors in the store. + * @param k Number of top results to return. + * @param filter Optional filter function to apply to the vectors before performing the search. + * @returns Promise that resolves with an array of tuples, each containing a `Document` and its similarity score. + */ + async similaritySearchVectorWithScore( + query: number[], + k: number, + filter?: this["FilterType"] + ): Promise<[Document, number][]> { + const filterFunction = (memoryVector: MemoryVector) => { + if (!filter) { + return true; + } + + const doc = new Document({ + metadata: memoryVector.metadata, + pageContent: memoryVector.content, + }); + return filter(doc); + }; + const filteredMemoryVectors = this.memoryVectors.filter(filterFunction); + const searches = filteredMemoryVectors + .map((vector, index) => ({ + similarity: this.similarity(query, vector.embedding), + index, + })) + .sort((a, b) => (a.similarity > b.similarity ? 
-1 : 0)) + .slice(0, k); + + const result: [Document, number][] = searches.map((search) => [ + new Document({ + metadata: filteredMemoryVectors[search.index].metadata, + pageContent: filteredMemoryVectors[search.index].content, + }), + search.similarity, + ]); + + return result; + } + + /** + * Static method to create a `MemoryVectorStore` instance from an array of + * texts. It creates a `Document` for each text and metadata pair, and + * adds them to the store. + * @param texts Array of texts to be added to the store. + * @param metadatas Array or single object of metadata corresponding to the texts. + * @param embeddings `Embeddings` instance used to generate embeddings for the texts. + * @param dbConfig Optional `MemoryVectorStoreArgs` to configure the `MemoryVectorStore` instance. + * @returns Promise that resolves with a new `MemoryVectorStore` instance. + */ + static async fromTexts( + texts: string[], + metadatas: object[] | object, + embeddings: Embeddings, + dbConfig?: MemoryVectorStoreArgs + ): Promise { + const docs: Document[] = []; + for (let i = 0; i < texts.length; i += 1) { + const metadata = Array.isArray(metadatas) ? metadatas[i] : metadatas; + const newDoc = new Document({ + pageContent: texts[i], + metadata, + }); + docs.push(newDoc); + } + return MemoryVectorStore.fromDocuments(docs, embeddings, dbConfig); + } + + /** + * Static method to create a `MemoryVectorStore` instance from an array of + * `Document` instances. It adds the documents to the store. + * @param docs Array of `Document` instances to be added to the store. + * @param embeddings `Embeddings` instance used to generate embeddings for the documents. + * @param dbConfig Optional `MemoryVectorStoreArgs` to configure the `MemoryVectorStore` instance. + * @returns Promise that resolves with a new `MemoryVectorStore` instance. 
+ */ + static async fromDocuments( + docs: Document[], + embeddings: Embeddings, + dbConfig?: MemoryVectorStoreArgs + ): Promise { + const instance = new this(embeddings, dbConfig); + await instance.addDocuments(docs); + return instance; + } + + /** + * Static method to create a `MemoryVectorStore` instance from an existing + * index. It creates a new `MemoryVectorStore` instance without adding any + * documents or vectors. + * @param embeddings `Embeddings` instance used to generate embeddings for the documents. + * @param dbConfig Optional `MemoryVectorStoreArgs` to configure the `MemoryVectorStore` instance. + * @returns Promise that resolves with a new `MemoryVectorStore` instance. + */ + static async fromExistingIndex( + embeddings: Embeddings, + dbConfig?: MemoryVectorStoreArgs + ): Promise { + const instance = new this(embeddings, dbConfig); + return instance; + } +} diff --git a/yarn.lock b/yarn.lock index b89ffcdb2e65..ee5eca7a6868 100644 --- a/yarn.lock +++ b/yarn.lock @@ -8100,6 +8100,7 @@ __metadata: llmonitor: ^0.5.9 lodash: ^4.17.21 mammoth: ^1.5.1 + ml-distance: ^4.0.0 mongodb: ^5.2.0 mysql2: ^3.3.3 neo4j-driver: ^5.12.0 @@ -8138,6 +8139,7 @@ __metadata: "@clickhouse/client": ^0.2.5 "@cloudflare/ai": ^1.0.12 "@elastic/elasticsearch": ^8.4.0 + "@faker-js/faker": ^7.6.0 "@getmetal/metal-sdk": "*" "@getzep/zep-js": ^0.9.0 "@gomomento/sdk": ^1.51.1 @@ -23015,8 +23017,6 @@ __metadata: resolution: "langchain@workspace:langchain" dependencies: "@anthropic-ai/sdk": ^0.9.1 - "@aws-crypto/sha256-js": ^5.0.0 - "@aws-sdk/client-bedrock-runtime": ^3.422.0 "@aws-sdk/client-dynamodb": ^3.310.0 "@aws-sdk/client-kendra": ^3.352.0 "@aws-sdk/client-lambda": ^3.310.0 @@ -23173,8 +23173,6 @@ __metadata: zod: ^3.22.3 zod-to-json-schema: 3.20.3 peerDependencies: - "@aws-crypto/sha256-js": ^5.0.0 - "@aws-sdk/client-bedrock-runtime": ^3.422.0 "@aws-sdk/client-dynamodb": ^3.310.0 "@aws-sdk/client-kendra": ^3.352.0 "@aws-sdk/client-lambda": ^3.310.0 @@ -23275,10 +23273,6 @@ 
__metadata: youtube-transcript: ^1.0.6 youtubei.js: ^5.8.0 peerDependenciesMeta: - "@aws-crypto/sha256-js": - optional: true - "@aws-sdk/client-bedrock-runtime": - optional: true "@aws-sdk/client-dynamodb": optional: true "@aws-sdk/client-kendra": From 8381c76fe01e0984c7e5cc66957381d72dde4106 Mon Sep 17 00:00:00 2001 From: jacoblee93 Date: Thu, 7 Dec 2023 16:42:54 -0800 Subject: [PATCH 16/22] Revert serialization changes --- .../hubs/makersuite/tests/googlemakersuitehub.test.ts | 6 +++--- langchain/src/load/tests/__snapshots__/load.test.ts.snap | 2 +- libs/langchain-community/src/chat_models/baiduwenxin.ts | 2 -- libs/langchain-community/src/chat_models/bedrock/web.ts | 2 -- .../src/chat_models/cloudflare_workersai.ts | 2 -- libs/langchain-community/src/chat_models/fireworks.ts | 2 -- libs/langchain-community/src/chat_models/googlepalm.ts | 2 -- .../src/chat_models/googlevertexai/common.ts | 2 -- .../src/chat_models/iflytek_xinghuo/common.ts | 2 -- libs/langchain-community/src/chat_models/llama_cpp.ts | 2 -- libs/langchain-community/src/chat_models/minimax.ts | 2 -- libs/langchain-community/src/chat_models/ollama.ts | 2 -- libs/langchain-community/src/chat_models/portkey.ts | 2 -- libs/langchain-community/src/llms/ai21.ts | 2 -- libs/langchain-community/src/llms/aleph_alpha.ts | 2 -- libs/langchain-community/src/llms/bedrock/web.ts | 2 -- libs/langchain-community/src/llms/cloudflare_workersai.ts | 2 -- libs/langchain-community/src/llms/cohere.ts | 2 -- libs/langchain-community/src/llms/fireworks.ts | 2 -- libs/langchain-community/src/llms/googlepalm.ts | 2 -- libs/langchain-community/src/llms/googlevertexai/common.ts | 2 -- libs/langchain-community/src/llms/gradient_ai.ts | 2 -- libs/langchain-community/src/llms/hf.ts | 2 -- libs/langchain-community/src/llms/llama_cpp.ts | 2 -- libs/langchain-community/src/llms/ollama.ts | 2 -- libs/langchain-community/src/llms/portkey.ts | 2 -- libs/langchain-community/src/llms/raycast.ts | 2 -- 
libs/langchain-community/src/llms/replicate.ts | 2 -- libs/langchain-community/src/llms/sagemaker_endpoint.ts | 2 -- libs/langchain-community/src/llms/watsonx_ai.ts | 2 -- libs/langchain-community/src/llms/writer.ts | 2 -- libs/langchain-community/src/llms/yandex.ts | 2 -- libs/langchain-community/src/tools/aiplugin.ts | 4 ---- libs/langchain-community/src/tools/aws_sfn.ts | 4 ---- libs/langchain-community/src/tools/bingserpapi.ts | 4 ---- libs/langchain-community/src/tools/brave_search.ts | 4 ---- libs/langchain-community/src/tools/connery.ts | 4 ---- libs/langchain-community/src/tools/dadjokeapi.ts | 4 ---- libs/langchain-community/src/tools/dataforseo_api_search.ts | 4 ---- libs/langchain-community/src/tools/gmail/base.ts | 4 ---- libs/langchain-community/src/tools/google_custom_search.ts | 4 ---- libs/langchain-community/src/tools/google_places.ts | 4 ---- libs/langchain-community/src/tools/ifttt.ts | 4 ---- libs/langchain-community/src/tools/searchapi.ts | 4 ---- libs/langchain-community/src/tools/searxng_search.ts | 4 ---- libs/langchain-community/src/tools/serpapi.ts | 4 ---- libs/langchain-community/src/tools/serper.ts | 4 ---- libs/langchain-community/src/tools/wikipedia_query_run.ts | 4 ---- libs/langchain-community/src/tools/wolframalpha.ts | 4 ---- 49 files changed, 4 insertions(+), 132 deletions(-) diff --git a/langchain/src/experimental/hubs/makersuite/tests/googlemakersuitehub.test.ts b/langchain/src/experimental/hubs/makersuite/tests/googlemakersuitehub.test.ts index 38253bde4037..10e071a4c25e 100644 --- a/langchain/src/experimental/hubs/makersuite/tests/googlemakersuitehub.test.ts +++ b/langchain/src/experimental/hubs/makersuite/tests/googlemakersuitehub.test.ts @@ -52,7 +52,7 @@ describe("Google Maker Suite Hub", () => { const model = prompt.toModel(); // console.log(model.lc_namespace); expect(model.lc_namespace).toEqual([ - "langchain_community", + "langchain", "llms", "googlepalm", ]); @@ -82,7 +82,7 @@ describe("Google Maker Suite Hub", () => 
{ const prompt = new MakerSuitePrompt(dataFile); const model = prompt.toModel(); expect(model.lc_namespace).toEqual([ - "langchain_community", + "langchain", "llms", "googlepalm", ]); @@ -97,7 +97,7 @@ describe("Google Maker Suite Hub", () => { const prompt = new MakerSuitePrompt(chatFile); const model = prompt.toModel(); expect(model.lc_namespace).toEqual([ - "langchain_community", + "langchain", "chat_models", "googlepalm", ]); diff --git a/langchain/src/load/tests/__snapshots__/load.test.ts.snap b/langchain/src/load/tests/__snapshots__/load.test.ts.snap index 0b2f7fcbe5e2..cf24e88de6c3 100644 --- a/langchain/src/load/tests/__snapshots__/load.test.ts.snap +++ b/langchain/src/load/tests/__snapshots__/load.test.ts.snap @@ -283,7 +283,7 @@ exports[`serialize + deserialize llm with optional deps 1`] = ` "lc: 1 type: constructor id: - - langchain_community + - langchain - llms - cohere - Cohere diff --git a/libs/langchain-community/src/chat_models/baiduwenxin.ts b/libs/langchain-community/src/chat_models/baiduwenxin.ts index 01a3719bcee7..f79973bed372 100644 --- a/libs/langchain-community/src/chat_models/baiduwenxin.ts +++ b/libs/langchain-community/src/chat_models/baiduwenxin.ts @@ -191,8 +191,6 @@ export class ChatBaiduWenxin }; } - lc_namespace = ["langchain_community", "chat_models", this._llmType()]; - get lc_aliases(): { [key: string]: string } | undefined { return undefined; } diff --git a/libs/langchain-community/src/chat_models/bedrock/web.ts b/libs/langchain-community/src/chat_models/bedrock/web.ts index 404cb29cb0d4..e0ac54c39f81 100644 --- a/libs/langchain-community/src/chat_models/bedrock/web.ts +++ b/libs/langchain-community/src/chat_models/bedrock/web.ts @@ -125,8 +125,6 @@ export class BedrockChat extends SimpleChatModel implements BaseBedrockInput { lc_serializable = true; - lc_namespace = ["langchain_community", "chat_models", this._llmType()]; - get lc_aliases(): Record { return { model: "model_id", diff --git 
a/libs/langchain-community/src/chat_models/cloudflare_workersai.ts b/libs/langchain-community/src/chat_models/cloudflare_workersai.ts index f46c0addb47c..c50de33f765a 100644 --- a/libs/langchain-community/src/chat_models/cloudflare_workersai.ts +++ b/libs/langchain-community/src/chat_models/cloudflare_workersai.ts @@ -52,8 +52,6 @@ export class ChatCloudflareWorkersAI lc_serializable = true; - lc_namespace = ["langchain_community", "chat_models", this._llmType()]; - model = "@cf/meta/llama-2-7b-chat-int8"; cloudflareAccountId?: string; diff --git a/libs/langchain-community/src/chat_models/fireworks.ts b/libs/langchain-community/src/chat_models/fireworks.ts index ac7290d84570..b9de098bf2b2 100644 --- a/libs/langchain-community/src/chat_models/fireworks.ts +++ b/libs/langchain-community/src/chat_models/fireworks.ts @@ -57,8 +57,6 @@ export class ChatFireworks extends ChatOpenAI { lc_serializable = true; - lc_namespace = ["langchain_community", "chat_models", this._llmType()]; - fireworksApiKey?: string; constructor( diff --git a/libs/langchain-community/src/chat_models/googlepalm.ts b/libs/langchain-community/src/chat_models/googlepalm.ts index 77aa99c31266..aec212d7d04d 100644 --- a/libs/langchain-community/src/chat_models/googlepalm.ts +++ b/libs/langchain-community/src/chat_models/googlepalm.ts @@ -124,8 +124,6 @@ export class ChatGooglePaLM lc_serializable = true; - lc_namespace = ["langchain_community", "chat_models", this._llmType()]; - get lc_secrets(): { [key: string]: string } | undefined { return { apiKey: "GOOGLE_PALM_API_KEY", diff --git a/libs/langchain-community/src/chat_models/googlevertexai/common.ts b/libs/langchain-community/src/chat_models/googlevertexai/common.ts index 760430884fe3..d1208de1c6d9 100644 --- a/libs/langchain-community/src/chat_models/googlevertexai/common.ts +++ b/libs/langchain-community/src/chat_models/googlevertexai/common.ts @@ -214,8 +214,6 @@ export class BaseChatGoogleVertexAI AuthOptions >; - lc_namespace = 
["langchain_community", "chat_models", this._llmType()]; - get lc_aliases(): Record { return { model: "model_name", diff --git a/libs/langchain-community/src/chat_models/iflytek_xinghuo/common.ts b/libs/langchain-community/src/chat_models/iflytek_xinghuo/common.ts index 4501ee2a9fac..af04bd8ec09f 100644 --- a/libs/langchain-community/src/chat_models/iflytek_xinghuo/common.ts +++ b/libs/langchain-community/src/chat_models/iflytek_xinghuo/common.ts @@ -187,8 +187,6 @@ export abstract class BaseChatIflytekXinghuo return ["stop", "signal", "options"]; } - lc_namespace = ["langchain_community", "chat_models", this._llmType()]; - get lc_secrets(): { [key: string]: string } | undefined { return { iflytekApiKey: "IFLYTEK_API_KEY", diff --git a/libs/langchain-community/src/chat_models/llama_cpp.ts b/libs/langchain-community/src/chat_models/llama_cpp.ts index 05a95d85ff60..5f2fc95468be 100644 --- a/libs/langchain-community/src/chat_models/llama_cpp.ts +++ b/libs/langchain-community/src/chat_models/llama_cpp.ts @@ -84,8 +84,6 @@ export class ChatLlamaCpp extends SimpleChatModel { lc_serializable = true; - lc_namespace = ["langchain_community", "chat_models", this._llmType()]; - static lc_name() { return "ChatLlamaCpp"; } diff --git a/libs/langchain-community/src/chat_models/minimax.ts b/libs/langchain-community/src/chat_models/minimax.ts index 12d4347c9739..bb450bf92d38 100644 --- a/libs/langchain-community/src/chat_models/minimax.ts +++ b/libs/langchain-community/src/chat_models/minimax.ts @@ -332,8 +332,6 @@ export class ChatMinimax lc_serializable = true; - lc_namespace = ["langchain_community", "chat_models", this._llmType()]; - minimaxGroupId?: string; minimaxApiKey?: string; diff --git a/libs/langchain-community/src/chat_models/ollama.ts b/libs/langchain-community/src/chat_models/ollama.ts index 13c03be23c0d..aa0d413e8ac0 100644 --- a/libs/langchain-community/src/chat_models/ollama.ts +++ b/libs/langchain-community/src/chat_models/ollama.ts @@ -59,8 +59,6 @@ export 
class ChatOllama lc_serializable = true; - lc_namespace = ["langchain_community", "chat_models", this._llmType()]; - model = "llama2"; baseUrl = "http://localhost:11434"; diff --git a/libs/langchain-community/src/chat_models/portkey.ts b/libs/langchain-community/src/chat_models/portkey.ts index 67076cc50385..9941e58c05ff 100644 --- a/libs/langchain-community/src/chat_models/portkey.ts +++ b/libs/langchain-community/src/chat_models/portkey.ts @@ -81,8 +81,6 @@ export class PortkeyChat extends BaseChatModel { session: PortkeySession; - lc_namespace = ["langchain_community", "chat_models", this._llmType()]; - constructor(init?: Partial) { super(init ?? {}); this.apiKey = init?.apiKey; diff --git a/libs/langchain-community/src/llms/ai21.ts b/libs/langchain-community/src/llms/ai21.ts index 0dcd1169fcf5..2def8056f650 100644 --- a/libs/langchain-community/src/llms/ai21.ts +++ b/libs/langchain-community/src/llms/ai21.ts @@ -40,8 +40,6 @@ export interface AI21Input extends BaseLLMParams { export class AI21 extends LLM implements AI21Input { lc_serializable = true; - lc_namespace = ["langchain_community", "llms", this._llmType()]; - model = "j2-jumbo-instruct"; temperature = 0.7; diff --git a/libs/langchain-community/src/llms/aleph_alpha.ts b/libs/langchain-community/src/llms/aleph_alpha.ts index 531111ae6092..412604ce5227 100644 --- a/libs/langchain-community/src/llms/aleph_alpha.ts +++ b/libs/langchain-community/src/llms/aleph_alpha.ts @@ -51,8 +51,6 @@ export interface AlephAlphaInput extends BaseLLMParams { export class AlephAlpha extends LLM implements AlephAlphaInput { lc_serializable = true; - lc_namespace = ["langchain_community", "llms", this._llmType()]; - model = "luminous-base"; maximum_tokens = 64; diff --git a/libs/langchain-community/src/llms/bedrock/web.ts b/libs/langchain-community/src/llms/bedrock/web.ts index 326bbbefc3b2..2c9afa62bfb0 100644 --- a/libs/langchain-community/src/llms/bedrock/web.ts +++ b/libs/langchain-community/src/llms/bedrock/web.ts @@ 
-52,8 +52,6 @@ export class Bedrock extends LLM implements BaseBedrockInput { lc_serializable = true; - lc_namespace = ["langchain_community", "llms", this._llmType()]; - get lc_aliases(): Record { return { model: "model_id", diff --git a/libs/langchain-community/src/llms/cloudflare_workersai.ts b/libs/langchain-community/src/llms/cloudflare_workersai.ts index 456271c28737..b7b05ed05f53 100644 --- a/libs/langchain-community/src/llms/cloudflare_workersai.ts +++ b/libs/langchain-community/src/llms/cloudflare_workersai.ts @@ -41,8 +41,6 @@ export class CloudflareWorkersAI lc_serializable = true; - lc_namespace = ["langchain_community", "llms", this._llmType()]; - constructor(fields?: CloudflareWorkersAIInput & BaseLLMParams) { super(fields ?? {}); diff --git a/libs/langchain-community/src/llms/cohere.ts b/libs/langchain-community/src/llms/cohere.ts index 1fb88505a19f..6d73e684cb91 100644 --- a/libs/langchain-community/src/llms/cohere.ts +++ b/libs/langchain-community/src/llms/cohere.ts @@ -55,8 +55,6 @@ export class Cohere extends LLM implements CohereInput { lc_serializable = true; - lc_namespace = ["langchain_community", "llms", this._llmType()]; - temperature = 0; maxTokens = 250; diff --git a/libs/langchain-community/src/llms/fireworks.ts b/libs/langchain-community/src/llms/fireworks.ts index 4f1cd411ffdc..5b28b8008af4 100644 --- a/libs/langchain-community/src/llms/fireworks.ts +++ b/libs/langchain-community/src/llms/fireworks.ts @@ -46,8 +46,6 @@ export class Fireworks extends OpenAI { lc_serializable = true; - lc_namespace = ["langchain_community", "llms", this._llmType()]; - fireworksApiKey?: string; constructor( diff --git a/libs/langchain-community/src/llms/googlepalm.ts b/libs/langchain-community/src/llms/googlepalm.ts index 66042a38f67c..0839b2376aac 100644 --- a/libs/langchain-community/src/llms/googlepalm.ts +++ b/libs/langchain-community/src/llms/googlepalm.ts @@ -86,8 +86,6 @@ export interface GooglePaLMTextInput extends BaseLLMParams { export class 
GooglePaLM extends LLM implements GooglePaLMTextInput { lc_serializable = true; - lc_namespace = ["langchain_community", "llms", this._llmType()]; - get lc_secrets(): { [key: string]: string } | undefined { return { apiKey: "GOOGLE_PALM_API_KEY", diff --git a/libs/langchain-community/src/llms/googlevertexai/common.ts b/libs/langchain-community/src/llms/googlevertexai/common.ts index 99f6301fdeb0..add11058e56a 100644 --- a/libs/langchain-community/src/llms/googlevertexai/common.ts +++ b/libs/langchain-community/src/llms/googlevertexai/common.ts @@ -61,8 +61,6 @@ export class BaseGoogleVertexAI { lc_serializable = true; - lc_namespace = ["langchain_community", "llms", this._llmType()]; - model = "text-bison"; temperature = 0.7; diff --git a/libs/langchain-community/src/llms/gradient_ai.ts b/libs/langchain-community/src/llms/gradient_ai.ts index 6be6fd1e92c3..ad834e104d7f 100644 --- a/libs/langchain-community/src/llms/gradient_ai.ts +++ b/libs/langchain-community/src/llms/gradient_ai.ts @@ -63,8 +63,6 @@ export class GradientLLM extends LLM { lc_serializable = true; - lc_namespace = ["langchain_community", "llms", this._llmType()]; - // Gradient AI does not export the BaseModel type. Once it does, we can use it here. 
// eslint-disable-next-line @typescript-eslint/no-explicit-any model: any; diff --git a/libs/langchain-community/src/llms/hf.ts b/libs/langchain-community/src/llms/hf.ts index 0acb17179af3..f1ceb58a23f9 100644 --- a/libs/langchain-community/src/llms/hf.ts +++ b/libs/langchain-community/src/llms/hf.ts @@ -58,8 +58,6 @@ export interface HFInput { export class HuggingFaceInference extends LLM implements HFInput { lc_serializable = true; - lc_namespace = ["langchain_community", "llms", this._llmType()]; - get lc_secrets(): { [key: string]: string } | undefined { return { apiKey: "HUGGINGFACEHUB_API_KEY", diff --git a/libs/langchain-community/src/llms/llama_cpp.ts b/libs/langchain-community/src/llms/llama_cpp.ts index 3ce3a543104d..db03ffbf31ed 100644 --- a/libs/langchain-community/src/llms/llama_cpp.ts +++ b/libs/langchain-community/src/llms/llama_cpp.ts @@ -36,8 +36,6 @@ export interface LlamaCppCallOptions extends BaseLLMCallOptions { export class LlamaCpp extends LLM { lc_serializable = true; - lc_namespace = ["langchain_community", "llms", this._llmType()]; - declare CallOptions: LlamaCppCallOptions; static inputs: LlamaCppInputs; diff --git a/libs/langchain-community/src/llms/ollama.ts b/libs/langchain-community/src/llms/ollama.ts index 9df3655f34d0..f97fef1f31ca 100644 --- a/libs/langchain-community/src/llms/ollama.ts +++ b/libs/langchain-community/src/llms/ollama.ts @@ -39,8 +39,6 @@ export class Ollama extends LLM implements OllamaInput { lc_serializable = true; - lc_namespace = ["langchain_community", "llms", this._llmType()]; - model = "llama2"; baseUrl = "http://localhost:11434"; diff --git a/libs/langchain-community/src/llms/portkey.ts b/libs/langchain-community/src/llms/portkey.ts index cd3d347eb892..71ae1c0725bf 100644 --- a/libs/langchain-community/src/llms/portkey.ts +++ b/libs/langchain-community/src/llms/portkey.ts @@ -97,8 +97,6 @@ export class Portkey extends BaseLLM { session: PortkeySession; - lc_namespace = ["langchain_community", "llms", 
this._llmType()]; - constructor(init?: Partial) { super(init ?? {}); this.apiKey = init?.apiKey; diff --git a/libs/langchain-community/src/llms/raycast.ts b/libs/langchain-community/src/llms/raycast.ts index d66b762d15b1..233a02f00776 100644 --- a/libs/langchain-community/src/llms/raycast.ts +++ b/libs/langchain-community/src/llms/raycast.ts @@ -21,8 +21,6 @@ const wait = (ms: number) => export class RaycastAI extends LLM implements RaycastAIInput { lc_serializable = true; - lc_namespace = ["langchain_community", "llms", this._llmType()]; - /** * The model to use for generating text. */ diff --git a/libs/langchain-community/src/llms/replicate.ts b/libs/langchain-community/src/llms/replicate.ts index e062ac10cddb..fd433d412abd 100644 --- a/libs/langchain-community/src/llms/replicate.ts +++ b/libs/langchain-community/src/llms/replicate.ts @@ -51,8 +51,6 @@ export class Replicate extends LLM implements ReplicateInput { lc_serializable = true; - lc_namespace = ["langchain_community", "llms", this._llmType()]; - model: ReplicateInput["model"]; input: ReplicateInput["input"]; diff --git a/libs/langchain-community/src/llms/sagemaker_endpoint.ts b/libs/langchain-community/src/llms/sagemaker_endpoint.ts index ea3c89a1167f..43f99320cfd9 100644 --- a/libs/langchain-community/src/llms/sagemaker_endpoint.ts +++ b/libs/langchain-community/src/llms/sagemaker_endpoint.ts @@ -112,8 +112,6 @@ export interface SageMakerEndpointInput extends BaseLLMParams { export class SageMakerEndpoint extends LLM { lc_serializable = true; - lc_namespace = ["langchain_community", "llms", this._llmType()]; - static lc_name() { return "SageMakerEndpoint"; } diff --git a/libs/langchain-community/src/llms/watsonx_ai.ts b/libs/langchain-community/src/llms/watsonx_ai.ts index add00a4eb39c..2da98bb86bd7 100644 --- a/libs/langchain-community/src/llms/watsonx_ai.ts +++ b/libs/langchain-community/src/llms/watsonx_ai.ts @@ -58,8 +58,6 @@ const endpointConstructor = (region: string, version: string) => export 
class WatsonxAI extends LLM { lc_serializable = true; - lc_namespace = ["langchain_community", "llms", this._llmType()]; - static lc_name() { return "WatsonxAI"; } diff --git a/libs/langchain-community/src/llms/writer.ts b/libs/langchain-community/src/llms/writer.ts index 5da73f15c7af..1f72d3273e76 100644 --- a/libs/langchain-community/src/llms/writer.ts +++ b/libs/langchain-community/src/llms/writer.ts @@ -69,8 +69,6 @@ export class Writer extends LLM implements WriterInput { lc_serializable = true; - lc_namespace = ["langchain_community", "llms", this._llmType()]; - apiKey: string; orgId: number; diff --git a/libs/langchain-community/src/llms/yandex.ts b/libs/langchain-community/src/llms/yandex.ts index 0ccd1471d648..58b1f31111d3 100644 --- a/libs/langchain-community/src/llms/yandex.ts +++ b/libs/langchain-community/src/llms/yandex.ts @@ -35,8 +35,6 @@ export interface YandexGPTInputs extends BaseLLMParams { export class YandexGPT extends LLM implements YandexGPTInputs { lc_serializable = true; - lc_namespace = ["langchain_community", "llms", this._llmType()]; - static lc_name() { return "Yandex GPT"; } diff --git a/libs/langchain-community/src/tools/aiplugin.ts b/libs/langchain-community/src/tools/aiplugin.ts index d924bc482418..7e8ebea28cdb 100644 --- a/libs/langchain-community/src/tools/aiplugin.ts +++ b/libs/langchain-community/src/tools/aiplugin.ts @@ -19,10 +19,6 @@ export class AIPluginTool extends Tool implements AIPluginToolParams { return "AIPluginTool"; } - get lc_namespace() { - return ["langchain_community", "tools"]; - } - private _name: string; private _description: string; diff --git a/libs/langchain-community/src/tools/aws_sfn.ts b/libs/langchain-community/src/tools/aws_sfn.ts index 1b83d1b4a7f5..0614ef959d02 100644 --- a/libs/langchain-community/src/tools/aws_sfn.ts +++ b/libs/langchain-community/src/tools/aws_sfn.ts @@ -42,10 +42,6 @@ export class StartExecutionAWSSfnTool extends Tool { public description: string; - get lc_namespace() { - 
return ["langchain_community", "tools"]; - } - constructor({ name, description, diff --git a/libs/langchain-community/src/tools/bingserpapi.ts b/libs/langchain-community/src/tools/bingserpapi.ts index 23846cbbd62d..62d82908b41d 100644 --- a/libs/langchain-community/src/tools/bingserpapi.ts +++ b/libs/langchain-community/src/tools/bingserpapi.ts @@ -13,10 +13,6 @@ class BingSerpAPI extends Tool { return "BingSerpAPI"; } - get lc_namespace() { - return ["langchain_community", "tools"]; - } - /** * Not implemented. Will throw an error if called. */ diff --git a/libs/langchain-community/src/tools/brave_search.ts b/libs/langchain-community/src/tools/brave_search.ts index 8abd0d0aff94..5d360f2b9c29 100644 --- a/libs/langchain-community/src/tools/brave_search.ts +++ b/libs/langchain-community/src/tools/brave_search.ts @@ -20,10 +20,6 @@ export class BraveSearch extends Tool { return "BraveSearch"; } - get lc_namespace() { - return ["langchain_community", "tools"]; - } - name = "brave-search"; description = diff --git a/libs/langchain-community/src/tools/connery.ts b/libs/langchain-community/src/tools/connery.ts index 90a6f1320a08..30eb21999782 100644 --- a/libs/langchain-community/src/tools/connery.ts +++ b/libs/langchain-community/src/tools/connery.ts @@ -68,10 +68,6 @@ type RunActionResult = { * @extends Tool */ export class ConneryAction extends Tool { - get lc_namespace() { - return ["langchain_community", "tools"]; - } - name: string; description: string; diff --git a/libs/langchain-community/src/tools/dadjokeapi.ts b/libs/langchain-community/src/tools/dadjokeapi.ts index aa6950b27065..2efd934d67e7 100644 --- a/libs/langchain-community/src/tools/dadjokeapi.ts +++ b/libs/langchain-community/src/tools/dadjokeapi.ts @@ -11,10 +11,6 @@ class DadJokeAPI extends Tool { return "DadJokeAPI"; } - get lc_namespace() { - return ["langchain_community", "tools"]; - } - name = "dadjoke"; description = diff --git a/libs/langchain-community/src/tools/dataforseo_api_search.ts 
b/libs/langchain-community/src/tools/dataforseo_api_search.ts index bb7f94467547..26b6156dd93b 100644 --- a/libs/langchain-community/src/tools/dataforseo_api_search.ts +++ b/libs/langchain-community/src/tools/dataforseo_api_search.ts @@ -101,10 +101,6 @@ export class DataForSeoAPISearch extends Tool { return "DataForSeoAPISearch"; } - get lc_namespace() { - return ["langchain_community", "tools"]; - } - name = "dataforseo-api-wrapper"; description = diff --git a/libs/langchain-community/src/tools/gmail/base.ts b/libs/langchain-community/src/tools/gmail/base.ts index 824222fc3828..fa9e122d2dfd 100644 --- a/libs/langchain-community/src/tools/gmail/base.ts +++ b/libs/langchain-community/src/tools/gmail/base.ts @@ -46,10 +46,6 @@ export abstract class GmailBaseTool extends StructuredTool { description = "A tool to send and view emails through Gmail"; - get lc_namespace() { - return ["langchain_community", "tools"]; - } - protected gmail: gmail_v1.Gmail; constructor(fields?: Partial) { diff --git a/libs/langchain-community/src/tools/google_custom_search.ts b/libs/langchain-community/src/tools/google_custom_search.ts index 5a7c8cd8f540..ef6cfd25d184 100644 --- a/libs/langchain-community/src/tools/google_custom_search.ts +++ b/libs/langchain-community/src/tools/google_custom_search.ts @@ -25,10 +25,6 @@ export class GoogleCustomSearch extends Tool { }; } - get lc_namespace() { - return ["langchain_community", "tools"]; - } - name = "google-custom-search"; protected apiKey: string; diff --git a/libs/langchain-community/src/tools/google_places.ts b/libs/langchain-community/src/tools/google_places.ts index 8e297ef60c47..6a702e5d6c58 100644 --- a/libs/langchain-community/src/tools/google_places.ts +++ b/libs/langchain-community/src/tools/google_places.ts @@ -16,10 +16,6 @@ export class GooglePlacesAPI extends Tool { return "GooglePlacesAPI"; } - get lc_namespace() { - return ["langchain_community", "tools"]; - } - get lc_secrets(): { [key: string]: string } | undefined { 
return { apiKey: "GOOGLE_PLACES_API_KEY", diff --git a/libs/langchain-community/src/tools/ifttt.ts b/libs/langchain-community/src/tools/ifttt.ts index c34f5b566b22..44df4d143d30 100644 --- a/libs/langchain-community/src/tools/ifttt.ts +++ b/libs/langchain-community/src/tools/ifttt.ts @@ -45,10 +45,6 @@ export class IFTTTWebhook extends Tool { return "IFTTTWebhook"; } - get lc_namespace() { - return ["langchain_community", "tools"]; - } - private url: string; name: string; diff --git a/libs/langchain-community/src/tools/searchapi.ts b/libs/langchain-community/src/tools/searchapi.ts index 6329b892a8e7..c6731b7fc682 100644 --- a/libs/langchain-community/src/tools/searchapi.ts +++ b/libs/langchain-community/src/tools/searchapi.ts @@ -67,10 +67,6 @@ export class SearchApi extends Tool { return "SearchApi"; } - get lc_namespace() { - return ["langchain_community", "tools"]; - } - /** * Converts the SearchApi instance to JSON. This method is not implemented * and will throw an error if called. 
diff --git a/libs/langchain-community/src/tools/searxng_search.ts b/libs/langchain-community/src/tools/searxng_search.ts index 1b8932fab86b..eec2b90df82b 100644 --- a/libs/langchain-community/src/tools/searxng_search.ts +++ b/libs/langchain-community/src/tools/searxng_search.ts @@ -116,10 +116,6 @@ export class SearxngSearch extends Tool { return "SearxngSearch"; } - get lc_namespace() { - return ["langchain_community", "tools"]; - } - name = "searxng-search"; description = diff --git a/libs/langchain-community/src/tools/serpapi.ts b/libs/langchain-community/src/tools/serpapi.ts index 6f0d29dcd52a..f6a0f4dc099b 100644 --- a/libs/langchain-community/src/tools/serpapi.ts +++ b/libs/langchain-community/src/tools/serpapi.ts @@ -294,10 +294,6 @@ export class SerpAPI extends Tool { return "SerpAPI"; } - get lc_namespace() { - return ["langchain_community", "tools"]; - } - toJSON() { return this.toJSONNotImplemented(); } diff --git a/libs/langchain-community/src/tools/serper.ts b/libs/langchain-community/src/tools/serper.ts index 179796665ca5..ca444d769187 100644 --- a/libs/langchain-community/src/tools/serper.ts +++ b/libs/langchain-community/src/tools/serper.ts @@ -22,10 +22,6 @@ export class Serper extends Tool { return "Serper"; } - get lc_namespace() { - return ["langchain_community", "tools"]; - } - /** * Converts the Serper instance to JSON. This method is not implemented * and will throw an error if called. 
diff --git a/libs/langchain-community/src/tools/wikipedia_query_run.ts b/libs/langchain-community/src/tools/wikipedia_query_run.ts index 127eb6d1ec14..d0166e5c58d0 100644 --- a/libs/langchain-community/src/tools/wikipedia_query_run.ts +++ b/libs/langchain-community/src/tools/wikipedia_query_run.ts @@ -69,10 +69,6 @@ export class WikipediaQueryRun extends Tool { return "WikipediaQueryRun"; } - get lc_namespace() { - return ["langchain_community", "tools"]; - } - name = "wikipedia-api"; description = diff --git a/libs/langchain-community/src/tools/wolframalpha.ts b/libs/langchain-community/src/tools/wolframalpha.ts index f8816d6b53f7..dfa58807d5f6 100644 --- a/libs/langchain-community/src/tools/wolframalpha.ts +++ b/libs/langchain-community/src/tools/wolframalpha.ts @@ -22,10 +22,6 @@ export class WolframAlphaTool extends Tool { this.appid = fields.appid; } - get lc_namespace() { - return ["langchain_community", "tools"]; - } - static lc_name() { return "WolframAlphaTool"; } From 53f320d8d8c167616d341cdf8158daed31e640f3 Mon Sep 17 00:00:00 2001 From: jacoblee93 Date: Thu, 7 Dec 2023 16:53:39 -0800 Subject: [PATCH 17/22] Use OpenAI package --- langchain/package.json | 2 +- langchain/src/agents/openai/output_parser.ts | 2 +- .../src/chains/openai_functions/openapi.ts | 2 +- langchain/src/chains/openai_moderation.ts | 2 +- langchain/src/chat_models/openai.ts | 840 +----------------- .../fs/openai_whisper_audio.ts | 2 +- langchain/src/embeddings/openai.ts | 273 +----- .../tests/googlemakersuitehub.test.ts | 12 +- .../experimental/openai_assistant/index.ts | 2 +- .../experimental/openai_assistant/schema.ts | 2 +- .../tests/openai_assistant.int.test.ts | 2 +- .../src/experimental/openai_files/index.ts | 2 +- langchain/src/llms/openai-chat.ts | 473 +--------- langchain/src/llms/openai.ts | 559 +----------- langchain/src/schema/index.ts | 2 +- langchain/src/tools/convert_to_openai.ts | 2 +- langchain/src/types/openai-types.ts | 2 +- langchain/src/util/openai-format-fndef.ts 
| 4 +- langchain/src/util/openai.ts | 16 - langchain/src/vectorstores/memory.ts | 2 +- .../vectorstores/tests/analyticdb.int.test.ts | 4 +- .../vectorstores/tests/convex/convex/lib.ts | 10 +- .../tests/googlevertexai.int.test.ts | 2 +- .../tests/mongodb_atlas.int.test.ts | 5 +- .../vectorstores/tests/myscale.int.test.ts | 3 +- .../src/vectorstores/tests/xata.int.test.ts | 4 +- yarn.lock | 10 +- 27 files changed, 68 insertions(+), 2173 deletions(-) delete mode 100644 langchain/src/util/openai.ts diff --git a/langchain/package.json b/langchain/package.json index a08340faae16..6dd2c60090d2 100644 --- a/langchain/package.json +++ b/langchain/package.json @@ -1415,6 +1415,7 @@ "@anthropic-ai/sdk": "^0.9.1", "@langchain/community": "~0.0.0", "@langchain/core": "~0.0.11-rc.1", + "@langchain/openai": "~0.0.1", "binary-extensions": "^2.2.0", "expr-eval": "^2.0.2", "js-tiktoken": "^1.0.7", @@ -1423,7 +1424,6 @@ "langchainhub": "~0.0.6", "langsmith": "~0.0.48", "ml-distance": "^4.0.0", - "openai": "^4.19.0", "openapi-types": "^12.1.3", "p-retry": "4", "uuid": "^9.0.0", diff --git a/langchain/src/agents/openai/output_parser.ts b/langchain/src/agents/openai/output_parser.ts index 960f6db84e47..fc14b6a4f160 100644 --- a/langchain/src/agents/openai/output_parser.ts +++ b/langchain/src/agents/openai/output_parser.ts @@ -1,4 +1,4 @@ -import type { OpenAI as OpenAIClient } from "openai"; +import type { OpenAIClient } from "@langchain/openai"; import { AgentAction, AgentFinish, diff --git a/langchain/src/chains/openai_functions/openapi.ts b/langchain/src/chains/openai_functions/openapi.ts index 06eb92ad4995..5306eef626db 100644 --- a/langchain/src/chains/openai_functions/openapi.ts +++ b/langchain/src/chains/openai_functions/openapi.ts @@ -1,4 +1,4 @@ -import type { OpenAI as OpenAIClient } from "openai"; +import type { OpenAIClient } from "@langchain/openai"; import { JsonSchema7ObjectType } from "zod-to-json-schema/src/parsers/object.js"; import { JsonSchema7ArrayType } from 
"zod-to-json-schema/src/parsers/array.js"; import { JsonSchema7Type } from "zod-to-json-schema/src/parseDef.js"; diff --git a/langchain/src/chains/openai_moderation.ts b/langchain/src/chains/openai_moderation.ts index 862552099405..2474baaab0ba 100644 --- a/langchain/src/chains/openai_moderation.ts +++ b/langchain/src/chains/openai_moderation.ts @@ -1,4 +1,4 @@ -import { type ClientOptions, OpenAI as OpenAIClient } from "openai"; +import { type ClientOptions, OpenAIClient } from "@langchain/openai"; import { BaseChain, ChainInputs } from "./base.js"; import { ChainValues } from "../schema/index.js"; import { AsyncCaller, AsyncCallerParams } from "../util/async_caller.js"; diff --git a/langchain/src/chat_models/openai.ts b/langchain/src/chat_models/openai.ts index c8e99c525246..cf822cb30a8d 100644 --- a/langchain/src/chat_models/openai.ts +++ b/langchain/src/chat_models/openai.ts @@ -1,836 +1,20 @@ -import { type ClientOptions, OpenAI as OpenAIClient } from "openai"; +import { + ChatOpenAI, + type ChatOpenAICallOptions, + messageToOpenAIRole, +} from "@langchain/openai"; import { CallbackManagerForLLMRun } from "../callbacks/manager.js"; -import { - AIMessage, - AIMessageChunk, - BaseMessage, - ChatGeneration, - ChatGenerationChunk, - ChatMessage, - ChatMessageChunk, - ChatResult, - FunctionMessageChunk, - HumanMessageChunk, - SystemMessageChunk, - ToolMessage, - ToolMessageChunk, -} from "../schema/index.js"; -import { StructuredTool } from "../tools/base.js"; -import { formatToOpenAITool } from "../tools/convert_to_openai.js"; -import { - AzureOpenAIInput, - OpenAICallOptions, - OpenAIChatInput, - OpenAICoreRequestOptions, - LegacyOpenAIInput, -} from "../types/openai-types.js"; -import { OpenAIEndpointConfig, getEndpoint } from "../util/azure.js"; -import { getEnvironmentVariable } from "../util/env.js"; +import { BaseMessage, ChatMessage, ChatResult } from "../schema/index.js"; import { promptLayerTrackRequest } from "../util/prompt-layer.js"; -import { 
BaseChatModel, BaseChatModelParams } from "./base.js"; -import { BaseFunctionCallOptions } from "../base_language/index.js"; -import { NewTokenIndices } from "../callbacks/base.js"; -import { wrapOpenAIClientError } from "../util/openai.js"; -import { - FunctionDef, - formatFunctionDefinitions, -} from "../util/openai-format-fndef.js"; - -export type { AzureOpenAIInput, OpenAICallOptions, OpenAIChatInput }; - -interface TokenUsage { - completionTokens?: number; - promptTokens?: number; - totalTokens?: number; -} - -interface OpenAILLMOutput { - tokenUsage: TokenUsage; -} - -// TODO import from SDK when available -type OpenAIRoleEnum = "system" | "assistant" | "user" | "function" | "tool"; - -type OpenAICompletionParam = - OpenAIClient.Chat.Completions.ChatCompletionMessageParam; -type OpenAIFnDef = OpenAIClient.Chat.ChatCompletionCreateParams.Function; -type OpenAIFnCallOption = OpenAIClient.Chat.ChatCompletionFunctionCallOption; - -function extractGenericMessageCustomRole(message: ChatMessage) { - if ( - message.role !== "system" && - message.role !== "assistant" && - message.role !== "user" && - message.role !== "function" && - message.role !== "tool" - ) { - console.warn(`Unknown message role: ${message.role}`); - } - - return message.role as OpenAIRoleEnum; -} - -function messageToOpenAIRole(message: BaseMessage): OpenAIRoleEnum { - const type = message._getType(); - switch (type) { - case "system": - return "system"; - case "ai": - return "assistant"; - case "human": - return "user"; - case "function": - return "function"; - case "tool": - return "tool"; - case "generic": { - if (!ChatMessage.isInstance(message)) - throw new Error("Invalid generic chat message"); - return extractGenericMessageCustomRole(message); - } - default: - throw new Error(`Unknown message type: ${type}`); - } -} - -function openAIResponseToChatMessage( - message: OpenAIClient.Chat.Completions.ChatCompletionMessage -): BaseMessage { - switch (message.role) { - case "assistant": - return 
new AIMessage(message.content || "", { - function_call: message.function_call, - tool_calls: message.tool_calls, - }); - default: - return new ChatMessage(message.content || "", message.role ?? "unknown"); - } -} - -function _convertDeltaToMessageChunk( - // eslint-disable-next-line @typescript-eslint/no-explicit-any - delta: Record, - defaultRole?: OpenAIRoleEnum -) { - const role = delta.role ?? defaultRole; - const content = delta.content ?? ""; - let additional_kwargs; - if (delta.function_call) { - additional_kwargs = { - function_call: delta.function_call, - }; - } else if (delta.tool_calls) { - additional_kwargs = { - tool_calls: delta.tool_calls, - }; - } else { - additional_kwargs = {}; - } - if (role === "user") { - return new HumanMessageChunk({ content }); - } else if (role === "assistant") { - return new AIMessageChunk({ content, additional_kwargs }); - } else if (role === "system") { - return new SystemMessageChunk({ content }); - } else if (role === "function") { - return new FunctionMessageChunk({ - content, - additional_kwargs, - name: delta.name, - }); - } else if (role === "tool") { - return new ToolMessageChunk({ - content, - additional_kwargs, - tool_call_id: delta.tool_call_id, - }); - } else { - return new ChatMessageChunk({ content, role }); - } -} - -function convertMessagesToOpenAIParams(messages: BaseMessage[]) { - // TODO: Function messages do not support array content, fix cast - return messages.map( - (message) => - ({ - role: messageToOpenAIRole(message), - content: message.content, - name: message.name, - function_call: message.additional_kwargs.function_call, - tool_calls: message.additional_kwargs.tool_calls, - tool_call_id: (message as ToolMessage).tool_call_id, - } as OpenAICompletionParam) - ); -} - -export interface ChatOpenAICallOptions - extends OpenAICallOptions, - BaseFunctionCallOptions { - tools?: StructuredTool[] | OpenAIClient.ChatCompletionTool[]; - tool_choice?: OpenAIClient.ChatCompletionToolChoiceOption; - 
promptIndex?: number; - response_format?: { type: "json_object" }; - seed?: number; -} - -/** - * Wrapper around OpenAI large language models that use the Chat endpoint. - * - * To use you should have the `openai` package installed, with the - * `OPENAI_API_KEY` environment variable set. - * - * To use with Azure you should have the `openai` package installed, with the - * `AZURE_OPENAI_API_KEY`, - * `AZURE_OPENAI_API_INSTANCE_NAME`, - * `AZURE_OPENAI_API_DEPLOYMENT_NAME` - * and `AZURE_OPENAI_API_VERSION` environment variable set. - * `AZURE_OPENAI_BASE_PATH` is optional and will override `AZURE_OPENAI_API_INSTANCE_NAME` if you need to use a custom endpoint. - * - * @remarks - * Any parameters that are valid to be passed to {@link - * https://platform.openai.com/docs/api-reference/chat/create | - * `openai.createChatCompletion`} can be passed through {@link modelKwargs}, even - * if not explicitly available on this class. - * @example - * ```typescript - * // Create a new instance of ChatOpenAI with specific temperature and model name settings - * const model = new ChatOpenAI({ - * temperature: 0.9, - * modelName: "ft:gpt-3.5-turbo-0613:{ORG_NAME}::{MODEL_ID}", - * }); - * - * // Invoke the model with a message and await the response - * const message = await model.invoke("Hi there!"); - * - * // Log the response to the console - * console.log(message); - * - * ``` - */ -export class ChatOpenAI< - CallOptions extends ChatOpenAICallOptions = ChatOpenAICallOptions - > - extends BaseChatModel - implements OpenAIChatInput, AzureOpenAIInput -{ - static lc_name() { - return "ChatOpenAI"; - } - - get callKeys() { - return [ - ...super.callKeys, - "options", - "function_call", - "functions", - "tools", - "tool_choice", - "promptIndex", - "response_format", - "seed", - ]; - } - - lc_serializable = true; - - get lc_secrets(): { [key: string]: string } | undefined { - return { - openAIApiKey: "OPENAI_API_KEY", - azureOpenAIApiKey: "AZURE_OPENAI_API_KEY", - organization: 
"OPENAI_ORGANIZATION", - }; - } - - get lc_aliases(): Record { - return { - modelName: "model", - openAIApiKey: "openai_api_key", - azureOpenAIApiVersion: "azure_openai_api_version", - azureOpenAIApiKey: "azure_openai_api_key", - azureOpenAIApiInstanceName: "azure_openai_api_instance_name", - azureOpenAIApiDeploymentName: "azure_openai_api_deployment_name", - }; - } - - temperature = 1; - - topP = 1; - - frequencyPenalty = 0; - - presencePenalty = 0; - - n = 1; - - logitBias?: Record; - - modelName = "gpt-3.5-turbo"; - modelKwargs?: OpenAIChatInput["modelKwargs"]; - - stop?: string[]; - - user?: string; - - timeout?: number; - - streaming = false; - - maxTokens?: number; - - openAIApiKey?: string; - - azureOpenAIApiVersion?: string; - - azureOpenAIApiKey?: string; - - azureOpenAIApiInstanceName?: string; - - azureOpenAIApiDeploymentName?: string; - - azureOpenAIBasePath?: string; - - organization?: string; - - private client: OpenAIClient; - - private clientConfig: ClientOptions; - - constructor( - fields?: Partial & - Partial & - BaseChatModelParams & { - configuration?: ClientOptions & LegacyOpenAIInput; - }, - /** @deprecated */ - configuration?: ClientOptions & LegacyOpenAIInput - ) { - super(fields ?? {}); - - this.openAIApiKey = - fields?.openAIApiKey ?? getEnvironmentVariable("OPENAI_API_KEY"); - - this.azureOpenAIApiKey = - fields?.azureOpenAIApiKey ?? - getEnvironmentVariable("AZURE_OPENAI_API_KEY"); - - if (!this.azureOpenAIApiKey && !this.openAIApiKey) { - throw new Error("OpenAI or Azure OpenAI API key not found"); - } - - this.azureOpenAIApiInstanceName = - fields?.azureOpenAIApiInstanceName ?? - getEnvironmentVariable("AZURE_OPENAI_API_INSTANCE_NAME"); - - this.azureOpenAIApiDeploymentName = - fields?.azureOpenAIApiDeploymentName ?? - getEnvironmentVariable("AZURE_OPENAI_API_DEPLOYMENT_NAME"); - - this.azureOpenAIApiVersion = - fields?.azureOpenAIApiVersion ?? 
- getEnvironmentVariable("AZURE_OPENAI_API_VERSION"); - - this.azureOpenAIBasePath = - fields?.azureOpenAIBasePath ?? - getEnvironmentVariable("AZURE_OPENAI_BASE_PATH"); - - this.organization = - fields?.configuration?.organization ?? - getEnvironmentVariable("OPENAI_ORGANIZATION"); - - this.modelName = fields?.modelName ?? this.modelName; - this.modelKwargs = fields?.modelKwargs ?? {}; - this.timeout = fields?.timeout; - - this.temperature = fields?.temperature ?? this.temperature; - this.topP = fields?.topP ?? this.topP; - this.frequencyPenalty = fields?.frequencyPenalty ?? this.frequencyPenalty; - this.presencePenalty = fields?.presencePenalty ?? this.presencePenalty; - this.maxTokens = fields?.maxTokens; - this.n = fields?.n ?? this.n; - this.logitBias = fields?.logitBias; - this.stop = fields?.stop; - this.user = fields?.user; - - this.streaming = fields?.streaming ?? false; - - if (this.azureOpenAIApiKey) { - if (!this.azureOpenAIApiInstanceName && !this.azureOpenAIBasePath) { - throw new Error("Azure OpenAI API instance name not found"); - } - if (!this.azureOpenAIApiDeploymentName) { - throw new Error("Azure OpenAI API deployment name not found"); - } - if (!this.azureOpenAIApiVersion) { - throw new Error("Azure OpenAI API version not found"); - } - this.openAIApiKey = this.openAIApiKey ?? ""; - } - - this.clientConfig = { - apiKey: this.openAIApiKey, - organization: this.organization, - baseURL: configuration?.basePath ?? fields?.configuration?.basePath, - dangerouslyAllowBrowser: true, - defaultHeaders: - configuration?.baseOptions?.headers ?? - fields?.configuration?.baseOptions?.headers, - defaultQuery: - configuration?.baseOptions?.params ?? 
- fields?.configuration?.baseOptions?.params, - ...configuration, - ...fields?.configuration, - }; - } - - /** - * Get the parameters used to invoke the model - */ - invocationParams( - options?: this["ParsedCallOptions"] - ): Omit { - function isStructuredToolArray( - tools?: unknown[] - ): tools is StructuredTool[] { - return ( - tools !== undefined && - tools.every((tool) => - Array.isArray((tool as StructuredTool).lc_namespace) - ) - ); - } - const params: Omit< - OpenAIClient.Chat.ChatCompletionCreateParams, - "messages" - > = { - model: this.modelName, - temperature: this.temperature, - top_p: this.topP, - frequency_penalty: this.frequencyPenalty, - presence_penalty: this.presencePenalty, - max_tokens: this.maxTokens === -1 ? undefined : this.maxTokens, - n: this.n, - logit_bias: this.logitBias, - stop: options?.stop ?? this.stop, - user: this.user, - stream: this.streaming, - functions: options?.functions, - function_call: options?.function_call, - tools: isStructuredToolArray(options?.tools) - ? 
options?.tools.map(formatToOpenAITool) - : options?.tools, - tool_choice: options?.tool_choice, - response_format: options?.response_format, - seed: options?.seed, - ...this.modelKwargs, - }; - return params; - } - - /** @ignore */ - _identifyingParams(): Omit< - OpenAIClient.Chat.ChatCompletionCreateParams, - "messages" - > & { - model_name: string; - } & ClientOptions { - return { - model_name: this.modelName, - ...this.invocationParams(), - ...this.clientConfig, - }; - } - - async *_streamResponseChunks( - messages: BaseMessage[], - options: this["ParsedCallOptions"], - runManager?: CallbackManagerForLLMRun - ): AsyncGenerator { - const messagesMapped: OpenAICompletionParam[] = - convertMessagesToOpenAIParams(messages); - const params = { - ...this.invocationParams(options), - messages: messagesMapped, - stream: true as const, - }; - let defaultRole: OpenAIRoleEnum | undefined; - const streamIterable = await this.completionWithRetry(params, options); - for await (const data of streamIterable) { - const choice = data?.choices[0]; - if (!choice) { - continue; - } +export { + type AzureOpenAIInput, + type OpenAICallOptions, + type OpenAIChatInput, +} from "@langchain/openai"; - const { delta } = choice; - if (!delta) { - continue; - } - const chunk = _convertDeltaToMessageChunk(delta, defaultRole); - defaultRole = delta.role ?? defaultRole; - const newTokenIndices = { - prompt: options.promptIndex ?? 0, - completion: choice.index ?? 0, - }; - if (typeof chunk.content !== "string") { - console.log( - "[WARNING]: Received non-string content from OpenAI. This is currently not supported." - ); - continue; - } - const generationChunk = new ChatGenerationChunk({ - message: chunk, - text: chunk.content, - generationInfo: newTokenIndices, - }); - yield generationChunk; - // eslint-disable-next-line no-void - void runManager?.handleLLMNewToken( - generationChunk.text ?? 
"", - newTokenIndices, - undefined, - undefined, - undefined, - { chunk: generationChunk } - ); - } - if (options.signal?.aborted) { - throw new Error("AbortError"); - } - } - - /** - * Get the identifying parameters for the model - * - */ - identifyingParams() { - return this._identifyingParams(); - } - - /** @ignore */ - async _generate( - messages: BaseMessage[], - options: this["ParsedCallOptions"], - runManager?: CallbackManagerForLLMRun - ): Promise { - const tokenUsage: TokenUsage = {}; - const params = this.invocationParams(options); - const messagesMapped: OpenAICompletionParam[] = - convertMessagesToOpenAIParams(messages); - - if (params.stream) { - const stream = this._streamResponseChunks(messages, options, runManager); - const finalChunks: Record = {}; - for await (const chunk of stream) { - const index = - (chunk.generationInfo as NewTokenIndices)?.completion ?? 0; - if (finalChunks[index] === undefined) { - finalChunks[index] = chunk; - } else { - finalChunks[index] = finalChunks[index].concat(chunk); - } - } - const generations = Object.entries(finalChunks) - .sort(([aKey], [bKey]) => parseInt(aKey, 10) - parseInt(bKey, 10)) - .map(([_, value]) => value); - - const { functions, function_call } = this.invocationParams(options); - - // OpenAI does not support token usage report under stream mode, - // fallback to estimation. 
- - const promptTokenUsage = await this.getEstimatedTokenCountFromPrompt( - messages, - functions, - function_call - ); - const completionTokenUsage = await this.getNumTokensFromGenerations( - generations - ); - - tokenUsage.promptTokens = promptTokenUsage; - tokenUsage.completionTokens = completionTokenUsage; - tokenUsage.totalTokens = promptTokenUsage + completionTokenUsage; - return { generations, llmOutput: { estimatedTokenUsage: tokenUsage } }; - } else { - const data = await this.completionWithRetry( - { - ...params, - stream: false, - messages: messagesMapped, - }, - { - signal: options?.signal, - ...options?.options, - } - ); - const { - completion_tokens: completionTokens, - prompt_tokens: promptTokens, - total_tokens: totalTokens, - } = data?.usage ?? {}; - - if (completionTokens) { - tokenUsage.completionTokens = - (tokenUsage.completionTokens ?? 0) + completionTokens; - } - - if (promptTokens) { - tokenUsage.promptTokens = (tokenUsage.promptTokens ?? 0) + promptTokens; - } - - if (totalTokens) { - tokenUsage.totalTokens = (tokenUsage.totalTokens ?? 0) + totalTokens; - } - - const generations: ChatGeneration[] = []; - for (const part of data?.choices ?? []) { - const text = part.message?.content ?? ""; - const generation: ChatGeneration = { - text, - message: openAIResponseToChatMessage( - part.message ?? { role: "assistant" } - ), - }; - if (part.finish_reason) { - generation.generationInfo = { finish_reason: part.finish_reason }; - } - generations.push(generation); - } - return { - generations, - llmOutput: { tokenUsage }, - }; - } - } - - /** - * Estimate the number of tokens a prompt will use. 
- * Modified from: https://github.com/hmarr/openai-chat-tokens/blob/main/src/index.ts - */ - private async getEstimatedTokenCountFromPrompt( - messages: BaseMessage[], - functions?: OpenAIFnDef[], - function_call?: "none" | "auto" | OpenAIFnCallOption - ): Promise { - // It appears that if functions are present, the first system message is padded with a trailing newline. This - // was inferred by trying lots of combinations of messages and functions and seeing what the token counts were. - - let tokens = (await this.getNumTokensFromMessages(messages)).totalCount; - - // If there are functions, add the function definitions as they count towards token usage - if (functions && function_call !== "auto") { - const promptDefinitions = formatFunctionDefinitions( - functions as unknown as FunctionDef[] - ); - tokens += await this.getNumTokens(promptDefinitions); - tokens += 9; // Add nine per completion - } - - // If there's a system message _and_ functions are present, subtract four tokens. I assume this is because - // functions typically add a system message, but reuse the first one if it's already there. This offsets - // the extra 9 tokens added by the function definitions. - if (functions && messages.find((m) => m._getType() === "system")) { - tokens -= 4; - } - - // If function_call is 'none', add one token. - // If it's a FunctionCall object, add 4 + the number of tokens in the function name. - // If it's undefined or 'auto', don't add anything. - if (function_call === "none") { - tokens += 1; - } else if (typeof function_call === "object") { - tokens += (await this.getNumTokens(function_call.name)) + 4; - } - - return tokens; - } - - /** - * Estimate the number of tokens an array of generations have used. 
- */ - private async getNumTokensFromGenerations(generations: ChatGeneration[]) { - const generationUsages = await Promise.all( - generations.map(async (generation) => { - if (generation.message.additional_kwargs?.function_call) { - return (await this.getNumTokensFromMessages([generation.message])) - .countPerMessage[0]; - } else { - return await this.getNumTokens(generation.message.content); - } - }) - ); - - return generationUsages.reduce((a, b) => a + b, 0); - } - - async getNumTokensFromMessages(messages: BaseMessage[]) { - let totalCount = 0; - let tokensPerMessage = 0; - let tokensPerName = 0; - - // From: https://github.com/openai/openai-cookbook/blob/main/examples/How_to_format_inputs_to_ChatGPT_models.ipynb - if (this.modelName === "gpt-3.5-turbo-0301") { - tokensPerMessage = 4; - tokensPerName = -1; - } else { - tokensPerMessage = 3; - tokensPerName = 1; - } - - const countPerMessage = await Promise.all( - messages.map(async (message) => { - const textCount = await this.getNumTokens(message.content); - const roleCount = await this.getNumTokens(messageToOpenAIRole(message)); - const nameCount = - message.name !== undefined - ? 
tokensPerName + (await this.getNumTokens(message.name)) - : 0; - let count = textCount + tokensPerMessage + roleCount + nameCount; - - // From: https://github.com/hmarr/openai-chat-tokens/blob/main/src/index.ts messageTokenEstimate - const openAIMessage = message; - if (openAIMessage._getType() === "function") { - count -= 2; - } - if (openAIMessage.additional_kwargs?.function_call) { - count += 3; - } - if (openAIMessage?.additional_kwargs.function_call?.name) { - count += await this.getNumTokens( - openAIMessage.additional_kwargs.function_call?.name - ); - } - if (openAIMessage.additional_kwargs.function_call?.arguments) { - count += await this.getNumTokens( - // Remove newlines and spaces - JSON.stringify( - JSON.parse( - openAIMessage.additional_kwargs.function_call?.arguments - ) - ) - ); - } - - totalCount += count; - return count; - }) - ); - - totalCount += 3; // every reply is primed with <|start|>assistant<|message|> - - return { totalCount, countPerMessage }; - } - - /** - * Calls the OpenAI API with retry logic in case of failures. - * @param request The request to send to the OpenAI API. - * @param options Optional configuration for the API call. - * @returns The response from the OpenAI API. 
- */ - async completionWithRetry( - request: OpenAIClient.Chat.ChatCompletionCreateParamsStreaming, - options?: OpenAICoreRequestOptions - ): Promise>; - - async completionWithRetry( - request: OpenAIClient.Chat.ChatCompletionCreateParamsNonStreaming, - options?: OpenAICoreRequestOptions - ): Promise; - - async completionWithRetry( - request: - | OpenAIClient.Chat.ChatCompletionCreateParamsStreaming - | OpenAIClient.Chat.ChatCompletionCreateParamsNonStreaming, - options?: OpenAICoreRequestOptions - ): Promise< - | AsyncIterable - | OpenAIClient.Chat.Completions.ChatCompletion - > { - const requestOptions = this._getClientOptions(options); - return this.caller.call(async () => { - try { - const res = await this.client.chat.completions.create( - request, - requestOptions - ); - return res; - } catch (e) { - const error = wrapOpenAIClientError(e); - throw error; - } - }); - } - - private _getClientOptions(options: OpenAICoreRequestOptions | undefined) { - if (!this.client) { - const openAIEndpointConfig: OpenAIEndpointConfig = { - azureOpenAIApiDeploymentName: this.azureOpenAIApiDeploymentName, - azureOpenAIApiInstanceName: this.azureOpenAIApiInstanceName, - azureOpenAIApiKey: this.azureOpenAIApiKey, - azureOpenAIBasePath: this.azureOpenAIBasePath, - baseURL: this.clientConfig.baseURL, - }; - - const endpoint = getEndpoint(openAIEndpointConfig); - const params = { - ...this.clientConfig, - baseURL: endpoint, - timeout: this.timeout, - maxRetries: 0, - }; - if (!params.baseURL) { - delete params.baseURL; - } - - this.client = new OpenAIClient(params); - } - const requestOptions = { - ...this.clientConfig, - ...options, - } as OpenAICoreRequestOptions; - if (this.azureOpenAIApiKey) { - requestOptions.headers = { - "api-key": this.azureOpenAIApiKey, - ...requestOptions.headers, - }; - requestOptions.query = { - "api-version": this.azureOpenAIApiVersion, - ...requestOptions.query, - }; - } - return requestOptions; - } - - _llmType() { - return "openai"; - } - - /** 
@ignore */ - _combineLLMOutput(...llmOutputs: OpenAILLMOutput[]): OpenAILLMOutput { - return llmOutputs.reduce<{ - [key in keyof OpenAILLMOutput]: Required; - }>( - (acc, llmOutput) => { - if (llmOutput && llmOutput.tokenUsage) { - acc.tokenUsage.completionTokens += - llmOutput.tokenUsage.completionTokens ?? 0; - acc.tokenUsage.promptTokens += llmOutput.tokenUsage.promptTokens ?? 0; - acc.tokenUsage.totalTokens += llmOutput.tokenUsage.totalTokens ?? 0; - } - return acc; - }, - { - tokenUsage: { - completionTokens: 0, - promptTokens: 0, - totalTokens: 0, - }, - } - ); - } -} +export { type ChatOpenAICallOptions, ChatOpenAI }; export class PromptLayerChatOpenAI extends ChatOpenAI { promptLayerApiKey?: string; diff --git a/langchain/src/document_loaders/fs/openai_whisper_audio.ts b/langchain/src/document_loaders/fs/openai_whisper_audio.ts index fbbdbef6f25b..468eba28c1e5 100644 --- a/langchain/src/document_loaders/fs/openai_whisper_audio.ts +++ b/langchain/src/document_loaders/fs/openai_whisper_audio.ts @@ -1,4 +1,4 @@ -import { type ClientOptions, OpenAI as OpenAIClient, toFile } from "openai"; +import { type ClientOptions, OpenAIClient, toFile } from "@langchain/openai"; import { Document } from "../../document.js"; import { BufferLoader } from "./buffer.js"; diff --git a/langchain/src/embeddings/openai.ts b/langchain/src/embeddings/openai.ts index 0cc726d03ca3..9c5af9d81995 100644 --- a/langchain/src/embeddings/openai.ts +++ b/langchain/src/embeddings/openai.ts @@ -1,269 +1,4 @@ -import { type ClientOptions, OpenAI as OpenAIClient } from "openai"; -import { getEnvironmentVariable } from "../util/env.js"; -import { - AzureOpenAIInput, - OpenAICoreRequestOptions, - LegacyOpenAIInput, -} from "../types/openai-types.js"; -import { chunkArray } from "../util/chunk.js"; -import { Embeddings, EmbeddingsParams } from "./base.js"; -import { getEndpoint, OpenAIEndpointConfig } from "../util/azure.js"; -import { wrapOpenAIClientError } from "../util/openai.js"; - -/** - * 
Interface for OpenAIEmbeddings parameters. Extends EmbeddingsParams and - * defines additional parameters specific to the OpenAIEmbeddings class. - */ -export interface OpenAIEmbeddingsParams extends EmbeddingsParams { - /** Model name to use */ - modelName: string; - - /** - * Timeout to use when making requests to OpenAI. - */ - timeout?: number; - - /** - * The maximum number of documents to embed in a single request. This is - * limited by the OpenAI API to a maximum of 2048. - */ - batchSize?: number; - - /** - * Whether to strip new lines from the input text. This is recommended by - * OpenAI, but may not be suitable for all use cases. - */ - stripNewLines?: boolean; -} - -/** - * Class for generating embeddings using the OpenAI API. Extends the - * Embeddings class and implements OpenAIEmbeddingsParams and - * AzureOpenAIInput. - * @example - * ```typescript - * // Embed a query using OpenAIEmbeddings to generate embeddings for a given text - * const model = new OpenAIEmbeddings(); - * const res = await model.embedQuery( - * "What would be a good company name for a company that makes colorful socks?", - * ); - * console.log({ res }); - * - * ``` - */ -export class OpenAIEmbeddings - extends Embeddings - implements OpenAIEmbeddingsParams, AzureOpenAIInput -{ - modelName = "text-embedding-ada-002"; - - batchSize = 512; - - stripNewLines = true; - - timeout?: number; - - azureOpenAIApiVersion?: string; - - azureOpenAIApiKey?: string; - - azureOpenAIApiInstanceName?: string; - - azureOpenAIApiDeploymentName?: string; - - azureOpenAIBasePath?: string; - - organization?: string; - - private client: OpenAIClient; - - private clientConfig: ClientOptions; - - constructor( - fields?: Partial & - Partial & { - verbose?: boolean; - openAIApiKey?: string; - configuration?: ClientOptions; - }, - configuration?: ClientOptions & LegacyOpenAIInput - ) { - const fieldsWithDefaults = { maxConcurrency: 2, ...fields }; - - super(fieldsWithDefaults); - - let apiKey = - 
fieldsWithDefaults?.openAIApiKey ?? - getEnvironmentVariable("OPENAI_API_KEY"); - - const azureApiKey = - fieldsWithDefaults?.azureOpenAIApiKey ?? - getEnvironmentVariable("AZURE_OPENAI_API_KEY"); - if (!azureApiKey && !apiKey) { - throw new Error("OpenAI or Azure OpenAI API key not found"); - } - - const azureApiInstanceName = - fieldsWithDefaults?.azureOpenAIApiInstanceName ?? - getEnvironmentVariable("AZURE_OPENAI_API_INSTANCE_NAME"); - - const azureApiDeploymentName = - (fieldsWithDefaults?.azureOpenAIApiEmbeddingsDeploymentName || - fieldsWithDefaults?.azureOpenAIApiDeploymentName) ?? - (getEnvironmentVariable("AZURE_OPENAI_API_EMBEDDINGS_DEPLOYMENT_NAME") || - getEnvironmentVariable("AZURE_OPENAI_API_DEPLOYMENT_NAME")); - - const azureApiVersion = - fieldsWithDefaults?.azureOpenAIApiVersion ?? - getEnvironmentVariable("AZURE_OPENAI_API_VERSION"); - - this.azureOpenAIBasePath = - fieldsWithDefaults?.azureOpenAIBasePath ?? - getEnvironmentVariable("AZURE_OPENAI_BASE_PATH"); - - this.organization = - fieldsWithDefaults?.configuration?.organization ?? - getEnvironmentVariable("OPENAI_ORGANIZATION"); - - this.modelName = fieldsWithDefaults?.modelName ?? this.modelName; - this.batchSize = - fieldsWithDefaults?.batchSize ?? (azureApiKey ? 1 : this.batchSize); - this.stripNewLines = - fieldsWithDefaults?.stripNewLines ?? 
this.stripNewLines; - this.timeout = fieldsWithDefaults?.timeout; - - this.azureOpenAIApiVersion = azureApiVersion; - this.azureOpenAIApiKey = azureApiKey; - this.azureOpenAIApiInstanceName = azureApiInstanceName; - this.azureOpenAIApiDeploymentName = azureApiDeploymentName; - - if (this.azureOpenAIApiKey) { - if (!this.azureOpenAIApiInstanceName && !this.azureOpenAIBasePath) { - throw new Error("Azure OpenAI API instance name not found"); - } - if (!this.azureOpenAIApiDeploymentName) { - throw new Error("Azure OpenAI API deployment name not found"); - } - if (!this.azureOpenAIApiVersion) { - throw new Error("Azure OpenAI API version not found"); - } - apiKey = apiKey ?? ""; - } - - this.clientConfig = { - apiKey, - organization: this.organization, - baseURL: configuration?.basePath, - dangerouslyAllowBrowser: true, - defaultHeaders: configuration?.baseOptions?.headers, - defaultQuery: configuration?.baseOptions?.params, - ...configuration, - ...fields?.configuration, - }; - } - - /** - * Method to generate embeddings for an array of documents. Splits the - * documents into batches and makes requests to the OpenAI API to generate - * embeddings. - * @param texts Array of documents to generate embeddings for. - * @returns Promise that resolves to a 2D array of embeddings for each document. - */ - async embedDocuments(texts: string[]): Promise { - const batches = chunkArray( - this.stripNewLines ? 
texts.map((t) => t.replace(/\n/g, " ")) : texts, - this.batchSize - ); - - const batchRequests = batches.map((batch) => - this.embeddingWithRetry({ - model: this.modelName, - input: batch, - }) - ); - const batchResponses = await Promise.all(batchRequests); - - const embeddings: number[][] = []; - for (let i = 0; i < batchResponses.length; i += 1) { - const batch = batches[i]; - const { data: batchResponse } = batchResponses[i]; - for (let j = 0; j < batch.length; j += 1) { - embeddings.push(batchResponse[j].embedding); - } - } - return embeddings; - } - - /** - * Method to generate an embedding for a single document. Calls the - * embeddingWithRetry method with the document as the input. - * @param text Document to generate an embedding for. - * @returns Promise that resolves to an embedding for the document. - */ - async embedQuery(text: string): Promise { - const { data } = await this.embeddingWithRetry({ - model: this.modelName, - input: this.stripNewLines ? text.replace(/\n/g, " ") : text, - }); - return data[0].embedding; - } - - /** - * Private method to make a request to the OpenAI API to generate - * embeddings. Handles the retry logic and returns the response from the - * API. - * @param request Request to send to the OpenAI API. - * @returns Promise that resolves to the response from the API. 
- */ - private async embeddingWithRetry( - request: OpenAIClient.EmbeddingCreateParams - ) { - if (!this.client) { - const openAIEndpointConfig: OpenAIEndpointConfig = { - azureOpenAIApiDeploymentName: this.azureOpenAIApiDeploymentName, - azureOpenAIApiInstanceName: this.azureOpenAIApiInstanceName, - azureOpenAIApiKey: this.azureOpenAIApiKey, - azureOpenAIBasePath: this.azureOpenAIBasePath, - baseURL: this.clientConfig.baseURL, - }; - - const endpoint = getEndpoint(openAIEndpointConfig); - - const params = { - ...this.clientConfig, - baseURL: endpoint, - timeout: this.timeout, - maxRetries: 0, - }; - - if (!params.baseURL) { - delete params.baseURL; - } - - this.client = new OpenAIClient(params); - } - const requestOptions: OpenAICoreRequestOptions = {}; - if (this.azureOpenAIApiKey) { - requestOptions.headers = { - "api-key": this.azureOpenAIApiKey, - ...requestOptions.headers, - }; - requestOptions.query = { - "api-version": this.azureOpenAIApiVersion, - ...requestOptions.query, - }; - } - return this.caller.call(async () => { - try { - const res = await this.client.embeddings.create( - request, - requestOptions - ); - return res; - } catch (e) { - const error = wrapOpenAIClientError(e); - throw error; - } - }); - } -} +export { + type OpenAIEmbeddingsParams, + OpenAIEmbeddings, +} from "@langchain/openai"; diff --git a/langchain/src/experimental/hubs/makersuite/tests/googlemakersuitehub.test.ts b/langchain/src/experimental/hubs/makersuite/tests/googlemakersuitehub.test.ts index 10e071a4c25e..cd12a3de87c3 100644 --- a/langchain/src/experimental/hubs/makersuite/tests/googlemakersuitehub.test.ts +++ b/langchain/src/experimental/hubs/makersuite/tests/googlemakersuitehub.test.ts @@ -51,11 +51,7 @@ describe("Google Maker Suite Hub", () => { const prompt = new MakerSuitePrompt(textFile); const model = prompt.toModel(); // console.log(model.lc_namespace); - expect(model.lc_namespace).toEqual([ - "langchain", - "llms", - "googlepalm", - ]); + 
expect(model.lc_namespace).toEqual(["langchain", "llms", "googlepalm"]); }); test("data type", () => { @@ -81,11 +77,7 @@ describe("Google Maker Suite Hub", () => { test("data model", () => { const prompt = new MakerSuitePrompt(dataFile); const model = prompt.toModel(); - expect(model.lc_namespace).toEqual([ - "langchain", - "llms", - "googlepalm", - ]); + expect(model.lc_namespace).toEqual(["langchain", "llms", "googlepalm"]); }); test("chat type", () => { diff --git a/langchain/src/experimental/openai_assistant/index.ts b/langchain/src/experimental/openai_assistant/index.ts index 2e39836f30f3..248ff9407977 100644 --- a/langchain/src/experimental/openai_assistant/index.ts +++ b/langchain/src/experimental/openai_assistant/index.ts @@ -1,4 +1,4 @@ -import { type ClientOptions, OpenAI as OpenAIClient } from "openai"; +import { type ClientOptions, OpenAIClient } from "@langchain/openai"; import { Runnable } from "../../schema/runnable/base.js"; import { sleep } from "../../util/time.js"; import type { RunnableConfig } from "../../schema/runnable/config.js"; diff --git a/langchain/src/experimental/openai_assistant/schema.ts b/langchain/src/experimental/openai_assistant/schema.ts index 10d4ce2658f4..a74ca1fce69f 100644 --- a/langchain/src/experimental/openai_assistant/schema.ts +++ b/langchain/src/experimental/openai_assistant/schema.ts @@ -1,4 +1,4 @@ -import type { OpenAI as OpenAIClient } from "openai"; +import type { OpenAIClient } from "@langchain/openai"; import type { AgentFinish, AgentAction } from "../../schema/index.js"; export type OpenAIAssistantFinish = AgentFinish & { diff --git a/langchain/src/experimental/openai_assistant/tests/openai_assistant.int.test.ts b/langchain/src/experimental/openai_assistant/tests/openai_assistant.int.test.ts index d1dac7daed6a..27d86403fc7e 100644 --- a/langchain/src/experimental/openai_assistant/tests/openai_assistant.int.test.ts +++ b/langchain/src/experimental/openai_assistant/tests/openai_assistant.int.test.ts @@ -2,7 +2,7 
@@ /* eslint-disable @typescript-eslint/no-non-null-assertion */ import { z } from "zod"; -import { OpenAI as OpenAIClient } from "openai"; +import { OpenAIClient } from "@langchain/openai"; import { AgentExecutor } from "../../../agents/executor.js"; import { StructuredTool } from "../../../tools/base.js"; import { OpenAIAssistantRunnable } from "../index.js"; diff --git a/langchain/src/experimental/openai_files/index.ts b/langchain/src/experimental/openai_files/index.ts index 6522e906fd29..a4e9fe11229b 100644 --- a/langchain/src/experimental/openai_files/index.ts +++ b/langchain/src/experimental/openai_files/index.ts @@ -1,4 +1,4 @@ -import { OpenAI as OpenAIClient, type ClientOptions } from "openai"; +import { OpenAI as OpenAIClient, type ClientOptions } from "@langchain/openai"; import { Serializable } from "../../load/serializable.js"; export type OpenAIFilesInput = { diff --git a/langchain/src/llms/openai-chat.ts b/langchain/src/llms/openai-chat.ts index 4077a5efde89..06cb7faf0f1d 100644 --- a/langchain/src/llms/openai-chat.ts +++ b/langchain/src/llms/openai-chat.ts @@ -1,471 +1,18 @@ -import { type ClientOptions, OpenAI as OpenAIClient } from "openai"; +import { OpenAIChat } from "@langchain/openai"; + import { CallbackManagerForLLMRun } from "../callbacks/manager.js"; -import { Generation, GenerationChunk, LLMResult } from "../schema/index.js"; -import { - AzureOpenAIInput, - OpenAICallOptions, - OpenAIChatInput, - OpenAICoreRequestOptions, - LegacyOpenAIInput, -} from "../types/openai-types.js"; -import { OpenAIEndpointConfig, getEndpoint } from "../util/azure.js"; +import type { Generation, LLMResult } from "../schema/index.js"; import { getEnvironmentVariable } from "../util/env.js"; import { promptLayerTrackRequest } from "../util/prompt-layer.js"; -import { BaseLLMParams, LLM } from "./base.js"; -import { wrapOpenAIClientError } from "../util/openai.js"; - -export { type AzureOpenAIInput, type OpenAIChatInput }; -/** - * Interface that extends the 
OpenAICallOptions interface and includes an - * optional promptIndex property. It represents the options that can be - * passed when making a call to the OpenAI Chat API. - */ -export interface OpenAIChatCallOptions extends OpenAICallOptions { - promptIndex?: number; -} - -/** - * Wrapper around OpenAI large language models that use the Chat endpoint. - * - * To use you should have the `openai` package installed, with the - * `OPENAI_API_KEY` environment variable set. - * - * To use with Azure you should have the `openai` package installed, with the - * `AZURE_OPENAI_API_KEY`, - * `AZURE_OPENAI_API_INSTANCE_NAME`, - * `AZURE_OPENAI_API_DEPLOYMENT_NAME` - * and `AZURE_OPENAI_API_VERSION` environment variable set. - * - * @remarks - * Any parameters that are valid to be passed to {@link - * https://platform.openai.com/docs/api-reference/chat/create | - * `openai.createCompletion`} can be passed through {@link modelKwargs}, even - * if not explicitly available on this class. - * - * @augments BaseLLM - * @augments OpenAIInput - * @augments AzureOpenAIChatInput - * @example - * ```typescript - * const model = new OpenAIChat({ - * prefixMessages: [ - * { - * role: "system", - * content: "You are a helpful assistant that answers in pirate language", - * }, - * ], - * maxTokens: 50, - * }); - * - * const res = await model.call( - * "What would be a good company name for a company that makes colorful socks?" 
- * ); - * console.log({ res }); - * ``` - */ -export class OpenAIChat - extends LLM - implements OpenAIChatInput, AzureOpenAIInput -{ - static lc_name() { - return "OpenAIChat"; - } - - get callKeys() { - return [...super.callKeys, "options", "promptIndex"]; - } - - lc_serializable = true; - - get lc_secrets(): { [key: string]: string } | undefined { - return { - openAIApiKey: "OPENAI_API_KEY", - azureOpenAIApiKey: "AZURE_OPENAI_API_KEY", - organization: "OPENAI_ORGANIZATION", - }; - } - - get lc_aliases(): Record { - return { - modelName: "model", - openAIApiKey: "openai_api_key", - azureOpenAIApiVersion: "azure_openai_api_version", - azureOpenAIApiKey: "azure_openai_api_key", - azureOpenAIApiInstanceName: "azure_openai_api_instance_name", - azureOpenAIApiDeploymentName: "azure_openai_api_deployment_name", - }; - } - - temperature = 1; - - topP = 1; - - frequencyPenalty = 0; - - presencePenalty = 0; - - n = 1; - - logitBias?: Record; - - maxTokens?: number; - - modelName = "gpt-3.5-turbo"; - - prefixMessages?: OpenAIClient.Chat.ChatCompletionMessageParam[]; - - modelKwargs?: OpenAIChatInput["modelKwargs"]; - - timeout?: number; - - stop?: string[]; - - user?: string; - - streaming = false; - - openAIApiKey?: string; - - azureOpenAIApiVersion?: string; - - azureOpenAIApiKey?: string; - - azureOpenAIApiInstanceName?: string; - - azureOpenAIApiDeploymentName?: string; - - azureOpenAIBasePath?: string; - - organization?: string; - - private client: OpenAIClient; - - private clientConfig: ClientOptions; - - constructor( - fields?: Partial & - Partial & - BaseLLMParams & { - configuration?: ClientOptions & LegacyOpenAIInput; - }, - /** @deprecated */ - configuration?: ClientOptions & LegacyOpenAIInput - ) { - super(fields ?? {}); - - this.openAIApiKey = - fields?.openAIApiKey ?? getEnvironmentVariable("OPENAI_API_KEY"); - - this.azureOpenAIApiKey = - fields?.azureOpenAIApiKey ?? 
- getEnvironmentVariable("AZURE_OPENAI_API_KEY"); - - if (!this.azureOpenAIApiKey && !this.openAIApiKey) { - throw new Error("OpenAI or Azure OpenAI API key not found"); - } - - this.azureOpenAIApiInstanceName = - fields?.azureOpenAIApiInstanceName ?? - getEnvironmentVariable("AZURE_OPENAI_API_INSTANCE_NAME"); - - this.azureOpenAIApiDeploymentName = - (fields?.azureOpenAIApiCompletionsDeploymentName || - fields?.azureOpenAIApiDeploymentName) ?? - (getEnvironmentVariable("AZURE_OPENAI_API_COMPLETIONS_DEPLOYMENT_NAME") || - getEnvironmentVariable("AZURE_OPENAI_API_DEPLOYMENT_NAME")); - - this.azureOpenAIApiVersion = - fields?.azureOpenAIApiVersion ?? - getEnvironmentVariable("AZURE_OPENAI_API_VERSION"); - - this.azureOpenAIBasePath = - fields?.azureOpenAIBasePath ?? - getEnvironmentVariable("AZURE_OPENAI_BASE_PATH"); - this.organization = - fields?.configuration?.organization ?? - getEnvironmentVariable("OPENAI_ORGANIZATION"); +export { + type AzureOpenAIInput, + type OpenAICallOptions, + type OpenAIInput, + type OpenAIChatCallOptions, +} from "@langchain/openai"; - this.modelName = fields?.modelName ?? this.modelName; - this.prefixMessages = fields?.prefixMessages ?? this.prefixMessages; - this.modelKwargs = fields?.modelKwargs ?? {}; - this.timeout = fields?.timeout; - - this.temperature = fields?.temperature ?? this.temperature; - this.topP = fields?.topP ?? this.topP; - this.frequencyPenalty = fields?.frequencyPenalty ?? this.frequencyPenalty; - this.presencePenalty = fields?.presencePenalty ?? this.presencePenalty; - this.n = fields?.n ?? this.n; - this.logitBias = fields?.logitBias; - this.maxTokens = fields?.maxTokens; - this.stop = fields?.stop; - this.user = fields?.user; - - this.streaming = fields?.streaming ?? false; - - if (this.n > 1) { - throw new Error( - "Cannot use n > 1 in OpenAIChat LLM. Use ChatOpenAI Chat Model instead." 
- ); - } - - if (this.azureOpenAIApiKey) { - if (!this.azureOpenAIApiInstanceName && !this.azureOpenAIBasePath) { - throw new Error("Azure OpenAI API instance name not found"); - } - if (!this.azureOpenAIApiDeploymentName) { - throw new Error("Azure OpenAI API deployment name not found"); - } - if (!this.azureOpenAIApiVersion) { - throw new Error("Azure OpenAI API version not found"); - } - this.openAIApiKey = this.openAIApiKey ?? ""; - } - - this.clientConfig = { - apiKey: this.openAIApiKey, - organization: this.organization, - baseURL: configuration?.basePath ?? fields?.configuration?.basePath, - dangerouslyAllowBrowser: true, - defaultHeaders: - configuration?.baseOptions?.headers ?? - fields?.configuration?.baseOptions?.headers, - defaultQuery: - configuration?.baseOptions?.params ?? - fields?.configuration?.baseOptions?.params, - ...configuration, - ...fields?.configuration, - }; - } - - /** - * Get the parameters used to invoke the model - */ - invocationParams( - options?: this["ParsedCallOptions"] - ): Omit { - return { - model: this.modelName, - temperature: this.temperature, - top_p: this.topP, - frequency_penalty: this.frequencyPenalty, - presence_penalty: this.presencePenalty, - n: this.n, - logit_bias: this.logitBias, - max_tokens: this.maxTokens === -1 ? undefined : this.maxTokens, - stop: options?.stop ?? 
this.stop, - user: this.user, - stream: this.streaming, - ...this.modelKwargs, - }; - } - - /** @ignore */ - _identifyingParams(): Omit< - OpenAIClient.Chat.ChatCompletionCreateParams, - "messages" - > & { - model_name: string; - } & ClientOptions { - return { - model_name: this.modelName, - ...this.invocationParams(), - ...this.clientConfig, - }; - } - - /** - * Get the identifying parameters for the model - */ - identifyingParams(): Omit< - OpenAIClient.Chat.ChatCompletionCreateParams, - "messages" - > & { - model_name: string; - } & ClientOptions { - return { - model_name: this.modelName, - ...this.invocationParams(), - ...this.clientConfig, - }; - } - - /** - * Formats the messages for the OpenAI API. - * @param prompt The prompt to be formatted. - * @returns Array of formatted messages. - */ - private formatMessages( - prompt: string - ): OpenAIClient.Chat.ChatCompletionMessageParam[] { - const message: OpenAIClient.Chat.ChatCompletionMessageParam = { - role: "user", - content: prompt, - }; - return this.prefixMessages ? [...this.prefixMessages, message] : [message]; - } - - async *_streamResponseChunks( - prompt: string, - options: this["ParsedCallOptions"], - runManager?: CallbackManagerForLLMRun - ): AsyncGenerator { - const params = { - ...this.invocationParams(options), - messages: this.formatMessages(prompt), - stream: true as const, - }; - const stream = await this.completionWithRetry(params, options); - for await (const data of stream) { - const choice = data?.choices[0]; - if (!choice) { - continue; - } - const { delta } = choice; - const generationChunk = new GenerationChunk({ - text: delta.content ?? "", - }); - yield generationChunk; - const newTokenIndices = { - prompt: options.promptIndex ?? 0, - completion: choice.index ?? 0, - }; - // eslint-disable-next-line no-void - void runManager?.handleLLMNewToken( - generationChunk.text ?? 
"", - newTokenIndices - ); - } - if (options.signal?.aborted) { - throw new Error("AbortError"); - } - } - - /** @ignore */ - async _call( - prompt: string, - options: this["ParsedCallOptions"], - runManager?: CallbackManagerForLLMRun - ): Promise { - const params = this.invocationParams(options); - - if (params.stream) { - const stream = await this._streamResponseChunks( - prompt, - options, - runManager - ); - let finalChunk: GenerationChunk | undefined; - for await (const chunk of stream) { - if (finalChunk === undefined) { - finalChunk = chunk; - } else { - finalChunk = finalChunk.concat(chunk); - } - } - return finalChunk?.text ?? ""; - } else { - const response = await this.completionWithRetry( - { - ...params, - stream: false, - messages: this.formatMessages(prompt), - }, - { - signal: options.signal, - ...options.options, - } - ); - return response?.choices[0]?.message?.content ?? ""; - } - } - - /** - * Calls the OpenAI API with retry logic in case of failures. - * @param request The request to send to the OpenAI API. - * @param options Optional configuration for the API call. - * @returns The response from the OpenAI API. 
- */ - async completionWithRetry( - request: OpenAIClient.Chat.ChatCompletionCreateParamsStreaming, - options?: OpenAICoreRequestOptions - ): Promise>; - - async completionWithRetry( - request: OpenAIClient.Chat.ChatCompletionCreateParamsNonStreaming, - options?: OpenAICoreRequestOptions - ): Promise; - - async completionWithRetry( - request: - | OpenAIClient.Chat.ChatCompletionCreateParamsStreaming - | OpenAIClient.Chat.ChatCompletionCreateParamsNonStreaming, - options?: OpenAICoreRequestOptions - ): Promise< - | AsyncIterable - | OpenAIClient.Chat.Completions.ChatCompletion - > { - const requestOptions = this._getClientOptions(options); - return this.caller.call(async () => { - try { - const res = await this.client.chat.completions.create( - request, - requestOptions - ); - return res; - } catch (e) { - const error = wrapOpenAIClientError(e); - throw error; - } - }); - } - - /** @ignore */ - private _getClientOptions(options: OpenAICoreRequestOptions | undefined) { - if (!this.client) { - const openAIEndpointConfig: OpenAIEndpointConfig = { - azureOpenAIApiDeploymentName: this.azureOpenAIApiDeploymentName, - azureOpenAIApiInstanceName: this.azureOpenAIApiInstanceName, - azureOpenAIApiKey: this.azureOpenAIApiKey, - azureOpenAIBasePath: this.azureOpenAIBasePath, - baseURL: this.clientConfig.baseURL, - }; - - const endpoint = getEndpoint(openAIEndpointConfig); - - const params = { - ...this.clientConfig, - baseURL: endpoint, - timeout: this.timeout, - maxRetries: 0, - }; - if (!params.baseURL) { - delete params.baseURL; - } - - this.client = new OpenAIClient(params); - } - const requestOptions = { - ...this.clientConfig, - ...options, - } as OpenAICoreRequestOptions; - if (this.azureOpenAIApiKey) { - requestOptions.headers = { - "api-key": this.azureOpenAIApiKey, - ...requestOptions.headers, - }; - requestOptions.query = { - "api-version": this.azureOpenAIApiVersion, - ...requestOptions.query, - }; - } - return requestOptions; - } - - _llmType() { - return "openai"; 
- } -} +export { OpenAIChat }; /** * PromptLayer wrapper to OpenAIChat diff --git a/langchain/src/llms/openai.ts b/langchain/src/llms/openai.ts index c962f30c096f..edfb429272b5 100644 --- a/langchain/src/llms/openai.ts +++ b/langchain/src/llms/openai.ts @@ -1,558 +1,17 @@ -import type { TiktokenModel } from "js-tiktoken/lite"; -import { type ClientOptions, OpenAI as OpenAIClient } from "openai"; -import { calculateMaxTokens } from "../base_language/count_tokens.js"; +import { OpenAI } from "@langchain/openai"; + import { CallbackManagerForLLMRun } from "../callbacks/manager.js"; -import { GenerationChunk, LLMResult } from "../schema/index.js"; -import { - AzureOpenAIInput, - OpenAICallOptions, - OpenAICoreRequestOptions, - OpenAIInput, - LegacyOpenAIInput, -} from "../types/openai-types.js"; -import { OpenAIEndpointConfig, getEndpoint } from "../util/azure.js"; -import { chunkArray } from "../util/chunk.js"; +import type { LLMResult } from "../schema/index.js"; import { getEnvironmentVariable } from "../util/env.js"; import { promptLayerTrackRequest } from "../util/prompt-layer.js"; -import { BaseLLM, BaseLLMParams } from "./base.js"; -import { OpenAIChat } from "./openai-chat.js"; -import { wrapOpenAIClientError } from "../util/openai.js"; - -export type { AzureOpenAIInput, OpenAICallOptions, OpenAIInput }; - -/** - * Interface for tracking token usage in OpenAI calls. - */ -interface TokenUsage { - completionTokens?: number; - promptTokens?: number; - totalTokens?: number; -} - -/** - * Wrapper around OpenAI large language models. - * - * To use you should have the `openai` package installed, with the - * `OPENAI_API_KEY` environment variable set. - * - * To use with Azure you should have the `openai` package installed, with the - * `AZURE_OPENAI_API_KEY`, - * `AZURE_OPENAI_API_INSTANCE_NAME`, - * `AZURE_OPENAI_API_DEPLOYMENT_NAME` - * and `AZURE_OPENAI_API_VERSION` environment variable set. 
- * - * @remarks - * Any parameters that are valid to be passed to {@link - * https://platform.openai.com/docs/api-reference/completions/create | - * `openai.createCompletion`} can be passed through {@link modelKwargs}, even - * if not explicitly available on this class. - * @example - * ```typescript - * const model = new OpenAI({ - * modelName: "gpt-4", - * temperature: 0.7, - * maxTokens: 1000, - * maxRetries: 5, - * }); - * - * const res = await model.call( - * "Question: What would be a good company name for a company that makes colorful socks?\nAnswer:" - * ); - * console.log({ res }); - * ``` - */ -export class OpenAI - extends BaseLLM - implements OpenAIInput, AzureOpenAIInput -{ - static lc_name() { - return "OpenAI"; - } - - get callKeys() { - return [...super.callKeys, "options"]; - } - - lc_serializable = true; - - get lc_secrets(): { [key: string]: string } | undefined { - return { - openAIApiKey: "OPENAI_API_KEY", - azureOpenAIApiKey: "AZURE_OPENAI_API_KEY", - organization: "OPENAI_ORGANIZATION", - }; - } - - get lc_aliases(): Record { - return { - modelName: "model", - openAIApiKey: "openai_api_key", - azureOpenAIApiVersion: "azure_openai_api_version", - azureOpenAIApiKey: "azure_openai_api_key", - azureOpenAIApiInstanceName: "azure_openai_api_instance_name", - azureOpenAIApiDeploymentName: "azure_openai_api_deployment_name", - }; - } - - temperature = 0.7; - - maxTokens = 256; - - topP = 1; - - frequencyPenalty = 0; - - presencePenalty = 0; - - n = 1; - - bestOf?: number; - - logitBias?: Record; - - modelName = "gpt-3.5-turbo-instruct"; - - modelKwargs?: OpenAIInput["modelKwargs"]; - - batchSize = 20; - - timeout?: number; - stop?: string[]; +export { + type AzureOpenAIInput, + type OpenAICallOptions, + type OpenAIInput, +} from "@langchain/openai"; - user?: string; - - streaming = false; - - openAIApiKey?: string; - - azureOpenAIApiVersion?: string; - - azureOpenAIApiKey?: string; - - azureOpenAIApiInstanceName?: string; - - 
azureOpenAIApiDeploymentName?: string; - - azureOpenAIBasePath?: string; - - organization?: string; - - private client: OpenAIClient; - - private clientConfig: ClientOptions; - - constructor( - fields?: Partial & - Partial & - BaseLLMParams & { - configuration?: ClientOptions & LegacyOpenAIInput; - }, - /** @deprecated */ - configuration?: ClientOptions & LegacyOpenAIInput - ) { - if ( - (fields?.modelName?.startsWith("gpt-3.5-turbo") || - fields?.modelName?.startsWith("gpt-4")) && - !fields?.modelName?.includes("-instruct") - ) { - // eslint-disable-next-line no-constructor-return - return new OpenAIChat( - fields, - configuration - ) as unknown as OpenAI; - } - super(fields ?? {}); - - this.openAIApiKey = - fields?.openAIApiKey ?? getEnvironmentVariable("OPENAI_API_KEY"); - - this.azureOpenAIApiKey = - fields?.azureOpenAIApiKey ?? - getEnvironmentVariable("AZURE_OPENAI_API_KEY"); - - if (!this.azureOpenAIApiKey && !this.openAIApiKey) { - throw new Error("OpenAI or Azure OpenAI API key not found"); - } - - this.azureOpenAIApiInstanceName = - fields?.azureOpenAIApiInstanceName ?? - getEnvironmentVariable("AZURE_OPENAI_API_INSTANCE_NAME"); - - this.azureOpenAIApiDeploymentName = - (fields?.azureOpenAIApiCompletionsDeploymentName || - fields?.azureOpenAIApiDeploymentName) ?? - (getEnvironmentVariable("AZURE_OPENAI_API_COMPLETIONS_DEPLOYMENT_NAME") || - getEnvironmentVariable("AZURE_OPENAI_API_DEPLOYMENT_NAME")); - - this.azureOpenAIApiVersion = - fields?.azureOpenAIApiVersion ?? - getEnvironmentVariable("AZURE_OPENAI_API_VERSION"); - - this.azureOpenAIBasePath = - fields?.azureOpenAIBasePath ?? - getEnvironmentVariable("AZURE_OPENAI_BASE_PATH"); - - this.organization = - fields?.configuration?.organization ?? - getEnvironmentVariable("OPENAI_ORGANIZATION"); - - this.modelName = fields?.modelName ?? this.modelName; - this.modelKwargs = fields?.modelKwargs ?? {}; - this.batchSize = fields?.batchSize ?? 
this.batchSize; - this.timeout = fields?.timeout; - - this.temperature = fields?.temperature ?? this.temperature; - this.maxTokens = fields?.maxTokens ?? this.maxTokens; - this.topP = fields?.topP ?? this.topP; - this.frequencyPenalty = fields?.frequencyPenalty ?? this.frequencyPenalty; - this.presencePenalty = fields?.presencePenalty ?? this.presencePenalty; - this.n = fields?.n ?? this.n; - this.bestOf = fields?.bestOf ?? this.bestOf; - this.logitBias = fields?.logitBias; - this.stop = fields?.stop; - this.user = fields?.user; - - this.streaming = fields?.streaming ?? false; - - if (this.streaming && this.bestOf && this.bestOf > 1) { - throw new Error("Cannot stream results when bestOf > 1"); - } - - if (this.azureOpenAIApiKey) { - if (!this.azureOpenAIApiInstanceName && !this.azureOpenAIBasePath) { - throw new Error("Azure OpenAI API instance name not found"); - } - if (!this.azureOpenAIApiDeploymentName) { - throw new Error("Azure OpenAI API deployment name not found"); - } - if (!this.azureOpenAIApiVersion) { - throw new Error("Azure OpenAI API version not found"); - } - this.openAIApiKey = this.openAIApiKey ?? ""; - } - - this.clientConfig = { - apiKey: this.openAIApiKey, - organization: this.organization, - baseURL: configuration?.basePath ?? fields?.configuration?.basePath, - dangerouslyAllowBrowser: true, - defaultHeaders: - configuration?.baseOptions?.headers ?? - fields?.configuration?.baseOptions?.headers, - defaultQuery: - configuration?.baseOptions?.params ?? 
- fields?.configuration?.baseOptions?.params, - ...configuration, - ...fields?.configuration, - }; - } - - /** - * Get the parameters used to invoke the model - */ - invocationParams( - options?: this["ParsedCallOptions"] - ): Omit { - return { - model: this.modelName, - temperature: this.temperature, - max_tokens: this.maxTokens, - top_p: this.topP, - frequency_penalty: this.frequencyPenalty, - presence_penalty: this.presencePenalty, - n: this.n, - best_of: this.bestOf, - logit_bias: this.logitBias, - stop: options?.stop ?? this.stop, - user: this.user, - stream: this.streaming, - ...this.modelKwargs, - }; - } - - /** @ignore */ - _identifyingParams(): Omit & { - model_name: string; - } & ClientOptions { - return { - model_name: this.modelName, - ...this.invocationParams(), - ...this.clientConfig, - }; - } - - /** - * Get the identifying parameters for the model - */ - identifyingParams(): Omit & { - model_name: string; - } & ClientOptions { - return this._identifyingParams(); - } - - /** - * Call out to OpenAI's endpoint with k unique prompts - * - * @param [prompts] - The prompts to pass into the model. - * @param [options] - Optional list of stop words to use when generating. - * @param [runManager] - Optional callback manager to use when generating. - * - * @returns The full LLM output. 
- * - * @example - * ```ts - * import { OpenAI } from "langchain/llms/openai"; - * const openai = new OpenAI(); - * const response = await openai.generate(["Tell me a joke."]); - * ``` - */ - async _generate( - prompts: string[], - options: this["ParsedCallOptions"], - runManager?: CallbackManagerForLLMRun - ): Promise { - const subPrompts = chunkArray(prompts, this.batchSize); - const choices: OpenAIClient.CompletionChoice[] = []; - const tokenUsage: TokenUsage = {}; - - const params = this.invocationParams(options); - - if (params.max_tokens === -1) { - if (prompts.length !== 1) { - throw new Error( - "max_tokens set to -1 not supported for multiple inputs" - ); - } - params.max_tokens = await calculateMaxTokens({ - prompt: prompts[0], - // Cast here to allow for other models that may not fit the union - modelName: this.modelName as TiktokenModel, - }); - } - - for (let i = 0; i < subPrompts.length; i += 1) { - const data = params.stream - ? await (async () => { - const choices: OpenAIClient.CompletionChoice[] = []; - let response: Omit | undefined; - const stream = await this.completionWithRetry( - { - ...params, - stream: true, - prompt: subPrompts[i], - }, - options - ); - for await (const message of stream) { - // on the first message set the response properties - if (!response) { - response = { - id: message.id, - object: message.object, - created: message.created, - model: message.model, - }; - } - - // on all messages, update choice - for (const part of message.choices) { - if (!choices[part.index]) { - choices[part.index] = part; - } else { - const choice = choices[part.index]; - choice.text += part.text; - choice.finish_reason = part.finish_reason; - choice.logprobs = part.logprobs; - } - void runManager?.handleLLMNewToken(part.text, { - prompt: Math.floor(part.index / this.n), - completion: part.index % this.n, - }); - } - } - if (options.signal?.aborted) { - throw new Error("AbortError"); - } - return { ...response, choices }; - })() - : await 
this.completionWithRetry( - { - ...params, - stream: false, - prompt: subPrompts[i], - }, - { - signal: options.signal, - ...options.options, - } - ); - - choices.push(...data.choices); - const { - completion_tokens: completionTokens, - prompt_tokens: promptTokens, - total_tokens: totalTokens, - } = data.usage - ? data.usage - : { - completion_tokens: undefined, - prompt_tokens: undefined, - total_tokens: undefined, - }; - - if (completionTokens) { - tokenUsage.completionTokens = - (tokenUsage.completionTokens ?? 0) + completionTokens; - } - - if (promptTokens) { - tokenUsage.promptTokens = (tokenUsage.promptTokens ?? 0) + promptTokens; - } - - if (totalTokens) { - tokenUsage.totalTokens = (tokenUsage.totalTokens ?? 0) + totalTokens; - } - } - - const generations = chunkArray(choices, this.n).map((promptChoices) => - promptChoices.map((choice) => ({ - text: choice.text ?? "", - generationInfo: { - finishReason: choice.finish_reason, - logprobs: choice.logprobs, - }, - })) - ); - return { - generations, - llmOutput: { tokenUsage }, - }; - } - - // TODO(jacoblee): Refactor with _generate(..., {stream: true}) implementation? - async *_streamResponseChunks( - input: string, - options: this["ParsedCallOptions"], - runManager?: CallbackManagerForLLMRun - ): AsyncGenerator { - const params = { - ...this.invocationParams(options), - prompt: input, - stream: true as const, - }; - const stream = await this.completionWithRetry(params, options); - for await (const data of stream) { - const choice = data?.choices[0]; - if (!choice) { - continue; - } - const chunk = new GenerationChunk({ - text: choice.text, - generationInfo: { - finishReason: choice.finish_reason, - }, - }); - yield chunk; - // eslint-disable-next-line no-void - void runManager?.handleLLMNewToken(chunk.text ?? ""); - } - if (options.signal?.aborted) { - throw new Error("AbortError"); - } - } - - /** - * Calls the OpenAI API with retry logic in case of failures. 
- * @param request The request to send to the OpenAI API. - * @param options Optional configuration for the API call. - * @returns The response from the OpenAI API. - */ - async completionWithRetry( - request: OpenAIClient.CompletionCreateParamsStreaming, - options?: OpenAICoreRequestOptions - ): Promise>; - - async completionWithRetry( - request: OpenAIClient.CompletionCreateParamsNonStreaming, - options?: OpenAICoreRequestOptions - ): Promise; - - async completionWithRetry( - request: - | OpenAIClient.CompletionCreateParamsStreaming - | OpenAIClient.CompletionCreateParamsNonStreaming, - options?: OpenAICoreRequestOptions - ): Promise< - AsyncIterable | OpenAIClient.Completions.Completion - > { - const requestOptions = this._getClientOptions(options); - return this.caller.call(async () => { - try { - const res = await this.client.completions.create( - request, - requestOptions - ); - return res; - } catch (e) { - const error = wrapOpenAIClientError(e); - throw error; - } - }); - } - - /** - * Calls the OpenAI API with retry logic in case of failures. - * @param request The request to send to the OpenAI API. - * @param options Optional configuration for the API call. - * @returns The response from the OpenAI API. 
- */ - private _getClientOptions(options: OpenAICoreRequestOptions | undefined) { - if (!this.client) { - const openAIEndpointConfig: OpenAIEndpointConfig = { - azureOpenAIApiDeploymentName: this.azureOpenAIApiDeploymentName, - azureOpenAIApiInstanceName: this.azureOpenAIApiInstanceName, - azureOpenAIApiKey: this.azureOpenAIApiKey, - azureOpenAIBasePath: this.azureOpenAIBasePath, - baseURL: this.clientConfig.baseURL, - }; - - const endpoint = getEndpoint(openAIEndpointConfig); - - const params = { - ...this.clientConfig, - baseURL: endpoint, - timeout: this.timeout, - maxRetries: 0, - }; - - if (!params.baseURL) { - delete params.baseURL; - } - - this.client = new OpenAIClient(params); - } - const requestOptions = { - ...this.clientConfig, - ...options, - } as OpenAICoreRequestOptions; - if (this.azureOpenAIApiKey) { - requestOptions.headers = { - "api-key": this.azureOpenAIApiKey, - ...requestOptions.headers, - }; - requestOptions.query = { - "api-version": this.azureOpenAIApiVersion, - ...requestOptions.query, - }; - } - return requestOptions; - } - - _llmType() { - return "openai"; - } -} +export { OpenAI }; /** * PromptLayer wrapper to OpenAI diff --git a/langchain/src/schema/index.ts b/langchain/src/schema/index.ts index 784a99b42770..1fa111c7e7fa 100644 --- a/langchain/src/schema/index.ts +++ b/langchain/src/schema/index.ts @@ -1,4 +1,4 @@ -import type { OpenAI as OpenAIClient } from "openai"; +import type { OpenAIClient } from "@langchain/openai"; import { BaseMessage, HumanMessage, diff --git a/langchain/src/tools/convert_to_openai.ts b/langchain/src/tools/convert_to_openai.ts index 0c599a14e3b7..b5a875324ec8 100644 --- a/langchain/src/tools/convert_to_openai.ts +++ b/langchain/src/tools/convert_to_openai.ts @@ -1,5 +1,5 @@ import { zodToJsonSchema } from "zod-to-json-schema"; -import type { OpenAI as OpenAIClient } from "openai"; +import type { OpenAIClient } from "@langchain/openai"; import { StructuredTool } from "./base.js"; diff --git 
a/langchain/src/types/openai-types.ts b/langchain/src/types/openai-types.ts index f3df0278a6a9..a3acaf1904d0 100644 --- a/langchain/src/types/openai-types.ts +++ b/langchain/src/types/openai-types.ts @@ -1,4 +1,4 @@ -import type { OpenAI as OpenAIClient } from "openai"; +import type { OpenAIClient } from "@langchain/openai"; import { TiktokenModel } from "js-tiktoken/lite"; import { BaseLanguageModelCallOptions } from "../base_language/index.js"; diff --git a/langchain/src/util/openai-format-fndef.ts b/langchain/src/util/openai-format-fndef.ts index 3fe476b90554..652b9ba22e7c 100644 --- a/langchain/src/util/openai-format-fndef.ts +++ b/langchain/src/util/openai-format-fndef.ts @@ -5,9 +5,9 @@ * (c) 2023 Harry Marr * MIT license */ -import OpenAI from "openai"; +import type { OpenAIClient } from "@langchain/openai"; -type OpenAIFunction = OpenAI.Chat.ChatCompletionCreateParams.Function; +type OpenAIFunction = OpenAIClient.Chat.ChatCompletionCreateParams.Function; // Types representing the OpenAI function definitions. 
While the OpenAI client library // does have types for function definitions, the properties are just Record, diff --git a/langchain/src/util/openai.ts b/langchain/src/util/openai.ts deleted file mode 100644 index db7fe66f032e..000000000000 --- a/langchain/src/util/openai.ts +++ /dev/null @@ -1,16 +0,0 @@ -import { APIConnectionTimeoutError, APIUserAbortError } from "openai"; - -// eslint-disable-next-line @typescript-eslint/no-explicit-any -export function wrapOpenAIClientError(e: any) { - let error; - if (e.constructor.name === APIConnectionTimeoutError.name) { - error = new Error(e.message); - error.name = "TimeoutError"; - } else if (e.constructor.name === APIUserAbortError.name) { - error = new Error(e.message); - error.name = "AbortError"; - } else { - error = e; - } - return error; -} diff --git a/langchain/src/vectorstores/memory.ts b/langchain/src/vectorstores/memory.ts index 917d18c6b806..5cf932eecac8 100644 --- a/langchain/src/vectorstores/memory.ts +++ b/langchain/src/vectorstores/memory.ts @@ -1 +1 @@ -export * from "@langchain/community/vectorstores/memory"; \ No newline at end of file +export * from "@langchain/community/vectorstores/memory"; diff --git a/libs/langchain-community/src/vectorstores/tests/analyticdb.int.test.ts b/libs/langchain-community/src/vectorstores/tests/analyticdb.int.test.ts index d472ae18136d..5ade9b6dd8bf 100644 --- a/libs/langchain-community/src/vectorstores/tests/analyticdb.int.test.ts +++ b/libs/langchain-community/src/vectorstores/tests/analyticdb.int.test.ts @@ -2,9 +2,9 @@ /* eslint-disable import/no-extraneous-dependencies */ import { test } from "@jest/globals"; -import { AnalyticDBVectorStore } from "../analyticdb.js"; -import { Document } from "@langchain/core/documents"; import { OpenAIEmbeddings } from "@langchain/openai"; +import { Document } from "@langchain/core/documents"; +import { AnalyticDBVectorStore } from "../analyticdb.js"; const connectionOptions = { host: process.env.ANALYTICDB_HOST || "localhost", diff 
--git a/libs/langchain-community/src/vectorstores/tests/convex/convex/lib.ts b/libs/langchain-community/src/vectorstores/tests/convex/convex/lib.ts index a47d55a4dce4..156bb06805c6 100644 --- a/libs/langchain-community/src/vectorstores/tests/convex/convex/lib.ts +++ b/libs/langchain-community/src/vectorstores/tests/convex/convex/lib.ts @@ -1,8 +1,8 @@ // eslint-disable-next-line import/no-extraneous-dependencies import { v } from "convex/values"; -import { OpenAIEmbeddings } from "@langchain/openai"; import { ConvexVectorStore } from "../../../convex.js"; import { action, mutation } from "./_generated/server.js"; +import { FakeEmbeddings } from "../../../../utils/testing.js"; export const reset = mutation({ args: {}, @@ -18,11 +18,11 @@ export const ingest = action({ texts: v.array(v.string()), metadatas: v.array(v.any()), }, - handler: async (ctx, { openAIApiKey, texts, metadatas }) => { + handler: async (ctx, { texts, metadatas }) => { await ConvexVectorStore.fromTexts( texts, metadatas, - new OpenAIEmbeddings({ openAIApiKey }), + new FakeEmbeddings({}), { ctx } ); }, @@ -33,9 +33,9 @@ export const similaritySearch = action({ openAIApiKey: v.string(), query: v.string(), }, - handler: async (ctx, { openAIApiKey, query }) => { + handler: async (ctx, { query }) => { const vectorStore = new ConvexVectorStore( - new OpenAIEmbeddings({ openAIApiKey }), + new FakeEmbeddings({}), { ctx } ); diff --git a/libs/langchain-community/src/vectorstores/tests/googlevertexai.int.test.ts b/libs/langchain-community/src/vectorstores/tests/googlevertexai.int.test.ts index 7004c193069e..db93d4be707a 100644 --- a/libs/langchain-community/src/vectorstores/tests/googlevertexai.int.test.ts +++ b/libs/langchain-community/src/vectorstores/tests/googlevertexai.int.test.ts @@ -2,6 +2,7 @@ /* eslint-disable @typescript-eslint/no-non-null-assertion */ import { beforeAll, expect, test } from "@jest/globals"; import { Document } from "@langchain/core/documents"; +import { Embeddings } from 
"@langchain/core/embeddings"; import { SyntheticEmbeddings } from "../../utils/testing.js"; import { InMemoryDocstore } from "../../stores/doc/in_memory.js"; import { @@ -10,7 +11,6 @@ import { IdDocument, Restriction, } from "../googlevertexai.js"; -import { Embeddings } from "@langchain/core/embeddings"; describe("Vertex AI matching", () => { let embeddings: Embeddings; diff --git a/libs/langchain-community/src/vectorstores/tests/mongodb_atlas.int.test.ts b/libs/langchain-community/src/vectorstores/tests/mongodb_atlas.int.test.ts index c27ae96c70f1..3a8181767f55 100755 --- a/libs/langchain-community/src/vectorstores/tests/mongodb_atlas.int.test.ts +++ b/libs/langchain-community/src/vectorstores/tests/mongodb_atlas.int.test.ts @@ -4,11 +4,12 @@ import { test, expect } from "@jest/globals"; import { MongoClient } from "mongodb"; import { setTimeout } from "timers/promises"; -import { MongoDBAtlasVectorSearch } from "../mongodb_atlas.js"; - import { OpenAIEmbeddings } from "@langchain/openai"; import { Document } from "@langchain/core/documents"; +import { MongoDBAtlasVectorSearch } from "../mongodb_atlas.js"; + + /** * The following json can be used to create an index in atlas for Cohere embeddings. * Use "langchain.test" for the namespace and "default" for the index name. 
diff --git a/libs/langchain-community/src/vectorstores/tests/myscale.int.test.ts b/libs/langchain-community/src/vectorstores/tests/myscale.int.test.ts index 4315ab47a215..d50ea8cf5926 100644 --- a/libs/langchain-community/src/vectorstores/tests/myscale.int.test.ts +++ b/libs/langchain-community/src/vectorstores/tests/myscale.int.test.ts @@ -1,10 +1,11 @@ /* eslint-disable no-process-env */ import { test, expect } from "@jest/globals"; -import { MyScaleStore } from "../myscale.js"; import { OpenAIEmbeddings } from "@langchain/openai"; import { Document } from "@langchain/core/documents"; +import { MyScaleStore } from "../myscale.js"; + test.skip("MyScaleStore.fromText", async () => { const vectorStore = await MyScaleStore.fromTexts( ["Hello world", "Bye bye", "hello nice world"], diff --git a/libs/langchain-community/src/vectorstores/tests/xata.int.test.ts b/libs/langchain-community/src/vectorstores/tests/xata.int.test.ts index 572f61b83c7b..c7040c029c76 100644 --- a/libs/langchain-community/src/vectorstores/tests/xata.int.test.ts +++ b/libs/langchain-community/src/vectorstores/tests/xata.int.test.ts @@ -1,11 +1,11 @@ /* eslint-disable no-process-env */ // eslint-disable-next-line import/no-extraneous-dependencies import { BaseClient } from "@xata.io/client"; - -import { XataVectorSearch } from "../xata.js"; import { OpenAIEmbeddings } from "@langchain/openai"; import { Document } from "@langchain/core/documents"; +import { XataVectorSearch } from "../xata.js"; + // Tests require a DB with a table called "docs" with: // * a column name content of type Text // * a column named embedding of type Vector diff --git a/yarn.lock b/yarn.lock index ee5eca7a6868..a6a3c54dd491 100644 --- a/yarn.lock +++ b/yarn.lock @@ -23017,8 +23017,6 @@ __metadata: resolution: "langchain@workspace:langchain" dependencies: "@anthropic-ai/sdk": ^0.9.1 - "@aws-sdk/client-dynamodb": ^3.310.0 - "@aws-sdk/client-kendra": ^3.352.0 "@aws-sdk/client-lambda": ^3.310.0 "@aws-sdk/client-s3": ^3.310.0 
"@aws-sdk/client-sagemaker-runtime": ^3.414.0 @@ -23042,6 +23040,7 @@ __metadata: "@jest/globals": ^29.5.0 "@langchain/community": ~0.0.0 "@langchain/core": ~0.0.11-rc.1 + "@langchain/openai": ~0.0.1 "@mozilla/readability": ^0.4.4 "@notionhq/client": ^2.2.10 "@opensearch-project/opensearch": ^2.2.0 @@ -23136,7 +23135,6 @@ __metadata: node-llama-cpp: 2.7.3 notion-to-md: ^3.1.0 officeparser: ^4.0.4 - openai: ^4.19.0 openapi-types: ^12.1.3 p-retry: 4 pdf-parse: 1.1.1 @@ -23173,8 +23171,6 @@ __metadata: zod: ^3.22.3 zod-to-json-schema: 3.20.3 peerDependencies: - "@aws-sdk/client-dynamodb": ^3.310.0 - "@aws-sdk/client-kendra": ^3.352.0 "@aws-sdk/client-lambda": ^3.310.0 "@aws-sdk/client-s3": ^3.310.0 "@aws-sdk/client-sagemaker-runtime": ^3.310.0 @@ -23273,10 +23269,6 @@ __metadata: youtube-transcript: ^1.0.6 youtubei.js: ^5.8.0 peerDependenciesMeta: - "@aws-sdk/client-dynamodb": - optional: true - "@aws-sdk/client-kendra": - optional: true "@aws-sdk/client-lambda": optional: true "@aws-sdk/client-s3": From 5c27febff075cdda9c29e9313a1e777faa39bc51 Mon Sep 17 00:00:00 2001 From: jacoblee93 Date: Thu, 7 Dec 2023 16:56:02 -0800 Subject: [PATCH 18/22] Remove unused file --- langchain/src/util/openai-format-fndef.ts | 135 ---------------------- 1 file changed, 135 deletions(-) delete mode 100644 langchain/src/util/openai-format-fndef.ts diff --git a/langchain/src/util/openai-format-fndef.ts b/langchain/src/util/openai-format-fndef.ts deleted file mode 100644 index 652b9ba22e7c..000000000000 --- a/langchain/src/util/openai-format-fndef.ts +++ /dev/null @@ -1,135 +0,0 @@ -/** - * Formatting function definitions for calculating openai function defination token usage. - * - * https://github.com/hmarr/openai-chat-tokens/blob/main/src/functions.ts - * (c) 2023 Harry Marr - * MIT license - */ -import type { OpenAIClient } from "@langchain/openai"; - -type OpenAIFunction = OpenAIClient.Chat.ChatCompletionCreateParams.Function; - -// Types representing the OpenAI function definitions. 
While the OpenAI client library -// does have types for function definitions, the properties are just Record, -// which isn't very useful for type checking this formatting code. -export interface FunctionDef extends Omit { - name: string; - description?: string; - parameters: ObjectProp; -} - -interface ObjectProp { - type: "object"; - properties?: { - [key: string]: Prop; - }; - required?: string[]; -} - -interface AnyOfProp { - anyOf: Prop[]; -} - -type Prop = { - description?: string; -} & ( - | AnyOfProp - | ObjectProp - | { - type: "string"; - enum?: string[]; - } - | { - type: "number" | "integer"; - minimum?: number; - maximum?: number; - enum?: number[]; - } - | { type: "boolean" } - | { type: "null" } - | { - type: "array"; - items?: Prop; - } -); - -function isAnyOfProp(prop: Prop): prop is AnyOfProp { - return ( - (prop as AnyOfProp).anyOf !== undefined && - Array.isArray((prop as AnyOfProp).anyOf) - ); -} - -// When OpenAI use functions in the prompt, they format them as TypeScript definitions rather than OpenAPI JSON schemas. -// This function converts the JSON schemas into TypeScript definitions. -export function formatFunctionDefinitions(functions: FunctionDef[]) { - const lines = ["namespace functions {", ""]; - for (const f of functions) { - if (f.description) { - lines.push(`// ${f.description}`); - } - if (Object.keys(f.parameters.properties ?? {}).length > 0) { - lines.push(`type ${f.name} = (_: {`); - lines.push(formatObjectProperties(f.parameters, 0)); - lines.push("}) => any;"); - } else { - lines.push(`type ${f.name} = () => any;`); - } - lines.push(""); - } - lines.push("} // namespace functions"); - return lines.join("\n"); -} - -// Format just the properties of an object (not including the surrounding braces) -function formatObjectProperties(obj: ObjectProp, indent: number): string { - const lines: string[] = []; - for (const [name, param] of Object.entries(obj.properties ?? 
{})) { - if (param.description && indent < 2) { - lines.push(`// ${param.description}`); - } - if (obj.required?.includes(name)) { - lines.push(`${name}: ${formatType(param, indent)},`); - } else { - lines.push(`${name}?: ${formatType(param, indent)},`); - } - } - return lines.map((line) => " ".repeat(indent) + line).join("\n"); -} - -// Format a single property type -function formatType(param: Prop, indent: number): string { - if (isAnyOfProp(param)) { - return param.anyOf.map((v) => formatType(v, indent)).join(" | "); - } - switch (param.type) { - case "string": - if (param.enum) { - return param.enum.map((v) => `"${v}"`).join(" | "); - } - return "string"; - case "number": - if (param.enum) { - return param.enum.map((v) => `${v}`).join(" | "); - } - return "number"; - case "integer": - if (param.enum) { - return param.enum.map((v) => `${v}`).join(" | "); - } - return "number"; - case "boolean": - return "boolean"; - case "null": - return "null"; - case "object": - return ["{", formatObjectProperties(param, indent + 2), "}"].join("\n"); - case "array": - if (param.items) { - return `${formatType(param.items, indent)}[]`; - } - return "any[]"; - default: - return ""; - } -} From 974d86f26474dbe1323d38149570e27a6e1ce1d6 Mon Sep 17 00:00:00 2001 From: jacoblee93 Date: Thu, 7 Dec 2023 16:57:06 -0800 Subject: [PATCH 19/22] Format --- .../src/vectorstores/tests/convex/convex/lib.ts | 5 +---- .../src/vectorstores/tests/mongodb_atlas.int.test.ts | 1 - 2 files changed, 1 insertion(+), 5 deletions(-) diff --git a/libs/langchain-community/src/vectorstores/tests/convex/convex/lib.ts b/libs/langchain-community/src/vectorstores/tests/convex/convex/lib.ts index 156bb06805c6..95175610faf8 100644 --- a/libs/langchain-community/src/vectorstores/tests/convex/convex/lib.ts +++ b/libs/langchain-community/src/vectorstores/tests/convex/convex/lib.ts @@ -34,10 +34,7 @@ export const similaritySearch = action({ query: v.string(), }, handler: async (ctx, { query }) => { - const vectorStore 
= new ConvexVectorStore( - new FakeEmbeddings({}), - { ctx } - ); + const vectorStore = new ConvexVectorStore(new FakeEmbeddings({}), { ctx }); const result = await vectorStore.similaritySearch(query, 3); return result.map(({ metadata }) => metadata); diff --git a/libs/langchain-community/src/vectorstores/tests/mongodb_atlas.int.test.ts b/libs/langchain-community/src/vectorstores/tests/mongodb_atlas.int.test.ts index 3a8181767f55..6f4a9f0730b7 100755 --- a/libs/langchain-community/src/vectorstores/tests/mongodb_atlas.int.test.ts +++ b/libs/langchain-community/src/vectorstores/tests/mongodb_atlas.int.test.ts @@ -9,7 +9,6 @@ import { Document } from "@langchain/core/documents"; import { MongoDBAtlasVectorSearch } from "../mongodb_atlas.js"; - /** * The following json can be used to create an index in atlas for Cohere embeddings. * Use "langchain.test" for the namespace and "default" for the index name. From 5a3d0be8c99735f65c856493749c2eb6112b87ec Mon Sep 17 00:00:00 2001 From: jacoblee93 Date: Thu, 7 Dec 2023 17:08:54 -0800 Subject: [PATCH 20/22] Fix build --- langchain/src/experimental/openai_files/index.ts | 2 +- langchain/src/load/import_type.d.ts | 2 -- 2 files changed, 1 insertion(+), 3 deletions(-) diff --git a/langchain/src/experimental/openai_files/index.ts b/langchain/src/experimental/openai_files/index.ts index a4e9fe11229b..62f35dadf438 100644 --- a/langchain/src/experimental/openai_files/index.ts +++ b/langchain/src/experimental/openai_files/index.ts @@ -1,4 +1,4 @@ -import { OpenAI as OpenAIClient, type ClientOptions } from "@langchain/openai"; +import { OpenAIClient, type ClientOptions } from "@langchain/openai"; import { Serializable } from "../../load/serializable.js"; export type OpenAIFilesInput = { diff --git a/langchain/src/load/import_type.d.ts b/langchain/src/load/import_type.d.ts index a02354d086b2..5add518536fb 100644 --- a/langchain/src/load/import_type.d.ts +++ b/langchain/src/load/import_type.d.ts @@ -523,9 +523,7 @@ export interface 
SecretMap { ANTHROPIC_API_KEY?: string; AWS_ACCESS_KEY_ID?: string; AWS_SECRET_ACCESS_KEY?: string; - AZURE_OPENAI_API_KEY?: string; OPENAI_API_KEY?: string; - OPENAI_ORGANIZATION?: string; PROMPTLAYER_API_KEY?: string; REMOTE_RETRIEVER_AUTH_BEARER?: string; ZAPIER_NLA_API_KEY?: string; From a2c45f1b2e4f9b8b1e521fee288f592761eaeb99 Mon Sep 17 00:00:00 2001 From: jacoblee93 Date: Thu, 7 Dec 2023 18:19:59 -0800 Subject: [PATCH 21/22] Sync core --- langchain/package.json | 2 +- libs/langchain-community/package.json | 2 +- libs/langchain-openai/package.json | 5 ++--- yarn.lock | 8 ++++---- 4 files changed, 8 insertions(+), 9 deletions(-) diff --git a/langchain/package.json b/langchain/package.json index 6dd2c60090d2..b858507fa64a 100644 --- a/langchain/package.json +++ b/langchain/package.json @@ -1415,7 +1415,7 @@ "@anthropic-ai/sdk": "^0.9.1", "@langchain/community": "~0.0.0", "@langchain/core": "~0.0.11-rc.1", - "@langchain/openai": "~0.0.1", + "@langchain/openai": "~0.0.2-rc.0", "binary-extensions": "^2.2.0", "expr-eval": "^2.0.2", "js-tiktoken": "^1.0.7", diff --git a/libs/langchain-community/package.json b/libs/langchain-community/package.json index 37c14ac54263..e159d7b5949e 100644 --- a/libs/langchain-community/package.json +++ b/libs/langchain-community/package.json @@ -34,7 +34,7 @@ "license": "MIT", "dependencies": { "@langchain/core": "~0.0.11-rc.1", - "@langchain/openai": "~0.0.1", + "@langchain/openai": "~0.0.2-rc.0", "flat": "^5.0.2", "langsmith": "~0.0.48", "ml-distance": "^4.0.0", diff --git a/libs/langchain-openai/package.json b/libs/langchain-openai/package.json index 6e5aea4a189c..49e5f04823f3 100644 --- a/libs/langchain-openai/package.json +++ b/libs/langchain-openai/package.json @@ -1,6 +1,6 @@ { "name": "@langchain/openai", - "version": "0.0.1", + "version": "0.0.2-rc.0", "description": "OpenAI integrations for LangChain.js", "type": "module", "engines": { @@ -34,14 +34,13 @@ "author": "LangChain", "license": "MIT", "dependencies": { - 
"@langchain/core": "~0.0.1", + "@langchain/core": "~0.0.11-rc.1", "js-tiktoken": "^1.0.7", "openai": "^4.19.0", "zod-to-json-schema": "3.20.3" }, "devDependencies": { "@jest/globals": "^29.5.0", - "@langchain/core": "workspace:*", "@swc/core": "^1.3.90", "@swc/jest": "^0.2.29", "dpdm": "^3.12.0", diff --git a/yarn.lock b/yarn.lock index a6a3c54dd491..4796b182f1fd 100644 --- a/yarn.lock +++ b/yarn.lock @@ -8024,7 +8024,7 @@ __metadata: "@huggingface/inference": ^2.6.4 "@jest/globals": ^29.5.0 "@langchain/core": ~0.0.11-rc.1 - "@langchain/openai": ~0.0.1 + "@langchain/openai": ~0.0.2-rc.0 "@mozilla/readability": ^0.4.4 "@notionhq/client": ^2.2.10 "@opensearch-project/opensearch": ^2.2.0 @@ -8424,12 +8424,12 @@ __metadata: languageName: unknown linkType: soft -"@langchain/openai@workspace:libs/langchain-openai, @langchain/openai@~0.0.1": +"@langchain/openai@workspace:libs/langchain-openai, @langchain/openai@~0.0.2-rc.0": version: 0.0.0-use.local resolution: "@langchain/openai@workspace:libs/langchain-openai" dependencies: "@jest/globals": ^29.5.0 - "@langchain/core": "workspace:*" + "@langchain/core": ~0.0.11-rc.1 "@swc/core": ^1.3.90 "@swc/jest": ^0.2.29 dpdm: ^3.12.0 @@ -23040,7 +23040,7 @@ __metadata: "@jest/globals": ^29.5.0 "@langchain/community": ~0.0.0 "@langchain/core": ~0.0.11-rc.1 - "@langchain/openai": ~0.0.1 + "@langchain/openai": ~0.0.2-rc.0 "@mozilla/readability": ^0.4.4 "@notionhq/client": ^2.2.10 "@opensearch-project/opensearch": ^2.2.0 From e01ae33ca861d4090ad3a6568989c0ee1e038e58 Mon Sep 17 00:00:00 2001 From: jacoblee93 Date: Thu, 7 Dec 2023 18:25:53 -0800 Subject: [PATCH 22/22] Fix build command --- package.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/package.json b/package.json index c34923c8ed19..f8dce25ba422 100644 --- a/package.json +++ b/package.json @@ -19,7 +19,7 @@ "packageManager": "yarn@3.4.1", "scripts": { "build": "turbo run build --filter=\"!test-exports-*\" --concurrency 1", - "build:deps": "yarn workspace 
@langchain/core build && yarn workspace @langchain/community build && yarn workspace @langchain/anthropic build && yarn workspace @langchain/openai build", + "build:deps": "yarn workspace @langchain/core build && yarn workspace @langchain/anthropic build && yarn workspace @langchain/openai build && yarn workspace @langchain/community build", "format": "turbo run format", "format:check": "turbo run format:check", "lint": "turbo run lint --concurrency 1",