From 72aa0189e1425c561f677dbd3875c43040f39c75 Mon Sep 17 00:00:00 2001 From: Tsukasa OISHI Date: Tue, 5 Dec 2023 07:54:23 +0900 Subject: [PATCH] core[patch]: Reducing heap area consumption regardless of the number of prompts (#3519) * Remove unused option * Cache the Tiktoken object * Fix format * Bump core version * Upgrade to js-tiktoken@1.0.8 --------- Co-authored-by: jacoblee93 Co-authored-by: Tat Dat Duong --- langchain-core/package.json | 4 ++-- langchain-core/src/utils/tiktoken.ts | 28 +++++++--------------------- yarn.lock | 11 ++++++++++- 3 files changed, 19 insertions(+), 24 deletions(-) diff --git a/langchain-core/package.json b/langchain-core/package.json index 1927a865c10c..a11c036e3a4d 100644 --- a/langchain-core/package.json +++ b/langchain-core/package.json @@ -1,6 +1,6 @@ { "name": "@langchain/core", - "version": "0.0.7", + "version": "0.0.8", "description": "Core LangChain.js abstractions and schemas", "type": "module", "engines": { @@ -37,7 +37,7 @@ "ansi-styles": "^5.0.0", "camelcase": "6", "decamelize": "1.2.0", - "js-tiktoken": "^1.0.7", + "js-tiktoken": "^1.0.8", "langsmith": "^0.0.48", "p-queue": "^6.6.2", "p-retry": "4", diff --git a/langchain-core/src/utils/tiktoken.ts b/langchain-core/src/utils/tiktoken.ts index a823b5b18637..be930b41eb9d 100644 --- a/langchain-core/src/utils/tiktoken.ts +++ b/langchain-core/src/utils/tiktoken.ts @@ -1,44 +1,30 @@ import { Tiktoken, - TiktokenBPE, TiktokenEncoding, TiktokenModel, getEncodingNameForModel, } from "js-tiktoken/lite"; import { AsyncCaller } from "./async_caller.js"; -const cache: Record<string, Promise<TiktokenBPE>> = {}; +const cache: Record<string, Promise<Tiktoken>> = {}; const caller = /* #__PURE__ */ new AsyncCaller({}); -export async function getEncoding( - encoding: TiktokenEncoding, - options?: { - signal?: AbortSignal; - extendedSpecialTokens?: Record<string, number>; - } -) { +export async function getEncoding(encoding: TiktokenEncoding) { if (!(encoding in cache)) { cache[encoding] = caller - .fetch(`https://tiktoken.pages.dev/js/${encoding}.json`, { 
- signal: options?.signal, - }) + .fetch(`https://tiktoken.pages.dev/js/${encoding}.json`) .then((res) => res.json()) + .then((data) => new Tiktoken(data)) .catch((e) => { delete cache[encoding]; throw e; }); } - return new Tiktoken(await cache[encoding], options?.extendedSpecialTokens); + return await cache[encoding]; } -export async function encodingForModel( - model: TiktokenModel, - options?: { - signal?: AbortSignal; - extendedSpecialTokens?: Record<string, number>; - } -) { - return getEncoding(getEncodingNameForModel(model), options); +export async function encodingForModel(model: TiktokenModel) { + return getEncoding(getEncodingNameForModel(model)); } diff --git a/yarn.lock b/yarn.lock index ae44b3e678f9..d1f3163cabf2 100644 --- a/yarn.lock +++ b/yarn.lock @@ -8038,7 +8038,7 @@ __metadata: eslint-plugin-prettier: ^4.2.1 jest: ^29.5.0 jest-environment-node: ^29.6.4 - js-tiktoken: ^1.0.7 + js-tiktoken: ^1.0.8 langsmith: ^0.0.48 p-queue: ^6.6.2 p-retry: 4 @@ -22255,6 +22255,15 @@ __metadata: languageName: node linkType: hard +"js-tiktoken@npm:^1.0.8": + version: 1.0.8 + resolution: "js-tiktoken@npm:1.0.8" + dependencies: + base64-js: ^1.5.1 + checksum: ac6e666f14661b4e744bd60987e35275668863a686413bb35baca2d9a503bc95fcfa907f3c02b8e8abe4a4a70abe622c06cbb72ce3574b3b929d22f9146c3f85 + languageName: node + linkType: hard + "js-tokens@npm:^3.0.0 || ^4.0.0, js-tokens@npm:^4.0.0": version: 4.0.0 resolution: "js-tokens@npm:4.0.0"