Skip to content

Commit

Permalink
core[patch]: Reducing heap area consumption regardless of the number …
Browse files Browse the repository at this point in the history
…of prompts (#3519)

* Remove unused option

* Cache the Tiktoken object

* Fix format

* Bump core version

* Upgrade to js-tiktoken@1.0.8

---------

Co-authored-by: jacoblee93 <[email protected]>
Co-authored-by: Tat Dat Duong <[email protected]>
  • Loading branch information
3 people authored Dec 4, 2023
1 parent e0c23e3 commit 72aa018
Show file tree
Hide file tree
Showing 3 changed files with 19 additions and 24 deletions.
4 changes: 2 additions & 2 deletions langchain-core/package.json
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
{
"name": "@langchain/core",
"version": "0.0.7",
"version": "0.0.8",
"description": "Core LangChain.js abstractions and schemas",
"type": "module",
"engines": {
Expand Down Expand Up @@ -37,7 +37,7 @@
"ansi-styles": "^5.0.0",
"camelcase": "6",
"decamelize": "1.2.0",
"js-tiktoken": "^1.0.7",
"js-tiktoken": "^1.0.8",
"langsmith": "^0.0.48",
"p-queue": "^6.6.2",
"p-retry": "4",
Expand Down
28 changes: 7 additions & 21 deletions langchain-core/src/utils/tiktoken.ts
Original file line number Diff line number Diff line change
@@ -1,44 +1,30 @@
import {
Tiktoken,
TiktokenBPE,
TiktokenEncoding,
TiktokenModel,
getEncodingNameForModel,
} from "js-tiktoken/lite";
import { AsyncCaller } from "./async_caller.js";

const cache: Record<string, Promise<TiktokenBPE>> = {};
const cache: Record<string, Promise<Tiktoken>> = {};

const caller = /* #__PURE__ */ new AsyncCaller({});

export async function getEncoding(
encoding: TiktokenEncoding,
options?: {
signal?: AbortSignal;
extendedSpecialTokens?: Record<string, number>;
}
) {
export async function getEncoding(encoding: TiktokenEncoding) {
if (!(encoding in cache)) {
cache[encoding] = caller
.fetch(`https://tiktoken.pages.dev/js/${encoding}.json`, {
signal: options?.signal,
})
.fetch(`https://tiktoken.pages.dev/js/${encoding}.json`)
.then((res) => res.json())
.then((data) => new Tiktoken(data))
.catch((e) => {
delete cache[encoding];
throw e;
});
}

return new Tiktoken(await cache[encoding], options?.extendedSpecialTokens);
return await cache[encoding];
}

export async function encodingForModel(
model: TiktokenModel,
options?: {
signal?: AbortSignal;
extendedSpecialTokens?: Record<string, number>;
}
) {
return getEncoding(getEncodingNameForModel(model), options);
export async function encodingForModel(model: TiktokenModel) {
return getEncoding(getEncodingNameForModel(model));
}
11 changes: 10 additions & 1 deletion yarn.lock
Original file line number Diff line number Diff line change
Expand Up @@ -8038,7 +8038,7 @@ __metadata:
eslint-plugin-prettier: ^4.2.1
jest: ^29.5.0
jest-environment-node: ^29.6.4
js-tiktoken: ^1.0.7
js-tiktoken: ^1.0.8
langsmith: ^0.0.48
p-queue: ^6.6.2
p-retry: 4
Expand Down Expand Up @@ -22255,6 +22255,15 @@ __metadata:
languageName: node
linkType: hard

"js-tiktoken@npm:^1.0.8":
version: 1.0.8
resolution: "js-tiktoken@npm:1.0.8"
dependencies:
base64-js: ^1.5.1
checksum: ac6e666f14661b4e744bd60987e35275668863a686413bb35baca2d9a503bc95fcfa907f3c02b8e8abe4a4a70abe622c06cbb72ce3574b3b929d22f9146c3f85
languageName: node
linkType: hard

"js-tokens@npm:^3.0.0 || ^4.0.0, js-tokens@npm:^4.0.0":
version: 4.0.0
resolution: "js-tokens@npm:4.0.0"
Expand Down

2 comments on commit 72aa018

@vercel
Copy link

@vercel vercel bot commented on 72aa018 Dec 4, 2023

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@vercel
Copy link

@vercel vercel bot commented on 72aa018 Dec 4, 2023

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Successfully deployed to the following URLs:

langchainjs-docs – ./docs/core_docs/

langchainjs-docs-ruddy.vercel.app
js.langchain.com
langchainjs-docs-git-main-langchain.vercel.app
langchainjs-docs-langchain.vercel.app

Please sign in to comment.