diff --git a/.gitignore b/.gitignore index 4cd5ca18..ff19abfa 100644 --- a/.gitignore +++ b/.gitignore @@ -1,3 +1,10 @@ +scripts/ibm_vllm_generate_protos/dist +scripts/ibm_vllm_generate_protos/dts +scripts/ibm_vllm_generate_protos/types + +infra/bee-code-interpreter/* +!infra/bee-code-interpreter/bee-code-interpreter.yaml + ### Node template # Logs logs diff --git a/eslint.config.js b/eslint.config.js index 628930f4..de19607a 100644 --- a/eslint.config.js +++ b/eslint.config.js @@ -8,11 +8,15 @@ import markdown from "@eslint/markdown"; export default tseslint.config( { - ignores: ["node_modules/**", "dist/**"], + ignores: ["node_modules/**", "dist/**", "scripts/ibm_vllm_generate_protos/**"], }, eslint.configs.recommended, ...tseslint.configs.strict, ...tseslint.configs.stylistic, + { + files: ["src/adapters/ibm-vllm/types.ts"], + rules: { "@typescript-eslint/unified-signatures": "off" }, + }, { files: ["**/*.md/**"], languageOptions: { diff --git a/examples/llms/providers/ibm-vllm.ts b/examples/llms/providers/ibm-vllm.ts new file mode 100644 index 00000000..a995a93a --- /dev/null +++ b/examples/llms/providers/ibm-vllm.ts @@ -0,0 +1,36 @@ +import "dotenv/config.js"; +import { IBMvLLM } from "bee-agent-framework/adapters/ibm-vllm/llm"; +import { IBMVllmChatLLM } from "bee-agent-framework/adapters/ibm-vllm/chat"; +import { BaseMessage } from "bee-agent-framework/llms/primitives/message"; +import { Client } from "bee-agent-framework/adapters/ibm-vllm/client"; + +const client = new Client(); +{ + console.info("===RAW==="); + const llm = new IBMvLLM({ + client, + modelId: "meta-llama/llama-3-1-70b-instruct", + }); + + console.info("Meta", await llm.meta()); + + const response = await llm.generate("Hello world!", { + stream: false, + }); + console.info(response.text); +} + +{ + console.info("===CHAT==="); + const llm = IBMVllmChatLLM.fromPreset("meta-llama/llama-3-1-70b-instruct", { client }); + + console.info("Meta", await llm.meta()); + + const response = await llm.generate([ + BaseMessage.of({ + role: "user", + text: "Hello world!", + }), + ]); + console.info(response.messages); +} diff --git a/infra/bee-code-interpreter/k8s/.gitignore b/infra/bee-code-interpreter/k8s/.gitignore deleted file mode 100644 index 97e18588..00000000 --- a/infra/bee-code-interpreter/k8s/.gitignore +++ /dev/null @@ -1,3 +0,0 @@ -* -!.gitignore -!bee-code-interpreter.yaml \ No newline at end of file diff --git a/package.json b/package.json index de5f70c0..678586aa 100644 --- a/package.json +++ b/package.json @@ -78,6 +78,7 @@ "prepare": "husky", "copyright": "./scripts/copyright.sh", "release": "release-it", + "ibm-vllm:generate-types": "./scripts/ibm_vllm_generate_protos/ibm_vllm_generate_protos.sh", "_ensure_env": "cp -n .env.template .env || true", "_docker": "yarn _ensure_env && bash -c 'source ./.env && docker_cmd=$(which docker >/dev/null 2>&1 && printf docker || printf podman) && $docker_cmd \"$@\"' sh" }, @@ -115,6 +116,8 @@ }, "peerDependencies": { "@googleapis/customsearch": "^3.2.0", + "@grpc/grpc-js": "^1.11.3", + "@grpc/proto-loader": "^0.7.13", "@ibm-generative-ai/node-sdk": "~3.2.3", "@langchain/community": "~0.2.28", "@langchain/core": "~0.2.27", @@ -131,6 +134,8 @@ "@eslint/js": "^9.9.0", "@eslint/markdown": "^6.0.0", "@googleapis/customsearch": "^3.2.0", + "@grpc/grpc-js": "^1.11.3", + "@grpc/proto-loader": "^0.7.13", "@ibm-generative-ai/node-sdk": "~3.2.3", "@langchain/community": "~0.2.28", "@langchain/core": "~0.2.27", @@ -172,7 +177,7 @@ "strip-ansi": "^7.1.0", "temp-dir": "^3.0.0", "tsc-files": 
"^1.1.4", - "tsup": "^8.2.4", + "tsup": "^8.3.0", "tsx": "^4.19.0", "typescript": "^5.5.4", "typescript-eslint": "^8.2.0", diff --git a/scripts/copyright.sh b/scripts/copyright.sh index 4188b6d9..2461b506 100755 --- a/scripts/copyright.sh +++ b/scripts/copyright.sh @@ -44,7 +44,7 @@ fi if command -v nwa &> /dev/null; then nwa add -l apache -c "$AUTHOR" src dist tests scripts elif command -v docker &> /dev/null; then - docker run -it -v "${PWD}:/src" ghcr.io/b1nary-gr0up/nwa:main add -l apache -c "$AUTHOR" src dist tests scripts + docker run --rm -v "${PWD}:/src" ghcr.io/b1nary-gr0up/nwa:main add -l apache -c "$AUTHOR" src dist tests scripts else echo "Error: 'nwa' is not available. Either install it manually or install go/docker." exit 1 diff --git a/scripts/ibm_vllm_generate_protos/ibm_vllm_generate_protos.sh b/scripts/ibm_vllm_generate_protos/ibm_vllm_generate_protos.sh new file mode 100755 index 00000000..1b2e7589 --- /dev/null +++ b/scripts/ibm_vllm_generate_protos/ibm_vllm_generate_protos.sh @@ -0,0 +1,53 @@ +#!/bin/bash +# Copyright 2024 IBM Corp. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +set -e + +GRPC_PROTO_PATH="./src/adapters/ibm-vllm/proto" +GRPC_TYPES_PATH="./src/adapters/ibm-vllm/types.ts" + +SCRIPT_DIR="$(dirname "$0")" +OUTPUT_RELATIVE_PATH="dist/generation.d.ts" +GRPC_TYPES_TMP_PATH=types + +rm -f "$GRPC_TYPES_PATH" + +rm -rf "${SCRIPT_DIR}"/{dist,dts,types} + + +yarn run proto-loader-gen-types \ + --defaults \ + --keepCase \ + --oneofs \ + --longs=Number \ + --enums=String \ + --grpcLib=@grpc/grpc-js \ + --"outDir=${SCRIPT_DIR}/${GRPC_TYPES_TMP_PATH}" \ + "${GRPC_PROTO_PATH}"/*.proto + + +cd "$SCRIPT_DIR" + tsup --dts-only + sed -i.bak '$ d' "$OUTPUT_RELATIVE_PATH" + sed -i.bak -E "s/^interface/export interface/" "$OUTPUT_RELATIVE_PATH" + sed -i.bak -E "s/^type/export type/" "$OUTPUT_RELATIVE_PATH" +cd - + +mv "$SCRIPT_DIR/$OUTPUT_RELATIVE_PATH" "$GRPC_TYPES_PATH" +rm -rf "${SCRIPT_DIR}"/{dist,dts,types} + +yarn run lint:fix "${GRPC_TYPES_PATH}" +yarn prettier --write "${GRPC_TYPES_PATH}" +yarn copyright diff --git a/scripts/ibm_vllm_generate_protos/package.json b/scripts/ibm_vllm_generate_protos/package.json new file mode 100644 index 00000000..d25b1a7b --- /dev/null +++ b/scripts/ibm_vllm_generate_protos/package.json @@ -0,0 +1,6 @@ +{ + "name": "ibm-vllm-proto-types", + "type": "module", + "version": "1.0.0", + "typings": "./types/generation.d.ts" +} diff --git a/scripts/ibm_vllm_generate_protos/tsconfig.proto.json b/scripts/ibm_vllm_generate_protos/tsconfig.proto.json new file mode 100644 index 00000000..3f3a9b39 --- /dev/null +++ b/scripts/ibm_vllm_generate_protos/tsconfig.proto.json @@ -0,0 +1,14 @@ +{ + "compilerOptions": { + "moduleResolution": "node", + "rootDir": ".", + "baseUrl": ".", + "target": "ESNext", + "module": "ES6", + "outDir": "dist", + "declaration": true, + "emitDeclarationOnly": true, + "skipLibCheck": true, + "sourceMap": false + } +} diff --git a/scripts/ibm_vllm_generate_protos/tsup.config.ts b/scripts/ibm_vllm_generate_protos/tsup.config.ts 
new file mode 100644 index 00000000..3cf25ebf --- /dev/null +++ b/scripts/ibm_vllm_generate_protos/tsup.config.ts @@ -0,0 +1,32 @@ +/** + * Copyright 2024 IBM Corp. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +import { defineConfig } from "tsup"; + +export default defineConfig({ + entry: ["types/generation.ts"], + tsconfig: "./tsconfig.proto.json", + sourcemap: false, + dts: true, + format: ["esm"], + treeshake: false, + legacyOutput: false, + skipNodeModulesBundle: true, + bundle: true, + splitting: false, + silent: false, + clean: true, +}); diff --git a/src/adapters/ibm-vllm/chat.ts b/src/adapters/ibm-vllm/chat.ts new file mode 100644 index 00000000..57677dd5 --- /dev/null +++ b/src/adapters/ibm-vllm/chat.ts @@ -0,0 +1,187 @@ +/** + * Copyright 2024 IBM Corp. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +import { isFunction, isObjectType } from "remeda"; + +import { IBMvLLM, IBMvLLMGenerateOptions, IBMvLLMOutput, IBMvLLMParameters } from "./llm.js"; + +import { Cache } from "@/cache/decoratorCache.js"; +import { BaseMessage, Role } from "@/llms/primitives/message.js"; +import { Emitter } from "@/emitter/emitter.js"; +import { ChatLLM, ChatLLMOutput } from "@/llms/chat.js"; +import { + AsyncStream, + BaseLLMTokenizeOutput, + GenerateCallbacks, + LLMError, + LLMMeta, +} from "@/llms/base.js"; +import { transformAsyncIterable } from "@/internals/helpers/stream.js"; +import { shallowCopy } from "@/serializer/utils.js"; +import { IBMVllmChatLLMPreset, IBMVllmChatLLMPresetModel } from "@/adapters/ibm-vllm/chatPreset.js"; +import { Client } from "./client.js"; +import { GetRunContext } from "@/context.js"; + +export class GrpcChatLLMOutput extends ChatLLMOutput { + public readonly raw: IBMvLLMOutput; + + constructor(rawOutput: IBMvLLMOutput) { + super(); + this.raw = rawOutput; + } + + @Cache() + get messages(): BaseMessage[] { + const text = this.raw.getTextContent(); + return [ + BaseMessage.of({ + role: Role.ASSISTANT, + text, + meta: this.raw.meta, + }), + ]; + } + + merge(other: GrpcChatLLMOutput): void { + Cache.getInstance(this, "messages").clear(); + this.raw.merge(other.raw); + } + + getTextContent(): string { + const [message] = this.messages; + return message.text; + } + + toString(): string { + return this.getTextContent(); + } + + createSnapshot() { + return { + raw: shallowCopy(this.raw), + }; + } + + loadSnapshot(snapshot: ReturnType<typeof this.createSnapshot>) { + Object.assign(this, snapshot); + } +} + +export interface IBMVllmInputConfig { + messagesToPrompt: (messages: BaseMessage[]) => string; +} + +export interface GrpcChatLLMInput { + llm: IBMvLLM; + config: IBMVllmInputConfig; +} + +export class IBMVllmChatLLM extends ChatLLM<GrpcChatLLMOutput> { + public readonly emitter = new Emitter<GenerateCallbacks>({ + namespace: ["ibm_vllm", "chat_llm"], + creator: this, + }); + + public readonly llm: IBMvLLM; + protected readonly config: IBMVllmInputConfig; + + constructor({ llm, config }: GrpcChatLLMInput) { + super(llm.modelId, llm.executionOptions); + this.llm = llm; + this.config = config; + } + + static { + this.register(); + } + + async meta(): Promise<LLMMeta> { + return this.llm.meta(); + } + + createSnapshot() { + return { + ...super.createSnapshot(), + modelId: this.modelId, + executionOptions: this.executionOptions, + llm: this.llm, + config: shallowCopy(this.config), + }; + } + + async tokenize(messages: BaseMessage[]): Promise<BaseLLMTokenizeOutput> { + const prompt = this.messagesToPrompt(messages); + return this.llm.tokenize(prompt); + } + + protected async _generate( + messages: BaseMessage[], + options: IBMvLLMGenerateOptions | undefined, + run: GetRunContext<typeof this>, + ): Promise<GrpcChatLLMOutput> { + const prompt = this.messagesToPrompt(messages); + // @ts-expect-error protected property + const rawResponse = await this.llm._generate(prompt, options, run); + return new GrpcChatLLMOutput(rawResponse); + } + + protected async *_stream( + messages: BaseMessage[], + options?: IBMvLLMGenerateOptions, + ): AsyncStream<GrpcChatLLMOutput> { + const prompt = this.messagesToPrompt(messages); + // @ts-expect-error protected property + const response = this.llm._stream(prompt, options); + return yield* transformAsyncIterable(response, (output) => new GrpcChatLLMOutput(output)); + } + + messagesToPrompt(messages: BaseMessage[]) { + return this.config.messagesToPrompt(messages); + } + + static fromPreset( + modelId: IBMVllmChatLLMPresetModel, + overrides?: { + client?: Client; + parameters?: IBMvLLMParameters
| ((value: IBMvLLMParameters) => IBMvLLMParameters); + }, + ) { + const presetFactory = IBMVllmChatLLMPreset[modelId]; + if (!presetFactory) { + throw new LLMError(`Model "${modelId}" does not exist in preset.`); + } + + const preset = presetFactory(); + let parameters = preset.base.parameters ?? {}; + if (overrides) { + if (isFunction(overrides.parameters)) { + parameters = overrides.parameters(parameters); + } else if (isObjectType(overrides.parameters)) { + parameters = overrides.parameters; + } + } + + return new IBMVllmChatLLM({ + config: preset.chat, + llm: new IBMvLLM({ + ...preset.base, + ...overrides, + parameters, + modelId, + }), + }); + } +} diff --git a/src/adapters/ibm-vllm/chatPreset.ts b/src/adapters/ibm-vllm/chatPreset.ts new file mode 100644 index 00000000..03d874f8 --- /dev/null +++ b/src/adapters/ibm-vllm/chatPreset.ts @@ -0,0 +1,150 @@ +/** + * Copyright 2024 IBM Corp. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +import { LLMChatTemplate, LLMChatTemplates } from "@/adapters/shared/llmChatTemplates.js"; +import { z } from "zod"; + +import { IBMVllmInputConfig } from "./chat.js"; +import { IBMvLLMInput } from "./llm.js"; + +import { PromptTemplate } from "@/template.js"; + +interface IBMVllmChatLLMPreset { + chat: IBMVllmInputConfig; + base: IBMvLLMInput; +} + +export const IBMVllmModel = { + LLAMA_3_1_405B_INSTRUCT_FP8: "meta-llama/llama-3-1-405b-instruct-fp8", + LLAMA_3_1_70B_INSTRUCT: "meta-llama/llama-3-1-70b-instruct", + QWEN2_72B_INSTRUCT: "qwen/qwen2-72b-instruct", + GRANITE_INSTRUCT: "ibm/granite-instruct", // Generic model ID is used for ease of development, ground it once stable +} as const; +export type IBMVllmModel = (typeof IBMVllmModel)[keyof typeof IBMVllmModel]; + +export const IBMVllmChatLLMPreset = { + [IBMVllmModel.LLAMA_3_1_405B_INSTRUCT_FP8]: (): IBMVllmChatLLMPreset => { + const { template, parameters, messagesToPrompt } = LLMChatTemplates.get("llama3.1"); + return { + base: { + modelId: IBMVllmModel.LLAMA_3_1_70B_INSTRUCT, + parameters: { + method: "GREEDY", + stopping: { + stop_sequences: [...parameters.stop_sequence], + include_stop_sequence: false, + max_new_tokens: 2048, + }, + decoding: { + repetition_penalty: 1, + }, + }, + }, + chat: { + messagesToPrompt: messagesToPrompt(template), + }, + }; + }, + [IBMVllmModel.LLAMA_3_1_70B_INSTRUCT]: (): IBMVllmChatLLMPreset => { + const { template, parameters, messagesToPrompt } = LLMChatTemplates.get("llama3.1"); + return { + base: { + modelId: IBMVllmModel.LLAMA_3_1_70B_INSTRUCT, + parameters: { + method: "GREEDY", + stopping: { + stop_sequences: [...parameters.stop_sequence], + include_stop_sequence: false, + max_new_tokens: 2048, + }, + decoding: { + repetition_penalty: 1, + }, + }, + }, + chat: { + messagesToPrompt: messagesToPrompt(template), + }, + }; + }, + [IBMVllmModel.QWEN2_72B_INSTRUCT]: (): IBMVllmChatLLMPreset => { + const { template, parameters, messagesToPrompt } = LLMChatTemplates.get("qwen2"); + return { + base: { + modelId: IBMVllmModel.QWEN2_72B_INSTRUCT, + 
parameters: { + method: "GREEDY", + stopping: { + stop_sequences: [...parameters.stop_sequence], + include_stop_sequence: false, + max_new_tokens: 1024, + }, + }, + }, + chat: { + messagesToPrompt: messagesToPrompt(template), + }, + }; + }, + [IBMVllmModel.GRANITE_INSTRUCT]: (): IBMVllmChatLLMPreset => { + const llama31config = LLMChatTemplates.get("llama3.1"); + const { template, parameters, messagesToPrompt } = { + template: new PromptTemplate({ + schema: z.object({ + messages: z.array( + z.object({ + system: z.array(z.string()), + user: z.array(z.string()), + assistant: z.array(z.string()), + ipython: z.array(z.string()), + }), + ), + }), + template: `{{#messages}}{{#system}}<|start_of_role|>system<|end_of_role|> + + {{system}}<|end_of_text|>{{/system}}{{#user}}<|start_of_role|>user<|end_of_role|> + + {{user}}<|end_of_text|>{{/user}}{{#assistant}}<|start_of_role|>assistant<|end_of_role|> + + {{assistant}}<|end_of_text|>{{/assistant}}{{#ipython}}<|start_of_role|>ipython<|end_of_role|> + + {{ipython}}<|end_of_text|>{{/ipython}}{{/messages}}<|start_of_role|>assistant<|end_of_role|> + `, + }), + messagesToPrompt: llama31config.messagesToPrompt, + parameters: { + stop_sequence: ["<|end_of_text|>"], + }, + } satisfies LLMChatTemplate; + return { + base: { + modelId: IBMVllmModel.GRANITE_INSTRUCT, + parameters: { + method: "GREEDY", + stopping: { + stop_sequences: [...parameters.stop_sequence], + include_stop_sequence: false, + }, + }, + }, + chat: { + messagesToPrompt: messagesToPrompt(template), + }, + }; + }, +} as const satisfies { [key in IBMVllmModel]: () => IBMVllmChatLLMPreset }; + +export type IBMVllmChatLLMPresetModel = keyof typeof IBMVllmChatLLMPreset; diff --git a/src/adapters/ibm-vllm/client.ts b/src/adapters/ibm-vllm/client.ts new file mode 100644 index 00000000..be4b8746 --- /dev/null +++ b/src/adapters/ibm-vllm/client.ts @@ -0,0 +1,216 @@ +/** + * Copyright 2024 IBM Corp. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +import grpc, { + CallOptions as GRPCCallOptions, + ClientOptions as GRPCClientOptions, + ClientReadableStream, + ClientUnaryCall, +} from "@grpc/grpc-js"; + +import * as R from "remeda"; +// eslint-disable-next-line no-restricted-imports +import { UnaryCallback } from "@grpc/grpc-js/build/src/client.js"; +import { FrameworkError, ValueError } from "@/errors.js"; +import protoLoader from "@grpc/proto-loader"; + +import { + BatchedGenerationRequest, + BatchedGenerationResponse__Output, + BatchedTokenizeRequest, + BatchedTokenizeResponse__Output, + GenerationRequest__Output, + ModelInfoRequest, + ModelInfoResponse__Output, + ProtoGrpcType as GenerationProtoGentypes, + SingleGenerationRequest, +} from "@/adapters/ibm-vllm/types.js"; +import { parseEnv } from "@/internals/env.js"; +import { z } from "zod"; +import { Cache } from "@/cache/decoratorCache.js"; +import { Serializable } from "@/internals/serializable.js"; + +const GENERATION_PROTO_PATH = new URL("./proto/generation.proto", import.meta.url); + +interface ClientOptions { + modelRouterSubdomain?: string; + url: string; + credentials: { + rootCert: string; + certChain: string; + privateKey: string; + }; + grpcClientOptions: GRPCClientOptions; + clientShutdownDelay: number; +} + +const defaultOptions = { + clientShutdownDelay: 5 * 60 * 1000, + grpcClientOptions: { + // This is needed, otherwise communication to DIPC cluster fails with "Dropped connection" error after +- 50 secs + "grpc.keepalive_time_ms": 25000, + "grpc.max_receive_message_length": 32 * 1024 * 1024, // 32MiB + }, +}; + +const generationPackageObject = grpc.loadPackageDefinition( + protoLoader.loadSync([GENERATION_PROTO_PATH.pathname], { + longs: Number, + enums: String, + arrays: true, + objects: true, + oneofs: true, + keepCase: true, + defaults: true, + }), +) as unknown as GenerationProtoGentypes; + +const GRPC_CLIENT_TTL = 15 * 60 * 1000; + +type CallOptions = GRPCCallOptions & { signal?: AbortSignal }; +type RequiredModel = T & { model_id: string }; + +export class Client extends Serializable { + public readonly options: ClientOptions; + private usedDefaultCredentials = false; + + @Cache({ ttl: GRPC_CLIENT_TTL }) + protected getClient(modelId: string) { + const modelSpecificUrl = this.options.url.replace(/{model_id}/, modelId.replaceAll("/", "--")); + const client = new generationPackageObject.fmaas.GenerationService( + modelSpecificUrl, + grpc.credentials.createSsl( + Buffer.from(this.options.credentials.rootCert), + Buffer.from(this.options.credentials.privateKey), + Buffer.from(this.options.credentials.certChain), + ), + this.options.grpcClientOptions, + ); + setTimeout(() => { + try { + client.close(); + } catch { + /* empty */ + } + }, GRPC_CLIENT_TTL + this.options.clientShutdownDelay).unref(); + return client; + } + + protected getDefaultCredentials() { + this.usedDefaultCredentials = true; + return { + rootCert: parseEnv("IBM_VLLM_ROOT_CERT", z.string()), + privateKey: parseEnv("IBM_VLLM_PRIVATE_KEY", z.string()), + certChain: parseEnv("IBM_VLLM_CERT_CHAIN", z.string()), + }; + } + + constructor(options?: Partial) { + super(); + this.options = { + ...defaultOptions, + ...options, + url: options?.url ?? parseEnv("IBM_VLLM_URL", z.string()), + credentials: options?.credentials ?? 
this.getDefaultCredentials(), + }; + } + + async modelInfo(request: RequiredModel, options?: CallOptions) { + const client = this.getClient(request.model_id); + return this.wrapGrpcCall( + client.modelInfo.bind(client), + )(request, options); + } + + async generate(request: RequiredModel, options?: CallOptions) { + const client = this.getClient(request.model_id); + return this.wrapGrpcCall( + client.generate.bind(client), + )(request, options); + } + + async generateStream(request: RequiredModel, options?: CallOptions) { + const client = this.getClient(request.model_id); + return this.wrapGrpcStream( + client.generateStream.bind(client), + )(request, options); + } + + async tokenize(request: RequiredModel, options?: CallOptions) { + const client = this.getClient(request.model_id); + return this.wrapGrpcCall( + client.tokenize.bind(client), + )(request, options); + } + + protected wrapGrpcCall( + fn: ( + request: TRequest, + options: CallOptions, + callback: UnaryCallback, + ) => ClientUnaryCall, + ) { + return (request: TRequest, { signal, ...options }: CallOptions = {}): Promise => { + return new Promise((resolve, reject) => { + const call = fn(request, options, (err, response) => { + signal?.removeEventListener("abort", abortHandler); + if (err) { + reject(err); + } else { + if (response === undefined) { + reject(new FrameworkError("Invalid response from GRPC server")); + } else { + resolve(response); + } + } + }); + const abortHandler = () => call.cancel(); + signal?.addEventListener("abort", abortHandler, { once: true }); + }); + }; + } + + protected wrapGrpcStream( + fn: (request: TRequest, options: CallOptions) => ClientReadableStream, + ) { + return async ( + request: TRequest, + { signal, ...options }: CallOptions = {}, + ): Promise> => { + const stream = fn(request, options); + const abortHandler = () => stream.cancel(); + signal?.addEventListener("abort", abortHandler, { once: true }); + stream.addListener("close", () => signal?.removeEventListener("abort", abortHandler)); + return stream; + }; + } + + createSnapshot() { + if (!this.usedDefaultCredentials) { + throw new ValueError( + "Cannot serialize a client with credentials passed directly. Use environment variables.", + ); + } + return { + options: R.omit(this.options, ["credentials"]), + }; + } + + loadSnapshot(snapshot: ReturnType) { + Object.assign(this, snapshot); + this.options.credentials = this.getDefaultCredentials(); + } +} diff --git a/src/adapters/ibm-vllm/llm.ts b/src/adapters/ibm-vllm/llm.ts new file mode 100644 index 00000000..9f3c5485 --- /dev/null +++ b/src/adapters/ibm-vllm/llm.ts @@ -0,0 +1,248 @@ +/** + * Copyright 2024 IBM Corp. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +import { + AsyncStream, + BaseLLMOutput, + BaseLLMTokenizeOutput, + ExecutionOptions, + GenerateCallbacks, + GenerateOptions, + LLMError, + LLMMeta, +} from "@/llms/base.js"; +import { isEmpty, isString } from "remeda"; +import type { SingleGenerationRequest } from "@/adapters/ibm-vllm/types.js"; +import { LLM, LLMInput } from "@/llms/llm.js"; +import { Emitter } from "@/emitter/emitter.js"; +import { GenerationResponse__Output } from "@/adapters/ibm-vllm/types.js"; +import { shallowCopy } from "@/serializer/utils.js"; +import { FrameworkError, NotImplementedError } from "@/errors.js"; +import { assign } from "@/internals/helpers/object.js"; +import { ServiceError } from "@grpc/grpc-js"; +import { Client } from "@/adapters/ibm-vllm/client.js"; +import { GetRunContext } from "@/context.js"; +import { BatchedGenerationRequest } from "./types.js"; + +function isGrpcServiceError(err: unknown): err is ServiceError { + return ( + err instanceof Error && + err.constructor.name === "Error" && + "code" in err && + typeof err.code === "number" + ); +} + +export class IBMvLLMOutput extends BaseLLMOutput { + constructor( + public text: string, + public readonly meta: Record, + ) { + super(); + } + + static { + this.register(); + } + + merge(other: IBMvLLMOutput): void { + this.text += other.text; + assign(this.meta, other.meta); + } + + getTextContent(): string { + return this.text; + } + + toString(): string { + return this.getTextContent(); + } + + createSnapshot() { + return { + text: this.text, + meta: shallowCopy(this.meta), + }; + } + + loadSnapshot(snapshot: ReturnType) { + Object.assign(this, snapshot); + } +} + +export interface IBMvLLMInput { + client?: Client; + modelId: string; + parameters?: IBMvLLMParameters; + executionOptions?: ExecutionOptions; +} + +export type IBMvLLMParameters = NonNullable< + BatchedGenerationRequest["params"] & SingleGenerationRequest["params"] +>; + +export interface IBMvLLMGenerateOptions extends GenerateOptions {} + +export class IBMvLLM extends LLM { + public readonly emitter = new Emitter({ + namespace: ["ibm_vllm", "llm"], + creator: this, + }); + + public readonly client: Client; + public readonly parameters: Partial; + + constructor({ client, modelId, parameters = {}, executionOptions }: IBMvLLMInput) { + super(modelId, executionOptions); + this.client = client ?? new Client(); + this.parameters = parameters ?? 
{}; + } + + static { + this.register(); + } + + async meta(): Promise<LLMMeta> { + const response = await this.client.modelInfo({ model_id: this.modelId }); + return { + tokenLimit: response.max_sequence_length, + }; + } + + async tokenize(input: LLMInput): Promise<BaseLLMTokenizeOutput> { + try { + const response = await this.client.tokenize({ + model_id: this.modelId, + requests: [{ text: input }], + }); + const output = response.responses.at(0); + if (!output) { + throw new LLMError("Missing output", [], { context: { response } }); + } + return { + tokens: output.tokens, + tokensCount: output.token_count, + }; + } catch (err) { + throw this._transformError(err); + } + } + + protected async _generate( + input: LLMInput, + options: IBMvLLMGenerateOptions | undefined, + run: GetRunContext<typeof this>, + ): Promise<IBMvLLMOutput> { + try { + const response = await this.client.generate( + { + model_id: this.modelId, + requests: [{ text: input }], + params: this._prepareParameters(options), + }, + { signal: run.signal }, + ); + const output = response.responses.at(0); + if (!output) { + throw new LLMError("Missing output", [], { context: { response } }); + } + + const { text, ...rest } = output; + return new IBMvLLMOutput(text, rest); + } catch (err) { + throw this._transformError(err); + } + } + + protected async *_stream( + input: string, + options: IBMvLLMGenerateOptions | undefined, + run: GetRunContext<typeof this>, + ): AsyncStream<IBMvLLMOutput> { + try { + const stream = await this.client.generateStream( + { + model_id: this.modelId, + request: { text: input }, + params: this._prepareParameters(options), + }, + { signal: run.signal }, + ); + for await (const chunk of stream) { + const typedChunk = chunk as GenerationResponse__Output; + const { text, ...rest } = typedChunk; + if (text.length > 0) { + yield new IBMvLLMOutput(text, rest); + } + } + } catch (err) { + throw this._transformError(err); + } + } + + createSnapshot() { + return { + ...super.createSnapshot(), + client: this.client, + modelId: this.modelId, + parameters: shallowCopy(this.parameters), + executionOptions: shallowCopy(this.executionOptions), + }; + } + + loadSnapshot(snapshot: ReturnType<typeof this.createSnapshot>) { + super.loadSnapshot(snapshot); + Object.assign(this, snapshot); + } + + protected _transformError(error: Error): Error { + if (error instanceof FrameworkError) { + throw error; + } + if (isGrpcServiceError(error)) { + throw new LLMError("The LLM has encountered an error!", [error], { + isRetryable: [8, 4, 14].includes(error.code), + }); + } + return new LLMError("The LLM has encountered an error!", [error]); + } + + protected _prepareParameters(overrides?: GenerateOptions): typeof this.parameters { + const guided = overrides?.guided ? {} : (this.parameters.decoding ?? {}); + const guidedOverride = overrides?.guided; + + if (guidedOverride?.choice) { + guided.choice = { ...guided.choice, choices: guidedOverride.choice }; + } else if (guidedOverride?.grammar) { + guided.grammar = guidedOverride.grammar; + } else if (guidedOverride?.json) { + guided.json_schema = isString(guidedOverride.json) + ? JSON.parse(guidedOverride.json) + : guidedOverride.json; + } else if (guidedOverride?.regex) { + guided.regex = guidedOverride.regex; + } else if (!isEmpty(guidedOverride ??
{})) { + throw new NotImplementedError( + `Following types ${Object.keys(overrides!.guided!).join(",")}" for the constraint decoding are not supported!`, + ); + } + + return { + ...this.parameters, + decoding: guided, + }; + } +} diff --git a/src/adapters/ibm-vllm/proto/generation.proto b/src/adapters/ibm-vllm/proto/generation.proto new file mode 100644 index 00000000..709bc751 --- /dev/null +++ b/src/adapters/ibm-vllm/proto/generation.proto @@ -0,0 +1,289 @@ +/** + * Copyright 2024 IBM Corp. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +// Source: https://github.com/IBM/vllm/blob/main/proto/generation.proto + +syntax = "proto3"; +package fmaas; + + +service GenerationService { + // Generates text given a text prompt, for one or more inputs + rpc Generate (BatchedGenerationRequest) returns (BatchedGenerationResponse) {} + // Generates text given a single input prompt, streaming the response + rpc GenerateStream (SingleGenerationRequest) returns (stream GenerationResponse) {} + // Tokenize text + rpc Tokenize (BatchedTokenizeRequest) returns (BatchedTokenizeResponse) {} + // Model info + rpc ModelInfo (ModelInfoRequest) returns (ModelInfoResponse) {} +} + +// ============================================================================================================ +// Generation API + +enum DecodingMethod { + GREEDY = 0; + SAMPLE = 1; +} + +message BatchedGenerationRequest { + string model_id = 1; + // Deprecated in favor of adapter_id + optional string prefix_id = 2; + optional string adapter_id = 4; + repeated GenerationRequest requests = 3; + + Parameters params = 10; +} + +message SingleGenerationRequest { + string model_id = 1; + // Deprecated in favor of adapter_id + optional string prefix_id = 2; + optional string adapter_id = 4; + GenerationRequest request = 3; + + Parameters params = 10; +} + +message BatchedGenerationResponse { + repeated GenerationResponse responses = 1; +} + +message GenerationRequest { + string text = 2; +} + +message GenerationResponse { + uint32 input_token_count = 6; + uint32 generated_token_count = 2; + string text = 4; + StopReason stop_reason = 7; + // The stop sequence encountered, iff stop_reason == STOP_SEQUENCE + string stop_sequence = 11; + // Random seed used, not applicable for greedy requests + uint64 seed = 10; + + // Individual generated tokens and associated details, if requested + repeated TokenInfo tokens = 8; + + // Input tokens and associated details, if requested + repeated TokenInfo input_tokens = 9; +} + +message Parameters { + // The high level decoding approach + DecodingMethod method = 1; + // Parameters related to sampling, applicable only when method == SAMPLING + SamplingParameters sampling = 2; + // Parameters controlling when generation should stop + StoppingCriteria stopping = 3; + // Flags to control what is returned in the response + ResponseOptions response = 4; + // Parameters for conditionally penalizing/boosting + // candidate tokens during decoding + DecodingParameters decoding = 5; + // Truncate to 
this many input tokens. Can be used to avoid requests + // failing due to input being longer than configured limits. + // Zero means don't truncate. + uint32 truncate_input_tokens = 6; +} + +message DecodingParameters { + message LengthPenalty { + // Start the decay after this number of tokens have been generated + uint32 start_index = 1; + // Factor of exponential decay + float decay_factor = 2; + } + + // Default (0.0) means no penalty (equivalent to 1.0) + // 1.2 is a recommended value + float repetition_penalty = 1; + + // Exponentially increases the score of the EOS token + // once start_index tokens have been generated + optional LengthPenalty length_penalty = 2; + + enum ResponseFormat { + // Plain text, no constraints + TEXT = 0; + // Valid json + JSON = 1; + } + + message StringChoices { + repeated string choices = 1; + } + + // Mutually-exclusive guided decoding options + oneof guided { + // Output will be in the specified format + ResponseFormat format = 3; + // Output will follow the provided JSON schema + string json_schema = 4; + // Output will follow the provided regex pattern + string regex = 5; + // Output will be exactly one of the specified choices + StringChoices choice = 6; + // Output will follow the provided context free grammar + string grammar = 7; + } +} + + +message SamplingParameters { + // Default (0.0) means disabled (equivalent to 1.0) + float temperature = 1; + // Default (0) means disabled + uint32 top_k = 2; + // Default (0) means disabled (equivalent to 1.0) + float top_p = 3; + // Default (0) means disabled (equivalent to 1.0) + float typical_p = 4; + + optional uint64 seed = 5; +} + +message StoppingCriteria { + // Default (0) is currently 20 + uint32 max_new_tokens = 1; + // Default (0) means no minimum + uint32 min_new_tokens = 2; + // Default (0) means no time limit + uint32 time_limit_millis = 3; + repeated string stop_sequences = 4; + // If not specified, default behavior depends on server setting + optional bool include_stop_sequence = 5; + + //more to come +} + +message ResponseOptions { + // Include input text + bool input_text = 1; + // Include list of individual generated tokens + // "Extra" token information is included based on the other flags below + bool generated_tokens = 2; + // Include list of input tokens + // "Extra" token information is included based on the other flags here, + // but only for decoder-only models + bool input_tokens = 3; + // Include logprob for each returned token + // Applicable only if generated_tokens == true and/or input_tokens == true + bool token_logprobs = 4; + // Include rank of each returned token + // Applicable only if generated_tokens == true and/or input_tokens == true + bool token_ranks = 5; + // Include top n candidate tokens at the position of each returned token + // The maximum value permitted is 5, but more may be returned if there is a tie + // for nth place. 
+ // Applicable only if generated_tokens == true and/or input_tokens == true + uint32 top_n_tokens = 6; +} + +enum StopReason { + // Possibly more tokens to be streamed + NOT_FINISHED = 0; + // Maximum requested tokens reached + MAX_TOKENS = 1; + // End-of-sequence token encountered + EOS_TOKEN = 2; + // Request cancelled by client + CANCELLED = 3; + // Time limit reached + TIME_LIMIT = 4; + // Stop sequence encountered + STOP_SEQUENCE = 5; + // Total token limit reached + TOKEN_LIMIT = 6; + // Decoding error + ERROR = 7; +} + +message TokenInfo { + // uint32 id = 1; // TBD + string text = 2; + // The logprob (log of normalized probability), if requested + float logprob = 3; + // One-based rank relative to other tokens, if requested + uint32 rank = 4; + + message TopToken { + // uint32 id = 1; // TBD + string text = 2; + float logprob = 3; + } + + // Top N candidate tokens at this position, if requested + // May or may not include this token + repeated TopToken top_tokens = 5; +} + + +// ============================================================================================================ +// Tokenization API + +message BatchedTokenizeRequest { + string model_id = 1; + repeated TokenizeRequest requests = 2; + bool return_tokens = 3; + bool return_offsets = 4; + + // Zero means don't truncate. + uint32 truncate_input_tokens = 5; +} + +message BatchedTokenizeResponse { + repeated TokenizeResponse responses = 1; +} + +message TokenizeRequest { + string text = 1; +} + +message TokenizeResponse { + message Offset { + uint32 start = 1; + uint32 end = 2; + } + + uint32 token_count = 1; + + // if return_tokens = true + repeated string tokens = 2; + // if return_tokens = true + repeated Offset offsets = 3; +} + + +// ============================================================================================================ +// Model Info API + +message ModelInfoRequest { + string model_id = 1; +} + +message ModelInfoResponse { + enum ModelKind { + DECODER_ONLY = 0; + ENCODER_DECODER = 1; + } + + ModelKind model_kind = 1; + uint32 max_sequence_length = 2; + uint32 max_new_tokens = 3; +} diff --git a/src/adapters/ibm-vllm/types.ts b/src/adapters/ibm-vllm/types.ts new file mode 100644 index 00000000..55bcc902 --- /dev/null +++ b/src/adapters/ibm-vllm/types.ts @@ -0,0 +1,535 @@ +/** + * Copyright 2024 IBM Corp. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +import * as grpc from "@grpc/grpc-js"; +import { + Long, + MethodDefinition, + MessageTypeDefinition, + EnumTypeDefinition, +} from "@grpc/proto-loader"; + +export interface GenerationRequest { + text?: string; +} +export interface GenerationRequest__Output { + text: string; +} + +declare const DecodingMethod: { + readonly GREEDY: "GREEDY"; + readonly SAMPLE: "SAMPLE"; +}; +export type DecodingMethod = "GREEDY" | 0 | "SAMPLE" | 1; +export type DecodingMethod__Output = (typeof DecodingMethod)[keyof typeof DecodingMethod]; + +export interface SamplingParameters { + temperature?: number | string; + top_k?: number; + top_p?: number | string; + typical_p?: number | string; + seed?: number | string | Long; + _seed?: "seed"; +} +export interface SamplingParameters__Output { + temperature: number; + top_k: number; + top_p: number; + typical_p: number; + seed?: number; + _seed: "seed"; +} + +export interface StoppingCriteria { + max_new_tokens?: number; + min_new_tokens?: number; + time_limit_millis?: number; + stop_sequences?: string[]; + include_stop_sequence?: boolean; + _include_stop_sequence?: "include_stop_sequence"; +} +export interface StoppingCriteria__Output { + max_new_tokens: number; + min_new_tokens: number; + time_limit_millis: number; + stop_sequences: string[]; + include_stop_sequence?: boolean; + _include_stop_sequence: "include_stop_sequence"; +} + +export interface ResponseOptions { + input_text?: boolean; + generated_tokens?: boolean; + input_tokens?: boolean; + token_logprobs?: boolean; + token_ranks?: boolean; + top_n_tokens?: number; +} +export interface ResponseOptions__Output { + input_text: boolean; + generated_tokens: boolean; + input_tokens: boolean; + token_logprobs: boolean; + token_ranks: boolean; + top_n_tokens: number; +} + +export interface _fmaas_DecodingParameters_LengthPenalty { + start_index?: number; + decay_factor?: number | string; +} +export interface _fmaas_DecodingParameters_LengthPenalty__Output { + start_index: number; + decay_factor: number; +} +declare const _fmaas_DecodingParameters_ResponseFormat: { + readonly TEXT: "TEXT"; + readonly JSON: "JSON"; +}; +export type _fmaas_DecodingParameters_ResponseFormat = "TEXT" | 0 | "JSON" | 1; +export type _fmaas_DecodingParameters_ResponseFormat__Output = + (typeof _fmaas_DecodingParameters_ResponseFormat)[keyof typeof _fmaas_DecodingParameters_ResponseFormat]; +export interface _fmaas_DecodingParameters_StringChoices { + choices?: string[]; +} +export interface _fmaas_DecodingParameters_StringChoices__Output { + choices: string[]; +} +export interface DecodingParameters { + repetition_penalty?: number | string; + length_penalty?: _fmaas_DecodingParameters_LengthPenalty | null; + format?: _fmaas_DecodingParameters_ResponseFormat; + json_schema?: string; + regex?: string; + choice?: _fmaas_DecodingParameters_StringChoices | null; + grammar?: string; + _length_penalty?: "length_penalty"; + guided?: "format" | "json_schema" | "regex" | "choice" | "grammar"; +} +export interface DecodingParameters__Output { + repetition_penalty: number; + length_penalty?: _fmaas_DecodingParameters_LengthPenalty__Output | null; + format?: _fmaas_DecodingParameters_ResponseFormat__Output; + json_schema?: string; + regex?: string; + choice?: _fmaas_DecodingParameters_StringChoices__Output | null; + grammar?: string; + _length_penalty: "length_penalty"; + guided: "format" | "json_schema" | "regex" | "choice" | "grammar"; +} + +export interface Parameters { + method?: DecodingMethod; + sampling?: SamplingParameters | null; + 
stopping?: StoppingCriteria | null; + response?: ResponseOptions | null; + decoding?: DecodingParameters | null; + truncate_input_tokens?: number; +} +export interface Parameters__Output { + method: DecodingMethod__Output; + sampling: SamplingParameters__Output | null; + stopping: StoppingCriteria__Output | null; + response: ResponseOptions__Output | null; + decoding: DecodingParameters__Output | null; + truncate_input_tokens: number; +} + +export interface BatchedGenerationRequest { + model_id?: string; + prefix_id?: string; + requests?: GenerationRequest[]; + adapter_id?: string; + params?: Parameters | null; + _prefix_id?: "prefix_id"; + _adapter_id?: "adapter_id"; +} +export interface BatchedGenerationRequest__Output { + model_id: string; + prefix_id?: string; + requests: GenerationRequest__Output[]; + adapter_id?: string; + params: Parameters__Output | null; + _prefix_id: "prefix_id"; + _adapter_id: "adapter_id"; +} + +declare const StopReason: { + readonly NOT_FINISHED: "NOT_FINISHED"; + readonly MAX_TOKENS: "MAX_TOKENS"; + readonly EOS_TOKEN: "EOS_TOKEN"; + readonly CANCELLED: "CANCELLED"; + readonly TIME_LIMIT: "TIME_LIMIT"; + readonly STOP_SEQUENCE: "STOP_SEQUENCE"; + readonly TOKEN_LIMIT: "TOKEN_LIMIT"; + readonly ERROR: "ERROR"; +}; +export type StopReason = + | "NOT_FINISHED" + | 0 + | "MAX_TOKENS" + | 1 + | "EOS_TOKEN" + | 2 + | "CANCELLED" + | 3 + | "TIME_LIMIT" + | 4 + | "STOP_SEQUENCE" + | 5 + | "TOKEN_LIMIT" + | 6 + | "ERROR" + | 7; +export type StopReason__Output = (typeof StopReason)[keyof typeof StopReason]; + +export interface _fmaas_TokenInfo_TopToken { + text?: string; + logprob?: number | string; +} +export interface _fmaas_TokenInfo_TopToken__Output { + text: string; + logprob: number; +} +export interface TokenInfo { + text?: string; + logprob?: number | string; + rank?: number; + top_tokens?: _fmaas_TokenInfo_TopToken[]; +} +export interface TokenInfo__Output { + text: string; + logprob: number; + rank: number; + top_tokens: _fmaas_TokenInfo_TopToken__Output[]; +} + +export interface GenerationResponse { + generated_token_count?: number; + text?: string; + input_token_count?: number; + stop_reason?: StopReason; + tokens?: TokenInfo[]; + input_tokens?: TokenInfo[]; + seed?: number | string | Long; + stop_sequence?: string; +} +export interface GenerationResponse__Output { + generated_token_count: number; + text: string; + input_token_count: number; + stop_reason: StopReason__Output; + tokens: TokenInfo__Output[]; + input_tokens: TokenInfo__Output[]; + seed: number; + stop_sequence: string; +} + +export interface BatchedGenerationResponse { + responses?: GenerationResponse[]; +} +export interface BatchedGenerationResponse__Output { + responses: GenerationResponse__Output[]; +} + +export interface TokenizeRequest { + text?: string; +} +export interface TokenizeRequest__Output { + text: string; +} + +export interface BatchedTokenizeRequest { + model_id?: string; + requests?: TokenizeRequest[]; + return_tokens?: boolean; + return_offsets?: boolean; + truncate_input_tokens?: number; +} +export interface BatchedTokenizeRequest__Output { + model_id: string; + requests: TokenizeRequest__Output[]; + return_tokens: boolean; + return_offsets: boolean; + truncate_input_tokens: number; +} + +export interface _fmaas_TokenizeResponse_Offset { + start?: number; + end?: number; +} +export interface _fmaas_TokenizeResponse_Offset__Output { + start: number; + end: number; +} +export interface TokenizeResponse { + token_count?: number; + tokens?: string[]; + offsets?: 
_fmaas_TokenizeResponse_Offset[]; +} +export interface TokenizeResponse__Output { + token_count: number; + tokens: string[]; + offsets: _fmaas_TokenizeResponse_Offset__Output[]; +} + +export interface BatchedTokenizeResponse { + responses?: TokenizeResponse[]; +} +export interface BatchedTokenizeResponse__Output { + responses: TokenizeResponse__Output[]; +} + +export interface ModelInfoRequest { + model_id?: string; +} +export interface ModelInfoRequest__Output { + model_id: string; +} + +declare const _fmaas_ModelInfoResponse_ModelKind: { + readonly DECODER_ONLY: "DECODER_ONLY"; + readonly ENCODER_DECODER: "ENCODER_DECODER"; +}; +export type _fmaas_ModelInfoResponse_ModelKind = "DECODER_ONLY" | 0 | "ENCODER_DECODER" | 1; +export type _fmaas_ModelInfoResponse_ModelKind__Output = + (typeof _fmaas_ModelInfoResponse_ModelKind)[keyof typeof _fmaas_ModelInfoResponse_ModelKind]; +export interface ModelInfoResponse { + model_kind?: _fmaas_ModelInfoResponse_ModelKind; + max_sequence_length?: number; + max_new_tokens?: number; +} +export interface ModelInfoResponse__Output { + model_kind: _fmaas_ModelInfoResponse_ModelKind__Output; + max_sequence_length: number; + max_new_tokens: number; +} + +export interface SingleGenerationRequest { + model_id?: string; + prefix_id?: string; + request?: GenerationRequest | null; + adapter_id?: string; + params?: Parameters | null; + _prefix_id?: "prefix_id"; + _adapter_id?: "adapter_id"; +} +export interface SingleGenerationRequest__Output { + model_id: string; + prefix_id?: string; + request: GenerationRequest__Output | null; + adapter_id?: string; + params: Parameters__Output | null; + _prefix_id: "prefix_id"; + _adapter_id: "adapter_id"; +} + +export interface GenerationServiceClient extends grpc.Client { + Generate( + argument: BatchedGenerationRequest, + metadata: grpc.Metadata, + options: grpc.CallOptions, + callback: grpc.requestCallback, + ): grpc.ClientUnaryCall; + Generate( + argument: BatchedGenerationRequest, + metadata: grpc.Metadata, + callback: grpc.requestCallback, + ): grpc.ClientUnaryCall; + Generate( + argument: BatchedGenerationRequest, + options: grpc.CallOptions, + callback: grpc.requestCallback, + ): grpc.ClientUnaryCall; + Generate( + argument: BatchedGenerationRequest, + callback: grpc.requestCallback, + ): grpc.ClientUnaryCall; + generate( + argument: BatchedGenerationRequest, + metadata: grpc.Metadata, + options: grpc.CallOptions, + callback: grpc.requestCallback, + ): grpc.ClientUnaryCall; + generate( + argument: BatchedGenerationRequest, + metadata: grpc.Metadata, + callback: grpc.requestCallback, + ): grpc.ClientUnaryCall; + generate( + argument: BatchedGenerationRequest, + options: grpc.CallOptions, + callback: grpc.requestCallback, + ): grpc.ClientUnaryCall; + generate( + argument: BatchedGenerationRequest, + callback: grpc.requestCallback, + ): grpc.ClientUnaryCall; + GenerateStream( + argument: SingleGenerationRequest, + metadata: grpc.Metadata, + options?: grpc.CallOptions, + ): grpc.ClientReadableStream; + GenerateStream( + argument: SingleGenerationRequest, + options?: grpc.CallOptions, + ): grpc.ClientReadableStream; + generateStream( + argument: SingleGenerationRequest, + metadata: grpc.Metadata, + options?: grpc.CallOptions, + ): grpc.ClientReadableStream; + generateStream( + argument: SingleGenerationRequest, + options?: grpc.CallOptions, + ): grpc.ClientReadableStream; + ModelInfo( + argument: ModelInfoRequest, + metadata: grpc.Metadata, + options: grpc.CallOptions, + callback: grpc.requestCallback, + ): 
grpc.ClientUnaryCall; + ModelInfo( + argument: ModelInfoRequest, + metadata: grpc.Metadata, + callback: grpc.requestCallback, + ): grpc.ClientUnaryCall; + ModelInfo( + argument: ModelInfoRequest, + options: grpc.CallOptions, + callback: grpc.requestCallback, + ): grpc.ClientUnaryCall; + ModelInfo( + argument: ModelInfoRequest, + callback: grpc.requestCallback, + ): grpc.ClientUnaryCall; + modelInfo( + argument: ModelInfoRequest, + metadata: grpc.Metadata, + options: grpc.CallOptions, + callback: grpc.requestCallback, + ): grpc.ClientUnaryCall; + modelInfo( + argument: ModelInfoRequest, + metadata: grpc.Metadata, + callback: grpc.requestCallback, + ): grpc.ClientUnaryCall; + modelInfo( + argument: ModelInfoRequest, + options: grpc.CallOptions, + callback: grpc.requestCallback, + ): grpc.ClientUnaryCall; + modelInfo( + argument: ModelInfoRequest, + callback: grpc.requestCallback, + ): grpc.ClientUnaryCall; + Tokenize( + argument: BatchedTokenizeRequest, + metadata: grpc.Metadata, + options: grpc.CallOptions, + callback: grpc.requestCallback, + ): grpc.ClientUnaryCall; + Tokenize( + argument: BatchedTokenizeRequest, + metadata: grpc.Metadata, + callback: grpc.requestCallback, + ): grpc.ClientUnaryCall; + Tokenize( + argument: BatchedTokenizeRequest, + options: grpc.CallOptions, + callback: grpc.requestCallback, + ): grpc.ClientUnaryCall; + Tokenize( + argument: BatchedTokenizeRequest, + callback: grpc.requestCallback, + ): grpc.ClientUnaryCall; + tokenize( + argument: BatchedTokenizeRequest, + metadata: grpc.Metadata, + options: grpc.CallOptions, + callback: grpc.requestCallback, + ): grpc.ClientUnaryCall; + tokenize( + argument: BatchedTokenizeRequest, + metadata: grpc.Metadata, + callback: grpc.requestCallback, + ): grpc.ClientUnaryCall; + tokenize( + argument: BatchedTokenizeRequest, + options: grpc.CallOptions, + callback: grpc.requestCallback, + ): grpc.ClientUnaryCall; + tokenize( + argument: BatchedTokenizeRequest, + callback: grpc.requestCallback, + ): grpc.ClientUnaryCall; +} +export interface GenerationServiceDefinition extends grpc.ServiceDefinition { + Generate: MethodDefinition< + BatchedGenerationRequest, + BatchedGenerationResponse, + BatchedGenerationRequest__Output, + BatchedGenerationResponse__Output + >; + GenerateStream: MethodDefinition< + SingleGenerationRequest, + GenerationResponse, + SingleGenerationRequest__Output, + GenerationResponse__Output + >; + ModelInfo: MethodDefinition< + ModelInfoRequest, + ModelInfoResponse, + ModelInfoRequest__Output, + ModelInfoResponse__Output + >; + Tokenize: MethodDefinition< + BatchedTokenizeRequest, + BatchedTokenizeResponse, + BatchedTokenizeRequest__Output, + BatchedTokenizeResponse__Output + >; +} + +export type SubtypeConstructor any, Subtype> = new ( + ...args: ConstructorParameters +) => Subtype; +export interface ProtoGrpcType { + fmaas: { + BatchedGenerationRequest: MessageTypeDefinition; + BatchedGenerationResponse: MessageTypeDefinition; + BatchedTokenizeRequest: MessageTypeDefinition; + BatchedTokenizeResponse: MessageTypeDefinition; + DecodingMethod: EnumTypeDefinition; + DecodingParameters: MessageTypeDefinition; + GenerationRequest: MessageTypeDefinition; + GenerationResponse: MessageTypeDefinition; + GenerationService: SubtypeConstructor & { + service: GenerationServiceDefinition; + }; + ModelInfoRequest: MessageTypeDefinition; + ModelInfoResponse: MessageTypeDefinition; + Parameters: MessageTypeDefinition; + ResponseOptions: MessageTypeDefinition; + SamplingParameters: MessageTypeDefinition; + 
SingleGenerationRequest: MessageTypeDefinition; + StopReason: EnumTypeDefinition; + StoppingCriteria: MessageTypeDefinition; + TokenInfo: MessageTypeDefinition; + TokenizeRequest: MessageTypeDefinition; + TokenizeResponse: MessageTypeDefinition; + }; +} diff --git a/tests/e2e/adapters/ibm-vllm/chat.test.ts b/tests/e2e/adapters/ibm-vllm/chat.test.ts new file mode 100644 index 00000000..165d0d3b --- /dev/null +++ b/tests/e2e/adapters/ibm-vllm/chat.test.ts @@ -0,0 +1,76 @@ +/** + * Copyright 2024 IBM Corp. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +import { BaseMessage } from "@/llms/primitives/message.js"; +import { expect } from "vitest"; +import { verifyDeserialization } from "@tests/e2e/utils.js"; +import { IBMVllmChatLLM } from "@/adapters/ibm-vllm/chat.js"; + +describe.runIf( + [ + process.env.IBM_VLLM_URL, + process.env.IBM_VLLM_ROOT_CERT, + process.env.IBM_VLLM_PRIVATE_KEY, + process.env.IBM_VLLM_CERT_CHAIN, + ].every((env) => Boolean(env)), +)("IBM Chat vLLM", () => { + const createChatLLM = () => { + return IBMVllmChatLLM.fromPreset("meta-llama/llama-3-1-70b-instruct", { + parameters: { + method: "GREEDY", + stopping: { + min_new_tokens: 5, + max_new_tokens: 50, + }, + }, + }); + }; + + it("Generates", async () => { + const conversation = [ + BaseMessage.of({ + role: "system", + text: `You are a helpful and respectful and honest assistant. Your answer should be short and concise.`, + }), + ]; + const llm = createChatLLM(); + + for (const { question, answer } of [ + { question: `What is the coldest continent?`, answer: "arctica" }, + { question: "What is the most common typical animal that lives there?", answer: "penguin" }, + ]) { + conversation.push( + BaseMessage.of({ + role: "user", + text: question, + }), + ); + const response = await llm.generate(conversation); + + const newMessages = response.messages; + expect(newMessages).toHaveLength(1); + expect(newMessages[0].text.toLowerCase()).toContain(answer.toLowerCase()); + conversation.push(...newMessages); + } + }); + + it("Serializes", () => { + const llm = createChatLLM(); + const serialized = llm.serialize(); + const deserialized = IBMVllmChatLLM.fromSerialized(serialized); + verifyDeserialization(llm, deserialized); + }); +}); diff --git a/tests/e2e/adapters/ibm-vllm/llm.test.ts b/tests/e2e/adapters/ibm-vllm/llm.test.ts new file mode 100644 index 00000000..24496aee --- /dev/null +++ b/tests/e2e/adapters/ibm-vllm/llm.test.ts @@ -0,0 +1,59 @@ +/** + * Copyright 2024 IBM Corp. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import { BaseMessage } from "@/llms/primitives/message.js";
+import { expect } from "vitest";
+import { verifyDeserialization } from "@tests/e2e/utils.js";
+import { IBMVllmChatLLM } from "@/adapters/ibm-vllm/chat.js";
+
+describe.runIf(
+  [
+    process.env.IBM_VLLM_URL,
+    process.env.IBM_VLLM_ROOT_CERT,
+    process.env.IBM_VLLM_PRIVATE_KEY,
+    process.env.IBM_VLLM_CERT_CHAIN,
+  ].every((env) => Boolean(env)),
+)("IBM Chat vLLM", () => {
+  const createChatLLM = () => {
+    return IBMVllmChatLLM.fromPreset("meta-llama/llama-3-1-70b-instruct", {
+      parameters: {
+        method: "GREEDY",
+        stopping: {
+          min_new_tokens: 5,
+          max_new_tokens: 50,
+        },
+      },
+    });
+  };
+
+  it("Generates", async () => {
+    const conversation = [
+      BaseMessage.of({
+        role: "system",
+        text: `You are a helpful and respectful and honest assistant. Your answer should be short and concise.`,
+      }),
+    ];
+    const llm = createChatLLM();
+
+    for (const { question, answer } of [
+      { question: `What is the coldest continent?`, answer: "arctica" },
+      { question: "What is the most common typical animal that lives there?", answer: "penguin" },
+    ]) {
+      conversation.push(
+        BaseMessage.of({
+          role: "user",
+          text: question,
+        }),
+      );
+      const response = await llm.generate(conversation);
+
+      const newMessages = response.messages;
+      expect(newMessages).toHaveLength(1);
+      expect(newMessages[0].text.toLowerCase()).toContain(answer.toLowerCase());
+      conversation.push(...newMessages);
+    }
+  });
+
+  it("Serializes", () => {
+    const llm = createChatLLM();
+    const serialized = llm.serialize();
+    const deserialized = IBMVllmChatLLM.fromSerialized(serialized);
+    verifyDeserialization(llm, deserialized);
+  });
+});
diff --git a/tests/e2e/adapters/ibm-vllm/llm.test.ts b/tests/e2e/adapters/ibm-vllm/llm.test.ts
new file mode 100644
index 00000000..24496aee
--- /dev/null
+++ b/tests/e2e/adapters/ibm-vllm/llm.test.ts
@@ -0,0 +1,59 @@
+/**
+ * Copyright 2024 IBM Corp.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import { verifyDeserialization } from "@tests/e2e/utils.js";
+import { IBMvLLM, IBMvLLMOutput } from "@/adapters/ibm-vllm/llm.js";
+import { IBMVllmModel } from "@/adapters/ibm-vllm/chatPreset.js";
+
+describe.runIf(
+  [
+    process.env.IBM_VLLM_URL,
+    process.env.IBM_VLLM_ROOT_CERT,
+    process.env.IBM_VLLM_PRIVATE_KEY,
+    process.env.IBM_VLLM_CERT_CHAIN,
+  ].every((env) => Boolean(env)),
+)("IBM vLLM", () => {
+  const createLLM = () => {
+    return new IBMvLLM({ modelId: IBMVllmModel.LLAMA_3_1_70B_INSTRUCT });
+  };
+
+  it("Meta", async () => {
+    const llm = createLLM();
+    const response = await llm.meta();
+    expect(response.tokenLimit).toBeGreaterThan(0);
+  });
+
+  it("Generates", async () => {
+    const llm = createLLM();
+    const response = await llm.generate("Hello world!");
+    expect(response).toBeInstanceOf(IBMvLLMOutput);
+  });
+
+  it("Streams", async () => {
+    const llm = createLLM();
+    for await (const chunk of llm.stream("Hello world!")) {
+      expect(chunk).toBeInstanceOf(IBMvLLMOutput);
+      expect(chunk.text).toBeTruthy();
+    }
+  });
+
+  it("Serializes", () => {
+    const llm = createLLM();
+    const serialized = llm.serialize();
+    const deserialized = IBMvLLM.fromSerialized(serialized);
+    verifyDeserialization(llm, deserialized);
+  });
+});
diff --git a/tests/e2e/utils.ts b/tests/e2e/utils.ts
index 2293c4c7..192129ec 100644
--- a/tests/e2e/utils.ts
+++ b/tests/e2e/utils.ts
@@ -112,7 +112,6 @@ export function verifyDeserialization(
     if (target instanceof ZodType) {
       target = toJsonSchema(target);
     }
-    Serializer.findFactory(target);
     verifyDeserialization(value, target, parent, path.concat(key), ignoredPaths);
   }
diff --git a/yarn.lock b/yarn.lock
index 5fcbb358..b8e63753 100644
--- a/yarn.lock
+++ b/yarn.lock
@@ -718,6 +718,30 @@ __metadata:
   languageName: node
   linkType: hard
 
+"@grpc/grpc-js@npm:^1.11.3":
+  version: 1.11.3
+  resolution: "@grpc/grpc-js@npm:1.11.3"
+  dependencies:
+    "@grpc/proto-loader": "npm:^0.7.13"
+    "@js-sdsl/ordered-map": "npm:^4.4.2"
+  checksum: 10c0/2946a70c709688737603be573f6836beea26e4c132a50164591020860ae0e62375c1475c26017011fabfbaf6a9fa2bfdabfe9058aed11bab2f697e4242533afc
+  languageName: node
+  linkType: hard
+
+"@grpc/proto-loader@npm:^0.7.13":
+  version: 0.7.13
+  resolution: "@grpc/proto-loader@npm:0.7.13"
+  dependencies:
+    lodash.camelcase: "npm:^4.3.0"
+    long: "npm:^5.0.0"
+    protobufjs: "npm:^7.2.5"
+    yargs: "npm:^17.7.2"
+  bin:
+    proto-loader-gen-types: build/bin/proto-loader-gen-types.js
+  checksum: 10c0/dc8ed7aa1454c15e224707cc53d84a166b98d76f33606a9f334c7a6fb1aedd3e3614dcd2c2b02a6ffaf140587d19494f93b3a56346c6c2e26bc564f6deddbbf3
+  languageName: node
+  linkType: hard
+
 "@humanwhocodes/module-importer@npm:^1.0.1":
   version: 1.0.1
   resolution: "@humanwhocodes/module-importer@npm:1.0.1"
@@ -825,6 +849,13 @@ __metadata:
   languageName: node
   linkType: hard
 
+"@js-sdsl/ordered-map@npm:^4.4.2":
+  version: 4.4.2
+  resolution: "@js-sdsl/ordered-map@npm:4.4.2"
+  checksum: 10c0/cc7e15dc4acf6d9ef663757279600bab70533d847dcc1ab01332e9e680bd30b77cdf9ad885cc774276f51d98b05a013571c940e5b360985af5eb798dc1a2ee2b
+  languageName: node
+  linkType: hard
+
 "@jsdevtools/ono@npm:^7.1.3":
   version: 7.1.3
   resolution: "@jsdevtools/ono@npm:7.1.3"
@@ -1506,6 +1537,79 @@ __metadata:
   languageName: node
   linkType: hard
 
+"@protobufjs/aspromise@npm:^1.1.1, @protobufjs/aspromise@npm:^1.1.2":
+  version: 1.1.2
+  resolution: "@protobufjs/aspromise@npm:1.1.2"
+  checksum: 10c0/a83343a468ff5b5ec6bff36fd788a64c839e48a07ff9f4f813564f58caf44d011cd6504ed2147bf34835bd7a7dd2107052af755961c6b098fd8902b4f6500d0f
+  languageName: node
+  linkType: hard
+
+"@protobufjs/base64@npm:^1.1.2":
+  version: 1.1.2
+  resolution: "@protobufjs/base64@npm:1.1.2"
+  checksum: 10c0/eec925e681081af190b8ee231f9bad3101e189abbc182ff279da6b531e7dbd2a56f1f306f37a80b1be9e00aa2d271690d08dcc5f326f71c9eed8546675c8caf6
+  languageName: node
+  linkType: hard
+
+"@protobufjs/codegen@npm:^2.0.4":
+  version: 2.0.4
+  resolution: "@protobufjs/codegen@npm:2.0.4"
+  checksum: 10c0/26ae337c5659e41f091606d16465bbcc1df1f37cc1ed462438b1f67be0c1e28dfb2ca9f294f39100c52161aef82edf758c95d6d75650a1ddf31f7ddee1440b43
+  languageName: node
+  linkType: hard
+
+"@protobufjs/eventemitter@npm:^1.1.0":
+  version: 1.1.0
+  resolution: "@protobufjs/eventemitter@npm:1.1.0"
+  checksum: 10c0/1eb0a75180e5206d1033e4138212a8c7089a3d418c6dfa5a6ce42e593a4ae2e5892c4ef7421f38092badba4040ea6a45f0928869989411001d8c1018ea9a6e70
+  languageName: node
+  linkType: hard
+
+"@protobufjs/fetch@npm:^1.1.0":
+  version: 1.1.0
+  resolution: "@protobufjs/fetch@npm:1.1.0"
+  dependencies:
+    "@protobufjs/aspromise": "npm:^1.1.1"
+    "@protobufjs/inquire": "npm:^1.1.0"
+  checksum: 10c0/cda6a3dc2d50a182c5865b160f72077aac197046600091dbb005dd0a66db9cce3c5eaed6d470ac8ed49d7bcbeef6ee5f0bc288db5ff9a70cbd003e5909065233
+  languageName: node
+  linkType: hard
+
+"@protobufjs/float@npm:^1.0.2":
+  version: 1.0.2
+  resolution: "@protobufjs/float@npm:1.0.2"
+  checksum: 10c0/18f2bdede76ffcf0170708af15c9c9db6259b771e6b84c51b06df34a9c339dbbeec267d14ce0bddd20acc142b1d980d983d31434398df7f98eb0c94a0eb79069
+  languageName: node
+  linkType: hard
+
+"@protobufjs/inquire@npm:^1.1.0":
+  version: 1.1.0
+  resolution: "@protobufjs/inquire@npm:1.1.0"
+  checksum: 10c0/64372482efcba1fb4d166a2664a6395fa978b557803857c9c03500e0ac1013eb4b1aacc9ed851dd5fc22f81583670b4f4431bae186f3373fedcfde863ef5921a
+  languageName: node
+  linkType: hard
+
+"@protobufjs/path@npm:^1.1.2":
+  version: 1.1.2
+  resolution: "@protobufjs/path@npm:1.1.2"
+  checksum: 10c0/cece0a938e7f5dfd2fa03f8c14f2f1cf8b0d6e13ac7326ff4c96ea311effd5fb7ae0bba754fbf505312af2e38500250c90e68506b97c02360a43793d88a0d8b4
+  languageName: node
+  linkType: hard
+
+"@protobufjs/pool@npm:^1.1.0":
+  version: 1.1.0
+  resolution: "@protobufjs/pool@npm:1.1.0"
+  checksum: 10c0/eda2718b7f222ac6e6ad36f758a92ef90d26526026a19f4f17f668f45e0306a5bd734def3f48f51f8134ae0978b6262a5c517c08b115a551756d1a3aadfcf038
+  languageName: node
+  linkType: hard
+
+"@protobufjs/utf8@npm:^1.1.0":
+  version: 1.1.0
+  resolution: "@protobufjs/utf8@npm:1.1.0"
+  checksum: 10c0/a3fe31fe3fa29aa3349e2e04ee13dc170cc6af7c23d92ad49e3eeaf79b9766264544d3da824dba93b7855bd6a2982fb40032ef40693da98a136d835752beb487
+  languageName: node
+  linkType: hard
+
 "@redocly/ajv@npm:^8.11.0":
   version: 8.11.0
   resolution: "@redocly/ajv@npm:8.11.0"
@@ -2027,6 +2131,15 @@ __metadata:
   languageName: node
   linkType: hard
 
+"@types/node@npm:>=13.7.0":
+  version: 22.7.4
+  resolution: "@types/node@npm:22.7.4"
+  dependencies:
+    undici-types: "npm:~6.19.2"
+  checksum: 10c0/c22bf54515c78ff3170142c1e718b90e2a0003419dc2d55f79c9c9362edd590a6ab1450deb09ff6e1b32d1b4698da407930b16285e8be3a009ea6cd2695cac01
+  languageName: node
+  linkType: hard
+
 "@types/node@npm:^18.11.18":
   version: 18.19.45
   resolution: "@types/node@npm:18.19.45"
@@ -2633,6 +2746,8 @@ __metadata:
     "@eslint/js": "npm:^9.9.0"
     "@eslint/markdown": "npm:^6.0.0"
     "@googleapis/customsearch": "npm:^3.2.0"
+    "@grpc/grpc-js": "npm:^1.11.3"
"npm:^0.7.13" "@ibm-generative-ai/node-sdk": "npm:~3.2.3" "@langchain/community": "npm:~0.2.28" "@langchain/core": "npm:~0.2.27" @@ -2697,7 +2812,7 @@ __metadata: strip-ansi: "npm:^7.1.0" temp-dir: "npm:^3.0.0" tsc-files: "npm:^1.1.4" - tsup: "npm:^8.2.4" + tsup: "npm:^8.3.0" tsx: "npm:^4.19.0" turndown: "npm:^7.2.0" typescript: "npm:^5.5.4" @@ -2710,6 +2825,8 @@ __metadata: zod-to-json-schema: "npm:^3.23.3" peerDependencies: "@googleapis/customsearch": ^3.2.0 + "@grpc/grpc-js": ^1.11.3 + "@grpc/proto-loader": ^0.7.13 "@ibm-generative-ai/node-sdk": ~3.2.3 "@langchain/community": ~0.2.28 "@langchain/core": ~0.2.27 @@ -4552,6 +4669,18 @@ __metadata: languageName: node linkType: hard +"fdir@npm:^6.3.0": + version: 6.4.0 + resolution: "fdir@npm:6.4.0" + peerDependencies: + picomatch: ^3 || ^4 + peerDependenciesMeta: + picomatch: + optional: true + checksum: 10c0/9a03efa1335d78ea386b701799b08ad9e7e8da85d88567dc162cd28dd8e9486e8c269b3e95bfeb21dd6a5b14ebf69d230eb6e18f49d33fbda3cd97432f648c48 + languageName: node + linkType: hard + "fetch-blob@npm:^3.1.2, fetch-blob@npm:^3.1.4": version: 3.2.0 resolution: "fetch-blob@npm:3.2.0" @@ -6553,6 +6682,13 @@ __metadata: languageName: node linkType: hard +"long@npm:^5.0.0": + version: 5.2.3 + resolution: "long@npm:5.2.3" + checksum: 10c0/6a0da658f5ef683b90330b1af76f06790c623e148222da9d75b60e266bbf88f803232dd21464575681638894a84091616e7f89557aa087fd14116c0f4e0e43d9 + languageName: node + linkType: hard + "longest-streak@npm:^3.0.0": version: 3.1.0 resolution: "longest-streak@npm:3.1.0" @@ -8315,6 +8451,13 @@ __metadata: languageName: node linkType: hard +"picomatch@npm:^4.0.2": + version: 4.0.2 + resolution: "picomatch@npm:4.0.2" + checksum: 10c0/7c51f3ad2bb42c776f49ebf964c644958158be30d0a510efd5a395e8d49cb5acfed5b82c0c5b365523ce18e6ab85013c9ebe574f60305892ec3fa8eee8304ccc + languageName: node + linkType: hard + "pidtree@npm:~0.6.0": version: 0.6.0 resolution: "pidtree@npm:0.6.0" @@ -8533,6 +8676,26 @@ __metadata: languageName: node linkType: hard +"protobufjs@npm:^7.2.5": + version: 7.4.0 + resolution: "protobufjs@npm:7.4.0" + dependencies: + "@protobufjs/aspromise": "npm:^1.1.2" + "@protobufjs/base64": "npm:^1.1.2" + "@protobufjs/codegen": "npm:^2.0.4" + "@protobufjs/eventemitter": "npm:^1.1.0" + "@protobufjs/fetch": "npm:^1.1.0" + "@protobufjs/float": "npm:^1.0.2" + "@protobufjs/inquire": "npm:^1.1.0" + "@protobufjs/path": "npm:^1.1.2" + "@protobufjs/pool": "npm:^1.1.0" + "@protobufjs/utf8": "npm:^1.1.0" + "@types/node": "npm:>=13.7.0" + long: "npm:^5.0.0" + checksum: 10c0/a5460a63fe596523b9a067cbce39a6b310d1a71750fda261f076535662aada97c24450e18c5bc98a27784f70500615904ff1227e1742183509f0db4fdede669b + languageName: node + linkType: hard + "protocols@npm:^2.0.0, protocols@npm:^2.0.1": version: 2.0.1 resolution: "protocols@npm:2.0.1" @@ -9836,6 +9999,16 @@ __metadata: languageName: node linkType: hard +"tinyglobby@npm:^0.2.1": + version: 0.2.6 + resolution: "tinyglobby@npm:0.2.6" + dependencies: + fdir: "npm:^6.3.0" + picomatch: "npm:^4.0.2" + checksum: 10c0/d7b5eb4c5b9c341f961c1d3c30624f9a1e22b27b48a79a65b48120245a77c143827f75f5854628fef1a4bd4bc3cfaf06ce76497f3a574e3f933229c5e556e5fe + languageName: node + linkType: hard + "tinypool@npm:^1.0.0": version: 1.0.1 resolution: "tinypool@npm:1.0.1" @@ -9955,9 +10128,9 @@ __metadata: languageName: node linkType: hard -"tsup@npm:^8.2.4": - version: 8.2.4 - resolution: "tsup@npm:8.2.4" +"tsup@npm:^8.3.0": + version: 8.3.0 + resolution: "tsup@npm:8.3.0" dependencies: bundle-require: "npm:^5.0.0" cac: "npm:^6.7.14" 
@@ -9966,7 +10139,6 @@ __metadata:
     debug: "npm:^4.3.5"
     esbuild: "npm:^0.23.0"
     execa: "npm:^5.1.1"
-    globby: "npm:^11.1.0"
     joycon: "npm:^3.1.1"
     picocolors: "npm:^1.0.1"
     postcss-load-config: "npm:^6.0.1"
@@ -9974,6 +10146,7 @@ __metadata:
     rollup: "npm:^4.19.0"
     source-map: "npm:0.8.0-beta.0"
     sucrase: "npm:^3.35.0"
+    tinyglobby: "npm:^0.2.1"
     tree-kill: "npm:^1.2.2"
   peerDependencies:
     "@microsoft/api-extractor": ^7.36.0
@@ -9992,7 +10165,7 @@ __metadata:
   bin:
     tsup: dist/cli-default.js
     tsup-node: dist/cli-node.js
-  checksum: 10c0/0fa967ae0feb483528ae52fd9988d7931a092b3645a456e23b62ac03a0556da6f569fb2a77b3baf50ff1d12b9afac38aa9d29f15eb82c8a2b4faeda8362858da
+  checksum: 10c0/7f7132e48fca2284fd721077c6462c440dabdc95bcacf2e9837f81d2ca9771f804ff4f8b81a743e8fc6c3def856cf3ae99421c0568a7f030196abdc9e12e97e8
   languageName: node
   linkType: hard
 
@@ -10802,7 +10975,7 @@ __metadata:
   languageName: node
   linkType: hard
 
-"yargs@npm:^17.0.0":
+"yargs@npm:^17.0.0, yargs@npm:^17.7.2":
   version: 17.7.2
   resolution: "yargs@npm:17.7.2"
   dependencies: